Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,19 @@ MAIL_PASSWORD=
XUNFEI_TTS_APP_ID=
XUNFEI_TTS_API_KEY=
XUNFEI_TTS_SECRET_KEY=
XUNFEI_TTS_HOST=tts-api.xfyun.cn
XUNFEI_TTS_PATH=/v2/tts

# 科大讯飞 STT
XUNFEI_STT_APP_ID=
XUNFEI_STT_API_KEY=
XUNFEI_STT_SECRET_KEY=
XUNFEI_STT_HOST=iat-api.xfyun.cn
XUNFEI_STT_PATH=/v2/iat

# AI provider 选择
AI_LLM_PROVIDER=qiniu
AI_STT_PROVIDER=qiniu
AI_STT_PROVIDER=xunfei
AI_TTS_PROVIDER=xunfei

# 七牛云对象存储
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.core.publisher.Sinks;
import reactor.core.scheduler.Schedulers;

import java.time.LocalDateTime;
import java.util.*;
Expand Down Expand Up @@ -360,8 +361,7 @@ private UnifiedAiRequest buildLlmRequest(Conversation conversation, Character ch
// ═══════════════════════════════════════════════════════

private Mono<Message> saveMessage(Long conversationId, String content, SenderType senderType, Long userId) {
return Mono.fromCallable(() -> {
Message message = new Message();
return Mono.fromCallable(() -> { Message message = new Message();
Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mono.fromCallable 的 lambda 开头被挤在同一行({ Message message...),看起来像是误删了换行/缩进,容易触发格式化或 Checkstyle/lint 规则,也降低可读性。建议把 { 后的语句按项目常规格式换行并对齐缩进。

Suggested change
return Mono.fromCallable(() -> { Message message = new Message();
return Mono.fromCallable(() -> {
Message message = new Message();

Copilot uses AI. Check for mistakes.
message.setMessageUuid(UUID.randomUUID());
message.setConversationId(conversationId);
message.setSenderType(senderType.getCode());
Expand Down Expand Up @@ -399,7 +399,7 @@ private Mono<Message> saveMessage(Long conversationId, String content, SenderTyp
}

return message;
});
}).subscribeOn(Schedulers.boundedElastic());
}

// ── 内部上下文对象 ──
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CompletionStage;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

Expand Down Expand Up @@ -93,6 +94,9 @@ public Flux<SttResult> streamRecognize(Flux<byte[]> audioStream, SttConfig confi
AtomicBoolean isFirstFrame = new AtomicBoolean(true);
AtomicInteger status = new AtomicInteger(0); // 0: 第一帧, 1: 中间帧, 2: 最后一帧

// wpgs 模式下按 sn 累积识别段落
ConcurrentSkipListMap<Integer, String> segments = new ConcurrentSkipListMap<>();

// 添加心跳检测机制
AtomicBoolean heartbeatActive = new AtomicBoolean(true);

Expand All @@ -115,13 +119,67 @@ public CompletionStage<?> onText(WebSocket webSocket, CharSequence data, boolean
logger.debug("🎤【科大讯飞WebSocket STT】收到响应: {}", responseText);

JsonNode response = objectMapper.readTree(responseText);
SttResult result = parseWebSocketResponse(response, config);
int code = response.path("code").asInt(-1);
if (code != 0) {
String msg = response.path("message").asText("未知错误");
logger.error("🎤【科大讯飞WebSocket STT】API错误 code={}: {}", code, msg);
SttResult errorResult = new SttResult();
errorResult.setText("API错误: " + msg);
errorResult.setConfidence(0.0);
errorResult.setFinal(true);
Map<String, Object> errMeta = new HashMap<>();
errMeta.put("error", msg);
errMeta.put("provider", "XunfeiWebSocketSTT");
errorResult.setMetadata(errMeta);
sink.next(errorResult);
return WebSocket.Listener.super.onText(webSocket, data, last);
}
Comment on lines +122 to +136
Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

当 response.code != 0 时这里只是 sink.next(errorResult) 然后继续返回 onText,但没有主动关闭 WebSocket、停止心跳或终止 sink。这样音频流仍会继续发送帧,可能导致持续报错/资源占用,调用端也可能一直等不到完成信号。建议在该分支里:停止 heartbeatActive、对 webSocket 发送 close,并 sink.complete() 或 sink.error(...)(并确保后续不再发送音频帧)。

Copilot uses AI. Check for mistakes.

if (result != null && StringUtils.hasText(result.getText())) {
logger.info("【科大讯飞WebSocket STT识别】文字: '{}', 置信度: {}, 最终: {}, 语言: {}",
result.getText(), result.getConfidence(), result.isFinal(), config.getLanguage());
JsonNode dataNode = response.path("data");
int frameStatus = dataNode.path("status").asInt(-1);
JsonNode result = dataNode.path("result");

Comment on lines +138 to +141
Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里把解析逻辑内联后,类里原来的 parseWebSocketResponse(...) 已不再被调用(当前仓库内也没有其他引用),会留下死代码并增加后续维护/逻辑漂移风险。建议删除该方法,或复用该方法来解析响应并在其基础上做 wpgs 段落合并。

Copilot uses AI. Check for mistakes.
// 提取本帧文字
StringBuilder segText = new StringBuilder();
JsonNode ws = result.path("ws");
if (ws.isArray()) {
for (JsonNode wsItem : ws) {
for (JsonNode cwItem : wsItem.path("cw")) {
String w = cwItem.path("w").asText();
if (StringUtils.hasText(w)) segText.append(w);
}
}
}

// 按 sn + pgs/rg 累积文字(wpgs 模式)
int sn = result.path("sn").asInt(0);
String pgs = result.path("pgs").asText("apd");
if ("rpl".equals(pgs)) {
JsonNode rg = result.path("rg");
if (rg.isArray() && rg.size() == 2) {
int from = rg.get(0).asInt();
int to = rg.get(1).asInt();
for (int i = from; i <= to; i++) segments.remove(i);
}
}
if (segText.length() > 0) {
segments.put(sn, segText.toString());
}

sink.next(result);
String fullText = String.join("", segments.values());
if (!fullText.isEmpty()) {
boolean isFinal = (frameStatus == 2);
logger.info("【科大讯飞WebSocket STT识别】文字: '{}', 最终: {}", fullText, isFinal);
SttResult sttResult = new SttResult();
sttResult.setText(fullText);
sttResult.setConfidence(0.95);
sttResult.setFinal(isFinal);
Map<String, Object> metadata = new HashMap<>();
metadata.put("provider", "XunfeiWebSocketSTT");
metadata.put("language", config.getLanguage());
metadata.put("status", frameStatus);
sttResult.setMetadata(metadata);
sink.next(sttResult);
}

} catch (Exception e) {
Expand Down Expand Up @@ -270,8 +328,9 @@ private String buildWebSocketUrl() throws Exception {

logger.debug("🔐 Authorization字符串: {}", authorization);

// URL编码
String encodedAuthorization = URLEncoder.encode(authorization, StandardCharsets.UTF_8);
// 先 Base64 编码 authorization,再 URL 编码
String base64Authorization = Base64.getEncoder().encodeToString(authorization.getBytes(StandardCharsets.UTF_8));
String encodedAuthorization = URLEncoder.encode(base64Authorization, StandardCharsets.UTF_8);
String encodedDate = URLEncoder.encode(date, StandardCharsets.UTF_8);
String encodedHost = URLEncoder.encode(host, StandardCharsets.UTF_8);

Expand All @@ -298,16 +357,12 @@ private Map<String, Object> buildAudioFrame(byte[] audioData, SttConfig config,
if (status == 0) {
Map<String, Object> business = new HashMap<>();
business.put("language", mapLanguage(config.getLanguage()));
business.put("domain", "iat"); // 通用识别
business.put("accent", "mandarin"); // 普通话
business.put("vad_eos", 3000); // 静音检测时长3秒(优化:从10秒减少到3秒,提高响应速度)
business.put("max_rg", 30000); // 最大录音时长30秒,防止无限录音
business.put("nunum", 0); // 将返回结果数字格式化(0:数字,1:文字)
business.put("ptt", 1); // 开启标点符号添加
business.put("rlang", "zh-cn"); // 返回语言类型
business.put("vinfo", 1); // 是否返回语音信息
business.put("speex_size", 30); // speex音频帧长度,用于VAD
business.put("dwa", "wpgs"); // 动态修正
business.put("domain", "iat");
business.put("accent", "mandarin");
business.put("vad_eos", 3000);
business.put("dwa", "wpgs");
business.put("ptt", 1);
business.put("nunum", 0);
frame.put("business", business);
}

Expand Down
15 changes: 15 additions & 0 deletions vocata-server/src/main/resources/application.yml
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,21 @@ qiniu:
endpoint: ${QINIU_STT_ENDPOINT:https://api.qnaigc.com/v1}
model: ${QINIU_STT_MODEL:asr}

# 科大讯飞配置
xunfei:
stt:
app-id: ${XUNFEI_STT_APP_ID:your-xunfei-stt-app-id}
api-key: ${XUNFEI_STT_API_KEY:your-xunfei-stt-api-key}
secret-key: ${XUNFEI_STT_SECRET_KEY:your-xunfei-stt-secret-key}
Comment on lines +148 to +150
Copy link

Copilot AI Apr 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里给 xunfei.stt.* 配置设置了默认占位符值(your-xunfei-stt-...)。但 XunfeiWebSocketSttClient#isAvailable() 目前只排除了 "your-xunfei-app-id/api-key/secret-key" 这几个占位符字符串,导致在仅设置 AI_STT_PROVIDER=xunfei 且未提供真实凭据时,appId/apiKey/secretKey 会落到这些默认值并被误判为“已配置”,随后连接科大讯飞会失败。建议:要么去掉这些默认值(留空让应用明确判定未配置),要么把 isAvailable() 里的占位符判断与这里的默认值保持一致。

Suggested change
app-id: ${XUNFEI_STT_APP_ID:your-xunfei-stt-app-id}
api-key: ${XUNFEI_STT_API_KEY:your-xunfei-stt-api-key}
secret-key: ${XUNFEI_STT_SECRET_KEY:your-xunfei-stt-secret-key}
app-id: ${XUNFEI_STT_APP_ID:}
api-key: ${XUNFEI_STT_API_KEY:}
secret-key: ${XUNFEI_STT_SECRET_KEY:}

Copilot uses AI. Check for mistakes.
host: ${XUNFEI_STT_HOST:iat-api.xfyun.cn}
path: ${XUNFEI_STT_PATH:/v2/iat}
tts:
app-id: ${XUNFEI_TTS_APP_ID:your-xunfei-tts-app-id}
api-key: ${XUNFEI_TTS_API_KEY:your-xunfei-tts-api-key}
secret-key: ${XUNFEI_TTS_SECRET_KEY:your-xunfei-tts-secret-key}
host: ${XUNFEI_TTS_HOST:tts-api.xfyun.cn}
path: ${XUNFEI_TTS_PATH:/v2/tts}

# Gemini配置
gemini:
api:
Expand Down
4 changes: 4 additions & 0 deletions vocata-web/src/utils/aiChat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -922,6 +922,10 @@ export class VocaTaAIChat {

/**
 * Handles an error message pushed by the server.
 *
 * Logs the error and, when the voice state is anything other than 'idle',
 * stops the current recording so the client does not stay stuck.
 *
 * @param message server error message; its `error` field is logged
 */
private handleError(message: ServerErrorMessage): void {
  console.error('❌ 服务器错误:', message.error)

  // Voice state is idle: nothing to stop.
  if (this.voiceState === 'idle') {
    return
  }

  // Stop recording on a best-effort basis; a failure while stopping is deliberately ignored.
  this.stopRecording().catch(() => undefined)
}

// 公开方法
Expand Down
Loading