From a420d67fe3ae5c62905439c9061b4c4539700497 Mon Sep 17 00:00:00 2001 From: ailuckly Date: Thu, 16 Apr 2026 00:59:03 +0800 Subject: [PATCH 1/3] config: add Xunfei STT config to base application.yml xunfei.stt.* was only defined in test/prod profiles, causing XunfeiWebSocketSttClient.isAvailable() to return false on the local profile and silently fall back to QiniuSttClient. Moving the env-var mappings to application.yml makes them available across all profiles. Also updates .env.example with XUNFEI_STT_* fields and corrects AI_STT_PROVIDER default to xunfei. --- .env.example | 11 ++++++++++- vocata-server/src/main/resources/application.yml | 15 +++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index e6c4d5a..31bf7f8 100644 --- a/.env.example +++ b/.env.example @@ -49,10 +49,19 @@ MAIL_PASSWORD= XUNFEI_TTS_APP_ID= XUNFEI_TTS_API_KEY= XUNFEI_TTS_SECRET_KEY= +XUNFEI_TTS_HOST=tts-api.xfyun.cn +XUNFEI_TTS_PATH=/v2/tts + +# 科大讯飞 STT +XUNFEI_STT_APP_ID= +XUNFEI_STT_API_KEY= +XUNFEI_STT_SECRET_KEY= +XUNFEI_STT_HOST=iat-api.xfyun.cn +XUNFEI_STT_PATH=/v2/iat # AI provider 选择 AI_LLM_PROVIDER=qiniu -AI_STT_PROVIDER=qiniu +AI_STT_PROVIDER=xunfei AI_TTS_PROVIDER=xunfei # 七牛云对象存储 diff --git a/vocata-server/src/main/resources/application.yml b/vocata-server/src/main/resources/application.yml index e950945..e71c67b 100644 --- a/vocata-server/src/main/resources/application.yml +++ b/vocata-server/src/main/resources/application.yml @@ -142,6 +142,21 @@ qiniu: endpoint: ${QINIU_STT_ENDPOINT:https://api.qnaigc.com/v1} model: ${QINIU_STT_MODEL:asr} +# 科大讯飞配置 +xunfei: + stt: + app-id: ${XUNFEI_STT_APP_ID:your-xunfei-stt-app-id} + api-key: ${XUNFEI_STT_API_KEY:your-xunfei-stt-api-key} + secret-key: ${XUNFEI_STT_SECRET_KEY:your-xunfei-stt-secret-key} + host: ${XUNFEI_STT_HOST:iat-api.xfyun.cn} + path: ${XUNFEI_STT_PATH:/v2/iat} + tts: + app-id: ${XUNFEI_TTS_APP_ID:your-xunfei-tts-app-id} + api-key: 
${XUNFEI_TTS_API_KEY:your-xunfei-tts-api-key} + secret-key: ${XUNFEI_TTS_SECRET_KEY:your-xunfei-tts-secret-key} + host: ${XUNFEI_TTS_HOST:tts-api.xfyun.cn} + path: ${XUNFEI_TTS_PATH:/v2/tts} + # Gemini配置 gemini: api: From 6094457d3020ddc9ba7c0e24fed84a6193924daa Mon Sep 17 00:00:00 2001 From: ailuckly Date: Thu, 16 Apr 2026 01:01:20 +0800 Subject: [PATCH 2/3] fix: repair Xunfei IAT WebSocket authentication and result accumulation Three issues fixed: 1. Authorization missing base64 encoding: Xunfei API requires the authorization string to be base64-encoded before URL-encoding. Previously only URL-encoded, causing HTTP 401 on every connection. 2. Invalid business parameter: `max_rg` is not a valid Xunfei IAT field. Removed it along with other non-standard params (rlang, vinfo, speex_size) that caused "param validate error" responses. 3. wpgs mode returns only the last segment in the final frame: Added ConcurrentSkipListMap to accumulate recognition segments across frames. Handles pgs=rpl (replacement) and pgs=apd (append) so the final isFinal=true result contains the complete utterance. 
--- .../ai/stt/impl/XunfeiWebSocketSttClient.java | 89 +++++++++++++++---- 1 file changed, 72 insertions(+), 17 deletions(-) diff --git a/vocata-server/src/main/java/com/vocata/ai/stt/impl/XunfeiWebSocketSttClient.java b/vocata-server/src/main/java/com/vocata/ai/stt/impl/XunfeiWebSocketSttClient.java index d61a6fc..78cdbdf 100644 --- a/vocata-server/src/main/java/com/vocata/ai/stt/impl/XunfeiWebSocketSttClient.java +++ b/vocata-server/src/main/java/com/vocata/ai/stt/impl/XunfeiWebSocketSttClient.java @@ -25,6 +25,7 @@ import java.util.HashMap; import java.util.Map; import java.util.concurrent.CompletionStage; +import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -93,6 +94,9 @@ public Flux streamRecognize(Flux audioStream, SttConfig confi AtomicBoolean isFirstFrame = new AtomicBoolean(true); AtomicInteger status = new AtomicInteger(0); // 0: 第一帧, 1: 中间帧, 2: 最后一帧 + // wpgs 模式下按 sn 累积识别段落 + ConcurrentSkipListMap segments = new ConcurrentSkipListMap<>(); + // 添加心跳检测机制 AtomicBoolean heartbeatActive = new AtomicBoolean(true); @@ -115,13 +119,67 @@ public CompletionStage onText(WebSocket webSocket, CharSequence data, boolean logger.debug("🎤【科大讯飞WebSocket STT】收到响应: {}", responseText); JsonNode response = objectMapper.readTree(responseText); - SttResult result = parseWebSocketResponse(response, config); + int code = response.path("code").asInt(-1); + if (code != 0) { + String msg = response.path("message").asText("未知错误"); + logger.error("🎤【科大讯飞WebSocket STT】API错误 code={}: {}", code, msg); + SttResult errorResult = new SttResult(); + errorResult.setText("API错误: " + msg); + errorResult.setConfidence(0.0); + errorResult.setFinal(true); + Map errMeta = new HashMap<>(); + errMeta.put("error", msg); + errMeta.put("provider", "XunfeiWebSocketSTT"); + errorResult.setMetadata(errMeta); + sink.next(errorResult); + return WebSocket.Listener.super.onText(webSocket, data, last); + } - if 
(result != null && StringUtils.hasText(result.getText())) { - logger.info("【科大讯飞WebSocket STT识别】文字: '{}', 置信度: {}, 最终: {}, 语言: {}", - result.getText(), result.getConfidence(), result.isFinal(), config.getLanguage()); + JsonNode dataNode = response.path("data"); + int frameStatus = dataNode.path("status").asInt(-1); + JsonNode result = dataNode.path("result"); + + // 提取本帧文字 + StringBuilder segText = new StringBuilder(); + JsonNode ws = result.path("ws"); + if (ws.isArray()) { + for (JsonNode wsItem : ws) { + for (JsonNode cwItem : wsItem.path("cw")) { + String w = cwItem.path("w").asText(); + if (StringUtils.hasText(w)) segText.append(w); + } + } + } + + // 按 sn + pgs/rg 累积文字(wpgs 模式) + int sn = result.path("sn").asInt(0); + String pgs = result.path("pgs").asText("apd"); + if ("rpl".equals(pgs)) { + JsonNode rg = result.path("rg"); + if (rg.isArray() && rg.size() == 2) { + int from = rg.get(0).asInt(); + int to = rg.get(1).asInt(); + for (int i = from; i <= to; i++) segments.remove(i); + } + } + if (segText.length() > 0) { + segments.put(sn, segText.toString()); + } - sink.next(result); + String fullText = String.join("", segments.values()); + if (!fullText.isEmpty()) { + boolean isFinal = (frameStatus == 2); + logger.info("【科大讯飞WebSocket STT识别】文字: '{}', 最终: {}", fullText, isFinal); + SttResult sttResult = new SttResult(); + sttResult.setText(fullText); + sttResult.setConfidence(0.95); + sttResult.setFinal(isFinal); + Map metadata = new HashMap<>(); + metadata.put("provider", "XunfeiWebSocketSTT"); + metadata.put("language", config.getLanguage()); + metadata.put("status", frameStatus); + sttResult.setMetadata(metadata); + sink.next(sttResult); } } catch (Exception e) { @@ -270,8 +328,9 @@ private String buildWebSocketUrl() throws Exception { logger.debug("🔐 Authorization字符串: {}", authorization); - // URL编码 - String encodedAuthorization = URLEncoder.encode(authorization, StandardCharsets.UTF_8); + // 先 Base64 编码 authorization,再 URL 编码 + String base64Authorization = 
Base64.getEncoder().encodeToString(authorization.getBytes(StandardCharsets.UTF_8)); + String encodedAuthorization = URLEncoder.encode(base64Authorization, StandardCharsets.UTF_8); String encodedDate = URLEncoder.encode(date, StandardCharsets.UTF_8); String encodedHost = URLEncoder.encode(host, StandardCharsets.UTF_8); @@ -298,16 +357,12 @@ private Map buildAudioFrame(byte[] audioData, SttConfig config, if (status == 0) { Map business = new HashMap<>(); business.put("language", mapLanguage(config.getLanguage())); - business.put("domain", "iat"); // 通用识别 - business.put("accent", "mandarin"); // 普通话 - business.put("vad_eos", 3000); // 静音检测时长3秒(优化:从10秒减少到3秒,提高响应速度) - business.put("max_rg", 30000); // 最大录音时长30秒,防止无限录音 - business.put("nunum", 0); // 将返回结果数字格式化(0:数字,1:文字) - business.put("ptt", 1); // 开启标点符号添加 - business.put("rlang", "zh-cn"); // 返回语言类型 - business.put("vinfo", 1); // 是否返回语音信息 - business.put("speex_size", 30); // speex音频帧长度,用于VAD - business.put("dwa", "wpgs"); // 动态修正 + business.put("domain", "iat"); + business.put("accent", "mandarin"); + business.put("vad_eos", 3000); + business.put("dwa", "wpgs"); + business.put("ptt", 1); + business.put("nunum", 0); frame.put("business", business); } From 86498fefe294791746e78382577dc1b941d29257 Mon Sep 17 00:00:00 2001 From: ailuckly Date: Thu, 16 Apr 2026 01:01:45 +0800 Subject: [PATCH 3/3] fix: prevent second-round voice deadlock Two related fixes: 1. saveMessage blocks Netty IO thread: Mono.fromCallable with a blocking JDBC call was running on the reactive pipeline thread (Netty IO), causing doFinally to be delayed. This left audioSink non-null when the user started the second recording, resulting in a silent "already in progress" rejection. Fixed by adding .subscribeOn(Schedulers.boundedElastic()). 2. Server error silently deadlocks frontend recording: When the server rejected audio_start with an error, handleError() only logged it. 
The frontend stayed in voiceState='recording' with no active server pipeline, so no STT results ever came back. Fixed by calling stopRecording() on any server error received while voiceState is not 'idle'. --- .../vocata/ai/pipeline/StreamingPipelineOrchestrator.java | 6 +++--- vocata-web/src/utils/aiChat.ts | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/vocata-server/src/main/java/com/vocata/ai/pipeline/StreamingPipelineOrchestrator.java b/vocata-server/src/main/java/com/vocata/ai/pipeline/StreamingPipelineOrchestrator.java index 68176c7..2a6d870 100644 --- a/vocata-server/src/main/java/com/vocata/ai/pipeline/StreamingPipelineOrchestrator.java +++ b/vocata-server/src/main/java/com/vocata/ai/pipeline/StreamingPipelineOrchestrator.java @@ -22,6 +22,7 @@ import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; import reactor.core.publisher.Sinks; +import reactor.core.scheduler.Schedulers; import java.time.LocalDateTime; import java.util.*; @@ -360,8 +361,7 @@ private UnifiedAiRequest buildLlmRequest(Conversation conversation, Character ch // ═══════════════════════════════════════════════════════ private Mono saveMessage(Long conversationId, String content, SenderType senderType, Long userId) { - return Mono.fromCallable(() -> { - Message message = new Message(); + return Mono.fromCallable(() -> { Message message = new Message(); message.setMessageUuid(UUID.randomUUID()); message.setConversationId(conversationId); message.setSenderType(senderType.getCode()); @@ -399,7 +399,7 @@ private Mono saveMessage(Long conversationId, String content, SenderTyp } return message; - }); + }).subscribeOn(Schedulers.boundedElastic()); } // ── 内部上下文对象 ── diff --git a/vocata-web/src/utils/aiChat.ts b/vocata-web/src/utils/aiChat.ts index 5eaa4c4..65eb8cb 100644 --- a/vocata-web/src/utils/aiChat.ts +++ b/vocata-web/src/utils/aiChat.ts @@ -922,6 +922,10 @@ export class VocaTaAIChat { private handleError(message: ServerErrorMessage): void { console.error('❌ 服务器错误:', 
message.error) + // 如果正在录音期间收到服务器错误,停止录音避免卡死 + if (this.voiceState !== 'idle') { + this.stopRecording().catch(() => undefined) + } } // 公开方法