diff --git a/.env.example b/.env.example index e6c4d5a..31bf7f8 100644 --- a/.env.example +++ b/.env.example @@ -49,10 +49,19 @@ MAIL_PASSWORD= XUNFEI_TTS_APP_ID= XUNFEI_TTS_API_KEY= XUNFEI_TTS_SECRET_KEY= +XUNFEI_TTS_HOST=tts-api.xfyun.cn +XUNFEI_TTS_PATH=/v2/tts + +# 科大讯飞 STT +XUNFEI_STT_APP_ID= +XUNFEI_STT_API_KEY= +XUNFEI_STT_SECRET_KEY= +XUNFEI_STT_HOST=iat-api.xfyun.cn +XUNFEI_STT_PATH=/v2/iat # AI provider 选择 AI_LLM_PROVIDER=qiniu -AI_STT_PROVIDER=qiniu +AI_STT_PROVIDER=xunfei AI_TTS_PROVIDER=xunfei # 七牛云对象存储 diff --git a/vocata-server/src/main/java/com/vocata/ai/pipeline/StreamingPipelineOrchestrator.java b/vocata-server/src/main/java/com/vocata/ai/pipeline/StreamingPipelineOrchestrator.java index 68176c7..2a6d870 100644 --- a/vocata-server/src/main/java/com/vocata/ai/pipeline/StreamingPipelineOrchestrator.java +++ b/vocata-server/src/main/java/com/vocata/ai/pipeline/StreamingPipelineOrchestrator.java @@ -22,6 +22,7 @@ import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; import reactor.core.publisher.Sinks; +import reactor.core.scheduler.Schedulers; import java.time.LocalDateTime; import java.util.*; @@ -360,8 +361,7 @@ private UnifiedAiRequest buildLlmRequest(Conversation conversation, Character ch // ═══════════════════════════════════════════════════════ private Mono saveMessage(Long conversationId, String content, SenderType senderType, Long userId) { - return Mono.fromCallable(() -> { - Message message = new Message(); + return Mono.fromCallable(() -> { Message message = new Message(); message.setMessageUuid(UUID.randomUUID()); message.setConversationId(conversationId); message.setSenderType(senderType.getCode()); @@ -399,7 +399,7 @@ private Mono saveMessage(Long conversationId, String content, SenderTyp } return message; - }); + }).subscribeOn(Schedulers.boundedElastic()); } // ── 内部上下文对象 ── diff --git a/vocata-server/src/main/java/com/vocata/ai/stt/impl/XunfeiWebSocketSttClient.java b/vocata-server/src/main/java/com/vocata/ai/stt/impl/XunfeiWebSocketSttClient.java index d61a6fc..78cdbdf 100644 --- a/vocata-server/src/main/java/com/vocata/ai/stt/impl/XunfeiWebSocketSttClient.java +++ b/vocata-server/src/main/java/com/vocata/ai/stt/impl/XunfeiWebSocketSttClient.java @@ -25,6 +25,7 @@ import java.util.HashMap; import java.util.Map; import java.util.concurrent.CompletionStage; +import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -93,6 +94,9 @@ public Flux streamRecognize(Flux audioStream, SttConfig confi AtomicBoolean isFirstFrame = new AtomicBoolean(true); AtomicInteger status = new AtomicInteger(0); // 0: 第一帧, 1: 中间帧, 2: 最后一帧 + // wpgs 模式下按 sn 累积识别段落 + ConcurrentSkipListMap segments = new ConcurrentSkipListMap<>(); + // 添加心跳检测机制 AtomicBoolean heartbeatActive = new AtomicBoolean(true); @@ -115,13 +119,67 @@ public CompletionStage onText(WebSocket webSocket, CharSequence data, boolean logger.debug("🎤【科大讯飞WebSocket STT】收到响应: {}", responseText); JsonNode response = objectMapper.readTree(responseText); - SttResult result = parseWebSocketResponse(response, config); + int code = response.path("code").asInt(-1); + if (code != 0) { + String msg = response.path("message").asText("未知错误"); + logger.error("🎤【科大讯飞WebSocket STT】API错误 code={}: {}", code, msg); + SttResult errorResult = new SttResult(); + errorResult.setText("API错误: " + msg); + errorResult.setConfidence(0.0); + errorResult.setFinal(true); + Map errMeta = new HashMap<>(); + errMeta.put("error", msg); + errMeta.put("provider", "XunfeiWebSocketSTT"); + errorResult.setMetadata(errMeta); + sink.next(errorResult); + return WebSocket.Listener.super.onText(webSocket, data, last); + } - if (result != null && StringUtils.hasText(result.getText())) { - logger.info("【科大讯飞WebSocket STT识别】文字: '{}', 置信度: {}, 最终: {}, 语言: {}", - result.getText(), result.getConfidence(), result.isFinal(), config.getLanguage()); + JsonNode dataNode = response.path("data"); + int frameStatus = dataNode.path("status").asInt(-1); + JsonNode result = dataNode.path("result"); + + // 提取本帧文字 + StringBuilder segText = new StringBuilder(); + JsonNode ws = result.path("ws"); + if (ws.isArray()) { + for (JsonNode wsItem : ws) { + for (JsonNode cwItem : wsItem.path("cw")) { + String w = cwItem.path("w").asText(); + if (StringUtils.hasText(w)) segText.append(w); + } + } + } + + // 按 sn + pgs/rg 累积文字(wpgs 模式) + int sn = result.path("sn").asInt(0); + String pgs = result.path("pgs").asText("apd"); + if ("rpl".equals(pgs)) { + JsonNode rg = result.path("rg"); + if (rg.isArray() && rg.size() == 2) { + int from = rg.get(0).asInt(); + int to = rg.get(1).asInt(); + for (int i = from; i <= to; i++) segments.remove(i); + } + } + if (segText.length() > 0) { + segments.put(sn, segText.toString()); + } - sink.next(result); + String fullText = String.join("", segments.values()); + if (!fullText.isEmpty()) { + boolean isFinal = (frameStatus == 2); + logger.info("【科大讯飞WebSocket STT识别】文字: '{}', 最终: {}", fullText, isFinal); + SttResult sttResult = new SttResult(); + sttResult.setText(fullText); + sttResult.setConfidence(0.95); + sttResult.setFinal(isFinal); + Map metadata = new HashMap<>(); + metadata.put("provider", "XunfeiWebSocketSTT"); + metadata.put("language", config.getLanguage()); + metadata.put("status", frameStatus); + sttResult.setMetadata(metadata); + sink.next(sttResult); } } catch (Exception e) { @@ -270,8 +328,9 @@ private String buildWebSocketUrl() throws Exception { logger.debug("🔐 Authorization字符串: {}", authorization); - // URL编码 - String encodedAuthorization = URLEncoder.encode(authorization, StandardCharsets.UTF_8); + // 先 Base64 编码 authorization,再 URL 编码 + String base64Authorization = Base64.getEncoder().encodeToString(authorization.getBytes(StandardCharsets.UTF_8)); + String encodedAuthorization = URLEncoder.encode(base64Authorization, StandardCharsets.UTF_8); String encodedDate = URLEncoder.encode(date, StandardCharsets.UTF_8); String encodedHost = URLEncoder.encode(host, StandardCharsets.UTF_8); @@ -298,16 +357,12 @@ private Map buildAudioFrame(byte[] audioData, SttConfig config, if (status == 0) { Map business = new HashMap<>(); business.put("language", mapLanguage(config.getLanguage())); - business.put("domain", "iat"); // 通用识别 - business.put("accent", "mandarin"); // 普通话 - business.put("vad_eos", 3000); // 静音检测时长3秒(优化:从10秒减少到3秒,提高响应速度) - business.put("max_rg", 30000); // 最大录音时长30秒,防止无限录音 - business.put("nunum", 0); // 将返回结果数字格式化(0:数字,1:文字) - business.put("ptt", 1); // 开启标点符号添加 - business.put("rlang", "zh-cn"); // 返回语言类型 - business.put("vinfo", 1); // 是否返回语音信息 - business.put("speex_size", 30); // speex音频帧长度,用于VAD - business.put("dwa", "wpgs"); // 动态修正 + business.put("domain", "iat"); + business.put("accent", "mandarin"); + business.put("vad_eos", 3000); + business.put("dwa", "wpgs"); + business.put("ptt", 1); + business.put("nunum", 0); frame.put("business", business); } diff --git a/vocata-server/src/main/resources/application.yml b/vocata-server/src/main/resources/application.yml index e950945..e71c67b 100644 --- a/vocata-server/src/main/resources/application.yml +++ b/vocata-server/src/main/resources/application.yml @@ -142,6 +142,21 @@ qiniu: endpoint: ${QINIU_STT_ENDPOINT:https://api.qnaigc.com/v1} model: ${QINIU_STT_MODEL:asr} +# 科大讯飞配置 +xunfei: + stt: + app-id: ${XUNFEI_STT_APP_ID:your-xunfei-stt-app-id} + api-key: ${XUNFEI_STT_API_KEY:your-xunfei-stt-api-key} + secret-key: ${XUNFEI_STT_SECRET_KEY:your-xunfei-stt-secret-key} + host: ${XUNFEI_STT_HOST:iat-api.xfyun.cn} + path: ${XUNFEI_STT_PATH:/v2/iat} + tts: + app-id: ${XUNFEI_TTS_APP_ID:your-xunfei-tts-app-id} + api-key: ${XUNFEI_TTS_API_KEY:your-xunfei-tts-api-key} + secret-key: ${XUNFEI_TTS_SECRET_KEY:your-xunfei-tts-secret-key} + host: ${XUNFEI_TTS_HOST:tts-api.xfyun.cn} + path: ${XUNFEI_TTS_PATH:/v2/tts} + # Gemini配置 gemini: api: diff --git a/vocata-web/src/utils/aiChat.ts b/vocata-web/src/utils/aiChat.ts index 5eaa4c4..65eb8cb 100644 --- a/vocata-web/src/utils/aiChat.ts +++ b/vocata-web/src/utils/aiChat.ts @@ -922,6 +922,10 @@ export class VocaTaAIChat { private handleError(message: ServerErrorMessage): void { console.error('❌ 服务器错误:', message.error) + // 如果正在录音期间收到服务器错误,停止录音避免卡死 + if (this.voiceState !== 'idle') { + this.stopRecording().catch(() => undefined) + } } // 公开方法