diff --git a/openless-all/app/src-tauri/src/asr/volcengine.rs b/openless-all/app/src-tauri/src/asr/volcengine.rs index f356710c..cc595600 100644 --- a/openless-all/app/src-tauri/src/asr/volcengine.rs +++ b/openless-all/app/src-tauri/src/asr/volcengine.rs @@ -32,6 +32,15 @@ const BYTES_PER_MS: f64 = 32.0; const HOTWORD_CAP: usize = 80; const FINAL_RESULT_TIMEOUT: Duration = Duration::from_secs(12); +/// 弱网下 TLS/WebSocket 握手可能一直挂到 OS 级 TCP 超时(几十秒),期间用户卡在 +/// 「Starting」无法语音输入。协调器的全局超时只覆盖 `await_final_result`,**不**覆盖 +/// `open_session`,所以这里必须自己给握手设上限:超时即快速失败并重试,而不是冻结。 +const CONNECT_TIMEOUT: Duration = Duration::from_secs(5); +/// 单次网络抖动(连接被重置 / 瞬时 DNS 失败)以前会直接让整次听写失败。重试几次让 +/// 抖动可恢复。`AuthRejected`(凭据被拒)不在重试之列——重试也不会变好,只会拖慢报错。 +const CONNECT_MAX_ATTEMPTS: usize = 3; +const CONNECT_RETRY_BACKOFF: Duration = Duration::from_millis(250); + #[derive(Clone, Debug)] pub struct VolcengineCredentials { pub app_id: String, @@ -131,34 +140,7 @@ impl VolcengineStreamingASR { } let connect_id = Uuid::new_v4().to_string(); - let mut request = ENDPOINT - .into_client_request() - .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?; - let headers = request.headers_mut(); - headers.insert( - "X-Api-App-Key", - HeaderValue::from_str(&self.credentials.app_id) - .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?, - ); - headers.insert( - "X-Api-Access-Key", - HeaderValue::from_str(&self.credentials.access_token) - .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?, - ); - headers.insert( - "X-Api-Resource-Id", - HeaderValue::from_str(&self.credentials.resource_id) - .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?, - ); - headers.insert( - "X-Api-Connect-Id", - HeaderValue::from_str(&connect_id) - .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?, - ); - - let (ws, _resp) = connect_async(request) - .await - .map_err(classify_connect_error)?; + let ws = self.connect_with_retry(&connect_id).await?; let (write, read) = ws.split(); let (tx, rx) = oneshot::channel(); @@ -260,6 +242,79 @@ impl VolcengineStreamingASR { Ok(()) } + /// Build the WebSocket handshake request (endpoint + auth headers). Rebuilt + /// per connect attempt because `connect_async` consumes the request and + /// `http::Request` is not `Clone`. + fn build_connect_request( + &self, + connect_id: &str, + ) -> Result + { + let mut request = ENDPOINT + .into_client_request() + .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?; + let headers = request.headers_mut(); + headers.insert( + "X-Api-App-Key", + HeaderValue::from_str(&self.credentials.app_id) + .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?, + ); + headers.insert( + "X-Api-Access-Key", + HeaderValue::from_str(&self.credentials.access_token) + .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?, + ); + headers.insert( + "X-Api-Resource-Id", + HeaderValue::from_str(&self.credentials.resource_id) + .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?, + ); + headers.insert( + "X-Api-Connect-Id", + HeaderValue::from_str(connect_id) + .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?, + ); + Ok(request) + } + + /// Connect with a per-attempt timeout and bounded retries so a poor network + /// (hung handshake or a transient blip) doesn't kill the whole dictation. + /// `AuthRejected` short-circuits — bad credentials never heal on retry. + async fn connect_with_retry(&self, connect_id: &str) -> Result { + let mut attempt = 0usize; + loop { + attempt += 1; + let request = self.build_connect_request(connect_id)?; + match tokio::time::timeout(CONNECT_TIMEOUT, connect_async(request)).await { + Ok(Ok((ws, _resp))) => return Ok(ws), + Ok(Err(e)) => { + let classified = classify_connect_error(e); + if matches!(classified, VolcengineASRError::AuthRejected(_)) + || attempt >= CONNECT_MAX_ATTEMPTS + { + return Err(classified); + } + log::warn!( + "[asr] 连接尝试 {attempt}/{CONNECT_MAX_ATTEMPTS} 失败: {classified};重试中" + ); + } + Err(_) => { + if attempt >= CONNECT_MAX_ATTEMPTS { + return Err(VolcengineASRError::ConnectionFailed(format!( + "连接超时({} ms)", + CONNECT_TIMEOUT.as_millis() + ))); + } + log::warn!( + "[asr] 连接尝试 {attempt}/{CONNECT_MAX_ATTEMPTS} 超时({} ms);重试中", + CONNECT_TIMEOUT.as_millis() + ); + } + } + tokio::time::sleep(CONNECT_RETRY_BACKOFF * attempt as u32).await; + } + } + pub async fn send_last_frame(&self) -> Result<(), VolcengineASRError> { // 等所有 fire-and-forget 发送完成。否则末帧(NegativeSequence)可能比尾部 // chunk 先到服务端,被识别为「流已结束」之后再到的 chunk 全部丢弃 = 尾句吞掉。 diff --git a/openless-all/app/src-tauri/src/persistence/credentials.rs b/openless-all/app/src-tauri/src/persistence/credentials.rs index 05718145..4c28e20e 100644 --- a/openless-all/app/src-tauri/src/persistence/credentials.rs +++ b/openless-all/app/src-tauri/src/persistence/credentials.rs @@ -362,13 +362,62 @@ fn read_chunk_manifest(json: &str) -> Option { } } +/// Windows Credential Manager (`CredReadW`) can transiently fail right after +/// login / under contention when we read the manifest entry plus every chunk +/// entry in quick succession. A single failed read makes the whole credential +/// set look empty → `load_keyring_credentials` returns `Err` → `load_credentials` +/// falls back to an empty default → Overview shows「火山引擎未配置」even though the +/// secrets are present (the next dictation re-reads and succeeds, which is why the +/// bug is *probabilistic* and the app "实际可以正常使用"). The more chunks a +/// credential set spans, the more reads per load, the higher the odds at least +/// one trips. Retry transient errors a few times with short backoff. +/// +/// macOS / Linux keep the original single-shot behavior on purpose: their read +/// errors are ACL denials that won't heal on retry, and the un-cached error path +/// already retries on the next call — adding sleeps there would only slow the +/// macOS first-launch Keychain authorization flow. +#[cfg(target_os = "windows")] +const KEYRING_READ_RETRY_ATTEMPTS: usize = 4; +#[cfg(target_os = "windows")] +const KEYRING_READ_RETRY_BACKOFF_MS: u64 = 60; + #[cfg(not(target_os = "android"))] fn get_keyring_password(account: &str) -> Result> { - match keyring_entry_for(account)?.get_password() { - Ok(value) => Ok(Some(value)), - Err(keyring::Error::NoEntry) => Ok(None), - Err(e) => { - Err(anyhow!(e)).with_context(|| format!("read system credential vault {account}")) + #[cfg(target_os = "windows")] + { + let mut attempt = 0usize; + loop { + match keyring_entry_for(account)?.get_password() { + Ok(value) => return Ok(Some(value)), + // NoEntry is a definitive "not stored" answer, never a transient + // failure — return immediately so genuinely-unconfigured providers + // don't pay the retry latency. + Err(keyring::Error::NoEntry) => return Ok(None), + Err(e) => { + attempt += 1; + if attempt >= KEYRING_READ_RETRY_ATTEMPTS { + return Err(anyhow!(e)) + .with_context(|| format!("read system credential vault {account}")); + } + log::warn!( + "[vault] transient credential read for {account} failed \ + (attempt {attempt}/{KEYRING_READ_RETRY_ATTEMPTS}): {e}; retrying" + ); + std::thread::sleep(std::time::Duration::from_millis( + KEYRING_READ_RETRY_BACKOFF_MS * attempt as u64, + )); + } + } + } + } + #[cfg(not(target_os = "windows"))] + { + match keyring_entry_for(account)?.get_password() { + Ok(value) => Ok(Some(value)), + Err(keyring::Error::NoEntry) => Ok(None), + Err(e) => { + Err(anyhow!(e)).with_context(|| format!("read system credential vault {account}")) + } } } }