From f5f82e7a28450d18d769724987ab6eb4f7e34daa Mon Sep 17 00:00:00 2001 From: sim Date: Sun, 21 Jun 2026 11:08:55 +0800 Subject: [PATCH 1/2] =?UTF-8?q?fix(vault):=20Windows=20=E5=87=AD=E6=8D=AE?= =?UTF-8?q?=E8=AF=BB=E5=8F=96=E7=9E=AC=E6=97=B6=E5=A4=B1=E8=B4=A5=E9=87=8D?= =?UTF-8?q?=E8=AF=95=EF=BC=8C=E4=BF=AE=E6=A6=82=E7=8E=87=E6=80=A7=E3=80=8C?= =?UTF-8?q?=E7=81=AB=E5=B1=B1=E5=BC=95=E6=93=8E=E6=9C=AA=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=E3=80=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Windows Credential Manager(CredReadW)在登录后/并发下读 manifest+各 chunk 条目 时可能瞬时失败。任一条目读失败会让整组凭据看起来为空 → load_keyring_credentials 返回 Err → load_credentials 回退到空默认值 → 概览页显示「火山引擎未配置」,但下 一次听写重读成功,所以是「概率性」且「实际可正常使用」。凭据 chunk 越多,单次 load 读取次数越多,命中瞬时失败的概率越高。 修法:仅在 Windows 上对非 NoEntry 的读错误做几次短退避重试(4 次 / 60·n ms)。 NoEntry 是确定的「未存储」,立即返回不付重试延迟。macOS/Linux 保持原单次行为 ——它们的读错误是 ACL 拒绝,重试无益,且未缓存的错误路径下次调用本就会重读, 加 sleep 只会拖慢 macOS 首次 Keychain 授权流程。纯后端、cfg 隔离,零 PC-macOS 影响。 --- .../src-tauri/src/persistence/credentials.rs | 59 +++++++++++++++++-- 1 file changed, 54 insertions(+), 5 deletions(-) diff --git a/openless-all/app/src-tauri/src/persistence/credentials.rs b/openless-all/app/src-tauri/src/persistence/credentials.rs index 05718145..4c28e20e 100644 --- a/openless-all/app/src-tauri/src/persistence/credentials.rs +++ b/openless-all/app/src-tauri/src/persistence/credentials.rs @@ -362,13 +362,62 @@ fn read_chunk_manifest(json: &str) -> Option { } } +/// Windows Credential Manager (`CredReadW`) can transiently fail right after +/// login / under contention when we read the manifest entry plus every chunk +/// entry in quick succession. A single failed read makes the whole credential +/// set look empty → `load_keyring_credentials` returns `Err` → `load_credentials` +/// falls back to an empty default → Overview shows「火山引擎未配置」even though the +/// secrets are present (the next dictation re-reads and succeeds, which is why the +/// bug is *probabilistic* and the app "实际可以正常使用"). The more chunks a +/// credential set spans, the more reads per load, the higher the odds at least +/// one trips. Retry transient errors a few times with short backoff. +/// +/// macOS / Linux keep the original single-shot behavior on purpose: their read +/// errors are ACL denials that won't heal on retry, and the un-cached error path +/// already retries on the next call — adding sleeps there would only slow the +/// macOS first-launch Keychain authorization flow. +#[cfg(target_os = "windows")] +const KEYRING_READ_RETRY_ATTEMPTS: usize = 4; +#[cfg(target_os = "windows")] +const KEYRING_READ_RETRY_BACKOFF_MS: u64 = 60; + #[cfg(not(target_os = "android"))] fn get_keyring_password(account: &str) -> Result> { - match keyring_entry_for(account)?.get_password() { - Ok(value) => Ok(Some(value)), - Err(keyring::Error::NoEntry) => Ok(None), - Err(e) => { - Err(anyhow!(e)).with_context(|| format!("read system credential vault {account}")) + #[cfg(target_os = "windows")] + { + let mut attempt = 0usize; + loop { + match keyring_entry_for(account)?.get_password() { + Ok(value) => return Ok(Some(value)), + // NoEntry is a definitive "not stored" answer, never a transient + // failure — return immediately so genuinely-unconfigured providers + // don't pay the retry latency. + Err(keyring::Error::NoEntry) => return Ok(None), + Err(e) => { + attempt += 1; + if attempt >= KEYRING_READ_RETRY_ATTEMPTS { + return Err(anyhow!(e)) + .with_context(|| format!("read system credential vault {account}")); + } + log::warn!( + "[vault] transient credential read for {account} failed \ + (attempt {attempt}/{KEYRING_READ_RETRY_ATTEMPTS}): {e}; retrying" + ); + std::thread::sleep(std::time::Duration::from_millis( + KEYRING_READ_RETRY_BACKOFF_MS * attempt as u64, + )); + } + } + } + } + #[cfg(not(target_os = "windows"))] + { + match keyring_entry_for(account)?.get_password() { + Ok(value) => Ok(Some(value)), + Err(keyring::Error::NoEntry) => Ok(None), + Err(e) => { + Err(anyhow!(e)).with_context(|| format!("read system credential vault {account}")) + } } } } From e164d32198affd7c1e13b9269637064dfa9c854a Mon Sep 17 00:00:00 2001 From: sim Date: Sun, 21 Jun 2026 11:08:55 +0800 Subject: [PATCH 2/2] =?UTF-8?q?fix(asr):=20=E7=81=AB=E5=B1=B1=E5=BC=95?= =?UTF-8?q?=E6=93=8E=E8=BF=9E=E6=8E=A5=E5=A2=9E=E5=8A=A0=E8=B6=85=E6=97=B6?= =?UTF-8?q?=E4=B8=8E=E9=87=8D=E8=AF=95=EF=BC=8C=E4=BF=AE=E5=BC=B1=E7=BD=91?= =?UTF-8?q?=E4=B8=8B=E6=97=A0=E6=B3=95=E8=AF=AD=E9=9F=B3=E8=BE=93=E5=85=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit connect_async 此前既无超时也无重试:弱网下 TLS/WebSocket 握手可能挂到 OS 级 TCP 超时(几十秒),期间用户卡在「Starting」;而协调器全局超时只覆盖 await_final_result,不覆盖 open_session,所以握手挂起完全不受约束。单次网络 抖动(连接被重置/瞬时 DNS 失败)也会直接让整次听写失败。 修法:每次握手包 5s 超时,最多 3 次尝试、250·n ms 退避。AuthRejected(凭据被拒) 短路不重试——重试不会变好只会拖慢报错。把请求构造抽成 build_connect_request (connect_async 消费 request 且 http::Request 非 Clone,需每次重建)。 桌面与安卓共享此路径,改动对两端等效;最坏耗时从「无界(~75s)」降到有界(~15s) 并新增抖动可恢复,对 PC 端只增不减。 --- .../app/src-tauri/src/asr/volcengine.rs | 111 +++++++++++++----- 1 file changed, 83 insertions(+), 28 deletions(-) diff --git a/openless-all/app/src-tauri/src/asr/volcengine.rs b/openless-all/app/src-tauri/src/asr/volcengine.rs index f356710c..cc595600 100644 --- a/openless-all/app/src-tauri/src/asr/volcengine.rs +++ b/openless-all/app/src-tauri/src/asr/volcengine.rs @@ -32,6 +32,15 @@ const BYTES_PER_MS: f64 = 32.0; const HOTWORD_CAP: usize = 80; const FINAL_RESULT_TIMEOUT: Duration = Duration::from_secs(12); +/// 弱网下 TLS/WebSocket 握手可能一直挂到 OS 级 TCP 超时(几十秒),期间用户卡在 +/// 「Starting」无法语音输入。协调器的全局超时只覆盖 `await_final_result`,**不**覆盖 +/// `open_session`,所以这里必须自己给握手设上限:超时即快速失败并重试,而不是冻结。 +const CONNECT_TIMEOUT: Duration = Duration::from_secs(5); +/// 单次网络抖动(连接被重置 / 瞬时 DNS 失败)以前会直接让整次听写失败。重试几次让 +/// 抖动可恢复。`AuthRejected`(凭据被拒)不在重试之列——重试也不会变好,只会拖慢报错。 +const CONNECT_MAX_ATTEMPTS: usize = 3; +const CONNECT_RETRY_BACKOFF: Duration = Duration::from_millis(250); + #[derive(Clone, Debug)] pub struct VolcengineCredentials { pub app_id: String, @@ -131,34 +140,7 @@ impl VolcengineStreamingASR { } let connect_id = Uuid::new_v4().to_string(); - let mut request = ENDPOINT - .into_client_request() - .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?; - let headers = request.headers_mut(); - headers.insert( - "X-Api-App-Key", - HeaderValue::from_str(&self.credentials.app_id) - .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?, - ); - headers.insert( - "X-Api-Access-Key", - HeaderValue::from_str(&self.credentials.access_token) - .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?, - ); - headers.insert( - "X-Api-Resource-Id", - HeaderValue::from_str(&self.credentials.resource_id) - .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?, - ); - headers.insert( - "X-Api-Connect-Id", - HeaderValue::from_str(&connect_id) - .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?, - ); - - let (ws, _resp) = connect_async(request) - .await - .map_err(classify_connect_error)?; + let ws = self.connect_with_retry(&connect_id).await?; let (write, read) = ws.split(); let (tx, rx) = oneshot::channel(); @@ -260,6 +242,79 @@ impl VolcengineStreamingASR { Ok(()) } + /// Build the WebSocket handshake request (endpoint + auth headers). Rebuilt + /// per connect attempt because `connect_async` consumes the request and + /// `http::Request` is not `Clone`. + fn build_connect_request( + &self, + connect_id: &str, + ) -> Result + { + let mut request = ENDPOINT + .into_client_request() + .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?; + let headers = request.headers_mut(); + headers.insert( + "X-Api-App-Key", + HeaderValue::from_str(&self.credentials.app_id) + .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?, + ); + headers.insert( + "X-Api-Access-Key", + HeaderValue::from_str(&self.credentials.access_token) + .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?, + ); + headers.insert( + "X-Api-Resource-Id", + HeaderValue::from_str(&self.credentials.resource_id) + .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?, + ); + headers.insert( + "X-Api-Connect-Id", + HeaderValue::from_str(connect_id) + .map_err(|e| VolcengineASRError::ConnectionFailed(e.to_string()))?, + ); + Ok(request) + } + + /// Connect with a per-attempt timeout and bounded retries so a poor network + /// (hung handshake or a transient blip) doesn't kill the whole dictation. + /// `AuthRejected` short-circuits — bad credentials never heal on retry. + async fn connect_with_retry(&self, connect_id: &str) -> Result { + let mut attempt = 0usize; + loop { + attempt += 1; + let request = self.build_connect_request(connect_id)?; + match tokio::time::timeout(CONNECT_TIMEOUT, connect_async(request)).await { + Ok(Ok((ws, _resp))) => return Ok(ws), + Ok(Err(e)) => { + let classified = classify_connect_error(e); + if matches!(classified, VolcengineASRError::AuthRejected(_)) + || attempt >= CONNECT_MAX_ATTEMPTS + { + return Err(classified); + } + log::warn!( + "[asr] 连接尝试 {attempt}/{CONNECT_MAX_ATTEMPTS} 失败: {classified};重试中" + ); + } + Err(_) => { + if attempt >= CONNECT_MAX_ATTEMPTS { + return Err(VolcengineASRError::ConnectionFailed(format!( + "连接超时({} ms)", + CONNECT_TIMEOUT.as_millis() + ))); + } + log::warn!( + "[asr] 连接尝试 {attempt}/{CONNECT_MAX_ATTEMPTS} 超时({} ms);重试中", + CONNECT_TIMEOUT.as_millis() + ); + } + } + tokio::time::sleep(CONNECT_RETRY_BACKOFF * attempt as u32).await; + } + } + pub async fn send_last_frame(&self) -> Result<(), VolcengineASRError> { // 等所有 fire-and-forget 发送完成。否则末帧(NegativeSequence)可能比尾部 // chunk 先到服务端,被识别为「流已结束」之后再到的 chunk 全部丢弃 = 尾句吞掉。