diff --git a/Diduny/App/AppDelegate+MeetingRecording.swift b/Diduny/App/AppDelegate+MeetingRecording.swift index 3f74645..3ae3486 100644 --- a/Diduny/App/AppDelegate+MeetingRecording.swift +++ b/Diduny/App/AppDelegate+MeetingRecording.swift @@ -346,8 +346,9 @@ extension AppDelegate { // Finalize and disconnect real-time transcription (if active) let hasRealtimeSession = await MainActor.run { appState.liveTranscriptStore != nil } + var didReceiveRealtimeFinalization = true if hasRealtimeSession { - _ = await realtimeTranscriptionService.finalize() + didReceiveRealtimeFinalization = await realtimeTranscriptionService.finalize() await realtimeTranscriptionService.disconnect() meetingRecorderService.onRealtimeAudioData = nil } @@ -370,6 +371,7 @@ extension AppDelegate { var capturedAudioURL: URL? var originalWavURL: URL? let stopTime = Date() + let duration = recordingStartTime.map { stopTime.timeIntervalSince($0) } ?? 0 let recordingId = UUID() // Capture in-progress recording ID before stopRecording() clears it (RLR-M1). let inProgressRecordingId = meetingRecorderService.currentRecordingId @@ -411,12 +413,23 @@ extension AppDelegate { let realtimeText = await MainActor.run { store?.finalTranscriptText ?? "" } let cloudModeEnabled = SettingsStorage.shared.effectiveMeetingRealtimeTranscriptionEnabled + let shouldUseRealtimeText = shouldAcceptRealtimeTranscript( + realtimeText, + duration: duration, + didReceiveFinalization: didReceiveRealtimeFinalization + ) let rawText: String? - if !realtimeText.isEmpty { + if shouldUseRealtimeText { rawText = realtimeText Log.app.info("Using real-time transcript (\(realtimeText.count) chars)") } else if cloudModeEnabled { + if !realtimeText.isEmpty { + Log.app + .warning( + "Ignoring partial real-time transcript (\(realtimeText.count) chars, finalized=\(didReceiveRealtimeFinalization)); falling back to async jobs API" + ) + } Log.app.info("No real-time transcript, falling back to async jobs API...") let audioData = try await loadAudioData(from: compressedURL) Log.app.info("Meeting recording size = \(audioData.count) bytes") @@ -521,7 +534,6 @@ extension AppDelegate { } Log.app.info("stopMeetingRecording: SUCCESS") - let duration = recordingStartTime.map { stopTime.timeIntervalSince($0) } ?? 0 RecordingsLibraryStorage.shared.saveRecording( id: recordingId, audioURL: compressedURL, @@ -596,6 +608,24 @@ extension AppDelegate { Log.app.info("stopMeetingRecording: END") } + private func shouldAcceptRealtimeTranscript( + _ text: String, + duration: TimeInterval, + didReceiveFinalization: Bool + ) -> Bool { + let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return false } + if didReceiveFinalization { return true } + + // Short recordings often stop before Soniox emits an explicit finished frame. + // For longer meetings, a tiny unfinalized transcript is usually partial and + // should fall back to the async jobs pipeline for a complete result. Measure + // visible content (trimmed) so a whitespace-padded transcript can't masquerade + // as substantial. + guard duration >= 30 else { return true } + return trimmed.count >= 120 + } + // MARK: - Escape Cancel Handler private func setupMeetingEscapeCancelHandler() { diff --git a/Diduny/App/AppDelegate.swift b/Diduny/App/AppDelegate.swift index 54d0395..41b9a4f 100644 --- a/Diduny/App/AppDelegate.swift +++ b/Diduny/App/AppDelegate.swift @@ -50,7 +50,15 @@ private final class SleepRecordingFlushBridge { let recordingId = meetingRecorderService.currentRecordingId if let recordingId { - Task { + // Persist the manifest synchronously before returning: the app can be + // suspended the instant this sleep-flush returns, so a deferred async + // write could be lost and leave recovery reading stale state after + // wake/crash. Block on a detached task (detached → not MainActor-bound, + // so waiting on the main thread can't deadlock the actor) with a short + // timeout so a wedged store can't hang the sleep transition. + let sem = DispatchSemaphore(value: 0) + Task.detached(priority: .userInitiated) { + defer { sem.signal() } do { let store = try InProgressRecordingStore.sharedStore() if var manifest = try await store.readManifest(for: recordingId) { @@ -76,6 +84,10 @@ private final class SleepRecordingFlushBridge { Log.recording.error("[Sleep] Failed to update manifest: \(error.localizedDescription)") } } + if sem.wait(timeout: .now() + 2) == .timedOut { + Log.recording + .error("[Sleep] manifest update timed out (2s) — proceeding without confirmed persist") + } } releaseActivityTokens?() diff --git a/Diduny/Core/Models/Recording.swift b/Diduny/Core/Models/Recording.swift index 1f553b5..59917c2 100644 --- a/Diduny/Core/Models/Recording.swift +++ b/Diduny/Core/Models/Recording.swift @@ -31,9 +31,15 @@ struct Recording: Identifiable, Codable, Equatable { var processedAt: Date? var chapters: [MeetingChapter]? let sourceDevice: RecordingDeviceInfo? - /// Non-nil when this recording was saved via a recovery path rather than a normal stop. - /// Drives the "Recovered" badge in the library and the detail-view notice. - /// Set once at recovery-save time; never cleared. + /// Marks a recording that originated from a recovery path rather than a normal + /// stop; intended to drive the "Recovered" badge in the library and the + /// detail-view notice. Once set it is preserved (never cleared), including + /// across `RecordingsLibraryStorage.replaceStoredAudioFile`. + /// + /// NOTE: no production save path sets this yet — `saveRecording(...)` doesn't + /// accept it and `recoverRecording(from:)` transcribes then discards without + /// creating a library entry. So in practice this is currently always nil. + /// TODO: populate it when the recovery-save-to-library flow is implemented. var recoverySource: RecoverySource? /// Nested to avoid conflict with RecoveryState.RecordingType diff --git a/Diduny/Core/Services/AsyncTranscriptionJobService.swift b/Diduny/Core/Services/AsyncTranscriptionJobService.swift index 472b778..824259d 100644 --- a/Diduny/Core/Services/AsyncTranscriptionJobService.swift +++ b/Diduny/Core/Services/AsyncTranscriptionJobService.swift @@ -6,7 +6,7 @@ final class AsyncTranscriptionJobService { SettingsStorage.shared.proxyBaseURL.trimmingCharacters(in: CharacterSet(charactersIn: "/")) } - private let maxRetries = 3 + private let maxJobWaitSeconds: TimeInterval = 7200 private let maxAudioBytesForSpeechPrecheck = 25 * 1024 * 1024 private let longRunningSessionBodyThresholdBytes = 10 * 1024 * 1024 private let strictSpeechPrecheck = false @@ -167,9 +167,10 @@ final class AsyncTranscriptionJobService { try Task.checkCancellation() let submission = try await submitJob(audioData: audioData, config: config) - var retries = 0 + var sseFailures = 0 + let deadline = Date().addingTimeInterval(maxJobWaitSeconds) - while retries < self.maxRetries { + while Date() < deadline { try Task.checkCancellation() do { let result = try await streamJobResult(jobId: submission.jobId, onUpdate: onUpdate) @@ -177,8 +178,8 @@ final class AsyncTranscriptionJobService { } catch is CancellationError { throw CancellationError() } catch { - retries += 1 - Log.transcription.warning("SSE stream failed (attempt \(retries)/\(self.maxRetries)): \(error)") + sseFailures += 1 + Log.transcription.warning("SSE stream failed (attempt \(sseFailures)): \(error)") // Check if job finished while disconnected let status = try await getJobStatus(jobId: submission.jobId) @@ -188,14 +189,19 @@ final class AsyncTranscriptionJobService { if status.status == "error" { throw TranscriptionError.apiError(status.error ?? "Transcription failed") } + if let parsed = JobStatus(rawValue: status.status) { + onUpdate(parsed) + } - // Still in progress — backoff and retry SSE + // Still in progress. SSE is best-effort; keep polling/retrying until + // the server-side job reaches a terminal state or the long job timeout. try Task.checkCancellation() - try await Task.sleep(nanoseconds: UInt64(retries) * 2_000_000_000) + let delaySeconds = min(Double(max(sseFailures, 1)) * 2, 30) + try await Task.sleep(nanoseconds: UInt64(delaySeconds * 1_000_000_000)) } } - throw TranscriptionError.apiError("Failed to get transcription result after \(self.maxRetries) retries") + throw TranscriptionError.apiError("Timed out waiting for transcription result") } // MARK: - Upload Preparation @@ -405,8 +411,13 @@ final class AsyncTranscriptionJobService { guard let jsonData = data.data(using: .utf8) else { throw TranscriptionError.invalidResponse } - let result = try JSONDecoder().decode(JobTranscriptionResult.self, from: jsonData) - return JobResult(text: result.text) + if let wrapped = try? JSONDecoder().decode(JobStatusResponse.self, from: jsonData), + let result = wrapped.result + { + return JobResult(text: result.text) + } + let direct = try JSONDecoder().decode(JobTranscriptionResult.self, from: jsonData) + return JobResult(text: direct.text) } private func parseErrorMessage(_ data: String) -> String { diff --git a/Diduny/Core/Services/CloudRealtimeService.swift b/Diduny/Core/Services/CloudRealtimeService.swift index c4d6846..e0d7028 100644 --- a/Diduny/Core/Services/CloudRealtimeService.swift +++ b/Diduny/Core/Services/CloudRealtimeService.swift @@ -150,7 +150,17 @@ final class CloudRealtimeService: NSObject, @unchecked Sendable { let configString = String(data: configData, encoding: .utf8) ?? "{}" NSLog("[Cloud RT] Sending config: %@", configString) - try await task.send(.string(configString)) + do { + try await task.send(.string(configString)) + } catch { + // A refused upgrade (e.g. HTTP 402 usage limit) surfaces as the first + // send/receive throwing. Map 402 to a typed usage error so the caller + // shows "limit reached" instead of a generic connection failure. + if let usageError = await usageLimitUpgradeError() { + throw usageError + } + throw error + } NSLog("[Cloud RT] Config sent successfully, WebSocket connected") isConnected = true @@ -417,6 +427,21 @@ final class CloudRealtimeService: NSObject, @unchecked Sendable { // MARK: - Reconnect + /// If the last WS upgrade was refused with HTTP 402, map it to a typed usage + /// error (using the best usage numbers we have) and kick off a refresh so the + /// UI shows accurate figures shortly. Returns nil for any other status. + private func usageLimitUpgradeError() async -> RealtimeTranscriptionError? { + guard (webSocketTask?.response as? HTTPURLResponse)?.statusCode == 402 else { + return nil + } + let usage = await UsageService.shared.cachedUsage + await UsageService.shared.refresh() + return .usageLimitExceeded( + usedHours: usage?.usedHours ?? 0, + limitHours: usage?.limitHours ?? 5 + ) + } + /// Called when the receive loop exits due to an error or a server-initiated close. /// /// ADR-0004 edge cases handled here: @@ -428,6 +453,26 @@ final class CloudRealtimeService: NSObject, @unchecked Sendable { guard isConnected else { return } isConnected = false + // A refused WS upgrade (HTTP 402 usage limit) lands here via the receive + // loop with no close code. Reconnecting is futile — the server will keep + // refusing — and would surface a generic "Connection lost" instead of the + // real reason. Detect it synchronously to stop the reconnect, then surface + // the typed usage error with the best numbers we have. + if (webSocketTask?.response as? HTTPURLResponse)?.statusCode == 402 { + Log.transcription.warning("Cloud RT: WS upgrade returned 402 — usage limit, not reconnecting") + Task { [weak self] in + guard let self else { return } + let usage = await UsageService.shared.cachedUsage + await UsageService.shared.refresh() + self.onError?(RealtimeTranscriptionError.usageLimitExceeded( + usedHours: usage?.usedHours ?? 0, + limitHours: usage?.limitHours ?? 5 + )) + self.onConnectionStatusChanged?(.failed("Cloud usage limit reached")) + } + return + } + // 1001 Going Away — proxy-initiated graceful close (8h cap or rolling restart). // Per ADR-0004: save partial transcript, show non-error UI, do NOT reconnect. if closeCode?.rawValue == 1001 { diff --git a/Diduny/Core/Services/PushToTalkService.swift b/Diduny/Core/Services/PushToTalkService.swift index a9cdc07..26e0365 100644 --- a/Diduny/Core/Services/PushToTalkService.swift +++ b/Diduny/Core/Services/PushToTalkService.swift @@ -256,28 +256,45 @@ final class PushToTalkService: PushToTalkServiceProtocol { return (clamped * 10).rounded() / 10 } + // Device-dependent modifier masks (NX_DEVICE*KEYMASK). NSEvent.ModifierFlags + // family bits (.shift/.option/.command/.control) don't tell left from right, + // so a side-specific key can't detect its own key-up while the opposite-side + // key is still held. These raw masks distinguish the physical side. + private enum DeviceModifierMask { + static let leftControl: UInt = 0x0000_0001 + static let leftShift: UInt = 0x0000_0002 + static let rightShift: UInt = 0x0000_0004 + static let leftCommand: UInt = 0x0000_0008 + static let rightCommand: UInt = 0x0000_0010 + static let leftOption: UInt = 0x0000_0020 + static let rightOption: UInt = 0x0000_0040 + static let rightControl: UInt = 0x0000_2000 + } + private func isKeyCurrentlyPressed(keyCode: UInt16, flags: NSEvent.ModifierFlags) -> Bool { + func has(_ mask: UInt) -> Bool { flags.rawValue & mask != 0 } switch selectedKey { case .none: - false + return false case .capsLock: - keyCode == 57 && flags.contains(.capsLock) + // Caps Lock has no left/right variant; the family flag is correct here. + return keyCode == 57 && flags.contains(.capsLock) case .leftShift: - keyCode == 56 && flags.contains(.shift) + return keyCode == 56 && has(DeviceModifierMask.leftShift) case .leftOption: - keyCode == 58 && flags.contains(.option) + return keyCode == 58 && has(DeviceModifierMask.leftOption) case .leftCommand: - keyCode == 55 && flags.contains(.command) + return keyCode == 55 && has(DeviceModifierMask.leftCommand) case .leftControl: - keyCode == 59 && flags.contains(.control) + return keyCode == 59 && has(DeviceModifierMask.leftControl) case .rightShift: - keyCode == 60 && flags.contains(.shift) + return keyCode == 60 && has(DeviceModifierMask.rightShift) case .rightOption: - keyCode == 61 && flags.contains(.option) + return keyCode == 61 && has(DeviceModifierMask.rightOption) case .rightCommand: - keyCode == 54 && flags.contains(.command) + return keyCode == 54 && has(DeviceModifierMask.rightCommand) case .rightControl: - keyCode == 62 && flags.contains(.control) + return keyCode == 62 && has(DeviceModifierMask.rightControl) } } diff --git a/DidunyTests/SleepFlushCoordinatorTests.swift b/DidunyTests/SleepFlushCoordinatorTests.swift index a8958e3..6069ac6 100644 --- a/DidunyTests/SleepFlushCoordinatorTests.swift +++ b/DidunyTests/SleepFlushCoordinatorTests.swift @@ -1,3 +1,4 @@ +import AppKit import XCTest @testable import Diduny