Skip to content
36 changes: 33 additions & 3 deletions Diduny/App/AppDelegate+MeetingRecording.swift
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,9 @@ extension AppDelegate {

// Finalize and disconnect real-time transcription (if active)
let hasRealtimeSession = await MainActor.run { appState.liveTranscriptStore != nil }
var didReceiveRealtimeFinalization = true
if hasRealtimeSession {
_ = await realtimeTranscriptionService.finalize()
didReceiveRealtimeFinalization = await realtimeTranscriptionService.finalize()
await realtimeTranscriptionService.disconnect()
meetingRecorderService.onRealtimeAudioData = nil
}
Expand All @@ -370,6 +371,7 @@ extension AppDelegate {
var capturedAudioURL: URL?
var originalWavURL: URL?
let stopTime = Date()
let duration = recordingStartTime.map { stopTime.timeIntervalSince($0) } ?? 0
let recordingId = UUID()
// Capture in-progress recording ID before stopRecording() clears it (RLR-M1).
let inProgressRecordingId = meetingRecorderService.currentRecordingId
Expand Down Expand Up @@ -411,12 +413,23 @@ extension AppDelegate {

let realtimeText = await MainActor.run { store?.finalTranscriptText ?? "" }
let cloudModeEnabled = SettingsStorage.shared.effectiveMeetingRealtimeTranscriptionEnabled
let shouldUseRealtimeText = shouldAcceptRealtimeTranscript(
realtimeText,
duration: duration,
didReceiveFinalization: didReceiveRealtimeFinalization
)

let rawText: String?
if !realtimeText.isEmpty {
if shouldUseRealtimeText {
rawText = realtimeText
Log.app.info("Using real-time transcript (\(realtimeText.count) chars)")
} else if cloudModeEnabled {
if !realtimeText.isEmpty {
Log.app
.warning(
"Ignoring partial real-time transcript (\(realtimeText.count) chars, finalized=\(didReceiveRealtimeFinalization)); falling back to async jobs API"
)
}
Log.app.info("No real-time transcript, falling back to async jobs API...")
let audioData = try await loadAudioData(from: compressedURL)
Log.app.info("Meeting recording size = \(audioData.count) bytes")
Expand Down Expand Up @@ -521,7 +534,6 @@ extension AppDelegate {
}
Log.app.info("stopMeetingRecording: SUCCESS")

let duration = recordingStartTime.map { stopTime.timeIntervalSince($0) } ?? 0
RecordingsLibraryStorage.shared.saveRecording(
id: recordingId,
audioURL: compressedURL,
Expand Down Expand Up @@ -596,6 +608,24 @@ extension AppDelegate {
Log.app.info("stopMeetingRecording: END")
}

/// Decides whether the live (real-time) transcript can be used as the final text
/// instead of re-transcribing the recording through the async jobs pipeline.
///
/// - Parameters:
///   - text: Transcript text accumulated by the real-time session.
///   - duration: Length of the recording, in seconds.
///   - didReceiveFinalization: Whether the real-time service reported a
///     successful finalization before disconnecting.
/// - Returns: `false` when `text` is blank after trimming. Otherwise `true` if the
///   session was finalized, if `duration` is not at least 30 seconds, or if the
///   trimmed text has at least 120 characters; `false` in every other case.
private func shouldAcceptRealtimeTranscript(
    _ text: String,
    duration: TimeInterval,
    didReceiveFinalization: Bool
) -> Bool {
    // Judge the transcript by its visible content only, so whitespace padding
    // can neither make it non-empty nor push it over the length threshold.
    let visibleText = text.trimmingCharacters(in: .whitespacesAndNewlines)
    if visibleText.isEmpty {
        return false
    }

    // Short recordings often stop before Soniox emits an explicit finished frame,
    // so an unfinalized transcript is still accepted for them. For longer
    // meetings, a tiny unfinalized transcript is usually partial and is rejected
    // so the caller can fall back to the async jobs pipeline.
    let isLongRecording = duration >= 30
    return didReceiveFinalization
        || !isLongRecording
        || visibleText.count >= 120
}

// MARK: - Escape Cancel Handler

private func setupMeetingEscapeCancelHandler() {
Expand Down
14 changes: 13 additions & 1 deletion Diduny/App/AppDelegate.swift
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,15 @@ private final class SleepRecordingFlushBridge {

let recordingId = meetingRecorderService.currentRecordingId
if let recordingId {
Task {
// Persist the manifest synchronously before returning: the app can be
// suspended the instant this sleep-flush returns, so a deferred async
// write could be lost and leave recovery reading stale state after
// wake/crash. Block on a detached task (detached → not MainActor-bound,
// so waiting on the main thread can't deadlock the actor) with a short
// timeout so a wedged store can't hang the sleep transition.
let sem = DispatchSemaphore(value: 0)
Task.detached(priority: .userInitiated) {
defer { sem.signal() }
Comment on lines +53 to +61

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | 🏗️ Heavy lift

Keep the sleep-flush path within the coordinator’s timing budget.

SleepFlushCoordinator.flushCurrentChunk is documented to finish on the power-management thread within ~250 ms, but this path can now block for up to 2 seconds on actor/file I/O. That can stall system sleep and still leaves you without a confirmed manifest write if the timeout fires. Please move this persistence onto a truly bounded synchronous path, or at minimum cap the wait to the coordinator budget and treat timeout as a failed flush.

Also applies to: 87-90

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@Diduny/App/AppDelegate.swift` around lines 53 - 61, The sleep-flush path is
blocking for up to 2s via the DispatchSemaphore wait around a Task.detached;
change this so persistence is bounded to the coordinator budget: either perform
the manifest write synchronously on the current thread (remove Task.detached +
semaphore) or, if keeping the detached task, cap the semaphore wait to the
SleepFlushCoordinator budget (~250ms) and treat a timeout as a flush failure
(log/report/return failure) rather than proceeding as if the write succeeded;
make the same change for the other instance at the 87-90 region, ensure
sem.signal() runs on all code paths, and update any callers of
SleepFlushCoordinator.flushCurrentChunk to handle a failed/timed-out flush
result.

do {
let store = try InProgressRecordingStore.sharedStore()
if var manifest = try await store.readManifest(for: recordingId) {
Expand All @@ -76,6 +84,10 @@ private final class SleepRecordingFlushBridge {
Log.recording.error("[Sleep] Failed to update manifest: \(error.localizedDescription)")
}
}
if sem.wait(timeout: .now() + 2) == .timedOut {
Log.recording
.error("[Sleep] manifest update timed out (2s) — proceeding without confirmed persist")
}
}

releaseActivityTokens?()
Expand Down
12 changes: 9 additions & 3 deletions Diduny/Core/Models/Recording.swift
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,15 @@ struct Recording: Identifiable, Codable, Equatable {
var processedAt: Date?
var chapters: [MeetingChapter]?
let sourceDevice: RecordingDeviceInfo?
/// Non-nil when this recording was saved via a recovery path rather than a normal stop.
/// Drives the "Recovered" badge in the library and the detail-view notice.
/// Set once at recovery-save time; never cleared.
/// Marks a recording that originated from a recovery path rather than a normal
/// stop; intended to drive the "Recovered" badge in the library and the
/// detail-view notice. Once set it is preserved (never cleared), including
/// across `RecordingsLibraryStorage.replaceStoredAudioFile`.
///
/// NOTE: no production save path sets this yet — `saveRecording(...)` doesn't
/// accept it and `recoverRecording(from:)` transcribes then discards without
/// creating a library entry. So in practice this is currently always nil.
/// TODO: populate it when the recovery-save-to-library flow is implemented.
var recoverySource: RecoverySource?

/// Nested to avoid conflict with RecoveryState.RecordingType
Expand Down
31 changes: 21 additions & 10 deletions Diduny/Core/Services/AsyncTranscriptionJobService.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ final class AsyncTranscriptionJobService {
SettingsStorage.shared.proxyBaseURL.trimmingCharacters(in: CharacterSet(charactersIn: "/"))
}

private let maxRetries = 3
private let maxJobWaitSeconds: TimeInterval = 7200
private let maxAudioBytesForSpeechPrecheck = 25 * 1024 * 1024
private let longRunningSessionBodyThresholdBytes = 10 * 1024 * 1024
private let strictSpeechPrecheck = false
Expand Down Expand Up @@ -167,18 +167,19 @@ final class AsyncTranscriptionJobService {
try Task.checkCancellation()
let submission = try await submitJob(audioData: audioData, config: config)

var retries = 0
var sseFailures = 0
let deadline = Date().addingTimeInterval(maxJobWaitSeconds)

while retries < self.maxRetries {
while Date() < deadline {
try Task.checkCancellation()
do {
let result = try await streamJobResult(jobId: submission.jobId, onUpdate: onUpdate)
return result.text
} catch is CancellationError {
throw CancellationError()
} catch {
retries += 1
Log.transcription.warning("SSE stream failed (attempt \(retries)/\(self.maxRetries)): \(error)")
sseFailures += 1
Log.transcription.warning("SSE stream failed (attempt \(sseFailures)): \(error)")

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Use NSLog with [Transcription] prefix for this cloud API log

This changed line uses Log.transcription... instead of the required Swift logging convention from the repo guideline.

As per coding guidelines, "**/*.swift: Use NSLog() for logging with prefixes: [Diduny] for AppDelegate flow, [Transcription] for cloud API calls, [AppState] for state changes".

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@Diduny/Core/Services/AsyncTranscriptionJobService.swift` at line 182, Replace
the Log.transcription.warning call in AsyncTranscriptionJobService (the SSE
stream failure handling) with an NSLog call that uses the "[Transcription]"
prefix and includes the error details; specifically, change the logging
expression that currently reads Log.transcription.warning("SSE stream failed
(attempt \(sseFailures)): \(error)") to an NSLog invocation that starts with
"[Transcription]" and formats the same message including sseFailures and the
error for consistent repo logging conventions.

Source: Coding guidelines


// Check if job finished while disconnected
let status = try await getJobStatus(jobId: submission.jobId)
Expand All @@ -188,14 +189,19 @@ final class AsyncTranscriptionJobService {
if status.status == "error" {
throw TranscriptionError.apiError(status.error ?? "Transcription failed")
}
if let parsed = JobStatus(rawValue: status.status) {
onUpdate(parsed)
}

// Still in progress — backoff and retry SSE
// Still in progress. SSE is best-effort; keep polling/retrying until
// the server-side job reaches a terminal state or the long job timeout.
try Task.checkCancellation()
try await Task.sleep(nanoseconds: UInt64(retries) * 2_000_000_000)
let delaySeconds = min(Double(max(sseFailures, 1)) * 2, 30)
try await Task.sleep(nanoseconds: UInt64(delaySeconds * 1_000_000_000))
}
}

throw TranscriptionError.apiError("Failed to get transcription result after \(self.maxRetries) retries")
throw TranscriptionError.apiError("Timed out waiting for transcription result")
}

// MARK: - Upload Preparation
Expand Down Expand Up @@ -405,8 +411,13 @@ final class AsyncTranscriptionJobService {
guard let jsonData = data.data(using: .utf8) else {
throw TranscriptionError.invalidResponse
}
let result = try JSONDecoder().decode(JobTranscriptionResult.self, from: jsonData)
return JobResult(text: result.text)
if let wrapped = try? JSONDecoder().decode(JobStatusResponse.self, from: jsonData),
let result = wrapped.result
{
return JobResult(text: result.text)
}
let direct = try JSONDecoder().decode(JobTranscriptionResult.self, from: jsonData)
return JobResult(text: direct.text)
}

private func parseErrorMessage(_ data: String) -> String {
Expand Down
47 changes: 46 additions & 1 deletion Diduny/Core/Services/CloudRealtimeService.swift
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,17 @@ final class CloudRealtimeService: NSObject, @unchecked Sendable {
let configString = String(data: configData, encoding: .utf8) ?? "{}"

NSLog("[Cloud RT] Sending config: %@", configString)
try await task.send(.string(configString))
do {
try await task.send(.string(configString))
} catch {
// A refused upgrade (e.g. HTTP 402 usage limit) surfaces as the first
// send/receive throwing. Map 402 to a typed usage error so the caller
// shows "limit reached" instead of a generic connection failure.
if let usageError = await usageLimitUpgradeError() {
throw usageError
}
throw error
}
NSLog("[Cloud RT] Config sent successfully, WebSocket connected")

isConnected = true
Expand Down Expand Up @@ -417,6 +427,21 @@ final class CloudRealtimeService: NSObject, @unchecked Sendable {

// MARK: - Reconnect

/// If the last WS upgrade was refused with HTTP 402, map it to a typed usage
/// error (using the best usage numbers we have) and kick off a refresh so the
/// UI shows accurate figures shortly. Returns nil for any other status.
private func usageLimitUpgradeError() async -> RealtimeTranscriptionError? {
guard (webSocketTask?.response as? HTTPURLResponse)?.statusCode == 402 else {
return nil
}
let usage = await UsageService.shared.cachedUsage
await UsageService.shared.refresh()
return .usageLimitExceeded(
usedHours: usage?.usedHours ?? 0,
limitHours: usage?.limitHours ?? 5
)
Comment on lines +433 to +442

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Build the usage-limit error from the refreshed cache.

Both 402 paths snapshot cachedUsage before refresh(), then format usageLimitExceeded from that stale snapshot. The connect path bubbles error.localizedDescription straight into UI state, so a real quota hit can still show fallback numbers like 0.0h / 5h even when the refresh succeeded. Refresh first, then read the updated cache, and only fall back to the pre-refresh snapshot if the refresh did not populate it.

Also applies to: 465-470

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@Diduny/Core/Services/CloudRealtimeService.swift` around lines 433 - 442, The
code reads UsageService.shared.cachedUsage into `usage` before calling await
UsageService.shared.refresh(), so the created RealtimeTranscriptionError uses a
stale snapshot; change the order in usageLimitUpgradeError() to call await
UsageService.shared.refresh() first, then read UsageService.shared.cachedUsage
(falling back to the original snapshot only if the refreshed cache is nil), and
apply the same change in the connect path that constructs the usageLimitExceeded
error (the connect method) so the UI gets the refreshed quota values rather than
the pre-refresh snapshot.

}

/// Called when the receive loop exits due to an error or a server-initiated close.
///
/// ADR-0004 edge cases handled here:
Expand All @@ -428,6 +453,26 @@ final class CloudRealtimeService: NSObject, @unchecked Sendable {
guard isConnected else { return }
isConnected = false

// A refused WS upgrade (HTTP 402 usage limit) lands here via the receive
// loop with no close code. Reconnecting is futile — the server will keep
// refusing — and would surface a generic "Connection lost" instead of the
// real reason. Detect it synchronously to stop the reconnect, then surface
// the typed usage error with the best numbers we have.
if (webSocketTask?.response as? HTTPURLResponse)?.statusCode == 402 {
Log.transcription.warning("Cloud RT: WS upgrade returned 402 — usage limit, not reconnecting")
Task { [weak self] in
guard let self else { return }
let usage = await UsageService.shared.cachedUsage
await UsageService.shared.refresh()
self.onError?(RealtimeTranscriptionError.usageLimitExceeded(
usedHours: usage?.usedHours ?? 0,
limitHours: usage?.limitHours ?? 5
))
self.onConnectionStatusChanged?(.failed("Cloud usage limit reached"))
}
return
}
Comment on lines +456 to +474

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Tear down the failed socket before returning on 402.

This branch returns before either cleanup path runs. Since the ping loop is already started earlier, the dead webSocketTask/urlSession stay retained and pingTask keeps firing until someone manually calls disconnect(). Cancel/reset the live socket state here before returning.

Suggested fix
         if (webSocketTask?.response as? HTTPURLResponse)?.statusCode == 402 {
+            pingTask?.cancel()
+            pingTask = nil
+            receiveTask?.cancel()
+            receiveTask = nil
+            webSocketTask?.cancel(with: .normalClosure, reason: nil)
+            webSocketTask = nil
+            urlSession?.invalidateAndCancel()
+            urlSession = nil
             Log.transcription.warning("Cloud RT: WS upgrade returned 402 — usage limit, not reconnecting")
             Task { [weak self] in
                 guard let self else { return }
                 let usage = await UsageService.shared.cachedUsage
                 await UsageService.shared.refresh()
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
// A refused WS upgrade (HTTP 402 usage limit) lands here via the receive
// loop with no close code. Reconnecting is futile — the server will keep
// refusing — and would surface a generic "Connection lost" instead of the
// real reason. Detect it synchronously to stop the reconnect, then surface
// the typed usage error with the best numbers we have.
if (webSocketTask?.response as? HTTPURLResponse)?.statusCode == 402 {
Log.transcription.warning("Cloud RT: WS upgrade returned 402 — usage limit, not reconnecting")
Task { [weak self] in
guard let self else { return }
let usage = await UsageService.shared.cachedUsage
await UsageService.shared.refresh()
self.onError?(RealtimeTranscriptionError.usageLimitExceeded(
usedHours: usage?.usedHours ?? 0,
limitHours: usage?.limitHours ?? 5
))
self.onConnectionStatusChanged?(.failed("Cloud usage limit reached"))
}
return
}
// A refused WS upgrade (HTTP 402 usage limit) lands here via the receive
// loop with no close code. Reconnecting is futile — the server will keep
// refusing — and would surface a generic "Connection lost" instead of the
// real reason. Detect it synchronously to stop the reconnect, then surface
// the typed usage error with the best numbers we have.
if (webSocketTask?.response as? HTTPURLResponse)?.statusCode == 402 {
pingTask?.cancel()
pingTask = nil
receiveTask?.cancel()
receiveTask = nil
webSocketTask?.cancel(with: .normalClosure, reason: nil)
webSocketTask = nil
urlSession?.invalidateAndCancel()
urlSession = nil
Log.transcription.warning("Cloud RT: WS upgrade returned 402 — usage limit, not reconnecting")
Task { [weak self] in
guard let self else { return }
let usage = await UsageService.shared.cachedUsage
await UsageService.shared.refresh()
self.onError?(RealtimeTranscriptionError.usageLimitExceeded(
usedHours: usage?.usedHours ?? 0,
limitHours: usage?.limitHours ?? 5
))
self.onConnectionStatusChanged?(.failed("Cloud usage limit reached"))
}
return
}
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@Diduny/Core/Services/CloudRealtimeService.swift` around lines 456 - 474, The
402 branch exits early without tearing down the live socket, leaving
webSocketTask/urlSession retained and pingTask running; before returning, cancel
and clean up the socket state (cancel webSocketTask, invalidate or finish the
urlSession if owned, and cancel/await/clear pingTask) and reset any
connection-related properties so the normal cleanup paths aren't required;
perform this teardown inside the same Task block (or synchronously before it)
that calls UsageService and before calling self.onError? /
self.onConnectionStatusChanged? so that webSocketTask, urlSession and pingTask
are stopped (refer to webSocketTask, urlSession, pingTask, disconnect()).


// 1001 Going Away — proxy-initiated graceful close (8h cap or rolling restart).
// Per ADR-0004: save partial transcript, show non-error UI, do NOT reconnect.
if closeCode?.rawValue == 1001 {
Expand Down
37 changes: 27 additions & 10 deletions Diduny/Core/Services/PushToTalkService.swift
Original file line number Diff line number Diff line change
Expand Up @@ -256,28 +256,45 @@ final class PushToTalkService: PushToTalkServiceProtocol {
return (clamped * 10).rounded() / 10
}

// Raw, device-dependent modifier bits (the NX_DEVICE*KEYMASK values). The
// device-independent NSEvent.ModifierFlags members (.shift/.option/.command/
// .control) are set while either physical key of a pair is down, so they cannot
// report the release of one side while the opposite-side key is still held.
// Testing these bits against `flags.rawValue` identifies the physical side.
private enum DeviceModifierMask {
    static let leftControl: UInt = 1 << 0    // 0x0000_0001, NX_DEVICELCTLKEYMASK
    static let leftShift: UInt = 1 << 1      // 0x0000_0002, NX_DEVICELSHIFTKEYMASK
    static let rightShift: UInt = 1 << 2     // 0x0000_0004, NX_DEVICERSHIFTKEYMASK
    static let leftCommand: UInt = 1 << 3    // 0x0000_0008, NX_DEVICELCMDKEYMASK
    static let rightCommand: UInt = 1 << 4   // 0x0000_0010, NX_DEVICERCMDKEYMASK
    static let leftOption: UInt = 1 << 5     // 0x0000_0020, NX_DEVICELALTKEYMASK
    static let rightOption: UInt = 1 << 6    // 0x0000_0040, NX_DEVICERALTKEYMASK
    static let rightControl: UInt = 1 << 13  // 0x0000_2000, NX_DEVICERCTLKEYMASK
}

/// Reports whether the configured push-to-talk key (`selectedKey`) is the key
/// identified by `keyCode` and is held down according to `flags`.
///
/// Left/right modifier keys are checked against the raw device-dependent bits in
/// `DeviceModifierMask` rather than the shared family flags, so releasing the
/// selected key is detected even while the opposite-side key remains pressed.
///
/// - Parameters:
///   - keyCode: Key code carried by the event being examined.
///   - flags: Modifier flags carried by the same event.
/// - Returns: `true` only when `keyCode` matches the selected key and that key's
///   flag bit is set in `flags`; always `false` when no key is selected.
private func isKeyCurrentlyPressed(keyCode: UInt16, flags: NSEvent.ModifierFlags) -> Bool {
    // True when the given device-dependent bit is present in the raw flags.
    func has(_ mask: UInt) -> Bool { flags.rawValue & mask != 0 }

    // NOTE(review): the numeric key codes look like the macOS virtual key codes
    // for each modifier (kVK_*) — confirm against HIToolbox/Events.h.
    switch selectedKey {
    case .none:
        return false
    case .capsLock:
        // Caps Lock has no left/right variant; the family flag is correct here.
        return keyCode == 57 && flags.contains(.capsLock)
    case .leftShift:
        return keyCode == 56 && has(DeviceModifierMask.leftShift)
    case .leftOption:
        return keyCode == 58 && has(DeviceModifierMask.leftOption)
    case .leftCommand:
        return keyCode == 55 && has(DeviceModifierMask.leftCommand)
    case .leftControl:
        return keyCode == 59 && has(DeviceModifierMask.leftControl)
    case .rightShift:
        return keyCode == 60 && has(DeviceModifierMask.rightShift)
    case .rightOption:
        return keyCode == 61 && has(DeviceModifierMask.rightOption)
    case .rightCommand:
        return keyCode == 54 && has(DeviceModifierMask.rightCommand)
    case .rightControl:
        return keyCode == 62 && has(DeviceModifierMask.rightControl)
    }
}

Expand Down
1 change: 1 addition & 0 deletions DidunyTests/SleepFlushCoordinatorTests.swift
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import AppKit
import XCTest
@testable import Diduny

Expand Down
Loading