From 3064b60538b2b5e476361dfe82e46fd7f5dce934 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 7 Mar 2026 01:04:01 +0000 Subject: [PATCH 1/2] Enhance iOS app with language selection, auth, subscription, and LLM support - Add language selection view and integrate into onboarding flow and settings - Add account management section (sign in/out, usage display, upgrade to Pro) - Add subscription tracking with word count limits and upgrade prompts - Enhance recording flow with LLM post-processing via transcribeAndFormat - Update keyboard extension with dictionary, shortcut, and language support - Add iOS URL opening support to shared AuthManager and SubscriptionManager - Make LanguageManager init and selectedLanguages public for cross-module use - Read app version from bundle instead of hardcoding https://claude.ai/code/session_014WnimLYNLwfqYsqZn71BSU --- Whishpermate/WhisperMateIOS/ContentView.swift | 119 ++++++++++++++++-- .../LanguageSelectionView.swift | 36 ++++++ .../WhisperMateIOS/OnboardingView.swift | 75 ++++++++++- .../WhisperMateIOS/RecordingSheetView.swift | 48 ++++++- .../WhisperMateIOS/WhisperMateApp.swift | 4 + .../KeyboardViewController.swift | 37 +++++- .../WhisperMateShared/Models/Language.swift | 4 +- .../Services/AuthManager.swift | 11 +- .../Services/SubscriptionManager.swift | 11 +- 9 files changed, 320 insertions(+), 25 deletions(-) create mode 100644 Whishpermate/WhisperMateIOS/LanguageSelectionView.swift diff --git a/Whishpermate/WhisperMateIOS/ContentView.swift b/Whishpermate/WhisperMateIOS/ContentView.swift index 920ff707..97e6f3b1 100644 --- a/Whishpermate/WhisperMateIOS/ContentView.swift +++ b/Whishpermate/WhisperMateIOS/ContentView.swift @@ -7,6 +7,9 @@ struct ContentView: View { @StateObject private var dictionaryManager = DictionaryManager.shared @StateObject private var toneStyleManager = ToneStyleManager.shared @StateObject private var shortcutManager = ShortcutManager.shared + @StateObject private var languageManager = LanguageManager() 
+ @EnvironmentObject var authManager: AuthManager + @EnvironmentObject var subscriptionManager: SubscriptionManager @State private var selectedTab: Int = 0 @State private var showRecordingSheet = false @State private var selectedRecording: Recording? @@ -42,10 +45,10 @@ struct ContentView: View { } .navigationTitle("WhisperMate") .sheet(isPresented: $showRecordingSheet) { - RecordingSheetView(historyManager: historyManager, dictionaryManager: dictionaryManager, toneStyleManager: toneStyleManager, shortcutManager: shortcutManager) + RecordingSheetView(historyManager: historyManager, dictionaryManager: dictionaryManager, toneStyleManager: toneStyleManager, shortcutManager: shortcutManager, languageManager: languageManager) } .sheet(item: $selectedRecording) { recording in - RecordingSheetView(historyManager: historyManager, dictionaryManager: dictionaryManager, toneStyleManager: toneStyleManager, shortcutManager: shortcutManager, recording: recording) + RecordingSheetView(historyManager: historyManager, dictionaryManager: dictionaryManager, toneStyleManager: toneStyleManager, shortcutManager: shortcutManager, languageManager: languageManager, recording: recording) } } .navigationViewStyle(.stack) @@ -110,7 +113,7 @@ struct ContentView: View { } } .sheet(isPresented: $showRecordingSheet) { - RecordingSheetView(historyManager: historyManager, dictionaryManager: dictionaryManager, toneStyleManager: toneStyleManager, shortcutManager: shortcutManager) + RecordingSheetView(historyManager: historyManager, dictionaryManager: dictionaryManager, toneStyleManager: toneStyleManager, shortcutManager: shortcutManager, languageManager: languageManager) } } @@ -378,7 +381,7 @@ struct ContentView: View { } .navigationTitle("History") .sheet(item: $selectedRecording) { recording in - RecordingSheetView(historyManager: historyManager, dictionaryManager: dictionaryManager, toneStyleManager: toneStyleManager, shortcutManager: shortcutManager, recording: recording) + 
RecordingSheetView(historyManager: historyManager, dictionaryManager: dictionaryManager, toneStyleManager: toneStyleManager, shortcutManager: shortcutManager, languageManager: languageManager, recording: recording) } } .navigationViewStyle(StackNavigationViewStyle()) @@ -389,6 +392,84 @@ struct ContentView: View { private var settingsView: some View { NavigationView { Form { + // Account Section + Section("Account") { + if authManager.isAuthenticated, let user = authManager.currentUser { + HStack { + Image(systemName: "person.circle.fill") + .font(.title2) + .foregroundColor(.blue) + VStack(alignment: .leading, spacing: 2) { + Text(user.email) + .font(.body) + Text(user.subscriptionTier.displayName) + .font(.caption) + .foregroundColor(.secondary) + } + } + + // Usage info + let usage = subscriptionManager.getUsageStatus() + if !usage.isPro { + VStack(alignment: .leading, spacing: 8) { + HStack { + Text("Words used") + .font(.subheadline) + Spacer() + Text("\(usage.used) / \(usage.limit)") + .font(.subheadline) + .foregroundColor(.secondary) + } + ProgressView(value: min(usage.percentage, 1.0)) + .tint(usage.percentage >= 0.9 ? 
.red : .blue) + } + + Button(action: { + subscriptionManager.openUpgrade() + }) { + Label("Upgrade to Pro", systemImage: "star.fill") + .foregroundColor(.orange) + } + } + + Button("Sign Out", role: .destructive) { + Task { + await authManager.logout() + } + } + } else { + Button(action: { + authManager.openSignUp() + }) { + HStack { + Image(systemName: "person.circle") + .font(.title2) + .foregroundColor(.blue) + VStack(alignment: .leading, spacing: 2) { + Text("Sign In / Create Account") + .font(.body) + Text("Get \(UsageLimits.freeMonthlyWordLimit) free words/month") + .font(.caption) + .foregroundColor(.secondary) + } + } + } + } + } + + // Language Section + Section("Language") { + NavigationLink(destination: LanguageSelectionView(languageManager: languageManager)) { + HStack { + Label("Transcription Language", systemImage: "globe") + Spacer() + Text(languageDisplayText) + .foregroundColor(.secondary) + .lineLimit(1) + } + } + } + Section("Permissions") { Button(action: openAppSettings) { HStack { @@ -409,21 +490,21 @@ struct ContentView: View { } } + Section("Transcription") { + NavigationLink(destination: TranscriptionSettingsView(dictionaryManager: dictionaryManager, toneStyleManager: toneStyleManager, shortcutManager: shortcutManager)) { + Label("Dictionary & Shortcuts", systemImage: "text.badge.checkmark") + } + } + Section("About") { HStack { Text("Version") Spacer() - Text("0.0.20") + Text(appVersion) .foregroundColor(.secondary) } } - Section("Transcription") { - NavigationLink(destination: TranscriptionSettingsView(dictionaryManager: dictionaryManager, toneStyleManager: toneStyleManager, shortcutManager: shortcutManager)) { - Label("Transcription Settings", systemImage: "text.badge.checkmark") - } - } - Section("Data") { Button("Clear All History", role: .destructive) { historyManager.clearAll() @@ -436,6 +517,20 @@ struct ContentView: View { .navigationViewStyle(StackNavigationViewStyle()) } + // MARK: - Computed Properties + + private var 
appVersion: String { + Bundle.main.infoDictionary?["CFBundleShortVersionString"] as? String ?? "0.0.1" + } + + private var languageDisplayText: String { + if languageManager.selectedLanguages.contains(.auto) { + return "Auto-detect" + } + let names = languageManager.selectedLanguages.map { $0.displayName } + return names.sorted().joined(separator: ", ") + } + // MARK: - Permission Helpers private func checkMicrophonePermission() -> PermissionStatus { @@ -561,4 +656,6 @@ enum PermissionStatus { #Preview { ContentView() + .environmentObject(AuthManager.shared) + .environmentObject(SubscriptionManager.shared) } diff --git a/Whishpermate/WhisperMateIOS/LanguageSelectionView.swift b/Whishpermate/WhisperMateIOS/LanguageSelectionView.swift new file mode 100644 index 00000000..cdd658d2 --- /dev/null +++ b/Whishpermate/WhisperMateIOS/LanguageSelectionView.swift @@ -0,0 +1,36 @@ +import SwiftUI +import WhisperMateShared + +struct LanguageSelectionView: View { + @ObservedObject var languageManager: LanguageManager + + var body: some View { + List { + Section(footer: Text("Select the languages you speak. 
Auto-detect works best for single-language dictation.")) { + ForEach(Language.allCases) { language in + Button(action: { + languageManager.toggleLanguage(language) + }) { + HStack { + Text(language.flag) + .font(.title3) + + Text(language.displayName) + .foregroundColor(.primary) + + Spacer() + + if languageManager.isSelected(language) { + Image(systemName: "checkmark") + .foregroundColor(.blue) + .fontWeight(.semibold) + } + } + } + } + } + } + .navigationTitle("Language") + .navigationBarTitleDisplayMode(.inline) + } +} diff --git a/Whishpermate/WhisperMateIOS/OnboardingView.swift b/Whishpermate/WhisperMateIOS/OnboardingView.swift index a6b608a7..1205bba6 100644 --- a/Whishpermate/WhisperMateIOS/OnboardingView.swift +++ b/Whishpermate/WhisperMateIOS/OnboardingView.swift @@ -4,6 +4,7 @@ import WhisperMateShared struct OnboardingView: View { @ObservedObject var onboardingManager: OnboardingManager + @StateObject private var languageManager = LanguageManager() @State private var currentStep: OnboardingStep = .welcome @State private var isCheckingMicrophone = false @State private var refreshTrigger = false @@ -11,6 +12,7 @@ struct OnboardingView: View { enum OnboardingStep { case welcome case microphone + case language case keyboardSetup } @@ -24,6 +26,8 @@ struct OnboardingView: View { welcomeStep case .microphone: microphoneStep + case .language: + languageStep case .keyboardSetup: keyboardSetupStep } @@ -119,6 +123,58 @@ struct OnboardingView: View { } } + private var languageStep: some View { + VStack(spacing: 20) { + Image(systemName: "globe") + .resizable() + .scaledToFit() + .frame(width: 80, height: 80) + .foregroundColor(.blue) + + Text("Select Languages") + .font(.title2) + .fontWeight(.bold) + + Text("Choose the languages you'll be speaking. 
You can change this later in Settings.") + .multilineTextAlignment(.center) + .foregroundColor(.secondary) + .padding(.horizontal) + + ScrollView { + VStack(spacing: 8) { + ForEach(Language.allCases) { language in + Button(action: { + languageManager.toggleLanguage(language) + }) { + HStack { + Text(language.flag) + .font(.title3) + Text(language.displayName) + .foregroundColor(.primary) + Spacer() + if languageManager.isSelected(language) { + Image(systemName: "checkmark.circle.fill") + .foregroundColor(.blue) + } else { + Image(systemName: "circle") + .foregroundColor(.secondary) + } + } + .padding(.horizontal, 16) + .padding(.vertical, 10) + .background( + RoundedRectangle(cornerRadius: 10) + .fill(languageManager.isSelected(language) ? Color.blue.opacity(0.1) : Color(uiColor: .secondarySystemGroupedBackground)) + ) + } + } + } + .padding(.horizontal) + } + .frame(maxHeight: 300) + } + } + private var keyboardSetupStep: some View { VStack(spacing: 20) { Image(systemName: "keyboard") @@ -173,7 +229,7 @@ struct OnboardingView: View { case .microphone: Button(action: { if isMicrophoneGranted() { - currentStep = .keyboardSetup + currentStep = .language } else { requestMicrophonePermission() } @@ -189,6 +245,21 @@ struct OnboardingView: View { ) } + case .language: + Button(action: { + currentStep = .keyboardSetup + }) { + Text("Continue") + .font(.system(size: 17, weight: .semibold)) + .foregroundColor(.white) + .frame(maxWidth: .infinity) + .frame(height: 50) + .background( + Capsule() + .fill(Color.blue) + ) + } + case .keyboardSetup: Button(action: { onboardingManager.completeOnboarding() @@ -234,7 +305,7 @@ struct OnboardingView: View { if isMicrophoneGranted() { // Auto-advance to next step isCheckingMicrophone = false - currentStep = .keyboardSetup + currentStep = .language return } diff --git a/Whishpermate/WhisperMateIOS/RecordingSheetView.swift b/Whishpermate/WhisperMateIOS/RecordingSheetView.swift index b86c530d..c02df1ab 100644 --- 
a/Whishpermate/WhisperMateIOS/RecordingSheetView.swift +++ b/Whishpermate/WhisperMateIOS/RecordingSheetView.swift @@ -9,6 +9,7 @@ struct RecordingSheetView: View { @ObservedObject var dictionaryManager: DictionaryManager @ObservedObject var toneStyleManager: ToneStyleManager @ObservedObject var shortcutManager: ShortcutManager + @ObservedObject var languageManager: LanguageManager @State private var sheetState: SheetState = .recording @State private var transcription = "" @@ -25,11 +26,12 @@ struct RecordingSheetView: View { case viewing } - init(historyManager: HistoryManager, dictionaryManager: DictionaryManager, toneStyleManager: ToneStyleManager, shortcutManager: ShortcutManager, recording: Recording? = nil) { + init(historyManager: HistoryManager, dictionaryManager: DictionaryManager, toneStyleManager: ToneStyleManager, shortcutManager: ShortcutManager, languageManager: LanguageManager, recording: Recording? = nil) { self.historyManager = historyManager self.dictionaryManager = dictionaryManager self.toneStyleManager = toneStyleManager self.shortcutManager = shortcutManager + self.languageManager = languageManager if let recording = recording { _sheetState = State(initialValue: .viewing) _transcription = State(initialValue: recording.transcription) @@ -275,6 +277,15 @@ struct RecordingSheetView: View { return } + // Check subscription limits + let subscriptionCheck = SubscriptionManager.shared.checkCanTranscribe() + if !subscriptionCheck.canTranscribe { + errorMessage = subscriptionCheck.reason ?? "Transcription limit reached" + sheetState = .viewing + try? 
FileManager.default.removeItem(at: audioURL) + return + } + sheetState = .processing Task { @@ -292,6 +303,11 @@ struct RecordingSheetView: View { // Combine prompts from all sources var promptComponents: [String] = [] + // Add language hint + if let languageCodes = languageManager.apiLanguageCode { + promptComponents.append("Language: \(languageCodes)") + } + // Add dictionary hints for better recognition let dictionaryHints = dictionaryManager.transcriptionHints if !dictionaryHints.isEmpty { @@ -312,16 +328,36 @@ struct RecordingSheetView: View { let promptText = promptComponents.joined(separator: ". ") - let result = try await openAIClient.transcribe( - audioURL: audioURL, - prompt: promptText.isEmpty ? nil : promptText - ) + // Build formatting rules from tone/style + var formattingRules: [String] = [] + for style in toneStyleManager.styles where style.isEnabled { + formattingRules.append(style.instructions) + } + + let result: String + if !formattingRules.isEmpty { + result = try await openAIClient.transcribeAndFormat( + audioURL: audioURL, + prompt: promptText.isEmpty ? nil : promptText, + formattingRules: formattingRules, + languageCodes: languageManager.apiLanguageCode + ) + } else { + result = try await openAIClient.transcribe( + audioURL: audioURL, + prompt: promptText.isEmpty ? 
nil : promptText + ) + } // Apply post-processing: dictionary replacements and shortcut expansion var processedResult = result processedResult = dictionaryManager.applyReplacements(to: processedResult) processedResult = shortcutManager.expandShortcuts(in: processedResult) + // Track word count + let wordCount = processedResult.split(separator: " ").count + await SubscriptionManager.shared.recordWords(wordCount) + await MainActor.run { transcription = processedResult sheetState = .viewing @@ -339,7 +375,7 @@ struct RecordingSheetView: View { // Save to history with audio file URL let recording = Recording( id: recordingID, - transcription: result, + transcription: processedResult, duration: duration, audioFileURL: permanentAudioURL ) diff --git a/Whishpermate/WhisperMateIOS/WhisperMateApp.swift b/Whishpermate/WhisperMateIOS/WhisperMateApp.swift index 14cc32d7..8340e9a3 100644 --- a/Whishpermate/WhisperMateIOS/WhisperMateApp.swift +++ b/Whishpermate/WhisperMateIOS/WhisperMateApp.swift @@ -5,6 +5,8 @@ import WhisperMateShared @main struct WhisperMateApp: App { @StateObject private var onboardingManager = OnboardingManager() + @StateObject private var authManager = AuthManager.shared + @StateObject private var subscriptionManager = SubscriptionManager.shared init() {} @@ -12,6 +14,8 @@ struct WhisperMateApp: App { WindowGroup { if onboardingManager.hasCompletedOnboarding { ContentView() + .environmentObject(authManager) + .environmentObject(subscriptionManager) } else { OnboardingView(onboardingManager: onboardingManager) } diff --git a/Whishpermate/WhisperMateKeyboard/KeyboardViewController.swift b/Whishpermate/WhisperMateKeyboard/KeyboardViewController.swift index f72babc2..bbfa52a2 100644 --- a/Whishpermate/WhisperMateKeyboard/KeyboardViewController.swift +++ b/Whishpermate/WhisperMateKeyboard/KeyboardViewController.swift @@ -12,6 +12,9 @@ class KeyboardViewController: UIInputViewController { private var hostingController: UIHostingController! 
private var statusLabel: UILabel! private var cancellables = Set() + private let dictionaryManager = DictionaryManager.shared + private let shortcutManager = ShortcutManager.shared + private let languageManager = LanguageManager() // MARK: - Lifecycle @@ -186,12 +189,42 @@ class KeyboardViewController: UIInputViewController { Task { do { - let transcription = try await openAIClient.transcribe(audioURL: recordingURL) + // Build prompt with language, dictionary, and shortcut hints + var promptComponents: [String] = [] + + if let languageCodes = languageManager.apiLanguageCode { + promptComponents.append("Language: \(languageCodes)") + } + + let dictionaryHints = dictionaryManager.transcriptionHints + if !dictionaryHints.isEmpty { + promptComponents.append("Vocabulary: \(dictionaryHints)") + } + + let shortcutHints = shortcutManager.transcriptionHints + if !shortcutHints.isEmpty { + promptComponents.append("Phrases: \(shortcutHints)") + } + + let promptText = promptComponents.joined(separator: ". ") + + var transcription = try await openAIClient.transcribe( + audioURL: recordingURL, + prompt: promptText.isEmpty ? 
nil : promptText + ) + + // Apply post-processing + transcription = dictionaryManager.applyReplacements(to: transcription) + transcription = shortcutManager.expandShortcuts(in: transcription) + + // Track word count + let wordCount = transcription.split(separator: " ").count + await SubscriptionManager.shared.recordWords(wordCount) // Insert transcription into text field await MainActor.run { self.textDocumentProxy.insertText(transcription) - self.statusLabel.text = "✓ Transcribed" + self.statusLabel.text = "Transcribed" self.statusLabel.textColor = UIColor.systemGreen // Save to history diff --git a/Whishpermate/WhisperMateShared/Models/Language.swift b/Whishpermate/WhisperMateShared/Models/Language.swift index b8c7cf96..f30b3338 100644 --- a/Whishpermate/WhisperMateShared/Models/Language.swift +++ b/Whishpermate/WhisperMateShared/Models/Language.swift @@ -77,11 +77,11 @@ public enum Language: String, CaseIterable, Identifiable { } public class LanguageManager: ObservableObject { - @Published var selectedLanguages: Set = [] + @Published public var selectedLanguages: Set = [] private let userDefaultsKey = "selected_languages" - init() { + public init() { loadLanguages() } diff --git a/Whishpermate/WhisperMateShared/Services/AuthManager.swift b/Whishpermate/WhisperMateShared/Services/AuthManager.swift index 0e9593d3..d040e834 100644 --- a/Whishpermate/WhisperMateShared/Services/AuthManager.swift +++ b/Whishpermate/WhisperMateShared/Services/AuthManager.swift @@ -4,6 +4,9 @@ import Supabase #if canImport(AppKit) import AppKit #endif +#if canImport(UIKit) + import UIKit +#endif /// Manages user authentication state and session lifecycle via Supabase public class AuthManager: ObservableObject { @@ -77,10 +80,16 @@ public class AuthManager: ObservableObject { let authURL = "\(authWebURL)?redirect_to=\(Constants.authCallbackScheme.addingPercentEncoding(withAllowedCharacters: .urlQueryAllowed) ?? 
Constants.authCallbackScheme)" - #if canImport(AppKit) + #if canImport(AppKit) && !targetEnvironment(macCatalyst) if let url = URL(string: authURL) { NSWorkspace.shared.open(url) } + #elseif canImport(UIKit) + if let url = URL(string: authURL) { + Task { @MainActor in + UIApplication.shared.open(url) + } + } #endif } diff --git a/Whishpermate/WhisperMateShared/Services/SubscriptionManager.swift b/Whishpermate/WhisperMateShared/Services/SubscriptionManager.swift index 725bf641..2940e90d 100644 --- a/Whishpermate/WhisperMateShared/Services/SubscriptionManager.swift +++ b/Whishpermate/WhisperMateShared/Services/SubscriptionManager.swift @@ -10,6 +10,9 @@ import Foundation #if canImport(AppKit) import AppKit #endif +#if canImport(UIKit) + import UIKit +#endif public class SubscriptionManager: ObservableObject { public static let shared = SubscriptionManager() @@ -73,10 +76,16 @@ public class SubscriptionManager: ObservableObject { } } - #if canImport(AppKit) + #if canImport(AppKit) && !targetEnvironment(macCatalyst) if let url = URL(string: urlString) { NSWorkspace.shared.open(url) } + #elseif canImport(UIKit) + if let url = URL(string: urlString) { + Task { @MainActor in + UIApplication.shared.open(url) + } + } #endif } From 94b7d63c552f92547a214e3a95312677617fc98a Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 7 Mar 2026 01:11:59 +0000 Subject: [PATCH 2/2] Add Parakeet on-device transcription to iOS app - Link FluidAudio SPM package to WhisperMateIOS target - Create ParakeetTranscriptionService for iOS (NVIDIA Parakeet v3 model) - Add on-device/cloud transcription toggle in iOS Settings - Integrate Parakeet into RecordingSheetView transcription flow - On-device mode bypasses cloud API and subscription limits https://claude.ai/code/session_014WnimLYNLwfqYsqZn71BSU --- Whishpermate/WhisperMateIOS/ContentView.swift | 76 +++++++++ .../ParakeetTranscriptionService.swift | 145 ++++++++++++++++++ .../WhisperMateIOS/RecordingSheetView.swift | 55 +++++++ 
.../Whispermate.xcodeproj/project.pbxproj | 10 ++ 4 files changed, 286 insertions(+) create mode 100644 Whishpermate/WhisperMateIOS/ParakeetTranscriptionService.swift diff --git a/Whishpermate/WhisperMateIOS/ContentView.swift b/Whishpermate/WhisperMateIOS/ContentView.swift index 97e6f3b1..7218843b 100644 --- a/Whishpermate/WhisperMateIOS/ContentView.swift +++ b/Whishpermate/WhisperMateIOS/ContentView.swift @@ -8,8 +8,10 @@ struct ContentView: View { @StateObject private var toneStyleManager = ToneStyleManager.shared @StateObject private var shortcutManager = ShortcutManager.shared @StateObject private var languageManager = LanguageManager() + @StateObject private var parakeetService = ParakeetTranscriptionService.shared @EnvironmentObject var authManager: AuthManager @EnvironmentObject var subscriptionManager: SubscriptionManager + @AppStorage("useOnDeviceTranscription", store: AppDefaults.shared) private var useOnDeviceTranscription = false @State private var selectedTab: Int = 0 @State private var showRecordingSheet = false @State private var selectedRecording: Recording? @@ -470,6 +472,80 @@ struct ContentView: View { } } + // Transcription Engine Section + Section { + Toggle(isOn: $useOnDeviceTranscription) { + Label("On-Device (Parakeet)", systemImage: "iphone") + } + .onChange(of: useOnDeviceTranscription) { enabled in + if enabled { + Task { + try? await parakeetService.initialize() + } + } + } + + switch parakeetService.state { + case .notInitialized: + if useOnDeviceTranscription { + HStack { + Text("Model not downloaded") + .font(.caption) + .foregroundColor(.secondary) + Spacer() + Button("Download") { + Task { + try? 
await parakeetService.initialize() + } + } + .font(.caption) + } + } + case .downloading: + HStack { + ProgressView() + .controlSize(.small) + Text("Downloading model...") + .font(.caption) + .foregroundColor(.secondary) + } + case .initializing: + HStack { + ProgressView() + .controlSize(.small) + Text("Initializing...") + .font(.caption) + .foregroundColor(.secondary) + } + case .ready: + HStack { + Image(systemName: "checkmark.circle.fill") + .foregroundColor(.green) + Text("Model ready") + .font(.caption) + .foregroundColor(.green) + } + case .transcribing: + HStack { + ProgressView() + .controlSize(.small) + Text("Transcribing...") + .font(.caption) + .foregroundColor(.secondary) + } + case .error(let message): + Text(message) + .font(.caption) + .foregroundColor(.red) + } + } header: { + Text("Transcription Engine") + } footer: { + Text(useOnDeviceTranscription + ? "Transcription runs privately on your device. No internet required." + : "Transcription uses cloud API. Requires internet connection.") + } + Section("Permissions") { Button(action: openAppSettings) { HStack { diff --git a/Whishpermate/WhisperMateIOS/ParakeetTranscriptionService.swift b/Whishpermate/WhisperMateIOS/ParakeetTranscriptionService.swift new file mode 100644 index 00000000..472e8c51 --- /dev/null +++ b/Whishpermate/WhisperMateIOS/ParakeetTranscriptionService.swift @@ -0,0 +1,145 @@ +import AVFoundation +internal import Combine +import FluidAudio +import Foundation +import WhisperMateShared + +/// On-device transcription service using NVIDIA Parakeet model via FluidAudio for iOS +class ParakeetTranscriptionService: ObservableObject { + static let shared = ParakeetTranscriptionService() + + // MARK: - Types + + enum ServiceState: Equatable { + case notInitialized + case downloading + case initializing + case ready + case transcribing + case error(String) + + static func == (lhs: ServiceState, rhs: ServiceState) -> Bool { + switch (lhs, rhs) { + case (.notInitialized, .notInitialized), + 
(.downloading, .downloading), + (.initializing, .initializing), + (.ready, .ready), + (.transcribing, .transcribing): + return true + case let (.error(a), .error(b)): + return a == b + default: + return false + } + } + } + + // MARK: - Published Properties + + @Published var state: ServiceState = .notInitialized + @Published var isModelDownloaded: Bool = false + + // MARK: - Private Properties + + private var asrManager: AsrManager? + private var models: AsrModels? + private let audioConverter = AudioConverter() + + // MARK: - Initialization + + private init() {} + + // MARK: - Public API + + /// Downloads and initializes the Parakeet model (v3 multilingual). + /// Returns immediately when setup is already done or in progress; a previous `.error` state is retried. + /// - Throws: The download or initialization error, after publishing it as `.error`. + func initialize() async throws { + switch state { + case .notInitialized, .error: + break // first attempt, or retry after an earlier failure + case .downloading, .initializing, .ready, .transcribing: + DebugLog.info("Already initialized or in progress", context: "ParakeetTranscriptionService") + return + } + + await MainActor.run { + self.state = .downloading + } + + do { + DebugLog.info("Downloading Parakeet v3 multilingual model...", context: "ParakeetTranscriptionService") + let downloadedModels = try await AsrModels.downloadAndLoad(version: .v3) + + await MainActor.run { + self.state = .initializing + self.models = downloadedModels + } + DebugLog.info("Initializing ASR manager...", context: "ParakeetTranscriptionService") + let manager = AsrManager(config: .default) + try await manager.initialize(models: downloadedModels) + + await MainActor.run { + self.asrManager = manager + self.state = .ready + self.isModelDownloaded = true + } + DebugLog.info("Parakeet model ready", context: "ParakeetTranscriptionService") + } catch { + DebugLog.info("Failed to initialize Parakeet: \(error.localizedDescription)", context: "ParakeetTranscriptionService") + await MainActor.run { + self.state = .error(error.localizedDescription) + } + throw error + } + } + + /// Transcribe audio file to text + func transcribe(audioURL: URL) async throws -> String { + // A missing manager covers both a fresh start and an earlier failed setup. + if asrManager == nil { + try await initialize() + } + guard let 
manager = asrManager else { + throw NSError(domain: "ParakeetTranscriptionService", code: -1, + userInfo: [NSLocalizedDescriptionKey: "ASR manager not initialized"]) + } + + await MainActor.run { + self.state = .transcribing + } + + defer { + Task { @MainActor in + self.state = .ready + } + } + + do { + DebugLog.info("Converting audio to 16kHz mono...", context: "ParakeetTranscriptionService") + + let samples = try audioConverter.resampleAudioFile(path: audioURL.path) + + DebugLog.info("Transcribing \(samples.count) samples...", context: "ParakeetTranscriptionService") + + let result = try await manager.transcribe(samples) + + DebugLog.info("Transcription complete: \(result.text.prefix(100))...", context: "ParakeetTranscriptionService") + + return result.text + + } catch { + DebugLog.info("Transcription failed: \(error.localizedDescription)", context: "ParakeetTranscriptionService") + throw error + } + } + + /// Cleanup resources + func cleanup() { + asrManager?.cleanup() + asrManager = nil + models = nil + state = .notInitialized + isModelDownloaded = false + } +} diff --git a/Whishpermate/WhisperMateIOS/RecordingSheetView.swift b/Whishpermate/WhisperMateIOS/RecordingSheetView.swift index c02df1ab..357b414e 100644 --- a/Whishpermate/WhisperMateIOS/RecordingSheetView.swift +++ b/Whishpermate/WhisperMateIOS/RecordingSheetView.swift @@ -10,6 +10,7 @@ struct RecordingSheetView: View { @ObservedObject var toneStyleManager: ToneStyleManager @ObservedObject var shortcutManager: ShortcutManager @ObservedObject var languageManager: LanguageManager + @AppStorage("useOnDeviceTranscription", store: AppDefaults.shared) private var useOnDeviceTranscription = false @State private var sheetState: SheetState = .recording @State private var transcription = "" @@ -266,6 +267,60 @@ struct RecordingSheetView: View { } private func transcribeAudio(audioURL: URL) { + if useOnDeviceTranscription { + transcribeWithParakeet(audioURL: audioURL) + } else { + transcribeWithCloud(audioURL: 
audioURL) + } + } + + /// Transcribes `audioURL` on-device via Parakeet, then post-processes the text and saves it to history. + /// The temporary recording at `audioURL` is removed when the task finishes, on success and on failure alike. + private func transcribeWithParakeet(audioURL: URL) { + sheetState = .processing + // Measure the duration now so it does not include model download or transcription time. + let duration = recordingStartTime.map { Date().timeIntervalSince($0) } + + Task { + // Runs when the task body exits, i.e. after the history copy below has been made. + defer { try? FileManager.default.removeItem(at: audioURL) } + do { + let result = try await ParakeetTranscriptionService.shared.transcribe(audioURL: audioURL) + // Apply post-processing + var processedResult = result + processedResult = dictionaryManager.applyReplacements(to: processedResult) + processedResult = shortcutManager.expandShortcuts(in: processedResult) + + // Track word count + let wordCount = processedResult.split(separator: " ").count + await SubscriptionManager.shared.recordWords(wordCount) + + await MainActor.run { + transcription = processedResult + sheetState = .viewing + errorMessage = "" + let recordingID = UUID() + let permanentAudioURL = historyManager.saveAudioFile(from: audioURL, for: recordingID) + let recording = Recording( + id: recordingID, + transcription: processedResult, + duration: duration, + audioFileURL: permanentAudioURL + ) + historyManager.addRecording(recording) + currentRecording = recording + } + } catch { + await MainActor.run { + transcription = "" + sheetState = .viewing + errorMessage = "On-device transcription failed: \(error.localizedDescription)" + } + } + } + } + + private func transcribeWithCloud(audioURL: URL) { // Get API key from Secrets.plist or keychain let apiKey = KeychainHelper.get(key: "custom_transcription_api_key") ?? SecretsLoader.transcriptionKey(for: .custom) let endpoint = SecretsLoader.customTranscriptionEndpoint() ?? 
"https://writingmate.ai/api/openai/v1/audio/transcriptions" diff --git a/Whishpermate/Whispermate.xcodeproj/project.pbxproj b/Whishpermate/Whispermate.xcodeproj/project.pbxproj index 2fc7c721..8143f0d7 100644 --- a/Whishpermate/Whispermate.xcodeproj/project.pbxproj +++ b/Whishpermate/Whispermate.xcodeproj/project.pbxproj @@ -14,6 +14,7 @@ C12A3A182ED4D0AA002D97E6 /* PostgREST in Frameworks */ = {isa = PBXBuildFile; productRef = C12A3A172ED4D0AA002D97E6 /* PostgREST */; }; C1FA00032F0B000000000003 /* FluidAudio in Frameworks */ = {isa = PBXBuildFile; productRef = C1FA00022F0B000000000002 /* FluidAudio */; }; C2A100012F2B000000000001 /* Sparkle in Frameworks */ = {isa = PBXBuildFile; productRef = C2A100032F2B000000000003 /* Sparkle */; }; + C3FA00042F0B000000000004 /* FluidAudio in Frameworks */ = {isa = PBXBuildFile; productRef = C3FA00052F0B000000000005 /* FluidAudio */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -180,6 +181,7 @@ isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( + C3FA00042F0B000000000004 /* FluidAudio in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -300,6 +302,9 @@ C1E7D2EF2EBA92A500A07338 /* WhisperMateIOS */, ); name = WhisperMateIOS; + packageProductDependencies = ( + C3FA00052F0B000000000005 /* FluidAudio */, + ); productName = WhisperMateIOS; productReference = C1CE64402ED8417000D24235 /* WhisperMateIOS.app */; productType = "com.apple.product-type.application"; @@ -1023,6 +1028,11 @@ package = C2A100022F2B000000000002 /* XCRemoteSwiftPackageReference "Sparkle" */; productName = Sparkle; }; + C3FA00052F0B000000000005 /* FluidAudio */ = { + isa = XCSwiftPackageProductDependency; + package = C1FA00012F0B000000000001 /* XCRemoteSwiftPackageReference "FluidAudio" */; + productName = FluidAudio; + }; /* End XCSwiftPackageProductDependency section */ }; rootObject = C18566CB2EA0D31000B46F55 /* Project object */;