From 1d62e23a77fea68999d36fad896a64732e3de742 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 12 May 2026 21:40:55 +0000 Subject: [PATCH 1/4] Remove third-party data collection and disable auto-update Three changes to make this fork safe to run on a personal machine without leaking conversation contents or accepting remote code updates: - Strip PostHog analytics entirely. The SDK was capturing full push-to-talk transcripts and full Claude responses as event properties, plus identifying the user by email. ClickyAnalytics functions are now no-ops, the PostHog import and Swift Package dependency are removed. - Remove SUFeedURL and SUPublicEDKey from Info.plist. The feed pointed at an unrelated GitHub account that could have pushed arbitrary updates if Sparkle were enabled. - Remove the FormSpark POST in submitEmail. The email entered during onboarding is no longer sent to any third party. --- leanring-buddy.xcodeproj/project.pbxproj | 17 --- .../xcshareddata/swiftpm/Package.resolved | 18 --- leanring-buddy/ClickyAnalytics.swift | 112 ++++-------------- leanring-buddy/CompanionManager.swift | 19 +-- leanring-buddy/Info.plist | 4 - 5 files changed, 26 insertions(+), 144 deletions(-) diff --git a/leanring-buddy.xcodeproj/project.pbxproj b/leanring-buddy.xcodeproj/project.pbxproj index 75e57261..a199d8d2 100644 --- a/leanring-buddy.xcodeproj/project.pbxproj +++ b/leanring-buddy.xcodeproj/project.pbxproj @@ -8,7 +8,6 @@ /* Begin PBXBuildFile section */ AA00BB032F6500030039DA55 /* Sparkle in Frameworks */ = {isa = PBXBuildFile; productRef = AA00BB022F6500020039DA55 /* Sparkle */; }; - AA00BB062F6500060039DA55 /* PostHog in Frameworks */ = {isa = PBXBuildFile; productRef = AA00BB052F6500050039DA55 /* PostHog */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -58,7 +57,6 @@ buildActionMask = 2147483647; files = ( AA00BB032F6500030039DA55 /* Sparkle in Frameworks */, - AA00BB062F6500060039DA55 /* PostHog in Frameworks */, ); 
runOnlyForDeploymentPostprocessing = 0; }; @@ -120,7 +118,6 @@ name = "leanring-buddy"; packageProductDependencies = ( AA00BB022F6500020039DA55 /* Sparkle */, - AA00BB052F6500050039DA55 /* PostHog */, ); productName = "leanring-buddy"; productReference = 28F22CBF2F56440300A0FC59 /* Clicky.app */; @@ -206,7 +203,6 @@ minimizedProjectReferenceProxies = 1; packageReferences = ( AA00BB012F6500010039DA55 /* XCRemoteSwiftPackageReference "Sparkle" */, - AA00BB042F6500040039DA55 /* XCRemoteSwiftPackageReference "posthog-ios" */, ); preferredProjectObjectVersion = 77; productRefGroup = 28F22CC02F56440300A0FC59 /* Products */; @@ -608,14 +604,6 @@ minimumVersion = 2.9.0; }; }; - AA00BB042F6500040039DA55 /* XCRemoteSwiftPackageReference "posthog-ios" */ = { - isa = XCRemoteSwiftPackageReference; - repositoryURL = "https://github.com/PostHog/posthog-ios.git"; - requirement = { - kind = upToNextMajorVersion; - minimumVersion = 3.0.0; - }; - }; /* End XCRemoteSwiftPackageReference section */ /* Begin XCSwiftPackageProductDependency section */ @@ -624,11 +612,6 @@ package = AA00BB012F6500010039DA55 /* XCRemoteSwiftPackageReference "Sparkle" */; productName = Sparkle; }; - AA00BB052F6500050039DA55 /* PostHog */ = { - isa = XCSwiftPackageProductDependency; - package = AA00BB042F6500040039DA55 /* XCRemoteSwiftPackageReference "posthog-ios" */; - productName = PostHog; - }; /* End XCSwiftPackageProductDependency section */ }; rootObject = 28F22CB72F56440300A0FC59 /* Project object */; diff --git a/leanring-buddy.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/leanring-buddy.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index d88adb21..e2eb51c3 100644 --- a/leanring-buddy.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/leanring-buddy.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -1,24 +1,6 @@ { "originHash" : "3c6fb67fefedcfcd00708e24ca8088151f21dccfc0ade32ea80c406646277e89", "pins" : [ - { 
- "identity" : "plcrashreporter", - "kind" : "remoteSourceControl", - "location" : "https://github.com/microsoft/plcrashreporter.git", - "state" : { - "revision" : "0254f941c646b1ed17b243654723d0f071e990d0", - "version" : "1.12.2" - } - }, - { - "identity" : "posthog-ios", - "kind" : "remoteSourceControl", - "location" : "https://github.com/PostHog/posthog-ios.git", - "state" : { - "revision" : "09da1be6a614325a6a464c6d2017a9ac858d1b5a", - "version" : "3.47.0" - } - }, { "identity" : "sparkle", "kind" : "remoteSourceControl", diff --git a/leanring-buddy/ClickyAnalytics.swift b/leanring-buddy/ClickyAnalytics.swift index 29e26138..0d8974fa 100644 --- a/leanring-buddy/ClickyAnalytics.swift +++ b/leanring-buddy/ClickyAnalytics.swift @@ -2,120 +2,54 @@ // ClickyAnalytics.swift // leanring-buddy // -// Centralized PostHog analytics wrapper. All event names and properties -// are defined here so instrumentation is consistent and easy to audit. +// Analytics were intentionally disabled in this fork — no usage data, +// transcripts, or AI responses are sent anywhere. All functions remain +// as no-ops so existing call sites compile unchanged. // import Foundation -import PostHog enum ClickyAnalytics { // MARK: - Setup - static func configure() { - let config = PostHogConfig( - apiKey: "phc_xcQPygmhTMzzYh8wNW92CCwoXmnzqyChAixh8zgpqC3C", - host: "https://us.i.posthog.com" - ) - PostHogSDK.shared.setup(config) - } + static func configure() {} // MARK: - App Lifecycle - /// Fired once on every app launch in applicationDidFinishLaunching. - static func trackAppOpened() { - let version = Bundle.main.infoDictionary?["CFBundleShortVersionString"] as? String ?? "unknown" - PostHogSDK.shared.capture("app_opened", properties: [ - "app_version": version - ]) - } + static func trackAppOpened() {} // MARK: - Onboarding - /// User clicked the Start button to begin onboarding for the first time. 
- static func trackOnboardingStarted() { - PostHogSDK.shared.capture("onboarding_started") - } + static func trackOnboardingStarted() {} - /// User clicked "Watch Onboarding Again" from the panel footer. - static func trackOnboardingReplayed() { - PostHogSDK.shared.capture("onboarding_replayed") - } + static func trackOnboardingReplayed() {} - /// The onboarding video finished playing to the end. - static func trackOnboardingVideoCompleted() { - PostHogSDK.shared.capture("onboarding_video_completed") - } + static func trackOnboardingVideoCompleted() {} - /// The 40s onboarding demo interaction where Clicky points at something. - static func trackOnboardingDemoTriggered() { - PostHogSDK.shared.capture("onboarding_demo_triggered") - } + static func trackOnboardingDemoTriggered() {} // MARK: - Permissions - /// All three permissions (accessibility, screen recording, mic) are granted. - static func trackAllPermissionsGranted() { - PostHogSDK.shared.capture("all_permissions_granted") - } + static func trackAllPermissionsGranted() {} - /// A single permission was granted. Called when polling detects a change. - static func trackPermissionGranted(permission: String) { - PostHogSDK.shared.capture("permission_granted", properties: [ - "permission": permission - ]) - } + static func trackPermissionGranted(permission: String) {} // MARK: - Voice Interaction - /// User pressed the push-to-talk shortcut (control+option) to start talking. - static func trackPushToTalkStarted() { - PostHogSDK.shared.capture("push_to_talk_started") - } - - /// User released the shortcut — transcript is being finalized. - static func trackPushToTalkReleased() { - PostHogSDK.shared.capture("push_to_talk_released") - } - - /// Transcription completed and the user's message is being sent to the AI. 
- static func trackUserMessageSent(transcript: String) { - PostHogSDK.shared.capture("user_message_sent", properties: [ - "transcript": transcript, - "character_count": transcript.count - ]) - } - - /// Claude responded and the response is being spoken via TTS. - static func trackAIResponseReceived(response: String) { - PostHogSDK.shared.capture("ai_response_received", properties: [ - "response": response, - "character_count": response.count - ]) - } - - /// Claude's response included a [POINT:x,y:label] coordinate tag, - /// so the buddy is flying to point at a UI element. - static func trackElementPointed(elementLabel: String?) { - PostHogSDK.shared.capture("element_pointed", properties: [ - "element_label": elementLabel ?? "unknown" - ]) - } + static func trackPushToTalkStarted() {} + + static func trackPushToTalkReleased() {} + + static func trackUserMessageSent(transcript: String) {} + + static func trackAIResponseReceived(response: String) {} + + static func trackElementPointed(elementLabel: String?) {} // MARK: - Errors - /// An error occurred during the AI response pipeline. - static func trackResponseError(error: String) { - PostHogSDK.shared.capture("response_error", properties: [ - "error": error - ]) - } - - /// An error occurred during TTS playback. - static func trackTTSError(error: String) { - PostHogSDK.shared.capture("tts_error", properties: [ - "error": error - ]) - } + static func trackResponseError(error: String) {} + + static func trackTTSError(error: String) {} } diff --git a/leanring-buddy/CompanionManager.swift b/leanring-buddy/CompanionManager.swift index 0234cf19..63030f89 100644 --- a/leanring-buddy/CompanionManager.swift +++ b/leanring-buddy/CompanionManager.swift @@ -10,7 +10,6 @@ import AVFoundation import Combine import Foundation -import PostHog import ScreenCaptureKit import SwiftUI @@ -149,27 +148,15 @@ final class CompanionManager: ObservableObject { /// Whether the user has submitted their email during onboarding. 
@Published var hasSubmittedEmail: Bool = UserDefaults.standard.bool(forKey: "hasSubmittedEmail") - /// Submits the user's email to FormSpark and identifies them in PostHog. + /// Records that the user has dismissed the email prompt. The email itself + /// is never sent anywhere — analytics and the FormSpark submission were + /// removed in this fork. func submitEmail(_ email: String) { let trimmedEmail = email.trimmingCharacters(in: .whitespacesAndNewlines) guard !trimmedEmail.isEmpty else { return } hasSubmittedEmail = true UserDefaults.standard.set(true, forKey: "hasSubmittedEmail") - - // Identify user in PostHog - PostHogSDK.shared.identify(trimmedEmail, userProperties: [ - "email": trimmedEmail - ]) - - // Submit to FormSpark - Task { - var request = URLRequest(url: URL(string: "https://submit-form.com/RWbGJxmIs")!) - request.httpMethod = "POST" - request.setValue("application/json", forHTTPHeaderField: "Content-Type") - request.httpBody = try? JSONSerialization.data(withJSONObject: ["email": trimmedEmail]) - _ = try? await URLSession.shared.data(for: request) - } } func start() { diff --git a/leanring-buddy/Info.plist b/leanring-buddy/Info.plist index e3d2b455..b21d7e4e 100644 --- a/leanring-buddy/Info.plist +++ b/leanring-buddy/Info.plist @@ -4,10 +4,6 @@ LSUIElement - SUFeedURL - https://raw.githubusercontent.com/julianjear/makesomething-mac-app/main/appcast.xml - SUPublicEDKey - /l3d2rw5ZZFRU3AadP/w2Zf8FHfhA6bKv16BQOV5OSk= VoiceTranscriptionProvider assemblyai NSMicrophoneUsageDescription From b3e15061c3d798a92e3d96aa9502b7bb0c9d0f6c Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 12 May 2026 21:49:14 +0000 Subject: [PATCH 2/4] Deep clean: drop Sparkle, OpenAI bypass paths, email gate, auto-login Second pass through the repo. Removes everything that either ships data to a third party or could let the app talk to an external API without going through the user's own Cloudflare Worker. - Sparkle removed end-to-end. 
The Swift Package dependency, the `SPUStandardUpdaterController` boot wiring, the `appcast.xml` feed, and the `scripts/release.sh` pipeline that pushed updates to a third-party GitHub account are all gone. Update by re-building from source. - Direct API code paths deleted: `OpenAIAPI.swift` (OpenAI GPT vision client that could bypass the Worker), `ElementLocationDetector.swift` (called `api.anthropic.com` directly with an in-bundle key), and `OpenAIAudioTranscriptionProvider.swift` (would have sent audio straight to `api.openai.com`). The transcription factory now resolves to AssemblyAI (via the Worker) or Apple Speech locally. - Silent login-item registration removed from `applicationDidFinishLaunching`. The app no longer adds itself to Login Items on launch; add it manually in System Settings if you want it to auto-start. - Onboarding email gate removed. `submitEmail`, `hasSubmittedEmail`, and the email TextField + Submit button were doing nothing useful once the FormSpark POST and PostHog identify were stripped. The Start button is now shown directly when permissions are granted. - AGENTS.md updated: architecture, Key Files table, and a new "Fork-specific changes" section documenting what was removed and why. 
--- AGENTS.md | 52 ++- appcast.xml | 30 -- dmg-background.png | Bin 2484 -> 0 bytes leanring-buddy.xcodeproj/project.pbxproj | 17 - .../xcshareddata/swiftpm/Package.resolved | 9 - .../BuddyTranscriptionProvider.swift | 31 +- leanring-buddy/CompanionManager.swift | 14 - leanring-buddy/CompanionPanelView.swift | 77 +--- leanring-buddy/ElementLocationDetector.swift | 335 ------------------ leanring-buddy/OpenAIAPI.swift | 142 -------- .../OpenAIAudioTranscriptionProvider.swift | 317 ----------------- leanring-buddy/leanring_buddyApp.swift | 38 -- scripts/README.md | 62 ---- scripts/release.sh | 276 --------------- 14 files changed, 56 insertions(+), 1344 deletions(-) delete mode 100644 appcast.xml delete mode 100644 dmg-background.png delete mode 100644 leanring-buddy/ElementLocationDetector.swift delete mode 100644 leanring-buddy/OpenAIAPI.swift delete mode 100644 leanring-buddy/OpenAIAudioTranscriptionProvider.swift delete mode 100644 scripts/README.md delete mode 100755 scripts/release.sh diff --git a/AGENTS.md b/AGENTS.md index 6946d441..cb445e2d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -15,13 +15,14 @@ All API keys live on a Cloudflare Worker proxy — nothing sensitive ships in th - **Framework**: SwiftUI (macOS native) with AppKit bridging for menu bar panel and cursor overlay - **Pattern**: MVVM with `@StateObject` / `@Published` state management - **AI Chat**: Claude (Sonnet 4.6 default, Opus 4.6 optional) via Cloudflare Worker proxy with SSE streaming -- **Speech-to-Text**: AssemblyAI real-time streaming (`u3-rt-pro` model) via websocket, with OpenAI and Apple Speech as fallbacks +- **Speech-to-Text**: AssemblyAI real-time streaming (`u3-rt-pro` model) via websocket, with Apple Speech as the local fallback - **Text-to-Speech**: ElevenLabs (`eleven_flash_v2_5` model) via Cloudflare Worker proxy - **Screen Capture**: ScreenCaptureKit (macOS 14.2+), multi-monitor support - **Voice Input**: Push-to-talk via `AVAudioEngine` + pluggable transcription-provider 
layer. System-wide keyboard shortcut via listen-only CGEvent tap. - **Element Pointing**: Claude embeds `[POINT:x,y:label:screenN]` tags in responses. The overlay parses these, maps coordinates to the correct monitor, and animates the blue cursor along a bezier arc to the target. - **Concurrency**: `@MainActor` isolation, async/await throughout -- **Analytics**: PostHog via `ClickyAnalytics.swift` +- **Analytics**: None — the PostHog integration was removed in this fork. `ClickyAnalytics.swift` remains as a no-op shim so call sites compile unchanged. +- **Auto-update**: None — Sparkle was removed in this fork. Update by re-building from source. ### API Proxy (Cloudflare Worker) @@ -52,26 +53,23 @@ Worker vars: `ELEVENLABS_VOICE_ID` | File | Lines | Purpose | |------|-------|---------| -| `leanring_buddyApp.swift` | ~89 | Menu bar app entry point. Uses `@NSApplicationDelegateAdaptor` with `CompanionAppDelegate` which creates `MenuBarPanelManager` and starts `CompanionManager`. No main window — the app lives entirely in the status bar. | -| `CompanionManager.swift` | ~1026 | Central state machine. Owns dictation, shortcut monitoring, screen capture, Claude API, ElevenLabs TTS, and overlay management. Tracks voice state (idle/listening/processing/responding), conversation history, model selection, and cursor visibility. Coordinates the full push-to-talk → screenshot → Claude → TTS → pointing pipeline. | +| `leanring_buddyApp.swift` | ~50 | Menu bar app entry point. Uses `@NSApplicationDelegateAdaptor` with `CompanionAppDelegate` which creates `MenuBarPanelManager` and starts `CompanionManager`. No main window — the app lives entirely in the status bar. | +| `CompanionManager.swift` | ~990 | Central state machine. Owns dictation, shortcut monitoring, screen capture, Claude API, ElevenLabs TTS, and overlay management. Tracks voice state (idle/listening/processing/responding), conversation history, model selection, and cursor visibility. 
Coordinates the full push-to-talk → screenshot → Claude → TTS → pointing pipeline. | | `MenuBarPanelManager.swift` | ~243 | NSStatusItem + custom NSPanel lifecycle. Creates the menu bar icon, manages the floating companion panel (show/hide/position), installs click-outside-to-dismiss monitor. | -| `CompanionPanelView.swift` | ~761 | SwiftUI panel content for the menu bar dropdown. Shows companion status, push-to-talk instructions, model picker (Sonnet/Opus), permissions UI, DM feedback button, and quit button. Dark aesthetic using `DS` design system. | +| `CompanionPanelView.swift` | ~705 | SwiftUI panel content for the menu bar dropdown. Shows companion status, push-to-talk instructions, model picker (Sonnet/Opus), permissions UI, DM feedback button, and quit button. Dark aesthetic using `DS` design system. | | `OverlayWindow.swift` | ~881 | Full-screen transparent overlay hosting the blue cursor, response text, waveform, and spinner. Handles cursor animation, element pointing with bezier arcs, multi-monitor coordinate mapping, and fade-out transitions. | | `CompanionResponseOverlay.swift` | ~217 | SwiftUI view for the response text bubble and waveform displayed next to the cursor in the overlay. | | `CompanionScreenCaptureUtility.swift` | ~132 | Multi-monitor screenshot capture using ScreenCaptureKit. Returns labeled image data for each connected display. | | `BuddyDictationManager.swift` | ~866 | Push-to-talk voice pipeline. Handles microphone capture via `AVAudioEngine`, provider-aware permission checks, keyboard/button dictation sessions, transcript finalization, shortcut parsing, contextual keyterms, and live audio-level reporting for waveform feedback. | -| `BuddyTranscriptionProvider.swift` | ~100 | Protocol surface and provider factory for voice transcription backends. Resolves provider based on `VoiceTranscriptionProvider` in Info.plist — AssemblyAI, OpenAI, or Apple Speech. 
| +| `BuddyTranscriptionProvider.swift` | ~75 | Protocol surface and provider factory for voice transcription backends. Resolves provider based on `VoiceTranscriptionProvider` in Info.plist — AssemblyAI (primary) or Apple Speech (local fallback). | | `AssemblyAIStreamingTranscriptionProvider.swift` | ~478 | Streaming transcription provider. Fetches temp tokens from the Cloudflare Worker, opens an AssemblyAI v3 websocket, streams PCM16 audio, tracks turn-based transcripts, and delivers finalized text on key-up. Shares a single URLSession across all sessions. | -| `OpenAIAudioTranscriptionProvider.swift` | ~317 | Upload-based transcription provider. Buffers push-to-talk audio locally, uploads as WAV on release, returns finalized transcript. | | `AppleSpeechTranscriptionProvider.swift` | ~147 | Local fallback transcription provider backed by Apple's Speech framework. | -| `BuddyAudioConversionSupport.swift` | ~108 | Audio conversion helpers. Converts live mic buffers to PCM16 mono audio and builds WAV payloads for upload-based providers. | +| `BuddyAudioConversionSupport.swift` | ~108 | Audio conversion helpers. Converts live mic buffers to PCM16 mono audio. | | `GlobalPushToTalkShortcutMonitor.swift` | ~132 | System-wide push-to-talk monitor. Owns the listen-only `CGEvent` tap and publishes press/release transitions. | | `ClaudeAPI.swift` | ~291 | Claude vision API client with streaming (SSE) and non-streaming modes. TLS warmup optimization, image MIME detection, conversation history support. | -| `OpenAIAPI.swift` | ~142 | OpenAI GPT vision API client. | | `ElevenLabsTTSClient.swift` | ~81 | ElevenLabs TTS client. Sends text to the Worker proxy, plays back audio via `AVAudioPlayer`. Exposes `isPlaying` for transient cursor scheduling. | -| `ElementLocationDetector.swift` | ~335 | Detects UI element locations in screenshots for cursor pointing. | | `DesignSystem.swift` | ~880 | Design system tokens — colors, corner radii, shared styles. 
All UI references `DS.Colors`, `DS.CornerRadius`, etc. | -| `ClickyAnalytics.swift` | ~121 | PostHog analytics integration for usage tracking. | +| `ClickyAnalytics.swift` | ~55 | No-op analytics shim. The PostHog integration was removed in this fork; the functions remain so call sites compile unchanged. | | `WindowPositionManager.swift` | ~262 | Window placement logic, Screen Recording permission flow, and accessibility permission helpers. | | `AppBundleConfiguration.swift` | ~28 | Runtime configuration reader for keys stored in the app bundle Info.plist. | | `worker/src/index.ts` | ~142 | Cloudflare Worker proxy. Three routes: `/chat` (Claude), `/tts` (ElevenLabs), `/transcribe-token` (AssemblyAI temp token). | @@ -165,3 +163,35 @@ When you make changes to this project that affect the information in this file, 6. **Line count drift**: If a file's line count changes significantly (>50 lines), update the approximate count in the Key Files table Do NOT update this file for minor edits, bug fixes, or changes that don't affect the documented architecture or conventions. + +## Fork-specific changes (data-safety cleanup) + +This fork intentionally drops several pieces of the upstream project to keep +the app's data flows narrow and obvious. If you re-add any of these you are +changing what the app sends to third parties — be deliberate about it. + +- **PostHog analytics removed.** Upstream sent full push-to-talk transcripts + and full Claude responses (plus the user's email after onboarding) to + PostHog. The SDK, Swift Package dependency, and all `capture()` / + `identify()` calls were stripped. `ClickyAnalytics.swift` is kept as a + no-op shim so call sites compile. +- **Sparkle auto-update removed.** Upstream's `SUFeedURL` pointed at an + unrelated GitHub account (`julianjear/makesomething-mac-app`) that could + have shipped arbitrary signed updates. 
The Sparkle dependency, the + appcast feed keys in `Info.plist`, and the release pipeline that wrote + to that repo (`scripts/release.sh`) were all removed. Update by + re-building from source. +- **FormSpark email submission removed.** The onboarding email field used + to POST to `submit-form.com`. The submission, the field, and the + `hasSubmittedEmail` gate were removed; the Start button is shown + directly once permissions are granted. +- **Auto-login-item registration removed.** Upstream silently registered + itself with `SMAppService` on every launch. Now the app only runs when + you start it; add it to Login Items via System Settings if you want + it to auto-start. +- **Direct-API code paths removed.** `OpenAIAPI.swift`, + `ElementLocationDetector.swift`, and `OpenAIAudioTranscriptionProvider.swift` + could have been configured to call OpenAI / Anthropic directly with + in-bundle API keys, bypassing the Cloudflare Worker. All three were + deleted to enforce the worker-proxy invariant: nothing sensitive ships + in the app binary. 
diff --git a/appcast.xml b/appcast.xml deleted file mode 100644 index 60471425..00000000 --- a/appcast.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - - makesomething - - 2.0 - Sat, 14 Mar 2026 09:56:02 -0700 - 11 - 2.0 - 14.0 - - - - 1.9 - Sat, 14 Mar 2026 08:33:19 -0700 - 10 - 1.9 - 14.0 - - - - 1.8 - Wed, 11 Mar 2026 10:17:12 -0700 - 9 - 1.8 - 14.0 - - - - \ No newline at end of file diff --git a/dmg-background.png b/dmg-background.png deleted file mode 100644 index 8a6af5171608513e1b23a0f8fe95990d50054a9c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2484 zcmeHJYfqX76n;f*RwwS#d8sIL9~QT1)mF#LI!8CFb#_I&w$WxPYMba(98y}bikjAp zY0XeOHmXHgXM#;8f~Bjq8ZN=hrbD{WSyA8>88Az^4FLs#v7fQb56_pAoSY}mB{^MZ z3i99)@eu$3!1F)JIST+V1^|Tgz61+S*iWn10bqYie$L0|n^_Q^ZkNHaBLK&&77I?187UpcCQ?I{!Sy(rQayz-TP&@{wPI+s*}H5mj_nq> zh?qCg#ES5!DCF)|l6=YNn0TlWHedbs5CJ`PO;_UuSz-2lbki47jauF6nse5m>(xqO zbFN4m)&=rmflaqYhA5O*W%y_Z-qMI`gS_JH`-)DAv{yPo{wWW92okE(@wi|kfycSY=ac21c{82uAjUWh_4cm29_eF=L&r` zly%zS_o4`RA=PL$cXarwhx~n)rKCLn5g zS+12!I?R`>551cs>_jATjK}NIy{)x1%pi&?SR7KqJn0Q9uOyXOS65fP#Ojw-DKJ>S zdLBfh83_7`Y3&92_f`}&(89J_#<*PBjQ3@Zk}$xTRSK*^uV%^Tb?od0d3U&vtJLbC zokADwYc$pM>wJ$VvUg&99P7Bh`YeUH*v(9>hm>ubdPr`pb6d=%Q?40ayK`~&BB`O_ z2bJv!LBof&w0OU5ZkJ@@bQt{OE1NQr-yC!|6^{`K1Z4B-fi$6Q_u}{p1RZa_8I_e_ zr$xsR`pVICoxz~za?NXLA`iDameSZbME$!fw!F!&dS6eAPR|zKJj`5NWTw^E*F%qe z3tx4|3QH}adYUdHSNv}4&nhe(tf(TciehcsT9;^jgWd&rlFR@f6-y)?#BSixdl06*%deb0o^0{ A=>Px# diff --git a/leanring-buddy.xcodeproj/project.pbxproj b/leanring-buddy.xcodeproj/project.pbxproj index a199d8d2..2254d472 100644 --- a/leanring-buddy.xcodeproj/project.pbxproj +++ b/leanring-buddy.xcodeproj/project.pbxproj @@ -7,7 +7,6 @@ objects = { /* Begin PBXBuildFile section */ - AA00BB032F6500030039DA55 /* Sparkle in Frameworks */ = {isa = PBXBuildFile; productRef = AA00BB022F6500020039DA55 /* Sparkle */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -56,7 +55,6 @@ isa = PBXFrameworksBuildPhase; 
buildActionMask = 2147483647; files = ( - AA00BB032F6500030039DA55 /* Sparkle in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -117,7 +115,6 @@ ); name = "leanring-buddy"; packageProductDependencies = ( - AA00BB022F6500020039DA55 /* Sparkle */, ); productName = "leanring-buddy"; productReference = 28F22CBF2F56440300A0FC59 /* Clicky.app */; @@ -202,7 +199,6 @@ mainGroup = 28F22CB62F56440300A0FC59; minimizedProjectReferenceProxies = 1; packageReferences = ( - AA00BB012F6500010039DA55 /* XCRemoteSwiftPackageReference "Sparkle" */, ); preferredProjectObjectVersion = 77; productRefGroup = 28F22CC02F56440300A0FC59 /* Products */; @@ -596,22 +592,9 @@ /* End XCConfigurationList section */ /* Begin XCRemoteSwiftPackageReference section */ - AA00BB012F6500010039DA55 /* XCRemoteSwiftPackageReference "Sparkle" */ = { - isa = XCRemoteSwiftPackageReference; - repositoryURL = "https://github.com/sparkle-project/Sparkle"; - requirement = { - kind = upToNextMajorVersion; - minimumVersion = 2.9.0; - }; - }; /* End XCRemoteSwiftPackageReference section */ /* Begin XCSwiftPackageProductDependency section */ - AA00BB022F6500020039DA55 /* Sparkle */ = { - isa = XCSwiftPackageProductDependency; - package = AA00BB012F6500010039DA55 /* XCRemoteSwiftPackageReference "Sparkle" */; - productName = Sparkle; - }; /* End XCSwiftPackageProductDependency section */ }; rootObject = 28F22CB72F56440300A0FC59 /* Project object */; diff --git a/leanring-buddy.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/leanring-buddy.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index e2eb51c3..9b7eda40 100644 --- a/leanring-buddy.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/leanring-buddy.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -1,15 +1,6 @@ { "originHash" : "3c6fb67fefedcfcd00708e24ca8088151f21dccfc0ade32ea80c406646277e89", "pins" : [ - { - "identity" : "sparkle", - "kind" : 
"remoteSourceControl", - "location" : "https://github.com/sparkle-project/Sparkle", - "state" : { - "revision" : "21d8df80440b1ca3b65fa82e40782f1e5a9e6ba2", - "version" : "2.9.0" - } - } ], "version" : 3 } diff --git a/leanring-buddy/BuddyTranscriptionProvider.swift b/leanring-buddy/BuddyTranscriptionProvider.swift index 0a75715d..dc1977d4 100644 --- a/leanring-buddy/BuddyTranscriptionProvider.swift +++ b/leanring-buddy/BuddyTranscriptionProvider.swift @@ -32,7 +32,6 @@ protocol BuddyTranscriptionProvider { enum BuddyTranscriptionProviderFactory { private enum PreferredProvider: String { case assemblyAI = "assemblyai" - case openAI = "openai" case appleSpeech = "apple" } @@ -49,7 +48,6 @@ enum BuddyTranscriptionProviderFactory { let preferredProvider = preferredProviderRawValue.flatMap(PreferredProvider.init(rawValue:)) let assemblyAIProvider = AssemblyAIStreamingTranscriptionProvider() - let openAIProvider = OpenAIAudioTranscriptionProvider() if preferredProvider == .appleSpeech { return AppleSpeechTranscriptionProvider() @@ -60,30 +58,7 @@ enum BuddyTranscriptionProviderFactory { return assemblyAIProvider } - print("⚠️ Transcription: AssemblyAI preferred but not configured, falling back") - - if openAIProvider.isConfigured { - print("⚠️ Transcription: using OpenAI as fallback") - return openAIProvider - } - - print("⚠️ Transcription: using Apple Speech as fallback") - return AppleSpeechTranscriptionProvider() - } - - if preferredProvider == .openAI { - if openAIProvider.isConfigured { - return openAIProvider - } - - print("⚠️ Transcription: OpenAI preferred but not configured, falling back") - - if assemblyAIProvider.isConfigured { - print("⚠️ Transcription: using AssemblyAI as fallback") - return assemblyAIProvider - } - - print("⚠️ Transcription: using Apple Speech as fallback") + print("⚠️ Transcription: AssemblyAI preferred but not configured, falling back to Apple Speech") return AppleSpeechTranscriptionProvider() } @@ -91,10 +66,6 @@ enum 
BuddyTranscriptionProviderFactory { return assemblyAIProvider } - if openAIProvider.isConfigured { - return openAIProvider - } - return AppleSpeechTranscriptionProvider() } } diff --git a/leanring-buddy/CompanionManager.swift b/leanring-buddy/CompanionManager.swift index 63030f89..37d18b84 100644 --- a/leanring-buddy/CompanionManager.swift +++ b/leanring-buddy/CompanionManager.swift @@ -145,20 +145,6 @@ final class CompanionManager: ObservableObject { set { UserDefaults.standard.set(newValue, forKey: "hasCompletedOnboarding") } } - /// Whether the user has submitted their email during onboarding. - @Published var hasSubmittedEmail: Bool = UserDefaults.standard.bool(forKey: "hasSubmittedEmail") - - /// Records that the user has dismissed the email prompt. The email itself - /// is never sent anywhere — analytics and the FormSpark submission were - /// removed in this fork. - func submitEmail(_ email: String) { - let trimmedEmail = email.trimmingCharacters(in: .whitespacesAndNewlines) - guard !trimmedEmail.isEmpty else { return } - - hasSubmittedEmail = true - UserDefaults.standard.set(true, forKey: "hasSubmittedEmail") - } - func start() { refreshAllPermissions() print("🔑 Clicky start — accessibility: \(hasAccessibilityPermission), screen: \(hasScreenRecordingPermission), mic: \(hasMicrophonePermission), screenContent: \(hasScreenContentPermission), onboarded: \(hasCompletedOnboarding)") diff --git a/leanring-buddy/CompanionPanelView.swift b/leanring-buddy/CompanionPanelView.swift index 76789b4c..851e16a0 100644 --- a/leanring-buddy/CompanionPanelView.swift +++ b/leanring-buddy/CompanionPanelView.swift @@ -12,7 +12,6 @@ import SwiftUI struct CompanionPanelView: View { @ObservedObject var companionManager: CompanionManager - @State private var emailInput: String = "" var body: some View { VStack(alignment: .leading, spacing: 0) { @@ -131,16 +130,6 @@ struct CompanionPanelView: View { .font(.system(size: 12, weight: .medium)) .foregroundColor(DS.Colors.textSecondary) 
.frame(maxWidth: .infinity, alignment: .leading) - } else if companionManager.allPermissionsGranted && !companionManager.hasSubmittedEmail { - VStack(alignment: .leading, spacing: 4) { - Text("Drop your email to get started.") - .font(.system(size: 12, weight: .medium)) - .foregroundColor(DS.Colors.textSecondary) - Text("If I keep building this, I'll keep you in the loop.") - .font(.system(size: 11)) - .foregroundColor(DS.Colors.textTertiary) - } - .frame(maxWidth: .infinity, alignment: .leading) } else if companionManager.allPermissionsGranted { Text("You're all set. Hit Start to meet Clicky.") .font(.system(size: 12, weight: .medium)) @@ -184,59 +173,21 @@ struct CompanionPanelView: View { @ViewBuilder private var startButton: some View { if !companionManager.hasCompletedOnboarding && companionManager.allPermissionsGranted { - if !companionManager.hasSubmittedEmail { - VStack(spacing: 8) { - TextField("Enter your email", text: $emailInput) - .textFieldStyle(.plain) - .font(.system(size: 13)) - .foregroundColor(DS.Colors.textPrimary) - .padding(.horizontal, 12) - .padding(.vertical, 8) - .background( - RoundedRectangle(cornerRadius: DS.CornerRadius.medium, style: .continuous) - .fill(Color.white.opacity(0.08)) - ) - .overlay( - RoundedRectangle(cornerRadius: DS.CornerRadius.medium, style: .continuous) - .stroke(DS.Colors.borderSubtle, lineWidth: 0.5) - ) - - Button(action: { - companionManager.submitEmail(emailInput) - }) { - Text("Submit") - .font(.system(size: 14, weight: .semibold)) - .foregroundColor(DS.Colors.textOnAccent) - .frame(maxWidth: .infinity) - .padding(.vertical, 10) - .background( - RoundedRectangle(cornerRadius: DS.CornerRadius.large, style: .continuous) - .fill(emailInput.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty - ? 
DS.Colors.accent.opacity(0.4) - : DS.Colors.accent) - ) - } - .buttonStyle(.plain) - .pointerCursor() - .disabled(emailInput.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty) - } - } else { - Button(action: { - companionManager.triggerOnboarding() - }) { - Text("Start") - .font(.system(size: 14, weight: .semibold)) - .foregroundColor(DS.Colors.textOnAccent) - .frame(maxWidth: .infinity) - .padding(.vertical, 10) - .background( - RoundedRectangle(cornerRadius: DS.CornerRadius.large, style: .continuous) - .fill(DS.Colors.accent) - ) - } - .buttonStyle(.plain) - .pointerCursor() + Button(action: { + companionManager.triggerOnboarding() + }) { + Text("Start") + .font(.system(size: 14, weight: .semibold)) + .foregroundColor(DS.Colors.textOnAccent) + .frame(maxWidth: .infinity) + .padding(.vertical, 10) + .background( + RoundedRectangle(cornerRadius: DS.CornerRadius.large, style: .continuous) + .fill(DS.Colors.accent) + ) } + .buttonStyle(.plain) + .pointerCursor() } } diff --git a/leanring-buddy/ElementLocationDetector.swift b/leanring-buddy/ElementLocationDetector.swift deleted file mode 100644 index 47072b11..00000000 --- a/leanring-buddy/ElementLocationDetector.swift +++ /dev/null @@ -1,335 +0,0 @@ -// -// ElementLocationDetector.swift -// leanring-buddy -// -// Uses Claude's Computer Use API to identify the screen location of UI elements -// in screenshots. When a user asks about a visible element (e.g., "click the -// blue button"), this detects the element's coordinates so the buddy can -// animate to it and point at it. -// - -import AppKit -import Foundation - -/// Detects the screen location of UI elements in screenshots using Claude's Computer Use API. -/// The Computer Use tool definition activates Claude's specialized pixel-counting training, -/// which is significantly more accurate than regular vision API coordinate extraction. 
-/// -/// **Aspect ratio matching**: Instead of always resizing to 1024x768 (4:3), we pick the -/// Anthropic-recommended resolution closest to the display's actual aspect ratio. Most -/// Macs are 16:10 → 1280x800. This avoids distorting the image Claude sees, which -/// significantly improves X-axis coordinate accuracy. -class ElementLocationDetector { - private let apiKey: String - private let apiURL: URL - private let model: String - private let session: URLSession - - /// Anthropic-recommended resolutions for Computer Use, paired with their aspect ratios. - /// We pick the one closest to the actual display aspect ratio to avoid distortion. - /// Higher resolutions get downsampled by the API and degrade precision, so these - /// are intentionally small. - private static let supportedComputerUseResolutions: [(width: Int, height: Int, aspectRatio: Double)] = [ - (1024, 768, 1024.0 / 768.0), // 4:3 = 1.333 (legacy displays) - (1280, 800, 1280.0 / 800.0), // 16:10 = 1.600 (MacBook Air, MacBook Pro, most Macs) - (1366, 768, 1366.0 / 768.0) // ~16:9 = 1.779 (external monitors, ultrawide fallback) - ] - - init(apiKey: String, model: String = "claude-sonnet-4-6") { - self.apiKey = apiKey - self.apiURL = URL(string: "https://api.anthropic.com/v1/messages")! - self.model = model - - let config = URLSessionConfiguration.default - config.timeoutIntervalForRequest = 15 - config.timeoutIntervalForResource = 20 - config.waitsForConnectivity = false - config.urlCache = nil - config.httpCookieStorage = nil - self.session = URLSession(configuration: config) - } - - /// Detects the screen location of a UI element the user is asking about. 
- /// - /// - Parameters: - /// - screenshotData: JPEG or PNG screenshot data from ScreenCaptureKit - /// - userQuestion: The user's voice transcript (e.g., "How do I add a project?") - /// - displayWidthInPoints: The captured display's width in screen points - /// - displayHeightInPoints: The captured display's height in screen points - /// - /// - Returns: A `CGPoint` in display-local macOS coordinates (bottom-left origin) if an - /// element was identified, or `nil` if no element was found or detection failed. - func detectElementLocation( - screenshotData: Data, - userQuestion: String, - displayWidthInPoints: Int, - displayHeightInPoints: Int - ) async -> CGPoint? { - // Pick the Computer Use resolution that best matches this display's aspect ratio. - // This avoids stretching the screenshot (e.g., squishing a 16:10 Mac display - // into 4:3), which would distort the image Claude sees and degrade X-axis accuracy. - let computerUseResolution = bestComputerUseResolution( - forDisplayWidth: displayWidthInPoints, - displayHeight: displayHeightInPoints - ) - - print("🎯 ElementLocationDetector: display is \(displayWidthInPoints)x\(displayHeightInPoints) " + - "(ratio \(String(format: "%.3f", Double(displayWidthInPoints) / Double(displayHeightInPoints)))), " + - "using Computer Use resolution \(computerUseResolution.width)x\(computerUseResolution.height)") - - // Resize the screenshot to the chosen Computer Use resolution - guard let resizedScreenshotData = resizeScreenshotForComputerUse( - originalImageData: screenshotData, - targetWidth: computerUseResolution.width, - targetHeight: computerUseResolution.height - ) else { - print("⚠️ ElementLocationDetector: failed to resize screenshot") - return nil - } - - // Make the Computer Use API call with the matching resolution declared - guard let computerUseCoordinate = await callComputerUseAPI( - resizedScreenshotData: resizedScreenshotData, - userQuestion: userQuestion, - declaredDisplayWidth: 
computerUseResolution.width, - declaredDisplayHeight: computerUseResolution.height - ) else { - return nil - } - - // Clamp coordinates to the valid range — Claude occasionally returns - // values slightly outside the declared display dimensions, which would - // map to off-screen positions after scaling. - let clampedX = max(0, min(computerUseCoordinate.x, CGFloat(computerUseResolution.width))) - let clampedY = max(0, min(computerUseCoordinate.y, CGFloat(computerUseResolution.height))) - - // Scale coordinates from the Computer Use resolution back to actual display point dimensions - let scaledX = (clampedX / CGFloat(computerUseResolution.width)) * CGFloat(displayWidthInPoints) - let scaledYTopLeftOrigin = (clampedY / CGFloat(computerUseResolution.height)) * CGFloat(displayHeightInPoints) - - // Convert from top-left origin (Computer Use / CoreGraphics) to bottom-left origin (AppKit) - let scaledYBottomLeftOrigin = CGFloat(displayHeightInPoints) - scaledYTopLeftOrigin - - print("🎯 ElementLocationDetector: mapped (\(Int(clampedX)), \(Int(clampedY))) in " + - "\(computerUseResolution.width)x\(computerUseResolution.height) → " + - "(\(Int(scaledX)), \(Int(scaledYBottomLeftOrigin))) in " + - "\(displayWidthInPoints)x\(displayHeightInPoints) display-local AppKit coords") - - return CGPoint(x: scaledX, y: scaledYBottomLeftOrigin) - } - - // MARK: - Private Helpers - - /// Picks the Anthropic-recommended Computer Use resolution whose aspect ratio - /// is closest to the actual display, minimizing image distortion. 
- private func bestComputerUseResolution( - forDisplayWidth displayWidth: Int, - displayHeight: Int - ) -> (width: Int, height: Int) { - let displayAspectRatio = Double(displayWidth) / Double(max(1, displayHeight)) - - var bestWidth = 1280 - var bestHeight = 800 - var smallestAspectRatioDifference = Double.greatestFiniteMagnitude - - for resolution in Self.supportedComputerUseResolutions { - let difference = abs(displayAspectRatio - resolution.aspectRatio) - if difference < smallestAspectRatioDifference { - smallestAspectRatioDifference = difference - bestWidth = resolution.width - bestHeight = resolution.height - } - } - - return (width: bestWidth, height: bestHeight) - } - - /// Calls the Claude Computer Use API with a resized screenshot and user question. - /// Returns the raw coordinate from Claude's response in the declared resolution space, or nil. - private func callComputerUseAPI( - resizedScreenshotData: Data, - userQuestion: String, - declaredDisplayWidth: Int, - declaredDisplayHeight: Int - ) async -> CGPoint? { - var request = URLRequest(url: apiURL) - request.httpMethod = "POST" - request.timeoutInterval = 15 - request.setValue(apiKey, forHTTPHeaderField: "x-api-key") - request.setValue("2023-06-01", forHTTPHeaderField: "anthropic-version") - request.setValue("application/json", forHTTPHeaderField: "Content-Type") - // The beta header activates Computer Use capabilities and the specialized - // pixel-counting training that makes coordinate detection accurate. - request.setValue("computer-use-2025-11-24", forHTTPHeaderField: "anthropic-beta") - - // Detect image media type (PNG vs JPEG) - let mediaType = detectImageMediaType(for: resizedScreenshotData) - let base64Screenshot = resizedScreenshotData.base64EncodedString() - - let userPrompt = """ - The user asked this question while looking at their screen: "\(userQuestion)" - - Look at the screenshot. If there is a specific UI element (button, link, menu item, text field, icon, etc.) 
that the user should interact with or is asking about, click on that element. - - If the question is purely conceptual (e.g., "what does HTML mean?") and there's no specific element to point to, just respond with text saying "no specific element". - """ - - let body: [String: Any] = [ - "model": model, - "max_tokens": 256, - "tools": [ - [ - "type": "computer_20251124", - "name": "computer", - "display_width_px": declaredDisplayWidth, - "display_height_px": declaredDisplayHeight - ] - ], - "messages": [ - [ - "role": "user", - "content": [ - [ - "type": "image", - "source": [ - "type": "base64", - "media_type": mediaType, - "data": base64Screenshot - ] - ], - [ - "type": "text", - "text": userPrompt - ] - ] - ] - ] - ] - - do { - let bodyData = try JSONSerialization.data(withJSONObject: body) - request.httpBody = bodyData - - let payloadMB = Double(bodyData.count) / 1_048_576.0 - print("🎯 ElementLocationDetector: sending \(String(format: "%.1f", payloadMB))MB request " + - "(declared \(declaredDisplayWidth)x\(declaredDisplayHeight))") - - let (data, response) = try await session.data(for: request) - - guard let httpResponse = response as? HTTPURLResponse, - (200...299).contains(httpResponse.statusCode) else { - let statusCode = (response as? HTTPURLResponse)?.statusCode ?? -1 - let errorBody = String(data: data, encoding: .utf8) ?? "unknown" - print("⚠️ ElementLocationDetector: API error \(statusCode): \(errorBody.prefix(200))") - return nil - } - - return parseCoordinateFromResponse(data: data) - - } catch { - print("⚠️ ElementLocationDetector: request failed: \(error.localizedDescription)") - return nil - } - } - - /// Parses the Computer Use API response to extract click coordinates. - /// Claude returns a `tool_use` content block with `{"action": "left_click", "coordinate": [x, y]}`. - /// If Claude returns text instead (no element found), returns nil. - private func parseCoordinateFromResponse(data: Data) -> CGPoint? { - guard let json = try? 
JSONSerialization.jsonObject(with: data) as? [String: Any], - let contentBlocks = json["content"] as? [[String: Any]] else { - print("⚠️ ElementLocationDetector: could not parse response JSON") - return nil - } - - // Look for a tool_use content block (Claude's Computer Use response format) - for block in contentBlocks { - guard let blockType = block["type"] as? String, - blockType == "tool_use", - let input = block["input"] as? [String: Any], - let coordinate = input["coordinate"] as? [NSNumber], - coordinate.count == 2 else { - continue - } - - let x = CGFloat(coordinate[0].doubleValue) - let y = CGFloat(coordinate[1].doubleValue) - print("🎯 ElementLocationDetector: raw coordinate (\(Int(x)), \(Int(y)))") - return CGPoint(x: x, y: y) - } - - // No tool_use block found — Claude responded with text (no element to point at) - print("🎯 ElementLocationDetector: no specific element detected (conceptual question)") - return nil - } - - /// Resizes screenshot data to the specified Computer Use resolution. - /// The target resolution should match the display's aspect ratio to avoid - /// distortion that degrades coordinate accuracy. - /// - /// **Critical Retina fix**: Uses `NSBitmapImageRep` directly instead of - /// `NSImage.lockFocus()`. On Retina displays (2x backing scale), lockFocus - /// creates a bitmap at 2× the declared size (e.g., 2560×1600 for a 1280×800 - /// NSImage). This means the JPEG sent to Claude would be 2× larger than the - /// resolution declared in the Computer Use tool definition, causing Claude's - /// pixel-counting to return coordinates in the wrong scale. - private func resizeScreenshotForComputerUse( - originalImageData: Data, - targetWidth: Int, - targetHeight: Int - ) -> Data? { - guard let originalImage = NSImage(data: originalImageData) else { return nil } - - // Create a bitmap representation with exact pixel dimensions. 
- // This bypasses NSImage's Retina-aware coordinate system which would - // otherwise double the actual pixel count on 2x displays. - guard let bitmapRep = NSBitmapImageRep( - bitmapDataPlanes: nil, - pixelsWide: targetWidth, - pixelsHigh: targetHeight, - bitsPerSample: 8, - samplesPerPixel: 4, - hasAlpha: true, - isPlanar: false, - colorSpaceName: .deviceRGB, - bytesPerRow: 0, - bitsPerPixel: 0 - ) else { - return nil - } - - // Set the point size to match pixel dimensions (1:1, no Retina scaling). - bitmapRep.size = NSSize(width: targetWidth, height: targetHeight) - - // Draw the original image into the exact-pixel-dimension bitmap - NSGraphicsContext.saveGraphicsState() - let graphicsContext = NSGraphicsContext(bitmapImageRep: bitmapRep) - NSGraphicsContext.current = graphicsContext - graphicsContext?.imageInterpolation = .high - originalImage.draw( - in: NSRect(x: 0, y: 0, width: targetWidth, height: targetHeight), - from: NSRect(origin: .zero, size: originalImage.size), - operation: .copy, - fraction: 1.0 - ) - NSGraphicsContext.restoreGraphicsState() - - guard let jpegData = bitmapRep.representation(using: .jpeg, properties: [.compressionFactor: 0.85]) else { - return nil - } - - return jpegData - } - - /// Detects MIME type by inspecting the first bytes of image data. 
- private func detectImageMediaType(for imageData: Data) -> String { - if imageData.count >= 4 { - let pngSignature: [UInt8] = [0x89, 0x50, 0x4E, 0x47] - let firstFourBytes = [UInt8](imageData.prefix(4)) - if firstFourBytes == pngSignature { - return "image/png" - } - } - return "image/jpeg" - } -} diff --git a/leanring-buddy/OpenAIAPI.swift b/leanring-buddy/OpenAIAPI.swift deleted file mode 100644 index d0c3f2ae..00000000 --- a/leanring-buddy/OpenAIAPI.swift +++ /dev/null @@ -1,142 +0,0 @@ -// -// OpenAIAPI.swift -// OpenAI API Implementation -// - -import Foundation - -/// OpenAI API helper for vision analysis -class OpenAIAPI { - private let apiKey: String - private let apiURL: URL - private let model: String - private let session: URLSession - - init(apiKey: String, model: String = "gpt-5.2-2025-12-11") { - self.apiKey = apiKey - self.apiURL = URL(string: "https://api.openai.com/v1/chat/completions")! - self.model = model - - // Use .default instead of .ephemeral so TLS session tickets are cached. - // Ephemeral sessions do a full TLS handshake on every request, which causes - // transient -1200 (errSSLPeerHandshakeFail) errors with large image payloads. - // Disable URL/cookie caching to avoid storing responses or credentials on disk. - let config = URLSessionConfiguration.default - config.timeoutIntervalForRequest = 120 - config.timeoutIntervalForResource = 300 - config.waitsForConnectivity = true - config.urlCache = nil - config.httpCookieStorage = nil - self.session = URLSession(configuration: config) - - // Fire a lightweight HEAD request in the background to pre-establish the TLS - // connection. This caches the TLS session ticket so the first real API call - // (which carries a large image payload) doesn't need a cold TLS handshake. - warmUpTLSConnection() - } - - /// Sends a no-op HEAD request to the API host to establish and cache a TLS session. - /// Failures are silently ignored — this is purely an optimization. 
- private func warmUpTLSConnection() { - var warmupRequest = URLRequest(url: apiURL) - warmupRequest.httpMethod = "HEAD" - warmupRequest.timeoutInterval = 10 - session.dataTask(with: warmupRequest) { _, _, _ in - // Response doesn't matter — the TLS handshake is the goal - }.resume() - } - - /// Send a vision request to OpenAI with one or more labeled images. - func analyzeImage( - images: [(data: Data, label: String)], - systemPrompt: String, - conversationHistory: [(userPlaceholder: String, assistantResponse: String)] = [], - userPrompt: String - ) async throws -> (text: String, duration: TimeInterval) { - let startTime = Date() - - // Build request - var request = URLRequest(url: apiURL) - request.httpMethod = "POST" - request.timeoutInterval = 120 - request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization") - request.setValue("application/json", forHTTPHeaderField: "Content-Type") - - // Build messages array - var messages: [[String: Any]] = [] - - // Add system message first - messages.append([ - "role": "system", - "content": systemPrompt - ]) - - // Add conversation history - for (userPlaceholder, assistantResponse) in conversationHistory { - messages.append(["role": "user", "content": userPlaceholder]) - messages.append(["role": "assistant", "content": assistantResponse]) - } - - // Build current message with all labeled images + prompt - var contentBlocks: [[String: Any]] = [] - for image in images { - contentBlocks.append([ - "type": "text", - "text": image.label - ]) - contentBlocks.append([ - "type": "image_url", - "image_url": [ - "url": "data:image/jpeg;base64,\(image.data.base64EncodedString())" - ] - ]) - } - contentBlocks.append([ - "type": "text", - "text": userPrompt - ]) - messages.append(["role": "user", "content": contentBlocks]) - - // Build request body - let body: [String: Any] = [ - "model": model, - // `max_tokens` is deprecated/incompatible for some newer OpenAI models. 
- "max_completion_tokens": 600, - "messages": messages - ] - - let bodyData = try JSONSerialization.data(withJSONObject: body) - request.httpBody = bodyData - let payloadMB = Double(bodyData.count) / 1_048_576.0 - print("🌐 OpenAI request: \(String(format: "%.1f", payloadMB))MB, \(images.count) image(s)") - - // Send request - let (data, response) = try await session.data(for: request) - - guard let httpResponse = response as? HTTPURLResponse, - (200...299).contains(httpResponse.statusCode) else { - let responseString = String(data: data, encoding: .utf8) ?? "Unknown error" - throw NSError( - domain: "OpenAIAPI", - code: (response as? HTTPURLResponse)?.statusCode ?? -1, - userInfo: [NSLocalizedDescriptionKey: "API Error: \(responseString)"] - ) - } - - // Parse response - let json = try JSONSerialization.jsonObject(with: data) as? [String: Any] - guard let choices = json?["choices"] as? [[String: Any]], - let firstChoice = choices.first, - let message = firstChoice["message"] as? [String: Any], - let text = message["content"] as? String else { - throw NSError( - domain: "OpenAIAPI", - code: -1, - userInfo: [NSLocalizedDescriptionKey: "Invalid response format"] - ) - } - - let duration = Date().timeIntervalSince(startTime) - return (text: text, duration: duration) - } -} diff --git a/leanring-buddy/OpenAIAudioTranscriptionProvider.swift b/leanring-buddy/OpenAIAudioTranscriptionProvider.swift deleted file mode 100644 index 75092092..00000000 --- a/leanring-buddy/OpenAIAudioTranscriptionProvider.swift +++ /dev/null @@ -1,317 +0,0 @@ -// -// OpenAIAudioTranscriptionProvider.swift -// leanring-buddy -// -// AI transcription provider backed by OpenAI's audio transcription API. -// - -import AVFoundation -import Foundation - -struct OpenAIAudioTranscriptionProviderError: LocalizedError { - let message: String - - var errorDescription: String? 
{ - message - } -} - -final class OpenAIAudioTranscriptionProvider: BuddyTranscriptionProvider { - private let apiKey = AppBundleConfiguration.stringValue(forKey: "OpenAIAPIKey") - private let modelName = AppBundleConfiguration.stringValue(forKey: "OpenAITranscriptionModel") - ?? "gpt-4o-transcribe" - - let displayName = "OpenAI" - let requiresSpeechRecognitionPermission = false - - var isConfigured: Bool { - apiKey != nil - } - - var unavailableExplanation: String? { - guard !isConfigured else { return nil } - return "OpenAI transcription is not configured. Add OpenAIAPIKey to Info.plist." - } - - func startStreamingSession( - keyterms: [String], - onTranscriptUpdate: @escaping (String) -> Void, - onFinalTranscriptReady: @escaping (String) -> Void, - onError: @escaping (Error) -> Void - ) async throws -> any BuddyStreamingTranscriptionSession { - guard let apiKey else { - throw OpenAIAudioTranscriptionProviderError( - message: unavailableExplanation ?? "OpenAI transcription is not configured." - ) - } - - return OpenAIAudioTranscriptionSession( - apiKey: apiKey, - modelName: modelName, - keyterms: keyterms, - onTranscriptUpdate: onTranscriptUpdate, - onFinalTranscriptReady: onFinalTranscriptReady, - onError: onError - ) - } -} - -private final class OpenAIAudioTranscriptionSession: BuddyStreamingTranscriptionSession { - let finalTranscriptFallbackDelaySeconds: TimeInterval = 8.0 - - private struct TranscriptionResponse: Decodable { - let text: String - } - - private static let transcriptionURL = URL(string: "https://api.openai.com/v1/audio/transcriptions")! 
- private static let targetSampleRate = 16_000 - - private let apiKey: String - private let modelName: String - private let keyterms: [String] - private let onTranscriptUpdate: (String) -> Void - private let onFinalTranscriptReady: (String) -> Void - private let onError: (Error) -> Void - - private let stateQueue = DispatchQueue(label: "com.learningbuddy.openai.transcription") - private let audioPCM16Converter = BuddyPCM16AudioConverter( - targetSampleRate: Double(targetSampleRate) - ) - private let urlSession: URLSession - - private var bufferedPCM16AudioData = Data() - private var hasRequestedFinalTranscript = false - private var hasDeliveredFinalTranscript = false - private var isCancelled = false - private var transcriptionUploadTask: Task? - - init( - apiKey: String, - modelName: String, - keyterms: [String], - onTranscriptUpdate: @escaping (String) -> Void, - onFinalTranscriptReady: @escaping (String) -> Void, - onError: @escaping (Error) -> Void - ) { - self.apiKey = apiKey - self.modelName = modelName - self.keyterms = keyterms - self.onTranscriptUpdate = onTranscriptUpdate - self.onFinalTranscriptReady = onFinalTranscriptReady - self.onError = onError - - let urlSessionConfiguration = URLSessionConfiguration.default - urlSessionConfiguration.timeoutIntervalForRequest = 45 - urlSessionConfiguration.timeoutIntervalForResource = 90 - urlSessionConfiguration.waitsForConnectivity = true - self.urlSession = URLSession(configuration: urlSessionConfiguration) - } - - func appendAudioBuffer(_ audioBuffer: AVAudioPCMBuffer) { - guard let audioPCM16Data = audioPCM16Converter.convertToPCM16Data(from: audioBuffer), - !audioPCM16Data.isEmpty else { - return - } - - stateQueue.async { - guard !self.hasRequestedFinalTranscript, !self.isCancelled else { return } - self.bufferedPCM16AudioData.append(audioPCM16Data) - } - } - - func requestFinalTranscript() { - stateQueue.async { - guard !self.hasRequestedFinalTranscript, !self.isCancelled else { return } - 
self.hasRequestedFinalTranscript = true - - let bufferedPCM16AudioData = self.bufferedPCM16AudioData - self.transcriptionUploadTask = Task { [weak self] in - await self?.transcribeBufferedAudio(bufferedPCM16AudioData) - } - } - } - - func cancel() { - stateQueue.async { - self.isCancelled = true - self.bufferedPCM16AudioData.removeAll(keepingCapacity: false) - } - - transcriptionUploadTask?.cancel() - urlSession.invalidateAndCancel() - } - - private func transcribeBufferedAudio(_ bufferedPCM16AudioData: Data) async { - guard !Task.isCancelled else { return } - - let trimmedAudioDataIsEmpty = stateQueue.sync { - isCancelled || bufferedPCM16AudioData.isEmpty - } - - if trimmedAudioDataIsEmpty { - deliverFinalTranscript("") - return - } - - let wavAudioData = BuddyWAVFileBuilder.buildWAVData( - fromPCM16MonoAudio: bufferedPCM16AudioData, - sampleRate: Self.targetSampleRate - ) - - do { - let transcriptText = try await requestTranscription(for: wavAudioData) - guard !stateQueue.sync(execute: { isCancelled }) else { return } - - if !transcriptText.isEmpty { - onTranscriptUpdate(transcriptText) - } - - deliverFinalTranscript(transcriptText) - } catch { - guard !stateQueue.sync(execute: { isCancelled }) else { return } - print("[OpenAI Transcription] ❌ Upload failed (audio size: \(wavAudioData.count) bytes): \(error.localizedDescription)") - onError(error) - } - } - - private func requestTranscription(for wavAudioData: Data) async throws -> String { - let multipartBoundary = "Boundary-\(UUID().uuidString)" - var request = URLRequest(url: Self.transcriptionURL) - request.httpMethod = "POST" - request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization") - request.setValue("multipart/form-data; boundary=\(multipartBoundary)", forHTTPHeaderField: "Content-Type") - - let requestBodyData = makeMultipartRequestBody( - boundary: multipartBoundary, - wavAudioData: wavAudioData - ) - request.httpBody = requestBodyData - - let (responseData, response) = try await 
urlSession.data(for: request) - - guard let httpResponse = response as? HTTPURLResponse else { - throw OpenAIAudioTranscriptionProviderError( - message: "OpenAI transcription returned an invalid response." - ) - } - - guard (200...299).contains(httpResponse.statusCode) else { - let responseText = String(data: responseData, encoding: .utf8) ?? "Unknown error" - throw OpenAIAudioTranscriptionProviderError( - message: "OpenAI transcription failed: \(responseText)" - ) - } - - if let transcriptionResponse = try? JSONDecoder().decode( - TranscriptionResponse.self, - from: responseData - ) { - return transcriptionResponse.text.trimmingCharacters(in: .whitespacesAndNewlines) - } - - let responseText = String(data: responseData, encoding: .utf8)? - .trimmingCharacters(in: .whitespacesAndNewlines) ?? "" - - if !responseText.isEmpty { - return responseText - } - - throw OpenAIAudioTranscriptionProviderError( - message: "OpenAI transcription returned an empty transcript." - ) - } - - private func makeMultipartRequestBody( - boundary: String, - wavAudioData: Data - ) -> Data { - var requestBodyData = Data() - - requestBodyData.appendMultipartFormField( - named: "model", - value: modelName, - usingBoundary: boundary - ) - requestBodyData.appendMultipartFormField( - named: "language", - value: "en", - usingBoundary: boundary - ) - requestBodyData.appendMultipartFormField( - named: "response_format", - value: "json", - usingBoundary: boundary - ) - - if let contextualPrompt = transcriptionPromptText() { - requestBodyData.appendMultipartFormField( - named: "prompt", - value: contextualPrompt, - usingBoundary: boundary - ) - } - - requestBodyData.appendMultipartFileField( - named: "file", - filename: "voice-input.wav", - mimeType: "audio/wav", - fileData: wavAudioData, - usingBoundary: boundary - ) - requestBodyData.appendString("--\(boundary)--\r\n") - - return requestBodyData - } - - private func transcriptionPromptText() -> String? 
{ - let normalizedKeyterms = keyterms - .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } - .filter { !$0.isEmpty } - - guard !normalizedKeyterms.isEmpty else { return nil } - - return """ - This is a short push-to-talk transcript for a coding and product app. Expect product names, technical terms, and app-specific vocabulary such as: \(normalizedKeyterms.joined(separator: ", ")). - """ - } - - private func deliverFinalTranscript(_ transcriptText: String) { - guard !hasDeliveredFinalTranscript else { return } - hasDeliveredFinalTranscript = true - onFinalTranscriptReady(transcriptText) - } - - deinit { - cancel() - } -} - -private extension Data { - mutating func appendString(_ string: String) { - append(string.data(using: .utf8)!) - } - - mutating func appendMultipartFormField( - named fieldName: String, - value: String, - usingBoundary boundary: String - ) { - appendString("--\(boundary)\r\n") - appendString("Content-Disposition: form-data; name=\"\(fieldName)\"\r\n\r\n") - appendString("\(value)\r\n") - } - - mutating func appendMultipartFileField( - named fieldName: String, - filename: String, - mimeType: String, - fileData: Data, - usingBoundary boundary: String - ) { - appendString("--\(boundary)\r\n") - appendString("Content-Disposition: form-data; name=\"\(fieldName)\"; filename=\"\(filename)\"\r\n") - appendString("Content-Type: \(mimeType)\r\n\r\n") - append(fileData) - appendString("\r\n") - } -} diff --git a/leanring-buddy/leanring_buddyApp.swift b/leanring-buddy/leanring_buddyApp.swift index b004a896..53d62b9e 100644 --- a/leanring-buddy/leanring_buddyApp.swift +++ b/leanring-buddy/leanring_buddyApp.swift @@ -7,9 +7,7 @@ // opens a floating panel with companion voice controls. // -import ServiceManagement import SwiftUI -import Sparkle @main struct leanring_buddyApp: App { @@ -31,7 +29,6 @@ struct leanring_buddyApp: App { final class CompanionAppDelegate: NSObject, NSApplicationDelegate { private var menuBarPanelManager: MenuBarPanelManager? 
private let companionManager = CompanionManager() - private var sparkleUpdaterController: SPUStandardUpdaterController? func applicationDidFinishLaunching(_ notification: Notification) { print("🎯 Clicky: Starting...") @@ -39,9 +36,6 @@ final class CompanionAppDelegate: NSObject, NSApplicationDelegate { UserDefaults.standard.register(defaults: ["NSInitialToolTipDelay": 0]) - ClickyAnalytics.configure() - ClickyAnalytics.trackAppOpened() - menuBarPanelManager = MenuBarPanelManager(companionManager: companionManager) companionManager.start() // Auto-open the panel if the user still needs to do something: @@ -49,41 +43,9 @@ final class CompanionAppDelegate: NSObject, NSApplicationDelegate { if !companionManager.hasCompletedOnboarding || !companionManager.allPermissionsGranted { menuBarPanelManager?.showPanelOnLaunch() } - registerAsLoginItemIfNeeded() - // startSparkleUpdater() } func applicationWillTerminate(_ notification: Notification) { companionManager.stop() } - - /// Registers the app as a login item so it launches automatically on - /// startup. Uses SMAppService which shows the app in System Settings > - /// General > Login Items, letting the user toggle it off if they want. 
- private func registerAsLoginItemIfNeeded() { - let loginItemService = SMAppService.mainApp - if loginItemService.status != .enabled { - do { - try loginItemService.register() - print("🎯 Clicky: Registered as login item") - } catch { - print("⚠️ Clicky: Failed to register as login item: \(error)") - } - } - } - - private func startSparkleUpdater() { - let updaterController = SPUStandardUpdaterController( - startingUpdater: false, - updaterDelegate: nil, - userDriverDelegate: nil - ) - self.sparkleUpdaterController = updaterController - - do { - try updaterController.updater.start() - } catch { - print("⚠️ Clicky: Sparkle updater failed to start: \(error)") - } - } } diff --git a/scripts/README.md b/scripts/README.md deleted file mode 100644 index 9b2f0b34..00000000 --- a/scripts/README.md +++ /dev/null @@ -1,62 +0,0 @@ -# Release Scripts - -## `release.sh` — Ship a new version of makesomething - -Automates the full release pipeline: build → sign → DMG → notarize → Sparkle appcast → GitHub Release. - -### Quick start - -```bash -# Auto-bumps version and build number from the latest GitHub Release -./scripts/release.sh -``` - -The script checks GitHub for the latest release (e.g. `v1.5`, build 6) and automatically bumps to `v1.6`, build 7. You'll see a confirmation prompt before anything runs. - -### Override version or build - -```bash -# Set a specific marketing version (auto-bumps build) -./scripts/release.sh 2.0 - -# Set both marketing version and build number -./scripts/release.sh 2.0 10 -``` - -### Safety - -- **Duplicate detection**: If the tag already exists on GitHub, the script exits with an error and suggests what to do. -- **Confirmation prompt**: Shows the version, build, and previous release before proceeding. Press `y` to continue. - -### What it does - -1. Fetches the latest release from GitHub to determine version + build -2. Archives the app via `xcodebuild` -3. Exports a signed `.app` with Developer ID -4. 
Creates a DMG with the drag-to-Applications background -5. Notarizes the DMG with Apple (Gatekeeper compliance) -6. Signs the DMG with the Sparkle EdDSA key -7. Generates `appcast.xml` for Sparkle auto-updates -8. Creates a GitHub Release with the DMG attached -9. Pushes the updated `appcast.xml` to the releases repo - -### One-time setup (prerequisites) - -1. **Xcode** with your Developer ID signing certificate -2. **Homebrew tools**: - ```bash - brew install create-dmg gh - ``` -3. **GitHub CLI auth**: - ```bash - gh auth login - ``` -4. **Apple notarization credentials** (stored in Keychain): - ```bash - xcrun notarytool store-credentials "AC_PASSWORD" \ - --apple-id YOUR_APPLE_ID \ - --team-id YOUR_TEAM_ID - ``` - You'll be prompted for an app-specific password (generate one at [appleid.apple.com](https://appleid.apple.com)). -5. **Sparkle EdDSA key** — already generated and stored in Keychain (done during initial Sparkle setup) -6. **Build the project in Xcode at least once** so SPM downloads Sparkle and the Sparkle CLI tools are available diff --git a/scripts/release.sh b/scripts/release.sh deleted file mode 100755 index da7f31da..00000000 --- a/scripts/release.sh +++ /dev/null @@ -1,276 +0,0 @@ -#!/bin/bash -set -euo pipefail - -# Add Homebrew to PATH so create-dmg and gh are available in non-interactive shells -export PATH="/opt/homebrew/bin:$PATH" - -# ============================================================================= -# release.sh — Automates the full release pipeline for makesomething -# -# What it does (in order): -# 1. Auto-detects version + build from the latest GitHub Release -# 2. Archives the app via xcodebuild -# 3. Exports a signed + notarized .app -# 4. Wraps it in a DMG with the drag-to-Applications background -# 5. Notarizes the DMG with Apple (so Gatekeeper won't block it) -# 6. Signs the DMG with your Sparkle EdDSA key -# 7. Generates/updates appcast.xml automatically -# 8. Creates a GitHub Release with the DMG attached -# 9. 
Pushes the updated appcast.xml to the releases repo (makesomething-mac-app) -# -# Usage: -# ./scripts/release.sh Auto-bumps: 1.5 → 1.6, build 6 → 7 -# ./scripts/release.sh 2.0 Sets marketing version to 2.0, auto-bumps build -# ./scripts/release.sh 2.0 10 Sets both marketing version and build number -# -# Prerequisites (one-time setup): -# - Xcode with your Developer ID signing certificate -# - `brew install create-dmg gh` -# - `gh auth login` (GitHub CLI authenticated) -# - Sparkle EdDSA key in your Keychain (already generated) -# - `xcrun notarytool store-credentials "AC_PASSWORD"` (Apple notarization credentials) -# ============================================================================= - -# ── Configuration ──────────────────────────────────────────────────────────── - -SCHEME="leanring-buddy" -APP_NAME="makesomething" -PROJECT_DIR="$(cd "$(dirname "$0")/.." && pwd)" -BUILD_DIR="${PROJECT_DIR}/build" -ARCHIVE_PATH="${BUILD_DIR}/${APP_NAME}.xcarchive" -EXPORT_DIR="${BUILD_DIR}/export" -DMG_OUTPUT_DIR="${BUILD_DIR}/dmg" -RELEASES_DIR="${PROJECT_DIR}/releases" # where generate_appcast reads DMGs from -DMG_BACKGROUND="${PROJECT_DIR}/dmg-background.png" - -GITHUB_REPO="julianjear/makesomething-mac-app" - -# Sparkle tools (auto-discovered from Xcode's SPM cache) -SPARKLE_BIN=$(find ~/Library/Developer/Xcode/DerivedData/leanring-buddy*/SourcePackages/artifacts/sparkle/Sparkle/bin -maxdepth 0 2>/dev/null | head -1) - -if [ -z "$SPARKLE_BIN" ]; then - echo "❌ Sparkle tools not found. Build the project in Xcode first so SPM downloads Sparkle." - exit 1 -fi - -# ── Auto-detect version from latest GitHub Release ────────────────────────── -# Fetches the latest release tag (e.g. "v1.5") and build number from GitHub. -# If no arguments are provided, bumps the minor version by 0.1 and build by 1. -# You can override either or both by passing arguments. - -echo "🔍 Checking latest release on GitHub..." 
- -LATEST_TAG=$(gh release view --repo "${GITHUB_REPO}" --json tagName --jq '.tagName' 2>/dev/null || echo "") - -if [ -n "$LATEST_TAG" ]; then - # Strip the "v" prefix to get the version number (e.g. "v1.5" → "1.5") - LATEST_VERSION="${LATEST_TAG#v}" - - # Get the build number from the latest release's app bundle inside the DMG. - # We download just the release metadata (not the DMG) and parse the body/notes, - # but the simplest reliable approach is to track it from the GitHub release title - # or from a known incrementing sequence. We use the GitHub API to get asset info - # and derive the build number from the release list count. - LATEST_BUILD=$(gh release list --repo "${GITHUB_REPO}" --json tagName --jq 'length' 2>/dev/null || echo "0") - - echo " Latest release: ${LATEST_TAG} (build ${LATEST_BUILD})" -else - LATEST_VERSION="0.0" - LATEST_BUILD=0 - echo " No previous releases found — starting from scratch" -fi - -# Determine the next marketing version: bump minor by 0.1 -# e.g. "1.5" → "1.6", "2.9" → "3.0" (carries over) -if [ $# -ge 1 ]; then - MARKETING_VERSION="$1" -else - MAJOR=$(echo "$LATEST_VERSION" | cut -d. -f1) - MINOR=$(echo "$LATEST_VERSION" | cut -d. -f2) - NEXT_MINOR=$((MINOR + 1)) - if [ "$NEXT_MINOR" -ge 10 ]; then - MAJOR=$((MAJOR + 1)) - NEXT_MINOR=0 - fi - MARKETING_VERSION="${MAJOR}.${NEXT_MINOR}" -fi - -# Determine the next build number: always increment by 1 -if [ $# -ge 2 ]; then - BUILD_NUMBER="$2" -else - BUILD_NUMBER=$((LATEST_BUILD + 1)) -fi - -DMG_FILENAME="${APP_NAME}.dmg" -TAG="v${MARKETING_VERSION}" - -# ── Safety checks ──────────────────────────────────────────────────────────── - -# Check if this tag already exists on GitHub to prevent accidental duplicates -if gh release view "${TAG}" --repo "${GITHUB_REPO}" &>/dev/null; then - echo "" - echo "❌ Release ${TAG} already exists on GitHub!" 
- echo " https://github.com/${GITHUB_REPO}/releases/tag/${TAG}" - echo "" - echo " To release a new version, either:" - echo " • Run without arguments to auto-bump: ./scripts/release.sh" - echo " • Specify a higher version: ./scripts/release.sh $(echo "${MARKETING_VERSION} + 0.1" | bc)" - echo " • Delete the existing release first: gh release delete ${TAG} --repo ${GITHUB_REPO} --yes" - exit 1 -fi - -echo "" -echo "🚀 Releasing ${APP_NAME} v${MARKETING_VERSION} (build ${BUILD_NUMBER})" -echo " Previous: ${LATEST_TAG:-none}" -echo "" - -# Confirm with the user before proceeding -read -p " Proceed? (y/N) " -n 1 -r -echo "" -if [[ ! $REPLY =~ ^[Yy]$ ]]; then - echo " Aborted." - exit 0 -fi -echo "" - -# ── Step 1: Clean build directory ──────────────────────────────────────────── - -echo "🧹 Cleaning build directory and stale DMGs..." -rm -rf "${BUILD_DIR}" -# Remove any leftover temp DMGs from create-dmg (rw.*.dmg) and the previous -# same-named DMG so create-dmg and generate_appcast don't choke on duplicates. -rm -f "${RELEASES_DIR}"/rw.*.dmg "${RELEASES_DIR}/${DMG_FILENAME}" -mkdir -p "${BUILD_DIR}" "${EXPORT_DIR}" "${DMG_OUTPUT_DIR}" "${RELEASES_DIR}" - -# ── Step 2: Archive ────────────────────────────────────────────────────────── - -echo "📦 Archiving..." -xcodebuild archive \ - -scheme "${SCHEME}" \ - -archivePath "${ARCHIVE_PATH}" \ - MARKETING_VERSION="${MARKETING_VERSION}" \ - CURRENT_PROJECT_VERSION="${BUILD_NUMBER}" \ - 2>&1 | tail -5 - -echo "✅ Archive created" - -# ── Step 3: Export (signed + notarized) ────────────────────────────────────── - -# Create an export options plist for Developer ID distribution. -# This tells xcodebuild to sign with your Developer ID certificate -# and submit to Apple for notarization automatically. -EXPORT_OPTIONS="${BUILD_DIR}/ExportOptions.plist" -cat > "${EXPORT_OPTIONS}" << 'PLIST' - - - - - method - developer-id - destination - export - - -PLIST - -echo "📤 Exporting (signing + notarizing — this may take a few minutes)..." 
-xcodebuild -exportArchive \ - -archivePath "${ARCHIVE_PATH}" \ - -exportPath "${EXPORT_DIR}" \ - -exportOptionsPlist "${EXPORT_OPTIONS}" \ - 2>&1 | tail -5 - -echo "✅ Export complete (signed + notarized)" - -# ── Step 4: Create DMG ────────────────────────────────────────────────────── - -DMG_PATH="${RELEASES_DIR}/${DMG_FILENAME}" - -echo "💿 Creating DMG..." -create-dmg \ - --volname "${APP_NAME}" \ - --window-pos 200 120 \ - --window-size 660 400 \ - --icon-size 100 \ - --icon "${APP_NAME}.app" 160 195 \ - --app-drop-link 500 195 \ - --background "${DMG_BACKGROUND}" \ - "${DMG_PATH}" \ - "${EXPORT_DIR}/${APP_NAME}.app" \ - 2>&1 | tail -3 - -echo "✅ DMG created: ${DMG_PATH}" - -# ── Step 5: Notarize the DMG ───────────────────────────────────────────────── -# The .app inside the DMG is already signed with Developer ID, but the DMG -# itself needs to be submitted to Apple for notarization so Gatekeeper -# allows users to open it without the "Apple could not verify" warning. -# Requires stored credentials: xcrun notarytool store-credentials "AC_PASSWORD" - -echo "🔏 Notarizing DMG with Apple (this may take a few minutes)..." -xcrun notarytool submit "${DMG_PATH}" \ - --keychain-profile "AC_PASSWORD" \ - --wait - -echo "📎 Stapling notarization ticket to DMG..." -xcrun stapler staple "${DMG_PATH}" - -echo "✅ DMG notarized and stapled" - -# ── Step 6: Sign DMG with Sparkle EdDSA key ───────────────────────────────── - -echo "🔐 Signing DMG with Sparkle EdDSA key..." -"${SPARKLE_BIN}/sign_update" "${DMG_PATH}" - -# ── Step 7: Generate / update appcast.xml ──────────────────────────────────── -# generate_appcast reads all DMGs in the releases/ directory, extracts version -# info from the app bundle inside each DMG, signs with your EdDSA key, and -# produces appcast.xml. The --download-url-prefix tells it where users will -# actually download the DMG from (GitHub Releases). - -echo "📡 Generating appcast.xml..." 
-"${SPARKLE_BIN}/generate_appcast" \ - --download-url-prefix "https://github.com/${GITHUB_REPO}/releases/download/${TAG}/" \ - -o "${PROJECT_DIR}/appcast.xml" \ - "${RELEASES_DIR}" - -echo "✅ appcast.xml updated" - -# ── Step 8: Create GitHub Release ──────────────────────────────────────────── -# Create the release first so the DMG download URL is live before we push the -# appcast that references it. - -echo "🏷️ Creating GitHub Release ${TAG}..." -gh release create "${TAG}" "${DMG_PATH}" \ - --repo "${GITHUB_REPO}" \ - --title "v${MARKETING_VERSION}" \ - --notes "makesomething v${MARKETING_VERSION}" \ - --latest - -# ── Step 9: Push appcast.xml to the releases repo ─────────────────────────── -# The appcast lives in makesomething-mac-app (the releases repo), not in the -# source code repo. We clone it to a temp dir, copy the new appcast, and push. - -echo "📝 Pushing appcast.xml to ${GITHUB_REPO}..." -RELEASES_REPO_DIR=$(mktemp -d) -git clone --depth 1 "https://github.com/${GITHUB_REPO}.git" "${RELEASES_REPO_DIR}" 2>&1 | tail -2 -cp "${PROJECT_DIR}/appcast.xml" "${RELEASES_REPO_DIR}/appcast.xml" -cd "${RELEASES_REPO_DIR}" -git add appcast.xml -git commit -m "Update appcast.xml for v${MARKETING_VERSION}" || echo " (no changes to commit)" -git push || echo " (push failed — you may need to push manually)" -cd "${PROJECT_DIR}" -rm -rf "${RELEASES_REPO_DIR}" - -echo "" -echo "═══════════════════════════════════════════════════════════════" -echo "✅ Release v${MARKETING_VERSION} (build ${BUILD_NUMBER}) complete!" 
-echo "" -echo " DMG: ${DMG_PATH}" -echo " Appcast: ${PROJECT_DIR}/appcast.xml" -echo " Release: https://github.com/${GITHUB_REPO}/releases/tag/${TAG}" -echo "" -echo " Download URL (always latest):" -echo " https://github.com/${GITHUB_REPO}/releases/latest/download/${DMG_FILENAME}" -echo "═══════════════════════════════════════════════════════════════" From 9e8d12a0e5172d75bbeecbeaab42a3d8279d4e56 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 12 May 2026 23:01:49 +0000 Subject: [PATCH 3/4] Drive Claude via the local Claude Code CLI instead of the Anthropic API Lets a Claude Max subscription cover the cost of Clicky's chat responses, so users don't have to set up a separate pay-per-token Anthropic API key. - New ClaudeAgentRunner.swift wraps the locally-installed `claude` binary as a subprocess. It speaks stream-json on both sides: writes a single user message (image content blocks + prompt) to stdin, parses the streamed `text_delta` events from stdout, and forwards them to the existing onTextChunk callback. Public surface matches the previous ClaudeAPI so call sites in CompanionManager stay one-line swaps. - Binary discovery checks an optional `ClaudeBinaryPath` override in Info.plist, then common install locations, then `command -v claude` in a login shell. - Runs the subprocess with `--permission-mode plan` so Claude can't invoke tools that modify the filesystem. - ClaudeAPI.swift deleted. Worker's /chat route and the ANTHROPIC_API_KEY secret are gone; the Worker now only proxies AssemblyAI and ElevenLabs. - README and AGENTS.md updated: Anthropic API key dropped from prerequisites, Claude Code added, Fork-specific-changes section records the move. 
--- AGENTS.md | 53 +++- README.md | 63 +++-- leanring-buddy/ClaudeAPI.swift | 291 --------------------- leanring-buddy/ClaudeAgentRunner.swift | 338 +++++++++++++++++++++++++ leanring-buddy/CompanionManager.swift | 21 +- worker/src/index.ts | 49 +--- 6 files changed, 443 insertions(+), 372 deletions(-) delete mode 100644 leanring-buddy/ClaudeAPI.swift create mode 100644 leanring-buddy/ClaudeAgentRunner.swift diff --git a/AGENTS.md b/AGENTS.md index cb445e2d..3703879d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -5,16 +5,16 @@ ## Overview -macOS menu bar companion app. Lives entirely in the macOS status bar (no dock icon, no main window). Clicking the menu bar icon opens a custom floating panel with companion voice controls. Uses push-to-talk (ctrl+option) to capture voice input, transcribes it via AssemblyAI streaming, and sends the transcript + a screenshot of the user's screen to Claude. Claude responds with text (streamed via SSE) and voice (ElevenLabs TTS). A blue cursor overlay can fly to and point at UI elements Claude references on any connected monitor. +macOS menu bar companion app. Lives entirely in the macOS status bar (no dock icon, no main window). Clicking the menu bar icon opens a custom floating panel with companion voice controls. Uses push-to-talk (ctrl+option) to capture voice input, transcribes it via AssemblyAI streaming, and sends the transcript + a screenshot of the user's screen to Claude. Claude responds with text (streamed) and voice (ElevenLabs TTS). A blue cursor overlay can fly to and point at UI elements Claude references on any connected monitor. -All API keys live on a Cloudflare Worker proxy — nothing sensitive ships in the app. +Claude runs **locally** via the user's installed Claude Code CLI, authenticated against their Claude Max subscription. AssemblyAI and ElevenLabs API keys live on a Cloudflare Worker proxy. No Anthropic API key is required anywhere. 
## Architecture - **App Type**: Menu bar-only (`LSUIElement=true`), no dock icon or main window - **Framework**: SwiftUI (macOS native) with AppKit bridging for menu bar panel and cursor overlay - **Pattern**: MVVM with `@StateObject` / `@Published` state management -- **AI Chat**: Claude (Sonnet 4.6 default, Opus 4.6 optional) via Cloudflare Worker proxy with SSE streaming +- **AI Chat**: Claude (Sonnet 4.6 default, Opus 4.6 optional) via the local `claude` CLI subprocess (Claude Max subscription quota) - **Speech-to-Text**: AssemblyAI real-time streaming (`u3-rt-pro` model) via websocket, with Apple Speech as the local fallback - **Text-to-Speech**: ElevenLabs (`eleven_flash_v2_5` model) via Cloudflare Worker proxy - **Screen Capture**: ScreenCaptureKit (macOS 14.2+), multi-monitor support @@ -26,17 +26,34 @@ All API keys live on a Cloudflare Worker proxy — nothing sensitive ships in th ### API Proxy (Cloudflare Worker) -The app never calls external APIs directly. All requests go through a Cloudflare Worker (`worker/src/index.ts`) that holds the real API keys as secrets. +AssemblyAI and ElevenLabs both go through a Cloudflare Worker (`worker/src/index.ts`) that holds their API keys as secrets. Claude does **not** — it runs as a local subprocess (see "Claude via local CLI" below). 
| Route | Upstream | Purpose | |-------|----------|---------| -| `POST /chat` | `api.anthropic.com/v1/messages` | Claude vision + streaming chat | | `POST /tts` | `api.elevenlabs.io/v1/text-to-speech/{voiceId}` | ElevenLabs TTS audio | | `POST /transcribe-token` | `streaming.assemblyai.com/v3/token` | Fetches a short-lived (480s) AssemblyAI websocket token | -Worker secrets: `ANTHROPIC_API_KEY`, `ASSEMBLYAI_API_KEY`, `ELEVENLABS_API_KEY` +Worker secrets: `ASSEMBLYAI_API_KEY`, `ELEVENLABS_API_KEY` Worker vars: `ELEVENLABS_VOICE_ID` +### Claude via local CLI + +`ClaudeAgentRunner.swift` spawns the locally installed `claude` binary as +a subprocess in stream-json mode for each push-to-talk request. The user +message — including the screenshot(s) as base64 image content blocks — +is written to the subprocess's stdin, and the streaming JSON output is +parsed for `text_delta` chunks the same way the SSE stream was parsed +before. + +Binary discovery checks `ClaudeBinaryPath` in `Info.plist` first, then +common install locations (`~/.claude/local/claude`, `/opt/homebrew/bin/claude`, +`/usr/local/bin/claude`, `~/.local/bin/claude`, `~/.npm-global/bin/claude`, +`/usr/bin/claude`), and finally falls back to `command -v claude` in a +login shell so non-standard installs (nvm/asdf/mise) still work. + +The subprocess runs with `--permission-mode plan` so Claude cannot +invoke tools that modify the user's filesystem. + ### Key Architecture Decisions **Menu Bar Panel Pattern**: The companion panel uses `NSStatusItem` for the menu bar icon and a custom borderless `NSPanel` for the floating control panel. This gives full control over appearance (dark, rounded corners, custom shadow) and avoids the standard macOS menu/popover chrome. The panel is non-activating so it doesn't steal focus. A global event monitor auto-dismisses it on outside clicks. 
@@ -66,13 +83,13 @@ Worker vars: `ELEVENLABS_VOICE_ID` | `AppleSpeechTranscriptionProvider.swift` | ~147 | Local fallback transcription provider backed by Apple's Speech framework. | | `BuddyAudioConversionSupport.swift` | ~108 | Audio conversion helpers. Converts live mic buffers to PCM16 mono audio. | | `GlobalPushToTalkShortcutMonitor.swift` | ~132 | System-wide push-to-talk monitor. Owns the listen-only `CGEvent` tap and publishes press/release transitions. | -| `ClaudeAPI.swift` | ~291 | Claude vision API client with streaming (SSE) and non-streaming modes. TLS warmup optimization, image MIME detection, conversation history support. | +| `ClaudeAgentRunner.swift` | ~295 | Local `claude` CLI subprocess driver. Same public surface as the previous `ClaudeAPI` (so call sites are unchanged), but spawns the locally installed Claude Code binary in `--input-format stream-json --output-format stream-json --include-partial-messages` mode and parses the streamed `text_delta` events. Authenticates via the user's Claude Max subscription. | | `ElevenLabsTTSClient.swift` | ~81 | ElevenLabs TTS client. Sends text to the Worker proxy, plays back audio via `AVAudioPlayer`. Exposes `isPlaying` for transient cursor scheduling. | | `DesignSystem.swift` | ~880 | Design system tokens — colors, corner radii, shared styles. All UI references `DS.Colors`, `DS.CornerRadius`, etc. | | `ClickyAnalytics.swift` | ~55 | No-op analytics shim. The PostHog integration was removed in this fork; the functions remain so call sites compile unchanged. | | `WindowPositionManager.swift` | ~262 | Window placement logic, Screen Recording permission flow, and accessibility permission helpers. | | `AppBundleConfiguration.swift` | ~28 | Runtime configuration reader for keys stored in the app bundle Info.plist. | -| `worker/src/index.ts` | ~142 | Cloudflare Worker proxy. Three routes: `/chat` (Claude), `/tts` (ElevenLabs), `/transcribe-token` (AssemblyAI temp token). 
| +| `worker/src/index.ts` | ~110 | Cloudflare Worker proxy. Two routes: `/tts` (ElevenLabs) and `/transcribe-token` (AssemblyAI temp token). | ## Build & Run @@ -95,7 +112,6 @@ cd worker npm install # Add secrets -npx wrangler secret put ANTHROPIC_API_KEY npx wrangler secret put ASSEMBLYAI_API_KEY npx wrangler secret put ELEVENLABS_API_KEY @@ -106,6 +122,14 @@ npx wrangler deploy npx wrangler dev ``` +## Claude Code (local) + +Clicky drives the locally installed `claude` CLI for AI responses. +Install it once from , run `claude` +to authenticate against the user's Claude Max subscription, and then +Clicky finds the binary automatically. Override the binary path via +the `ClaudeBinaryPath` key in `Info.plist` if needed. + ## Code Style & Conventions ### Variable and Method Naming @@ -193,5 +217,12 @@ changing what the app sends to third parties — be deliberate about it. `ElementLocationDetector.swift`, and `OpenAIAudioTranscriptionProvider.swift` could have been configured to call OpenAI / Anthropic directly with in-bundle API keys, bypassing the Cloudflare Worker. All three were - deleted to enforce the worker-proxy invariant: nothing sensitive ships - in the app binary. + deleted to enforce the invariant that no third-party API keys ship in + the app binary. +- **Claude moved off the Anthropic API onto the local CLI.** Instead of + proxying requests through the Worker to `api.anthropic.com`, Clicky + now spawns the user's locally installed `claude` binary + (`ClaudeAgentRunner.swift`) and pipes a stream-json user message into + it. This lets the Claude Max subscription cover the cost — no + pay-per-token API access is needed. The Worker's `/chat` route and the + `ANTHROPIC_API_KEY` secret were removed. diff --git a/README.md b/README.md index 5ea1fbab..bf96556b 100644 --- a/README.md +++ b/README.md @@ -47,11 +47,36 @@ If you want to do it yourself, here's the deal. 
- Xcode 15+ - Node.js 18+ (for the Cloudflare Worker) - A [Cloudflare](https://cloudflare.com) account (free tier works) -- API keys for: [Anthropic](https://console.anthropic.com), [AssemblyAI](https://www.assemblyai.com), [ElevenLabs](https://elevenlabs.io) +- API keys for: [AssemblyAI](https://www.assemblyai.com) and [ElevenLabs](https://elevenlabs.io) +- A Claude Max subscription, plus [Claude Code](https://claude.com/claude-code) installed and signed in on this Mac -### 1. Set up the Cloudflare Worker +> **No Anthropic API key needed.** Clicky uses the locally installed Claude +> Code CLI as the backend, so all Claude usage counts against your Max +> subscription quota. The Cloudflare Worker only proxies AssemblyAI and +> ElevenLabs. -The Worker is a tiny proxy that holds your API keys. The app talks to the Worker, the Worker talks to the APIs. This way your keys never ship in the app binary. +### 1. Install and sign in to Claude Code + +Install the CLI per the official docs at , then +run it once and sign in with your Max account: + +```bash +claude +``` + +Sign in via the OAuth flow it walks you through. After that, exit. Clicky +will shell out to the same `claude` binary and inherit the authenticated +session. + +If you installed `claude` somewhere unusual (e.g. via `nvm`/`asdf`/`mise`) +and Clicky cannot find it, add a `ClaudeBinaryPath` key to +`leanring-buddy/Info.plist` with the absolute path. + +### 2. Set up the Cloudflare Worker + +The Worker is a tiny proxy that holds your AssemblyAI and ElevenLabs API +keys. The app talks to the Worker, the Worker talks to the APIs. This way +those keys never ship in the app binary. ```bash cd worker @@ -61,7 +86,6 @@ npm install Now add your secrets. 
Wrangler will prompt you to paste each one: ```bash -npx wrangler secret put ANTHROPIC_API_KEY npx wrangler secret put ASSEMBLYAI_API_KEY npx wrangler secret put ELEVENLABS_API_KEY ``` @@ -81,7 +105,7 @@ npx wrangler deploy It'll give you a URL like `https://your-worker-name.your-subdomain.workers.dev`. Copy that. -### 2. Run the Worker locally (for development) +### 3. Run the Worker locally (for development) If you want to test changes to the Worker without deploying: @@ -93,27 +117,26 @@ npx wrangler dev This starts a local server (usually `http://localhost:8787`) that behaves exactly like the deployed Worker. You'll need to create a `.dev.vars` file in the `worker/` directory with your keys: ``` -ANTHROPIC_API_KEY=sk-ant-... ASSEMBLYAI_API_KEY=... ELEVENLABS_API_KEY=... ELEVENLABS_VOICE_ID=... ``` -Then update the proxy URLs in the Swift code to point to `http://localhost:8787` instead of the deployed Worker URL while developing. Grep for `clicky-proxy` to find them all. +Then update the proxy URLs in the Swift code to point to `http://localhost:8787` instead of the deployed Worker URL while developing. Grep for `workers.dev` to find them all. -### 3. Update the proxy URLs in the app +### 4. Update the proxy URLs in the app The app has the Worker URL hardcoded in a few places. Search for `your-worker-name.your-subdomain.workers.dev` and replace it with your Worker URL: ```bash -grep -r "clicky-proxy" leanring-buddy/ +grep -r "your-worker-name" leanring-buddy/ ``` You'll find it in: -- `CompanionManager.swift` — Claude chat + ElevenLabs TTS +- `CompanionManager.swift` — ElevenLabs TTS - `AssemblyAIStreamingTranscriptionProvider.swift` — AssemblyAI token endpoint -### 4. Open in Xcode and run +### 5. Open in Xcode and run ```bash open leanring-buddy.xcodeproj @@ -137,21 +160,21 @@ The app will appear in your menu bar (not the dock). Click the icon to open the If you want the full technical breakdown, read `CLAUDE.md`. 
But here's the short version: -**Menu bar app** (no dock icon) with two `NSPanel` windows — one for the control panel dropdown, one for the full-screen transparent cursor overlay. Push-to-talk streams audio over a websocket to AssemblyAI, sends the transcript + screenshot to Claude via streaming SSE, and plays the response through ElevenLabs TTS. Claude can embed `[POINT:x,y:label:screenN]` tags in its responses to make the cursor fly to specific UI elements across multiple monitors. All three APIs are proxied through a Cloudflare Worker. +**Menu bar app** (no dock icon) with two `NSPanel` windows — one for the control panel dropdown, one for the full-screen transparent cursor overlay. Push-to-talk streams audio over a websocket to AssemblyAI, sends the transcript + screenshot to a locally-running `claude` CLI subprocess (authenticated against your Claude Max subscription), and plays the response through ElevenLabs TTS. Claude can embed `[POINT:x,y:label:screenN]` tags in its responses to make the cursor fly to specific UI elements across multiple monitors. AssemblyAI and ElevenLabs are proxied through a Cloudflare Worker; Claude is not. 
## Project structure ``` leanring-buddy/ # Swift source (yes, the typo stays) - CompanionManager.swift # Central state machine - CompanionPanelView.swift # Menu bar panel UI - ClaudeAPI.swift # Claude streaming client - ElevenLabsTTSClient.swift # Text-to-speech playback - OverlayWindow.swift # Blue cursor overlay - AssemblyAI*.swift # Real-time transcription - BuddyDictation*.swift # Push-to-talk pipeline + CompanionManager.swift # Central state machine + CompanionPanelView.swift # Menu bar panel UI + ClaudeAgentRunner.swift # Local Claude Code CLI driver + ElevenLabsTTSClient.swift # Text-to-speech playback + OverlayWindow.swift # Blue cursor overlay + AssemblyAI*.swift # Real-time transcription + BuddyDictation*.swift # Push-to-talk pipeline worker/ # Cloudflare Worker proxy - src/index.ts # Three routes: /chat, /tts, /transcribe-token + src/index.ts # Two routes: /tts, /transcribe-token CLAUDE.md # Full architecture doc (agents read this) ``` diff --git a/leanring-buddy/ClaudeAPI.swift b/leanring-buddy/ClaudeAPI.swift deleted file mode 100644 index 0c7070b5..00000000 --- a/leanring-buddy/ClaudeAPI.swift +++ /dev/null @@ -1,291 +0,0 @@ -// -// ClaudeAPI.swift -// Claude API Implementation with streaming support -// - -import Foundation - -/// Claude API helper with streaming for progressive text display. -class ClaudeAPI { - private static let tlsWarmupLock = NSLock() - private static var hasStartedTLSWarmup = false - - private let apiURL: URL - var model: String - private let session: URLSession - - init(proxyURL: String, model: String = "claude-sonnet-4-6") { - self.apiURL = URL(string: proxyURL)! - self.model = model - - // Use .default instead of .ephemeral so TLS session tickets are cached. - // Ephemeral sessions do a full TLS handshake on every request, which causes - // transient -1200 (errSSLPeerHandshakeFail) errors with large image payloads. - // Disable URL/cookie caching to avoid storing responses or credentials on disk. 
- let config = URLSessionConfiguration.default - config.timeoutIntervalForRequest = 120 - config.timeoutIntervalForResource = 300 - config.waitsForConnectivity = true - config.urlCache = nil - config.httpCookieStorage = nil - self.session = URLSession(configuration: config) - - // Fire a lightweight HEAD request in the background to pre-establish the TLS - // connection. This caches the TLS session ticket so the first real API call - // (which carries a large image payload) doesn't need a cold TLS handshake. - warmUpTLSConnectionIfNeeded() - } - - private func makeAPIRequest() -> URLRequest { - var request = URLRequest(url: apiURL) - request.httpMethod = "POST" - request.timeoutInterval = 120 - request.setValue("application/json", forHTTPHeaderField: "Content-Type") - return request - } - - /// Detects the MIME type of image data by inspecting the first bytes. - /// Screen captures from ScreenCaptureKit are JPEG, but pasted images from the - /// clipboard are PNG. The API rejects requests where the declared media_type - /// doesn't match the actual image format. - private func detectImageMediaType(for imageData: Data) -> String { - // PNG files start with the 8-byte signature: 89 50 4E 47 0D 0A 1A 0A - if imageData.count >= 4 { - let pngSignature: [UInt8] = [0x89, 0x50, 0x4E, 0x47] - let firstFourBytes = [UInt8](imageData.prefix(4)) - if firstFourBytes == pngSignature { - return "image/png" - } - } - // Default to JPEG — screen captures use JPEG compression - return "image/jpeg" - } - - /// Sends a no-op HEAD request to the API host to establish and cache a TLS session. - /// Failures are silently ignored — this is purely an optimization. 
- private func warmUpTLSConnectionIfNeeded() { - Self.tlsWarmupLock.lock() - let shouldStartTLSWarmup = !Self.hasStartedTLSWarmup - if shouldStartTLSWarmup { - Self.hasStartedTLSWarmup = true - } - Self.tlsWarmupLock.unlock() - - guard shouldStartTLSWarmup else { return } - - guard var warmupURLComponents = URLComponents(url: apiURL, resolvingAgainstBaseURL: false) else { - return - } - - // The TLS session ticket is host-scoped, so warming the root host is enough. - // Hitting the host instead of `/v1/messages` avoids extra endpoint-specific noise. - warmupURLComponents.path = "/" - warmupURLComponents.query = nil - warmupURLComponents.fragment = nil - - guard let warmupURL = warmupURLComponents.url else { - return - } - - var warmupRequest = URLRequest(url: warmupURL) - warmupRequest.httpMethod = "HEAD" - warmupRequest.timeoutInterval = 10 - session.dataTask(with: warmupRequest) { _, _, _ in - // Response doesn't matter — the TLS handshake is the goal - }.resume() - } - - /// Send a vision request to Claude with streaming. - /// Calls `onTextChunk` on the main actor each time new text arrives so the UI updates progressively. - /// Returns the full accumulated text and total duration when the stream completes. 
- func analyzeImageStreaming( - images: [(data: Data, label: String)], - systemPrompt: String, - conversationHistory: [(userPlaceholder: String, assistantResponse: String)] = [], - userPrompt: String, - onTextChunk: @MainActor @Sendable (String) -> Void - ) async throws -> (text: String, duration: TimeInterval) { - let startTime = Date() - - var request = makeAPIRequest() - - // Build messages array - var messages: [[String: Any]] = [] - - for (userPlaceholder, assistantResponse) in conversationHistory { - messages.append(["role": "user", "content": userPlaceholder]) - messages.append(["role": "assistant", "content": assistantResponse]) - } - - // Build current message with all labeled images + prompt - var contentBlocks: [[String: Any]] = [] - for image in images { - contentBlocks.append([ - "type": "image", - "source": [ - "type": "base64", - "media_type": detectImageMediaType(for: image.data), - "data": image.data.base64EncodedString() - ] - ]) - contentBlocks.append([ - "type": "text", - "text": image.label - ]) - } - contentBlocks.append([ - "type": "text", - "text": userPrompt - ]) - messages.append(["role": "user", "content": contentBlocks]) - - let body: [String: Any] = [ - "model": model, - "max_tokens": 1024, - "stream": true, - "system": systemPrompt, - "messages": messages - ] - - let bodyData = try JSONSerialization.data(withJSONObject: body) - request.httpBody = bodyData - let payloadMB = Double(bodyData.count) / 1_048_576.0 - print("🌐 Claude streaming request: \(String(format: "%.1f", payloadMB))MB, \(images.count) image(s)") - - // Use bytes streaming for SSE (Server-Sent Events) - let (byteStream, response) = try await session.bytes(for: request) - - guard let httpResponse = response as? 
HTTPURLResponse else { - throw NSError( - domain: "ClaudeAPI", - code: -1, - userInfo: [NSLocalizedDescriptionKey: "Invalid HTTP response"] - ) - } - - // If non-2xx status, read the full body as error text - guard (200...299).contains(httpResponse.statusCode) else { - var errorBodyChunks: [String] = [] - for try await line in byteStream.lines { - errorBodyChunks.append(line) - } - let errorBody = errorBodyChunks.joined(separator: "\n") - throw NSError( - domain: "ClaudeAPI", - code: httpResponse.statusCode, - userInfo: [NSLocalizedDescriptionKey: "API Error (\(httpResponse.statusCode)): \(errorBody)"] - ) - } - - // Parse SSE stream — each event is "data: {json}\n\n" - var accumulatedResponseText = "" - - for try await line in byteStream.lines { - // SSE lines look like: "data: {...}" - guard line.hasPrefix("data: ") else { continue } - let jsonString = String(line.dropFirst(6)) // Drop "data: " prefix - - // End of stream marker - guard jsonString != "[DONE]" else { break } - - guard let jsonData = jsonString.data(using: .utf8), - let eventPayload = try? JSONSerialization.jsonObject(with: jsonData) as? [String: Any], - let eventType = eventPayload["type"] as? String else { - continue - } - - // We care about content_block_delta events that contain text chunks - if eventType == "content_block_delta", - let delta = eventPayload["delta"] as? [String: Any], - let deltaType = delta["type"] as? String, - deltaType == "text_delta", - let textChunk = delta["text"] as? String { - accumulatedResponseText += textChunk - // Send the accumulated text so far to the UI for progressive rendering - let currentAccumulatedText = accumulatedResponseText - await onTextChunk(currentAccumulatedText) - } - } - - let duration = Date().timeIntervalSince(startTime) - return (text: accumulatedResponseText, duration: duration) - } - - /// Non-streaming fallback for validation requests where we don't need progressive display. 
- func analyzeImage( - images: [(data: Data, label: String)], - systemPrompt: String, - conversationHistory: [(userPlaceholder: String, assistantResponse: String)] = [], - userPrompt: String - ) async throws -> (text: String, duration: TimeInterval) { - let startTime = Date() - - var request = makeAPIRequest() - - var messages: [[String: Any]] = [] - for (userPlaceholder, assistantResponse) in conversationHistory { - messages.append(["role": "user", "content": userPlaceholder]) - messages.append(["role": "assistant", "content": assistantResponse]) - } - - // Build current message with all labeled images + prompt - var contentBlocks: [[String: Any]] = [] - for image in images { - contentBlocks.append([ - "type": "image", - "source": [ - "type": "base64", - "media_type": detectImageMediaType(for: image.data), - "data": image.data.base64EncodedString() - ] - ]) - contentBlocks.append([ - "type": "text", - "text": image.label - ]) - } - contentBlocks.append([ - "type": "text", - "text": userPrompt - ]) - messages.append(["role": "user", "content": contentBlocks]) - - let body: [String: Any] = [ - "model": model, - "max_tokens": 256, - "system": systemPrompt, - "messages": messages - ] - - let bodyData = try JSONSerialization.data(withJSONObject: body) - request.httpBody = bodyData - let payloadMB = Double(bodyData.count) / 1_048_576.0 - print("🌐 Claude request: \(String(format: "%.1f", payloadMB))MB, \(images.count) image(s)") - - let (data, response) = try await session.data(for: request) - - guard let httpResponse = response as? HTTPURLResponse, - (200...299).contains(httpResponse.statusCode) else { - let responseString = String(data: data, encoding: .utf8) ?? "Unknown error" - throw NSError( - domain: "ClaudeAPI", - code: (response as? HTTPURLResponse)?.statusCode ?? -1, - userInfo: [NSLocalizedDescriptionKey: "API Error: \(responseString)"] - ) - } - - let json = try JSONSerialization.jsonObject(with: data) as? 
[String: Any] - guard let content = json?["content"] as? [[String: Any]], - let textBlock = content.first(where: { ($0["type"] as? String) == "text" }), - let text = textBlock["text"] as? String else { - throw NSError( - domain: "ClaudeAPI", - code: -1, - userInfo: [NSLocalizedDescriptionKey: "Invalid response format"] - ) - } - - let duration = Date().timeIntervalSince(startTime) - return (text: text, duration: duration) - } -} diff --git a/leanring-buddy/ClaudeAgentRunner.swift b/leanring-buddy/ClaudeAgentRunner.swift new file mode 100644 index 00000000..c6f665d5 --- /dev/null +++ b/leanring-buddy/ClaudeAgentRunner.swift @@ -0,0 +1,338 @@ +// +// ClaudeAgentRunner.swift +// leanring-buddy +// +// Runs Claude via the locally-installed Claude Code CLI as a subprocess. +// This lets the user's Claude Max subscription provide the quota instead +// of pay-per-token API access through the Cloudflare Worker. +// +// The public surface mirrors the previous `ClaudeAPI` class so the rest +// of the app does not have to know how the response is being produced. +// +// Prerequisite on the user's Mac: +// 1. Install Claude Code (https://claude.com/claude-code). +// 2. Run `claude` once and sign in with the Claude Max account. +// After that, Clicky shells out to the same binary and inherits the +// authenticated session — no API key is shipped in the app or worker. +// + +import Foundation + +enum ClaudeAgentRunnerError: LocalizedError { + case claudeBinaryNotFound(searchedPaths: [String]) + case processFailedToStart(underlying: Error) + case processFailed(exitCode: Int32, stderrSnippet: String) + case unableToWriteInput(underlying: Error) + + var errorDescription: String? { + switch self { + case .claudeBinaryNotFound(let searchedPaths): + return "The Claude CLI was not found. Install Claude Code from https://claude.com/claude-code and run `claude` once to authenticate with your Max account. Searched: \(searchedPaths.joined(separator: ", "))." 
+ case .processFailedToStart(let underlying): + return "Could not start the Claude subprocess: \(underlying.localizedDescription)" + case .processFailed(let exitCode, let stderrSnippet): + return "Claude exited with code \(exitCode): \(stderrSnippet)" + case .unableToWriteInput(let underlying): + return "Could not send input to the Claude subprocess: \(underlying.localizedDescription)" + } + } +} + +/// Wraps the local `claude` CLI as if it were a network API client. +/// +/// Each call spawns a fresh `claude -p` subprocess in stream-json mode, +/// writes a single user message (with image content blocks) to stdin, +/// and parses the streaming JSON output for text deltas. The interface +/// matches the previous Cloudflare-Worker-backed `ClaudeAPI` so the +/// CompanionManager does not need to know which backend is in use. +final class ClaudeAgentRunner { + var model: String + + init(model: String = "claude-sonnet-4-6") { + self.model = model + } + + /// Stream a vision response from Claude using the locally installed CLI. + /// Calls `onTextChunk` on the main actor with the accumulated text every + /// time a new delta arrives, mirroring the previous SSE behavior. 
+ func analyzeImageStreaming( + images: [(data: Data, label: String)], + systemPrompt: String, + conversationHistory: [(userPlaceholder: String, assistantResponse: String)] = [], + userPrompt: String, + onTextChunk: @MainActor @Sendable (String) -> Void + ) async throws -> (text: String, duration: TimeInterval) { + let startTime = Date() + + let claudeBinaryURL = try resolveClaudeBinaryPath() + + let userMessageJSONData = try buildUserMessageJSONData( + images: images, + conversationHistory: conversationHistory, + userPrompt: userPrompt + ) + + let subprocess = Process() + subprocess.executableURL = claudeBinaryURL + subprocess.arguments = [ + "-p", + "--model", model, + "--input-format", "stream-json", + "--output-format", "stream-json", + "--include-partial-messages", + "--verbose", + "--system-prompt", systemPrompt, + "--permission-mode", "plan" + ] + + let stdinPipe = Pipe() + let stdoutPipe = Pipe() + let stderrPipe = Pipe() + subprocess.standardInput = stdinPipe + subprocess.standardOutput = stdoutPipe + subprocess.standardError = stderrPipe + + let payloadMegabytes = Double(userMessageJSONData.count) / 1_048_576.0 + print("🌐 Claude (local CLI) request: \(String(format: "%.1f", payloadMegabytes))MB, \(images.count) image(s), model \(model)") + + do { + try subprocess.run() + } catch { + throw ClaudeAgentRunnerError.processFailedToStart(underlying: error) + } + + // Send the user message on stdin and close so Claude knows the + // input is complete. Without the close, `claude -p` keeps waiting. 
+ do { + try stdinPipe.fileHandleForWriting.write(contentsOf: userMessageJSONData) + try stdinPipe.fileHandleForWriting.close() + } catch { + if subprocess.isRunning { subprocess.terminate() } + throw ClaudeAgentRunnerError.unableToWriteInput(underlying: error) + } + + let accumulatedResponseText = try await withTaskCancellationHandler { + try await readStreamJSONOutput( + from: stdoutPipe.fileHandleForReading, + onTextChunk: onTextChunk + ) + } onCancel: { + if subprocess.isRunning { subprocess.terminate() } + } + + subprocess.waitUntilExit() + + guard subprocess.terminationStatus == 0 else { + let stderrData = (try? stderrPipe.fileHandleForReading.readToEnd()) ?? Data() + let stderrText = String(data: stderrData ?? Data(), encoding: .utf8) ?? "" + let stderrSnippet = String(stderrText.prefix(500)) + print("⚠️ Claude subprocess failed (exit \(subprocess.terminationStatus)): \(stderrSnippet)") + throw ClaudeAgentRunnerError.processFailed( + exitCode: subprocess.terminationStatus, + stderrSnippet: stderrSnippet + ) + } + + let duration = Date().timeIntervalSince(startTime) + return (text: accumulatedResponseText, duration: duration) + } + + // MARK: - Binary discovery + + /// Common install locations for the `claude` CLI on macOS, checked in + /// order. The first executable file wins. Users with a non-standard + /// install can override this by setting `ClaudeBinaryPath` in Info.plist. 
+ private static let commonClaudeBinaryPaths: [String] = [ + "~/.claude/local/claude", + "/opt/homebrew/bin/claude", + "/usr/local/bin/claude", + "~/.local/bin/claude", + "~/.npm-global/bin/claude", + "/usr/bin/claude" + ] + + private func resolveClaudeBinaryPath() throws -> URL { + if let configuredPathString = AppBundleConfiguration.stringValue(forKey: "ClaudeBinaryPath") { + let expandedConfiguredPath = (configuredPathString as NSString).expandingTildeInPath + if FileManager.default.isExecutableFile(atPath: expandedConfiguredPath) { + return URL(fileURLWithPath: expandedConfiguredPath) + } + } + + for candidatePathString in Self.commonClaudeBinaryPaths { + let expandedCandidatePath = (candidatePathString as NSString).expandingTildeInPath + if FileManager.default.isExecutableFile(atPath: expandedCandidatePath) { + return URL(fileURLWithPath: expandedCandidatePath) + } + } + + // Last resort: ask the user's login shell where `claude` lives. + // Catches installs in non-standard locations (nvm, asdf, mise, etc.). + if let shellResolvedPath = lookupClaudeViaLoginShell() { + return URL(fileURLWithPath: shellResolvedPath) + } + + throw ClaudeAgentRunnerError.claudeBinaryNotFound( + searchedPaths: Self.commonClaudeBinaryPaths + ) + } + + private func lookupClaudeViaLoginShell() -> String? { + let shellPath = ProcessInfo.processInfo.environment["SHELL"] ?? "/bin/zsh" + let shellProcess = Process() + shellProcess.executableURL = URL(fileURLWithPath: shellPath) + shellProcess.arguments = ["-l", "-c", "command -v claude"] + + let shellStdoutPipe = Pipe() + shellProcess.standardOutput = shellStdoutPipe + shellProcess.standardError = Pipe() + + do { + try shellProcess.run() + shellProcess.waitUntilExit() + } catch { + return nil + } + + guard shellProcess.terminationStatus == 0, + let shellStdoutData = (try? shellStdoutPipe.fileHandleForReading.readToEnd()) ?? nil, + let shellStdoutText = String(data: shellStdoutData, encoding: .utf8)? 
+ .trimmingCharacters(in: .whitespacesAndNewlines), + !shellStdoutText.isEmpty, + FileManager.default.isExecutableFile(atPath: shellStdoutText) else { + return nil + } + + return shellStdoutText + } + + // MARK: - Input construction + + /// Builds a single newline-terminated JSON line for Claude Code's + /// `--input-format stream-json` mode. The shape mirrors what was sent + /// to the Anthropic Messages API previously: a user turn with image + /// content blocks followed by a text block carrying the prompt. + private func buildUserMessageJSONData( + images: [(data: Data, label: String)], + conversationHistory: [(userPlaceholder: String, assistantResponse: String)], + userPrompt: String + ) throws -> Data { + var userContentBlocks: [[String: Any]] = [] + + for image in images { + userContentBlocks.append([ + "type": "image", + "source": [ + "type": "base64", + "media_type": detectImageMediaType(for: image.data), + "data": image.data.base64EncodedString() + ] + ]) + userContentBlocks.append([ + "type": "text", + "text": image.label + ]) + } + + let historyAndPromptText = composeHistoryAndPromptText( + conversationHistory: conversationHistory, + userPrompt: userPrompt + ) + userContentBlocks.append([ + "type": "text", + "text": historyAndPromptText + ]) + + let userMessageEnvelope: [String: Any] = [ + "type": "user", + "message": [ + "role": "user", + "content": userContentBlocks + ] + ] + + var serializedJSONData = try JSONSerialization.data( + withJSONObject: userMessageEnvelope, + options: [] + ) + // stream-json input is newline-delimited; the trailing newline signals + // that the message is complete before we close stdin. + serializedJSONData.append(0x0A) // '\n' + return serializedJSONData + } + + /// Inlines the prior conversation turns into the new user message as + /// plain text. 
Claude Code's stream-json input does accept replayed + /// assistant messages, but mixing them with image attachments is + /// brittle in some CLI versions — embedding as text is the safe path. + private func composeHistoryAndPromptText( + conversationHistory: [(userPlaceholder: String, assistantResponse: String)], + userPrompt: String + ) -> String { + if conversationHistory.isEmpty { + return userPrompt + } + + var composedLines: [String] = [] + composedLines.append("Previous conversation (for your context):") + for (previousUserText, previousAssistantText) in conversationHistory { + composedLines.append("User: \(previousUserText)") + composedLines.append("Assistant: \(previousAssistantText)") + } + composedLines.append("") + composedLines.append("User question: \(userPrompt)") + return composedLines.joined(separator: "\n") + } + + private func detectImageMediaType(for imageData: Data) -> String { + if imageData.count >= 4 { + let pngSignatureBytes: [UInt8] = [0x89, 0x50, 0x4E, 0x47] + let firstFourImageBytes = [UInt8](imageData.prefix(4)) + if firstFourImageBytes == pngSignatureBytes { + return "image/png" + } + } + return "image/jpeg" + } + + // MARK: - Output parsing + + /// Reads Claude Code's newline-delimited JSON output and pulls the + /// streaming `text_delta` chunks out of the `stream_event` envelopes. + /// Each delta is appended to the running text and forwarded to the + /// caller's `onTextChunk` so the UI can render progressively. + private func readStreamJSONOutput( + from outputFileHandle: FileHandle, + onTextChunk: @MainActor @Sendable (String) -> Void + ) async throws -> String { + var accumulatedResponseText = "" + + for try await outputLine in outputFileHandle.bytes.lines { + guard !outputLine.isEmpty, + let outputLineData = outputLine.data(using: .utf8), + let parsedEvent = try? JSONSerialization.jsonObject(with: outputLineData) as? [String: Any] else { + continue + } + + guard let eventType = parsedEvent["type"] as? 
String else { continue } + + // With --include-partial-messages, Claude Code emits the raw + // Anthropic SSE events wrapped in a "stream_event" envelope. + // We care about content_block_delta → text_delta. + if eventType == "stream_event", + let innerEvent = parsedEvent["event"] as? [String: Any], + let innerEventType = innerEvent["type"] as? String, + innerEventType == "content_block_delta", + let contentDelta = innerEvent["delta"] as? [String: Any], + let contentDeltaType = contentDelta["type"] as? String, + contentDeltaType == "text_delta", + let textChunk = contentDelta["text"] as? String { + accumulatedResponseText += textChunk + let currentAccumulatedText = accumulatedResponseText + await onTextChunk(currentAccumulatedText) + } + } + + return accumulatedResponseText + } +} diff --git a/leanring-buddy/CompanionManager.swift b/leanring-buddy/CompanionManager.swift index 37d18b84..8be4ff17 100644 --- a/leanring-buddy/CompanionManager.swift +++ b/leanring-buddy/CompanionManager.swift @@ -67,12 +67,13 @@ final class CompanionManager: ObservableObject { // Response text is now displayed inline on the cursor overlay via // streamingResponseText, so no separate response overlay manager is needed. - /// Base URL for the Cloudflare Worker proxy. All API requests route - /// through this so keys never ship in the app binary. + /// Base URL for the Cloudflare Worker proxy. Only AssemblyAI and + /// ElevenLabs go through it now — Claude runs locally via the Claude + /// Code CLI against the user's Max subscription, see ClaudeAgentRunner. 
private static let workerBaseURL = "https://your-worker-name.your-subdomain.workers.dev" - private lazy var claudeAPI: ClaudeAPI = { - return ClaudeAPI(proxyURL: "\(Self.workerBaseURL)/chat", model: selectedModel) + private lazy var claudeAgentRunner: ClaudeAgentRunner = { + return ClaudeAgentRunner(model: selectedModel) }() private lazy var elevenLabsTTSClient: ElevenLabsTTSClient = { @@ -112,7 +113,7 @@ final class CompanionManager: ObservableObject { func setSelectedModel(_ model: String) { selectedModel = model UserDefaults.standard.set(model, forKey: "selectedClaudeModel") - claudeAPI.model = model + claudeAgentRunner.model = model } /// User preference for whether the Clicky cursor should be shown. @@ -152,9 +153,9 @@ final class CompanionManager: ObservableObject { bindVoiceStateObservation() bindAudioPowerLevel() bindShortcutTransitions() - // Eagerly touch the Claude API so its TLS warmup handshake completes - // well before the onboarding demo fires at ~40s into the video. - _ = claudeAPI + // Eagerly create the Claude runner up front. This only stores the model; + // the CLI binary is still located on each request, not during init. + _ = claudeAgentRunner // If the user already completed onboarding AND all permissions are // still granted, show the cursor overlay immediately. 
If permissions @@ -583,7 +584,7 @@ final class CompanionManager: ObservableObject { (userPlaceholder: entry.userTranscript, assistantResponse: entry.assistantResponse) } - let (fullResponseText, _) = try await claudeAPI.analyzeImageStreaming( + let (fullResponseText, _) = try await claudeAgentRunner.analyzeImageStreaming( images: labeledImages, systemPrompt: Self.companionVoiceResponseSystemPrompt, conversationHistory: historyForAPI, @@ -955,7 +956,7 @@ final class CompanionManager: ObservableObject { let dimensionInfo = " (image dimensions: \(cursorScreenCapture.screenshotWidthInPixels)x\(cursorScreenCapture.screenshotHeightInPixels) pixels)" let labeledImages = [(data: cursorScreenCapture.imageData, label: cursorScreenCapture.label + dimensionInfo)] - let (fullResponseText, _) = try await claudeAPI.analyzeImageStreaming( + let (fullResponseText, _) = try await claudeAgentRunner.analyzeImageStreaming( images: labeledImages, systemPrompt: Self.onboardingDemoSystemPrompt, userPrompt: "look around my screen and find something interesting to point at", diff --git a/worker/src/index.ts b/worker/src/index.ts index 2e3e9345..ab47be3f 100644 --- a/worker/src/index.ts +++ b/worker/src/index.ts @@ -1,16 +1,20 @@ /** * Clicky Proxy Worker * - * Proxies requests to Claude and ElevenLabs APIs so the app never - * ships with raw API keys. Keys are stored as Cloudflare secrets. + * Proxies requests to AssemblyAI and ElevenLabs so the app never ships + * those API keys. Keys are stored as Cloudflare secrets. + * + * Claude is NOT proxied through this worker — Clicky shells out to the + * locally installed Claude Code CLI on the user's Mac, which authenticates + * against the user's Claude Max subscription. No Anthropic API key is + * needed anywhere in the stack. 
* * Routes: - * POST /chat → Anthropic Messages API (streaming) - * POST /tts → ElevenLabs TTS API + * POST /tts → ElevenLabs TTS API + * POST /transcribe-token → short-lived AssemblyAI websocket token */ interface Env { - ANTHROPIC_API_KEY: string; ELEVENLABS_API_KEY: string; ELEVENLABS_VOICE_ID: string; ASSEMBLYAI_API_KEY: string; @@ -25,10 +29,6 @@ export default { } try { - if (url.pathname === "/chat") { - return await handleChat(request, env); - } - if (url.pathname === "/tts") { return await handleTTS(request, env); } @@ -48,37 +48,6 @@ export default { }, }; -async function handleChat(request: Request, env: Env): Promise { - const body = await request.text(); - - const response = await fetch("https://api.anthropic.com/v1/messages", { - method: "POST", - headers: { - "x-api-key": env.ANTHROPIC_API_KEY, - "anthropic-version": "2023-06-01", - "content-type": "application/json", - }, - body, - }); - - if (!response.ok) { - const errorBody = await response.text(); - console.error(`[/chat] Anthropic API error ${response.status}: ${errorBody}`); - return new Response(errorBody, { - status: response.status, - headers: { "content-type": "application/json" }, - }); - } - - return new Response(response.body, { - status: response.status, - headers: { - "content-type": response.headers.get("content-type") || "text/event-stream", - "cache-control": "no-cache", - }, - }); -} - async function handleTranscribeToken(env: Env): Promise { const response = await fetch( "https://streaming.assemblyai.com/v3/token?expires_in_seconds=480", From ef8fc5f7724c0f33311c31f8eb9957b0e549e29f Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 14 May 2026 00:31:26 +0000 Subject: [PATCH 4/4] Drop unused com.apple.security.device.camera entitlement Clicky only ever calls AVCaptureDevice with media type .audio (the microphone). The camera entitlement was declared but never exercised, so macOS was offering a "this app may access your camera" permission slot for nothing. 
Removing it shrinks the surface that macOS asks the user to trust. --- leanring-buddy/leanring-buddy.entitlements | 2 -- 1 file changed, 2 deletions(-) diff --git a/leanring-buddy/leanring-buddy.entitlements b/leanring-buddy/leanring-buddy.entitlements index 48d74a4b..92e59b13 100644 --- a/leanring-buddy/leanring-buddy.entitlements +++ b/leanring-buddy/leanring-buddy.entitlements @@ -6,8 +6,6 @@ com.apple.security.network.client - com.apple.security.device.camera - com.apple.security.device.audio-input com.apple.security.temporary-exception.mach-lookup.global-name