From eb5447fc662fe354835506035dd9725e9f1fc759 Mon Sep 17 00:00:00 2001
From: Connor Tessaro <connorex2@gmail.com>
Date: Sun, 22 Mar 2026 03:38:11 -0400
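Subject: [PATCH] chore: throttle arc self-drive and drop broken review
 workflow

Default Claude self-drive workers to claude-sonnet-4-6 instead of
claude-opus-4-6, allow overriding that default through the
ARC_SELF_DRIVE_CLAUDE_MODEL environment variable, and delete the
code-review-gpt GitHub workflow.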

---
 .github/workflows/code-review-gpt.yml | 34 ------------------
 src/code-cockpit/runtime.test.ts      | 51 +++++++++++++++++++++++++--
 src/code-cockpit/runtime.ts           | 13 +++++--
 3 files changed, 59 insertions(+), 39 deletions(-)
 delete mode 100644 .github/workflows/code-review-gpt.yml

diff --git a/.github/workflows/code-review-gpt.yml b/.github/workflows/code-review-gpt.yml
deleted file mode 100644
index d4aae95eb8..0000000000
--- a/.github/workflows/code-review-gpt.yml
+++ /dev/null
@@ -1,34 +0,0 @@
-name: Code Review (GPT)
-
-on:
-  pull_request:
-    types: [opened, synchronize, reopened]
-
-permissions:
-  contents: read
-  pull-requests: write
-
-jobs:
-  code-review:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v6
-
-      - name: GPT Code Review
-        uses: anc95/ChatGPT-CodeReview@74683225bf06d9a3c56f7733a5a71e2929ba2f2c
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          LANGUAGE: English
-          OPENAI_API_ENDPOINT: https://api.openai.com/v1
-          MODEL: gpt-4o
-          PROMPT: |
-            You are an expert software engineer reviewing a pull request.
-            Focus on correctness, security, performance, and code quality.
-            Be concise and constructive. Highlight any bugs, potential issues,
-            or improvements. Skip trivial style comments already handled by linting.
-          top_p: 1
-          temperature: 0.2
-          max_tokens: 2000
-          MAX_PATCH_LENGTH: 10000
diff --git a/src/code-cockpit/runtime.test.ts b/src/code-cockpit/runtime.test.ts
index 06b157b480..4c9b73cd4d 100644
--- a/src/code-cockpit/runtime.test.ts
+++ b/src/code-cockpit/runtime.test.ts
@@ -1273,7 +1273,7 @@ describe("code cockpit runtime", () => {
     });
     expect(result.worker).toMatchObject({
       engineId: "claude",
-      engineModel: "claude-opus-4-6",
+      engineModel: "claude-sonnet-4-6",
     });
     expect(pendingRuns).toHaveLength(1);
     expect(pendingRuns[0]?.input.argv).toEqual(expect.arrayContaining(["claude", "-p"]));
@@ -1365,7 +1365,7 @@ describe("code cockpit runtime", () => {
     expect(result.action).toBe("started");
     expect(result.worker).toMatchObject({
       engineId: "claude",
-      engineModel: "claude-opus-4-6",
+      engineModel: "claude-sonnet-4-6",
       backendId: "claude-cli",
       authHealth: "healthy",
     });
@@ -1470,6 +1470,53 @@ describe("code cockpit runtime", () => {
     expect(pendingRuns).toHaveLength(0);
   });
 
+  it("uses ARC_SELF_DRIVE_CLAUDE_MODEL for new strict-Claude self-drive workers", async () => {
+    const { supervisor, pendingRuns } = createSupervisorStub();
+
+    const task = await store.createCodeTask({
+      title: "Review workstation queue cleanup",
+      repoRoot: tempRepoRoot,
+    });
+    // Use a non-default model id so the test fails if the env override is ignored.
+    vi.stubEnv("ARC_SELF_DRIVE_STRICT_ENGINE", "claude");
+    vi.stubEnv("ARC_SELF_DRIVE_CLAUDE_MODEL", "claude-env-override-model");
+
+    const runtime = createCodeCockpitRuntime({
+      getProcessSupervisor: () => supervisor,
+      loadConfig: () => ({}),
+      resolveCliBackendConfig: (provider) => {
+        if (provider === "claude-cli") {
+          return { id: "claude-cli", config: claudeBackend };
+        }
+        if (provider === "codex-cli") {
+          return { id: "codex-cli", config: backend };
+        }
+        return null;
+      },
+      prepareCliBundleMcpConfig: async ({ backendId, backend: input }) => ({
+        backendId,
+        backend: input,
+      }),
+      runCommandWithTimeout: createRunCommandWithEngineHealthStub({
+        codexHealthy: true,
+        claudeHealthy: true,
+      }),
+    });
+
+    const result = await runtime.supervisorTick({ repoRoot: tempRepoRoot });
+
+    expect(result.action).toBe("started");
+    expect(task.id).toBe(result.task?.id);
+    expect(result.worker).toMatchObject({
+      engineId: "claude",
+      engineModel: "claude-env-override-model",
+    });
+    expect(pendingRuns).toHaveLength(1);
+    expect(pendingRuns[0]?.input.argv).toEqual(
+      expect.arrayContaining(["--model", "claude-env-override-model"]),
+    );
+  });
+
   it("skips retrying tasks until their backoff expires and then starts them again", async () => {
     const { supervisor, pendingRuns } = createSupervisorStub();
     let currentTime = new Date("2026-03-20T00:05:00.000Z");
diff --git a/src/code-cockpit/runtime.ts b/src/code-cockpit/runtime.ts
index d1288aed80..4a24a305b9 100644
--- a/src/code-cockpit/runtime.ts
+++ b/src/code-cockpit/runtime.ts
@@ -48,7 +48,7 @@ import {
 import { isTaskInRetryBackoff, resolveTaskFailure } from "./task-reliability.js";
 
 const DEFAULT_CODEX_WORKER_MODEL = "gpt-5.4";
-const DEFAULT_CLAUDE_WORKER_MODEL = "claude-opus-4-6";
+const DEFAULT_CLAUDE_WORKER_MODEL = "claude-sonnet-4-6";
 const DEFAULT_WORKER_TIMEOUT_MS = 30 * 60_000;
 const MAX_LOG_TAIL_CHARS = 8_000;
 const ENGINE_FAILURE_COOLDOWN_MS = 6 * 60 * 60_000;
@@ -190,6 +190,13 @@ function normalizeString(value: string | undefined): string | undefined {
   return trimmed ? trimmed : undefined;
 }
 
+/** Picks a worker's default model; ARC_SELF_DRIVE_CLAUDE_MODEL overrides the Claude one. */
+function resolveDefaultModelForEngine(engine: WorkerEngineAdapter): string {
+  const claudeOverride = normalizeString(process.env.ARC_SELF_DRIVE_CLAUDE_MODEL);
+  const claudeModel = claudeOverride ?? DEFAULT_CLAUDE_WORKER_MODEL;
+  return engine.engineId === "claude" ? claudeModel : engine.defaultModel;
+}
+
 function slugifyWorkerName(name: string): string {
   const normalized = name
     .trim()
@@ -1300,7 +1307,7 @@ class CodeCockpitRuntime {
     if (!prompt) {
       throw new Error(`Worker "${worker.id}" has no objective or task goal to execute`);
     }
-    const modelId = normalizeString(worker.engineModel) ?? engine.defaultModel;
+    const modelId = normalizeString(worker.engineModel) ?? resolveDefaultModelForEngine(engine);
     const useResume = Boolean(worker.threadId && preparedBackend.backend.resumeArgs?.length);
     const baseArgs = useResume
       ? (preparedBackend.backend.resumeArgs ?? []).map((entry) =>
@@ -1729,7 +1736,7 @@ class CodeCockpitRuntime {
       repoRoot: candidate.task.repoRoot,
       objective: buildSelfDriveObjective(candidate.task),
       engineId: engineSelection.selected.engine.engineId,
-      engineModel: engineSelection.selected.engine.defaultModel,
+      engineModel: resolveDefaultModelForEngine(engineSelection.selected.engine),
       commandPath: engineSelection.selected.commandPath,
       authHealth: engineSelection.selected.authHealth,
     });