From eb5447fc662fe354835506035dd9725e9f1fc759 Mon Sep 17 00:00:00 2001 From: Connor Tessaro Date: Sun, 22 Mar 2026 03:38:11 -0400 Subject: [PATCH] chore: throttle arc self-drive and drop broken review workflow --- .github/workflows/code-review-gpt.yml | 34 ------------------ src/code-cockpit/runtime.test.ts | 51 +++++++++++++++++++++++++-- src/code-cockpit/runtime.ts | 13 +++++-- 3 files changed, 59 insertions(+), 39 deletions(-) delete mode 100644 .github/workflows/code-review-gpt.yml diff --git a/.github/workflows/code-review-gpt.yml b/.github/workflows/code-review-gpt.yml deleted file mode 100644 index d4aae95eb8..0000000000 --- a/.github/workflows/code-review-gpt.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: Code Review (GPT) - -on: - pull_request: - types: [opened, synchronize, reopened] - -permissions: - contents: read - pull-requests: write - -jobs: - code-review: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v6 - - - name: GPT Code Review - uses: anc95/ChatGPT-CodeReview@74683225bf06d9a3c56f7733a5a71e2929ba2f2c - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - LANGUAGE: English - OPENAI_API_ENDPOINT: https://api.openai.com/v1 - MODEL: gpt-4o - PROMPT: | - You are an expert software engineer reviewing a pull request. - Focus on correctness, security, performance, and code quality. - Be concise and constructive. Highlight any bugs, potential issues, - or improvements. Skip trivial style comments already handled by linting. - top_p: 1 - temperature: 0.2 - max_tokens: 2000 - MAX_PATCH_LENGTH: 10000 diff --git a/src/code-cockpit/runtime.test.ts b/src/code-cockpit/runtime.test.ts index 06b157b480..4c9b73cd4d 100644 --- a/src/code-cockpit/runtime.test.ts +++ b/src/code-cockpit/runtime.test.ts @@ -1273,7 +1273,7 @@ describe("code cockpit runtime", () => { }); expect(result.worker).toMatchObject({ engineId: "claude", - engineModel: "claude-opus-4-6", + engineModel: "claude-sonnet-4-6", }); expect(pendingRuns).toHaveLength(1); expect(pendingRuns[0]?.input.argv).toEqual(expect.arrayContaining(["claude", "-p"])); @@ -1365,7 +1365,7 @@ describe("code cockpit runtime", () => { expect(result.action).toBe("started"); expect(result.worker).toMatchObject({ engineId: "claude", - engineModel: "claude-opus-4-6", + engineModel: "claude-sonnet-4-6", backendId: "claude-cli", authHealth: "healthy", }); @@ -1470,6 +1470,53 @@ describe("code cockpit runtime", () => { expect(pendingRuns).toHaveLength(0); }); + it("uses ARC_SELF_DRIVE_CLAUDE_MODEL for new strict-Claude self-drive workers", async () => { + const { supervisor, pendingRuns } = createSupervisorStub(); + + const task = await store.createCodeTask({ + title: "Review workstation queue cleanup", + repoRoot: tempRepoRoot, + }); + + vi.stubEnv("ARC_SELF_DRIVE_STRICT_ENGINE", "claude"); + vi.stubEnv("ARC_SELF_DRIVE_CLAUDE_MODEL", "claude-sonnet-4-6"); + + const runtime = createCodeCockpitRuntime({ + getProcessSupervisor: () => supervisor, + loadConfig: () => ({}), + resolveCliBackendConfig: (provider) => { + if (provider === "claude-cli") { + return { id: "claude-cli", config: claudeBackend }; + } + if (provider === "codex-cli") { + return { id: "codex-cli", config: backend }; + } + return null; + }, + prepareCliBundleMcpConfig: async ({ backendId, backend: input }) => ({ + backendId, + backend: input, + }), + runCommandWithTimeout: createRunCommandWithEngineHealthStub({ + codexHealthy: true, + claudeHealthy: true, + }), + }); + + const result = await runtime.supervisorTick({ repoRoot: tempRepoRoot }); + + expect(result.action).toBe("started"); + expect(task.id).toBe(result.task?.id); + expect(result.worker).toMatchObject({ + engineId: "claude", + engineModel: "claude-sonnet-4-6", + }); + expect(pendingRuns).toHaveLength(1); + expect(pendingRuns[0]?.input.argv).toEqual( + expect.arrayContaining(["--model", "claude-sonnet-4-6"]), + ); + }); + it("skips retrying tasks until their backoff expires and then starts them again", async () => { const { supervisor, pendingRuns } = createSupervisorStub(); let currentTime = new Date("2026-03-20T00:05:00.000Z"); diff --git a/src/code-cockpit/runtime.ts b/src/code-cockpit/runtime.ts index d1288aed80..4a24a305b9 100644 --- a/src/code-cockpit/runtime.ts +++ b/src/code-cockpit/runtime.ts @@ -48,7 +48,7 @@ import { import { isTaskInRetryBackoff, resolveTaskFailure } from "./task-reliability.js"; const DEFAULT_CODEX_WORKER_MODEL = "gpt-5.4"; -const DEFAULT_CLAUDE_WORKER_MODEL = "claude-opus-4-6"; +const DEFAULT_CLAUDE_WORKER_MODEL = "claude-sonnet-4-6"; const DEFAULT_WORKER_TIMEOUT_MS = 30 * 60_000; const MAX_LOG_TAIL_CHARS = 8_000; const ENGINE_FAILURE_COOLDOWN_MS = 6 * 60 * 60_000; @@ -190,6 +190,13 @@ function normalizeString(value: string | undefined): string | undefined { return trimmed ? trimmed : undefined; } +function resolveDefaultModelForEngine(engine: WorkerEngineAdapter): string { + if (engine.engineId === "claude") { + return normalizeString(process.env.ARC_SELF_DRIVE_CLAUDE_MODEL) ?? DEFAULT_CLAUDE_WORKER_MODEL; + } + return engine.defaultModel; +} + function slugifyWorkerName(name: string): string { const normalized = name .trim() @@ -1300,7 +1307,7 @@ class CodeCockpitRuntime { if (!prompt) { throw new Error(`Worker "${worker.id}" has no objective or task goal to execute`); } - const modelId = normalizeString(worker.engineModel) ?? engine.defaultModel; + const modelId = normalizeString(worker.engineModel) ?? resolveDefaultModelForEngine(engine); const useResume = Boolean(worker.threadId && preparedBackend.backend.resumeArgs?.length); const baseArgs = useResume ? (preparedBackend.backend.resumeArgs ?? []).map((entry) => @@ -1729,7 +1736,7 @@ class CodeCockpitRuntime { repoRoot: candidate.task.repoRoot, objective: buildSelfDriveObjective(candidate.task), engineId: engineSelection.selected.engine.engineId, - engineModel: engineSelection.selected.engine.defaultModel, + engineModel: resolveDefaultModelForEngine(engineSelection.selected.engine), commandPath: engineSelection.selected.commandPath, authHealth: engineSelection.selected.authHealth, });