Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 0 additions & 34 deletions .github/workflows/code-review-gpt.yml

This file was deleted.

51 changes: 49 additions & 2 deletions src/code-cockpit/runtime.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1273,7 +1273,7 @@ describe("code cockpit runtime", () => {
});
expect(result.worker).toMatchObject({
engineId: "claude",
engineModel: "claude-opus-4-6",
engineModel: "claude-sonnet-4-6",
});
expect(pendingRuns).toHaveLength(1);
expect(pendingRuns[0]?.input.argv).toEqual(expect.arrayContaining(["claude", "-p"]));
Expand Down Expand Up @@ -1365,7 +1365,7 @@ describe("code cockpit runtime", () => {
expect(result.action).toBe("started");
expect(result.worker).toMatchObject({
engineId: "claude",
engineModel: "claude-opus-4-6",
engineModel: "claude-sonnet-4-6",
backendId: "claude-cli",
authHealth: "healthy",
});
Expand Down Expand Up @@ -1470,6 +1470,53 @@ describe("code cockpit runtime", () => {
expect(pendingRuns).toHaveLength(0);
});

// Verifies that the ARC_SELF_DRIVE_CLAUDE_MODEL environment variable decides
// which model a newly started Claude self-drive worker is given.
it("uses ARC_SELF_DRIVE_CLAUDE_MODEL for new strict-Claude self-drive workers", async () => {
  const { supervisor, pendingRuns } = createSupervisorStub();

  const task = await store.createCodeTask({
    title: "Review workstation queue cleanup",
    repoRoot: tempRepoRoot,
  });

  // The override must differ from the built-in Claude default
  // ("claude-sonnet-4-6"); otherwise every assertion below would still pass
  // with the environment variable ignored and the test would prove nothing.
  const overrideModel = "claude-opus-4-6";

  vi.stubEnv("ARC_SELF_DRIVE_STRICT_ENGINE", "claude");
  vi.stubEnv("ARC_SELF_DRIVE_CLAUDE_MODEL", overrideModel);

  const runtime = createCodeCockpitRuntime({
    getProcessSupervisor: () => supervisor,
    loadConfig: () => ({}),
    // Both CLI backends resolve, so the engine choice is not forced by a
    // missing backend.
    resolveCliBackendConfig: (provider) => {
      if (provider === "claude-cli") {
        return { id: "claude-cli", config: claudeBackend };
      }
      if (provider === "codex-cli") {
        return { id: "codex-cli", config: backend };
      }
      return null;
    },
    prepareCliBundleMcpConfig: async ({ backendId, backend: input }) => ({
      backendId,
      backend: input,
    }),
    // Both engines report healthy, so health does not influence selection.
    runCommandWithTimeout: createRunCommandWithEngineHealthStub({
      codexHealthy: true,
      claudeHealthy: true,
    }),
  });

  const result = await runtime.supervisorTick({ repoRoot: tempRepoRoot });

  expect(result.action).toBe("started");
  expect(task.id).toBe(result.task?.id);
  // The worker record carries the overridden model...
  expect(result.worker).toMatchObject({
    engineId: "claude",
    engineModel: overrideModel,
  });
  // ...and the same model is passed on the spawned command line.
  expect(pendingRuns).toHaveLength(1);
  expect(pendingRuns[0]?.input.argv).toEqual(
    expect.arrayContaining(["--model", overrideModel]),
  );
});

it("skips retrying tasks until their backoff expires and then starts them again", async () => {
const { supervisor, pendingRuns } = createSupervisorStub();
let currentTime = new Date("2026-03-20T00:05:00.000Z");
Expand Down
13 changes: 10 additions & 3 deletions src/code-cockpit/runtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ import {
import { isTaskInRetryBackoff, resolveTaskFailure } from "./task-reliability.js";

const DEFAULT_CODEX_WORKER_MODEL = "gpt-5.4";
const DEFAULT_CLAUDE_WORKER_MODEL = "claude-opus-4-6";
const DEFAULT_CLAUDE_WORKER_MODEL = "claude-sonnet-4-6";
const DEFAULT_WORKER_TIMEOUT_MS = 30 * 60_000;
const MAX_LOG_TAIL_CHARS = 8_000;
const ENGINE_FAILURE_COOLDOWN_MS = 6 * 60 * 60_000;
Expand Down Expand Up @@ -190,6 +190,13 @@ function normalizeString(value: string | undefined): string | undefined {
return trimmed ? trimmed : undefined;
}

/**
 * Chooses the model id to use for an engine when no explicit model is given.
 *
 * For the "claude" engine the ARC_SELF_DRIVE_CLAUDE_MODEL environment variable
 * is read on every call and passed through `normalizeString`; if that yields
 * no value, DEFAULT_CLAUDE_WORKER_MODEL is used. Every other engine gets its
 * own `defaultModel`.
 *
 * @param engine - Adapter whose `engineId` and `defaultModel` drive the choice.
 * @returns The model id to run.
 */
function resolveDefaultModelForEngine(engine: WorkerEngineAdapter): string {
  if (engine.engineId !== "claude") {
    return engine.defaultModel;
  }
  const envOverride = normalizeString(process.env.ARC_SELF_DRIVE_CLAUDE_MODEL);
  return envOverride ?? DEFAULT_CLAUDE_WORKER_MODEL;
}

function slugifyWorkerName(name: string): string {
const normalized = name
.trim()
Expand Down Expand Up @@ -1300,7 +1307,7 @@ class CodeCockpitRuntime {
if (!prompt) {
throw new Error(`Worker "${worker.id}" has no objective or task goal to execute`);
}
const modelId = normalizeString(worker.engineModel) ?? engine.defaultModel;
const modelId = normalizeString(worker.engineModel) ?? resolveDefaultModelForEngine(engine);
const useResume = Boolean(worker.threadId && preparedBackend.backend.resumeArgs?.length);
const baseArgs = useResume
? (preparedBackend.backend.resumeArgs ?? []).map((entry) =>
Expand Down Expand Up @@ -1729,7 +1736,7 @@ class CodeCockpitRuntime {
repoRoot: candidate.task.repoRoot,
objective: buildSelfDriveObjective(candidate.task),
engineId: engineSelection.selected.engine.engineId,
engineModel: engineSelection.selected.engine.defaultModel,
engineModel: resolveDefaultModelForEngine(engineSelection.selected.engine),
commandPath: engineSelection.selected.commandPath,
authHealth: engineSelection.selected.authHealth,
});
Expand Down
Loading