From 78908fe14aaa8a532f73e908c10a5b947ef6ba68 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 15 Jun 2026 05:24:30 +0000 Subject: [PATCH] Remove MiMo V2.5 Pro and Gemini 3.1 Pro Preview models Drop both models from the shared registry and clean up every reference: - models.ts: remove from AvailableModels, SUPPORTED_MODELS, MODEL_SELECTOR_MODELS, and ModelInfos (Qwen 3.7 Max and Kimi K2.6 remain) - agent-prompt-steering.ts: drop the google/xiaomi provider overlays, narrow PromptProvider, and remove the resolver branches - Delete ai-sdk-gateway-provider-options.ts (it only configured Gemini's thinking knob) and strip its now-dead taskMode threading through agent-runtime and agent-route - Update tests + docs; delete the Gemini-only provider-options test https://claude.ai/code/session_01JzzEBjdPyv9GmBrz6PEeWP --- CLAUDE.md | 18 ++-- README.md | 2 +- src/app/api/agent/route.ts | 1 - src/lib/server/agent-prompt-steering.ts | 26 +----- src/lib/server/agent-route.ts | 3 - .../llm/agent-runtime-synthesis-gating.ts | 6 +- src/lib/server/llm/agent-runtime.ts | 14 --- .../llm/ai-sdk-gateway-provider-options.ts | 59 ------------ src/lib/shared/llm/models.ts | 13 --- tests/agent-helper-behavior.test.mjs | 22 ++--- .../agent-prompt-steering-inference.test.mjs | 27 ++---- tests/agent-prompt-steering.test.mjs | 14 +-- .../agent-runtime-task-mode-routing.test.mjs | 91 ------------------- tests/agent-system-prompt.test.mjs | 2 +- tests/gateway-search-tools.test.mjs | 4 - tests/model-registry.test.mjs | 36 ++------ tests/stubs/actions-api-keys.mjs | 1 - tests/thread-payload-contract.test.mjs | 2 +- 18 files changed, 42 insertions(+), 299 deletions(-) delete mode 100644 src/lib/server/llm/ai-sdk-gateway-provider-options.ts delete mode 100644 tests/agent-runtime-task-mode-routing.test.mjs diff --git a/CLAUDE.md b/CLAUDE.md index 17992c1..1e4ebee 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -78,7 +78,7 @@ This boundary is **enforced by Next.js bundling at build time** (importing `pg`/ ### Task Modes -**Task mode** (`inferPromptTaskMode` in `agent-prompt-steering.ts`) is inferred from message content and drives only the **prompt overlay text** (`TASK MODE OVERLAY: `) and the Gemini thinking level. Modes: `general`, `coding`, `debugging`, `writing`, `research`, `high_stakes`, `closed_answer`, `instruction_following`. The tool-step budget is a single fixed constant (`AGENT_TOOL_MAX_STEPS`) — there is no per-request runtime-profile selection. (The `research` task mode is an automatic content-based overlay, not a user-facing toggle.) +**Task mode** (`inferPromptTaskMode` in `agent-prompt-steering.ts`) is inferred from message content and drives only the **prompt overlay text** (`TASK MODE OVERLAY: `). Modes: `general`, `coding`, `debugging`, `writing`, `research`, `high_stakes`, `closed_answer`, `instruction_following`. The tool-step budget is a single fixed constant (`AGENT_TOOL_MAX_STEPS`) — there is no per-request runtime-profile selection. (The `research` task mode is an automatic content-based overlay, not a user-facing toggle.) ### System Prompt Composition @@ -86,7 +86,7 @@ This boundary is **enforced by Next.js bundling at build time** (importing `pg`/ 1. `OPERATING INSTRUCTIONS` — `DEFAULT_OPERATING_INSTRUCTION` (`src/lib/shared/llm/system-instructions.ts`) 2. `RUNTIME DATE CONTEXT` — current UTC timestamp + user timezone (from `X-User-Timezone` header) -3. **Provider overlay** (`PROVIDER OVERLAY: ALIBABA|GOOGLE|MOONSHOTAI|XIAOMI`) — keyed by the model's **provider org**, not its nickname (alibaba=Qwen, google=Gemini, moonshotai=Kimi, xiaomi=MiMo). Always applied for a supported model. +3. **Provider overlay** (`PROVIDER OVERLAY: ALIBABA|MOONSHOTAI`) — keyed by the model's **provider org**, not its nickname (alibaba=Qwen, moonshotai=Kimi). Always applied for a supported model. 4. **Task mode overlay** (`TASK MODE OVERLAY: `) 5. `IDENTITY AND TONE CONTEXT` — `DEFAULT_SOUL_FALLBACK_INSTRUCTION` (`src/lib/shared/llm/system-instructions.ts`) 6. `AUTH USER CONTEXT` — authenticated user id, name, email @@ -154,15 +154,12 @@ Each tool is only registered when its requirements are met. All models are defined in `src/lib/shared/llm/models.ts`: -| Key | Model ID | Display Name | -| ------------------------------- | ------------------------------- | ---------------------- | -| `ALIBABA_QWEN3_7_MAX` | `alibaba/qwen3.7-max` | Qwen 3.7 Max | -| `GOOGLE_GEMINI_3_1_PRO_PREVIEW` | `google/gemini-3.1-pro-preview` | Gemini 3.1 Pro Preview | -| `MOONSHOTAI_KIMI_K2_6` | `moonshotai/kimi-k2.6` | Kimi K2.6 | -| `XIAOMI_MIMO_V2_5_PRO` | `xiaomi/mimo-v2.5-pro` | MiMo V2.5 Pro | +| Key | Model ID | Display Name | +| ---------------------- | ---------------------- | ------------ | +| `ALIBABA_QWEN3_7_MAX` | `alibaba/qwen3.7-max` | Qwen 3.7 Max | +| `MOONSHOTAI_KIMI_K2_6` | `moonshotai/kimi-k2.6` | Kimi K2.6 | -- `MODEL_SELECTOR_MODELS` — the chat selector subset: Qwen 3.7 Max, Kimi K2.6, MiMo V2.5 Pro. -- Gemini stays in `SUPPORTED_MODELS` for Gateway availability but is not a standalone chat selector option. +- `MODEL_SELECTOR_MODELS` — the chat selector subset: Qwen 3.7 Max and Kimi K2.6. - The agent is text-only: all chat input is plain text (no image, file, or PDF input). - Adding a model means updating `AvailableModels`, `ModelInfos`, `SUPPORTED_MODELS`, and optionally `MODEL_SELECTOR_MODELS`. `/api/models` filters this registry by configured keys (`getModels()` in `src/lib/actions/api-keys.ts`). @@ -298,7 +295,6 @@ src/ agent-runtime-synthesis-gating.ts # Final-step + mid-budget synthesis predicates gateway-responses.ts # startGatewayResponseStream gateway-client.ts # undici dispatcher for AI Gateway - ai-sdk-gateway-provider-options.ts # Per-model provider options (Gemini thinking) ai-sdk-tavily-tools.ts # tavily_search / tavily_extract code-execution-tools.ts # Sandboxed JS/Python execution initial-reasoning-chunk-sanitizer.ts # Redacted-reasoning filtering diff --git a/README.md b/README.md index d1bc2f8..59c7d86 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Chloei -Chloei is a Next.js 16 chat app backed by Vercel AI Gateway. It currently exposes a curated model selector that defaults to Qwen 3.7 Max and also includes Kimi K2.6 and MiMo V2.5 Pro, and offers local code execution, optional Tavily retrieval, and Better Auth email/password authentication with PostgreSQL-backed users and sessions. +Chloei is a Next.js 16 chat app backed by Vercel AI Gateway. It currently exposes a curated model selector that defaults to Qwen 3.7 Max and also includes Kimi K2.6, and offers local code execution, optional Tavily retrieval, and Better Auth email/password authentication with PostgreSQL-backed users and sessions. ## Documentation diff --git a/src/app/api/agent/route.ts b/src/app/api/agent/route.ts index 6877310..d0fda79 100644 --- a/src/app/api/agent/route.ts +++ b/src/app/api/agent/route.ts @@ -240,7 +240,6 @@ export async function POST(request: NextRequest) { aiGatewayApiKey, tavilyApiKey, userTimeZone, - taskMode: promptTaskMode, userId: session.user.id, featureFlags, messages: parsedRequest.messages, diff --git a/src/lib/server/agent-prompt-steering.ts b/src/lib/server/agent-prompt-steering.ts index 122e659..9747b3b 100644 --- a/src/lib/server/agent-prompt-steering.ts +++ b/src/lib/server/agent-prompt-steering.ts @@ -7,7 +7,7 @@ import { type PromptTextMessage, } from "./prompt-message-utils" -export type PromptProvider = "alibaba" | "google" | "moonshotai" | "xiaomi" +export type PromptProvider = "alibaba" | "moonshotai" export type PromptTaskMode = | "general" @@ -64,14 +64,6 @@ Use Qwen reasoning mode efficiently. - On format-sensitive tasks, do a literal final-format check before finishing. - Treat hard word, line, and sentence caps as hard caps. Count the final output when close to the limit. - After tool use, synthesize the result and stop. Do not replay raw tool traces. -`.trim(), - google: ` -Use Gemini reasoning mode efficiently. -- Spend the thinking budget on the parts of the task that are actually uncertain; do not narrate planning that adds no information. -- Prefer direct execution and verification over speculative narration. -- On format-sensitive tasks, do a literal final-format check before finishing. -- Treat hard word, line, and sentence caps as hard caps. Count the final output when close to the limit. -- After tool use, synthesize the result and stop. Do not replay raw tool traces. `.trim(), moonshotai: ` Use Kimi reasoning mode efficiently. @@ -80,14 +72,6 @@ Use Kimi reasoning mode efficiently. - On format-sensitive tasks, do a literal final-format check before finishing. - Treat hard word, line, and sentence caps as hard caps. Count the final output when close to the limit. - After tool use, synthesize the result and stop. Do not replay raw tool traces. -`.trim(), - xiaomi: ` -Use MiMo reasoning mode efficiently. -- Optimize for streaming latency: start producing the user-facing answer as soon as you have a defensible thread; refine in-line. -- Prefer direct execution and verification over speculative narration. -- On format-sensitive tasks, do a literal final-format check before finishing. -- Treat hard word, line, and sentence caps as hard caps. Count the final output when close to the limit. -- After tool use, synthesize the result and stop. Do not replay raw tool traces. `.trim(), } @@ -157,18 +141,10 @@ export function resolvePromptProvider(model: ModelType): PromptProvider { return "alibaba" } - if (model.startsWith("google/")) { - return "google" - } - if (model.startsWith("moonshotai/")) { return "moonshotai" } - if (model.startsWith("xiaomi/")) { - return "xiaomi" - } - throw new Error(`Unsupported model provider for model: ${model}`) } diff --git a/src/lib/server/agent-route.ts b/src/lib/server/agent-route.ts index 6b722e3..05f90fa 100644 --- a/src/lib/server/agent-route.ts +++ b/src/lib/server/agent-route.ts @@ -15,7 +15,6 @@ import { resolveDefaultModelSelectorModel, } from "@/lib/shared" -import type { PromptTaskMode } from "./agent-prompt-steering" import { AGENT_MAX_MESSAGE_CHARS, AGENT_MAX_MESSAGES, @@ -102,7 +101,6 @@ interface CreateAgentStreamResponseParams { aiGatewayApiKey: string tavilyApiKey?: string userTimeZone?: string - taskMode: PromptTaskMode userId?: string featureFlags?: AgentFeatureFlags messages: AgentStreamRequest["messages"] @@ -517,7 +515,6 @@ export function createAgentStreamResponse( aiGatewayApiKey: params.aiGatewayApiKey, tavilyApiKey: params.tavilyApiKey, userTimeZone: params.userTimeZone, - taskMode: params.taskMode, userId: params.userId, featureFlags: params.featureFlags, messages: params.messages, diff --git a/src/lib/server/llm/agent-runtime-synthesis-gating.ts b/src/lib/server/llm/agent-runtime-synthesis-gating.ts index fc06b04..bc53307 100644 --- a/src/lib/server/llm/agent-runtime-synthesis-gating.ts +++ b/src/lib/server/llm/agent-runtime-synthesis-gating.ts @@ -16,9 +16,9 @@ export function shouldNudgeMidBudgetSynthesis( if (toolMaxSteps <= 3) { return false } - // Kick in at one-third of budget. The failing 10-K tasks (Kimi K2.6 and - // Gemini 3.1 Pro) tend to stop naturally with empty text after only 5-8 - // tool calls (~steps 4-7 of 20); half-budget fires too late to reach them. + // Kick in at one-third of budget. The failing 10-K tasks (e.g. Kimi K2.6) + // tend to stop naturally with empty text after only 5-8 tool calls + // (~steps 4-7 of 20); half-budget fires too late to reach them. const threshold = Math.max(2, Math.floor(toolMaxSteps / 3)) return stepNumber >= threshold && stepNumber < toolMaxSteps - 1 } diff --git a/src/lib/server/llm/agent-runtime.ts b/src/lib/server/llm/agent-runtime.ts index db61a1e..447b3f2 100644 --- a/src/lib/server/llm/agent-runtime.ts +++ b/src/lib/server/llm/agent-runtime.ts @@ -7,10 +7,6 @@ import { } from "ai" import { createLogger } from "@/lib/logger" -import { - type PromptTaskMode, - resolvePromptProvider, -} from "@/lib/server/agent-prompt-steering" import { AGENT_TOOL_MAX_STEPS } from "@/lib/server/agent-runtime-config" import { type AgentFeatureFlags, @@ -27,7 +23,6 @@ import { shouldForceFinalSynthesisStep, shouldNudgeMidBudgetSynthesis, } from "./agent-runtime-synthesis-gating" -import { getAiSdkGatewayProviderOptionsForTaskMode } from "./ai-sdk-gateway-provider-options" import { createAiSdkTavilyTools, getAiSdkTavilyToolCallMetadata, @@ -53,7 +48,6 @@ export interface StartAgentRuntimeStreamParams { userTimeZone?: string messages: AgentInputMessage[] systemInstruction: string - taskMode: PromptTaskMode temperature?: number signal?: AbortSignal userId?: string @@ -256,10 +250,6 @@ export async function* startAgentRuntimeStream( ...(params.temperature !== undefined ? { temperature: params.temperature } : {}), - providerOptions: getAiSdkGatewayProviderOptionsForTaskMode({ - provider: resolvePromptProvider(params.model), - taskMode: params.taskMode, - }), experimental_telemetry: { isEnabled: true, recordInputs: featureFlags.telemetryRecordIo, @@ -508,10 +498,6 @@ export async function* startAgentRuntimeStream( ...(params.temperature !== undefined ? { temperature: params.temperature } : {}), - providerOptions: getAiSdkGatewayProviderOptionsForTaskMode({ - provider: resolvePromptProvider(params.model), - taskMode: params.taskMode, - }), tools, toolChoice: "none" as const, stopWhen: stepCountIs(1), diff --git a/src/lib/server/llm/ai-sdk-gateway-provider-options.ts b/src/lib/server/llm/ai-sdk-gateway-provider-options.ts deleted file mode 100644 index d4156b6..0000000 --- a/src/lib/server/llm/ai-sdk-gateway-provider-options.ts +++ /dev/null @@ -1,59 +0,0 @@ -import type { - PromptProvider, - PromptTaskMode, -} from "@/lib/server/agent-prompt-steering" - -type GeminiThinkingLevel = "low" | "medium" | "high" - -function buildGeminiThinkingOptions(level: GeminiThinkingLevel) { - return { - google: { - thinkingConfig: { - thinkingLevel: level, - includeThoughts: true, - }, - }, - } as const -} - -/** - * Map a (provider, taskMode) pair to AI SDK provider options. - * - * Today only Gemini exposes an explicit reasoning knob through AI Gateway - * (`thinkingConfig.thinkingLevel`); Kimi K2.6, MiMo V2.5 Pro, and Qwen 3.7 - * Max reason natively without a per-call provider option. Returning `{}` for - * those providers is the correct no-op — once AI Gateway surfaces reasoning - * options for them, just extend this map. - */ -export function getAiSdkGatewayProviderOptionsForTaskMode(params: { - provider: PromptProvider - taskMode: PromptTaskMode -}) { - if (params.provider !== "google") { - return {} - } - - switch (params.taskMode) { - case "research": - case "high_stakes": - case "debugging": - return buildGeminiThinkingOptions("high") - case "coding": - return buildGeminiThinkingOptions("medium") - case "instruction_following": - case "closed_answer": - // Format-sensitive / one-answer tasks: minimal thinking, keep includeThoughts - // on so the trace stays observable for telemetry. - return buildGeminiThinkingOptions("low") - case "writing": - case "general": - return {} - } - - // Exhaustiveness guard: if a new PromptTaskMode is added, this assignment - // becomes a type error so we don't silently fall through. At runtime, return - // the safe no-op so an unknown mode never blocks the request. - const _unhandledTaskMode: never = params.taskMode - void _unhandledTaskMode - return {} -} diff --git a/src/lib/shared/llm/models.ts b/src/lib/shared/llm/models.ts index 08eecb6..90c166e 100644 --- a/src/lib/shared/llm/models.ts +++ b/src/lib/shared/llm/models.ts @@ -1,8 +1,6 @@ export const AvailableModels = { ALIBABA_QWEN3_7_MAX: "alibaba/qwen3.7-max", - GOOGLE_GEMINI_3_1_PRO_PREVIEW: "google/gemini-3.1-pro-preview", MOONSHOTAI_KIMI_K2_6: "moonshotai/kimi-k2.6", - XIAOMI_MIMO_V2_5_PRO: "xiaomi/mimo-v2.5-pro", } as const export type ModelType = (typeof AvailableModels)[keyof typeof AvailableModels] @@ -21,9 +19,7 @@ export interface ModelInfo { export const SUPPORTED_MODELS = [ AvailableModels.ALIBABA_QWEN3_7_MAX, - AvailableModels.GOOGLE_GEMINI_3_1_PRO_PREVIEW, AvailableModels.MOONSHOTAI_KIMI_K2_6, - AvailableModels.XIAOMI_MIMO_V2_5_PRO, ] as const export const ALL_MODELS = [...SUPPORTED_MODELS] as const @@ -31,7 +27,6 @@ export const ALL_MODELS = [...SUPPORTED_MODELS] as const export const MODEL_SELECTOR_MODELS = [ AvailableModels.ALIBABA_QWEN3_7_MAX, AvailableModels.MOONSHOTAI_KIMI_K2_6, - AvailableModels.XIAOMI_MIMO_V2_5_PRO, ] as const const MODEL_SELECTOR_MODEL_SET: ReadonlySet = new Set( @@ -66,16 +61,8 @@ export const ModelInfos: Record = { id: AvailableModels.ALIBABA_QWEN3_7_MAX, name: "Qwen 3.7 Max", }, - [AvailableModels.GOOGLE_GEMINI_3_1_PRO_PREVIEW]: { - id: AvailableModels.GOOGLE_GEMINI_3_1_PRO_PREVIEW, - name: "Gemini 3.1 Pro Preview", - }, [AvailableModels.MOONSHOTAI_KIMI_K2_6]: { id: AvailableModels.MOONSHOTAI_KIMI_K2_6, name: "Kimi K2.6", }, - [AvailableModels.XIAOMI_MIMO_V2_5_PRO]: { - id: AvailableModels.XIAOMI_MIMO_V2_5_PRO, - name: "MiMo V2.5 Pro", - }, } diff --git a/tests/agent-helper-behavior.test.mjs b/tests/agent-helper-behavior.test.mjs index be0c69d..57e5b5d 100644 --- a/tests/agent-helper-behavior.test.mjs +++ b/tests/agent-helper-behavior.test.mjs @@ -173,7 +173,6 @@ test("agent helper validates total size, last-message role, and default model su availableModels: [ { id: "alibaba/qwen3.7-max" }, { id: "moonshotai/kimi-k2.6" }, - { id: "xiaomi/mimo-v2.5-pro" }, ], requestId: "request-default-mode-qwen", }) @@ -181,29 +180,26 @@ test("agent helper validates total size, last-message role, and default model su assert(!(defaultModeWithQwenResult instanceof Response)) assert.equal(defaultModeWithQwenResult.selectedModel, "alibaba/qwen3.7-max") - const standaloneResearchModelResult = parseAgentStreamRequest({ + const unavailableModelResult = parseAgentStreamRequest({ body: { - model: "google/gemini-3.1-pro-preview", + model: "alibaba/qwen3.7-max", messages: [ { role: "user", - content: "Use Gemini as a normal chat model.", + content: "Use a model the caller cannot access.", }, ], }, - availableModels: [ - { id: "moonshotai/kimi-k2.6" }, - { id: "google/gemini-3.1-pro-preview" }, - ], - requestId: "request-standalone-research-model", + availableModels: [{ id: "moonshotai/kimi-k2.6" }], + requestId: "request-unavailable-model", }) - assert(standaloneResearchModelResult instanceof Response) - assert.equal(standaloneResearchModelResult.status, 400) - assert.deepEqual(await standaloneResearchModelResult.json(), { + assert(unavailableModelResult instanceof Response) + assert.equal(unavailableModelResult.status, 400) + assert.deepEqual(await unavailableModelResult.json(), { error: "Unsupported model selected.", errorCode: "AGENT_UNSUPPORTED_MODEL", - requestId: "request-standalone-research-model", + requestId: "request-unavailable-model", }) const unknownFieldResult = parseAgentStreamRequest({ diff --git a/tests/agent-prompt-steering-inference.test.mjs b/tests/agent-prompt-steering-inference.test.mjs index 3c782c9..29f2af5 100644 --- a/tests/agent-prompt-steering-inference.test.mjs +++ b/tests/agent-prompt-steering-inference.test.mjs @@ -182,24 +182,12 @@ test("inferPromptTaskMode userExpertise=writing routes ambiguous prompts to writ }) test("provider overlays are differentiated across providers", () => { - const google = createPromptSteeringBlocks({ - provider: "google", - taskMode: "research", - }) - .map((block) => block.body) - .join("\n\n") const moonshot = createPromptSteeringBlocks({ provider: "moonshotai", taskMode: "research", }) .map((block) => block.body) .join("\n\n") - const xiaomi = createPromptSteeringBlocks({ - provider: "xiaomi", - taskMode: "research", - }) - .map((block) => block.body) - .join("\n\n") const alibaba = createPromptSteeringBlocks({ provider: "alibaba", taskMode: "research", @@ -207,26 +195,25 @@ test("provider overlays are differentiated across providers", () => { .map((block) => block.body) .join("\n\n") - assert.match(google, /thinking budget/i) + assert.match(moonshot, /Use Kimi reasoning mode/i) assert.match(moonshot, /long context/i) - assert.match(xiaomi, /streaming latency/i) - assert.match(alibaba, /Qwen reasoning mode/i) + assert.match(alibaba, /Use Qwen reasoning mode/i) assert.notEqual( - google.split("Use Gemini")[1], - moonshot.split("Use Kimi")[1], - "Gemini and Kimi overlays should not be byte-identical." + alibaba, + moonshot, + "Qwen and Kimi overlays should not be byte-identical." ) }) test("debugging and writing overlays appear when their task modes are selected", () => { const debugging = createPromptSteeringBlocks({ - provider: "google", + provider: "moonshotai", taskMode: "debugging", }) .map((block) => block.body) .join("\n\n") const writing = createPromptSteeringBlocks({ - provider: "google", + provider: "moonshotai", taskMode: "writing", }) .map((block) => block.body) diff --git a/tests/agent-prompt-steering.test.mjs b/tests/agent-prompt-steering.test.mjs index 4921b68..fa2a564 100644 --- a/tests/agent-prompt-steering.test.mjs +++ b/tests/agent-prompt-steering.test.mjs @@ -25,31 +25,23 @@ test("prompt steering resolves supported model providers", () => { resolvePromptProvider(AvailableModels.ALIBABA_QWEN3_7_MAX), "alibaba" ) - assert.equal( - resolvePromptProvider(AvailableModels.GOOGLE_GEMINI_3_1_PRO_PREVIEW), - "google" - ) assert.equal( resolvePromptProvider(AvailableModels.MOONSHOTAI_KIMI_K2_6), "moonshotai" ) - assert.equal( - resolvePromptProvider(AvailableModels.XIAOMI_MIMO_V2_5_PRO), - "xiaomi" - ) }) test("prompt steering includes provider overlays for supported models", () => { const blocks = createPromptSteeringBlocks({ - provider: "google", + provider: "moonshotai", taskMode: "research", }) const overlayText = blocks.map((block) => block.body).join("\n\n") assert.match( overlayText, - /Use Gemini reasoning mode efficiently/, - "Expected Google prompts to receive the Gemini provider overlay." + /Use Kimi reasoning mode efficiently/, + "Expected Moonshot AI prompts to receive the Kimi provider overlay." ) assert.match( overlayText, diff --git a/tests/agent-runtime-task-mode-routing.test.mjs b/tests/agent-runtime-task-mode-routing.test.mjs deleted file mode 100644 index de7c3ec..0000000 --- a/tests/agent-runtime-task-mode-routing.test.mjs +++ /dev/null @@ -1,91 +0,0 @@ -import assert from "node:assert/strict" -import path from "node:path" -import test from "node:test" -import { fileURLToPath, pathToFileURL } from "node:url" - -import "./register-ts-path-hooks.mjs" - -const cwd = fileURLToPath(new URL("..", import.meta.url)) -const providerOptionsUrl = pathToFileURL( - path.join(cwd, "src/lib/server/llm/ai-sdk-gateway-provider-options.ts") -).href - -const { getAiSdkGatewayProviderOptionsForTaskMode } = await import( - providerOptionsUrl -) - -test("Gemini gets high thinking for research, high_stakes, debugging", () => { - for (const taskMode of ["research", "high_stakes", "debugging"]) { - assert.deepEqual( - getAiSdkGatewayProviderOptionsForTaskMode({ - provider: "google", - taskMode, - }), - { - google: { - thinkingConfig: { thinkingLevel: "high", includeThoughts: true }, - }, - }, - `expected high thinking for ${taskMode}` - ) - } -}) - -test("Gemini gets medium thinking for coding", () => { - assert.deepEqual( - getAiSdkGatewayProviderOptionsForTaskMode({ - provider: "google", - taskMode: "coding", - }), - { - google: { - thinkingConfig: { thinkingLevel: "medium", includeThoughts: true }, - }, - } - ) -}) - -test("Gemini gets low thinking for format-sensitive modes", () => { - for (const taskMode of ["instruction_following", "closed_answer"]) { - assert.deepEqual( - getAiSdkGatewayProviderOptionsForTaskMode({ - provider: "google", - taskMode, - }), - { - google: { - thinkingConfig: { thinkingLevel: "low", includeThoughts: true }, - }, - }, - `expected low thinking for ${taskMode}` - ) - } -}) - -test("Gemini falls back to default thinking for general/writing", () => { - for (const taskMode of ["general", "writing"]) { - assert.deepEqual( - getAiSdkGatewayProviderOptionsForTaskMode({ - provider: "google", - taskMode, - }), - {}, - `expected empty provider options for ${taskMode}` - ) - } -}) - -test("non-Gemini reasoning models receive no explicit provider options", () => { - for (const taskMode of ["research", "coding", "debugging", "general"]) { - for (const provider of ["alibaba", "moonshotai", "xiaomi"]) { - assert.deepEqual( - getAiSdkGatewayProviderOptionsForTaskMode({ - provider, - taskMode, - }), - {}, - `expected empty provider options for ${provider}+${taskMode}` - ) - } - } -}) diff --git a/tests/agent-system-prompt.test.mjs b/tests/agent-system-prompt.test.mjs index 09d8175..2f85af3 100644 --- a/tests/agent-system-prompt.test.mjs +++ b/tests/agent-system-prompt.test.mjs @@ -112,7 +112,7 @@ test("agent system prompt places the identity block after task steering", () => }, { now: new Date("2026-05-03T12:34:56.000Z"), - provider: "google", + provider: "moonshotai", taskMode: "coding", } ) diff --git a/tests/gateway-search-tools.test.mjs b/tests/gateway-search-tools.test.mjs index 3c94a3c..b9fa572 100644 --- a/tests/gateway-search-tools.test.mjs +++ b/tests/gateway-search-tools.test.mjs @@ -70,10 +70,6 @@ test("stale and fallback-only model ids fall back to Qwen 3.7 Max", () => { id: "alibaba/qwen3.7-max", name: "Qwen 3.7 Max", }, - { - id: "google/gemini-3.1-pro-preview", - name: "Gemini 3.1 Pro Preview", - }, { id: "moonshotai/kimi-k2.6", name: "Kimi K2.6", diff --git a/tests/model-registry.test.mjs b/tests/model-registry.test.mjs index c0309d2..75ed0a2 100644 --- a/tests/model-registry.test.mjs +++ b/tests/model-registry.test.mjs @@ -16,16 +16,16 @@ test("shared model registry includes the curated gateway models", async () => { "Expected legacy model ids to be fully removed from the shared model registry." ) - assert.match( + assert.doesNotMatch( source, - /ALIBABA_QWEN3_7_MAX:\s*"alibaba\/qwen3\.7-max"/, - "Expected AvailableModels to include ALIBABA_QWEN3_7_MAX." + /google\/gemini-3\.1-pro-preview|xiaomi\/mimo-v2\.5-pro|GOOGLE_GEMINI_3_1_PRO_PREVIEW|XIAOMI_MIMO_V2_5_PRO|Gemini 3\.1 Pro Preview|MiMo V2\.5 Pro/, + "Expected Gemini 3.1 Pro Preview and MiMo V2.5 Pro to be fully removed from the shared model registry." ) assert.match( source, - /GOOGLE_GEMINI_3_1_PRO_PREVIEW:\s*"google\/gemini-3\.1-pro-preview"/, - "Expected AvailableModels to include GOOGLE_GEMINI_3_1_PRO_PREVIEW." + /ALIBABA_QWEN3_7_MAX:\s*"alibaba\/qwen3\.7-max"/, + "Expected AvailableModels to include ALIBABA_QWEN3_7_MAX." ) assert.match( @@ -34,22 +34,16 @@ test("shared model registry includes the curated gateway models", async () => { "Expected AvailableModels to include MOONSHOTAI_KIMI_K2_6." ) - assert.match( - source, - /XIAOMI_MIMO_V2_5_PRO:\s*"xiaomi\/mimo-v2\.5-pro"/, - "Expected AvailableModels to include XIAOMI_MIMO_V2_5_PRO." - ) - assert.match( source.replace(/\s+/g, " "), - /SUPPORTED_MODELS = \[ AvailableModels\.ALIBABA_QWEN3_7_MAX, AvailableModels\.GOOGLE_GEMINI_3_1_PRO_PREVIEW, AvailableModels\.MOONSHOTAI_KIMI_K2_6, AvailableModels\.XIAOMI_MIMO_V2_5_PRO, \] as const/, - "Expected SUPPORTED_MODELS to list Qwen 3.7 Max, Gemini 3.1 Pro Preview, Kimi K2.6, and MiMo V2.5 Pro." + /SUPPORTED_MODELS = \[ AvailableModels\.ALIBABA_QWEN3_7_MAX, AvailableModels\.MOONSHOTAI_KIMI_K2_6, \] as const/, + "Expected SUPPORTED_MODELS to list Qwen 3.7 Max and Kimi K2.6." ) assert.match( source.replace(/\s+/g, " "), - /MODEL_SELECTOR_MODELS = \[ AvailableModels\.ALIBABA_QWEN3_7_MAX, AvailableModels\.MOONSHOTAI_KIMI_K2_6, AvailableModels\.XIAOMI_MIMO_V2_5_PRO, \] as const/, - "Expected the chat model selector to default to Qwen 3.7 Max before Kimi K2.6 and MiMo V2.5 Pro." + /MODEL_SELECTOR_MODELS = \[ AvailableModels\.ALIBABA_QWEN3_7_MAX, AvailableModels\.MOONSHOTAI_KIMI_K2_6, \] as const/, + "Expected the chat model selector to default to Qwen 3.7 Max before Kimi K2.6." ) assert.match( @@ -58,21 +52,9 @@ test("shared model registry includes the curated gateway models", async () => { "Expected ModelInfos to define display metadata for ALIBABA_QWEN3_7_MAX." ) - assert.match( - source, - /\[AvailableModels\.GOOGLE_GEMINI_3_1_PRO_PREVIEW\]:\s*\{[\s\S]*name:\s*"Gemini 3\.1 Pro Preview"/, - "Expected ModelInfos to define display metadata for GOOGLE_GEMINI_3_1_PRO_PREVIEW." - ) - assert.match( source, /\[AvailableModels\.MOONSHOTAI_KIMI_K2_6\]:\s*\{[\s\S]*name:\s*"Kimi K2\.6"/, "Expected ModelInfos to define display metadata for MOONSHOTAI_KIMI_K2_6." ) - - assert.match( - source, - /\[AvailableModels\.XIAOMI_MIMO_V2_5_PRO\]:\s*\{[\s\S]*name:\s*"MiMo V2\.5 Pro"/, - "Expected ModelInfos to define display metadata for XIAOMI_MIMO_V2_5_PRO." - ) }) diff --git a/tests/stubs/actions-api-keys.mjs b/tests/stubs/actions-api-keys.mjs index a46ef84..c75c178 100644 --- a/tests/stubs/actions-api-keys.mjs +++ b/tests/stubs/actions-api-keys.mjs @@ -5,7 +5,6 @@ export function getModels() { getTestMocks().apiKeys?.getModels?.() ?? [ { id: "alibaba/qwen3.7-max" }, { id: "moonshotai/kimi-k2.6" }, - { id: "xiaomi/mimo-v2.5-pro" }, ] ) } diff --git a/tests/thread-payload-contract.test.mjs b/tests/thread-payload-contract.test.mjs index 8a8770d..2b7a9a6 100644 --- a/tests/thread-payload-contract.test.mjs +++ b/tests/thread-payload-contract.test.mjs @@ -126,7 +126,7 @@ test("thread store delegates parsing and persistence shaping to the payload help test("thread payload drops legacy run-mode metadata from stored threads", () => { const parsed = parseThreadPayload({ id: "thread-1", - model: "google/gemini-3.1-pro-preview", + model: "alibaba/qwen3.7-max", messages: [ { id: "message-1",