From 78908fe14aaa8a532f73e908c10a5b947ef6ba68 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 15 Jun 2026 05:24:30 +0000
Subject: [PATCH] Remove MiMo V2.5 Pro and Gemini 3.1 Pro Preview models

Drop both models from the shared registry and clean up every reference:

- models.ts: remove from AvailableModels, SUPPORTED_MODELS,
  MODEL_SELECTOR_MODELS, and ModelInfos (Qwen 3.7 Max and Kimi K2.6 remain)
- agent-prompt-steering.ts: drop the google/xiaomi provider overlays,
  narrow PromptProvider, and remove the resolver branches
- Delete ai-sdk-gateway-provider-options.ts (it only configured Gemini's
  thinking knob) and strip the now-dead taskMode threading through the
  /api/agent route handler, agent-route, and agent-runtime (task mode
  still drives the prompt overlay)
- Update tests + docs; delete the provider-options test
  (agent-runtime-task-mode-routing.test.mjs)

https://claude.ai/code/session_01JzzEBjdPyv9GmBrz6PEeWP
---
 CLAUDE.md                                     | 18 ++--
 README.md                                     |  2 +-
 src/app/api/agent/route.ts                    |  1 -
 src/lib/server/agent-prompt-steering.ts       | 26 +-----
 src/lib/server/agent-route.ts                 |  3 -
 .../llm/agent-runtime-synthesis-gating.ts     |  6 +-
 src/lib/server/llm/agent-runtime.ts           | 14 ---
 .../llm/ai-sdk-gateway-provider-options.ts    | 59 ------------
 src/lib/shared/llm/models.ts                  | 13 ---
 tests/agent-helper-behavior.test.mjs          | 22 ++---
 .../agent-prompt-steering-inference.test.mjs  | 27 ++----
 tests/agent-prompt-steering.test.mjs          | 14 +--
 .../agent-runtime-task-mode-routing.test.mjs  | 91 -------------------
 tests/agent-system-prompt.test.mjs            |  2 +-
 tests/gateway-search-tools.test.mjs           |  4 -
 tests/model-registry.test.mjs                 | 36 ++------
 tests/stubs/actions-api-keys.mjs              |  1 -
 tests/thread-payload-contract.test.mjs        |  2 +-
 18 files changed, 42 insertions(+), 299 deletions(-)
 delete mode 100644 src/lib/server/llm/ai-sdk-gateway-provider-options.ts
 delete mode 100644 tests/agent-runtime-task-mode-routing.test.mjs
diff --git a/CLAUDE.md b/CLAUDE.md
index 17992c1..1e4ebee 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -78,7 +78,7 @@ This boundary is **enforced by Next.js bundling at build time** (importing `pg`/
 
 ### Task Modes
 
-**Task mode** (`inferPromptTaskMode` in `agent-prompt-steering.ts`) is inferred from message content and drives only the **prompt overlay text** (`TASK MODE OVERLAY: <MODE>`) and the Gemini thinking level. Modes: `general`, `coding`, `debugging`, `writing`, `research`, `high_stakes`, `closed_answer`, `instruction_following`. The tool-step budget is a single fixed constant (`AGENT_TOOL_MAX_STEPS`) — there is no per-request runtime-profile selection. (The `research` task mode is an automatic content-based overlay, not a user-facing toggle.)
+**Task mode** (`inferPromptTaskMode` in `agent-prompt-steering.ts`) is inferred from message content and drives only the **prompt overlay text** (`TASK MODE OVERLAY: <MODE>`). Modes: `general`, `coding`, `debugging`, `writing`, `research`, `high_stakes`, `closed_answer`, `instruction_following`. The tool-step budget is a single fixed constant (`AGENT_TOOL_MAX_STEPS`) — there is no per-request runtime-profile selection. (The `research` task mode is an automatic content-based overlay, not a user-facing toggle.)
 
 ### System Prompt Composition
 
@@ -86,7 +86,7 @@ This boundary is **enforced by Next.js bundling at build time** (importing `pg`/
 
 1. `OPERATING INSTRUCTIONS` — `DEFAULT_OPERATING_INSTRUCTION` (`src/lib/shared/llm/system-instructions.ts`)
 2. `RUNTIME DATE CONTEXT` — current UTC timestamp + user timezone (from `X-User-Timezone` header)
-3. **Provider overlay** (`PROVIDER OVERLAY: ALIBABA|GOOGLE|MOONSHOTAI|XIAOMI`) — keyed by the model's **provider org**, not its nickname (alibaba=Qwen, google=Gemini, moonshotai=Kimi, xiaomi=MiMo). Always applied for a supported model.
+3. **Provider overlay** (`PROVIDER OVERLAY: ALIBABA|MOONSHOTAI`) — keyed by the model's **provider org**, not its nickname (alibaba=Qwen, moonshotai=Kimi). Always applied for a supported model.
 4. **Task mode overlay** (`TASK MODE OVERLAY: <MODE>`)
 5. `IDENTITY AND TONE CONTEXT` — `DEFAULT_SOUL_FALLBACK_INSTRUCTION` (`src/lib/shared/llm/system-instructions.ts`)
 6. `AUTH USER CONTEXT` — authenticated user id, name, email
@@ -154,15 +154,12 @@ Each tool is only registered when its requirements are met.
 
 All models are defined in `src/lib/shared/llm/models.ts`:
 
-| Key                             | Model ID                        | Display Name           |
-| ------------------------------- | ------------------------------- | ---------------------- |
-| `ALIBABA_QWEN3_7_MAX`           | `alibaba/qwen3.7-max`           | Qwen 3.7 Max           |
-| `GOOGLE_GEMINI_3_1_PRO_PREVIEW` | `google/gemini-3.1-pro-preview` | Gemini 3.1 Pro Preview |
-| `MOONSHOTAI_KIMI_K2_6`          | `moonshotai/kimi-k2.6`          | Kimi K2.6              |
-| `XIAOMI_MIMO_V2_5_PRO`          | `xiaomi/mimo-v2.5-pro`          | MiMo V2.5 Pro          |
+| Key                    | Model ID               | Display Name |
+| ---------------------- | ---------------------- | ------------ |
+| `ALIBABA_QWEN3_7_MAX`  | `alibaba/qwen3.7-max`  | Qwen 3.7 Max |
+| `MOONSHOTAI_KIMI_K2_6` | `moonshotai/kimi-k2.6` | Kimi K2.6    |
 
-- `MODEL_SELECTOR_MODELS` — the chat selector subset: Qwen 3.7 Max, Kimi K2.6, MiMo V2.5 Pro.
-- Gemini stays in `SUPPORTED_MODELS` for Gateway availability but is not a standalone chat selector option.
+- `MODEL_SELECTOR_MODELS` — the models offered in the chat selector: Qwen 3.7 Max and Kimi K2.6 (currently the same set as `SUPPORTED_MODELS`).
 - The agent is text-only: all chat input is plain text (no image, file, or PDF input).
 - Adding a model means updating `AvailableModels`, `ModelInfos`, `SUPPORTED_MODELS`, and optionally `MODEL_SELECTOR_MODELS`. `/api/models` filters this registry by configured keys (`getModels()` in `src/lib/actions/api-keys.ts`).
 
@@ -298,7 +295,6 @@ src/
         agent-runtime-synthesis-gating.ts # Final-step + mid-budget synthesis predicates
         gateway-responses.ts              # startGatewayResponseStream
         gateway-client.ts                 # undici dispatcher for AI Gateway
-        ai-sdk-gateway-provider-options.ts # Per-model provider options (Gemini thinking)
         ai-sdk-tavily-tools.ts            # tavily_search / tavily_extract
         code-execution-tools.ts           # Sandboxed JS/Python execution
         initial-reasoning-chunk-sanitizer.ts # Redacted-reasoning filtering
diff --git a/README.md b/README.md
index d1bc2f8..59c7d86 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Chloei
 
-Chloei is a Next.js 16 chat app backed by Vercel AI Gateway. It currently exposes a curated model selector that defaults to Qwen 3.7 Max and also includes Kimi K2.6 and MiMo V2.5 Pro, and offers local code execution, optional Tavily retrieval, and Better Auth email/password authentication with PostgreSQL-backed users and sessions.
+Chloei is a Next.js 16 chat app backed by Vercel AI Gateway. It currently exposes a curated model selector that defaults to Qwen 3.7 Max and also includes Kimi K2.6, and offers local code execution, optional Tavily retrieval, and Better Auth email/password authentication with PostgreSQL-backed users and sessions.
 
 ## Documentation
 
diff --git a/src/app/api/agent/route.ts b/src/app/api/agent/route.ts
index 6877310..d0fda79 100644
--- a/src/app/api/agent/route.ts
+++ b/src/app/api/agent/route.ts
@@ -240,7 +240,6 @@ export async function POST(request: NextRequest) {
         aiGatewayApiKey,
         tavilyApiKey,
         userTimeZone,
-        taskMode: promptTaskMode,
         userId: session.user.id,
         featureFlags,
         messages: parsedRequest.messages,
diff --git a/src/lib/server/agent-prompt-steering.ts b/src/lib/server/agent-prompt-steering.ts
index 122e659..9747b3b 100644
--- a/src/lib/server/agent-prompt-steering.ts
+++ b/src/lib/server/agent-prompt-steering.ts
@@ -7,7 +7,7 @@ import {
   type PromptTextMessage,
 } from "./prompt-message-utils"
 
-export type PromptProvider = "alibaba" | "google" | "moonshotai" | "xiaomi"
+export type PromptProvider = "alibaba" | "moonshotai"
 
 export type PromptTaskMode =
   | "general"
@@ -64,14 +64,6 @@ Use Qwen reasoning mode efficiently.
 - On format-sensitive tasks, do a literal final-format check before finishing.
 - Treat hard word, line, and sentence caps as hard caps. Count the final output when close to the limit.
 - After tool use, synthesize the result and stop. Do not replay raw tool traces.
-`.trim(),
-  google: `
-Use Gemini reasoning mode efficiently.
-- Spend the thinking budget on the parts of the task that are actually uncertain; do not narrate planning that adds no information.
-- Prefer direct execution and verification over speculative narration.
-- On format-sensitive tasks, do a literal final-format check before finishing.
-- Treat hard word, line, and sentence caps as hard caps. Count the final output when close to the limit.
-- After tool use, synthesize the result and stop. Do not replay raw tool traces.
 `.trim(),
   moonshotai: `
 Use Kimi reasoning mode efficiently.
@@ -80,14 +72,6 @@ Use Kimi reasoning mode efficiently.
 - On format-sensitive tasks, do a literal final-format check before finishing.
 - Treat hard word, line, and sentence caps as hard caps. Count the final output when close to the limit.
 - After tool use, synthesize the result and stop. Do not replay raw tool traces.
-`.trim(),
-  xiaomi: `
-Use MiMo reasoning mode efficiently.
-- Optimize for streaming latency: start producing the user-facing answer as soon as you have a defensible thread; refine in-line.
-- Prefer direct execution and verification over speculative narration.
-- On format-sensitive tasks, do a literal final-format check before finishing.
-- Treat hard word, line, and sentence caps as hard caps. Count the final output when close to the limit.
-- After tool use, synthesize the result and stop. Do not replay raw tool traces.
 `.trim(),
 }
 
@@ -157,18 +141,10 @@ export function resolvePromptProvider(model: ModelType): PromptProvider {
     return "alibaba"
   }
 
-  if (model.startsWith("google/")) {
-    return "google"
-  }
-
   if (model.startsWith("moonshotai/")) {
     return "moonshotai"
   }
 
-  if (model.startsWith("xiaomi/")) {
-    return "xiaomi"
-  }
-
   throw new Error(`Unsupported model provider for model: ${model}`)
 }
 
diff --git a/src/lib/server/agent-route.ts b/src/lib/server/agent-route.ts
index 6b722e3..05f90fa 100644
--- a/src/lib/server/agent-route.ts
+++ b/src/lib/server/agent-route.ts
@@ -15,7 +15,6 @@ import {
   resolveDefaultModelSelectorModel,
 } from "@/lib/shared"
 
-import type { PromptTaskMode } from "./agent-prompt-steering"
 import {
   AGENT_MAX_MESSAGE_CHARS,
   AGENT_MAX_MESSAGES,
@@ -102,7 +101,6 @@ interface CreateAgentStreamResponseParams {
   aiGatewayApiKey: string
   tavilyApiKey?: string
   userTimeZone?: string
-  taskMode: PromptTaskMode
   userId?: string
   featureFlags?: AgentFeatureFlags
   messages: AgentStreamRequest["messages"]
@@ -517,7 +515,6 @@ export function createAgentStreamResponse(
           aiGatewayApiKey: params.aiGatewayApiKey,
           tavilyApiKey: params.tavilyApiKey,
           userTimeZone: params.userTimeZone,
-          taskMode: params.taskMode,
           userId: params.userId,
           featureFlags: params.featureFlags,
           messages: params.messages,
diff --git a/src/lib/server/llm/agent-runtime-synthesis-gating.ts b/src/lib/server/llm/agent-runtime-synthesis-gating.ts
index fc06b04..bc53307 100644
--- a/src/lib/server/llm/agent-runtime-synthesis-gating.ts
+++ b/src/lib/server/llm/agent-runtime-synthesis-gating.ts
@@ -16,9 +16,9 @@ export function shouldNudgeMidBudgetSynthesis(
   if (toolMaxSteps <= 3) {
     return false
   }
-  // Kick in at one-third of budget. The failing 10-K tasks (Kimi K2.6 and
-  // Gemini 3.1 Pro) tend to stop naturally with empty text after only 5-8
-  // tool calls (~steps 4-7 of 20); half-budget fires too late to reach them.
+  // Kick in at one-third of budget. The failing 10-K tasks (e.g. on Kimi
+  // K2.6) tend to stop naturally with empty text after only 5-8 tool calls
+  // (~steps 4-7 of 20); half-budget fires too late to reach them.
   const threshold = Math.max(2, Math.floor(toolMaxSteps / 3))
   return stepNumber >= threshold && stepNumber < toolMaxSteps - 1
 }
diff --git a/src/lib/server/llm/agent-runtime.ts b/src/lib/server/llm/agent-runtime.ts
index db61a1e..447b3f2 100644
--- a/src/lib/server/llm/agent-runtime.ts
+++ b/src/lib/server/llm/agent-runtime.ts
@@ -7,10 +7,6 @@ import {
 } from "ai"
 
 import { createLogger } from "@/lib/logger"
-import {
-  type PromptTaskMode,
-  resolvePromptProvider,
-} from "@/lib/server/agent-prompt-steering"
 import { AGENT_TOOL_MAX_STEPS } from "@/lib/server/agent-runtime-config"
 import {
   type AgentFeatureFlags,
@@ -27,7 +23,6 @@ import {
   shouldForceFinalSynthesisStep,
   shouldNudgeMidBudgetSynthesis,
 } from "./agent-runtime-synthesis-gating"
-import { getAiSdkGatewayProviderOptionsForTaskMode } from "./ai-sdk-gateway-provider-options"
 import {
   createAiSdkTavilyTools,
   getAiSdkTavilyToolCallMetadata,
@@ -53,7 +48,6 @@ export interface StartAgentRuntimeStreamParams {
   userTimeZone?: string
   messages: AgentInputMessage[]
   systemInstruction: string
-  taskMode: PromptTaskMode
   temperature?: number
   signal?: AbortSignal
   userId?: string
@@ -256,10 +250,6 @@ export async function* startAgentRuntimeStream(
     ...(params.temperature !== undefined
       ? { temperature: params.temperature }
       : {}),
-    providerOptions: getAiSdkGatewayProviderOptionsForTaskMode({
-      provider: resolvePromptProvider(params.model),
-      taskMode: params.taskMode,
-    }),
     experimental_telemetry: {
       isEnabled: true,
       recordInputs: featureFlags.telemetryRecordIo,
@@ -508,10 +498,6 @@ export async function* startAgentRuntimeStream(
         ...(params.temperature !== undefined
           ? { temperature: params.temperature }
           : {}),
-        providerOptions: getAiSdkGatewayProviderOptionsForTaskMode({
-          provider: resolvePromptProvider(params.model),
-          taskMode: params.taskMode,
-        }),
         tools,
         toolChoice: "none" as const,
         stopWhen: stepCountIs(1),
diff --git a/src/lib/server/llm/ai-sdk-gateway-provider-options.ts b/src/lib/server/llm/ai-sdk-gateway-provider-options.ts
deleted file mode 100644
index d4156b6..0000000
--- a/src/lib/server/llm/ai-sdk-gateway-provider-options.ts
+++ /dev/null
@@ -1,59 +0,0 @@
-import type {
-  PromptProvider,
-  PromptTaskMode,
-} from "@/lib/server/agent-prompt-steering"
-
-type GeminiThinkingLevel = "low" | "medium" | "high"
-
-function buildGeminiThinkingOptions(level: GeminiThinkingLevel) {
-  return {
-    google: {
-      thinkingConfig: {
-        thinkingLevel: level,
-        includeThoughts: true,
-      },
-    },
-  } as const
-}
-
-/**
- * Map a (provider, taskMode) pair to AI SDK provider options.
- *
- * Today only Gemini exposes an explicit reasoning knob through AI Gateway
- * (`thinkingConfig.thinkingLevel`); Kimi K2.6, MiMo V2.5 Pro, and Qwen 3.7
- * Max reason natively without a per-call provider option. Returning `{}` for
- * those providers is the correct no-op — once AI Gateway surfaces reasoning
- * options for them, just extend this map.
- */
-export function getAiSdkGatewayProviderOptionsForTaskMode(params: {
-  provider: PromptProvider
-  taskMode: PromptTaskMode
-}) {
-  if (params.provider !== "google") {
-    return {}
-  }
-
-  switch (params.taskMode) {
-    case "research":
-    case "high_stakes":
-    case "debugging":
-      return buildGeminiThinkingOptions("high")
-    case "coding":
-      return buildGeminiThinkingOptions("medium")
-    case "instruction_following":
-    case "closed_answer":
-      // Format-sensitive / one-answer tasks: minimal thinking, keep includeThoughts
-      // on so the trace stays observable for telemetry.
-      return buildGeminiThinkingOptions("low")
-    case "writing":
-    case "general":
-      return {}
-  }
-
-  // Exhaustiveness guard: if a new PromptTaskMode is added, this assignment
-  // becomes a type error so we don't silently fall through. At runtime, return
-  // the safe no-op so an unknown mode never blocks the request.
-  const _unhandledTaskMode: never = params.taskMode
-  void _unhandledTaskMode
-  return {}
-}
diff --git a/src/lib/shared/llm/models.ts b/src/lib/shared/llm/models.ts
index 08eecb6..90c166e 100644
--- a/src/lib/shared/llm/models.ts
+++ b/src/lib/shared/llm/models.ts
@@ -1,8 +1,6 @@
 export const AvailableModels = {
   ALIBABA_QWEN3_7_MAX: "alibaba/qwen3.7-max",
-  GOOGLE_GEMINI_3_1_PRO_PREVIEW: "google/gemini-3.1-pro-preview",
   MOONSHOTAI_KIMI_K2_6: "moonshotai/kimi-k2.6",
-  XIAOMI_MIMO_V2_5_PRO: "xiaomi/mimo-v2.5-pro",
 } as const
 
 export type ModelType = (typeof AvailableModels)[keyof typeof AvailableModels]
@@ -21,9 +19,7 @@ export interface ModelInfo {
 
 export const SUPPORTED_MODELS = [
   AvailableModels.ALIBABA_QWEN3_7_MAX,
-  AvailableModels.GOOGLE_GEMINI_3_1_PRO_PREVIEW,
   AvailableModels.MOONSHOTAI_KIMI_K2_6,
-  AvailableModels.XIAOMI_MIMO_V2_5_PRO,
 ] as const
 
 export const ALL_MODELS = [...SUPPORTED_MODELS] as const
@@ -31,7 +27,6 @@ export const ALL_MODELS = [...SUPPORTED_MODELS] as const
 export const MODEL_SELECTOR_MODELS = [
   AvailableModels.ALIBABA_QWEN3_7_MAX,
   AvailableModels.MOONSHOTAI_KIMI_K2_6,
-  AvailableModels.XIAOMI_MIMO_V2_5_PRO,
 ] as const
 
 const MODEL_SELECTOR_MODEL_SET: ReadonlySet<ModelType> = new Set(
@@ -66,16 +61,8 @@ export const ModelInfos: Record<ModelType, ModelInfo> = {
     id: AvailableModels.ALIBABA_QWEN3_7_MAX,
     name: "Qwen 3.7 Max",
   },
-  [AvailableModels.GOOGLE_GEMINI_3_1_PRO_PREVIEW]: {
-    id: AvailableModels.GOOGLE_GEMINI_3_1_PRO_PREVIEW,
-    name: "Gemini 3.1 Pro Preview",
-  },
   [AvailableModels.MOONSHOTAI_KIMI_K2_6]: {
     id: AvailableModels.MOONSHOTAI_KIMI_K2_6,
     name: "Kimi K2.6",
   },
-  [AvailableModels.XIAOMI_MIMO_V2_5_PRO]: {
-    id: AvailableModels.XIAOMI_MIMO_V2_5_PRO,
-    name: "MiMo V2.5 Pro",
-  },
 }
diff --git a/tests/agent-helper-behavior.test.mjs b/tests/agent-helper-behavior.test.mjs
index be0c69d..57e5b5d 100644
--- a/tests/agent-helper-behavior.test.mjs
+++ b/tests/agent-helper-behavior.test.mjs
@@ -173,7 +173,6 @@ test("agent helper validates total size, last-message role, and default model su
     availableModels: [
       { id: "alibaba/qwen3.7-max" },
       { id: "moonshotai/kimi-k2.6" },
-      { id: "xiaomi/mimo-v2.5-pro" },
     ],
     requestId: "request-default-mode-qwen",
   })
@@ -181,29 +180,26 @@ test("agent helper validates total size, last-message role, and default model su
   assert(!(defaultModeWithQwenResult instanceof Response))
   assert.equal(defaultModeWithQwenResult.selectedModel, "alibaba/qwen3.7-max")
 
-  const standaloneResearchModelResult = parseAgentStreamRequest({
+  const unavailableModelResult = parseAgentStreamRequest({
     body: {
-      model: "google/gemini-3.1-pro-preview",
+      model: "alibaba/qwen3.7-max",
       messages: [
         {
           role: "user",
-          content: "Use Gemini as a normal chat model.",
+          content: "Use a model the caller cannot access.",
         },
       ],
     },
-    availableModels: [
-      { id: "moonshotai/kimi-k2.6" },
-      { id: "google/gemini-3.1-pro-preview" },
-    ],
-    requestId: "request-standalone-research-model",
+    availableModels: [{ id: "moonshotai/kimi-k2.6" }],
+    requestId: "request-unavailable-model",
   })
 
-  assert(standaloneResearchModelResult instanceof Response)
-  assert.equal(standaloneResearchModelResult.status, 400)
-  assert.deepEqual(await standaloneResearchModelResult.json(), {
+  assert(unavailableModelResult instanceof Response)
+  assert.equal(unavailableModelResult.status, 400)
+  assert.deepEqual(await unavailableModelResult.json(), {
     error: "Unsupported model selected.",
     errorCode: "AGENT_UNSUPPORTED_MODEL",
-    requestId: "request-standalone-research-model",
+    requestId: "request-unavailable-model",
   })
 
   const unknownFieldResult = parseAgentStreamRequest({
diff --git a/tests/agent-prompt-steering-inference.test.mjs b/tests/agent-prompt-steering-inference.test.mjs
index 3c782c9..29f2af5 100644
--- a/tests/agent-prompt-steering-inference.test.mjs
+++ b/tests/agent-prompt-steering-inference.test.mjs
@@ -182,24 +182,12 @@ test("inferPromptTaskMode userExpertise=writing routes ambiguous prompts to writ
 })
 
 test("provider overlays are differentiated across providers", () => {
-  const google = createPromptSteeringBlocks({
-    provider: "google",
-    taskMode: "research",
-  })
-    .map((block) => block.body)
-    .join("\n\n")
   const moonshot = createPromptSteeringBlocks({
     provider: "moonshotai",
     taskMode: "research",
   })
     .map((block) => block.body)
     .join("\n\n")
-  const xiaomi = createPromptSteeringBlocks({
-    provider: "xiaomi",
-    taskMode: "research",
-  })
-    .map((block) => block.body)
-    .join("\n\n")
   const alibaba = createPromptSteeringBlocks({
     provider: "alibaba",
     taskMode: "research",
@@ -207,26 +195,25 @@ test("provider overlays are differentiated across providers", () => {
     .map((block) => block.body)
     .join("\n\n")
 
-  assert.match(google, /thinking budget/i)
+  assert.match(moonshot, /Use Kimi reasoning mode/i)
   assert.match(moonshot, /long context/i)
-  assert.match(xiaomi, /streaming latency/i)
-  assert.match(alibaba, /Qwen reasoning mode/i)
+  assert.match(alibaba, /Use Qwen reasoning mode/i)
   assert.notEqual(
-    google.split("Use Gemini")[1],
-    moonshot.split("Use Kimi")[1],
-    "Gemini and Kimi overlays should not be byte-identical."
+    alibaba.split("Use Qwen")[1], // overlay text after the model name,
+    moonshot.split("Use Kimi")[1], // so a name-only difference fails
+    "Qwen and Kimi overlays should not be byte-identical."
   )
 })
 
 test("debugging and writing overlays appear when their task modes are selected", () => {
   const debugging = createPromptSteeringBlocks({
-    provider: "google",
+    provider: "moonshotai",
     taskMode: "debugging",
   })
     .map((block) => block.body)
     .join("\n\n")
   const writing = createPromptSteeringBlocks({
-    provider: "google",
+    provider: "moonshotai",
     taskMode: "writing",
   })
     .map((block) => block.body)
diff --git a/tests/agent-prompt-steering.test.mjs b/tests/agent-prompt-steering.test.mjs
index 4921b68..fa2a564 100644
--- a/tests/agent-prompt-steering.test.mjs
+++ b/tests/agent-prompt-steering.test.mjs
@@ -25,31 +25,23 @@ test("prompt steering resolves supported model providers", () => {
     resolvePromptProvider(AvailableModels.ALIBABA_QWEN3_7_MAX),
     "alibaba"
   )
-  assert.equal(
-    resolvePromptProvider(AvailableModels.GOOGLE_GEMINI_3_1_PRO_PREVIEW),
-    "google"
-  )
   assert.equal(
     resolvePromptProvider(AvailableModels.MOONSHOTAI_KIMI_K2_6),
     "moonshotai"
   )
-  assert.equal(
-    resolvePromptProvider(AvailableModels.XIAOMI_MIMO_V2_5_PRO),
-    "xiaomi"
-  )
 })
 
 test("prompt steering includes provider overlays for supported models", () => {
   const blocks = createPromptSteeringBlocks({
-    provider: "google",
+    provider: "moonshotai",
     taskMode: "research",
   })
   const overlayText = blocks.map((block) => block.body).join("\n\n")
 
   assert.match(
     overlayText,
-    /Use Gemini reasoning mode efficiently/,
-    "Expected Google prompts to receive the Gemini provider overlay."
+    /Use Kimi reasoning mode efficiently/,
+    "Expected Moonshot AI prompts to receive the Kimi provider overlay."
   )
   assert.match(
     overlayText,
diff --git a/tests/agent-runtime-task-mode-routing.test.mjs b/tests/agent-runtime-task-mode-routing.test.mjs
deleted file mode 100644
index de7c3ec..0000000
--- a/tests/agent-runtime-task-mode-routing.test.mjs
+++ /dev/null
@@ -1,91 +0,0 @@
-import assert from "node:assert/strict"
-import path from "node:path"
-import test from "node:test"
-import { fileURLToPath, pathToFileURL } from "node:url"
-
-import "./register-ts-path-hooks.mjs"
-
-const cwd = fileURLToPath(new URL("..", import.meta.url))
-const providerOptionsUrl = pathToFileURL(
-  path.join(cwd, "src/lib/server/llm/ai-sdk-gateway-provider-options.ts")
-).href
-
-const { getAiSdkGatewayProviderOptionsForTaskMode } = await import(
-  providerOptionsUrl
-)
-
-test("Gemini gets high thinking for research, high_stakes, debugging", () => {
-  for (const taskMode of ["research", "high_stakes", "debugging"]) {
-    assert.deepEqual(
-      getAiSdkGatewayProviderOptionsForTaskMode({
-        provider: "google",
-        taskMode,
-      }),
-      {
-        google: {
-          thinkingConfig: { thinkingLevel: "high", includeThoughts: true },
-        },
-      },
-      `expected high thinking for ${taskMode}`
-    )
-  }
-})
-
-test("Gemini gets medium thinking for coding", () => {
-  assert.deepEqual(
-    getAiSdkGatewayProviderOptionsForTaskMode({
-      provider: "google",
-      taskMode: "coding",
-    }),
-    {
-      google: {
-        thinkingConfig: { thinkingLevel: "medium", includeThoughts: true },
-      },
-    }
-  )
-})
-
-test("Gemini gets low thinking for format-sensitive modes", () => {
-  for (const taskMode of ["instruction_following", "closed_answer"]) {
-    assert.deepEqual(
-      getAiSdkGatewayProviderOptionsForTaskMode({
-        provider: "google",
-        taskMode,
-      }),
-      {
-        google: {
-          thinkingConfig: { thinkingLevel: "low", includeThoughts: true },
-        },
-      },
-      `expected low thinking for ${taskMode}`
-    )
-  }
-})
-
-test("Gemini falls back to default thinking for general/writing", () => {
-  for (const taskMode of ["general", "writing"]) {
-    assert.deepEqual(
-      getAiSdkGatewayProviderOptionsForTaskMode({
-        provider: "google",
-        taskMode,
-      }),
-      {},
-      `expected empty provider options for ${taskMode}`
-    )
-  }
-})
-
-test("non-Gemini reasoning models receive no explicit provider options", () => {
-  for (const taskMode of ["research", "coding", "debugging", "general"]) {
-    for (const provider of ["alibaba", "moonshotai", "xiaomi"]) {
-      assert.deepEqual(
-        getAiSdkGatewayProviderOptionsForTaskMode({
-          provider,
-          taskMode,
-        }),
-        {},
-        `expected empty provider options for ${provider}+${taskMode}`
-      )
-    }
-  }
-})
diff --git a/tests/agent-system-prompt.test.mjs b/tests/agent-system-prompt.test.mjs
index 09d8175..2f85af3 100644
--- a/tests/agent-system-prompt.test.mjs
+++ b/tests/agent-system-prompt.test.mjs
@@ -112,7 +112,7 @@ test("agent system prompt places the identity block after task steering", () =>
     },
     {
       now: new Date("2026-05-03T12:34:56.000Z"),
-      provider: "google",
+      provider: "moonshotai",
       taskMode: "coding",
     }
   )
diff --git a/tests/gateway-search-tools.test.mjs b/tests/gateway-search-tools.test.mjs
index 3c94a3c..b9fa572 100644
--- a/tests/gateway-search-tools.test.mjs
+++ b/tests/gateway-search-tools.test.mjs
@@ -70,10 +70,6 @@ test("stale and fallback-only model ids fall back to Qwen 3.7 Max", () => {
           id: "alibaba/qwen3.7-max",
           name: "Qwen 3.7 Max",
         },
-        {
-          id: "google/gemini-3.1-pro-preview",
-          name: "Gemini 3.1 Pro Preview",
-        },
         {
           id: "moonshotai/kimi-k2.6",
           name: "Kimi K2.6",
diff --git a/tests/model-registry.test.mjs b/tests/model-registry.test.mjs
index c0309d2..75ed0a2 100644
--- a/tests/model-registry.test.mjs
+++ b/tests/model-registry.test.mjs
@@ -16,16 +16,16 @@ test("shared model registry includes the curated gateway models", async () => {
     "Expected legacy model ids to be fully removed from the shared model registry."
   )
 
-  assert.match(
+  assert.doesNotMatch(
     source,
-    /ALIBABA_QWEN3_7_MAX:\s*"alibaba\/qwen3\.7-max"/,
-    "Expected AvailableModels to include ALIBABA_QWEN3_7_MAX."
+    /google\/gemini-3\.1-pro-preview|xiaomi\/mimo-v2\.5-pro|GOOGLE_GEMINI_3_1_PRO_PREVIEW|XIAOMI_MIMO_V2_5_PRO|Gemini 3\.1 Pro Preview|MiMo V2\.5 Pro/,
+    "Expected Gemini 3.1 Pro Preview and MiMo V2.5 Pro to be fully removed from the shared model registry."
   )
 
   assert.match(
     source,
-    /GOOGLE_GEMINI_3_1_PRO_PREVIEW:\s*"google\/gemini-3\.1-pro-preview"/,
-    "Expected AvailableModels to include GOOGLE_GEMINI_3_1_PRO_PREVIEW."
+    /ALIBABA_QWEN3_7_MAX:\s*"alibaba\/qwen3\.7-max"/,
+    "Expected AvailableModels to include ALIBABA_QWEN3_7_MAX."
   )
 
   assert.match(
@@ -34,22 +34,16 @@ test("shared model registry includes the curated gateway models", async () => {
     "Expected AvailableModels to include MOONSHOTAI_KIMI_K2_6."
   )
 
-  assert.match(
-    source,
-    /XIAOMI_MIMO_V2_5_PRO:\s*"xiaomi\/mimo-v2\.5-pro"/,
-    "Expected AvailableModels to include XIAOMI_MIMO_V2_5_PRO."
-  )
-
   assert.match(
     source.replace(/\s+/g, " "),
-    /SUPPORTED_MODELS = \[ AvailableModels\.ALIBABA_QWEN3_7_MAX, AvailableModels\.GOOGLE_GEMINI_3_1_PRO_PREVIEW, AvailableModels\.MOONSHOTAI_KIMI_K2_6, AvailableModels\.XIAOMI_MIMO_V2_5_PRO, \] as const/,
-    "Expected SUPPORTED_MODELS to list Qwen 3.7 Max, Gemini 3.1 Pro Preview, Kimi K2.6, and MiMo V2.5 Pro."
+    /SUPPORTED_MODELS = \[ AvailableModels\.ALIBABA_QWEN3_7_MAX, AvailableModels\.MOONSHOTAI_KIMI_K2_6, \] as const/,
+    "Expected SUPPORTED_MODELS to list Qwen 3.7 Max and Kimi K2.6."
   )
 
   assert.match(
     source.replace(/\s+/g, " "),
-    /MODEL_SELECTOR_MODELS = \[ AvailableModels\.ALIBABA_QWEN3_7_MAX, AvailableModels\.MOONSHOTAI_KIMI_K2_6, AvailableModels\.XIAOMI_MIMO_V2_5_PRO, \] as const/,
-    "Expected the chat model selector to default to Qwen 3.7 Max before Kimi K2.6 and MiMo V2.5 Pro."
+    /MODEL_SELECTOR_MODELS = \[ AvailableModels\.ALIBABA_QWEN3_7_MAX, AvailableModels\.MOONSHOTAI_KIMI_K2_6, \] as const/,
+    "Expected the chat model selector to default to Qwen 3.7 Max before Kimi K2.6."
   )
 
   assert.match(
@@ -58,21 +52,9 @@ test("shared model registry includes the curated gateway models", async () => {
     "Expected ModelInfos to define display metadata for ALIBABA_QWEN3_7_MAX."
   )
 
-  assert.match(
-    source,
-    /\[AvailableModels\.GOOGLE_GEMINI_3_1_PRO_PREVIEW\]:\s*\{[\s\S]*name:\s*"Gemini 3\.1 Pro Preview"/,
-    "Expected ModelInfos to define display metadata for GOOGLE_GEMINI_3_1_PRO_PREVIEW."
-  )
-
   assert.match(
     source,
     /\[AvailableModels\.MOONSHOTAI_KIMI_K2_6\]:\s*\{[\s\S]*name:\s*"Kimi K2\.6"/,
     "Expected ModelInfos to define display metadata for MOONSHOTAI_KIMI_K2_6."
   )
-
-  assert.match(
-    source,
-    /\[AvailableModels\.XIAOMI_MIMO_V2_5_PRO\]:\s*\{[\s\S]*name:\s*"MiMo V2\.5 Pro"/,
-    "Expected ModelInfos to define display metadata for XIAOMI_MIMO_V2_5_PRO."
-  )
 })
diff --git a/tests/stubs/actions-api-keys.mjs b/tests/stubs/actions-api-keys.mjs
index a46ef84..c75c178 100644
--- a/tests/stubs/actions-api-keys.mjs
+++ b/tests/stubs/actions-api-keys.mjs
@@ -5,7 +5,6 @@ export function getModels() {
     getTestMocks().apiKeys?.getModels?.() ?? [
       { id: "alibaba/qwen3.7-max" },
       { id: "moonshotai/kimi-k2.6" },
-      { id: "xiaomi/mimo-v2.5-pro" },
     ]
   )
 }
diff --git a/tests/thread-payload-contract.test.mjs b/tests/thread-payload-contract.test.mjs
index 8a8770d..2b7a9a6 100644
--- a/tests/thread-payload-contract.test.mjs
+++ b/tests/thread-payload-contract.test.mjs
@@ -126,7 +126,7 @@ test("thread store delegates parsing and persistence shaping to the payload help
 test("thread payload drops legacy run-mode metadata from stored threads", () => {
   const parsed = parseThreadPayload({
     id: "thread-1",
-    model: "google/gemini-3.1-pro-preview",
+    model: "alibaba/qwen3.7-max",
     messages: [
       {
         id: "message-1",