chloeilabs · chloeilabs · Jun 15, 2026 · Jun 15, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -78,15 +78,15 @@ This boundary is **enforced by Next.js bundling at build time** (importing `pg`/
 
 ### Task Modes
 
-**Task mode** (`inferPromptTaskMode` in `agent-prompt-steering.ts`) is inferred from message content and drives only the **prompt overlay text** (`TASK MODE OVERLAY: <MODE>`) and the Gemini thinking level. Modes: `general`, `coding`, `debugging`, `writing`, `research`, `high_stakes`, `closed_answer`, `instruction_following`. The tool-step budget is a single fixed constant (`AGENT_TOOL_MAX_STEPS`) — there is no per-request runtime-profile selection. (The `research` task mode is an automatic content-based overlay, not a user-facing toggle.)
+**Task mode** (`inferPromptTaskMode` in `agent-prompt-steering.ts`) is inferred from message content and drives only the **prompt overlay text** (`TASK MODE OVERLAY: <MODE>`). Modes: `general`, `coding`, `debugging`, `writing`, `research`, `high_stakes`, `closed_answer`, `instruction_following`. The tool-step budget is a single fixed constant (`AGENT_TOOL_MAX_STEPS`) — there is no per-request runtime-profile selection. (The `research` task mode is an automatic content-based overlay, not a user-facing toggle.)
 
 ### System Prompt Composition
 
 `buildAgentSystemInstruction` (`src/lib/server/agent-context.ts`) assembles the prompt per-request from labeled blocks delimited by `--- BEGIN <LABEL> ---` / `--- END <LABEL> ---`, in this order:
 
 1. `OPERATING INSTRUCTIONS` — `DEFAULT_OPERATING_INSTRUCTION` (`src/lib/shared/llm/system-instructions.ts`)
 2. `RUNTIME DATE CONTEXT` — current UTC timestamp + user timezone (from `X-User-Timezone` header)
-3. **Provider overlay** (`PROVIDER OVERLAY: ALIBABA|GOOGLE|MOONSHOTAI|XIAOMI`) — keyed by the model's **provider org**, not its nickname (alibaba=Qwen, google=Gemini, moonshotai=Kimi, xiaomi=MiMo). Always applied for a supported model.
+3. **Provider overlay** (`PROVIDER OVERLAY: ALIBABA|MOONSHOTAI`) — keyed by the model's **provider org**, not its nickname (alibaba=Qwen, moonshotai=Kimi). Always applied for a supported model.
 4. **Task mode overlay** (`TASK MODE OVERLAY: <MODE>`)
 5. `IDENTITY AND TONE CONTEXT` — `DEFAULT_SOUL_FALLBACK_INSTRUCTION` (`src/lib/shared/llm/system-instructions.ts`)
 6. `AUTH USER CONTEXT` — authenticated user id, name, email
@@ -154,15 +154,12 @@ Each tool is only registered when its requirements are met.
 
 All models are defined in `src/lib/shared/llm/models.ts`:
 
-| Key                             | Model ID                        | Display Name           |
-| ------------------------------- | ------------------------------- | ---------------------- |
-| `ALIBABA_QWEN3_7_MAX`           | `alibaba/qwen3.7-max`           | Qwen 3.7 Max           |
-| `GOOGLE_GEMINI_3_1_PRO_PREVIEW` | `google/gemini-3.1-pro-preview` | Gemini 3.1 Pro Preview |
-| `MOONSHOTAI_KIMI_K2_6`          | `moonshotai/kimi-k2.6`          | Kimi K2.6              |
-| `XIAOMI_MIMO_V2_5_PRO`          | `xiaomi/mimo-v2.5-pro`          | MiMo V2.5 Pro          |
+| Key                    | Model ID               | Display Name |
+| ---------------------- | ---------------------- | ------------ |
+| `ALIBABA_QWEN3_7_MAX`  | `alibaba/qwen3.7-max`  | Qwen 3.7 Max |
+| `MOONSHOTAI_KIMI_K2_6` | `moonshotai/kimi-k2.6` | Kimi K2.6    |
 
-- `MODEL_SELECTOR_MODELS` — the chat selector subset: Qwen 3.7 Max, Kimi K2.6, MiMo V2.5 Pro.
-- Gemini stays in `SUPPORTED_MODELS` for Gateway availability but is not a standalone chat selector option.
+- `MODEL_SELECTOR_MODELS` — the chat selector subset: Qwen 3.7 Max and Kimi K2.6.
 - The agent is text-only: all chat input is plain text (no image, file, or PDF input).
 - Adding a model means updating `AvailableModels`, `ModelInfos`, `SUPPORTED_MODELS`, and optionally `MODEL_SELECTOR_MODELS`. `/api/models` filters this registry by configured keys (`getModels()` in `src/lib/actions/api-keys.ts`).
 
@@ -298,7 +295,6 @@ src/
         agent-runtime-synthesis-gating.ts # Final-step + mid-budget synthesis predicates
         gateway-responses.ts              # startGatewayResponseStream
         gateway-client.ts                 # undici dispatcher for AI Gateway
-        ai-sdk-gateway-provider-options.ts # Per-model provider options (Gemini thinking)
         ai-sdk-tavily-tools.ts            # tavily_search / tavily_extract
         code-execution-tools.ts           # Sandboxed JS/Python execution
         initial-reasoning-chunk-sanitizer.ts # Redacted-reasoning filtering

diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # Chloei
 
-Chloei is a Next.js 16 chat app backed by Vercel AI Gateway. It currently exposes a curated model selector that defaults to Qwen 3.7 Max and also includes Kimi K2.6 and MiMo V2.5 Pro, and offers local code execution, optional Tavily retrieval, and Better Auth email/password authentication with PostgreSQL-backed users and sessions.
+Chloei is a Next.js 16 chat app backed by Vercel AI Gateway. It currently exposes a curated model selector (Qwen 3.7 Max by default, plus Kimi K2.6) and offers local code execution, optional Tavily retrieval, and Better Auth email/password authentication with PostgreSQL-backed users and sessions.
 
 ## Documentation
 

diff --git a/src/app/api/agent/route.ts b/src/app/api/agent/route.ts
@@ -240,7 +240,6 @@ export async function POST(request: NextRequest) {
         aiGatewayApiKey,
         tavilyApiKey,
         userTimeZone,
-        taskMode: promptTaskMode,
         userId: session.user.id,
         featureFlags,
         messages: parsedRequest.messages,

diff --git a/src/lib/server/agent-prompt-steering.ts b/src/lib/server/agent-prompt-steering.ts
@@ -7,7 +7,7 @@ import {
   type PromptTextMessage,
 } from "./prompt-message-utils"
 
-export type PromptProvider = "alibaba" | "google" | "moonshotai" | "xiaomi"
+export type PromptProvider = "alibaba" | "moonshotai"
 
 export type PromptTaskMode =
   | "general"
@@ -64,14 +64,6 @@ Use Qwen reasoning mode efficiently.
 - On format-sensitive tasks, do a literal final-format check before finishing.
 - Treat hard word, line, and sentence caps as hard caps. Count the final output when close to the limit.
 - After tool use, synthesize the result and stop. Do not replay raw tool traces.
-`.trim(),
-  google: `
-Use Gemini reasoning mode efficiently.
-- Spend the thinking budget on the parts of the task that are actually uncertain; do not narrate planning that adds no information.
-- Prefer direct execution and verification over speculative narration.
-- On format-sensitive tasks, do a literal final-format check before finishing.
-- Treat hard word, line, and sentence caps as hard caps. Count the final output when close to the limit.
-- After tool use, synthesize the result and stop. Do not replay raw tool traces.
 `.trim(),
   moonshotai: `
 Use Kimi reasoning mode efficiently.
@@ -80,14 +72,6 @@ Use Kimi reasoning mode efficiently.
 - On format-sensitive tasks, do a literal final-format check before finishing.
 - Treat hard word, line, and sentence caps as hard caps. Count the final output when close to the limit.
 - After tool use, synthesize the result and stop. Do not replay raw tool traces.
-`.trim(),
-  xiaomi: `
-Use MiMo reasoning mode efficiently.
-- Optimize for streaming latency: start producing the user-facing answer as soon as you have a defensible thread; refine in-line.
-- Prefer direct execution and verification over speculative narration.
-- On format-sensitive tasks, do a literal final-format check before finishing.
-- Treat hard word, line, and sentence caps as hard caps. Count the final output when close to the limit.
-- After tool use, synthesize the result and stop. Do not replay raw tool traces.
 `.trim(),
 }
 
@@ -157,18 +141,10 @@ export function resolvePromptProvider(model: ModelType): PromptProvider {
     return "alibaba"
   }
 
-  if (model.startsWith("google/")) {
-    return "google"
-  }
-
   if (model.startsWith("moonshotai/")) {
     return "moonshotai"
   }
 
-  if (model.startsWith("xiaomi/")) {
-    return "xiaomi"
-  }
-
   throw new Error(`Unsupported model provider for model: ${model}`)
 }
 

diff --git a/src/lib/server/agent-route.ts b/src/lib/server/agent-route.ts
@@ -15,7 +15,6 @@ import {
   resolveDefaultModelSelectorModel,
 } from "@/lib/shared"
 
-import type { PromptTaskMode } from "./agent-prompt-steering"
 import {
   AGENT_MAX_MESSAGE_CHARS,
   AGENT_MAX_MESSAGES,
@@ -102,7 +101,6 @@ interface CreateAgentStreamResponseParams {
   aiGatewayApiKey: string
   tavilyApiKey?: string
   userTimeZone?: string
-  taskMode: PromptTaskMode
   userId?: string
   featureFlags?: AgentFeatureFlags
   messages: AgentStreamRequest["messages"]
@@ -517,7 +515,6 @@ export function createAgentStreamResponse(
           aiGatewayApiKey: params.aiGatewayApiKey,
           tavilyApiKey: params.tavilyApiKey,
           userTimeZone: params.userTimeZone,
-          taskMode: params.taskMode,
           userId: params.userId,
           featureFlags: params.featureFlags,
           messages: params.messages,

diff --git a/src/lib/server/llm/agent-runtime-synthesis-gating.ts b/src/lib/server/llm/agent-runtime-synthesis-gating.ts
@@ -16,9 +16,9 @@ export function shouldNudgeMidBudgetSynthesis(
   if (toolMaxSteps <= 3) {
     return false
   }
-  // Kick in at one-third of budget. The failing 10-K tasks (Kimi K2.6 and
-  // Gemini 3.1 Pro) tend to stop naturally with empty text after only 5-8
-  // tool calls (~steps 4-7 of 20); half-budget fires too late to reach them.
+  // Kick in at one-third of budget. Failing 10-K tasks (seen with e.g. Kimi
+  // K2.6) tend to stop naturally with empty text after only 5-8 tool calls
+  // (~steps 4-7 of 20); half-budget fires too late to reach them.
   const threshold = Math.max(2, Math.floor(toolMaxSteps / 3))
   return stepNumber >= threshold && stepNumber < toolMaxSteps - 1
 }
diff --git a/src/lib/server/llm/agent-runtime.ts b/src/lib/server/llm/agent-runtime.ts
@@ -7,10 +7,6 @@ import {
 } from "ai"
 
 import { createLogger } from "@/lib/logger"
-import {
-  type PromptTaskMode,
-  resolvePromptProvider,
-} from "@/lib/server/agent-prompt-steering"
 import { AGENT_TOOL_MAX_STEPS } from "@/lib/server/agent-runtime-config"
 import {
   type AgentFeatureFlags,
@@ -27,7 +23,6 @@ import {
   shouldForceFinalSynthesisStep,
   shouldNudgeMidBudgetSynthesis,
 } from "./agent-runtime-synthesis-gating"
-import { getAiSdkGatewayProviderOptionsForTaskMode } from "./ai-sdk-gateway-provider-options"
 import {
   createAiSdkTavilyTools,
   getAiSdkTavilyToolCallMetadata,
@@ -53,7 +48,6 @@ export interface StartAgentRuntimeStreamParams {
   userTimeZone?: string
   messages: AgentInputMessage[]
   systemInstruction: string
-  taskMode: PromptTaskMode
   temperature?: number
   signal?: AbortSignal
   userId?: string
@@ -256,10 +250,6 @@ export async function* startAgentRuntimeStream(
     ...(params.temperature !== undefined
       ? { temperature: params.temperature }
       : {}),
-    providerOptions: getAiSdkGatewayProviderOptionsForTaskMode({
-      provider: resolvePromptProvider(params.model),
-      taskMode: params.taskMode,
-    }),
     experimental_telemetry: {
       isEnabled: true,
       recordInputs: featureFlags.telemetryRecordIo,
@@ -508,10 +498,6 @@ export async function* startAgentRuntimeStream(
         ...(params.temperature !== undefined
           ? { temperature: params.temperature }
           : {}),
-        providerOptions: getAiSdkGatewayProviderOptionsForTaskMode({
-          provider: resolvePromptProvider(params.model),
-          taskMode: params.taskMode,
-        }),
         tools,
         toolChoice: "none" as const,
         stopWhen: stepCountIs(1),

diff --git a/src/lib/server/llm/ai-sdk-gateway-provider-options.ts b/src/lib/server/llm/ai-sdk-gateway-provider-options.ts
diff --git a/src/lib/shared/llm/models.ts b/src/lib/shared/llm/models.ts
@@ -1,8 +1,6 @@
 export const AvailableModels = {
   ALIBABA_QWEN3_7_MAX: "alibaba/qwen3.7-max",
-  GOOGLE_GEMINI_3_1_PRO_PREVIEW: "google/gemini-3.1-pro-preview",
   MOONSHOTAI_KIMI_K2_6: "moonshotai/kimi-k2.6",
-  XIAOMI_MIMO_V2_5_PRO: "xiaomi/mimo-v2.5-pro",
 } as const
 
 export type ModelType = (typeof AvailableModels)[keyof typeof AvailableModels]
@@ -21,17 +19,14 @@ export interface ModelInfo {
 
 export const SUPPORTED_MODELS = [
   AvailableModels.ALIBABA_QWEN3_7_MAX,
-  AvailableModels.GOOGLE_GEMINI_3_1_PRO_PREVIEW,
   AvailableModels.MOONSHOTAI_KIMI_K2_6,
-  AvailableModels.XIAOMI_MIMO_V2_5_PRO,
 ] as const
 
 export const ALL_MODELS = [...SUPPORTED_MODELS] as const
 
 export const MODEL_SELECTOR_MODELS = [
   AvailableModels.ALIBABA_QWEN3_7_MAX,
   AvailableModels.MOONSHOTAI_KIMI_K2_6,
-  AvailableModels.XIAOMI_MIMO_V2_5_PRO,
 ] as const
 
 const MODEL_SELECTOR_MODEL_SET: ReadonlySet<ModelType> = new Set(
@@ -66,16 +61,8 @@ export const ModelInfos: Record<ModelType, ModelInfo> = {
     id: AvailableModels.ALIBABA_QWEN3_7_MAX,
     name: "Qwen 3.7 Max",
   },
-  [AvailableModels.GOOGLE_GEMINI_3_1_PRO_PREVIEW]: {
-    id: AvailableModels.GOOGLE_GEMINI_3_1_PRO_PREVIEW,
-    name: "Gemini 3.1 Pro Preview",
-  },
   [AvailableModels.MOONSHOTAI_KIMI_K2_6]: {
     id: AvailableModels.MOONSHOTAI_KIMI_K2_6,
     name: "Kimi K2.6",
   },
-  [AvailableModels.XIAOMI_MIMO_V2_5_PRO]: {
-    id: AvailableModels.XIAOMI_MIMO_V2_5_PRO,
-    name: "MiMo V2.5 Pro",
-  },
 }
diff --git a/tests/agent-helper-behavior.test.mjs b/tests/agent-helper-behavior.test.mjs
@@ -173,37 +173,33 @@ test("agent helper validates total size, last-message role, and default model su
     availableModels: [
       { id: "alibaba/qwen3.7-max" },
       { id: "moonshotai/kimi-k2.6" },
-      { id: "xiaomi/mimo-v2.5-pro" },
     ],
     requestId: "request-default-mode-qwen",
   })
 
   assert(!(defaultModeWithQwenResult instanceof Response))
   assert.equal(defaultModeWithQwenResult.selectedModel, "alibaba/qwen3.7-max")
 
-  const standaloneResearchModelResult = parseAgentStreamRequest({
+  const unavailableModelResult = parseAgentStreamRequest({
     body: {
-      model: "google/gemini-3.1-pro-preview",
+      model: "alibaba/qwen3.7-max",
       messages: [
         {
           role: "user",
-          content: "Use Gemini as a normal chat model.",
+          content: "Use a model the caller cannot access.",
         },
       ],
     },
-    availableModels: [
-      { id: "moonshotai/kimi-k2.6" },
-      { id: "google/gemini-3.1-pro-preview" },
-    ],
-    requestId: "request-standalone-research-model",
+    availableModels: [{ id: "moonshotai/kimi-k2.6" }],
+    requestId: "request-unavailable-model",
   })
 
-  assert(standaloneResearchModelResult instanceof Response)
-  assert.equal(standaloneResearchModelResult.status, 400)
-  assert.deepEqual(await standaloneResearchModelResult.json(), {
+  assert(unavailableModelResult instanceof Response)
+  assert.equal(unavailableModelResult.status, 400)
+  assert.deepEqual(await unavailableModelResult.json(), {
     error: "Unsupported model selected.",
     errorCode: "AGENT_UNSUPPORTED_MODEL",
-    requestId: "request-standalone-research-model",
+    requestId: "request-unavailable-model",
   })
 
   const unknownFieldResult = parseAgentStreamRequest({