chloeilabs · chloeilabs · Jun 15, 2026 · Jun 15, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -53,10 +53,9 @@ Client (useAgentSession)
   → POST /api/agent (app/api/agent/route.ts)
     → Auth check (isAuthConfigured → getRequestSession)   [routes self-guard; see Middleware]
     → Sliding-window rate limit (rate-limit.ts, key user:<userId>)
-    → Zod validation (parseAgentStreamRequest) — incl. runMode (chat|research) and model
+    → Zod validation (parseAgentStreamRequest) — model + messages
     → Concurrency slot acquire (max 4 in-flight per user)
     → System prompt assembly (buildAgentSystemInstruction)
-    → Runtime profile resolution (chat_default | deep_research)
     → AI Gateway stream via Vercel AI SDK (startGatewayResponseStream → runAgentStream)
     → NDJSON chunks (application/x-ndjson) → client
       → readResponseStreamLines / parseStreamEventLine
@@ -77,14 +76,9 @@ Before the model sees them, messages pass through `toModelMessages` (`agent-runt
 
 This boundary is **enforced by Next.js bundling at build time** (importing `pg`/`better-auth`/server modules into a client bundle is a build error), **not** by an ESLint rule. Keep it in mind when adding imports.
 
-### Runtime Profiles vs. Task Modes (two independent axes)
+### Task Modes
 
-These are easy to confuse. They are orthogonal:
-
-- **Runtime profile** (`resolveRuntimeProfile` in `app/api/agent/route.ts`, `AGENT_RUNTIME_PROFILES` in `agent-runtime.ts`) — one of `chat_default`, `deep_research`. The profile drives the **tool-step budget** (the tool set itself is the same for both).
-- **Task mode** (`inferPromptTaskMode` in `agent-prompt-steering.ts`) — inferred from message content; drives only the **prompt overlay text** and the Gemini thinking level. Modes: `general`, `coding`, `debugging`, `writing`, `research`, `high_stakes`, `closed_answer`, `instruction_following`.
-
-Research mode is a **request flag** (`runMode: "research"`), not an inference. It selects the `deep_research` profile and `RESEARCH_MODEL` (Qwen 3.7 Max); if that model is unavailable the route returns 400 `AGENT_RESEARCH_MODEL_UNAVAILABLE`.
+**Task mode** (`inferPromptTaskMode` in `agent-prompt-steering.ts`) is inferred from message content and drives only the **prompt overlay text** (`TASK MODE OVERLAY: <MODE>`) and the Gemini thinking level. Modes: `general`, `coding`, `debugging`, `writing`, `research`, `high_stakes`, `closed_answer`, `instruction_following`. The tool-step budget is a single fixed constant (`AGENT_TOOL_MAX_STEPS`) — there is no per-request runtime-profile selection. (The `research` task mode is an automatic content-based overlay, not a user-facing toggle.)
 
 ### System Prompt Composition
 
@@ -94,9 +88,8 @@ Research mode is a **request flag** (`runMode: "research"`), not an inference. I
 2. `RUNTIME DATE CONTEXT` — current UTC timestamp + user timezone (from `X-User-Timezone` header)
 3. **Provider overlay** (`PROVIDER OVERLAY: ALIBABA|GOOGLE|MOONSHOTAI|XIAOMI`) — keyed by the model's **provider org**, not its nickname (alibaba=Qwen, google=Gemini, moonshotai=Kimi, xiaomi=MiMo). Always applied for a supported model.
 4. **Task mode overlay** (`TASK MODE OVERLAY: <MODE>`)
-5. `DEEP RESEARCH MODE` — only for `runMode: "research"`
-6. `IDENTITY AND TONE CONTEXT` — `DEFAULT_SOUL_FALLBACK_INSTRUCTION` (`src/lib/shared/llm/system-instructions.ts`)
-7. `AUTH USER CONTEXT` — authenticated user id, name, email
+5. `IDENTITY AND TONE CONTEXT` — `DEFAULT_SOUL_FALLBACK_INSTRUCTION` (`src/lib/shared/llm/system-instructions.ts`)
+6. `AUTH USER CONTEXT` — authenticated user id, name, email
 
 Inline-citation rules are appended **later**, by `withAiSdkInlineCitationInstruction` (`system-instruction-augmentations.ts`), inside `createAgentStreamResponse` — not by `buildAgentSystemInstruction`.
 
@@ -155,7 +148,7 @@ Each tool is only registered when its requirements are met.
 - **Restricted backend** (the only backend): computation-only Python imports (`math`, `collections`, `itertools`, …).
 - **Limits**: timeout default **10 s**, max **60 s**; code input and output each capped at **12,000 chars**.
 
-**Max tool steps** (constants in `agent-runtime-config.ts`): 12 default; 20 for research runs.
+**Max tool steps** (`AGENT_TOOL_MAX_STEPS` in `agent-runtime-config.ts`): 12.
 
 ### Model Registry
 
@@ -169,7 +162,6 @@ All models are defined in `src/lib/shared/llm/models.ts`:
 | `XIAOMI_MIMO_V2_5_PRO`          | `xiaomi/mimo-v2.5-pro`          | MiMo V2.5 Pro          |
 
 - `MODEL_SELECTOR_MODELS` — the chat selector subset: Qwen 3.7 Max, Kimi K2.6, MiMo V2.5 Pro.
-- `RESEARCH_MODEL` — Qwen 3.7 Max (Research mode also injects the Deep Research prompt block).
 - Gemini stays in `SUPPORTED_MODELS` for Gateway availability but is not a standalone chat selector option.
 - The agent is text-only: all chat input is plain text (no image, file, or PDF input).
 - Adding a model means updating `AvailableModels`, `ModelInfos`, `SUPPORTED_MODELS`, and optionally `MODEL_SELECTOR_MODELS`. `/api/models` filters this registry by configured keys (`getModels()` in `src/lib/actions/api-keys.ts`).
@@ -272,7 +264,7 @@ src/
                         #   follow-up-questions
     agent/messages/     # Message rendering (user, assistant, queued, activity timeline)
     agent/markdown/     # Memoized markdown renderer
-    agent/prompt-form/  # PromptForm (inline Research + Tools popover), ModelSelector
+    agent/prompt-form/  # PromptForm, ModelSelector
     app-sidebar.tsx     # Sidebar shell (lazy-loads SearchChats + NavThreads)
     nav-threads.tsx     # Thread list + client-side pinning (localStorage)
     nav-user.tsx        # Account menu + sign-out
@@ -282,8 +274,8 @@ src/
     layout/             # route group layout
     ui/                 # shadcn/ui primitives (base-lyra/stone) + ShikiCode
   hooks/
-    agent/              # use-models (server-seeded models context), use-persistent-selected-model,
-                        #   use-persistent-run-mode (both localStorage-backed)
+    agent/              # use-models (server-seeded models context),
+                        #   use-persistent-selected-model (localStorage-backed)
   lib/
     actions/api-keys.ts # getModels() server action
     brand/colors.ts     # App brand colors (used by layout/manifest)
@@ -312,7 +304,7 @@ src/
         initial-reasoning-chunk-sanitizer.ts # Redacted-reasoning filtering
         system-instruction-augmentations.ts  # Citation rules appended to prompt
     shared/
-      agent/messages.ts          # AgentStreamEvent, Message, ToolInvocation, run modes/statuses
+      agent/messages.ts          # AgentStreamEvent, Message, ToolInvocation, run statuses
       agent/reasoning-privacy.ts # sanitizeReasoningForDisplay
       agent-request-limits.ts    # Message/char limit defaults
-      llm/models.ts              # AvailableModels, ModelInfos, SUPPORTED/SELECTOR/RESEARCH
+      llm/models.ts              # AvailableModels, ModelInfos, SUPPORTED/SELECTOR
@@ -394,5 +386,5 @@ Request size limits, stream/gateway timeouts, tool-step budgets, the rate-limit
 - The **mock smoke test uses the production server** (`next start`), so build first.
 - `pnpm.onlyBuiltDependencies` already approves the `sharp` build script — do not run `pnpm approve-builds`.
 - Don't reintroduce Sentry/PostHog/OpenTelemetry; they were intentionally removed in favor of Vercel Analytics/Speed Insights + structured logs.
-- Pinning, selected model, and run mode are **client-side localStorage** — there are no server columns or APIs for them.
+- Pinning and selected model are **client-side localStorage** — there are no server columns or APIs for them.
 - After signing up via `/api/auth/sign-up/email`, the session cookie is set automatically; no separate sign-in is needed.
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # Chloei
 
-Chloei is a Next.js 16 chat app backed by Vercel AI Gateway. It currently exposes a curated model selector that defaults to Qwen 3.7 Max and also includes Kimi K2.6 and MiMo V2.5 Pro, routes Research mode to Qwen 3.7 Max with a dedicated Deep Research instruction template, and offers local code execution, optional Tavily retrieval, and Better Auth email/password authentication with PostgreSQL-backed users and sessions.
+Chloei is a Next.js 16 chat app backed by Vercel AI Gateway. It currently exposes a curated model selector that defaults to Qwen 3.7 Max and also includes Kimi K2.6 and MiMo V2.5 Pro, and offers local code execution, optional Tavily retrieval, and Better Auth email/password authentication with PostgreSQL-backed users and sessions.
 
 ## Documentation
 

diff --git a/src/app/api/agent/follow-ups/route.ts b/src/app/api/agent/follow-ups/route.ts
@@ -237,7 +237,6 @@ export async function POST(request: NextRequest) {
         aiGatewayApiKey,
         messages: parsed.messages,
         model: parsed.model,
-        runMode: parsed.runMode,
         signal: request.signal,
         userId: session.user.id,
       })

diff --git a/src/app/api/agent/route.ts b/src/app/api/agent/route.ts
@@ -31,7 +31,6 @@ import {
   isE2eMockModeEnabled,
 } from "@/lib/server/e2e-test-mode"
 import { resolveAgentFeatureFlags } from "@/lib/server/integration-flags"
-import type { AgentRuntimeProfileId } from "@/lib/server/llm/agent-runtime"
 import {
   evaluateAndConsumeSlidingWindowRateLimit,
   tryAcquireConcurrencySlot,
@@ -41,7 +40,6 @@ import {
   observeRouteResponse,
 } from "@/lib/server/route-observability"
 import { isThreadStoreNotInitializedError } from "@/lib/server/threads"
-import type { AgentRunMode } from "@/lib/shared"
 
 export const runtime = "nodejs"
 export const maxDuration = 800
@@ -50,10 +48,6 @@ function resolveRateLimitIdentifier(userId: string): string {
   return `user:${userId}`
 }
 
-function resolveRuntimeProfile(runMode: AgentRunMode): AgentRuntimeProfileId {
-  return runMode === "research" ? "deep_research" : "chat_default"
-}
-
 export async function POST(request: NextRequest) {
   const requestId = resolveRequestId(request)
   const logger = createLogger(`agent:${requestId}`)
@@ -163,11 +157,7 @@ export async function POST(request: NextRequest) {
     const userTimeZone = resolveUserTimeZone(request)
     const featureFlags = await resolveAgentFeatureFlags()
     const promptProvider = resolvePromptProvider(selectedModel)
-    const inferredPromptTaskMode =
-      parsedRequest.runMode === "research"
-        ? "research"
-        : inferPromptTaskMode(parsedRequest.messages)
-    const promptTaskMode = inferredPromptTaskMode
+    const promptTaskMode = inferPromptTaskMode(parsedRequest.messages)
     const systemInstruction = buildAgentSystemInstruction(
       {
         id: session.user.id,
@@ -179,9 +169,6 @@ export async function POST(request: NextRequest) {
         userTimeZone,
         provider: promptProvider,
         taskMode: promptTaskMode,
-        ...(parsedRequest.runMode === "research"
-          ? { deepResearchMode: true }
-          : {}),
       }
     )
 
@@ -253,7 +240,6 @@ export async function POST(request: NextRequest) {
         aiGatewayApiKey,
         tavilyApiKey,
         userTimeZone,
-        runtimeProfile: resolveRuntimeProfile(parsedRequest.runMode),
         taskMode: promptTaskMode,
         userId: session.user.id,
         featureFlags,

diff --git a/src/components/agent/home/agent-session-state.ts b/src/components/agent/home/agent-session-state.ts
@@ -1,5 +1,4 @@
 import {
-  type AgentRunMode,
   type FollowUpQuestion,
   type Message as AgentMessage,
   type ModelType,
@@ -31,14 +30,12 @@ export function createAssistantMessageFromAccumulator({
   createdAt,
   accumulator,
   model,
-  runMode,
   isStreaming,
 }: {
   id: string
   createdAt: string
   accumulator: AgentStreamAccumulator
   model: ModelType
-  runMode: AgentRunMode
   isStreaming: boolean
 }): AgentMessage {
   return {
@@ -49,7 +46,6 @@ export function createAssistantMessageFromAccumulator({
     createdAt,
     metadata: {
       isStreaming,
-      runMode,
       parts: [{ type: "text", text: accumulator.content }],
       ...(accumulator.agentStatus
         ? { agentStatus: accumulator.agentStatus }

diff --git a/src/components/agent/home/follow-up-questions.ts b/src/components/agent/home/follow-up-questions.ts
@@ -1,5 +1,4 @@
 import {
-  type AgentRunMode,
   type FollowUpQuestion,
   isModelType,
   type Message as AgentMessage,
@@ -19,7 +18,6 @@ interface FollowUpQuestionRequestTarget {
   assistantMessageId: string
   messages: AgentMessage[]
   model: ModelType
-  runMode: AgentRunMode
 }
 
 type FollowUpQuestionRequestKind = "backfill" | "final" | "parallel"
@@ -29,7 +27,6 @@ export interface FollowUpQuestionRequestParams {
   requestKind: FollowUpQuestionRequestKind
   messages: AgentMessage[]
   model: ModelType
-  runMode: AgentRunMode
   threadId: string
 }
 
@@ -148,7 +145,6 @@ export function getFollowUpQuestionRequestTargets(
       assistantMessageId: message.id,
       messages: messages.slice(0, index + 1),
       model,
-      runMode: message.metadata.runMode ?? "chat",
     })
   })
 

diff --git a/src/components/agent/home/home-agent-utils.ts b/src/components/agent/home/home-agent-utils.ts
@@ -1,10 +1,6 @@
 import { ASSISTANT_EMPTY_RESPONSE_FALLBACK } from "@/lib/constants"
 import { createRequestHeaders } from "@/lib/request-id"
-import {
-  type AgentRunMode,
-  type Message as AgentMessage,
-  type ModelType,
-} from "@/lib/shared"
+import { type Message as AgentMessage, type ModelType } from "@/lib/shared"
 import {
   AGENT_REQUEST_MAX_MESSAGE_CHARS,
   AGENT_REQUEST_MAX_MESSAGES,
@@ -107,8 +103,7 @@ export function toRequestMessages(
 export function appendUserMessage(
   currentMessages: AgentMessage[],
   content: string,
-  model: ModelType,
-  runMode: AgentRunMode = "chat"
+  model: ModelType
 ): AgentMessage[] {
   const userMessage: AgentMessage = {
     id: createClientMessageId(),
@@ -119,7 +114,6 @@ export function appendUserMessage(
     metadata: {
       isStreaming: false,
       selectedModel: model,
-      runMode,
     },
   }
 

diff --git a/src/components/agent/home/home-content.tsx b/src/components/agent/home/home-content.tsx
@@ -28,11 +28,7 @@ import {
   TooltipTrigger,
 } from "@/components/ui/tooltip"
 import { useIsMobile } from "@/hooks/use-mobile"
-import {
-  type AgentRunMode,
-  type AuthViewer,
-  type ModelType,
-} from "@/lib/shared"
+import { type AuthViewer, type ModelType } from "@/lib/shared"
 import { cn } from "@/lib/utils"
 
 import { PromptForm } from "../prompt-form/prompt-form"
@@ -85,28 +81,15 @@ export function HomePageContent({
   } = useAgentSession(threadStore)
 
   const handlePromptFormSubmit = useCallback(
-    (
-      message: string,
-      model: ModelType,
-      _isStreaming: boolean,
-      runMode: AgentRunMode
-    ) => {
-      handlePromptSubmit(message, model, runMode)
+    (message: string, model: ModelType) => {
+      handlePromptSubmit(message, model)
     },
     [handlePromptSubmit]
   )
 
   const handleFollowUpQuestionClick = useCallback(
-    ({
-      model,
-      question,
-      runMode,
-    }: {
-      model: ModelType
-      question: string
-      runMode: AgentRunMode
-    }) => {
-      handlePromptSubmit(question, model, runMode)
+    ({ model, question }: { model: ModelType; question: string }) => {
+      handlePromptSubmit(question, model)
     },
     [handlePromptSubmit]
   )
@@ -221,15 +204,10 @@ export function HomePageContent({
   }, [fallbackTransitionMs])
 
   const handleAnimatedPromptSubmit = useCallback(
-    (
-      message: string,
-      model: ModelType,
-      _isStreaming: boolean,
-      runMode: AgentRunMode
-    ) => {
+    (message: string, model: ModelType) => {
       if (isMobile) {
         startFallbackConversationTransition()
-        handlePromptSubmit(message, model, runMode)
+        handlePromptSubmit(message, model)
         return
       }
 
@@ -244,13 +222,13 @@ export function HomePageContent({
 
       if (!startViewTransition) {
         startFallbackConversationTransition()
-        handlePromptSubmit(message, model, runMode)
+        handlePromptSubmit(message, model)
         return
       }
 
       startViewTransition(() => {
         flushSync(() => {
-          handlePromptSubmit(message, model, runMode)
+          handlePromptSubmit(message, model)
         })
       })
     },