From 696c9a32d09c3605152575d66db1f9488418f36b Mon Sep 17 00:00:00 2001 From: Dragan Bajcic Date: Fri, 15 May 2026 11:05:42 +0200 Subject: [PATCH 1/9] docs: add subagent session tree runtime plan and update kb checklist Introduces a new multi-slice plan for subagent execution and session tree persistence. Updates the KB implementation checklist with recent L1 search, context pack, and reasoning stream progress. --- ...5-15-subagent-session-tree-runtime-plan.md | 384 ++++++++++++++++++ docs/plans/kb-implementation-checklist.md | 50 ++- 2 files changed, 425 insertions(+), 9 deletions(-) create mode 100644 docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md diff --git a/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md b/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md new file mode 100644 index 0000000..f110197 --- /dev/null +++ b/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md @@ -0,0 +1,384 @@ +# Subagent Session Tree Runtime Plan + +## Summary + +Topchester's runtime is currently shaped around one visible chat turn: the TUI sends a message, the runtime emits a flat list of events, optional callbacks let the TUI update incrementally, and the session log records those events. That is enough for a single agent loop, but subagents need a stronger contract: a parent run should be able to spawn child runs, stream their progress, persist their output as real sessions, and feed controlled results back into the parent model. + +This plan promotes runtime events and sessions from a single flat chat stream into a session tree with streamed child runs. The first target is a `task`/`subagent` tool backed by a `SubagentManager`, with task-only parallelism before general multi-tool parallel execution. + +## Decisions + +- Child agents are real sessions, not hidden promises inside a tool call. +- The runtime event stream becomes the canonical in-process contract; session JSONL remains the durable source of truth. 
+- The parent model sees bounded child results as tool output; the TUI and session log can see streamed child events. +- Agent profiles define model slot, prompt additions, available tools, and permission constraints. +- Subagents inherit parent denies and cannot expand the parent's tool permissions. +- Parallel execution starts with `task` only. Mutating tools, git tools, and command tools stay sequential until the scheduler has explicit safety rules. +- Streaming child output is UI-visible by default, but model-visible child context remains summarized or final-result only. + +## Scope + +Included: + +- Define event and session tree types. +- Refactor runtime execution to a first-class event stream. +- Persist child sessions and parent-child links. +- Add agent profiles and tool permission filtering. +- Add a `task`/`subagent` tool backed by `SubagentManager`. +- Add parallel execution for `task` calls only. +- Expand later to general multi-tool parallelism with per-tool scheduling rules. + +Excluded for these slices: + +- External app server or remote worker protocol. +- Full GUI session-tree explorer. +- Worktree-per-subagent isolation. +- MCP-hosted subagent workers. +- Making every tool parallel in the first subagent implementation. +- Replacing the existing model gateway. + +## Current State + +- `src/agent/runtime.ts` owns the main coding-agent loop. `submitMessage(...)` returns `Promise<AgentRuntimeEvent[]>`, accepts an optional `onEvent`, builds the model prompt, executes at most one selected tool call per loop iteration, appends tool results to the model context, and returns accumulated runtime events. +- `src/agent/events.ts` defines a flat `AgentRuntimeEvent` union for status, message, tool call, task plan, knowledge status, and choice events. +- `src/session/events.ts` defines append-only session metadata and JSONL event payloads. Metadata has `sessionId`, workspace path, timestamps, and `lastEventId`, but no parent/child session links. 
+- `src/agent/tools/types.ts` gives tools a narrow `ToolContext` with workspace, path env, logger, and task plan updater. There is no event sink, subagent manager, or permission view in the tool context. +- `src/tui/shell.ts` already consumes runtime events incrementally through `onEvent`, so the TUI does not need to wait for the full turn before rendering. That is useful, but the callback is still an adapter around a flat turn result rather than the runtime's primary interface. +- `docs/ARCHITECTURE.md` explicitly keeps the runtime boundary as a typed command/event boundary and says a scoped event hub can be added around the runtime/session boundary when plugins, background tasks, or multiple clients need fanout. +- `docs/SESSIONS.md` says session JSONL is the project-local source of truth and warns to keep model-facing roles separate from UI/runtime events. The subagent design should preserve that separation. + +## Competitor Findings + +Local competitor checkouts support the same direction: + +- OpenCode and Kilo implement a `task` tool that creates or resumes child sessions with `parentID`, stores task metadata, supports `subagent_type`, derives permissions from parent context, runs the child prompt, and returns a bounded task result to the parent. +- OpenCode profiles distinguish primary and subagent modes. Built-in subagents such as general/explore/scout have their own prompts, model choices, and permissions. +- OpenCode's subagent permissions start from parent/session denies and default-deny recursive task and todo-write style tools unless explicitly allowed. +- Pi models runtime as streamed events with message start/update/end and tool execution start/update/end. It supports sequential and parallel tool execution while preserving assistant source order for model-visible tool results. +- Codex app-server protocol models threads with `sessionId`, `forkedFromId`, source, nickname, and role. 
Its subagent tools create explicit child-thread edges instead of treating child work as invisible tool internals. + +The useful pattern is not "add a task helper." It is "make child work first-class in the runtime/session model, then expose it through a task tool." + +## Target Runtime Shape + +Parent run flow: + +```text +user message + -> parent session turn + -> parent model step + -> task tool call A and task tool call B + -> child session A streams events + -> child session B streams events + -> parent receives bounded task results in deterministic order + -> parent model summarizes and continues + -> parent session records final visible answer +``` + +The event stream should carry enough information for the TUI to render this shape without coupling the TUI to model internals: + +```ts +type RuntimeEvent = + | ParentMessageEvent + | ToolCallEvent + | ToolResultEvent + | SubagentStartedEvent + | SubagentEventForwardedEvent + | SubagentCompletedEvent + | SubagentFailedEvent; +``` + +The durable session tree should carry enough information for resume, inspection, and future UI tree views: + +```ts +type SessionMetadata = { + sessionId: string; + rootSessionId?: string; + parentSessionId?: string; + parentToolCallId?: string; + source: "user" | "subagent"; + agentProfileId?: string; + title?: string; +}; +``` + +Model-visible messages stay separate from runtime/session events. A subagent can stream hundreds of UI events, but the parent model should usually receive one tool result that contains the final answer, status, child session id, and optional concise findings. + +## Cross-Slice Rules + +- Keep every slice shippable and reviewable on its own. +- Keep existing sessions readable; add optional metadata fields or versioned migrations rather than breaking old JSONL. +- Keep `submitMessage(...)` as a compatibility wrapper while introducing the event-stream API. +- Keep TUI rendering as a consumer of runtime events, not the owner of runtime state. 
+- Do not add a global event bus. If fanout is needed, keep it scoped to a runtime run or session tree. +- Always preserve deterministic model-visible tool-result order, even when UI events stream in completion order. +- Pass cancellation from parent runs into child runs. +- Child profiles may reduce permissions, but never increase them beyond the parent context. +- Add docs when CLI/TUI behavior or persisted session shape changes. + +## Files To Change + +Likely additions: + +- `src/agent/profiles.ts` +- `src/agent/subagents.ts` +- `src/agent/runtime-stream.ts` or equivalent internal stream helper +- `src/agent/tools/task.ts` +- focused tests for runtime streaming, session tree persistence, profiles, permissions, and task tool behavior + +Likely updates: + +- `src/agent/events.ts` +- `src/agent/runtime.ts` +- `src/agent/tools/types.ts` +- `src/agent/tools/registry.ts` +- `src/model/index.ts` +- `src/session/events.ts` +- `src/session/store.ts` +- `src/tui/shell.ts` +- `src/tui/messages.ts` +- `src/tui/layout.ts` +- `src/cli/run.ts` +- `docs/ARCHITECTURE.md` +- `docs/SESSIONS.md` + +## Testing Strategy + +- Type-level tests for event/session schemas and backwards compatibility with old session metadata. +- Unit tests for stream ordering, `submitMessage(...)` compatibility, and abort propagation. +- Unit tests for child session creation, persistence, and rehydration. +- Unit tests for profile resolution and permission filtering. +- Fake-model tests for `task` tool execution and child result injection into parent context. +- TUI render tests for streamed child events, completed child task blocks, and failed child task blocks. +- Smoke tests with fake API before live API runs. +- Full `pnpm check` after each slice that changes runtime or shared type contracts. + +## Slice 1: Define Event And Session Tree Types + +Status: [ ] Not started + +Goal: + +Introduce the durable and in-memory type contracts required for session trees without changing runtime behavior. 
+ +Implementation: + +- Extend `AgentRuntimeEvent` with subagent lifecycle events: + - `subagent_started` + - `subagent_event` + - `subagent_completed` + - `subagent_failed` +- Add optional session tree fields to session metadata: + - `rootSessionId` + - `parentSessionId` + - `parentToolCallId` + - `source` + - `agentProfileId` + - `title` +- Add session event payloads for child-session lifecycle references. +- Keep old session metadata readable by defaulting `source` to `user` and `rootSessionId` to `sessionId`. +- Add helper predicates and constructors so later slices do not hand-build these events. +- Update `docs/SESSIONS.md` with the new optional metadata fields and backwards compatibility rule. + +Verification: + +- Focused session schema tests. +- Existing TUI render tests still pass without child events. +- `pnpm check`. + +## Slice 2: Refactor Runtime To A First-Class Event Stream + +Status: [ ] Not started + +Goal: + +Make streaming the runtime's primary execution contract while keeping the existing `submitMessage(...)` behavior available to callers. + +Implementation: + +- Add a stream-oriented runtime method such as `submitMessageStream(...)` or `runTurn(...)` that returns an `AsyncIterable<AgentRuntimeEvent>`. +- Rebuild `submitMessage(...)` as a thin collector over the stream API. +- Replace ad hoc callback emission with one internal event sink that writes to: + - the stream consumer + - the session appender + - optional compatibility callbacks +- Preserve existing single-agent loop behavior and tool execution order. +- Add abort-signal support through the stream path if it is not already complete. +- Update TUI shell consumption to use the stream API directly or keep the callback adapter only as a temporary compatibility layer. +- Update `docs/ARCHITECTURE.md` to describe runtime as a command/event stream boundary. + +Verification: + +- Fake-model runtime tests prove old `submitMessage(...)` results match streamed events. 
+- Abort tests prove no orphaned pending stream remains after cancellation. +- Existing CLI and TUI tests pass. +- `pnpm check`. + +## Slice 3: Add Child Session Persistence + +Status: [ ] Not started + +Goal: + +Persist child sessions as first-class sessions linked to their parent, before adding an actual subagent tool. + +Implementation: + +- Add session-store APIs for creating child sessions: + - parent session id + - root session id + - parent tool call id + - agent profile id + - title +- Add APIs to list child sessions for a parent and load a session tree. +- Record child lifecycle references in the parent event stream. +- Ensure child event JSONL files use the same append-only rules as parent sessions. +- Keep parent and child session logs separate so replay remains simple. +- Update session docs with examples of parent and child metadata. + +Verification: + +- Unit tests create a parent session, create children, append child events, and reload the tree. +- Backwards compatibility test loads an old metadata object with no parent fields. +- `pnpm check`. + +## Slice 4: Add Agent Profiles And Tool Permission Filtering + +Status: [ ] Not started + +Goal: + +Define what kinds of agents can run and which tools each profile may use, so subagents are constrained before they can execute. + +Implementation: + +- Add `AgentProfile` definitions for the primary agent and initial subagent profiles. +- Include profile fields for: + - id + - display name + - mode: `primary`, `subagent`, or `all` + - prompt additions + - model slot or model override + - allowed tools + - denied tools + - permission defaults +- Extend `ToolContext` with a permission view and profile information. +- Filter the tool registry per profile before prompt generation and tool execution. +- Enforce permissions at execution time, not only in prompts. +- Make recursive task execution denied by default for subagents unless explicitly enabled later. 
+- Document how profile permissions compose with project/user configuration. + +Verification: + +- Unit tests prove denied tools are absent from prompts and rejected at execution. +- Unit tests prove child profiles inherit parent denies. +- Existing tool tests pass under the primary profile. +- `pnpm check`. + +## Slice 5: Add `task`/`subagent` Tool Backed By `SubagentManager` + +Status: [ ] Not started + +Goal: + +Add the first real subagent execution path: a parent model can call `task`, the runtime creates a child session, streams child events, and returns a bounded child result to the parent model. + +Implementation: + +- Add `SubagentManager` as a runtime service, not as TUI state. +- Add a `task` tool with inputs similar to: + - `description` + - `prompt` + - `subagent_type` + - optional `task_id` +- Inject `SubagentManager` and event sink into `ToolContext`. +- When `task` runs: + - resolve the requested agent profile + - create a child session + - emit `subagent_started` + - run the child runtime with fresh context + - forward child events as `subagent_event` + - emit `subagent_completed` or `subagent_failed` + - return a model-visible tool result containing child session id, status, and final response +- Keep child context fresh. The parent prompt should not be dumped into child context except for the task prompt, workspace/KB context, and configured profile prompt. +- Render child events in the TUI as nested task blocks. +- Update tool prompt docs so the model knows when to use `task`. + +Verification: + +- Fake-model test proves parent receives a single task result with child output. +- Runtime event test proves child events stream before final parent completion. +- TUI render test covers running, completed, and failed child task states. +- `pnpm check`. 
+ +## Slice 6: Add Parallel Execution For `task` Only + +Status: [ ] Not started + +Goal: + +Allow independent task calls from one parent model step to run concurrently while preserving deterministic model-visible results. + +Implementation: + +- Add a small scheduler in the runtime loop. +- If a model step returns multiple tool calls and every call is `task`, run them concurrently. +- Stream child events in real completion/update order. +- Append model-visible task results back into the parent conversation in assistant source order. +- Add concurrency limits with a conservative default. +- Propagate parent cancellation to all running child tasks. +- Make failures local to the failed task unless cancellation or policy says the whole step must stop. +- If text-JSON model output cannot represent multiple task calls cleanly, add a temporary `task_batch` compatibility path or defer parallel task calls to native multi-tool model responses. + +Verification: + +- Fake-model test with two child tasks proves both start before either completes. +- Ordering test proves parent model receives results in source order. +- Failure test proves one failed task is represented cleanly. +- Cancellation test proves all running children stop. +- `pnpm check`. + +## Slice 7: Expand To General Multi-Tool Parallelism + +Status: [ ] Not started + +Goal: + +Generalize the scheduler beyond `task` after the event stream, session tree, and permission model are stable. + +Implementation: + +- Add per-tool scheduling metadata: + - `parallelSafe` + - `mutatesWorkspace` + - `requiresExclusiveWorkspace` + - optional resource keys such as file paths, shell process, or git state +- Default unknown tools to sequential. +- Allow read-only tools to run in parallel when their resource keys do not conflict. +- Keep write tools sequential until specific conflict rules exist. 
+- Add tool execution lifecycle events compatible with streaming updates: + - execution started + - execution update + - execution completed + - execution failed +- Preserve source-order insertion for model-visible tool results. +- Add docs for tool authors explaining how to mark execution mode safely. + +Verification: + +- Scheduler unit tests for read/read, read/write, write/write, and unknown-tool cases. +- Existing tool behavior remains unchanged when tools are not marked parallel-safe. +- TUI tests cover interleaved tool progress. +- `pnpm check`. + +## Open Questions + +- Should child sessions be visible in the default session list, or only when expanding a parent session? +- Should the first child profiles be read-only by default, with write-capable subagents introduced later? +- Should child tasks get a separate scratchpad/task-plan event stream, or should they reuse the existing task plan events inside their child session only? +- What should the default child concurrency limit be for local alpha: 2, 3, or config-driven? +- Should the `task` tool be named only `task`, only `subagent`, or should `subagent` be an alias for user-facing clarity? + +## Next Slice + +Start with Slice 1. It creates the type and persistence surface area needed by every later slice, but it does not change runtime behavior. That makes it the right first PR before the event-stream refactor. diff --git a/docs/plans/kb-implementation-checklist.md b/docs/plans/kb-implementation-checklist.md index cdb86aa..2abb3ba 100644 --- a/docs/plans/kb-implementation-checklist.md +++ b/docs/plans/kb-implementation-checklist.md @@ -6,6 +6,12 @@ Track implementation of the global knowledge base feature from current L1 suppor `docs/KNOWLEDGE.md` remains the design source. This file is the implementation checklist. 
+Status legend: + +- `[x]` Done +- `[-]` Partial or in progress +- `[ ]` Not started + ## Current Status - [x] Project KB folder initialization @@ -17,9 +23,25 @@ Track implementation of the global knowledge base feature from current L1 suppor - [x] Mirrored L1 entry layout under `topchester-kb/l1-files/.json` - [x] Basic manifest updates with L1 counts - [x] CLI `topchester kb compile` +- [x] CLI `topchester kb search` +- [x] CLI `topchester kb context` +- [x] Top-level CLI `topchester search` alias for L1 KB search - [x] Slash command `/kb compile` - [x] CLI L1 progress with count, percentage, progress bar, and current file - [x] Workspace-scoped `edit_file` and create-by-default `write_file` tools mark changed files dirty-known and `needs_sync` in the runtime session overlay +- [x] L1 in-memory search over paths, symbols, exports, responsibilities, summaries, imports, relationships, evidence, and tests +- [x] L1 context pack generation for CLI and runtime prompt injection +- [x] Agent runtime injects relevant L1 context packs when the KB is ready +- [x] L1 post-processing infers file roles and reverse test links +- [x] Provider-exposed reasoning can stream into the TUI without persisting thinking text into session/model context + +## Recent Implementation Notes + +- `e411787` added L1 search, compact context pack generation, CLI `kb search`/`kb context`, top-level `search`, runtime L1 context injection, L1 post-processing, and tests. +- `eac6997`, `21a8825`, and `bbab1d2` added L1 search benchmarks, optimized index loading/prefix lookup, and stripped empty containers from JSON search/context-pack output. +- `e5ae0b7` added the `TOPCHESTER_DISABLE_L1_CONTEXT=1` escape hatch and token-usage context notes. +- `22b608c` added streamed provider reasoning in the TUI and hardened the plan/tool-call display path. This is TUI/runtime work, not canonical KB content. +- L1 structural fields are currently model-extracted and normalized/post-processed. 
Deterministic static extraction before model summarization is still only partial. ## Global KB Setup and Configuration @@ -44,16 +66,20 @@ Track implementation of the global knowledge base feature from current L1 suppor - [x] Durable queue statuses: `queued`, `in_progress`, `completed`, `failed`, `changed`, `missing_file` - [x] L1 entry schema/type validation - [x] Deterministic field overrides for model output +- [x] Model-owned L1 fields normalize symbols, imports, exports, module ids, feature ids, test ids, evidence, and confidence before schema validation - [x] Existing current-entry skip/resume behavior - [x] Per-file failure metadata - [x] Orphan L1 entry cleanup - [x] Mirrored path-safe entry writes - [ ] Add stronger language/type detection -- [ ] Add structural import/export/symbol extraction before summarization -- [ ] Add test/doc coverage links where detectable +- [-] Add structural import/export/symbol extraction before summarization; model prompt/schema normalization exists, but deterministic static extraction before summarization is still open +- [-] Add test/doc coverage links where detectable; test links are implemented through `declared_test_targets`, `likely_test_targets`, and reverse `tested_by` post-processing, while doc relationship links are still open - [ ] Add chunking or fallback strategy for oversized text files - [ ] Add L1 schema JSON files under `topchester-kb/schema/` - [ ] Add L1 validation command/check +- [x] Add L1 in-memory lexical index +- [x] Add compact L1 context pack assembly with omitted empty containers and optional full L1 entries +- [x] Add CLI formatting and JSON output for L1 search/context packs ## L2: Module Knowledge @@ -122,10 +148,10 @@ Track implementation of the global knowledge base feature from current L1 suppor - [ ] Add `GET /manifest` - [ ] Add `GET /nodes/:id` - [ ] Add `GET /files/:encodedPath` or replacement file lookup -- [ ] Implement `kb.search` +- [-] Implement `kb.search`; in-process L1 search 
exists, but no KB service/RPC endpoint yet - [ ] Implement `kb.getNode` - [ ] Implement `kb.neighbors` -- [ ] Implement `kb.contextPack` +- [-] Implement `kb.contextPack`; in-process L1 context packs and CLI output exist, but no KB service/RPC endpoint yet - [ ] Implement `kb.driftCheck` - [ ] Implement `kb.impact` - [ ] Implement `kb.updatePlan` @@ -143,6 +169,7 @@ Track implementation of the global knowledge base feature from current L1 suppor - [ ] Keep cache fully rebuildable from canonical KB - [ ] Add session overlay storage for dirty-but-known active work - [x] Add in-memory session overlay state for agent-authored `edit_file` changes +- [x] Reuse `.agents/topchester-kb-cache/` for durable L1 queue and sync queue artifacts - [ ] Add cache tests ## Tool Execution @@ -182,15 +209,15 @@ Track implementation of the global knowledge base feature from current L1 suppor ## Agent KB-Aware Behavior -- [ ] Query KB before architecture answers -- [ ] Request context pack before non-trivial coding tasks +- [-] Query KB before architecture answers; runtime injects L1 context for normal chat turns when KB is ready, but there is no architecture-specific policy yet +- [-] Request context pack before non-trivial coding tasks; runtime injects an L1 context pack for normal turns when KB is ready, with `TOPCHESTER_DISABLE_L1_CONTEXT=1` as an escape hatch - [ ] Run drift check before editing relevant files -- [ ] Use KB context to orient, plan, estimate impact, and identify verification +- [-] Use KB context to orient, plan, estimate impact, and identify verification; current implementation provides L1 orientation only, without impact or verification recommendations - [x] Keep non-trivial runtime work visible through session-only `plan_todo` state -- [ ] Resolve task-critical facts against current working tree before acting +- [-] Resolve task-critical facts against current working tree before acting; prompt contract says to read current files, but this is not enforced by 
runtime policy - [x] Track dirty files and suspect nodes in a session overlay during `edit_file` edits - [x] Track created files in the session overlay during `write_file` writes -- [ ] Warn clearly when relevant KB is stale +- [-] Warn clearly when relevant KB is stale; context packs carry `drift: unchecked` warnings, but scoped drift detection is not implemented - [x] Update or mark KB stale after `edit_file` edits - [x] Update or mark KB stale after `write_file` writes - [x] Mark session as `needs_sync` after `edit_file` edits and `write_file` writes @@ -208,8 +235,10 @@ Track implementation of the global knowledge base feature from current L1 suppor - [ ] Add drift warning panel - [ ] Add knowledge diff view - [x] Show current `plan_todo` state above the prompt during multi-step work +- [x] Show streamed provider reasoning as a non-persisted thinking row - [x] Add TUI tests for KB footer path health - [x] Add TUI tests for visible task-plan rendering +- [x] Add TUI tests for reasoning display and runtime failure rendering ## Validation and CI @@ -219,3 +248,6 @@ Track implementation of the global knowledge base feature from current L1 suppor - [ ] Add generated KB policy checks - [ ] Add test fixtures for small repos - [ ] Add end-to-end compile validation for a sample workspace +- [x] Add focused tests for L1 search and context pack generation +- [x] Add CLI integration tests for `kb search`, `kb context`, and top-level `search` +- [x] Add L1 post-processing tests for inferred test links From 4ee147f112dfe1ce77cd63f6ac92ebafc6312904 Mon Sep 17 00:00:00 2001 From: Dragan Bajcic Date: Fri, 15 May 2026 11:10:59 +0200 Subject: [PATCH 2/9] feat(runtime): add session tree event types --- docs/SESSIONS.md | 27 +++++- ...5-15-subagent-session-tree-runtime-plan.md | 17 +++- src/agent/events.ts | 80 +++++++++++++++- src/session/events.ts | 80 +++++++++++++++- src/session/store.ts | 7 ++ src/tui/runtime-events.ts | 5 + src/tui/shell.ts | 33 +++++++ test/session.test.ts | 
96 +++++++++++++++++++ 8 files changed, 341 insertions(+), 4 deletions(-) diff --git a/docs/SESSIONS.md b/docs/SESSIONS.md index 751b375..ab178fd 100644 --- a/docs/SESSIONS.md +++ b/docs/SESSIONS.md @@ -40,6 +40,31 @@ Early event kinds: - `message` — user, agent, or system-visible chat row. - `status` — transient or persisted state changes. - `tool_call` — command/tool request. -- `tool_result` — command/tool result. +- `task_plan` — visible session-only task plan state. +- `choice` — visible user choice prompt. +- `subagent_started` — parent log reference to a child session starting. +- `subagent_event` — parent log reference to a forwarded child runtime event. +- `subagent_completed` — parent log reference to a child session completing. +- `subagent_failed` — parent log reference to a child session failing. + +`metadata.json` includes the root session identity and, for child sessions, the +parent link: + +```json +{ + "version": 1, + "sessionId": "019e0000-0000-7000-8000-000000000000", + "rootSessionId": "019e0000-0000-7000-8000-000000000000", + "parentSessionId": "019e0000-0000-7000-8000-000000000000", + "parentToolCallId": "task-call-1", + "source": "subagent", + "agentProfileId": "explore", + "title": "Inspect runtime" +} +``` + +For existing sessions that do not have tree fields, loaders treat `source` as +`user` and `rootSessionId` as the session's own `sessionId`. That keeps older +project-local sessions readable without rewriting their JSONL. Keep model-facing chat roles separate from UI/runtime events. The TUI can show both, but model context should only include what the agent runtime intentionally selects. 
diff --git a/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md b/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md index f110197..d0cb044 100644 --- a/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md +++ b/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md @@ -155,7 +155,7 @@ Likely updates: ## Slice 1: Define Event And Session Tree Types -Status: [ ] Not started +Status: [x] Done Goal: @@ -186,6 +186,21 @@ Verification: - Existing TUI render tests still pass without child events. - `pnpm check`. +Completed: + +- Added runtime subagent lifecycle event types, factories, and a subagent-event predicate. +- Added optional session-tree metadata fields with backwards-compatible defaults for old metadata. +- Added durable subagent lifecycle payload schemas and payload constructors. +- Kept current TUI/session rehydration behavior neutral for child events until later rendering slices. +- Updated session docs with metadata fields and compatibility rules. + +Verified: + +- `pnpm test -- test/session.test.ts test/tui.render.test.ts` +- `pnpm typecheck` +- `pnpm check` +- `mise run local-ci` + ## Slice 2: Refactor Runtime To A First-Class Event Stream Status: [ ] Not started diff --git a/src/agent/events.ts b/src/agent/events.ts index 354256d..6b9b062 100644 --- a/src/agent/events.ts +++ b/src/agent/events.ts @@ -8,7 +8,11 @@ export type AgentRuntimeEvent = | AgentToolCallEvent | AgentTaskPlanEvent | AgentKnowledgeStatusEvent - | AgentChoiceEvent; + | AgentChoiceEvent + | AgentSubagentStartedEvent + | AgentSubagentEvent + | AgentSubagentCompletedEvent + | AgentSubagentFailedEvent; export interface AgentStatusEvent { type: "status"; @@ -52,6 +56,39 @@ export interface AgentChoiceAction { value?: string; } +export interface AgentSubagentStartedEvent { + type: "subagent_started"; + sessionId: string; + parentSessionId: string; + parentToolCallId: string; + agentProfileId?: string; + title?: string; +} + +export interface AgentSubagentEvent 
{ + type: "subagent_event"; + sessionId: string; + parentSessionId: string; + parentToolCallId: string; + event: AgentRuntimeEvent; +} + +export interface AgentSubagentCompletedEvent { + type: "subagent_completed"; + sessionId: string; + parentSessionId: string; + parentToolCallId: string; + result?: string; +} + +export interface AgentSubagentFailedEvent { + type: "subagent_failed"; + sessionId: string; + parentSessionId: string; + parentToolCallId: string; + error: string; +} + export interface AgentChoiceOptions { tone: AgentChoiceEvent["tone"]; title: string; @@ -59,6 +96,25 @@ export interface AgentChoiceOptions { actions: AgentChoiceAction[]; } +export interface AgentSubagentEventBaseOptions { + sessionId: string; + parentSessionId: string; + parentToolCallId: string; +} + +export interface AgentSubagentStartedOptions extends AgentSubagentEventBaseOptions { + agentProfileId?: string; + title?: string; +} + +export interface AgentSubagentCompletedOptions extends AgentSubagentEventBaseOptions { + result?: string; +} + +export interface AgentSubagentFailedOptions extends AgentSubagentEventBaseOptions { + error: string; +} + export const ABORT_CHOICE_VALUE = "__topchester_abort__"; export const agentEvent = { @@ -93,8 +149,30 @@ export const agentEvent = { choice(options: AgentChoiceOptions): AgentChoiceEvent { return { type: "choice", ...options }; }, + + subagentStarted(options: AgentSubagentStartedOptions): AgentSubagentStartedEvent { + return { type: "subagent_started", ...options }; + }, + + subagentEvent(options: AgentSubagentEventBaseOptions, event: AgentRuntimeEvent): AgentSubagentEvent { + return { type: "subagent_event", ...options, event }; + }, + + subagentCompleted(options: AgentSubagentCompletedOptions): AgentSubagentCompletedEvent { + return { type: "subagent_completed", ...options }; + }, + + subagentFailed(options: AgentSubagentFailedOptions): AgentSubagentFailedEvent { + return { type: "subagent_failed", ...options }; + }, } as const; export 
function choiceAction(label: string, value?: string): AgentChoiceAction { return value === undefined ? { label } : { label, value }; } + +export function isSubagentRuntimeEvent( + event: AgentRuntimeEvent +): event is AgentSubagentStartedEvent | AgentSubagentEvent | AgentSubagentCompletedEvent | AgentSubagentFailedEvent { + return event.type.startsWith("subagent_"); +} diff --git a/src/session/events.ts b/src/session/events.ts index 7267744..19c0768 100644 --- a/src/session/events.ts +++ b/src/session/events.ts @@ -15,15 +15,27 @@ const jsonValueSchema: z.ZodType = z.lazy(() => ]) ); -export const sessionMetadataSchema = z.object({ +const sessionMetadataBaseSchema = z.object({ version: z.literal(SESSION_METADATA_VERSION), sessionId: z.string(), + rootSessionId: z.string().optional(), + parentSessionId: z.string().optional(), + parentToolCallId: z.string().optional(), + source: z.enum(["user", "subagent"]).optional(), + agentProfileId: z.string().optional(), + title: z.string().optional(), workspaceRoot: z.string().min(1), createdAt: isoTimestampSchema, updatedAt: isoTimestampSchema, lastEventId: z.number().int().min(0), }); +export const sessionMetadataSchema = sessionMetadataBaseSchema.transform((metadata) => ({ + ...metadata, + rootSessionId: metadata.rootSessionId ?? metadata.sessionId, + source: metadata.source ?? 
"user", +})); + const eventEnvelopeSchema = z.object({ version: z.literal(SESSION_EVENT_VERSION), id: z.number().int().positive(), @@ -77,6 +89,33 @@ const choicePayloadSchema = z.object({ ), }); +const subagentLifecycleBasePayloadSchema = z.object({ + sessionId: z.string(), + parentSessionId: z.string(), + parentToolCallId: z.string(), +}); + +const subagentStartedPayloadSchema = subagentLifecycleBasePayloadSchema.extend({ + kind: z.literal("subagent_started"), + agentProfileId: z.string().optional(), + title: z.string().optional(), +}); + +const subagentEventPayloadSchema = subagentLifecycleBasePayloadSchema.extend({ + kind: z.literal("subagent_event"), + event: z.record(z.string(), jsonValueSchema), +}); + +const subagentCompletedPayloadSchema = subagentLifecycleBasePayloadSchema.extend({ + kind: z.literal("subagent_completed"), + result: z.string().optional(), +}); + +const subagentFailedPayloadSchema = subagentLifecycleBasePayloadSchema.extend({ + kind: z.literal("subagent_failed"), + error: z.string(), +}); + export const sessionEventPayloadSchema = z.discriminatedUnion("kind", [ messagePayloadSchema, toolCallPayloadSchema, @@ -84,6 +123,10 @@ export const sessionEventPayloadSchema = z.discriminatedUnion("kind", [ statusPayloadSchema, knowledgeStatusPayloadSchema, choicePayloadSchema, + subagentStartedPayloadSchema, + subagentEventPayloadSchema, + subagentCompletedPayloadSchema, + subagentFailedPayloadSchema, ]); export const sessionEventSchema = z.intersection(eventEnvelopeSchema, sessionEventPayloadSchema); @@ -91,3 +134,38 @@ export const sessionEventSchema = z.intersection(eventEnvelopeSchema, sessionEve export type SessionMetadata = z.infer; export type SessionEventPayload = z.infer; export type SessionEvent = z.infer; + +export interface SubagentSessionReference { + sessionId: string; + parentSessionId: string; + parentToolCallId: string; +} + +export const sessionEventPayload = { + subagentStarted( + reference: SubagentSessionReference, + options: { 
agentProfileId?: string; title?: string } = {} + ): SessionEventPayload { + return { kind: "subagent_started", ...reference, ...options }; + }, + + subagentEvent(reference: SubagentSessionReference, event: Record): SessionEventPayload { + return { kind: "subagent_event", ...reference, event }; + }, + + subagentCompleted(reference: SubagentSessionReference, result?: string): SessionEventPayload { + return result === undefined + ? { kind: "subagent_completed", ...reference } + : { kind: "subagent_completed", ...reference, result }; + }, + + subagentFailed(reference: SubagentSessionReference, error: string): SessionEventPayload { + return { kind: "subagent_failed", ...reference, error }; + }, +} as const; + +export function isSubagentSessionPayload( + payload: SessionEventPayload +): payload is Extract { + return payload.kind.startsWith("subagent_"); +} diff --git a/src/session/store.ts b/src/session/store.ts index b63e429..0be15ac 100644 --- a/src/session/store.ts +++ b/src/session/store.ts @@ -57,6 +57,8 @@ export async function createSession(workspaceRoot: string): Promise, + }; + case "subagent_completed": + return { + kind: "subagent_completed", + sessionId: event.sessionId, + parentSessionId: event.parentSessionId, + parentToolCallId: event.parentToolCallId, + ...(event.result === undefined ? 
{} : { result: event.result }), + }; + case "subagent_failed": + return { + kind: "subagent_failed", + sessionId: event.sessionId, + parentSessionId: event.parentSessionId, + parentToolCallId: event.parentToolCallId, + error: event.error, + }; case "status": return { kind: "status", diff --git a/test/session.test.ts b/test/session.test.ts index b15236d..2be7388 100644 --- a/test/session.test.ts +++ b/test/session.test.ts @@ -53,6 +53,8 @@ describe("session store", () => { expect(await readJson(session.metadataPath)).toEqual({ version: 1, sessionId: session.sessionId, + rootSessionId: session.sessionId, + source: "user", workspaceRoot: workspace, createdAt: session.metadata.createdAt, updatedAt: session.metadata.createdAt, @@ -259,6 +261,35 @@ describe("session store", () => { body: "Pick one", actions: [{ label: "Yes", value: "yes" }], }); + await session.append({ + kind: "subagent_started", + sessionId: "child-session", + parentSessionId: session.sessionId, + parentToolCallId: "task-call-1", + agentProfileId: "explore", + title: "Inspect runtime", + }); + await session.append({ + kind: "subagent_event", + sessionId: "child-session", + parentSessionId: session.sessionId, + parentToolCallId: "task-call-1", + event: { type: "status", status: "working" }, + }); + await session.append({ + kind: "subagent_completed", + sessionId: "child-session", + parentSessionId: session.sessionId, + parentToolCallId: "task-call-1", + result: "Done", + }); + await session.append({ + kind: "subagent_failed", + sessionId: "child-session-2", + parentSessionId: session.sessionId, + parentToolCallId: "task-call-2", + error: "Child failed", + }); const events = (await readFile(session.eventsPath, "utf8")) .trimEnd() @@ -295,9 +326,74 @@ describe("session store", () => { body: "Pick one", actions: [{ label: "Yes", value: "yes" }], }), + expect.objectContaining({ + version: 1, + id: 5, + kind: "subagent_started", + sessionId: "child-session", + parentSessionId: session.sessionId, + 
parentToolCallId: "task-call-1", + agentProfileId: "explore", + title: "Inspect runtime", + }), + expect.objectContaining({ + version: 1, + id: 6, + kind: "subagent_event", + sessionId: "child-session", + parentSessionId: session.sessionId, + parentToolCallId: "task-call-1", + event: { type: "status", status: "working" }, + }), + expect.objectContaining({ + version: 1, + id: 7, + kind: "subagent_completed", + sessionId: "child-session", + parentSessionId: session.sessionId, + parentToolCallId: "task-call-1", + result: "Done", + }), + expect.objectContaining({ + version: 1, + id: 8, + kind: "subagent_failed", + sessionId: "child-session-2", + parentSessionId: session.sessionId, + parentToolCallId: "task-call-2", + error: "Child failed", + }), ]); }); + it("defaults old metadata into a root user session when loading", async () => { + const workspace = await tempWorkspace(); + const session = await createSession(workspace); + await writeFile( + session.metadataPath, + `${JSON.stringify( + { + version: 1, + sessionId: session.sessionId, + workspaceRoot: workspace, + createdAt: session.metadata.createdAt, + updatedAt: session.metadata.updatedAt, + lastEventId: 0, + }, + null, + 2 + )}\n` + ); + + const loaded = await loadSession(workspace, session.sessionId); + + expect(loaded.metadata).toMatchObject({ + sessionId: session.sessionId, + rootSessionId: session.sessionId, + source: "user", + }); + }); + it("creates parent session folders inside the workspace only", async () => { const workspace = await tempWorkspace(); await mkdir(join(workspace, "nested"), { recursive: true }); From f6c672fad50f933232c03a7444d8bba6091e4405 Mon Sep 17 00:00:00 2001 From: Dragan Bajcic Date: Fri, 15 May 2026 11:17:17 +0200 Subject: [PATCH 3/9] refactor(runtime): stream agent turn events --- docs/ARCHITECTURE.md | 9 +- ...5-15-subagent-session-tree-runtime-plan.md | 17 ++- src/agent/runtime.ts | 115 +++++++++--------- src/cli/run.ts | 42 ++++--- src/tui/shell.ts | 23 ++-- 
test/commands.test.ts | 82 +++++++++++++ test/tui.render.test.ts | 27 ++++ 7 files changed, 230 insertions(+), 85 deletions(-) diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index d703553..7c49213 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -229,10 +229,15 @@ The core runtime should expose a small command/event boundary: client command -> runtime command handler -> KB-aware agent loop - -> typed runtime events + -> typed runtime event stream -> TUI, CLI, GUI, IDE, or session log consumer ``` +For chat turns, `submitMessageStream(...)` is the primary in-process contract: +clients consume an `AsyncIterable` of runtime events as the model and tools +progress. `submitMessage(...)` remains as a compatibility collector for callers +that still need the completed event array or the older callback shape. + Initial command types: - submit a user message, @@ -251,7 +256,7 @@ Initial event types: - user choice requested, - turn finished or failed. -This is enough structure to keep rendering code out of the runtime and runtime policy out of rendering code. +This is enough structure to keep rendering code out of the runtime and runtime policy out of rendering code. Stream consumers should persist or render events as they arrive; they should not own the agent loop. Do not add a big global event bus in V0. A global bus can make small apps feel clean at first, but it hides ownership when every module can publish anything. Start with typed runtime events and explicit subscribers. The session event log should be the durable event stream; the in-process event path should stay narrow and boring. 
diff --git a/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md b/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md index d0cb044..fac0633 100644 --- a/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md +++ b/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md @@ -203,7 +203,7 @@ Verified: ## Slice 2: Refactor Runtime To A First-Class Event Stream -Status: [ ] Not started +Status: [x] Done Goal: @@ -229,6 +229,21 @@ Verification: - Existing CLI and TUI tests pass. - `pnpm check`. +Completed: + +- Added `submitMessageStream(...)` as the primary async runtime event stream. +- Rebuilt `submitMessage(...)` as a collector over the stream path with callback compatibility. +- Updated the TUI chat submission path to consume streamed events directly. +- Updated non-interactive `topchester run` prompt execution to persist and print streamed events as they arrive. +- Documented the runtime boundary as a typed event stream in `docs/ARCHITECTURE.md`. + +Verified: + +- `pnpm test -- test/commands.test.ts test/tui.render.test.ts test/cli.integration.test.ts` +- `pnpm typecheck` +- `pnpm check` +- `mise run local-ci` + ## Slice 3: Add Child Session Persistence Status: [ ] Not started diff --git a/src/agent/runtime.ts b/src/agent/runtime.ts index 7ea3729..47607b1 100644 --- a/src/agent/runtime.ts +++ b/src/agent/runtime.ts @@ -36,6 +36,12 @@ interface TurnTokenUsageTotals { export interface AgentRuntime { checkAgent(abortSignal?: AbortSignal): Promise; checkKnowledgeBase(): Promise; + submitMessageStream( + conversation: ConversationTurn[], + message: string, + abortSignal?: AbortSignal, + options?: AgentRuntimeSubmitMessageOptions + ): AsyncIterable; submitMessage( conversation: ConversationTurn[], message: string, @@ -105,37 +111,23 @@ export class TopchesterAgentRuntime implements AgentRuntime { } /** - * Runs one user chat turn through the agent loop. It builds the model + * Streams one user chat turn through the agent loop. 
It builds the model * prompt with relevant KB context, calls the model, executes any requested * tools, feeds tool results back into the next prompt, and repeats until * the model returns a normal assistant message or the loop hits its safety * limit. * - * Events are accumulated for the caller and optionally streamed through - * `onEvent` as soon as tool calls, task-plan updates, choices, or final - * messages are available. The method also enforces visible task-plan - * closure before a final answer when the model leaves an open plan. + * This is the primary runtime execution contract. Compatibility wrappers + * can collect the stream, but the runtime's own turn loop only knows about + * ordered events. */ - async submitMessage( + async *submitMessageStream( conversation: ConversationTurn[], message: string, abortSignal?: AbortSignal, - onEvent?: AgentRuntimeEventSink, options: AgentRuntimeSubmitMessageOptions = {} - ): Promise { + ): AsyncIterable { const prompt = await this.buildPromptWithKnowledgeContext(buildConversationPrompt(conversation, message), message); - const events: AgentRuntimeEvent[] = []; - const emit = async (...nextEvents: AgentRuntimeEvent[]) => { - events.push(...nextEvents); - - if (!onEvent) { - return; - } - - for (const event of nextEvents) { - await onEvent(event); - } - }; let nextPrompt = prompt; let totalDurationMs = 0; const tokenUsageTotals: TurnTokenUsageTotals = {}; @@ -227,33 +219,29 @@ export class TopchesterAgentRuntime implements AgentRuntime { continue; } - await emit(agentEvent.taskPlan(this.taskPlan.update({ items: [] }))); + yield agentEvent.taskPlan(this.taskPlan.update({ items: [] })); } - await emit( - agentEvent.assistantMessage( - finalText.trim() || "I got an empty response from the model.", - formatAgentMessageMeta(result.modelId, totalDurationMs, tokenUsageTotals) - ), - agentEvent.status("ready") + yield agentEvent.assistantMessage( + finalText.trim() || "I got an empty response from the model.", + 
formatAgentMessageMeta(result.modelId, totalDurationMs, tokenUsageTotals) ); - return events; + yield agentEvent.status("ready"); + return; } if (toolCalls === MAX_TOOL_CALLS_PER_TURN) { - await emit( - agentEvent.choice({ - tone: "warning", - title: "Tool call limit reached", - body: `Stopped after ${MAX_TOOL_CALLS_PER_TURN} tool calls in one turn. Continue starts another turn; abort leaves the call stopped.`, - actions: [ - choiceAction("Continue", "Continue the previous task from where you stopped."), - choiceAction("Abort", ABORT_CHOICE_VALUE), - ], - }), - agentEvent.status("ready") - ); - return events; + yield agentEvent.choice({ + tone: "warning", + title: "Tool call limit reached", + body: `Stopped after ${MAX_TOOL_CALLS_PER_TURN} tool calls in one turn. Continue starts another turn; abort leaves the call stopped.`, + actions: [ + choiceAction("Continue", "Continue the previous task from where you stopped."), + choiceAction("Abort", ABORT_CHOICE_VALUE), + ], + }); + yield agentEvent.status("ready"); + return; } const executableToolCall = toolCall as ToolCall; @@ -264,23 +252,21 @@ export class TopchesterAgentRuntime implements AgentRuntime { ); if (suppressiblePlanTodoAnswer !== undefined) { - await emit( - agentEvent.assistantMessage( - suppressiblePlanTodoAnswer || "I got an empty response from the model.", - formatAgentMessageMeta(result.modelId, totalDurationMs, tokenUsageTotals) - ), - agentEvent.status("ready") + yield agentEvent.assistantMessage( + suppressiblePlanTodoAnswer || "I got an empty response from the model.", + formatAgentMessageMeta(result.modelId, totalDurationMs, tokenUsageTotals) ); - return events; + yield agentEvent.status("ready"); + return; } const toolResult = await executeToolCall(this.context.workspaceRoot, executableToolCall, { logger: this.context.logger, taskPlan: this.taskPlan, }); - await emit(agentEvent.toolCall(executableToolCall, formatToolCallMessage(executableToolCall, toolResult))); + yield 
agentEvent.toolCall(executableToolCall, formatToolCallMessage(executableToolCall, toolResult)); if (!isToolErrorResult(toolResult) && toolResult.tool === "plan_todo") { - await emit(agentEvent.taskPlan(toolResult.plan)); + yield agentEvent.taskPlan(toolResult.plan); } afterTool = executableToolCall.tool; nextPrompt = `${nextPrompt}\n\n${formatToolResultForPrompt(toolResult)}\n\n${formatContinuationInstruction( @@ -289,13 +275,30 @@ export class TopchesterAgentRuntime implements AgentRuntime { )}`; } - await emit( - agentEvent.assistantMessage( - "I stopped because the tool loop ended unexpectedly.", - formatAgentMessageMeta(lastModelId, totalDurationMs, tokenUsageTotals) - ), - agentEvent.status("ready") + yield agentEvent.assistantMessage( + "I stopped because the tool loop ended unexpectedly.", + formatAgentMessageMeta(lastModelId, totalDurationMs, tokenUsageTotals) ); + yield agentEvent.status("ready"); + } + + /** + * Compatibility wrapper for callers that still expect a completed event + * array or use the older `onEvent` callback shape. 
+ */ + async submitMessage( + conversation: ConversationTurn[], + message: string, + abortSignal?: AbortSignal, + onEvent?: AgentRuntimeEventSink, + options: AgentRuntimeSubmitMessageOptions = {} + ): Promise { + const events: AgentRuntimeEvent[] = []; + + for await (const event of this.submitMessageStream(conversation, message, abortSignal, options)) { + events.push(event); + await onEvent?.(event); + } return events; } diff --git a/src/cli/run.ts b/src/cli/run.ts index c1334b2..868cf76 100644 --- a/src/cli/run.ts +++ b/src/cli/run.ts @@ -103,13 +103,15 @@ export async function executeRunCommand(context: AppContext, options: RunCommand } else { await session.append({ kind: "message", role: "user", text: options.prompt }); pushJson(jsonEvents, runId, session.sessionId, "user.message", { text: options.prompt, inputType: "prompt" }); - await applyRuntimeEvents({ - events: await runtime.submitMessage(conversation, options.prompt, abortController.signal), - session, - jsonEvents, - runId, - plain: !options.json, - }); + for await (const event of runtime.submitMessageStream(conversation, options.prompt, abortController.signal)) { + await applyRuntimeEvent({ + event, + session, + jsonEvents, + runId, + plain: !options.json, + }); + } } const durationMs = Date.now() - startedAt; @@ -189,17 +191,27 @@ async function applyRuntimeEvents(options: { plain: boolean; }): Promise { for (const event of options.events) { - const payload = runtimeEventToSessionPayload(event); + await applyRuntimeEvent({ ...options, event }); + } +} - if (payload) { - await options.session.append(payload); - } +async function applyRuntimeEvent(options: { + event: AgentRuntimeEvent; + session: SessionHandle; + jsonEvents: RunJsonEvent[]; + runId: string; + plain: boolean; +}): Promise { + const payload = runtimeEventToSessionPayload(options.event); + + if (payload) { + await options.session.append(payload); + } - pushJson(options.jsonEvents, options.runId, options.session.sessionId, event.type, { 
event }); + pushJson(options.jsonEvents, options.runId, options.session.sessionId, options.event.type, { event: options.event }); - if (options.plain) { - printPlainEvent(event); - } + if (options.plain) { + printPlainEvent(options.event); } } diff --git a/src/tui/shell.ts b/src/tui/shell.ts index 1b1b9e3..d37dd62 100644 --- a/src/tui/shell.ts +++ b/src/tui/shell.ts @@ -174,20 +174,21 @@ export class TopchesterTuiShell implements TuiShell { role: "user", text: message, }); - await this.runtime.submitMessage( + for await (const event of this.runtime.submitMessageStream( app.getConversationTurns(), message, abortController.signal, - async (event) => { - if (event.type === "message" && event.role === "assistant") { - reasoningDisplay?.commit(app); - busy.clearActivity(); - } - await this.applyRuntimeEvents(app, [event], tui); - tui.requestRender(); - }, - { onReasoning: reasoningDisplay?.sink } - ); + { + onReasoning: reasoningDisplay?.sink, + } + )) { + if (event.type === "message" && event.role === "assistant") { + reasoningDisplay?.commit(app); + busy.clearActivity(); + } + await this.applyRuntimeEvents(app, [event], tui); + tui.requestRender(); + } } catch (error) { if (cancelled) { app.addMessage(systemMessage("Response stopped.")); diff --git a/test/commands.test.ts b/test/commands.test.ts index c59d1b7..f810ad4 100644 --- a/test/commands.test.ts +++ b/test/commands.test.ts @@ -502,6 +502,72 @@ describe("slash commands", () => { expect(JSON.stringify(events)).not.toContain("checking local context"); }); + it("collects submitMessage results from the runtime stream path", async () => { + function runtimeWithFinalMessage(workspace: string): TopchesterAgentRuntime { + return new TopchesterAgentRuntime({ + ...createTestContext(workspace), + modelGateway: { + async generateAgentStep() { + return { + text: "Done.", + providerId: "fake", + modelId: "fake-agent", + purpose: "agent.primary" as const, + toolCalls: [], + toolProtocol: "text-json" as const, + 
protocolAttempts: [], + providerRejectedTools: false, + warnings: [], + openRouterRoutingApplied: false, + }; + }, + } as unknown as AppContext["modelGateway"], + }); + } + + const streamWorkspace = await mkdtemp(join(tmpdir(), "topchester-stream-runtime-")); + const collectorWorkspace = await mkdtemp(join(tmpdir(), "topchester-stream-runtime-")); + const streamed = await collectRuntimeEvents( + runtimeWithFinalMessage(streamWorkspace).submitMessageStream([], "hello") + ); + const collected = await runtimeWithFinalMessage(collectorWorkspace).submitMessage([], "hello"); + + expect(normalizeRuntimeEventsForComparison(collected)).toEqual(normalizeRuntimeEventsForComparison(streamed)); + }); + + it("propagates aborts through the runtime stream path", async () => { + const workspace = await mkdtemp(join(tmpdir(), "topchester-stream-abort-")); + const abortController = new AbortController(); + const runtime = new TopchesterAgentRuntime({ + ...createTestContext(workspace), + modelGateway: { + async generateAgentStep(request: { abortSignal?: AbortSignal }) { + return new Promise((_, reject) => { + if (request.abortSignal?.aborted) { + reject(new DOMException("Aborted", "AbortError")); + return; + } + + request.abortSignal?.addEventListener( + "abort", + () => { + reject(new DOMException("Aborted", "AbortError")); + }, + { once: true } + ); + }); + }, + } as unknown as AppContext["modelGateway"], + }); + + const iterator = runtime.submitMessageStream([], "abort", abortController.signal)[Symbol.asyncIterator](); + const pending = iterator.next(); + abortController.abort(); + + await expect(pending).rejects.toThrow(/Aborted/u); + await expect(iterator.next()).resolves.toEqual({ value: undefined, done: true }); + }); + it("logs each prompt sent to the model at debug level", async () => { const workspace = await mkdtemp(join(tmpdir(), "topchester-commands-")); await writeFile(join(workspace, "notes.txt"), "hello\n"); @@ -1346,3 +1412,19 @@ async function 
getRuntimeKnowledgeFolderState( ? { exists: event.status.kbExists, isDirectory: event.status.kbIsDirectory } : undefined; } + +async function collectRuntimeEvents(events: AsyncIterable): Promise { + const collected: AgentRuntimeEvent[] = []; + + for await (const event of events) { + collected.push(event); + } + + return collected; +} + +function normalizeRuntimeEventsForComparison(events: AgentRuntimeEvent[]): AgentRuntimeEvent[] { + return events.map((event) => + event.type === "message" && event.meta !== undefined ? { ...event, meta: "" } : event + ); +} diff --git a/test/tui.render.test.ts b/test/tui.render.test.ts index a44d163..6007149 100644 --- a/test/tui.render.test.ts +++ b/test/tui.render.test.ts @@ -1719,6 +1719,14 @@ describe("TUI rendering", () => { async submitSlashCommand() { return []; }, + async *submitMessageStream() { + yield agentEvent.toolCall({ tool: "read_file", args: { path: "README.md" } }, "read_file: README.md"); + firstEventApplied?.(); + await new Promise((resolve) => { + releaseRuntime = resolve; + }); + yield agentEvent.assistantMessage("Done."); + }, async submitMessage(_conversation, _message, _abortSignal, onEvent) { await onEvent?.( agentEvent.toolCall({ tool: "read_file", args: { path: "README.md" } }, "read_file: README.md") @@ -1786,6 +1794,14 @@ describe("TUI rendering", () => { async submitSlashCommand() { return []; }, + async *submitMessageStream(_conversation, _message, _abortSignal, options) { + await options?.onReasoning?.({ type: "delta", text: "checking\nworkspace" }); + reasoningApplied?.(); + await new Promise((resolve) => { + releaseRuntime = resolve; + }); + yield agentEvent.assistantMessage("Done."); + }, async submitMessage(_conversation, _message, _abortSignal, onEvent, options) { await options?.onReasoning?.({ type: "delta", text: "checking\nworkspace" }); reasoningApplied?.(); @@ -1862,6 +1878,14 @@ describe("TUI rendering", () => { async submitSlashCommand() { return []; }, + async *submitMessageStream() { + 
if (Date.now() > 0) { + throw new Error( + "No endpoints found that can handle the requested parameters.\n at doStream\n at streamStep" + ); + } + yield agentEvent.status("unreachable"); + }, async submitMessage() { throw new Error( "No endpoints found that can handle the requested parameters.\n at doStream\n at streamStep" @@ -1976,6 +2000,9 @@ describe("TUI rendering", () => { async submitSlashCommand() { return []; }, + async *submitMessageStream() { + yield agentEvent.assistantMessage("Ready."); + }, async submitMessage(_conversation, _message, _abortSignal, onEvent) { await onEvent?.(agentEvent.assistantMessage("Ready.")); From 63efdc122a894246f90f11b82bc60ad693a329ef Mon Sep 17 00:00:00 2001 From: Dragan Bajcic Date: Fri, 15 May 2026 11:20:16 +0200 Subject: [PATCH 4/9] feat(session): persist child session trees --- docs/SESSIONS.md | 6 + ...5-15-subagent-session-tree-runtime-plan.md | 16 ++- src/session/store.ts | 105 ++++++++++++++++++ test/session.test.ts | 74 ++++++++++++ 4 files changed, 200 insertions(+), 1 deletion(-) diff --git a/docs/SESSIONS.md b/docs/SESSIONS.md index ab178fd..0284af2 100644 --- a/docs/SESSIONS.md +++ b/docs/SESSIONS.md @@ -67,4 +67,10 @@ For existing sessions that do not have tree fields, loaders treat `source` as `user` and `rootSessionId` as the session's own `sessionId`. That keeps older project-local sessions readable without rewriting their JSONL. +Child sessions are stored as normal session folders under the same project-local +session root. Creating a child session writes the child's own `metadata.json` +with `source: "subagent"` and appends a `subagent_started` reference to the +parent `events.jsonl`. Child events stay in the child session's log, so replay +can load a parent alone, list its direct children, or expand the full tree. + Keep model-facing chat roles separate from UI/runtime events. The TUI can show both, but model context should only include what the agent runtime intentionally selects. 
diff --git a/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md b/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md index fac0633..b39bd79 100644 --- a/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md +++ b/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md @@ -246,7 +246,7 @@ Verified: ## Slice 3: Add Child Session Persistence -Status: [ ] Not started +Status: [x] Done Goal: @@ -272,6 +272,20 @@ Verification: - Backwards compatibility test loads an old metadata object with no parent fields. - `pnpm check`. +Completed: + +- Added `createChildSession(...)` with child metadata and parent `subagent_started` event recording. +- Added `listChildSessions(...)` and `loadSessionTree(...)` for direct-child and recursive tree loading. +- Kept parent and child logs separate while preserving root session inheritance across nested children. +- Updated session docs with child-session persistence behavior. + +Verified: + +- `pnpm test -- test/session.test.ts` +- `pnpm typecheck` +- `pnpm check` +- `mise run local-ci` + ## Slice 4: Add Agent Profiles And Tool Permission Filtering Status: [ ] Not started diff --git a/src/session/store.ts b/src/session/store.ts index 0be15ac..4ac375c 100644 --- a/src/session/store.ts +++ b/src/session/store.ts @@ -9,6 +9,7 @@ import { toolCallMessage, type ChatMessage } from "../tui/messages.js"; import { SESSION_EVENT_VERSION, SESSION_METADATA_VERSION, + sessionEventPayload, sessionEventSchema, sessionMetadataSchema, type SessionEvent, @@ -34,12 +35,24 @@ export interface LoadedSession { events: SessionEvent[]; } +export interface LoadedSessionTree { + session: LoadedSession; + children: LoadedSessionTree[]; +} + export interface RehydratedSession { messages: ChatMessage[]; status?: string; taskPlan?: TaskPlanState; } +export interface CreateChildSessionOptions { + parent: SessionHandle; + parentToolCallId: string; + agentProfileId?: string; + title?: string; +} + export function generateSessionId(): string { 
return uuidv7(); } @@ -72,6 +85,53 @@ export async function createSession(workspaceRoot: string): Promise { + validateSessionId(options.parent.sessionId); + const sessionId = generateSessionId(); + const sessionDir = join(getTopchesterSessionsPath(workspaceRoot), sessionId); + const metadataPath = join(sessionDir, "metadata.json"); + const eventsPath = join(sessionDir, "events.jsonl"); + const createdAt = new Date().toISOString(); + const metadata: SessionMetadata = { + version: SESSION_METADATA_VERSION, + sessionId, + rootSessionId: options.parent.metadata.rootSessionId, + parentSessionId: options.parent.sessionId, + parentToolCallId: options.parentToolCallId, + source: "subagent", + ...(options.agentProfileId === undefined ? {} : { agentProfileId: options.agentProfileId }), + ...(options.title === undefined ? {} : { title: options.title }), + workspaceRoot, + createdAt, + updatedAt: createdAt, + lastEventId: 0, + }; + + await mkdir(sessionDir, { recursive: true }); + await writeMetadata(metadataPath, metadata); + await writeFile(eventsPath, "", { flag: "wx" }); + + const child = buildHandle(sessionDir, metadata); + await options.parent.append( + sessionEventPayload.subagentStarted( + { + sessionId: child.sessionId, + parentSessionId: options.parent.sessionId, + parentToolCallId: options.parentToolCallId, + }, + { + ...(options.agentProfileId === undefined ? {} : { agentProfileId: options.agentProfileId }), + ...(options.title === undefined ? 
{} : { title: options.title }), + } + ) + ); + + return child; +} + export async function loadSessionForAppend(workspaceRoot: string, sessionId: string): Promise { const loaded = await loadSession(workspaceRoot, sessionId); @@ -101,6 +161,51 @@ export async function loadSession(workspaceRoot: string, sessionIdOrLatest: stri return { sessionId, sessionDir, metadata, events }; } +export async function listChildSessions(workspaceRoot: string, parentSessionId: string): Promise { + validateSessionId(parentSessionId); + const sessionsPath = getTopchesterSessionsPath(workspaceRoot); + let entries: string[]; + try { + entries = await readdir(sessionsPath); + } catch { + return []; + } + + const children: LoadedSession[] = []; + + for (const entry of entries.filter((candidate) => SESSION_ID_PATTERN.test(candidate)).sort()) { + const metadataPath = join(sessionsPath, entry, "metadata.json"); + let metadata: SessionMetadata; + try { + metadata = await readMetadata(metadataPath); + } catch { + continue; + } + + if (metadata.parentSessionId !== parentSessionId) { + continue; + } + + children.push(await loadSession(workspaceRoot, entry)); + } + + return children.sort((left, right) => { + const byCreatedAt = left.metadata.createdAt.localeCompare(right.metadata.createdAt); + return byCreatedAt === 0 ? 
left.sessionId.localeCompare(right.sessionId) : byCreatedAt; + }); +} + +export async function loadSessionTree(workspaceRoot: string, sessionIdOrLatest: string): Promise { + const session = await loadSession(workspaceRoot, sessionIdOrLatest); + const children = await Promise.all( + (await listChildSessions(workspaceRoot, session.sessionId)).map((child) => + loadSessionTree(workspaceRoot, child.sessionId) + ) + ); + + return { session, children }; +} + export async function resolveLatestSessionId(workspaceRoot: string): Promise { const sessionsPath = getTopchesterSessionsPath(workspaceRoot); let entries: string[]; diff --git a/test/session.test.ts b/test/session.test.ts index 2be7388..6bcb8e3 100644 --- a/test/session.test.ts +++ b/test/session.test.ts @@ -4,11 +4,14 @@ import { join } from "node:path"; import { describe, expect, it } from "vitest"; import { getTopchesterSessionsPath } from "../src/app/paths.js"; import { + createChildSession, createSession, ensureSessionStorage, generateSessionId, + listChildSessions, loadSession, loadSessionForAppend, + loadSessionTree, rehydrateSession, resolveLatestSessionId, } from "../src/session/store.js"; @@ -394,6 +397,77 @@ describe("session store", () => { }); }); + it("creates child sessions with parent links and records the parent lifecycle reference", async () => { + const workspace = await tempWorkspace(); + const parent = await createSession(workspace); + + const child = await createChildSession(workspace, { + parent, + parentToolCallId: "task-call-1", + agentProfileId: "explore", + title: "Inspect runtime", + }); + await child.append({ kind: "message", role: "assistant", text: "Child answer" }); + + const loadedParent = await loadSession(workspace, parent.sessionId); + const loadedChild = await loadSession(workspace, child.sessionId); + + expect(loadedParent.events).toEqual([ + expect.objectContaining({ + kind: "subagent_started", + sessionId: child.sessionId, + parentSessionId: parent.sessionId, + parentToolCallId: 
"task-call-1", + agentProfileId: "explore", + title: "Inspect runtime", + }), + ]); + expect(loadedChild.metadata).toMatchObject({ + sessionId: child.sessionId, + rootSessionId: parent.sessionId, + parentSessionId: parent.sessionId, + parentToolCallId: "task-call-1", + source: "subagent", + agentProfileId: "explore", + title: "Inspect runtime", + }); + expect(loadedChild.events).toEqual([ + expect.objectContaining({ kind: "message", role: "assistant", text: "Child answer" }), + ]); + }); + + it("lists child sessions and loads recursive session trees", async () => { + const workspace = await tempWorkspace(); + const parent = await createSession(workspace); + const firstChild = await createChildSession(workspace, { + parent, + parentToolCallId: "task-call-1", + title: "First", + }); + const secondChild = await createChildSession(workspace, { + parent, + parentToolCallId: "task-call-2", + title: "Second", + }); + const grandchild = await createChildSession(workspace, { + parent: firstChild, + parentToolCallId: "task-call-1-1", + title: "Nested", + }); + + const children = await listChildSessions(workspace, parent.sessionId); + const tree = await loadSessionTree(workspace, parent.sessionId); + + expect(children.map((child) => child.sessionId)).toEqual([firstChild.sessionId, secondChild.sessionId]); + expect(tree.session.sessionId).toBe(parent.sessionId); + expect(tree.children.map((child) => child.session.sessionId)).toEqual([ + firstChild.sessionId, + secondChild.sessionId, + ]); + expect(tree.children[0]?.children.map((child) => child.session.sessionId)).toEqual([grandchild.sessionId]); + expect(tree.children[0]?.children[0]?.session.metadata.rootSessionId).toBe(parent.sessionId); + }); + it("creates parent session folders inside the workspace only", async () => { const workspace = await tempWorkspace(); await mkdir(join(workspace, "nested"), { recursive: true }); From df87186cdf4e0a9786ce2f7c18d6c7621d609e2f Mon Sep 17 00:00:00 2001 From: Dragan Bajcic Date: Fri, 15 
May 2026 11:30:16 +0200 Subject: [PATCH 5/9] feat(agent): add profile tool permissions --- docs/ARCHITECTURE.md | 15 ++ ...5-15-subagent-session-tree-runtime-plan.md | 18 ++- src/agent/profiles.ts | 137 ++++++++++++++++++ src/agent/prompts.ts | 120 ++++++++++++--- src/agent/runtime.ts | 12 +- src/agent/tools.ts | 16 ++ src/agent/tools/executor.ts | 15 +- src/agent/tools/registry.ts | 10 +- src/agent/tools/types.ts | 3 + test/tools.test.ts | 51 +++++++ 10 files changed, 367 insertions(+), 30 deletions(-) create mode 100644 src/agent/profiles.ts diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 7c49213..92d71f0 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -262,6 +262,21 @@ Do not add a big global event bus in V0. A global bus can make small apps feel c If plugins, background jobs, or multiple clients need fan-out later, add a scoped event hub around the runtime/session boundary. Keep events named, versioned, and tied to the session log shape. +## Agent Profiles And Tool Permissions + +Runtime turns execute under an agent profile. The primary profile uses the normal +model slot and can see the full registered tool set. Subagent profiles can add +prompt instructions, choose a model slot, and narrow the tool set. + +Tool permissions are enforced twice: + +- prompt/model schema filtering hides denied tools from the model-facing tool list; +- execution-time checks reject denied tools even if a model emits one anyway. + +Permission composition is monotonic for subagents: a child profile can reduce +the parent permission view, but parent-denied tools remain denied and cannot be +reintroduced by the child profile. + ## Future GUI / IDE Path The TUI should be only one client of the same KB-aware runtime. 
diff --git a/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md b/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md index b39bd79..5afb053 100644 --- a/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md +++ b/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md @@ -288,7 +288,7 @@ Verified: ## Slice 4: Add Agent Profiles And Tool Permission Filtering -Status: [ ] Not started +Status: [x] Done Goal: @@ -319,6 +319,22 @@ Verification: - Existing tool tests pass under the primary profile. - `pnpm check`. +Completed: + +- Added primary and initial subagent profiles with prompt additions, model slots, and tool permission defaults. +- Added `ToolPermissionView` construction with parent-deny inheritance. +- Filtered profile-visible tool prompts and native model tool schemas. +- Enforced tool permissions inside `executeToolCall(...)` so denied tools fail even if emitted by a model. +- Passed the primary profile permission view through normal runtime turns. +- Documented profile and permission composition in `docs/ARCHITECTURE.md`. 
+ +Verified: + +- `pnpm test -- test/tools.test.ts test/commands.test.ts` +- `pnpm typecheck` +- `pnpm check` +- `mise run local-ci` + ## Slice 5: Add `task`/`subagent` Tool Backed By `SubagentManager` Status: [ ] Not started diff --git a/src/agent/profiles.ts b/src/agent/profiles.ts new file mode 100644 index 0000000..da59033 --- /dev/null +++ b/src/agent/profiles.ts @@ -0,0 +1,137 @@ +import { + getToolDefinitionsForPermissions, + toolRegistry, + type RegisteredTool, + type ToolName, +} from "./tools/registry.js"; + +export type AgentProfileMode = "primary" | "subagent" | "all"; +export type ToolPermissionDefault = "allow" | "deny"; + +export interface AgentProfile { + id: string; + displayName: string; + mode: AgentProfileMode; + promptAdditions: string[]; + modelPurpose: "agent.primary" | "agent.fast"; + toolPermissionDefault: ToolPermissionDefault; + allowedTools: readonly ToolName[]; + deniedTools: readonly ToolName[]; +} + +export interface ToolPermissionView { + profileId: string; + defaultPermission: ToolPermissionDefault; + allowedTools: ReadonlySet; + deniedTools: ReadonlySet; +} + +export interface ToolPermissionParentView { + deniedTools?: Iterable; +} + +const READ_ONLY_TOOLS = [ + "read_file", + "list_files", + "grep", + "find_file", + "git_status", + "git_diff", + "git_log", +] as const satisfies readonly ToolName[]; + +export const PRIMARY_AGENT_PROFILE: AgentProfile = { + id: "primary", + displayName: "Primary", + mode: "primary", + promptAdditions: [], + modelPurpose: "agent.primary", + toolPermissionDefault: "allow", + allowedTools: [], + deniedTools: [], +}; + +export const SUBAGENT_PROFILES = [ + { + id: "explore", + displayName: "Explore", + mode: "subagent", + promptAdditions: [ + "You are running as a read-only exploration subagent. 
Inspect the workspace and return concise findings to the parent agent.", + ], + modelPurpose: "agent.fast", + toolPermissionDefault: "deny", + allowedTools: READ_ONLY_TOOLS, + deniedTools: ["plan_todo"], + }, + { + id: "general", + displayName: "General", + mode: "subagent", + promptAdditions: [ + "You are running as a constrained subagent. Work only on the delegated prompt and return a concise result.", + ], + modelPurpose: "agent.primary", + toolPermissionDefault: "allow", + allowedTools: [], + deniedTools: ["plan_todo"], + }, +] as const satisfies readonly AgentProfile[]; + +export const AGENT_PROFILES = [PRIMARY_AGENT_PROFILE, ...SUBAGENT_PROFILES] as const satisfies readonly AgentProfile[]; + +export function resolveAgentProfile(profileId = PRIMARY_AGENT_PROFILE.id): AgentProfile { + const profile = AGENT_PROFILES.find((candidate) => candidate.id === profileId); + + if (!profile) { + throw new Error(`Unknown agent profile "${profileId}".`); + } + + return profile; +} + +export function createToolPermissionView( + profile: AgentProfile, + parent: ToolPermissionParentView = {} +): ToolPermissionView { + const deniedTools = new Set(profile.deniedTools); + + for (const tool of parent.deniedTools ?? 
[]) { + deniedTools.add(tool); + } + + return { + profileId: profile.id, + defaultPermission: profile.toolPermissionDefault, + allowedTools: new Set(profile.allowedTools), + deniedTools, + }; +} + +export function isToolAllowed(permissionView: ToolPermissionView, toolName: string): toolName is ToolName { + if (!isRegisteredToolName(toolName)) { + return false; + } + + if (permissionView.deniedTools.has(toolName)) { + return false; + } + + if (permissionView.defaultPermission === "deny") { + return permissionView.allowedTools.has(toolName); + } + + return true; +} + +export function getProfileToolDefinitions(permissionView: ToolPermissionView): RegisteredTool[] { + return getToolDefinitionsForPermissions((toolName) => isToolAllowed(permissionView, toolName)); +} + +export function getDeniedToolNames(permissionView: ToolPermissionView): ToolName[] { + return [...permissionView.deniedTools].sort(); +} + +function isRegisteredToolName(toolName: string): toolName is ToolName { + return toolName in toolRegistry; +} diff --git a/src/agent/prompts.ts b/src/agent/prompts.ts index 288d8e2..f21d31e 100644 --- a/src/agent/prompts.ts +++ b/src/agent/prompts.ts @@ -1,10 +1,27 @@ +import { isToolAllowed, PRIMARY_AGENT_PROFILE, type AgentProfile, type ToolPermissionView } from "./profiles.js"; +import { type ToolName } from "./tools.js"; import { getToolPromptLines } from "./tools.js"; -export function getChatSystemPrompt(): string { +export interface ChatSystemPromptOptions { + profile?: AgentProfile; + permissions?: ToolPermissionView; +} + +export function getChatSystemPrompt(options: ChatSystemPromptOptions = {}): string { + const profile = options.profile ?? PRIMARY_AGENT_PROFILE; + const canUseTool = (toolName: ToolName) => + options.permissions ? isToolAllowed(options.permissions, toolName) : true; + const toolPromptLines = options.permissions + ? 
getToolPromptLines((toolName) => canUseTool(toolName)) + : getToolPromptLines(); + return [ "You are Topchester, a plain-spoken terminal coding agent for software engineering work.", "Your job is to turn ordinary user requests into concrete repository work: inspect the codebase, make focused changes when tools allow it, verify the result when possible, and report the outcome clearly.", "", + `Agent profile: ${profile.displayName} (${profile.id}).`, + ...profile.promptAdditions, + "", "Working style:", "- Start by understanding the user's intent and the surrounding code before proposing or changing anything non-trivial.", "- Prefer local project evidence over assumptions. Use search and read tools to find relevant files, examples, tests, commands, and conventions.", @@ -17,33 +34,90 @@ export function getChatSystemPrompt(): string { "- Ask a clarifying question only when the missing information blocks useful progress or the safe interpretation is genuinely unclear.", "", "You have these tools available:", - ...getToolPromptLines(), + ...toolPromptLines, "", "Tool use:", "- When using a tool, output exactly one tool JSON object and no prose, markdown, or additional JSON. After the tool result, either output the next single tool JSON object or a final plain-text answer.", "- You already have permission to use the available tools to handle the user's request. Do not ask the user to provide tool results or permission to use an available tool.", "- Do not claim to have read, created, edited, staged, committed, or run anything unless a tool result in this turn confirms it.", - "- Use plan_todo for non-trivial multi-step work before the first substantive repository tool call.", - "- Keep plan_todo items short, user-safe, and usually 2 to 6 items. 
Maintain exactly one in_progress item while work remains, update it after major progress changes, and clear it only when abandoning the plan or when no visible plan is useful.", - "- Do not use plan_todo for simple one-step answers, tiny reads, or trivial edits.", - "- Do not call plan_todo only to summarize completed work before a final answer. If no visible plan is active and the work is done, answer directly.", - "- Use read/search tools when the user asks about files, code, symbols, usages, tests, or project behavior.", - "- Use find_file for path or filename lookup. Use grep for text inside files. If grep output mentions another path, treat that mentioned path as content until find_file or read_file confirms it exists.", - "- Use list_files, grep, find_file, and read_file for exact file listing, search, lookup, and reading tasks.", - "- Use git_status, git_diff, and git_log for Git state, diffs, and history. Prefer these over inspect_command for Git workflow inspection.", - "- Use git_add and git_commit only when the user explicitly asks to stage or commit. Never stage unrelated files, never stage '.', and never commit unless staged paths exactly match the user's request.", - "- Use inspect_command only for quick read-only repo orientation when a short familiar command chain is clearer than several dedicated tool calls.", - "- inspect_command is not a shell. Unsafe commands, shell expansion, scripts, installs, builds, tests, network access, and file mutation are not available through it.", - "- Use read_file before editing a file so your edit is based on current file content and hash metadata.", - "- When passing expected_current_hash to edit_file or write_file, use the current pre-edit/pre-write hash from the latest read_file result for that exact file. Never invent it and never use a predicted after-edit or after-write hash.", - "- Use edit_file for targeted edits to existing files. 
Make multiple disjoint edits for the same file in one call when possible.", - "- Use write_file to create new files by default. It fails when the file already exists unless you are replacing the whole file with overwrite:true and expected_current_hash from read_file.", - "- When the user asks you to create a new file, call write_file. Do not answer that the file was created until write_file succeeds.", - "- Pass write_file create_parent_dirs:true only when the user intent clearly includes creating that folder path.", - "- Do not use inspect_command for file creation or file mutation.", - "- Keep edit_file old_text small but unique. Do not include line labels or grep prefixes in old_text; use exact file text only.", - "- Use edit/write tools when they are available and the user asks you to implement, fix, add, update, or refactor code.", - "- Use command/test tools when they are available and you need to inspect the environment, run tests, format, lint, typecheck, or verify behavior.", + ...(canUseTool("plan_todo") + ? [ + "- Use plan_todo for non-trivial multi-step work before the first substantive repository tool call.", + "- Keep plan_todo items short, user-safe, and usually 2 to 6 items. Maintain exactly one in_progress item while work remains, update it after major progress changes, and clear it only when abandoning the plan or when no visible plan is useful.", + "- Do not use plan_todo for simple one-step answers, tiny reads, or trivial edits.", + "- Do not call plan_todo only to summarize completed work before a final answer. If no visible plan is active and the work is done, answer directly.", + ] + : []), + ...(canUseTool("read_file") || canUseTool("grep") || canUseTool("find_file") || canUseTool("list_files") + ? ["- Use read/search tools when the user asks about files, code, symbols, usages, tests, or project behavior."] + : []), + ...(canUseTool("find_file") && canUseTool("grep") && canUseTool("read_file") + ? 
[ + "- Use find_file for path or filename lookup. Use grep for text inside files. If grep output mentions another path, treat that mentioned path as content until find_file or read_file confirms it exists.", + ] + : []), + ...(canUseTool("list_files") && canUseTool("grep") && canUseTool("find_file") && canUseTool("read_file") + ? ["- Use list_files, grep, find_file, and read_file for exact file listing, search, lookup, and reading tasks."] + : []), + ...(canUseTool("git_status") && canUseTool("git_diff") && canUseTool("git_log") + ? [ + "- Use git_status, git_diff, and git_log for Git state, diffs, and history. Prefer these over inspect_command for Git workflow inspection.", + ] + : []), + ...(canUseTool("git_add") && canUseTool("git_commit") + ? [ + "- Use git_add and git_commit only when the user explicitly asks to stage or commit. Never stage unrelated files, never stage '.', and never commit unless staged paths exactly match the user's request.", + ] + : []), + ...(canUseTool("inspect_command") + ? [ + "- Use inspect_command only for quick read-only repo orientation when a short familiar command chain is clearer than several dedicated tool calls.", + "- inspect_command is not a shell. Unsafe commands, shell expansion, scripts, installs, builds, tests, network access, and file mutation are not available through it.", + ] + : []), + ...(canUseTool("edit_file") && canUseTool("read_file") + ? ["- Use read_file before editing a file so your edit is based on current file content and hash metadata."] + : []), + ...(canUseTool("read_file") && (canUseTool("edit_file") || canUseTool("write_file")) + ? [ + "- When passing expected_current_hash to edit_file or write_file, use the current pre-edit/pre-write hash from the latest read_file result for that exact file. Never invent it and never use a predicted after-edit or after-write hash.", + ] + : []), + ...(canUseTool("edit_file") + ? [ + "- Use edit_file for targeted edits to existing files. 
Make multiple disjoint edits for the same file in one call when possible.", + ] + : []), + ...(canUseTool("write_file") && canUseTool("read_file") + ? [ + "- Use write_file to create new files by default. It fails when the file already exists unless you are replacing the whole file with overwrite:true and expected_current_hash from read_file.", + "- When the user asks you to create a new file, call write_file. Do not answer that the file was created until write_file succeeds.", + "- Pass write_file create_parent_dirs:true only when the user intent clearly includes creating that folder path.", + ] + : []), + ...(canUseTool("write_file") && !canUseTool("read_file") + ? [ + "- Use write_file to create new files by default. It fails when the file already exists unless overwrite:true is available with verified current content.", + "- When the user asks you to create a new file, call write_file. Do not answer that the file was created until write_file succeeds.", + "- Pass write_file create_parent_dirs:true only when the user intent clearly includes creating that folder path.", + ] + : []), + ...(canUseTool("inspect_command") ? ["- Do not use inspect_command for file creation or file mutation."] : []), + ...(canUseTool("edit_file") + ? [ + "- Keep edit_file old_text small but unique. Do not include line labels or grep prefixes in old_text; use exact file text only.", + ] + : []), + ...(canUseTool("edit_file") || canUseTool("write_file") + ? [ + "- Use edit/write tools when they are available and the user asks you to implement, fix, add, update, or refactor code.", + ] + : []), + ...(canUseTool("inspect_command") + ? [ + "- Use command/test tools when they are available and you need to inspect the environment, run tests, format, lint, typecheck, or verify behavior.", + ] + : []), "- After each tool result, decide the next useful action from the new evidence. 
Continue until the request is handled or blocked.", "Do not make up file contents or search results.", ].join("\n"); diff --git a/src/agent/runtime.ts b/src/agent/runtime.ts index 47607b1..dd94849 100644 --- a/src/agent/runtime.ts +++ b/src/agent/runtime.ts @@ -9,6 +9,7 @@ import { executeSlashCommand, parseSlashCommand } from "./commands.js"; import { type ConversationTurn, buildConversationPrompt } from "./conversation.js"; import { ABORT_CHOICE_VALUE, agentEvent, choiceAction, type AgentRuntimeEvent } from "./events.js"; import { checkAgentReady } from "./health.js"; +import { createToolPermissionView, getProfileToolDefinitions, PRIMARY_AGENT_PROFILE } from "./profiles.js"; import { getChatSystemPrompt } from "./prompts.js"; import { createTaskPlanController, hasOpenTaskPlan, type TaskPlanState } from "./task-plan.js"; import { type ModelAgentResult, type ModelReasoningSink } from "../model/index.js"; @@ -16,7 +17,6 @@ import { executeToolCall, isToolErrorResult, parseToolCallWithSource, - toolRegistry, type ModelToolCall, type ToolCall, type ToolExecutionResult, @@ -131,6 +131,9 @@ export class TopchesterAgentRuntime implements AgentRuntime { let nextPrompt = prompt; let totalDurationMs = 0; const tokenUsageTotals: TurnTokenUsageTotals = {}; + const profile = PRIMARY_AGENT_PROFILE; + const permissions = createToolPermissionView(profile); + const tools = getProfileToolDefinitions(permissions); let lastModelId = "model"; let afterTool: ToolCall["tool"] | undefined; let toolProtocolOverride = readToolProtocolEnvOverride(); @@ -138,7 +141,7 @@ export class TopchesterAgentRuntime implements AgentRuntime { for (let toolCalls = 0; toolCalls <= MAX_TOOL_CALLS_PER_TURN; toolCalls += 1) { const startedAt = Date.now(); - const system = getChatSystemPrompt(); + const system = getChatSystemPrompt({ profile, permissions }); this.context.logger.debug( { event: "model_prompt", @@ -159,6 +162,7 @@ export class TopchesterAgentRuntime implements AgentRuntime { abortSignal, 
toolProtocol: toolProtocolOverride, onReasoning: options.onReasoning, + tools, }); const durationMs = Date.now() - startedAt; const toolCall = result.toolCalls[0]; @@ -263,6 +267,8 @@ export class TopchesterAgentRuntime implements AgentRuntime { const toolResult = await executeToolCall(this.context.workspaceRoot, executableToolCall, { logger: this.context.logger, taskPlan: this.taskPlan, + profile, + permissions, }); yield agentEvent.toolCall(executableToolCall, formatToolCallMessage(executableToolCall, toolResult)); if (!isToolErrorResult(toolResult) && toolResult.tool === "plan_todo") { @@ -430,12 +436,12 @@ async function generateAgentStep( abortSignal?: AbortSignal; toolProtocol?: ToolProtocolOverride; onReasoning?: ModelReasoningSink; + tools: ReturnType; } ): Promise { if ("generateAgentStep" in context.modelGateway && typeof context.modelGateway.generateAgentStep === "function") { return context.modelGateway.generateAgentStep({ ...request, - tools: Object.values(toolRegistry), }); } diff --git a/src/agent/tools.ts b/src/agent/tools.ts index 3436e92..3d080b9 100644 --- a/src/agent/tools.ts +++ b/src/agent/tools.ts @@ -104,8 +104,24 @@ export { export { parseNativeToolCall, parseToolCall, parseToolCallWithSource } from "./tools/parser.js"; export { toAiSdkToolSet } from "./tools/ai-sdk-tools.js"; export { readFileTool, readWorkspaceFile, type ReadFileToolArgs, type ReadFileToolCall } from "./tools/read-file.js"; +export { + AGENT_PROFILES, + PRIMARY_AGENT_PROFILE, + SUBAGENT_PROFILES, + createToolPermissionView, + getDeniedToolNames, + getProfileToolDefinitions, + isToolAllowed, + resolveAgentProfile, + type AgentProfile, + type AgentProfileMode, + type ToolPermissionDefault, + type ToolPermissionParentView, + type ToolPermissionView, +} from "./profiles.js"; export { getToolDefinition, + getToolDefinitionsForPermissions, getToolPromptLines, isToolName, toolRegistry, diff --git a/src/agent/tools/executor.ts b/src/agent/tools/executor.ts index b1d558f..c197c5d 
100644 --- a/src/agent/tools/executor.ts +++ b/src/agent/tools/executor.ts @@ -1,4 +1,5 @@ -import { getToolDefinition, type ToolCall, type ToolResult } from "./registry.js"; +import { isToolAllowed, type AgentProfile, type ToolPermissionView } from "../profiles.js"; +import { getToolDefinition, isToolName, type ToolCall, type ToolResult } from "./registry.js"; import { type ToolDefinition, type ToolContext, type ToolExecutionResult } from "./types.js"; import { type Logger } from "pino"; import { type TaskPlanController } from "../task-plan.js"; @@ -7,6 +8,8 @@ export interface ExecuteToolCallOptions { pathEnv?: string; logger?: Logger; taskPlan?: TaskPlanController; + profile?: AgentProfile; + permissions?: ToolPermissionView; } type RuntimeToolDefinition = ToolDefinition; @@ -22,9 +25,19 @@ export async function executeToolCall( pathEnv: options.pathEnv, logger: options.logger, taskPlan: options.taskPlan, + profile: options.profile, + permissions: options.permissions, }; try { + if (!isToolName(call.tool)) { + throw new Error(`Unknown tool "${call.tool}".`); + } + + if (options.permissions && !isToolAllowed(options.permissions, call.tool)) { + throw new Error(`Tool "${call.tool}" is not allowed for agent profile "${options.permissions.profileId}".`); + } + const definition = getToolDefinition(call.tool) as RuntimeToolDefinition; const parsedCall = { ...call, args: definition.argsSchema.parse(call.args) } as ToolCall; diff --git a/src/agent/tools/registry.ts b/src/agent/tools/registry.ts index 231f435..fb30c15 100644 --- a/src/agent/tools/registry.ts +++ b/src/agent/tools/registry.ts @@ -38,6 +38,12 @@ export function getToolDefinition(name: Name): (typeof to return toolRegistry[name]; } -export function getToolPromptLines(): string[] { - return Object.values(toolRegistry).map((tool) => tool.prompt); +export function getToolPromptLines(filter?: (toolName: ToolName) => boolean): string[] { + return getToolDefinitionsForPermissions(filter).map((tool) => 
tool.prompt); +} + +export function getToolDefinitionsForPermissions(filter?: (toolName: ToolName) => boolean): RegisteredTool[] { + return Object.entries(toolRegistry) + .filter(([name]) => filter?.(name as ToolName) ?? true) + .map(([, tool]) => tool); } diff --git a/src/agent/tools/types.ts b/src/agent/tools/types.ts index 4fbadd7..3ae8da9 100644 --- a/src/agent/tools/types.ts +++ b/src/agent/tools/types.ts @@ -1,12 +1,15 @@ import { type z } from "zod"; import { type Logger } from "pino"; import { type TaskPlanController } from "../task-plan.js"; +import { type AgentProfile, type ToolPermissionView } from "../profiles.js"; export interface ToolContext { workspaceRoot: string; pathEnv?: string; logger?: Logger; taskPlan?: TaskPlanController; + profile?: AgentProfile; + permissions?: ToolPermissionView; } export interface ToolCall { diff --git a/test/tools.test.ts b/test/tools.test.ts index a2e055d..3c8c23d 100644 --- a/test/tools.test.ts +++ b/test/tools.test.ts @@ -5,15 +5,19 @@ import { join } from "node:path"; import { describe, expect, it } from "vitest"; import { applyExactEdits, + createToolPermissionView, editWorkspaceFile, executeToolCall, findWorkspaceFilesByName, + getProfileToolDefinitions, getToolPromptLines, grepWorkspace, isToolErrorResult, + isToolAllowed, listWorkspaceFiles, parseToolCall, parseToolCallWithSource, + resolveAgentProfile, readWorkspaceFile, toAiSdkToolSet, writeWorkspaceFile, @@ -306,6 +310,53 @@ describe("agent tools", () => { expect(result.content).toContain("plan_todo requires runtime task-plan state"); }); + it("filters denied tools out of profile prompts and native tool schemas", () => { + const profile = resolveAgentProfile("explore"); + const permissions = createToolPermissionView(profile); + const prompt = getChatSystemPrompt({ profile, permissions }); + const nativeTools = toAiSdkToolSet(getProfileToolDefinitions(permissions)); + + expect(prompt).toContain("Agent profile: Explore (explore)."); + 
expect(prompt).not.toContain("plan_todo:"); + expect(prompt).not.toContain("edit_file:"); + expect(prompt).not.toContain("write_file:"); + expect(prompt).not.toContain("git_commit:"); + expect(Object.keys(nativeTools).sort()).toEqual([ + "find_file", + "git_diff", + "git_log", + "git_status", + "grep", + "list_files", + "read_file", + ]); + }); + + it("rejects denied tools at execution time", async () => { + const workspace = await mkdtemp(join(tmpdir(), "topchester-tools-")); + const permissions = createToolPermissionView(resolveAgentProfile("explore")); + const call = parseToolCall( + '{"tool":"edit_file","args":{"path":"a.txt","edits":[{"old_text":"a","new_text":"b"}]}}' + ); + + if (!call) { + throw new Error("Expected edit_file tool call to parse."); + } + + const result = await executeToolCall(workspace, call, { permissions }); + + expect(isToolErrorResult(result)).toBe(true); + expect(result.content).toContain('Tool "edit_file" is not allowed for agent profile "explore"'); + }); + + it("inherits parent denied tools when building child profile permissions", () => { + const profile = resolveAgentProfile("explore"); + const permissions = createToolPermissionView(profile, { deniedTools: ["read_file"] }); + + expect(isToolAllowed(permissions, "read_file")).toBe(false); + expect(isToolAllowed(permissions, "grep")).toBe(true); + }); + it("gets model prompt lines from the tool registry", () => { expect(getToolPromptLines()).toEqual([ 'plan_todo: replace the visible session task plan for non-trivial multi-step work; keep 2-6 short items, exactly one in_progress item while work remains, and use [] only to clear. Do not use plan_todo just to report completed work before a final answer. 
To use it, reply with only JSON: {"tool":"plan_todo","args":{"items":[{"text":"Inspect relevant files","status":"in_progress"},{"text":"Implement focused change","status":"pending"}]}}', From b0f77253e22f302b9e20e8d65e346706cfb8f05d Mon Sep 17 00:00:00 2001 From: Dragan Bajcic Date: Fri, 15 May 2026 11:41:56 +0200 Subject: [PATCH 6/9] feat(agent): add task subagent tool --- docs/ARCHITECTURE.md | 5 + docs/cli.md | 2 + ...5-15-subagent-session-tree-runtime-plan.md | 18 ++- docs/tui.md | 3 + src/agent/profiles.ts | 4 +- src/agent/runtime.ts | 113 ++++++++++++++-- src/agent/subagents.ts | 124 ++++++++++++++++++ src/agent/tools.ts | 1 + src/agent/tools/executor.ts | 10 ++ src/agent/tools/registry.ts | 2 + src/agent/tools/task.ts | 55 ++++++++ src/agent/tools/types.ts | 5 + src/cli/run.ts | 14 +- src/session/runtime-payloads.ts | 74 +++++++++++ src/session/store.ts | 29 ++-- src/tui/layout.ts | 1 + src/tui/messages.ts | 38 +++++- src/tui/runtime-events.ts | 56 +++++++- src/tui/shell.ts | 80 ++--------- test/commands.test.ts | 81 ++++++++++++ test/tools.test.ts | 1 + test/tui.render.test.ts | 33 ++++- 22 files changed, 639 insertions(+), 110 deletions(-) create mode 100644 src/agent/subagents.ts create mode 100644 src/agent/tools/task.ts create mode 100644 src/session/runtime-payloads.ts diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 92d71f0..279abba 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -277,6 +277,11 @@ Permission composition is monotonic for subagents: a child profile can reduce the parent permission view, but parent-denied tools remain denied and cannot be reintroduced by the child profile. +The `task` tool is the first subagent entrypoint. It creates a real child +session, runs the delegated prompt under a subagent profile, forwards child +runtime events to the parent stream, and returns one bounded result to the +parent model. + ## Future GUI / IDE Path The TUI should be only one client of the same KB-aware runtime. 
diff --git a/docs/cli.md b/docs/cli.md index a9b628f..45b904b 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -77,6 +77,7 @@ Current behavior: - The coding loop can use workspace-scoped file tools: `read_file`, `list_files`, `grep`, `find_file`, `edit_file`, `write_file`, and `inspect_command`. - The coding loop can use structured Git tools: `git_status`, `git_diff`, `git_log`, `git_add`, and `git_commit`. - The coding loop can use `plan_todo` to keep a visible session-only task plan during non-trivial multi-step work. Completed-only `plan_todo` text emitted with a final answer is ignored when no visible plan is open, so accidental closed-plan updates do not render as raw chat text. +- The coding loop can use `task` to delegate focused read-only exploration or isolated analysis to a child agent session. The parent receives a bounded task result while child events are persisted in the child session log and forwarded as runtime events. - `git_status`, `git_diff`, and `git_log` are the preferred path for Git state, diffs, and recent history. `inspect_command` can still inspect read-only Git commands, but it is an orientation fallback rather than the normal Git workflow. - `git_add` stages only explicit paths whose current status was acknowledged. It rejects broad pathspecs such as `.` and does not stage unrelated files by default. - `git_commit` commits only when staged paths exactly match `expected_staged_paths`. The model prompt still tells the agent not to stage or commit unless the user explicitly asks. @@ -109,6 +110,7 @@ Current behavior: - Emits startup KB status before the prompt runs. - Persists user messages and runtime events to the session log. - Persists `plan_todo` task-plan events to the session log. Resume restores the latest visible plan without adding task-plan rows to future model context. +- Persists child `task` sessions separately under the same project-local session root and records parent-child links in session metadata. 
- Includes a per-run `runId` in structured logs when `TOPCHESTER_LOG_LEVEL` enables logging. - Routes slash-command prompts such as `/kb status` through the same command dispatcher used by the TUI. - Does not open the interactive TUI. diff --git a/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md b/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md index 5afb053..d65f2e7 100644 --- a/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md +++ b/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md @@ -337,7 +337,7 @@ Verified: ## Slice 5: Add `task`/`subagent` Tool Backed By `SubagentManager` -Status: [ ] Not started +Status: [x] Done Goal: @@ -371,6 +371,22 @@ Verification: - TUI render test covers running, completed, and failed child task states. - `pnpm check`. +Completed: + +- Added the `task` tool and registered it in the model-facing tool registry. +- Added `SubagentManager` as a runtime service that creates child sessions, runs child runtimes, forwards child events, persists child logs, and returns bounded parent-visible results. +- Passed session, permission, abort, tool-call id, event sink, and subagent-manager capabilities through tool execution context. +- Rendered subagent lifecycle and forwarded child message/tool events as nested task rows in the TUI. +- Moved runtime-event persistence mapping into a session helper so parent and child logs use one mapping. +- Updated CLI/TUI/architecture docs for `task` and child-session behavior. 
+ +Verified: + +- `pnpm test -- test/tools.test.ts test/commands.test.ts test/session.test.ts test/tui.render.test.ts` +- `pnpm typecheck` +- `pnpm check` +- `mise run local-ci` + ## Slice 6: Add Parallel Execution For `task` Only Status: [ ] Not started diff --git a/docs/tui.md b/docs/tui.md index 4e4bfd0..4ece57c 100644 --- a/docs/tui.md +++ b/docs/tui.md @@ -90,6 +90,7 @@ If the KB is missing, empty, misconfigured, or not current, the startup KB statu The agent can use these workspace-scoped tools from the TUI: +- `task` — delegate focused read-only exploration or isolated analysis to a child agent session. - `plan_todo` — replace the visible session task plan for multi-step work. - `read_file` — read a UTF-8 file inside the workspace and return hash metadata. - `list_files` — list files and folders inside a workspace folder. @@ -107,6 +108,8 @@ Examples: ```text plan_todo: 3 items, 1 active read_file: README.md +↳ task: Inspect runtime (running) +↳ task: Inspect runtime (completed) edit_file: src/example.ts (changed +1/-1) inspect_command: pwd && rg --files docs/plans | head -20 ``` diff --git a/src/agent/profiles.ts b/src/agent/profiles.ts index da59033..f35c3ad 100644 --- a/src/agent/profiles.ts +++ b/src/agent/profiles.ts @@ -62,7 +62,7 @@ export const SUBAGENT_PROFILES = [ modelPurpose: "agent.fast", toolPermissionDefault: "deny", allowedTools: READ_ONLY_TOOLS, - deniedTools: ["plan_todo"], + deniedTools: ["task", "plan_todo"], }, { id: "general", @@ -74,7 +74,7 @@ export const SUBAGENT_PROFILES = [ modelPurpose: "agent.primary", toolPermissionDefault: "allow", allowedTools: [], - deniedTools: ["plan_todo"], + deniedTools: ["task", "plan_todo"], }, ] as const satisfies readonly AgentProfile[]; diff --git a/src/agent/runtime.ts b/src/agent/runtime.ts index dd94849..df3ac1e 100644 --- a/src/agent/runtime.ts +++ b/src/agent/runtime.ts @@ -9,9 +9,18 @@ import { executeSlashCommand, parseSlashCommand } from "./commands.js"; import { type ConversationTurn, 
buildConversationPrompt } from "./conversation.js"; import { ABORT_CHOICE_VALUE, agentEvent, choiceAction, type AgentRuntimeEvent } from "./events.js"; import { checkAgentReady } from "./health.js"; -import { createToolPermissionView, getProfileToolDefinitions, PRIMARY_AGENT_PROFILE } from "./profiles.js"; +import { + createToolPermissionView, + getProfileToolDefinitions, + isToolAllowed, + PRIMARY_AGENT_PROFILE, + type AgentProfile, + type ToolPermissionView, +} from "./profiles.js"; import { getChatSystemPrompt } from "./prompts.js"; +import { SubagentManager } from "./subagents.js"; import { createTaskPlanController, hasOpenTaskPlan, type TaskPlanState } from "./task-plan.js"; +import { type SessionHandle } from "../session/store.js"; import { type ModelAgentResult, type ModelReasoningSink } from "../model/index.js"; import { executeToolCall, @@ -56,10 +65,14 @@ export type AgentRuntimeEventSink = (event: AgentRuntimeEvent) => void | Promise export interface AgentRuntimeSubmitMessageOptions { onReasoning?: ModelReasoningSink; + session?: SessionHandle; } export interface TopchesterAgentRuntimeOptions { disableL1Context?: boolean; + profile?: AgentProfile; + parentPermissions?: ToolPermissionView; + session?: SessionHandle; } export class TopchesterAgentRuntime implements AgentRuntime { @@ -131,9 +144,25 @@ export class TopchesterAgentRuntime implements AgentRuntime { let nextPrompt = prompt; let totalDurationMs = 0; const tokenUsageTotals: TurnTokenUsageTotals = {}; - const profile = PRIMARY_AGENT_PROFILE; - const permissions = createToolPermissionView(profile); + const profile = this.options.profile ?? PRIMARY_AGENT_PROFILE; + const permissions = createToolPermissionView(profile, { + deniedTools: this.options.parentPermissions?.deniedTools, + }); const tools = getProfileToolDefinitions(permissions); + const session = options.session ?? 
this.options.session; + const subagents = new SubagentManager({ + context: this.context, + parentSession: session, + parentProfile: profile, + parentPermissions: permissions, + createRuntime: ({ profile: childProfile, parentPermissions, session: childSession }) => + new TopchesterAgentRuntime(this.context, { + ...this.options, + profile: childProfile, + parentPermissions, + session: childSession, + }), + }); let lastModelId = "model"; let afterTool: ToolCall["tool"] | undefined; let toolProtocolOverride = readToolProtocolEnvOverride(); @@ -264,12 +293,25 @@ export class TopchesterAgentRuntime implements AgentRuntime { return; } - const toolResult = await executeToolCall(this.context.workspaceRoot, executableToolCall, { + const toolEventQueue = createRuntimeEventQueue(); + const toolResultPromise = executeToolCall(this.context.workspaceRoot, executableToolCall, { logger: this.context.logger, taskPlan: this.taskPlan, profile, permissions, + subagents, + abortSignal, + toolCallId: toolCall.id, + eventSink: (event) => toolEventQueue.push(event), + }).finally(() => { + toolEventQueue.close(); }); + + for await (const event of toolEventQueue) { + yield event; + } + + const toolResult = await toolResultPromise; yield agentEvent.toolCall(executableToolCall, formatToolCallMessage(executableToolCall, toolResult)); if (!isToolErrorResult(toolResult) && toolResult.tool === "plan_todo") { yield agentEvent.taskPlan(toolResult.plan); @@ -277,7 +319,8 @@ export class TopchesterAgentRuntime implements AgentRuntime { afterTool = executableToolCall.tool; nextPrompt = `${nextPrompt}\n\n${formatToolResultForPrompt(toolResult)}\n\n${formatContinuationInstruction( result.toolProtocol, - toolResult + toolResult, + isToolAllowed(permissions, "plan_todo") )}`; } @@ -420,6 +463,46 @@ export class TopchesterAgentRuntime implements AgentRuntime { } } +interface RuntimeEventQueue { + push(event: AgentRuntimeEvent): void; + close(): void; + [Symbol.asyncIterator](): AsyncIterator; +} + +function 
createRuntimeEventQueue(): RuntimeEventQueue { + const events: AgentRuntimeEvent[] = []; + let closed = false; + let notify: (() => void) | undefined; + + return { + push(event) { + events.push(event); + notify?.(); + notify = undefined; + }, + + close() { + closed = true; + notify?.(); + notify = undefined; + }, + + async *[Symbol.asyncIterator]() { + while (!closed || events.length > 0) { + const event = events.shift(); + if (event) { + yield event; + continue; + } + + await new Promise((resolve) => { + notify = resolve; + }); + } + }, + }; +} + /** * Calls the configured model gateway for a single agent step and normalizes * the result into the newer `ModelAgentResult` shape. Gateways that implement @@ -635,6 +718,10 @@ function formatToolResultForPrompt(result: ToolExecutionResult): str return [`Tool result from ${result.tool}:`, result.content].join("\n"); } + if (result.tool === "task") { + return [`Tool result from ${result.tool}:`, result.content].join("\n"); + } + if (result.tool === "edit_file") { return [ `Tool result from ${result.tool}${path}:`, @@ -770,7 +857,11 @@ function formatToolResultForPrompt(result: ToolExecutionResult): str * restates the current tool-call protocol so the next model step remains * parseable by the runtime. */ -function formatContinuationInstruction(protocol: ToolProtocol, result: ToolExecutionResult): string { +function formatContinuationInstruction( + protocol: ToolProtocol, + result: ToolExecutionResult, + canUsePlanTodo = true +): string { const toolInstruction = protocol === "text-xml" ? "If another tool is needed, reply with only one XML tool call." 
@@ -785,8 +876,10 @@ function formatContinuationInstruction(protocol: ToolProtocol, result: ToolExecu return [ "Continue the user's request using the tool result above and the visible plan when one is active.", resultInstruction, - "Update plan_todo after major progress changes.", - "Before a final answer, close the visible plan by calling plan_todo with all finished items marked completed, or with [] if abandoning the plan.", + canUsePlanTodo ? "Update plan_todo after major progress changes." : "", + canUsePlanTodo + ? "Before a final answer, close the visible plan by calling plan_todo with all finished items marked completed, or with [] if abandoning the plan." + : "", toolInstruction, "Otherwise answer the user. Do not guess.", ] @@ -833,6 +926,10 @@ function formatToolCallMessage(call: ToolCall, result?: ToolExecutionResult; +} + +export interface SubagentManagerOptions { + context: AppContext; + parentSession?: SessionHandle; + parentProfile: AgentProfile; + parentPermissions: ToolPermissionView; + createRuntime(options: { + profile: AgentProfile; + parentPermissions: ToolPermissionView; + session: SessionHandle; + }): SubagentRuntime; +} + +export interface RunSubagentTaskOptions { + description: string; + prompt: string; + subagentType?: string; + taskId?: string; + parentToolCallId: string; + eventSink?: (event: AgentRuntimeEvent) => void | Promise; + abortSignal?: AbortSignal; +} + +export interface SubagentTaskRunResult { + sessionId: string; + status: "completed" | "failed"; + result: string; + profileId: string; +} + +export class SubagentManager { + constructor(private readonly options: SubagentManagerOptions) {} + + async runTask(options: RunSubagentTaskOptions): Promise { + const parentSession = this.options.parentSession; + + if (!parentSession) { + throw new Error("task requires an active persisted session."); + } + + const profile = resolveAgentProfile(options.subagentType ?? 
"explore"); + if (profile.mode !== "subagent" && profile.mode !== "all") { + throw new Error(`Agent profile "${profile.id}" cannot be used for subagent tasks.`); + } + + const child = await createChildSession(this.options.context.workspaceRoot, { + parent: parentSession, + parentToolCallId: options.parentToolCallId, + agentProfileId: profile.id, + title: options.description, + recordParentEvent: false, + }); + const reference = { + sessionId: child.sessionId, + parentSessionId: parentSession.sessionId, + parentToolCallId: options.parentToolCallId, + }; + + await options.eventSink?.( + agentEvent.subagentStarted({ + ...reference, + agentProfileId: profile.id, + title: options.description, + }) + ); + + const childRuntime = this.options.createRuntime({ + profile, + parentPermissions: this.options.parentPermissions, + session: child, + }); + let finalResponse = ""; + + try { + for await (const childEvent of childRuntime.submitMessageStream([], options.prompt, options.abortSignal, { + session: child, + })) { + const payload = runtimeEventToSessionPayload(childEvent); + if (payload) { + await child.append(payload); + } + if (childEvent.type === "message" && childEvent.role === "assistant") { + finalResponse = childEvent.text; + } + await options.eventSink?.(agentEvent.subagentEvent(reference, childEvent)); + } + + const result = finalResponse.trim() || "Subagent completed without an assistant response."; + await options.eventSink?.(agentEvent.subagentCompleted({ ...reference, result })); + + return { + sessionId: child.sessionId, + status: "completed", + result, + profileId: profile.id, + }; + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + await options.eventSink?.(agentEvent.subagentFailed({ ...reference, error: message })); + + return { + sessionId: child.sessionId, + status: "failed", + result: message, + profileId: profile.id, + }; + } + } +} diff --git a/src/agent/tools.ts b/src/agent/tools.ts index 3d080b9..46c4145 100644 --- a/src/agent/tools.ts +++ b/src/agent/tools.ts @@ -1,4 +1,5 @@ export { executeToolCall, type ExecuteToolCallOptions } from "./tools/executor.js"; +export { taskArgsSchema, taskTool, type TaskToolArgs, type TaskToolCall, type TaskToolResult } from "./tools/task.js"; export { planTodoTool, type PlanTodoToolCall, type PlanTodoToolResult } from "./tools/plan-todo.js"; export { applyTaskPlanUpdate, diff --git a/src/agent/tools/executor.ts b/src/agent/tools/executor.ts index c197c5d..17d66a6 100644 --- a/src/agent/tools/executor.ts +++ b/src/agent/tools/executor.ts @@ -1,4 +1,6 @@ import { isToolAllowed, type AgentProfile, type ToolPermissionView } from "../profiles.js"; +import { type AgentRuntimeEvent } from "../events.js"; +import { type SubagentManager } from "../subagents.js"; import { getToolDefinition, isToolName, type ToolCall, type ToolResult } from "./registry.js"; import { type ToolDefinition, type ToolContext, type ToolExecutionResult } from "./types.js"; import { type Logger } from "pino"; @@ -10,6 +12,10 @@ export interface ExecuteToolCallOptions { taskPlan?: TaskPlanController; profile?: AgentProfile; permissions?: ToolPermissionView; + subagents?: SubagentManager; + eventSink?: (event: AgentRuntimeEvent) => void | Promise; + abortSignal?: AbortSignal; + toolCallId?: string; } type RuntimeToolDefinition = ToolDefinition; @@ -27,6 +33,10 @@ export async function executeToolCall( taskPlan: options.taskPlan, profile: options.profile, permissions: options.permissions, + subagents: options.subagents, + eventSink: options.eventSink, + abortSignal: options.abortSignal, + toolCallId: options.toolCallId, }; try { diff --git 
a/src/agent/tools/registry.ts b/src/agent/tools/registry.ts index fb30c15..ad8e61f 100644 --- a/src/agent/tools/registry.ts +++ b/src/agent/tools/registry.ts @@ -6,10 +6,12 @@ import { inspectCommandTool } from "./inspect-command.js"; import { listFilesTool } from "./list-files.js"; import { planTodoTool } from "./plan-todo.js"; import { readFileTool } from "./read-file.js"; +import { taskTool } from "./task.js"; import { type ToolCallForDefinition, type ToolResultForDefinition } from "./types.js"; import { writeFileTool } from "./write-file.js"; export const toolRegistry = { + [taskTool.name]: taskTool, [planTodoTool.name]: planTodoTool, [readFileTool.name]: readFileTool, [listFilesTool.name]: listFilesTool, diff --git a/src/agent/tools/task.ts b/src/agent/tools/task.ts new file mode 100644 index 0000000..baa2394 --- /dev/null +++ b/src/agent/tools/task.ts @@ -0,0 +1,55 @@ +import { z } from "zod"; +import { defineTool, type ToolCall, type ToolResult } from "./types.js"; + +export const taskArgsSchema = z.object({ + description: z.string().min(1), + prompt: z.string().min(1), + subagent_type: z.string().optional(), + task_id: z.string().optional(), +}); + +export type TaskToolArgs = z.infer; +export type TaskToolCall = ToolCall<"task", TaskToolArgs>; + +export interface TaskToolResult extends ToolResult<"task"> { + childSessionId: string; + status: "completed" | "failed"; + profileId: string; +} + +export const taskTool = defineTool({ + name: "task", + description: "Delegate a focused prompt to a constrained child agent session.", + prompt: + 'task: delegate focused read-only research or isolated analysis to a child agent session. Use it when parallel context gathering would help. 
To use it, reply with only JSON: {"tool":"task","args":{"description":"Inspect runtime event flow","prompt":"Read the runtime and summarize how events are emitted.","subagent_type":"explore"}}', + argsSchema: taskArgsSchema, + async execute(context, args): Promise { + if (!context.subagents) { + throw new Error("task requires a runtime subagent manager."); + } + + const result = await context.subagents.runTask({ + description: args.description, + prompt: args.prompt, + subagentType: args.subagent_type, + taskId: args.task_id, + parentToolCallId: context.toolCallId ?? args.task_id ?? "task", + eventSink: context.eventSink, + abortSignal: context.abortSignal, + }); + + return { + tool: "task", + childSessionId: result.sessionId, + status: result.status, + profileId: result.profileId, + content: [ + `Task ${result.status}: ${args.description}`, + `child_session: ${result.sessionId}`, + `profile: ${result.profileId}`, + "", + result.result, + ].join("\n"), + }; + }, +}); diff --git a/src/agent/tools/types.ts b/src/agent/tools/types.ts index 3ae8da9..a8fc988 100644 --- a/src/agent/tools/types.ts +++ b/src/agent/tools/types.ts @@ -2,6 +2,7 @@ import { type z } from "zod"; import { type Logger } from "pino"; import { type TaskPlanController } from "../task-plan.js"; import { type AgentProfile, type ToolPermissionView } from "../profiles.js"; +import { type SubagentManager } from "../subagents.js"; export interface ToolContext { workspaceRoot: string; @@ -10,6 +11,10 @@ export interface ToolContext { taskPlan?: TaskPlanController; profile?: AgentProfile; permissions?: ToolPermissionView; + subagents?: SubagentManager; + eventSink?: (event: import("../events.js").AgentRuntimeEvent) => void | Promise; + abortSignal?: AbortSignal; + toolCallId?: string; } export interface ToolCall { diff --git a/src/cli/run.ts b/src/cli/run.ts index 868cf76..ccb3455 100644 --- a/src/cli/run.ts +++ b/src/cli/run.ts @@ -15,11 +15,8 @@ import { rehydrateSession, type SessionHandle, } from 
"../session/store.js"; -import { - runtimeEventToSessionPayload, - slashCommandToSessionPayload, - chatMessageToSessionPayload, -} from "../tui/shell.js"; +import { slashCommandToSessionPayload, chatMessageToSessionPayload } from "../tui/shell.js"; +import { runtimeEventToSessionPayload as runtimeEventToSessionPayloadFromSession } from "../session/runtime-payloads.js"; import { getStartupThreadMessages } from "../tui/status.js"; export interface RunCommandOptions { @@ -103,7 +100,9 @@ export async function executeRunCommand(context: AppContext, options: RunCommand } else { await session.append({ kind: "message", role: "user", text: options.prompt }); pushJson(jsonEvents, runId, session.sessionId, "user.message", { text: options.prompt, inputType: "prompt" }); - for await (const event of runtime.submitMessageStream(conversation, options.prompt, abortController.signal)) { + for await (const event of runtime.submitMessageStream(conversation, options.prompt, abortController.signal, { + session, + })) { await applyRuntimeEvent({ event, session, @@ -167,6 +166,7 @@ async function loadConversation(workspaceRoot: string, resume: string): Promise< case "system": case "thinking": case "tool_call": + case "subagent": case "modal": return []; } @@ -202,7 +202,7 @@ async function applyRuntimeEvent(options: { runId: string; plain: boolean; }): Promise { - const payload = runtimeEventToSessionPayload(options.event); + const payload = runtimeEventToSessionPayloadFromSession(options.event); if (payload) { await options.session.append(payload); diff --git a/src/session/runtime-payloads.ts b/src/session/runtime-payloads.ts new file mode 100644 index 0000000..da902e3 --- /dev/null +++ b/src/session/runtime-payloads.ts @@ -0,0 +1,74 @@ +import { type AgentRuntimeEvent } from "../agent/events.js"; +import { type SessionEventPayload } from "./events.js"; + +export function runtimeEventToSessionPayload(event: AgentRuntimeEvent): SessionEventPayload | undefined { + switch (event.type) { + 
case "message": + return { + kind: "message", + role: event.role, + text: event.text, + ...(event.meta === undefined ? {} : { meta: event.meta }), + }; + case "tool_call": + return { + kind: "tool_call", + label: event.label, + call: event.call as unknown as Record, + }; + case "task_plan": + return { + kind: "task_plan", + items: event.plan.items, + updatedAt: event.plan.updatedAt, + }; + case "knowledge_status": + return undefined; + case "choice": + return { + kind: "choice", + tone: event.tone, + title: event.title, + ...(event.body === undefined ? {} : { body: event.body }), + actions: event.actions, + }; + case "subagent_started": + return { + kind: "subagent_started", + sessionId: event.sessionId, + parentSessionId: event.parentSessionId, + parentToolCallId: event.parentToolCallId, + ...(event.agentProfileId === undefined ? {} : { agentProfileId: event.agentProfileId }), + ...(event.title === undefined ? {} : { title: event.title }), + }; + case "subagent_event": + return { + kind: "subagent_event", + sessionId: event.sessionId, + parentSessionId: event.parentSessionId, + parentToolCallId: event.parentToolCallId, + event: event.event as unknown as Record, + }; + case "subagent_completed": + return { + kind: "subagent_completed", + sessionId: event.sessionId, + parentSessionId: event.parentSessionId, + parentToolCallId: event.parentToolCallId, + ...(event.result === undefined ? 
{} : { result: event.result }), + }; + case "subagent_failed": + return { + kind: "subagent_failed", + sessionId: event.sessionId, + parentSessionId: event.parentSessionId, + parentToolCallId: event.parentToolCallId, + error: event.error, + }; + case "status": + return { + kind: "status", + status: event.status, + }; + } +} diff --git a/src/session/store.ts b/src/session/store.ts index 4ac375c..e55cd48 100644 --- a/src/session/store.ts +++ b/src/session/store.ts @@ -51,6 +51,7 @@ export interface CreateChildSessionOptions { parentToolCallId: string; agentProfileId?: string; title?: string; + recordParentEvent?: boolean; } export function generateSessionId(): string { @@ -115,19 +116,21 @@ export async function createChildSession( await writeFile(eventsPath, "", { flag: "wx" }); const child = buildHandle(sessionDir, metadata); - await options.parent.append( - sessionEventPayload.subagentStarted( - { - sessionId: child.sessionId, - parentSessionId: options.parent.sessionId, - parentToolCallId: options.parentToolCallId, - }, - { - ...(options.agentProfileId === undefined ? {} : { agentProfileId: options.agentProfileId }), - ...(options.title === undefined ? {} : { title: options.title }), - } - ) - ); + if (options.recordParentEvent ?? true) { + await options.parent.append( + sessionEventPayload.subagentStarted( + { + sessionId: child.sessionId, + parentSessionId: options.parent.sessionId, + parentToolCallId: options.parentToolCallId, + }, + { + ...(options.agentProfileId === undefined ? {} : { agentProfileId: options.agentProfileId }), + ...(options.title === undefined ? 
{} : { title: options.title }), + } + ) + ); + } return child; } diff --git a/src/tui/layout.ts b/src/tui/layout.ts index da8554e..03605b8 100644 --- a/src/tui/layout.ts +++ b/src/tui/layout.ts @@ -170,6 +170,7 @@ export class ChatLayout implements Component, Focusable { case "system": case "thinking": case "tool_call": + case "subagent": case "modal": return []; } diff --git a/src/tui/messages.ts b/src/tui/messages.ts index d454109..1bfddbd 100644 --- a/src/tui/messages.ts +++ b/src/tui/messages.ts @@ -2,7 +2,7 @@ import { ui } from "../cli/ui.js"; import { type ToolCall } from "../agent/tools.js"; import { renderMarkdown } from "./markdown.js"; -export type ChatMessageKind = "system" | "user" | "agent" | "thinking" | "tool_call" | "modal"; +export type ChatMessageKind = "system" | "user" | "agent" | "thinking" | "tool_call" | "modal" | "subagent"; export interface SystemChatMessage { kind: "system"; @@ -35,6 +35,14 @@ export interface ToolCallChatMessage { resultSummary?: string; } +export interface SubagentChatMessage { + kind: "subagent"; + status: "running" | "event" | "completed" | "failed"; + sessionId: string; + title?: string; + text?: string; +} + export interface ChatModalAction { label: string; value?: string; @@ -54,6 +62,7 @@ export type ChatMessage = | AgentChatMessage | ThinkingChatMessage | ToolCallChatMessage + | SubagentChatMessage | ChatModalMessage; export function systemMessage(text: string): ChatMessage { @@ -78,6 +87,10 @@ export function toolCallMessage(call: ToolCall, label: string, resultSummary?: s : { kind: "tool_call", call, label, resultSummary }; } +export function subagentMessage(message: Omit): ChatMessage { + return { kind: "subagent", ...message }; +} + export function modalMessage(message: Omit): ChatMessage { return { kind: "modal", ...message }; } @@ -96,6 +109,10 @@ export function renderChatMessage(message: ChatMessage, options: RenderChatMessa return renderToolCallMessage(message); } + if (message.kind === "subagent") { + 
return renderSubagentMessage(message); + } + if (message.kind === "thinking") { return message.text.split("\n").map((line) => ui.muted(line)); } @@ -164,6 +181,25 @@ function renderToolCallMessage(message: ToolCallChatMessage): string[] { return [` ${ui.muted(expandTabs(visibleLabel))}`]; } +function renderSubagentMessage(message: SubagentChatMessage): string[] { + const label = message.title ?? shortSessionId(message.sessionId); + + switch (message.status) { + case "running": + return [` ${ui.muted(`↳ task: ${label} (running)`)}`]; + case "event": + return message.text ? [` ${ui.muted(`↳ task: ${label}: ${message.text}`)}`] : []; + case "completed": + return [` ${ui.muted(`↳ task: ${label} (completed)`)}`, ...(message.text ? [` ${message.text}`] : [])]; + case "failed": + return [` ${ui.warn(`↳ task: ${label} (failed)`)}`, ...(message.text ? [` ${message.text}`] : [])]; + } +} + +function shortSessionId(sessionId: string): string { + return sessionId.length <= 8 ? sessionId : sessionId.slice(0, 8); +} + function expandTabs(line: string): string { let column = 0; let expanded = ""; diff --git a/src/tui/runtime-events.ts b/src/tui/runtime-events.ts index 50950da..8a08618 100644 --- a/src/tui/runtime-events.ts +++ b/src/tui/runtime-events.ts @@ -1,7 +1,14 @@ import { type AgentRuntimeEvent } from "../agent/events.js"; import { getKnowledgeStatusEvents } from "../agent/runtime.js"; import { type KnowledgeStatus } from "../knowledge/status.js"; -import { agentMessage, modalMessage, systemMessage, toolCallMessage, type ChatMessage } from "./messages.js"; +import { + agentMessage, + modalMessage, + subagentMessage, + systemMessage, + toolCallMessage, + type ChatMessage, +} from "./messages.js"; import { formatKnowledgePathStatus } from "./status.js"; export function getKnowledgeStatusMessages(status: KnowledgeStatus): ChatMessage[] { @@ -38,15 +45,60 @@ export function renderRuntimeEvent(event: AgentRuntimeEvent): ChatMessage[] { case "task_plan": return []; case 
"subagent_started": + return [ + subagentMessage({ + status: "running", + sessionId: event.sessionId, + title: event.title, + }), + ]; case "subagent_event": + return formatForwardedSubagentEvent(event.sessionId, event.event); case "subagent_completed": + return [ + subagentMessage({ + status: "completed", + sessionId: event.sessionId, + text: event.result, + }), + ]; case "subagent_failed": - return []; + return [ + subagentMessage({ + status: "failed", + sessionId: event.sessionId, + text: event.error, + }), + ]; case "status": return []; } } +function formatForwardedSubagentEvent(sessionId: string, event: AgentRuntimeEvent): ChatMessage[] { + if (event.type === "message" && event.role === "assistant") { + return [ + subagentMessage({ + status: "event", + sessionId, + text: event.text, + }), + ]; + } + + if (event.type === "tool_call") { + return [ + subagentMessage({ + status: "event", + sessionId, + text: event.label, + }), + ]; + } + + return []; +} + function formatKbPathSource(status: KnowledgeStatus): string { return status.kbPathSource === "env" ? 
" (custom)" : ""; } diff --git a/src/tui/shell.ts b/src/tui/shell.ts index d37dd62..10ada76 100644 --- a/src/tui/shell.ts +++ b/src/tui/shell.ts @@ -6,6 +6,7 @@ import { type AppContext } from "../app/context.js"; import { ui } from "../cli/ui.js"; import { type ModelReasoningEvent, type ModelReasoningSink } from "../model/index.js"; import { type SessionEventPayload } from "../session/events.js"; +import { runtimeEventToSessionPayload } from "../session/runtime-payloads.js"; import { createSession, type SessionHandle } from "../session/store.js"; import { BusyIndicator, ReasoningTailBuffer } from "./busy.js"; import { ChatLayout } from "./layout.js"; @@ -13,6 +14,8 @@ import { type ChatMessage, systemMessage, thinkingMessage } from "./messages.js" import { renderRuntimeEvent } from "./runtime-events.js"; import { getFolderName, getModelLabel, getStartupThreadMessages, renderStaticLayout } from "./status.js"; +export { runtimeEventToSessionPayload } from "../session/runtime-payloads.js"; + export interface TuiShell { render(): Promise; } @@ -180,6 +183,7 @@ export class TopchesterTuiShell implements TuiShell { abortController.signal, { onReasoning: reasoningDisplay?.sink, + session: this.session, } )) { if (event.type === "message" && event.role === "assistant") { @@ -357,6 +361,10 @@ export function chatMessageToSessionPayload(message: ChatMessage): SessionEventP return undefined; } + if (message.kind === "subagent") { + return undefined; + } + if (message.kind === "modal") { return { kind: "choice", @@ -378,78 +386,6 @@ export function chatMessageToSessionPayload(message: ChatMessage): SessionEventP return undefined; } -export function runtimeEventToSessionPayload(event: AgentRuntimeEvent): SessionEventPayload | undefined { - switch (event.type) { - case "message": - return { - kind: "message", - role: event.role, - text: event.text, - ...(event.meta === undefined ? 
{} : { meta: event.meta }), - }; - case "tool_call": - return { - kind: "tool_call", - label: event.label, - call: event.call as unknown as Record, - }; - case "task_plan": - return { - kind: "task_plan", - items: event.plan.items, - updatedAt: event.plan.updatedAt, - }; - case "knowledge_status": - return undefined; - case "choice": - return { - kind: "choice", - tone: event.tone, - title: event.title, - ...(event.body === undefined ? {} : { body: event.body }), - actions: event.actions, - }; - case "subagent_started": - return { - kind: "subagent_started", - sessionId: event.sessionId, - parentSessionId: event.parentSessionId, - parentToolCallId: event.parentToolCallId, - ...(event.agentProfileId === undefined ? {} : { agentProfileId: event.agentProfileId }), - ...(event.title === undefined ? {} : { title: event.title }), - }; - case "subagent_event": - return { - kind: "subagent_event", - sessionId: event.sessionId, - parentSessionId: event.parentSessionId, - parentToolCallId: event.parentToolCallId, - event: event.event as unknown as Record, - }; - case "subagent_completed": - return { - kind: "subagent_completed", - sessionId: event.sessionId, - parentSessionId: event.parentSessionId, - parentToolCallId: event.parentToolCallId, - ...(event.result === undefined ? 
{} : { result: event.result }), - }; - case "subagent_failed": - return { - kind: "subagent_failed", - sessionId: event.sessionId, - parentSessionId: event.parentSessionId, - parentToolCallId: event.parentToolCallId, - error: event.error, - }; - case "status": - return { - kind: "status", - status: event.status, - }; - } -} - export function slashCommandToSessionPayload(command: string): SessionEventPayload { return { kind: "message", diff --git a/test/commands.test.ts b/test/commands.test.ts index f810ad4..e31d3a4 100644 --- a/test/commands.test.ts +++ b/test/commands.test.ts @@ -11,6 +11,7 @@ import { parseSlashCommand, } from "../src/agent/commands.js"; import { TopchesterAgentRuntime } from "../src/agent/runtime.js"; +import { createSession, listChildSessions, loadSession } from "../src/session/store.js"; describe("slash commands", () => { it("parses slash commands and arguments", () => { @@ -535,6 +536,71 @@ describe("slash commands", () => { expect(normalizeRuntimeEventsForComparison(collected)).toEqual(normalizeRuntimeEventsForComparison(streamed)); }); + it("runs task tool calls as child sessions and feeds the bounded result back to the parent", async () => { + const workspace = await mkdtemp(join(tmpdir(), "topchester-task-tool-")); + const session = await createSession(workspace); + const prompts: string[] = []; + const runtime = new TopchesterAgentRuntime({ + ...createTestContext(workspace), + modelGateway: { + async generateAgentStep(request: { prompt: string }) { + prompts.push(request.prompt); + + if (request.prompt.includes("Child prompt")) { + return fakeAgentStep("Child found src/agent/runtime.ts."); + } + + if (request.prompt.includes("Tool result from task:")) { + return fakeAgentStep("Parent received the child result."); + } + + return fakeAgentStep("", [ + { + id: "task-call-1", + source: "native" as const, + tool: "task", + args: { + description: "Inspect runtime", + prompt: "Child prompt", + subagent_type: "explore", + }, + }, + ]); + }, + } 
as unknown as AppContext["modelGateway"], + }); + + const events = await collectRuntimeEvents(runtime.submitMessageStream([], "delegate work", undefined, { session })); + const children = await listChildSessions(workspace, session.sessionId); + const child = await loadSession(workspace, children[0]!.sessionId); + + expect(events.map((event) => event.type)).toEqual([ + "subagent_started", + "subagent_event", + "subagent_event", + "subagent_completed", + "tool_call", + "message", + "status", + ]); + expect(events.find((event) => event.type === "tool_call")).toMatchObject({ + label: expect.stringContaining("task: completed"), + }); + expect(prompts.at(-1)).toContain("Child found src/agent/runtime.ts."); + expect(children).toHaveLength(1); + expect(child.metadata).toMatchObject({ + source: "subagent", + parentSessionId: session.sessionId, + parentToolCallId: "task-call-1", + agentProfileId: "explore", + title: "Inspect runtime", + }); + expect(child.events).toEqual([ + expect.objectContaining({ kind: "message", role: "assistant", text: "Child found src/agent/runtime.ts." }), + expect.objectContaining({ kind: "status", status: "ready" }), + ]); + }); + it("propagates aborts through the runtime stream path", async () => { const workspace = await mkdtemp(join(tmpdir(), "topchester-stream-abort-")); const abortController = new AbortController(); @@ -1428,3 +1494,18 @@ function normalizeRuntimeEventsForComparison(events: AgentRuntimeEvent[]): Agent event.type === "message" && event.meta !== undefined ? 
{ ...event, meta: "" } : event ); } + +function fakeAgentStep(text: string, toolCalls: Array> = []) { + return { + text, + providerId: "fake", + modelId: "fake-agent", + purpose: "agent.primary" as const, + toolCalls, + toolProtocol: "native-openai-compatible" as const, + protocolAttempts: [], + providerRejectedTools: false, + warnings: [], + openRouterRoutingApplied: false, + }; +} diff --git a/test/tools.test.ts b/test/tools.test.ts index 3c8c23d..ba243df 100644 --- a/test/tools.test.ts +++ b/test/tools.test.ts @@ -359,6 +359,7 @@ describe("agent tools", () => { it("gets model prompt lines from the tool registry", () => { expect(getToolPromptLines()).toEqual([ + 'task: delegate focused read-only research or isolated analysis to a child agent session. Use it when parallel context gathering would help. To use it, reply with only JSON: {"tool":"task","args":{"description":"Inspect runtime event flow","prompt":"Read the runtime and summarize how events are emitted.","subagent_type":"explore"}}', 'plan_todo: replace the visible session task plan for non-trivial multi-step work; keep 2-6 short items, exactly one in_progress item while work remains, and use [] only to clear. Do not use plan_todo just to report completed work before a final answer. To use it, reply with only JSON: {"tool":"plan_todo","args":{"items":[{"text":"Inspect relevant files","status":"in_progress"},{"text":"Implement focused change","status":"pending"}]}}', 'read_file: read a UTF-8 file inside the workspace. To use it, reply with only JSON: {"tool":"read_file","args":{"path":"package.json"}}', 'list_files: list files and directories inside the workspace; top-level by default, recursive only when requested, with "/" after directory names. 
To use it, reply with only JSON: {"tool":"list_files","args":{"path":"src","recursive":false,"limit":500}}', diff --git a/test/tui.render.test.ts b/test/tui.render.test.ts index 6007149..1e89272 100644 --- a/test/tui.render.test.ts +++ b/test/tui.render.test.ts @@ -38,6 +38,7 @@ import { type ChatMessage, modalMessage, renderChatMessage, + subagentMessage, systemMessage, toolCallMessage, userMessage, @@ -1482,6 +1483,26 @@ describe("TUI rendering", () => { kind: "status", status: "ready", }); + expect( + renderRuntimeEvent( + agentEvent.subagentStarted({ + sessionId: "child-session", + parentSessionId: "parent-session", + parentToolCallId: "task-call-1", + title: "Inspect runtime", + }) + ) + ).toEqual([subagentMessage({ status: "running", sessionId: "child-session", title: "Inspect runtime" })]); + expect( + renderRuntimeEvent( + agentEvent.subagentCompleted({ + sessionId: "child-session", + parentSessionId: "parent-session", + parentToolCallId: "task-call-1", + result: "Done", + }) + ) + ).toEqual([subagentMessage({ status: "completed", sessionId: "child-session", text: "Done" })]); }); it("does not turn successful startup checks into visible ready messages", async () => { @@ -2071,7 +2092,13 @@ describe("TUI rendering", () => { expect(appendCalls).toBe(1); expect( messages.map((message) => - message.kind === "modal" ? message.title : "text" in message ? message.text : message.label + message.kind === "modal" + ? message.title + : message.kind === "tool_call" + ? message.label + : "text" in message + ? 
message.text + : "" ) ).toEqual(["Saved later", "Session save failed: disk is full"]); }); @@ -2085,9 +2112,7 @@ describe("TUI rendering", () => { expect(layoutSource).not.toMatch( /node:fs|from ".*session|append\(|loadSession|createSession|getTopchesterSessionsPath/u ); - expect(runtimeSource).not.toMatch( - /node:fs|from ".*session|append\(|loadSession|createSession|getTopchesterSessionsPath/u - ); + expect(runtimeSource).not.toMatch(/node:fs|append\(|loadSession|getTopchesterSessionsPath/u); }); }); From 5a28c7e75937e7e3e82117ab2c0835d54e7207fe Mon Sep 17 00:00:00 2001 From: Dragan Bajcic Date: Fri, 15 May 2026 11:44:42 +0200 Subject: [PATCH 7/9] feat(runtime): run task tools concurrently --- ...5-15-subagent-session-tree-runtime-plan.md | 17 +++- src/agent/runtime.ts | 47 +++++++++++ test/commands.test.ts | 77 +++++++++++++++++++ 3 files changed, 140 insertions(+), 1 deletion(-) diff --git a/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md b/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md index d65f2e7..5ebf6e8 100644 --- a/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md +++ b/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md @@ -389,7 +389,7 @@ Verified: ## Slice 6: Add Parallel Execution For `task` Only -Status: [ ] Not started +Status: [x] Done Goal: @@ -414,6 +414,21 @@ Verification: - Cancellation test proves all running children stop. - `pnpm check`. +Completed: + +- Added a task-only parallel scheduler path for native model responses containing multiple `task` calls. +- Streamed child events through a shared event queue while concurrent task calls run. +- Preserved source-order parent tool rows and model-visible task results after concurrent execution. +- Added a conservative task concurrency limit of 3 per batch. +- Continued using per-task error results and abort signals through the existing task execution path. 
+ +Verified: + +- `pnpm test -- test/commands.test.ts` +- `pnpm typecheck` +- `pnpm check` +- `mise run local-ci` + ## Slice 7: Expand To General Multi-Tool Parallelism Status: [ ] Not started diff --git a/src/agent/runtime.ts b/src/agent/runtime.ts index df3ac1e..1004b4e 100644 --- a/src/agent/runtime.ts +++ b/src/agent/runtime.ts @@ -36,6 +36,7 @@ import { } from "./tools.js"; const MAX_TOOL_CALLS_PER_TURN = 75; +const DEFAULT_TASK_CONCURRENCY = 3; interface TurnTokenUsageTotals { inputTokens?: number; @@ -277,6 +278,52 @@ export class TopchesterAgentRuntime implements AgentRuntime { return; } + if (result.toolCalls.length > 1 && result.toolCalls.every((call) => call.tool === "task")) { + const taskCalls = result.toolCalls.map((call) => call as ToolCall); + const taskResults: ToolExecutionResult[] = []; + + for (let index = 0; index < taskCalls.length; index += DEFAULT_TASK_CONCURRENCY) { + const batch = taskCalls.slice(index, index + DEFAULT_TASK_CONCURRENCY); + const taskEventQueue = createRuntimeEventQueue(); + const batchResultPromise = Promise.all( + batch.map((call, batchIndex) => + executeToolCall(this.context.workspaceRoot, call, { + logger: this.context.logger, + taskPlan: this.taskPlan, + profile, + permissions, + subagents, + abortSignal, + toolCallId: result.toolCalls[index + batchIndex]?.id, + eventSink: (event) => taskEventQueue.push(event), + }) + ) + ).finally(() => { + taskEventQueue.close(); + }); + + for await (const event of taskEventQueue) { + yield event; + } + + taskResults.push(...(await batchResultPromise)); + } + + for (let index = 0; index < taskCalls.length; index += 1) { + yield agentEvent.toolCall(taskCalls[index]!, formatToolCallMessage(taskCalls[index]!, taskResults[index])); + } + + afterTool = "task"; + nextPrompt = `${nextPrompt}\n\n${taskResults + .map((toolResult) => formatToolResultForPrompt(toolResult)) + .join("\n\n")}\n\n${formatContinuationInstruction( + result.toolProtocol, + taskResults.at(-1)!, + 
isToolAllowed(permissions, "plan_todo") + )}`; + continue; + } + const executableToolCall = toolCall as ToolCall; const suppressiblePlanTodoAnswer = getSuppressiblePlanTodoAnswer( executableToolCall, diff --git a/test/commands.test.ts b/test/commands.test.ts index e31d3a4..de54b56 100644 --- a/test/commands.test.ts +++ b/test/commands.test.ts @@ -601,6 +601,72 @@ describe("slash commands", () => { ]); }); + it("runs multiple task calls concurrently while preserving parent result order", async () => { + const workspace = await mkdtemp(join(tmpdir(), "topchester-task-parallel-")); + const session = await createSession(workspace); + const prompts: string[] = []; + let releaseChildA: (() => void) | undefined; + const childAReleased = new Promise((resolve) => { + releaseChildA = resolve; + }); + const runtime = new TopchesterAgentRuntime({ + ...createTestContext(workspace), + modelGateway: { + async generateAgentStep(request: { prompt: string }) { + prompts.push(request.prompt); + + if (request.prompt.includes("Child A")) { + await childAReleased; + return fakeAgentStep("A result"); + } + + if (request.prompt.includes("Child B")) { + return fakeAgentStep("B result"); + } + + if (request.prompt.includes("Tool result from task:")) { + return fakeAgentStep("Parent received both task results."); + } + + return fakeAgentStep("", [ + { + id: "task-a", + source: "native" as const, + tool: "task", + args: { description: "A", prompt: "Child A", subagent_type: "explore" }, + }, + { + id: "task-b", + source: "native" as const, + tool: "task", + args: { description: "B", prompt: "Child B", subagent_type: "explore" }, + }, + ]); + }, + } as unknown as AppContext["modelGateway"], + }); + + const eventsPromise = collectRuntimeEvents( + runtime.submitMessageStream([], "delegate twice", undefined, { session }) + ); + await waitForPrompt(prompts, "Child B"); + releaseChildA?.(); + const events = await eventsPromise; + const firstCompletionIndex = events.findIndex((event) => event.type 
=== "subagent_completed"); + const startedBeforeCompletion = events + .slice(0, firstCompletionIndex) + .filter((event) => event.type === "subagent_started"); + const parentResultPrompt = prompts.find((prompt) => prompt.includes("Tool result from task:")) ?? ""; + + expect(startedBeforeCompletion).toHaveLength(2); + expect(parentResultPrompt.indexOf("A result")).toBeLessThan(parentResultPrompt.indexOf("B result")); + expect( + events + .filter((event) => event.type === "tool_call") + .map((event) => (event.type === "tool_call" ? event.label : "")) + ).toEqual([expect.stringContaining("task: completed"), expect.stringContaining("task: completed")]); + }); + it("propagates aborts through the runtime stream path", async () => { const workspace = await mkdtemp(join(tmpdir(), "topchester-stream-abort-")); const abortController = new AbortController(); @@ -1509,3 +1575,14 @@ function fakeAgentStep(text: string, toolCalls: Array> = openRouterRoutingApplied: false, }; } + +async function waitForPrompt(prompts: string[], pattern: string): Promise { + for (let attempt = 0; attempt < 50; attempt += 1) { + if (prompts.some((prompt) => prompt.includes(pattern))) { + return; + } + await new Promise((resolve) => setTimeout(resolve, 10)); + } + + throw new Error(`Timed out waiting for prompt containing ${pattern}`); +} From 095cfdd615e48b3f95ceff5b782355ebd929e347 Mon Sep 17 00:00:00 2001 From: Dragan Bajcic Date: Fri, 15 May 2026 11:49:27 +0200 Subject: [PATCH 8/9] feat(runtime): schedule parallel read tools --- docs/ARCHITECTURE.md | 9 ++++ ...5-15-subagent-session-tree-runtime-plan.md | 18 +++++++- src/agent/runtime.ts | 35 +++++++++++++++ src/agent/tools.ts | 1 + src/agent/tools/find-file.ts | 3 ++ src/agent/tools/git.ts | 9 ++++ src/agent/tools/grep.ts | 3 ++ src/agent/tools/list-files.ts | 3 ++ src/agent/tools/read-file.ts | 3 ++ src/agent/tools/registry.ts | 10 +++++ src/agent/tools/types.ts | 4 ++ test/commands.test.ts | 44 +++++++++++++++++++ 12 files changed, 141 
insertions(+), 1 deletion(-) diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 279abba..2cdfd31 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -282,6 +282,15 @@ session, runs the delegated prompt under a subagent profile, forwards child runtime events to the parent stream, and returns one bounded result to the parent model. +Tool definitions can opt into parallel scheduling with metadata: + +- `parallelSafe` marks tools that may run alongside other safe tools. +- `mutatesWorkspace` and `requiresExclusiveWorkspace` keep write, Git mutation, command, and unknown tools sequential by default. +- `resourceKeys(args)` gives future schedulers a stable conflict key, such as a file path or Git scope. + +Only explicitly read-only tools are marked parallel-safe initially. Unknown or +unmarked tools remain sequential. + ## Future GUI / IDE Path The TUI should be only one client of the same KB-aware runtime. diff --git a/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md b/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md index 5ebf6e8..a1b902f 100644 --- a/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md +++ b/docs/plans/2026-05-15-subagent-session-tree-runtime-plan.md @@ -431,7 +431,7 @@ Verified: ## Slice 7: Expand To General Multi-Tool Parallelism -Status: [ ] Not started +Status: [x] Done Goal: @@ -462,6 +462,22 @@ Verification: - TUI tests cover interleaved tool progress. - `pnpm check`. +Completed: + +- Added per-tool scheduling metadata: `parallelSafe`, `mutatesWorkspace`, `requiresExclusiveWorkspace`, and `resourceKeys`. +- Marked read-only file/search/Git inspection tools as parallel-safe. +- Left mutating, command, and unmarked tools on the existing sequential path by default. +- Added a general native multi-tool scheduler path for explicitly parallel-safe tools. +- Preserved source-order parent tool rows and model-visible tool results after parallel read-only execution. 
+- Documented scheduling metadata for tool authors. + +Verified: + +- `pnpm test -- test/commands.test.ts test/tools.test.ts` +- `pnpm typecheck` +- `pnpm check` +- `mise run local-ci` + ## Open Questions - Should child sessions be visible in the default session list, or only when expanding a parent session? diff --git a/src/agent/runtime.ts b/src/agent/runtime.ts index 1004b4e..2b6aa90 100644 --- a/src/agent/runtime.ts +++ b/src/agent/runtime.ts @@ -24,6 +24,7 @@ import { type SessionHandle } from "../session/store.js"; import { type ModelAgentResult, type ModelReasoningSink } from "../model/index.js"; import { executeToolCall, + isParallelSafeToolName, isToolErrorResult, parseToolCallWithSource, type ModelToolCall, @@ -324,6 +325,40 @@ export class TopchesterAgentRuntime implements AgentRuntime { continue; } + if (result.toolCalls.length > 1 && result.toolCalls.every((call) => isParallelSafeToolName(call.tool))) { + const parallelCalls = result.toolCalls.map((call) => call as ToolCall); + const parallelResults = await Promise.all( + parallelCalls.map((call, index) => + executeToolCall(this.context.workspaceRoot, call, { + logger: this.context.logger, + taskPlan: this.taskPlan, + profile, + permissions, + subagents, + abortSignal, + toolCallId: result.toolCalls[index]?.id, + }) + ) + ); + + for (let index = 0; index < parallelCalls.length; index += 1) { + yield agentEvent.toolCall( + parallelCalls[index]!, + formatToolCallMessage(parallelCalls[index]!, parallelResults[index]) + ); + } + + afterTool = parallelCalls.at(-1)?.tool; + nextPrompt = `${nextPrompt}\n\n${parallelResults + .map((toolResult) => formatToolResultForPrompt(toolResult)) + .join("\n\n")}\n\n${formatContinuationInstruction( + result.toolProtocol, + parallelResults.at(-1)!, + isToolAllowed(permissions, "plan_todo") + )}`; + continue; + } + const executableToolCall = toolCall as ToolCall; const suppressiblePlanTodoAnswer = getSuppressiblePlanTodoAnswer( executableToolCall, diff --git 
a/src/agent/tools.ts b/src/agent/tools.ts index 46c4145..aa289bd 100644 --- a/src/agent/tools.ts +++ b/src/agent/tools.ts @@ -124,6 +124,7 @@ export { getToolDefinition, getToolDefinitionsForPermissions, getToolPromptLines, + isParallelSafeToolName, isToolName, toolRegistry, type RegisteredTool, diff --git a/src/agent/tools/find-file.ts b/src/agent/tools/find-file.ts index 5a3c507..31090f1 100644 --- a/src/agent/tools/find-file.ts +++ b/src/agent/tools/find-file.ts @@ -45,6 +45,9 @@ export const findFileTool = defineTool({ prompt: 'find_file: find existing files by fuzzy path or filename inside the workspace; matches may appear in the middle of a filename, and results are file paths, not file contents. To use it, reply with only JSON: {"tool":"find_file","args":{"query":"runtime"}}', argsSchema: findFileArgsSchema, + parallelSafe: true, + mutatesWorkspace: false, + resourceKeys: (args) => [`find:${args.path}`], execute: (context, args) => findWorkspaceFilesByName(context.workspaceRoot, args, { pathEnv: context.pathEnv, logger: context.logger }), }); diff --git a/src/agent/tools/git.ts b/src/agent/tools/git.ts index 366d019..50e4e2e 100644 --- a/src/agent/tools/git.ts +++ b/src/agent/tools/git.ts @@ -109,6 +109,9 @@ export const gitStatusTool = defineTool({ prompt: 'git_status: inspect branch, head, clean state, staged, unstaged, and untracked files without parsing shell output. To use it, reply with only JSON: {"tool":"git_status","args":{"path":".","include_untracked":true}}', argsSchema: gitStatusArgsSchema, + parallelSafe: true, + mutatesWorkspace: false, + resourceKeys: (args) => [`git-status:${args.path}`], execute: (context, args) => inspectGitStatus(context, args), }); @@ -118,6 +121,9 @@ export const gitDiffTool = defineTool({ prompt: 'git_diff: inspect a bounded Git diff; use scope "all", "unstaged", or "staged", and include_untracked:true only when untracked file patches are needed. 
To use it, reply with only JSON: {"tool":"git_diff","args":{"scope":"all","include_untracked":true}}', argsSchema: gitDiffArgsSchema, + parallelSafe: true, + mutatesWorkspace: false, + resourceKeys: (args) => [`git-diff:${args.path ?? "."}:${args.scope}`], execute: (context, args) => inspectGitDiff(context, args), }); @@ -127,6 +133,9 @@ export const gitLogTool = defineTool({ prompt: 'git_log: inspect recent commits without parsing shell output. To use it, reply with only JSON: {"tool":"git_log","args":{"limit":10,"path":"src/agent/runtime.ts"}}', argsSchema: gitLogArgsSchema, + parallelSafe: true, + mutatesWorkspace: false, + resourceKeys: (args) => [`git-log:${args.path ?? "."}`], execute: (context, args) => inspectGitLog(context, args), }); diff --git a/src/agent/tools/grep.ts b/src/agent/tools/grep.ts index 7e5e3a9..8e47e0f 100644 --- a/src/agent/tools/grep.ts +++ b/src/agent/tools/grep.ts @@ -26,6 +26,9 @@ export const grepTool = defineTool({ prompt: 'grep: search text inside file contents in the workspace; output lines are the files containing the matched text, and paths mentioned inside those lines are not confirmed files unless checked with find_file or read_file. To use it, reply with only JSON: {"tool":"grep","args":{"pattern":"function name","path":"src"}}', argsSchema: grepArgsSchema, + parallelSafe: true, + mutatesWorkspace: false, + resourceKeys: (args) => [`grep:${args.path ?? "."}`], execute: (context, args) => grepWorkspace(context.workspaceRoot, args, { pathEnv: context.pathEnv, logger: context.logger }), }); diff --git a/src/agent/tools/list-files.ts b/src/agent/tools/list-files.ts index 7aed994..0850a3c 100644 --- a/src/agent/tools/list-files.ts +++ b/src/agent/tools/list-files.ts @@ -24,6 +24,9 @@ export const listFilesTool = defineTool({ prompt: 'list_files: list files and directories inside the workspace; top-level by default, recursive only when requested, with "/" after directory names. 
To use it, reply with only JSON: {"tool":"list_files","args":{"path":"src","recursive":false,"limit":500}}', argsSchema: listFilesArgsSchema, + parallelSafe: true, + mutatesWorkspace: false, + resourceKeys: (args) => [`dir:${args.path}`], execute: (context, args) => listWorkspaceFiles(context.workspaceRoot, args), }); diff --git a/src/agent/tools/read-file.ts b/src/agent/tools/read-file.ts index 746cafc..1a09b28 100644 --- a/src/agent/tools/read-file.ts +++ b/src/agent/tools/read-file.ts @@ -20,6 +20,9 @@ export const readFileTool = defineTool({ prompt: 'read_file: read a UTF-8 file inside the workspace. To use it, reply with only JSON: {"tool":"read_file","args":{"path":"package.json"}}', argsSchema: readFileArgsSchema, + parallelSafe: true, + mutatesWorkspace: false, + resourceKeys: (args) => [`file:${args.path}`], execute: (context, args) => readWorkspaceFile(context.workspaceRoot, args.path), }); diff --git a/src/agent/tools/registry.ts b/src/agent/tools/registry.ts index ad8e61f..cf8b3a2 100644 --- a/src/agent/tools/registry.ts +++ b/src/agent/tools/registry.ts @@ -49,3 +49,13 @@ export function getToolDefinitionsForPermissions(filter?: (toolName: ToolName) = .filter(([name]) => filter?.(name as ToolName) ?? 
true) .map(([, tool]) => tool); } + +export function isParallelSafeToolName(name: string): name is ToolName { + if (!isToolName(name)) { + return false; + } + + const definition = toolRegistry[name]; + + return Boolean(definition.parallelSafe && !definition.mutatesWorkspace && !definition.requiresExclusiveWorkspace); +} diff --git a/src/agent/tools/types.ts b/src/agent/tools/types.ts index a8fc988..d5f5bed 100644 --- a/src/agent/tools/types.ts +++ b/src/agent/tools/types.ts @@ -62,6 +62,10 @@ export interface ToolDefinition; + parallelSafe?: boolean; + mutatesWorkspace?: boolean; + requiresExclusiveWorkspace?: boolean; + resourceKeys?: (args: any) => readonly string[]; execute(context: ToolContext, args: Args): Promise; } diff --git a/test/commands.test.ts b/test/commands.test.ts index de54b56..fb06922 100644 --- a/test/commands.test.ts +++ b/test/commands.test.ts @@ -667,6 +667,50 @@ describe("slash commands", () => { ).toEqual([expect.stringContaining("task: completed"), expect.stringContaining("task: completed")]); }); + it("runs explicitly parallel-safe read-only tool calls from one model step", async () => { + const workspace = await mkdtemp(join(tmpdir(), "topchester-read-parallel-")); + await writeFile(join(workspace, "a.txt"), "A\n"); + await writeFile(join(workspace, "b.txt"), "B\n"); + const prompts: string[] = []; + const runtime = new TopchesterAgentRuntime({ + ...createTestContext(workspace), + modelGateway: { + async generateAgentStep(request: { prompt: string }) { + prompts.push(request.prompt); + + if (request.prompt.includes("Tool result from read_file")) { + return fakeAgentStep("Read both files."); + } + + return fakeAgentStep("", [ + { + id: "read-a", + source: "native" as const, + tool: "read_file", + args: { path: "a.txt" }, + }, + { + id: "read-b", + source: "native" as const, + tool: "read_file", + args: { path: "b.txt" }, + }, + ]); + }, + } as unknown as AppContext["modelGateway"], + }); + + const events = await runtime.submitMessage([], 
"read both files"); + const parentResultPrompt = prompts.find((prompt) => prompt.includes("Tool result from read_file")) ?? ""; + + expect( + events + .filter((event) => event.type === "tool_call") + .map((event) => (event.type === "tool_call" ? event.label : "")) + ).toEqual(["read_file: a.txt", "read_file: b.txt"]); + expect(parentResultPrompt.indexOf('"a.txt"')).toBeLessThan(parentResultPrompt.indexOf('"b.txt"')); + }); + it("propagates aborts through the runtime stream path", async () => { const workspace = await mkdtemp(join(tmpdir(), "topchester-stream-abort-")); const abortController = new AbortController(); From 42303d0f59b7a742c69d5c51921a9de2837c765e Mon Sep 17 00:00:00 2001 From: Dragan Bajcic Date: Fri, 15 May 2026 13:35:13 +0200 Subject: [PATCH 9/9] feat(model): support OpenRouter service tier and attribution headers - Add (flex/priority) to model provider configuration - Automatically inject and headers for OpenRouter providers - Add smoke test for subagent task delegation and metadata persistence - Update documentation and configuration schemas to reflect provider enhancements --- config/gemini-flex.yaml | 9 ++ docs/MODEL_CONFIG.md | 5 + docs/config.md | 7 +- scripts/smoke/fake-api.ts | 20 +++ scripts/smoke/run-smoke.ts | 148 ++++++++++++++++++ .../scenarios/16-task-subagent/config.json | 17 ++ .../template/docs/task-subagent-note.txt | 1 + src/config/index.ts | 21 ++- src/model/index.ts | 18 +++ test/config.test.ts | 43 ++++- test/model.test.ts | 39 +++++ 11 files changed, 322 insertions(+), 6 deletions(-) create mode 100644 config/gemini-flex.yaml create mode 100644 scripts/smoke/scenarios/16-task-subagent/config.json create mode 100644 scripts/smoke/scenarios/16-task-subagent/template/docs/task-subagent-note.txt diff --git a/config/gemini-flex.yaml b/config/gemini-flex.yaml new file mode 100644 index 0000000..82e7390 --- /dev/null +++ b/config/gemini-flex.yaml @@ -0,0 +1,9 @@ +models: + default: openrouter/google/gemini-3.1-flash-lite + providers: 
+ openrouter: + type: openai-compatible + baseURL: https://openrouter.ai/api/v1 + apiKeyEnv: OPENROUTER_API_KEY + supportsStructuredOutputs: true + service_tier: flex diff --git a/docs/MODEL_CONFIG.md b/docs/MODEL_CONFIG.md index 23cb2c5..912f219 100644 --- a/docs/MODEL_CONFIG.md +++ b/docs/MODEL_CONFIG.md @@ -133,6 +133,8 @@ For agent turns, Topchester tries native OpenAI-compatible tool calls first. If OpenRouter requests that try native tools include internal routing hints so OpenRouter should pick an upstream that can accept tool parameters. This is automatic for providers named like `openrouter` or using an OpenRouter base URL. +Topchester also adds default `HTTP-Referer` and `X-Title` attribution headers for OpenRouter providers unless the config sets those header names explicitly. + Advanced debugging overrides are available but should stay out of normal examples: ```jsonc @@ -143,6 +145,7 @@ Advanced debugging overrides are available but should stay out of normal example "type": "openai-compatible", "baseURL": "https://openrouter.ai/api/v1", "apiKeyEnv": "OPENROUTER_API_KEY", + "service_tier": "flex", "toolProtocol": "text-json", "openRouterToolRouting": "off", }, @@ -151,6 +154,8 @@ Advanced debugging overrides are available but should stay out of normal example } ``` +`service_tier` is passed through to compatible OpenRouter requests. Supported request values are `flex` and `priority`. + `toolProtocol` values: - `auto` — default; try native tools, then text fallbacks. diff --git a/docs/config.md b/docs/config.md index d4ce9c7..1387800 100644 --- a/docs/config.md +++ b/docs/config.md @@ -62,7 +62,7 @@ models: ## Example 4: Custom OpenRouter Settings -Use this when you want a custom environment variable name, headers, or tool behavior. +Use this when you want a custom environment variable name, extra headers, or tool behavior. 
```yaml models: @@ -184,6 +184,7 @@ models: apiKeyEnv: MY_PROVIDER_API_KEY apiKey: optional-inline-key supportsStructuredOutputs: true + service_tier: flex toolProtocol: auto openRouterToolRouting: auto headers: @@ -192,6 +193,10 @@ models: Prefer `apiKeyEnv` over `apiKey` so secrets stay out of config files. +Topchester adds default `HTTP-Referer` and `X-Title` headers for OpenRouter providers unless the config sets those header names explicitly. + +`service_tier` is passed through to compatible OpenRouter requests. Use `flex` for lower cost with higher latency, or `priority` for faster service at higher cost. + `toolProtocol` can be: - `auto`: try native tools, then text fallbacks. diff --git a/scripts/smoke/fake-api.ts b/scripts/smoke/fake-api.ts index 0d84f83..04d4f66 100644 --- a/scripts/smoke/fake-api.ts +++ b/scripts/smoke/fake-api.ts @@ -187,6 +187,26 @@ function chooseResponse(prompt: string): string { }); } + if (prompt.includes("TASK_SUBAGENT_SMOKE") && prompt.includes("Tool result from task")) { + return "Subagent found marker aqua."; + } + + if (prompt.includes("TASK_SUBAGENT_SMOKE")) { + return toolCall("task", { + description: "Inspect task subagent smoke marker", + prompt: "TASK_SUBAGENT_CHILD read docs/task-subagent-note.txt and report the exact marker line from that file.", + subagent_type: "explore", + }); + } + + if (prompt.includes("TASK_SUBAGENT_CHILD") && prompt.includes("Tool result from read_file")) { + return "Child result: subagent marker: aqua"; + } + + if (prompt.includes("TASK_SUBAGENT_CHILD")) { + return toolCall("read_file", { path: "docs/task-subagent-note.txt" }); + } + if (prompt.includes("Tool result from edit_file")) { return "Done."; } diff --git a/scripts/smoke/run-smoke.ts b/scripts/smoke/run-smoke.ts index 9f9427a..e90040a 100644 --- a/scripts/smoke/run-smoke.ts +++ b/scripts/smoke/run-smoke.ts @@ -65,6 +65,15 @@ const scenarioSchema = z.object({ lastActiveItem: z.string().optional(), }) .optional(), + expectedSubagent: z + 
.object({ + completed: z.boolean().optional().default(true), + agentProfileId: z.string().optional(), + requiredChildToolCalls: z.array(z.string()).optional().default([]), + expectedResultContains: z.array(z.string()).optional().default([]), + expectedChildMessagesContain: z.array(z.string()).optional().default([]), + }) + .optional(), expectedGit: z .object({ stagedPaths: z.array(z.string()).optional(), @@ -506,6 +515,7 @@ async function assertScenario( } await expectTaskPlanState(outputDir, scenario.expectedTaskPlan, failures); + await expectSubagentState(workspace, outputDir, scenario.expectedSubagent, failures); await expectGitState(workspace, scenario.expectedGit, failures); } @@ -541,6 +551,144 @@ async function expectTaskPlanState( } } +async function expectSubagentState( + workspace: string, + outputDir: string, + expectation: Scenario["expectedSubagent"], + failures: string[] +): Promise { + if (!expectation) { + return; + } + + const runtimeEvents = await readRuntimeJsonEvents(outputDir); + const started = runtimeEvents.filter((event) => event.type === "subagent_started"); + const completed = runtimeEvents.filter((event) => event.type === "subagent_completed"); + const failed = runtimeEvents.filter((event) => event.type === "subagent_failed"); + const childEvents = runtimeEvents.filter((event) => event.type === "subagent_event"); + + if (started.length === 0) { + failures.push("expected at least one subagent_started event"); + return; + } + + if (expectation.completed && completed.length === 0) { + failures.push("expected at least one subagent_completed event"); + } + + if (expectation.completed && failed.length > 0) { + failures.push(`expected no subagent_failed events, saw ${failed.length}`); + } + + const childSessionIds = started + .map((event) => getStringField(event, "sessionId")) + .filter((id): id is string => Boolean(id)); + + for (const childSessionId of childSessionIds) { + await expectChildSessionMetadata(workspace, childSessionId, started, 
expectation, failures); + } + + for (const tool of expectation.requiredChildToolCalls) { + const sawTool = childEvents.some((event) => { + const childEvent = getRecordField(event, "event"); + const call = getRecordField(childEvent, "call"); + return getStringField(childEvent, "type") === "tool_call" && getStringField(call, "tool") === tool; + }); + + if (!sawTool) { + failures.push(`expected child tool ${tool} was not called`); + } + } + + for (const text of expectation.expectedResultContains) { + const sawResult = completed.some((event) => getStringField(event, "result")?.includes(text)); + + if (!sawResult) { + failures.push(`subagent completed result did not contain ${JSON.stringify(text)}`); + } + } + + for (const text of expectation.expectedChildMessagesContain) { + const sawMessage = childEvents.some((event) => { + const childEvent = getRecordField(event, "event"); + return ( + getStringField(childEvent, "type") === "message" && + getStringField(childEvent, "role") === "assistant" && + Boolean(getStringField(childEvent, "text")?.includes(text)) + ); + }); + + if (!sawMessage) { + failures.push(`subagent child messages did not contain ${JSON.stringify(text)}`); + } + } +} + +async function expectChildSessionMetadata( + workspace: string, + childSessionId: string, + startedEvents: Record[], + expectation: NonNullable, + failures: string[] +): Promise { + const metadataPath = join(workspace, ".agents", "topchester", "sessions", childSessionId, "metadata.json"); + const raw = await readFile(metadataPath, "utf8").catch(() => undefined); + + if (raw === undefined) { + failures.push(`expected child session metadata for ${childSessionId}`); + return; + } + + const metadata = JSON.parse(raw) as Record; + const started = startedEvents.find((event) => getStringField(event, "sessionId") === childSessionId); + const parentSessionId = getStringField(started, "parentSessionId"); + + if (getStringField(metadata, "source") !== "subagent") { + failures.push(`child session 
${childSessionId} source was not subagent`); + } + + if (parentSessionId && getStringField(metadata, "parentSessionId") !== parentSessionId) { + failures.push(`child session ${childSessionId} parentSessionId did not match subagent_started`); + } + + if (expectation.agentProfileId && getStringField(metadata, "agentProfileId") !== expectation.agentProfileId) { + failures.push( + `child session ${childSessionId} agentProfileId was ${JSON.stringify(getStringField(metadata, "agentProfileId"))}, expected ${JSON.stringify(expectation.agentProfileId)}` + ); + } +} + +async function readRuntimeJsonEvents(outputDir: string): Promise[]> { + const eventFiles = (await readdir(outputDir)) + .filter((entry) => /^events-\d+\.jsonl$/u.test(entry)) + .sort((left, right) => left.localeCompare(right)); + const events = (await Promise.all(eventFiles.map((entry) => readJsonLines(join(outputDir, entry))))).flat(); + + return events.map((entry) => entry.event).filter(isRecord); +} + +function getRecordField(value: unknown, field: string): Record | undefined { + if (!isRecord(value)) { + return undefined; + } + + const child = value[field]; + return isRecord(child) ? child : undefined; +} + +function getStringField(value: unknown, field: string): string | undefined { + if (!isRecord(value)) { + return undefined; + } + + const child = value[field]; + return typeof child === "string" ? 
child : undefined; +} + +function isRecord(value: unknown): value is Record { + return Boolean(value) && typeof value === "object" && !Array.isArray(value); +} + async function collectGlobalLogs( workspace: string, runIds: string[], diff --git a/scripts/smoke/scenarios/16-task-subagent/config.json b/scripts/smoke/scenarios/16-task-subagent/config.json new file mode 100644 index 0000000..b93fd06 --- /dev/null +++ b/scripts/smoke/scenarios/16-task-subagent/config.json @@ -0,0 +1,17 @@ +{ + "id": "16-task-subagent", + "name": "Task subagent", + "description": "Checks the task tool creates a child session, forwards child events, persists subagent metadata, and returns the child result to the parent.", + "timeoutMs": 15000, + "prompt": "TASK_SUBAGENT_SMOKE use the task tool for this smoke test. Delegate exactly one read-only child task. The child prompt must include TASK_SUBAGENT_CHILD and must ask the child to read docs/task-subagent-note.txt and report the exact marker line. After the task finishes, answer with exactly: Subagent found marker aqua.", + "requiredToolCalls": ["task"], + "forbiddenToolCalls": ["edit_file", "write_file", "inspect_command"], + "expectedOutputContains": ["Subagent found marker aqua."], + "expectedSubagent": { + "completed": true, + "agentProfileId": "explore", + "requiredChildToolCalls": ["read_file"], + "expectedResultContains": ["subagent marker: aqua"], + "expectedChildMessagesContain": ["subagent marker: aqua"] + } +} diff --git a/scripts/smoke/scenarios/16-task-subagent/template/docs/task-subagent-note.txt b/scripts/smoke/scenarios/16-task-subagent/template/docs/task-subagent-note.txt new file mode 100644 index 0000000..3cdf056 --- /dev/null +++ b/scripts/smoke/scenarios/16-task-subagent/template/docs/task-subagent-note.txt @@ -0,0 +1 @@ +subagent marker: aqua diff --git a/src/config/index.ts b/src/config/index.ts index a8da50f..a711d9c 100644 --- a/src/config/index.ts +++ b/src/config/index.ts @@ -8,6 +8,10 @@ const modelPurposeSchema 
= z.enum(["agent.primary", "agent.fast", "kb.scan", "kb const modelPurposes = modelPurposeSchema.options; const toolProtocolSchema = z.enum(["auto", "native", "text-json", "text-xml"]); +const openRouterAttributionHeaders = { + "HTTP-Referer": "https://topchester.com", + "X-Title": "Topchester", +}; const providerSchema = z.object({ type: z.literal("openai-compatible"), @@ -16,6 +20,7 @@ const providerSchema = z.object({ apiKey: z.string().optional(), headers: z.record(z.string(), z.string()).optional(), supportsStructuredOutputs: z.boolean().optional(), + service_tier: z.enum(["flex", "priority"]).optional(), toolProtocol: toolProtocolSchema.optional(), openRouterToolRouting: z.enum(["auto", "force", "off"]).optional(), }); @@ -280,10 +285,7 @@ function ensureKnownProvider(providers: Record, provider: strin baseURL: "https://openrouter.ai/api/v1", apiKeyEnv: "OPENROUTER_API_KEY", supportsStructuredOutputs: true, - headers: { - "HTTP-Referer": "https://topchester.com", - "X-Title": "Topchester", - }, + headers: { ...openRouterAttributionHeaders }, }; } @@ -297,9 +299,20 @@ function applyKnownProviderDefaults(providers: Record) { provider.supportsStructuredOutputs ??= true; provider.toolProtocol ??= "native"; } + + if (isOpenRouterProvider(providerId, provider.baseURL)) { + provider.headers = { + ...openRouterAttributionHeaders, + ...(isPlainObject(provider.headers) ? 
provider.headers : {}), + }; + } } } +function isOpenRouterProvider(providerId: string, baseURL: string): boolean { + return providerId.toLowerCase().includes("openrouter") || baseURL.toLowerCase().includes("openrouter.ai"); +} + function isOpenAIProvider(providerId: string, baseURL: string): boolean { const normalizedProvider = providerId.toLowerCase(); const normalizedBaseURL = baseURL.toLowerCase(); diff --git a/src/model/index.ts b/src/model/index.ts index 95bf014..962c774 100644 --- a/src/model/index.ts +++ b/src/model/index.ts @@ -19,6 +19,7 @@ export interface OpenAICompatibleProviderConfig { apiKeyEnv?: string; headers?: Record; supportsStructuredOutputs?: boolean; + service_tier?: "flex" | "priority"; toolProtocol?: ToolProtocolOverride; openRouterToolRouting?: "auto" | "force" | "off"; } @@ -155,6 +156,7 @@ export class ModelGateway { model: resolved.model, system: request.system, prompt: request.prompt, + providerOptions: buildProviderOptions(resolved.providerId, resolved.providerConfig), abortSignal: request.abortSignal, }); const usage = normalizeUsage(result.usage, { @@ -259,6 +261,7 @@ export class ModelGateway { model: resolved.model, system: request.system, prompt: request.prompt, + providerOptions: buildProviderOptions(resolved.providerId, resolved.providerConfig), abortSignal: request.abortSignal, }); @@ -334,6 +337,7 @@ export class ModelGateway { model: resolved.model, system: request.system, prompt: request.prompt, + providerOptions: buildProviderOptions(resolved.providerId, resolved.providerConfig), abortSignal: request.abortSignal, }); const usage = normalizeUsage(result.usage, { @@ -430,6 +434,7 @@ export class ModelGateway { model: resolved.model, system: request.system, prompt: request.prompt, + providerOptions: buildProviderOptions(resolved.providerId, resolved.providerConfig), abortSignal: request.abortSignal, includeRawChunks: true, }); @@ -594,8 +599,21 @@ function resolveApiKey(config: OpenAICompatibleProviderConfig): string | 
undefin type JsonValue = string | number | boolean | null | JsonValue[] | { [key: string]: JsonValue }; type ProviderOptions = Record; +function buildProviderOptions(providerId: string, config: OpenAICompatibleProviderConfig): ProviderOptions { + const options: { [key: string]: JsonValue } = {}; + + if (config.service_tier !== undefined) { + options.service_tier = config.service_tier; + } + + return { + [providerId]: options, + }; +} + function buildNativeProviderOptions(providerId: string, config: OpenAICompatibleProviderConfig): ProviderOptions { const options: { [key: string]: JsonValue } = { + ...buildProviderOptions(providerId, config)[providerId], parallel_tool_calls: false, }; diff --git a/test/config.test.ts b/test/config.test.ts index 48fe387..7ebbd74 100644 --- a/test/config.test.ts +++ b/test/config.test.ts @@ -331,7 +331,7 @@ describe("Topchester config loading", () => { }); }); - it("does not replace an explicitly configured OpenRouter provider with shortcut defaults", async () => { + it("adds attribution defaults to explicitly configured OpenRouter providers", async () => { const root = await mkdtemp(join(tmpdir(), "topchester-config-")); const workspace = join(root, "workspace"); await mkdir(workspace, { recursive: true }); @@ -346,6 +346,7 @@ describe("Topchester config loading", () => { " baseURL: https://custom-openrouter.example/v1", " apiKeyEnv: CUSTOM_OPENROUTER_KEY", " supportsStructuredOutputs: false", + " service_tier: flex", " toolProtocol: native", " openRouterToolRouting: off", " headers:", @@ -360,14 +361,47 @@ describe("Topchester config loading", () => { baseURL: "https://custom-openrouter.example/v1", apiKeyEnv: "CUSTOM_OPENROUTER_KEY", supportsStructuredOutputs: false, + service_tier: "flex", toolProtocol: "native", openRouterToolRouting: "off", headers: { + "HTTP-Referer": "https://topchester.com", + "X-Title": "Topchester", "X-Test": "custom", }, }); }); + it("preserves explicit OpenRouter attribution header overrides", async () => 
{ + const root = await mkdtemp(join(tmpdir(), "topchester-config-")); + const workspace = join(root, "workspace"); + await mkdir(workspace, { recursive: true }); + await writeFile( + join(workspace, "topchester.yaml"), + [ + "models:", + " default: openrouter/google/gemini-3.1-flash-lite", + " providers:", + " openrouter:", + " type: openai-compatible", + " baseURL: https://openrouter.ai/api/v1", + " apiKeyEnv: OPENROUTER_API_KEY", + " headers:", + " HTTP-Referer: https://example.com", + " X-Title: Custom App", + ].join("\n") + ); + + const config = loadTopchesterConfig({ workspaceRoot: workspace }); + + expect(config.models?.providers?.openrouter).toMatchObject({ + headers: { + "HTTP-Referer": "https://example.com", + "X-Title": "Custom App", + }, + }); + }); + it("infers native OpenAI tool defaults for OpenAI-compatible providers named openai", async () => { const root = await mkdtemp(join(tmpdir(), "topchester-config-")); const workspace = join(root, "workspace"); @@ -455,6 +489,7 @@ describe("Topchester config loading", () => { " baseURL: https://openrouter.ai/api/v1", " apiKeyEnv: OPENROUTER_API_KEY", " supportsStructuredOutputs: true", + " service_tier: flex", " toolProtocol: auto", " openRouterToolRouting: force", " headers:", @@ -487,8 +522,14 @@ describe("Topchester config loading", () => { expect(config.models?.providers?.openrouter).toMatchObject({ baseURL: "https://openrouter.ai/api/v1", apiKeyEnv: "OPENROUTER_API_KEY", + service_tier: "flex", toolProtocol: "auto", openRouterToolRouting: "force", + headers: { + "HTTP-Referer": "https://topchester.com", + "X-Title": "Topchester", + "X-Test": "custom", + }, }); expect(config.models?.providers?.ollama).toMatchObject({ baseURL: "http://localhost:11434/v1", diff --git a/test/model.test.ts b/test/model.test.ts index 904627f..4bdfb28 100644 --- a/test/model.test.ts +++ b/test/model.test.ts @@ -53,6 +53,43 @@ describe("ModelGateway agent tool protocol", () => { expect(result.usage).toEqual({ inputTokens: 1, 
outputTokens: 1, totalTokens: 2, costUsd: 0.00014 }); }); + it("sends configured service tier on text responses", async () => { + const api = await startChatApi((body) => { + expect(body.service_tier).toBe("flex"); + + return { + choices: [ + { + index: 0, + finish_reason: "stop", + message: { role: "assistant", content: "Hello." }, + }, + ], + }; + }); + const gateway = new ModelGateway({ + defaultPurpose: "agent.primary", + defaultProvider: "openrouter", + models: { + "agent.primary": { name: "test-model" }, + }, + providers: { + openrouter: { + type: "openai-compatible", + baseURL: api.baseURL, + apiKey: "test", + service_tier: "flex", + }, + }, + }); + + await gateway.generateText({ + purpose: "agent.primary", + system: "system", + prompt: "hello", + }); + }); + it("sends native OpenAI-compatible tools and normalizes structured tool calls", async () => { const api = await startChatApi((body) => { expect(body.tools).toEqual([ @@ -156,6 +193,7 @@ describe("ModelGateway agent tool protocol", () => { const api = await startChatApi((body) => { expect(body.provider).toEqual({ require_parameters: true }); expect(body.parallel_tool_calls).toBe(false); + expect(body.service_tier).toBe("flex"); return { choices: [ @@ -178,6 +216,7 @@ describe("ModelGateway agent tool protocol", () => { type: "openai-compatible", baseURL: api.baseURL, apiKey: "test", + service_tier: "flex", }, }, });