diff --git a/CONTEXT.md b/CONTEXT.md deleted file mode 100644 index 70df1e0673..0000000000 --- a/CONTEXT.md +++ /dev/null @@ -1,110 +0,0 @@ -# Mux Conversation Context - -Mux preserves workspace transcripts while controlling which messages are active conversation context for the agent. - -## Language - -**Context Reset**: -Starts a new active conversation context while preserving earlier transcript history. -_Avoid_: soft clear, compaction, truncate, clear history - -**Transcript History**: -The persisted record of messages in a workspace, including messages that are no longer active context. -_Avoid_: context, prompt history - -**Active Conversation Context**: -The subset of transcript history eligible to be sent to the agent for the next response. -_Avoid_: chat history, transcript - -**Compaction Boundary**: -A context boundary that carries a provider-visible summary of earlier transcript history. -_Avoid_: context reset - -**Context Reset Boundary**: -A visible separator in transcript history where the active conversation context starts over; older history may be hidden behind a load-older affordance. -_Avoid_: compaction boundary, summary message - -**Agent Carryover State**: -Workspace state outside transcript history that can influence future agent turns. -_Avoid_: hidden context, leftovers - -**Context Boundary**: -A transcript marker that divides provider-eligible context windows without deleting transcript history. -_Avoid_: synthetic assistant message, compacted message - -**Hard Clear**: -A destructive operation that deletes transcript history for the active workspace. -_Avoid_: context reset, soft clear - -**Provider-Eligible Message**: -A transcript message that can contribute content to a future agent request. -_Avoid_: persisted row, visible message - -**Transcript Export**: -A shared or copied representation of transcript history, including visible context boundaries. 
-_Avoid_: active context export - -## Relationships - -- A **Hard Clear** deletes **Transcript History**. -- A **Context Reset** preserves **Transcript History**. -- A **Context Reset** creates a **Context Reset Boundary**. -- A **Compaction Boundary** is a kind of **Context Boundary**. -- A **Context Reset Boundary** is a kind of **Context Boundary**. -- A **Context Reset Boundary** separates older **Transcript History** from the new **Active Conversation Context**. -- Older **Transcript History** above a **Context Reset Boundary** can be hidden behind load-older history. -- **Active Conversation Context** may be smaller than **Transcript History**. - -- A **Context Reset** clears **Agent Carryover State** so previous work does not influence future agent turns. -- A **Context Reset Boundary** is created only when the current context window contains at least one **Provider-Eligible Message**. -- A **Context Reset Boundary** is visible transcript structure, not conversation content for the agent. - -- Context usage reflects **Active Conversation Context**, not all loaded **Transcript History**. -- Messages above the latest **Context Boundary** are viewable and exportable but cannot directly mutate the current **Active Conversation Context**. -- A **Transcript Export** can include **Transcript History** from above a **Context Reset Boundary**. - -## Example dialogue - -> **Dev:** "After a **Context Reset**, can the agent answer from messages above the **Context Reset Boundary**, or see a hidden note that the reset happened?" -> **Domain expert:** "No — those messages remain in **Transcript History**, and the boundary is visible transcript structure, but neither is part of the agent's **Active Conversation Context**." - -> **Dev:** "Are **Compaction Boundaries** and **Context Reset Boundaries** separate mechanisms?" -> **Domain expert:** "No — both are **Context Boundaries**. 
A **Compaction Boundary** summarizes earlier history for the agent; a **Context Reset Boundary** does not." - -> **Dev:** "Should `/clear` preserve **Transcript History** now that **Context Reset** exists?" -> **Domain expert:** "No — `/clear` remains a **Hard Clear**. `/clear --soft` performs a **Context Reset**." - -> **Dev:** "If there is no **Transcript History**, should a **Context Reset** create a boundary anyway?" -> **Domain expert:** "No — without earlier history, there is nothing for a **Context Reset Boundary** to separate." - -> **Dev:** "If the user repeats `/clear --soft` before sending another message, should we append another **Context Reset Boundary**?" -> **Domain expert:** "No — repeated resets with no active-context messages are no-op successes." - -> **Dev:** "Should the `/clear --soft` command itself appear as a user message?" -> **Domain expert:** "No — a **Context Reset** is represented by a **Context Reset Boundary**, not by a user prompt." - -> **Dev:** "Can a **Context Reset** happen while the agent is still responding?" -> **Domain expert:** "No — context can only be reset once the active turn has stopped and transcript ordering is stable." - -> **Dev:** "How should users find **Context Reset** outside slash commands?" -> **Domain expert:** "Expose it as a separate command from **Hard Clear**, named around resetting context while preserving history." - -> **Dev:** "What should the visible separator say?" -> **Domain expert:** "Use `Context reset`; avoid labels that imply transcript history was deleted." - -> **Dev:** "Should a **Context Reset Boundary** show when it happened?" -> **Domain expert:** "Persist the timestamp for ordering and audit, but keep the visible separator label simple." - -> **Dev:** "Can a **Context Reset** happen while user input is queued?" -> **Domain expert:** "No — queued input belongs to the old context and must be sent or cleared before resetting." 
- -> **Dev:** "What happens to pending composer content when a user performs a **Context Reset**?" -> **Domain expert:** "Resetting context starts fresh, so pending composer state should not carry over." - -> **Dev:** "Should partial or aborted messages before a **Context Reset Boundary** be cleaned up?" -> **Domain expert:** "No — they remain **Transcript History** above the boundary, but are outside the new **Active Conversation Context**." - -## Flagged ambiguities - -- "soft clear" is a user-facing command style, not the domain concept; resolved: use **Context Reset** for the behavior. -- "compaction boundary" implies summarization; resolved: use **Context Reset Boundary** for a reset without summarization. diff --git a/Dockerfile b/Dockerfile index 24920f3e72..becaa1d600 100644 --- a/Dockerfile +++ b/Dockerfile @@ -113,6 +113,9 @@ COPY --from=builder /app/dist/*.html ./dist/ COPY --from=builder /app/dist/*.css ./dist/ COPY --from=builder /app/dist/*.js ./dist/ +# Copy TypeScript lib files used by the bundled PTC validator at server startup. +COPY --from=builder /app/dist/typescript-lib ./dist/typescript-lib + # Copy runtime bundles last (most volatile layer during backend iteration). COPY --from=builder /app/dist/runtime ./dist/runtime diff --git a/Makefile b/Makefile index 0035e9e7d3..b5084eb766 100644 --- a/Makefile +++ b/Makefile @@ -282,6 +282,7 @@ verify-docker-runtime-artifacts: build-docker-runtime ## Verify required Docker @test -f dist/runtime/server-bundle.js @test -f dist/runtime/tokenizer.worker.js @test -f dist/static/splash.html + @test -f dist/typescript-lib/lib.es2023.d.ts.txt # Bundle server runtime for Docker image to reduce runtime dependencies/image size. # Depend on build-main explicitly because dist/cli/server.js is emitted as a side effect. 
diff --git a/bun.lock b/bun.lock index 514abc7ca3..97eca1ace3 100644 --- a/bun.lock +++ b/bun.lock @@ -79,8 +79,10 @@ "posthog-node": "^5.17.0", "quickjs-emscripten": "^0.31.0", "quickjs-emscripten-core": "^0.31.0", + "react": "18.3.1", "react-colorful": "^5.6.1", "react-resizable-panels": "^3.0.6", + "react-dom": "18.3.1", "react-router-dom": "^7.11.0", "recharts": "^2.15.3", "rehype-harden": "^1.1.5", @@ -169,13 +171,11 @@ "postcss": "^8.5.6", "posthog-js": "^1.276.0", "prettier": "^3.6.2", - "react": "^18.2.0", "react-compiler-runtime": "^1.0.0", "react-dnd": "^16.0.1", "react-dnd-html5-backend": "^16.0.1", "react-dnd-test-backend": "^16.0.1", "react-dnd-test-utils": "^16.0.1", - "react-dom": "^18.2.0", "rehype-katex": "^7.0.1", "rehype-raw": "^7.0.0", "remark-gfm": "^4.0.1", diff --git a/docs/hooks/tools.mdx b/docs/hooks/tools.mdx index bde56b6c89..b76c7eccc0 100644 --- a/docs/hooks/tools.mdx +++ b/docs/hooks/tools.mdx @@ -227,12 +227,15 @@ If a value is too large for the environment, it may be omitted (not set). Mux al
-agent_report (2) +agent_report (5) -| Env var | JSON path | Type | Description | -| -------------------------------- | ---------------- | ------ | ----------- | -| `MUX_TOOL_INPUT_REPORT_MARKDOWN` | `reportMarkdown` | string | — | -| `MUX_TOOL_INPUT_TITLE` | `title` | string | — | +| Env var | JSON path | Type | Description | +| --------------------------------------- | ---------------------- | ------- | ------------------------------------------------------------------------------------ | +| `MUX_TOOL_INPUT_REPORT_MARKDOWN` | `reportMarkdown` | string | — | +| `MUX_TOOL_INPUT_REPORT_MARKDOWN_PATH` | `reportMarkdownPath` | string | Path to the markdown report file, usually report.md in the workspace root | +| `MUX_TOOL_INPUT_STRUCTURED_OUTPUT` | `structuredOutput` | unknown | — | +| `MUX_TOOL_INPUT_STRUCTURED_OUTPUT_PATH` | `structuredOutputPath` | string | Path to a JSON file containing the structured output, usually structured-output.json | +| `MUX_TOOL_INPUT_TITLE` | `title` | string | — |
@@ -633,8 +636,8 @@ If a value is too large for the environment, it may be omitted (not set). Mux al | `MUX_TOOL_INPUT_FILTER` | `filter` | string | Optional regex to filter bash task output lines. By default, only matching lines are returned. When filter_exclude is true, matching lines are excluded instead. Non-matching lines are discarded and cannot be retrieved later. | | `MUX_TOOL_INPUT_FILTER_EXCLUDE` | `filter_exclude` | boolean | When true, lines matching 'filter' are excluded instead of kept. Requires 'filter' to be set. | | `MUX_TOOL_INPUT_MIN_COMPLETED` | `min_completed` | number | Number of awaited tasks that must complete before this call returns. Defaults to 1, so by default task_await returns as soon as the FIRST awaited task completes, letting you act on it while the rest keep running. The result still includes every task complete at that moment plus current status (running/queued) for the rest. Tasks that have not yet completed keep running and remain re-awaitable on a later task_await call. Raise this (e.g. set it to the total number of awaited tasks) when you genuinely need more before proceeding — for example best-of-N synthesis that must compare every candidate. Clamped to the number of awaited tasks; values above that behave like 'wait for all'. | -| `MUX_TOOL_INPUT_TASK_IDS_` | `task_ids[]` | string | List of task IDs to await — use only real IDs returned by prior task, bash, or task_list tool results; never fabricate an ID. When omitted, waits for all active descendant tasks of the current workspace. | -| `MUX_TOOL_INPUT_TASK_IDS_COUNT` | `task_ids.length` | number | Number of elements in task_ids (List of task IDs to await — use only real IDs returned by prior task, bash, or task_list tool results; never fabricate an ID. When omitted, waits for all active descendant tasks of the current workspace.) 
| +| `MUX_TOOL_INPUT_TASK_IDS_` | `task_ids[]` | string | List of task IDs or workflow run IDs to await — use only real IDs returned by prior task, bash, workflow_run, or task_list tool results; never fabricate an ID. When omitted, waits for all active descendant tasks and workflow runs of the current workspace. | +| `MUX_TOOL_INPUT_TASK_IDS_COUNT` | `task_ids.length` | number | Number of elements in task_ids (List of task IDs or workflow run IDs to await — use only real IDs returned by prior task, bash, workflow_run, or task_list tool results; never fabricate an ID. When omitted, waits for all active descendant tasks and workflow runs of the current workspace.) | | `MUX_TOOL_INPUT_TIMEOUT_SECS` | `timeout_secs` | number | Maximum time to wait in seconds for each task. For bash tasks, this waits for NEW output (or process exit). If exceeded, the result returns status=queued\|running\|awaiting_report (task is still active). Defaults to 600 seconds (10 minutes) if not specified. Set to 0 for a non-blocking status check. | @@ -679,6 +682,26 @@ If a value is too large for the environment, it may be omitted (not set). Mux al +
+workflow_read (1) + +| Env var | JSON path | Type | Description | +| --------------------- | --------- | ------ | ----------- | +| `MUX_TOOL_INPUT_NAME` | `name` | string | — | + +
+ +
+workflow_run (3) + +| Env var | JSON path | Type | Description | +| ---------------------------------- | ------------------- | ------- | ----------- | +| `MUX_TOOL_INPUT_ARGS` | `args` | unknown | — | +| `MUX_TOOL_INPUT_NAME` | `name` | string | — | +| `MUX_TOOL_INPUT_RUN_IN_BACKGROUND` | `run_in_background` | boolean | — | + +
+ {/* END TOOL_HOOK_ENV_VARS */} diff --git a/package.json b/package.json index 0a3626e0ca..a530266adf 100644 --- a/package.json +++ b/package.json @@ -121,8 +121,10 @@ "posthog-node": "^5.17.0", "quickjs-emscripten": "^0.31.0", "quickjs-emscripten-core": "^0.31.0", + "react": "18.3.1", "react-colorful": "^5.6.1", "react-resizable-panels": "^3.0.6", + "react-dom": "18.3.1", "react-router-dom": "^7.11.0", "recharts": "^2.15.3", "rehype-harden": "^1.1.5", @@ -211,13 +213,11 @@ "postcss": "^8.5.6", "posthog-js": "^1.276.0", "prettier": "^3.6.2", - "react": "^18.2.0", "react-compiler-runtime": "^1.0.0", "react-dnd": "^16.0.1", "react-dnd-html5-backend": "^16.0.1", "react-dnd-test-backend": "^16.0.1", "react-dnd-test-utils": "^16.0.1", - "react-dom": "^18.2.0", "rehype-katex": "^7.0.1", "rehype-raw": "^7.0.0", "remark-gfm": "^4.0.1", diff --git a/rfc/20260529_dynamic-workflows.md b/rfc/20260529_dynamic-workflows.md new file mode 100644 index 0000000000..17d8dc56d5 --- /dev/null +++ b/rfc/20260529_dynamic-workflows.md @@ -0,0 +1,699 @@ +--- +author: @mux +date: 2026-05-29 +--- + +# Dynamic Workflows for Mux + +Status: Draft + +## Stakeholders + +- [ ] Product Lead: +- [ ] Engineering DRI: +- [ ] CTO: +- [ ] Frontend/UI reviewer: +- [ ] Runtime/task orchestration reviewer: + +## Problem Statement + +Mux already supports parallel agent workflows through tasks, sub-agents, agent skills, programmatic tool calling, goals, and durable chat/task state. However, repeatable multi-agent orchestration still mostly lives in prose instructions, ad-hoc parent-agent reasoning, or one-off tool calls. That makes sophisticated patterns such as deep research, adversarial verification, multi-lane review, and goal-internal control flow harder to reuse, inspect, resume, or explain visually. + +We want Mux to support executable workflow orchestration: plain JavaScript scripts that coordinate sub-agent tasks through a small conductor-only API. 
A workflow should make the orchestration structure visible to users, preserve durable progress across restarts, support structured sub-agent outputs, and be discoverable like skills without turning skills into executable code. + +The initial product should be a developer-facing experiment with enough durability and observability to dogfood real workflows. It should not start as a polished `/workflows` dashboard product. + +## Glossary + +This RFC relies on these terms: + +- **Workflow Definition**: reusable executable orchestration that coordinates agent work. +- **Workflow Run**: one execution of a workflow definition for a specific request/input. +- **Scratch Workflow Definition**: a generated workflow definition stored for one workflow run without becoming reusable/discoverable. +- **Workflow Promotion**: explicitly saving a scratch workflow definition as a reusable workflow definition. +- **Workflow Primitive**: a conductor operation available inside workflow JavaScript. +- **Workflow Step**: a replayable unit of orchestration progress within a durable workflow run. +- **Durable Workflow Run**: a workflow run that can continue after Mux restarts without losing completed orchestration progress. +- **Workflow Resume**: continuing an interrupted workflow run from durable state. +- **Partial Workflow Recovery**: reusing recoverable workflow steps and rerunning missing/unrecoverable steps. +- **Workflow Result**: final workflow output, including a human-readable report and optional machine-readable data. +- **Structured Task Output**: machine-readable task result requested by a workflow run. +- **Report-Time Validation**: validation when a task submits structured task output as part of its final report. +- **Goal Step**: a unit of progress inside a goal; a workflow run can be a goal step. + +## Goals + +1. Let agents and users run conductor-only JavaScript workflow definitions that coordinate sub-agent tasks. +2. 
Make workflow runs observable in the main chat with first-class phases, logs, child tasks, validation events, status, and result. +3. Make workflow runs durable and resumable from the first real workflow implementation. +4. Support report-time JSON Schema validation for structured sub-agent outputs. +5. Preserve the existing skill mental model for discovery and precedence while keeping workflow storage and trust boundaries separate. +6. Support dynamic one-off workflow generation and explicit promotion to reusable workflow definitions. +7. Ship built-in deep research as the first showcase workflow. + +## Non-goals + +1. Do not make agent skills executable. +2. Do not expose arbitrary tools such as bash, file editing, web fetch, browser automation, or `mux.*` inside workflow definitions in v1. +3. Do not add TypeScript workflow authoring in v1. +4. Do not build a full workflow dashboard in v1. +5. Do not add workflow-specific concurrency or total-agent caps in v1. +6. Do not implement generic `parallel(fn[])`, nested `workflow(...)`, or workflow-side memory/file-write primitives in v1. +7. Do not make a workflow run automatically create, replace, or complete workspace goals. + +## Proposal Overview + +Add a first-class workflow product layer on top of Mux's task/sub-agent system. + +A workflow definition is plain JavaScript. It runs in a sandboxed coordinator runtime that exposes only conductor primitives such as `agent`, `parallelAgents`, `phase`, and `log`. Hands-on work happens inside spawned tasks, whose transcripts and tools remain visible through existing task infrastructure. + +A workflow run persists a durable journal of steps, emitted events, spawned task IDs, structured outputs, and final result. On resume after interruption or restart, Mux reruns the workflow definition against the journal. Completed steps short-circuit from recorded results. Missing or unrecoverable steps rerun when safe. 
+ +Workflow definitions are discovered from project-local, global, and built-in roots. Project-local definitions are governed by existing project trust. Dynamically generated scratch definitions are saved under Mux-controlled scratch/run storage and can be promoted explicitly to reusable definitions. + +The first built-in workflow is deep research. It should demonstrate scoping, source gathering, cross-checking, adversarial refutation, structured output validation, and final synthesis. + +## UX & Design + +### Invocation + +Workflow runs should be launched primarily from chat/tool-call interactions. + +Once workflow definitions exist, they should be discoverable like skills: + +- Slash invocation explicitly starts a workflow run: `/deep-research browser automation`. +- Inline `$name` references include/identify a workflow definition in chat context. They start a run only when the surrounding user request clearly asks to run that workflow. + +Examples: + +- `/deep-research browser automation` starts a workflow run. +- `Run $deep-research on browser automation` starts a workflow run. +- `Can you improve $deep-research?` references the workflow definition for inspection/editing; it does not run it. +- `Compare $deep-research and $bug-hunt` references definitions; it does not run them. + +Slash invocation should stay simple in v1. Do not add a `--background` slash flag initially. Tool-based launch should expose `run_in_background` for the agent. + +### Workflow run card + +The first version should include a lightweight first-class workflow run card in the launching chat. The card should show: + +- Workflow name and source. +- Run status: running, completed, failed, interrupted, waiting, or backgrounded. +- Current and completed phases. +- Child tasks and their statuses. +- Workflow logs emitted by the coordinator. +- Structured output validation successes and failures. +- Final workflow result or error. +- A promotion affordance for scratch workflows. 
+- Resume/interrupt actions for durable runs. + +A full workflow dashboard is an eventual requirement, not a v1 requirement. + +### Foreground and background behavior + +Workflow runs support foreground and background execution. + +- Tool launch includes `run_in_background`, defaulting to `false`, matching bash and task behavior. +- Slash-launched workflows start in the foreground. +- If the user sends a follow-up message or manually starts another workflow while a foreground workflow is active, Mux should move the first run to the background rather than blocking the conversation. +- Background workflow runs remain visible through their run cards and can later be queried, awaited, resumed, or integrated by the parent agent. + +### Dynamic generation and promotion + +Agents can generate one-off scratch workflow definitions for a specific request. Those generated scripts should be written to a Mux-controlled scratch/run location under Mux home, not automatically saved into project or global workflow roots. + +Scratch workflow definitions are durable for their run. The run record must retain the script content or a stable reference to the scratch script so recovery can replay it. + +Scratch definitions are not discoverable as reusable slash/inline workflows until the user promotes them. Promotion should open a naming/location flow where the user explicitly chooses project-local or global storage. Do not default the location. + +### Future drilldown + +Eventually, users should be able to drill into running child tasks from a workflow run card, inspect stuck agents, resume child tasks where possible, or manually prompt/intervene. The run card should not block those future interactions, but detailed task-intervention UI can come after the first workflow release. + +## Operational Scenarios + +### Deep research + +A user asks Mux to research an unfamiliar technical topic. The parent agent dynamically creates or selects a deep research workflow. The workflow: + +1. 
Scopes the topic. +2. Fans out source discovery to multiple sub-agents. +3. Asks source-reading agents to return structured source summaries. +4. Runs adversarial verification agents that refute or qualify claims. +5. Synthesizes a final report with structured claims, sources, and confidence. + +The run card shows each phase, spawned tasks, validation events, logs, and final synthesis. + +### Crash and resume + +A workflow has completed source discovery and spawned verification tasks. Mux restarts. On startup or explicit resume, the same workflow run continues from its durable journal. Completed steps short-circuit. Running/interrupted child tasks resume in place where possible. Missing or corrupt step records rerun through partial workflow recovery. + +The workflow does not spawn duplicate agents for completed steps. + +### Goal-internal workflow + +A user sets a goal to complete a long-running project objective. The agent uses a workflow as control flow inside the goal loop. The workflow delegates work, verification, correctness review, and progress assessment, then returns a workflow result to the agent. The goal remains the ongoing loop; the workflow is a goal step, not the goal itself. + +### Scratch promotion + +An agent generates a one-off research workflow for a narrow question. After the run succeeds, the user clicks “Save workflow,” enters a name/description, chooses project-local or global storage, and promotes the scratch workflow definition. The promoted definition appears in slash and `$` suggestions according to normal discovery/precedence rules. + +## Requirements + +### Initial Functional Requirements + +#### Workflow definition authoring + +- Workflow definitions are plain `.js` files in v1. +- TypeScript authoring is deferred; future TypeScript/Zod layers can compile down to JavaScript and JSON Schema. +- Workflow scripts run in a sandboxed coordinator runtime. 
+- Workflow scripts must not have direct filesystem, shell, network, or `mux.*` tool access in v1. + +#### Workflow primitives + +Expose these conductor-only primitives in v1: + +- `agent(spec)` — spawn one task and, by default, wait for its report. +- `backgroundAgent(spec)` — spawn one task and return a handle without waiting. +- `awaitAgents(handles, opts?)` — await one or more task handles. +- `parallelAgents(specs, opts?)` — spawn a group of tasks and wait for reports. +- `phase(name, details?)` — emit workflow progress. +- `log(message, data?)` — emit lightweight workflow diagnostics. +- `args` — read-only workflow input. +- `limits` or `budget` — read-only run caps. + +Represent variants and best-of-style fan-out with `parallelAgents(...)` patterns in v1. Keep existing task-tool `n` and `variants` available to ordinary agents, but do not add dedicated workflow primitives yet. + +#### Durable step identity + +- Replay-boundary primitives require stable IDs. +- `agent(...)`, `backgroundAgent(...)`, and `parallelAgents(...)` create durable replay boundaries and need stable author-provided identities. +- `phase(...)` and `log(...)` do not require author-provided IDs. +- Missing IDs during normal execution should fail fast as workflow authoring errors. +- Missing or corrupted persisted results during recovery should trigger partial workflow recovery when safe. + +Example: + +```js +const scope = agent({ + id: "scope-topic", + title: "Scope topic", + agent: "explore", + prompt: `Scope this research topic: ${args.topic}`, +}); + +const reports = parallelAgents({ + id: "verify-claims", + items: claims, + key: (claim) => claim.id, + task: (claim) => ({ + title: `Verify ${claim.id}`, + agent: "explore", + prompt: `Verify or refute this claim: ${claim.text}`, + }), +}); +``` + +#### Structured task output + +Workflow task primitives may include `outputSchema`. + +- `outputSchema` is a JSON Schema object literal. 
+- The initial schema subset should include `type`, `properties`, `required`, `items`, `enum`, `minItems`, `maxItems`, `minLength`, `maxLength`, and `additionalProperties`. +- When a workflow task has an output schema, the child task must submit both `reportMarkdown` and `structuredOutput` through its final report tool. +- Mux validates `structuredOutput` at report time before accepting the report. +- If validation fails, the final report tool call returns a validation error inside the child task. The child remains active and can call the report tool again. +- The workflow receives only validated structured output. + +Example schema: + +```js +const result = agent({ + id: "find-claims", + title: "Find key claims", + agent: "explore", + prompt: "Find five claims that need verification...", + outputSchema: { + type: "object", + required: ["findings"], + additionalProperties: false, + properties: { + findings: { + type: "array", + minItems: 5, + items: { + type: "object", + required: ["claim", "evidence", "confidence"], + additionalProperties: false, + properties: { + claim: { type: "string", minLength: 1 }, + evidence: { type: "array", items: { type: "string" } }, + confidence: { enum: ["low", "medium", "high"] }, + }, + }, + }, + }, + }, +}); +``` + +#### Workflow result + +The minimum v1 result contract is: + +```js +{ + reportMarkdown: string, + structuredOutput?: unknown, +} +``` + +If a workflow returns a string, Mux may treat it as `reportMarkdown`. The run card displays `reportMarkdown`; parent agents and goal loops can consume `structuredOutput`. + +#### Storage and discovery + +Workflow definitions are stored separately from agent skills. + +Initial roots: + +1. Project-local: `.mux/workflows/<name>.js` +2. Global user-private: `~/.mux/workflows/<name>.js` +3. Built-in: workflow definitions shipped with Mux + +Discovery and precedence should mirror skills where possible: + +1. Project-local wins. +2. Global wins over built-in. +3. Built-in is fallback. 
+ +Project-local definitions can override built-ins when the project is trusted. + +Scratch workflow definitions should be stored under Mux-controlled scratch/run storage and are not included in reusable discovery until promoted. + +#### Trust + +Workflow trust piggybacks on existing project trust in v1. + +- Built-in workflow definitions are trusted by default. +- Global user-private definitions are treated like user-controlled Mux configuration. +- Project-local definitions are repo-controlled executable orchestration and are governed by existing project trust. +- Untrusted projects must not execute `.mux/workflows/*`. +- Discovery can omit untrusted project-local workflows or show them disabled with a trust-project affordance. + +Do not add per-workflow or per-content-hash approval in v1. + +#### Interrupt and resume + +- Use **resume**, not restart, as the primary continuation concept. +- Interrupting a workflow run stops the coordinator and cascade-interrupts active child tasks while preserving durable workflow state, completed step results, and interrupted task workspaces where possible. +- Resume continues the same workflow run ID from its durable journal. +- Completed steps short-circuit from recorded results. +- Interrupted child task workspaces resume in place when possible. +- Missing or unrecoverable steps rerun through partial workflow recovery. +- Reserve restart for a future explicit “start over as a new run” action. + +#### Goals + +- Workflows and goals are complementary. +- A goal is the ongoing objective loop. +- A workflow run is control flow inside that loop and can be a goal step. +- Workflow work, child tasks, costs, and results should be attributable to the active goal when launched inside one. +- A workflow run should not automatically create, replace, or complete a goal in v1. + +#### Built-in workflow focus + +- Ship deep research as the first built-in workflow. 
+- Defer deep review until deep research proves the runtime, durable replay, structured outputs, and run-card model. + +### Initial Non-functional Requirements + +#### Reliability + +- Workflow runs must be durable across Mux restarts/crashes. +- Recovery must not duplicate completed agent tasks. +- Recovery should be partial rather than all-or-nothing. +- Corrupt/missing step records should be isolated where possible; intact steps should still be reused. + +#### Security + +- The workflow coordinator is conductor-only in v1. +- Project-local workflows are gated by project trust. +- Workflow definitions should be treated as executable code, not documentation. +- Do not silently execute skills. + +#### Observability + +- Workflow state, phases, logs, child tasks, structured output validation, errors, and results must be visible through the run card. +- Background workflow runs must remain discoverable from the launching chat. +- The run store should support future dashboard/list views. + +#### Usability + +- Discovery/precedence should match skill intuition as much as possible. +- Slash invocation should be simple. +- `$name` references must not implicitly execute code unless the user clearly asks to run the workflow. +- Promotion from scratch to reusable workflow must require explicit user action and explicit location choice. + +#### Performance and cost + +- Do not add workflow-specific concurrency caps in v1. +- Use the existing global task queue/settings. +- Raise the default global `maxParallelAgentTasks` from 3 to 16 so workflow fan-out feels meaningfully parallel by default. +- Keep architecture open for future workflow-level budgets/caps if customers need them. + +### Eventual Requirements + +Future versions should be able to add: + +- Full workflow dashboard/run list. +- Command-palette workflow discovery and run management. +- TypeScript authoring or a richer schema DSL that compiles to v1 primitives. 
+- Generic `parallel(fn[])` if the runtime can safely support it. +- Nested workflow calls. +- Carefully scoped workflow-only memory/write primitives. +- Workflow-level concurrency/cost budgets. +- Per-task drilldown, intervention, and resume controls from the workflow card. +- Deep review as a built-in or refactored workflow. + +## Scope + +### In scope for the initial RFC direction + +- Workflow domain model and storage boundaries. +- Workflow runtime permission model. +- Initial conductor primitives. +- Structured task output validation. +- Durable run and partial recovery semantics. +- Lightweight run card behavior. +- Invocation/discovery/promotion/trust semantics. +- Built-in deep research focus. + +### Out of scope until the implementation plan + +- Exact class/file names. +- Exact database/file format schema. +- Exact IPC/oRPC surface. +- Exact React component hierarchy. +- Migration details for existing task/session artifacts. +- Final deep research prompt text. +- Exhaustive tests and issue breakdown. + +## Architecture + +### Proposed services + +Introduce a first-class workflow layer rather than stretching `code_execution` into a product feature. + +Recommended service boundaries: + +- `WorkflowDefinitionStore` + - Discovers project-local, global, built-in, and scratch definitions. + - Applies precedence and trust gates. + - Separates reusable definitions from scratch definitions. + +- `WorkflowRunStore` + - Persists workflow run metadata, status, events, final result, errors, and child task links. + - Owns the durable step/result journal. + - Stores enough definition identity/content to resume scratch runs. + +- `WorkflowRunner` + - Executes plain JavaScript workflow definitions in a sandboxed coordinator runtime. + - Exposes conductor primitives. + - Replays against the run journal on resume. + - Fails fast on authoring errors such as missing durable step IDs. 
+ +- `WorkflowEventBus` + - Emits run status, phase, log, child-task, validation, result, and error events to the UI. + +- `TaskService` adapter + - Spawns child tasks. + - Awaits reports. + - Validates structured outputs through the final report path. + - Interrupts/resumes child task workspaces where possible. + +### Runtime model + +The workflow runtime can reuse the sandbox substrate used by programmatic tool calling, but it should expose a different API. PTC exposes model tools under `mux.*` for batching tool calls; workflows expose a conductor API for durable orchestration. The two features should not be conflated. + +In v1, workflow scripts have no direct access to Node, shell, filesystem, network, or arbitrary Mux tools. All side-effectful work happens inside tasks. + +### Durable run storage sketch + +The exact paths should be finalized during implementation planning, but the storage model should support: + +- Workspace/session-scoped run records under Mux home. +- A run metadata file. +- An append-only or recoverable event log. +- A step/result journal keyed by stable step IDs and input hashes. +- A stored copy or stable reference for scratch workflow definitions. +- Child task ID links and accepted report artifacts. + +A plausible shape: + +```text +~/.mux/sessions/<workspaceId>/workflows/<runId>/ + run.json + definition.js # for scratch or captured executable content + events.jsonl + steps.jsonl +``` + +Reusable definitions remain outside run storage: + +```text +<project>/.mux/workflows/<name>.js +~/.mux/workflows/<name>.js +src/node/builtinWorkflows/<name>.js # source-of-truth location TBD +``` + +### Replay algorithm sketch + +For each replay-boundary primitive call: + +1. Validate that the call has a stable ID. +2. Normalize durable input data for the call. +3. Look up a completed journal entry by step ID and input identity. +4. If a valid result exists, return it without spawning work. +5. If no valid result exists, execute the primitive. +6. Persist the started/completed state and result. +7.
On validation or persistence failure, surface an error or use partial recovery where safe. + +For recovery with partial corruption: + +- Reuse intact completed steps. +- Rerun missing steps. +- Rerun corrupt/unrecoverable steps when safe. +- Preserve completed downstream steps when their identities and inputs remain valid. +- Do not fail the whole workflow solely because one step record is missing. + +### Structured task report path + +The existing sub-agent report contract will need to grow from Markdown-only reporting to optional structured reporting for workflow-spawned tasks. + +Conceptual report input for schema-constrained tasks: + +```ts +{ + reportMarkdown: string; + title?: string | null; + structuredOutput?: unknown; +} +``` + +The child task should receive a final report tool schema that reflects the requested output schema. If its submitted structured output fails validation, the tool returns a validation error and the task remains active. + +This is intentionally stronger than extracting JSON from Markdown after the fact. + +### Invocation and discovery flow + +- Workflow discovery reuses skill-like ordering and UI intuition. +- Workflow descriptors must be distinguishable from skill descriptors in UI. +- Slash invocation starts a workflow run. +- `$name` creates a workflow reference; execution depends on user intent. +- Scratch workflows are not discoverable until promoted. + +### Trust flow + +Project-local workflow discovery/execution must consult project trust, matching existing repo-controlled Mux script/config behavior. + +If a project is untrusted: + +- Do not execute project-local workflow definitions. +- Prefer omitting them from suggestions; if shown, render disabled with a trust-project action. + +### Goal attribution flow + +When a workflow run starts in a workspace with an active goal, the run and its child tasks should be attributable to that goal. The workflow result should feed back into the active goal loop. 
Goal completion remains governed by existing goal mechanisms. + +## Phases + +This RFC intentionally stops short of a detailed implementation plan. A later plan should break these down into small, testable slices. + +1. **Workflow domain and run store** + - Define workflow run metadata, event log, step journal, and scratch definition persistence. + +2. **Conductor runtime and primitives** + - Execute plain JavaScript workflow definitions with conductor-only APIs. + - Implement stable step ID enforcement and replay lookup. + +3. **Task integration and structured output** + - Extend workflow-spawned task reporting to support report-time structured output validation. + - Return validated task outputs to workflow primitives. + +4. **Run card and events** + - Render lightweight workflow run cards in chat. + - Stream phases, logs, task links, validation events, result, and error state. + +5. **Discovery, invocation, trust, and promotion** + - Add workflow definition discovery roots, skill-like precedence, slash/inline references, project trust gating, scratch definitions, and promotion UI. + +6. **Durable resume and partial recovery** + - Resume interrupted/restarted runs from the durable journal. + - Reuse completed steps and rerun missing/unrecoverable steps. + - Cascade interrupt/resume child task workspaces where possible. + +7. **Built-in deep research** + - Ship a built-in deep research workflow and dogfood it heavily before adding more built-ins. + +## Dogfooding and Validation + +Before treating workflows as a productized feature, dogfood deep research end to end. + +### Required dogfood scenarios + +1. **Novel topic research** + - Start from a broad topic. + - Scope, fan out, verify, refute, and synthesize. + - Confirm final report includes claims, sources, confidence, and caveats. + +2. **Adversarial validation** + - Have one lane produce claims and another lane refute/qualify them. + - Verify the run card makes disagreement/refutation understandable. + +3. 
**Crash/resume** + - Interrupt or restart Mux mid-run. + - Resume the same workflow run. + - Confirm completed steps are not duplicated and missing steps recover. + +4. **Structured output validation failure** + - Force a child task to submit invalid structured output. + - Confirm the report tool returns a validation error and the child can resubmit. + +5. **Foreground to background** + - Start a slash-invoked foreground workflow. + - Send another user message or start another workflow. + - Confirm the first workflow moves to background and remains observable. + +6. **Scratch promotion** + - Generate a dynamic scratch workflow. + - Promote it with explicit name/location choice. + - Confirm discovery/precedence/trust behavior after promotion. + +### Evidence to capture + +Each dogfood pass should produce reviewer-visible evidence: + +- Prompt used. +- Workflow script or definition source. +- Run transcript and run-card screenshots. +- Spawned task list and task transcripts. +- Structured validation events. +- Final workflow result. +- Screenshots of promotion/resume/interrupt UI where applicable. +- Short screen recording for visual workflow behavior and recovery paths. + +### Automated validation targets + +The implementation plan should include targeted tests for: + +- Definition discovery and precedence. +- Project trust gating. +- Slash vs `$name` semantics. +- Stable step ID enforcement. +- Replay short-circuiting completed steps. +- Partial recovery for missing/corrupt step records. +- Report-time structured output validation and retry. +- Foreground/background transitions. +- Workflow run card projection from events. +- Goal attribution. + +## Alternatives Considered + +### Reuse skills as executable workflows + +Rejected. Skills are reusable instruction/reference playbooks. Making them executable would blur a key trust boundary and undermine progressive disclosure semantics. + +### Use PTC `code_execution` directly as workflows + +Rejected for product shape. 
PTC is useful substrate, but workflows need durable run identity, step journaling, resume/recovery, phase/log events, trust, discovery, promotion, and a first-class run card. + +### TypeScript workflow definitions in v1 + +Deferred. Plain JavaScript avoids an authoring compile pipeline, source maps, dependency resolution, and a larger trust surface. TypeScript can be layered on later. + +### Direct tools inside workflow scripts + +Rejected for v1. Workflows should coordinate; tasks should execute. This keeps side effects visible in task transcripts and makes durable replay safer. + +### Per-workflow content-hash approvals + +Rejected for v1. Existing project trust already gates repo-controlled Mux scripts and configuration. Project-local workflow definitions should use the same project trust model initially. + +### Workflow-specific concurrency caps + +Rejected for v1. Existing task settings and queueing should govern concurrency. Raise the global default parallel task limit to 16 instead of adding arbitrary workflow-only caps. + +### Dedicated best-of/variant primitives + +Deferred. `parallelAgents(...)` can express v1 variants and best-of-style fan-out. Dedicated primitives should wait until built-in workflows reveal stable semantics. + +### Deep review as the first built-in + +Deferred. Deep research is more novel, cleaner for conductor-only orchestration, and better for visually proving phases, cross-checking, structured outputs, and adversarial verification. + +## Open Questions + +These should be resolved during implementation planning, not before this RFC can guide the plan: + +1. Exact on-disk run-store paths and file formats. +2. Exact oRPC/IPC API surface for workflow discovery, launch, events, interrupt, resume, and promotion. +3. Exact UI treatment for disabled untrusted project-local workflows in suggestions. +4. Exact child-task prompt/report-tool injection for output schemas. +5. 
Exact lifecycle for cleaning old scratch workflow definitions and old run journals. +6. Exact default built-in deep research script and structured output schema. +7. Exact migration behavior if a workflow definition changes while an old run is resumable. + +## Evidence Map + +Repo facts verified during the grilling session: + +- Agent skills are file-based playbooks with skill-like discovery roots and precedence: `docs/agents/agent-skills.mdx`, `src/node/services/agentSkills/agentSkillsService.ts`. +- Current PTC uses sandboxed JavaScript and exposes synchronous-looking `mux.*` tools: `src/node/services/tools/code_execution.ts`. +- Task tooling supports foreground/background runs and grouped task spawning: `src/node/services/tools/task.ts`, `src/common/utils/tools/toolDefinitions.ts`. +- Task settings currently default `maxParallelAgentTasks` to 3 and allow up to 256: `src/common/config/schemas/taskSettings.ts`, `src/common/types/tasks.ts`. +- Project trust is a per-project config bit used to gate repo-controlled scripts/config: `src/node/utils/projectTrust.ts`, `src/common/schemas/project.ts`, `src/browser/features/Settings/Sections/SecuritySection.tsx`. +- Project trust gates `.mux/init`, `.mux/tool_env`, tool hooks, git hooks, task/workspace creation, and project-local MCP config: `src/node/runtime/initHook.ts`, `src/node/services/hooks.ts`, `src/node/services/tools/bash.ts`, `src/node/services/mcpConfigService.ts`, `src/node/services/workspaceService.ts`, `src/node/services/taskService.ts`. +- Sub-agent final reports currently accept Markdown and optional title only: `src/common/utils/tools/toolDefinitions.ts`, `src/node/services/tools/agent_report.ts`. +- Task interruption currently has a preserved-interruption path distinct from destructive task termination: `src/node/services/workspaceService.ts`, `src/node/services/taskService.ts`, `src/node/services/tools/task_terminate.ts`. 
+- Chat/tool-call crash resilience uses `partial.json` and `chat.jsonl`: `src/node/services/historyService.ts`, `src/node/services/streamManager.ts`. + +## Decision Ledger + +| Decision | Status | Rationale | +| ---------------------------------------------------------------- | -------: | ---------------------------------------------------------------- | +| Use separate workflow definitions, not executable skills | Accepted | Preserves skill trust/progressive-disclosure semantics. | +| Start as developer-facing experiment, not full dashboard | Accepted | Learn orchestration value before polishing product shell. | +| Plain JavaScript authoring in v1 | Accepted | Matches sandbox substrate and avoids compile pipeline. | +| Conductor-only workflow runtime | Accepted | Keeps side effects in task transcripts and replay safer. | +| Report-time structured task output validation | Accepted | Gives workflows a real programmatic contract. | +| JSON Schema object literals for task output schemas | Accepted | Dependency-light, serializable, future TS/Zod can compile to it. | +| Lightweight workflow run card in chat | Accepted | Needed for trust/observability without full dashboard. | +| Foreground/background behavior mirrors bash/task | Accepted | Matches existing Mux UX and agent control. | +| Durable workflow runs from first real implementation | Accepted | A crash/restart should not lose orchestration progress. | +| Explicit stable IDs for replay-boundary primitives | Accepted | Durable replay cannot rely on fragile call order. | +| Partial workflow recovery | Accepted | Recovery should reuse intact steps and rerun missing work. | +| Project trust governs project-local workflows | Accepted | Reuses existing repo-controlled script trust model. | +| Scratch workflows are one-off by default and promotable | Accepted | Enables dynamic generation without polluting reusable roots. 
/**
 * Non-throwing accessor for the command registry context.
 *
 * Reads `CommandRegistryContext` and hands back whatever it currently holds.
 * The context is created with a `null` default, so the result is `null` when
 * no provider has supplied a value; callers must handle that case themselves.
 *
 * @returns The current registry context value, or `null` if none is provided.
 */
export function useOptionalCommandRegistry(): CommandRegistryContextValue | null {
  const registry = useContext(CommandRegistryContext);
  return registry;
}
a/src/browser/features/ChatInput/CommandSuggestions.tsx b/src/browser/features/ChatInput/CommandSuggestions.tsx index 41e99bb549..818582ddbe 100644 --- a/src/browser/features/ChatInput/CommandSuggestions.tsx +++ b/src/browser/features/ChatInput/CommandSuggestions.tsx @@ -67,6 +67,30 @@ function HighlightedText({ return {parts}; } +const SUGGESTION_KIND_BADGES = { + workflow: { label: "Workflow", className: "text-plan-mode" }, + skill: { label: "Skill", className: "text-medium" }, +} satisfies Partial< + Record, { label: string; className: string }> +>; + +function SuggestionKindBadge(props: { kind: SlashSuggestion["kind"] }) { + if (props.kind !== "workflow" && props.kind !== "skill") { + return null; + } + const badge = SUGGESTION_KIND_BADGES[props.kind]; + return ( + + {badge.label} + + ); +} + // Props interface interface CommandSuggestionsProps { suggestions: SlashSuggestion[]; @@ -285,6 +309,7 @@ export const CommandSuggestions: React.FC = ({ > +
= (props) => { const creationProject = variant === "creation" ? userProjects.get(creationParentProjectPath) : undefined; const [thinkingLevel] = useThinkingLevel(); + const dynamicWorkflowsExperimentEnabled = useExperimentValue(EXPERIMENT_IDS.DYNAMIC_WORKFLOWS); const workspaceHeartbeatsExperimentEnabled = useExperimentValue( EXPERIMENT_IDS.WORKSPACE_HEARTBEATS ); @@ -272,6 +279,7 @@ const ChatInputInner: React.FC = (props) => { asyncCommandScopeRef.current = { variant, workspaceId }; }, [variant, workspaceId]); + const store = useWorkspaceStoreRaw(); const workspaceSidebarState = useOptionalWorkspaceSidebarState(workspaceId); const workspaceGoal = workspaceSidebarState?.goal ?? null; @@ -367,6 +375,8 @@ const ChatInputInner: React.FC = (props) => { const [atMentionSuggestions, setAtMentionSuggestions] = useState([]); const [showSkillSuggestions, setShowSkillSuggestions] = useState(false); const [skillSuggestions, setSkillSuggestions] = useState([]); + const projectedWorkflowRunCardKeysRef = useRef(new Set()); + const workflowsRequestIdRef = useRef(0); const agentSkillsRequestIdRef = useRef(0); const atMentionDebounceRef = useRef | null>(null); const atMentionRequestIdRef = useRef(0); @@ -383,6 +393,9 @@ const ChatInputInner: React.FC = (props) => { const [showSymbolSuggestions, setShowSymbolSuggestions] = useState(false); const [symbolSuggestions, setSymbolSuggestions] = useState([]); const lastSymbolQueryRef = useRef(""); + const [workflowDefinitionDescriptors, setWorkflowDefinitionDescriptors] = useState< + WorkflowDefinitionDescriptor[] + >([]); const [agentSkillDescriptors, setAgentSkillDescriptors] = useState([]); const [toast, setToast] = useState(null); // State for destructive command confirmation modal (currently only /clear). 
// Load workflow definitions for slash suggestions and slash invocation, then
// project run cards for the workspace's existing workflow runs.
//
// The two phases are isolated on purpose: a failure while listing runs or
// projecting run cards must not clear definitions that loaded successfully,
// otherwise workflow slash suggestions vanish for an unrelated reason.
useEffect(() => {
  let isMounted = true;
  const requestId = ++workflowsRequestIdRef.current;

  // A response is stale once this effect was cleaned up or a newer request started.
  const isStale = () => !isMounted || workflowsRequestIdRef.current !== requestId;

  const loadWorkflows = async () => {
    const discoveryInput =
      variant === "workspace" && workspaceId
        ? { workspaceId }
        : variant === "creation" && atMentionProjectPath
          ? { projectPath: atMentionProjectPath }
          : null;

    // Nothing to discover (no API, no scope, or experiment off): reset the list.
    if (!api || !discoveryInput || !dynamicWorkflowsExperimentEnabled) {
      if (!isStale()) {
        setWorkflowDefinitionDescriptors([]);
      }
      return;
    }

    // Phase 1: workflow definitions.
    try {
      const workflows = await api.workflows.listDefinitions(discoveryInput);
      if (isStale()) {
        return;
      }
      setWorkflowDefinitionDescriptors(Array.isArray(workflows) ? workflows : []);
    } catch (error) {
      console.error("Failed to load workflow definitions:", error);
      if (!isStale()) {
        setWorkflowDefinitionDescriptors([]);
      }
      return;
    }

    // Phase 2: run cards. Only the workspace variant has runs to project.
    const discoveryWorkspaceId = variant === "workspace" && workspaceId ? workspaceId : null;
    if (discoveryWorkspaceId == null) {
      return;
    }

    try {
      const runs = await api.workflows.listRuns({ workspaceId: discoveryWorkspaceId });
      // Apply the same defensive array check used for definitions before iterating.
      if (isStale() || !Array.isArray(runs)) {
        return;
      }

      const muxMessages = store.getWorkspaceState(discoveryWorkspaceId).muxMessages;
      for (const run of runs) {
        const projection = getWorkflowRunCardProjection(muxMessages, run);
        if (!projection.shouldProject) {
          continue;
        }
        // The key includes updatedAt and status, so a changed run is projected
        // again while an unchanged one is skipped on later effect runs.
        const cardKey = `${discoveryWorkspaceId}:${run.id}:${run.updatedAt}:${run.status}`;
        if (projectedWorkflowRunCardKeysRef.current.has(cardKey)) {
          continue;
        }
        projectedWorkflowRunCardKeysRef.current.add(cardKey);
        addWorkflowRunCardMessageForRun(discoveryWorkspaceId, run, {
          existingMessage: projection.existingMessage,
        });
      }
    } catch (error) {
      // Definitions are already in state; keep them and only report the run failure.
      console.error("Failed to load workflow runs:", error);
    }
  };

  void loadWorkflows();

  return () => {
    isMounted = false;
  };
}, [api, variant, workspaceId, atMentionProjectPath, dynamicWorkflowsExperimentEnabled, store]);
dynamicWorkflowsExperimentEnabled, openSettings: open, currentModel: workspaceSidebarState?.currentModel ?? null, sendMessageOptions: commandSendMessageOptions, @@ -2255,6 +2346,7 @@ const ChatInputInner: React.FC = (props) => { const { parsed, skillInvocation } = await parseCommandWithSkillInvocation({ messageText, agentSkillDescriptors, + workflowDefinitions: dynamicWorkflowsExperimentEnabled ? workflowDefinitionDescriptors : [], api, discovery: skillDiscovery, }); @@ -2268,15 +2360,20 @@ const ChatInputInner: React.FC = (props) => { // Route to creation handler for creation variant if (variant === "creation") { - const initialGoalCommand = parsed?.type === "goal-set" ? parsed : undefined; - if (!initialGoalCommand) { + const initialSlashCommand = + parsed?.type === "goal-set" || + (parsed?.type === "workflow-run" && dynamicWorkflowsExperimentEnabled) + ? parsed + : undefined; + if (!initialSlashCommand) { const commandHandled = await executeParsedCommand(parsed, input); if (commandHandled) { return; } } - let creationMessageTextForSend = initialGoalCommand?.objective ?? messageText; + let creationMessageTextForSend = + initialSlashCommand?.type === "goal-set" ? initialSlashCommand.objective : messageText; let creationOptionsOverride: Partial | undefined; if (skillInvocation) { @@ -2321,7 +2418,7 @@ const ChatInputInner: React.FC = (props) => { creationMessageTextForSend, creationFileParts.length > 0 ? 
creationFileParts : undefined, creationOptionsOverride, - initialGoalCommand + initialSlashCommand ); if (creationResult.success) { diff --git a/src/browser/features/ChatInput/useCreationWorkspace.test.tsx b/src/browser/features/ChatInput/useCreationWorkspace.test.tsx index c99797e848..a9c414eb6a 100644 --- a/src/browser/features/ChatInput/useCreationWorkspace.test.tsx +++ b/src/browser/features/ChatInput/useCreationWorkspace.test.tsx @@ -246,6 +246,10 @@ type WorkspaceGetGoalArgs = Parameters[0]; type WorkspaceGetGoalResult = Awaited>; type WorkspaceSetGoalArgs = Parameters[0]; type WorkspaceSetGoalResult = Awaited>; +type WorkflowStartArgs = Parameters[0]; +type WorkflowStartResult = Awaited>; +type WorkflowGetRunArgs = Parameters[0]; +type WorkflowGetRunResult = Awaited>; type WorkspaceCreateResult = Awaited>; type NameGenerationArgs = Parameters[0]; type NameGenerationResult = Awaited>; @@ -257,6 +261,7 @@ type MockOrpcWorkspaceClient = Pick< APIClient["workspace"], "sendMessage" | "create" | "updateAgentAISettings" | "getGoal" | "setGoal" >; +type MockOrpcWorkflowsClient = Pick; type MockOrpcNameGenerationClient = Pick; type WindowWithApi = Window & typeof globalThis; type WindowApi = WindowWithApi["api"]; @@ -276,6 +281,7 @@ const noopUnsubscribe = () => () => undefined; interface MockOrpcClient { projects: MockOrpcProjectsClient; workspace: MockOrpcWorkspaceClient; + workflows: MockOrpcWorkflowsClient; nameGeneration: MockOrpcNameGenerationClient; } interface SetupWindowOptions { @@ -295,6 +301,12 @@ interface SetupWindowOptions { setGoal?: ReturnType< typeof mock<(args: WorkspaceSetGoalArgs) => Promise> >; + workflowStart?: ReturnType< + typeof mock<(args: WorkflowStartArgs) => Promise> + >; + workflowGetRun?: ReturnType< + typeof mock<(args: WorkflowGetRunArgs) => Promise> + >; create?: ReturnType Promise>>; nameGeneration?: ReturnType< typeof mock<(args: NameGenerationArgs) => Promise> @@ -309,6 +321,8 @@ const setupWindow = ({ updateAgentAISettings, 
getGoal, setGoal, + workflowStart, + workflowGetRun, nameGeneration, }: SetupWindowOptions = {}) => { // Sync the useProjectContext mock with the default trusted config. @@ -368,6 +382,22 @@ const setupWindow = ({ } as WorkspaceSetGoalResult); }); + const workflowStartMock = + workflowStart ?? + mock<(args: WorkflowStartArgs) => Promise>(() => { + return Promise.resolve({ + runId: "wfr_test", + status: "running", + result: null, + } as WorkflowStartResult); + }); + + const workflowGetRunMock = + workflowGetRun ?? + mock<(args: WorkflowGetRunArgs) => Promise>(() => { + return Promise.resolve(null as WorkflowGetRunResult); + }); + const createMock = create ?? mock<(args: WorkspaceCreateArgs) => Promise>(() => { @@ -422,6 +452,10 @@ const setupWindow = ({ getGoal: (input: WorkspaceGetGoalArgs) => getGoalMock(input), setGoal: (input: WorkspaceSetGoalArgs) => setGoalMock(input), }, + workflows: { + start: (input: WorkflowStartArgs) => workflowStartMock(input), + getRun: (input: WorkflowGetRunArgs) => workflowGetRunMock(input), + }, nameGeneration: { generate: (input: NameGenerationArgs) => nameGenerationMock(input), }, @@ -531,6 +565,7 @@ const setupWindow = ({ getGoal: getGoalMock, setGoal: setGoalMock, }, + workflowsApi: { start: workflowStartMock, getRun: workflowGetRunMock }, nameGenerationApi: { generate: nameGenerationMock }, }; }; @@ -806,6 +841,109 @@ describe("useCreationWorkspace", () => { }); }); + test("handleSend creates workspace and sends initial workflow result to the agent", async () => { + const workflowResult = { + reportMarkdown: "# Workflow result\n\nCreation workflow complete.", + structuredOutput: { confidence: "high" }, + }; + const workflowStartMock = mock( + (_args: WorkflowStartArgs): Promise => + Promise.resolve({ + runId: "wfr_creation", + status: "completed", + result: workflowResult, + }) + ); + const workflowGetRunMock = mock( + (_args: WorkflowGetRunArgs): Promise => + Promise.resolve({ + id: "wfr_creation", + workspaceId: 
TEST_WORKSPACE_ID, + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:test", + args: { input: "mux workflows" }, + status: "completed", + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:01.000Z", + events: [ + { + sequence: 1, + type: "result", + at: "2026-05-29T00:00:01.000Z", + result: workflowResult, + }, + ], + steps: [], + } as WorkflowGetRunResult) + ); + const sendMessageMock = mock( + (_args: WorkspaceSendMessageArgs): Promise => + Promise.resolve({ success: true, data: {} } as WorkspaceSendMessageResult) + ); + const { workspaceApi, workflowsApi } = setupWindow({ + workflowStart: workflowStartMock, + workflowGetRun: workflowGetRunMock, + sendMessage: sendMessageMock, + }); + + const onWorkspaceCreated = mock((metadata: FrontendWorkspaceMetadata) => metadata); + const getHook = renderUseCreationWorkspace({ + projectPath: TEST_PROJECT_PATH, + dynamicWorkflowsEnabled: true, + onWorkspaceCreated, + message: "/deep-research mux workflows", + }); + + await waitFor(() => expect(getHook().branches).toEqual([FALLBACK_BRANCH])); + + let handleSendResult: CreationSendResult | undefined; + await act(async () => { + handleSendResult = await getHook().handleSend( + "/deep-research mux workflows", + undefined, + undefined, + { + type: "workflow-run", + name: "deep-research", + argsText: "mux workflows", + } + ); + }); + + expect(handleSendResult).toEqual({ success: true }); + expect(workspaceApi.create.mock.calls.length).toBe(1); + expect(workspaceApi.sendMessage.mock.calls.length).toBe(1); + const workflowStartInput = workflowsApi.start.mock.calls[0]?.[0]; + expect(workflowStartInput).toMatchObject({ + workspaceId: TEST_WORKSPACE_ID, + name: "deep-research", + runInBackground: true, + rawCommand: "/deep-research mux workflows", + args: { input: "mux workflows" }, + }); + 
expect(workflowStartInput?.continuationOptions?.agentId).toBe("exec"); + expect(workflowsApi.getRun).toHaveBeenCalledWith({ + workspaceId: TEST_WORKSPACE_ID, + runId: "wfr_creation", + }); + const sendInput = workspaceApi.sendMessage.mock.calls[0]?.[0]; + expect(sendInput?.message).toContain("/deep-research mux workflows"); + expect(sendInput?.message).toContain(""); + expect(sendInput?.message).toContain("Creation workflow complete"); + const muxMetadata: unknown = sendInput?.options.muxMetadata; + expect( + typeof muxMetadata === "object" && muxMetadata !== null && "rawCommand" in muxMetadata + ? muxMetadata.rawCommand + : undefined + ).toBe("/deep-research mux workflows"); + }); + test("handleSend shows trust dialog for untrusted projects", async () => { mockProjectConfigMap = new Map([[TEST_PROJECT_PATH, { workspaces: [], trusted: false }]]); const nameGenerationMock = mock( @@ -1372,6 +1510,7 @@ interface HookOptions { markPendingInitialSend?: boolean; } ) => void; + dynamicWorkflowsEnabled?: boolean; message?: string; draftId?: string | null; } diff --git a/src/browser/features/ChatInput/useCreationWorkspace.ts b/src/browser/features/ChatInput/useCreationWorkspace.ts index 429b3af1d9..3c0c5e44ca 100644 --- a/src/browser/features/ChatInput/useCreationWorkspace.ts +++ b/src/browser/features/ChatInput/useCreationWorkspace.ts @@ -61,7 +61,10 @@ import { workspaceStore } from "@/browser/stores/WorkspaceStore"; import { WORKSPACE_DEFAULTS } from "@/constants/workspaceDefaults"; export type CreationSendResult = { success: true } | { success: false; error?: SendMessageError }; -export type CreationInitialSlashCommand = Extract; +export type CreationInitialSlashCommand = Extract< + ParsedCommand, + { type: "goal-set" } | { type: "workflow-run" } +>; interface UseCreationWorkspaceOptions { projectPath: string; @@ -75,6 +78,8 @@ interface UseCreationWorkspaceOptions { subProjectPath?: string | null; /** Draft ID for UI-only workspace creation drafts (from URL) */ 
draftId?: string | null; + /** Dynamic workflows gate used when an initial creation command starts a workflow. */ + dynamicWorkflowsEnabled?: boolean; /** User's currently selected model (for name generation fallback) */ userModel?: string; } @@ -214,6 +219,7 @@ export function useCreationWorkspace({ message, subProjectPath, draftId, + dynamicWorkflowsEnabled = false, userModel, }: UseCreationWorkspaceOptions): UseCreationWorkspaceReturn { const workspaceContext = useOptionalWorkspaceContext(); @@ -358,6 +364,15 @@ export function useCreationWorkspace({ return { success: false }; } + if (initialSlashCommand?.type === "workflow-run" && !dynamicWorkflowsEnabled) { + setToast({ + id: Date.now().toString(), + type: "error", + message: "Dynamic workflows are disabled", + }); + return { success: false }; + } + // Build runtime config early (used later for workspace creation) let runtimeSelection = settings.selectedRuntime; @@ -592,6 +607,8 @@ export function useCreationWorkspace({ workspaceId: metadata.id, variant: "workspace", projectPath, + rawInput: messageText, + dynamicWorkflowsEnabled, sendMessageOptions, setInput: () => undefined, setAttachments: () => undefined, @@ -609,15 +626,17 @@ export function useCreationWorkspace({ return { success: false }; } - const openGoalTab = () => { - window.dispatchEvent( - createCustomEvent(CUSTOM_EVENTS.OPEN_GOAL_TAB, { workspaceId: metadata.id }) - ); - }; - if (typeof window.requestAnimationFrame === "function") { - window.requestAnimationFrame(openGoalTab); - } else { - openGoalTab(); + if (initialSlashCommand.type === "goal-set") { + const openGoalTab = () => { + window.dispatchEvent( + createCustomEvent(CUSTOM_EVENTS.OPEN_GOAL_TAB, { workspaceId: metadata.id }) + ); + }; + if (typeof window.requestAnimationFrame === "function") { + window.requestAnimationFrame(openGoalTab); + } else { + openGoalTab(); + } } return { success: true }; } @@ -690,6 +709,7 @@ export function useCreationWorkspace({ workspaceNameState.autoGenerate, 
workspaceNameState.name, subProjectPath, + dynamicWorkflowsEnabled, draftId, promoteWorkspaceDraft, deleteWorkspaceDraft, diff --git a/src/browser/features/ChatInput/utils.inlineSkillRefs.test.ts b/src/browser/features/ChatInput/utils.inlineSkillRefs.test.ts index 5474ce89e5..d72dd53d1c 100644 --- a/src/browser/features/ChatInput/utils.inlineSkillRefs.test.ts +++ b/src/browser/features/ChatInput/utils.inlineSkillRefs.test.ts @@ -1,3 +1,4 @@ +/* eslint-disable @typescript-eslint/await-thenable */ import { describe, expect, test } from "bun:test"; import type { AgentSkillDescriptor } from "@/common/types/agentSkill"; import { @@ -38,6 +39,63 @@ describe("parseCommandWithSkillInvocation", () => { }); }); +describe("parseCommandWithSkillInvocation workflows", () => { + test("resolves an unambiguous unknown slash command as a workflow run", async () => { + await expect( + parseCommandWithSkillInvocation({ + messageText: "/deep-research topic: mux", + agentSkillDescriptors: [], + workflowDefinitions: [ + { + name: "deep-research", + description: "Research deeply", + scope: "built-in", + executable: true, + }, + ], + api: null, + discovery: null, + }) + ).resolves.toEqual({ + parsed: { type: "workflow-run", name: "deep-research", argsText: "topic: mux" }, + skillInvocation: null, + }); + }); + + test("requires explicit workflow invocation when a skill and workflow share a name", async () => { + await expect( + parseCommandWithSkillInvocation({ + messageText: "/deep-review topic: mux", + agentSkillDescriptors: [ + { + name: "deep-review", + description: "Review deeply", + scope: "project", + }, + ], + workflowDefinitions: [ + { + name: "deep-review", + description: "Ambiguous workflow", + scope: "project", + executable: true, + }, + ], + api: null, + discovery: null, + }) + ).resolves.toEqual({ + parsed: { + type: "command-invalid-args", + command: "deep-review", + input: "deep-review", + usage: "Skill and workflow names collide. 
Use /workflow deep-review to run the workflow.", + }, + skillInvocation: null, + }); + }); +}); + describe("resolveInlineSkillRefsForSend", () => { test("returns an empty array for no slash and no inline refs", async () => { expect( diff --git a/src/browser/features/ChatInput/utils.ts b/src/browser/features/ChatInput/utils.ts index 7aacbcce35..de89837fe6 100644 --- a/src/browser/features/ChatInput/utils.ts +++ b/src/browser/features/ChatInput/utils.ts @@ -6,6 +6,7 @@ import { resolveInlineSkillReferences, } from "@/browser/utils/agentSkills/inlineSkillReferences"; import type { AgentSkillDescriptor } from "@/common/types/agentSkill"; +import type { WorkflowDefinitionDescriptor } from "@/common/types/workflow"; import type { ParsedRuntime } from "@/common/types/runtime"; import { buildAgentSkillMetadata, @@ -23,6 +24,9 @@ export type CreationRuntimeValidationError = | { mode: "ssh"; kind: "missingCoderTemplate" } | { mode: "ssh"; kind: "missingCoderPreset" }; +const WORKFLOW_SKILL_COLLISION_USAGE = (name: string) => + `Skill and workflow names collide. 
Use /workflow ${name} to run the workflow.`; + export interface SkillInvocation { descriptor: AgentSkillDescriptor; userText: string; @@ -116,9 +120,48 @@ async function resolveSkillInvocation(options: { }; } +function resolveLocalWorkflowInvocation(options: { + messageText: string; + parsed: ParsedCommand; + workflowDefinitions: WorkflowDefinitionDescriptor[]; + hasSkill: boolean; +}): ParsedCommand | null { + if (!isUnknownSlashCommand(options.parsed)) { + return null; + } + + const command = options.parsed.command; + const prefix = `/${command}`; + const afterPrefix = options.messageText.slice(prefix.length); + const hasSeparator = afterPrefix.length === 0 || /^\s/u.test(afterPrefix); + if (!hasSeparator) { + return null; + } + + const workflow = options.workflowDefinitions.find( + (definition) => definition.name === command && definition.executable + ); + if (!workflow) { + return null; + } + + if (options.hasSkill) { + return { + type: "command-invalid-args", + command, + input: command, + usage: WORKFLOW_SKILL_COLLISION_USAGE(command), + }; + } + + const argsText = afterPrefix.trimStart(); + return { type: "workflow-run", name: workflow.name, ...(argsText ? { argsText } : {}) }; +} + export async function parseCommandWithSkillInvocation(options: { messageText: string; agentSkillDescriptors: AgentSkillDescriptor[]; + workflowDefinitions?: WorkflowDefinitionDescriptor[]; api: APIClient | null; discovery: SkillResolutionTarget | null; }): Promise<{ parsed: ParsedCommand; skillInvocation: SkillInvocation | null }> { @@ -131,7 +174,21 @@ export async function parseCommandWithSkillInvocation(options: { discovery: options.discovery, }); - return { parsed: skillInvocation ? null : parsed, skillInvocation }; + const workflowInvocation = resolveLocalWorkflowInvocation({ + messageText: options.messageText, + parsed, + workflowDefinitions: options.workflowDefinitions ?? 
[], + hasSkill: skillInvocation != null, + }); + + if (workflowInvocation?.type === "command-invalid-args") { + return { parsed: workflowInvocation, skillInvocation: null }; + } + + return { + parsed: skillInvocation || workflowInvocation ? workflowInvocation : parsed, + skillInvocation, + }; } /** diff --git a/src/browser/features/Messages/MessageRenderer.stories.tsx b/src/browser/features/Messages/MessageRenderer.stories.tsx index 5de1aed343..39cb48de9a 100644 --- a/src/browser/features/Messages/MessageRenderer.stories.tsx +++ b/src/browser/features/Messages/MessageRenderer.stories.tsx @@ -13,6 +13,12 @@ import { createGoalContinuationMessage, createUserMessage, } from "@/browser/stories/mocks/messages"; +import { + WORKFLOW_RESULT_METADATA_TYPE, + WORKFLOW_RUN_CARD_DISPLAY_METADATA_TYPE, + WORKFLOW_TRIGGER_DISPLAY_METADATA_TYPE, + buildWorkflowRunCardMessage, +} from "@/common/utils/workflowRunMessages"; import { createFileEditTool, createFileReadTool, @@ -198,6 +204,90 @@ export const Conversation: AppStory = { ), }; +export const WorkflowTriggeredCommand: AppStory = { + parameters: { chromatic: { disableSnapshot: true } }, + render: () => ( + { + collapseLeftSidebar(); + const rawCommand = "/shallow-review what do you think of workflows"; + const runId = "wfr_workflow_trigger_story"; + const workflowRun = { + id: runId, + workspaceId: "ws-workflow-trigger", + definition: { + name: "shallow-review", + description: "Quick workflow review", + scope: "scratch" as const, + sourcePath: "/tmp/mux/sessions/workspace/workflows/shallow-review.js", + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:workflow-trigger-story", + args: { input: "what do you think of workflows" }, + status: "running" as const, + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:01.000Z", + events: [ + { + sequence: 1, + type: "status" as const, + at: "2026-05-29T00:00:00.000Z", + status: "running" as 
const, + }, + { sequence: 2, type: "phase" as const, at: "2026-05-29T00:00:01.000Z", name: "gather" }, + ], + steps: [], + }; + const workflowCard = buildWorkflowRunCardMessage( + { name: "shallow-review", args: workflowRun.args }, + { runId, status: workflowRun.status, result: null, run: workflowRun }, + STABLE_TIMESTAMP - 295000 + ) as ChatMuxMessage; + workflowCard.type = "message"; + workflowCard.metadata = { + historySequence: 2, + timestamp: STABLE_TIMESTAMP - 295000, + synthetic: true, + uiVisible: true, + muxMetadata: { type: WORKFLOW_RUN_CARD_DISPLAY_METADATA_TYPE, runId }, + }; + + return setupSimpleChatStory({ + workspaceId: "ws-workflow-trigger", + messages: [ + createUserMessage("workflow-command", rawCommand, { + historySequence: 1, + timestamp: STABLE_TIMESTAMP - 300000, + muxMetadata: { + type: WORKFLOW_TRIGGER_DISPLAY_METADATA_TYPE, + rawCommand, + commandPrefix: "/shallow-review", + runId, + }, + }), + workflowCard, + createUserMessage( + "workflow-result-hidden", + `${rawCommand}\n\n{}`, + { + historySequence: 3, + timestamp: STABLE_TIMESTAMP - 290000, + muxMetadata: { + type: WORKFLOW_RESULT_METADATA_TYPE, + rawCommand, + commandPrefix: "/shallow-review", + runId, + }, + } + ), + ], + }); + }} + /> + ), +}; + /** * Synthetic / goal system-message composite. 
* diff --git a/src/browser/features/Tools/AgentReportToolCall.test.tsx b/src/browser/features/Tools/AgentReportToolCall.test.tsx index da55f053d6..1a3185c4c8 100644 --- a/src/browser/features/Tools/AgentReportToolCall.test.tsx +++ b/src/browser/features/Tools/AgentReportToolCall.test.tsx @@ -44,4 +44,36 @@ describe("AgentReportToolCall", () => { expect(view.getByText(/Hello/)).toBeTruthy(); expect(view.getByText(/World/)).toBeTruthy(); }); + + test("renders file-backed report payload from tool output", () => { + const view = render( + + + + ); + + expect(view.getByText(/File Report/)).toBeTruthy(); + expect(view.getByText(/From disk/)).toBeTruthy(); + }); + + test("renders default file-backed report placeholder before tool output", () => { + const view = render( + + + + ); + + expect(view.getByText(/Report file: report\.md/)).toBeTruthy(); + }); }); diff --git a/src/browser/features/Tools/AgentReportToolCall.tsx b/src/browser/features/Tools/AgentReportToolCall.tsx index 2bf50dedd2..f40ae64e0d 100644 --- a/src/browser/features/Tools/AgentReportToolCall.tsx +++ b/src/browser/features/Tools/AgentReportToolCall.tsx @@ -26,6 +26,19 @@ interface AgentReportToolCallProps { status?: ToolStatus; } +function getSubmittedReportMarkdown( + args: AgentReportToolArgs, + result: AgentReportToolResult | undefined +): string { + if (result && "success" in result && result.success === true && result.report?.reportMarkdown) { + return result.report.reportMarkdown; + } + if ("reportMarkdown" in args) { + return args.reportMarkdown; + } + return `Report file: ${args.reportMarkdownPath ?? "report.md"}`; +} + export const AgentReportToolCall: React.FC = ({ args, result, @@ -37,9 +50,10 @@ export const AgentReportToolCall: React.FC = ({ const errorResult = isToolErrorResult(result) ? result : null; const title = args.title ?? "Agent report"; + const reportMarkdown = getSubmittedReportMarkdown(args, result); // Show a small preview when collapsed so the card still has some useful context. 
- const firstLine = args.reportMarkdown.trim().split("\n")[0] ?? ""; + const firstLine = reportMarkdown.trim().split("\n")[0] ?? ""; const preview = firstLine.length > 80 ? firstLine.slice(0, 80).trim() + "…" : firstLine; return ( @@ -54,7 +68,7 @@ export const AgentReportToolCall: React.FC = ({ {expanded && (
- +
{errorResult && {errorResult.error}}
diff --git a/src/browser/features/Tools/Shared/ToolPrimitives.tsx b/src/browser/features/Tools/Shared/ToolPrimitives.tsx index 59cac65ce1..c8f2e425b0 100644 --- a/src/browser/features/Tools/Shared/ToolPrimitives.tsx +++ b/src/browser/features/Tools/Shared/ToolPrimitives.tsx @@ -236,6 +236,9 @@ export const TOOL_NAME_TO_ICON: Partial> = { bash_output: Wrench, bash_background_terminate: Square, bash_background_list: List, + workflow_list: List, + workflow_read: BookOpen, + workflow_run: Sparkles, agent_report: FileText, agent_skill_read: GraduationCap, agent_skill_read_file: GraduationCap, diff --git a/src/browser/features/Tools/Shared/getToolComponent.test.ts b/src/browser/features/Tools/Shared/getToolComponent.test.ts index 66a69bf971..4939604c71 100644 --- a/src/browser/features/Tools/Shared/getToolComponent.test.ts +++ b/src/browser/features/Tools/Shared/getToolComponent.test.ts @@ -7,10 +7,22 @@ import { CompleteGoalToolCall } from "../CompleteGoalToolCall"; import { DesktopActionToolCall } from "../DesktopActionToolCall"; import { DesktopScreenshotToolCall } from "../DesktopScreenshotToolCall"; import { GenericToolCall } from "../GenericToolCall"; +import { WorkflowRunToolCall } from "../WorkflowRunToolCall"; +import { WorkflowListToolCall, WorkflowReadToolCall } from "../WorkflowDefinitionToolCall"; import { GetGoalToolCall } from "../GetGoalToolCall"; import { getToolComponent } from "./getToolComponent"; describe("getToolComponent", () => { + test("returns workflow definition tool components", () => { + expect(getToolComponent("workflow_list", {})).toBe(WorkflowListToolCall); + expect(getToolComponent("workflow_read", { name: "deep-research" })).toBe(WorkflowReadToolCall); + }); + + test("returns WorkflowRunToolCall for workflow_run", () => { + const component = getToolComponent("workflow_run", { name: "deep-research" }); + expect(component).toBe(WorkflowRunToolCall); + }); + test("returns AgentReportToolCall for agent_report", () => { const component = 
getToolComponent("agent_report", { reportMarkdown: "# Hello" }); expect(component).toBe(AgentReportToolCall); diff --git a/src/browser/features/Tools/Shared/getToolComponent.ts b/src/browser/features/Tools/Shared/getToolComponent.ts index 178c231a78..ad029fe136 100644 --- a/src/browser/features/Tools/Shared/getToolComponent.ts +++ b/src/browser/features/Tools/Shared/getToolComponent.ts @@ -41,6 +41,8 @@ import { } from "../TaskToolCall"; import { TaskApplyGitPatchToolCall } from "../TaskApplyGitPatchToolCall"; import { GetGoalToolCall } from "../GetGoalToolCall"; +import { WorkflowRunToolCall } from "../WorkflowRunToolCall"; +import { WorkflowListToolCall, WorkflowReadToolCall } from "../WorkflowDefinitionToolCall"; import { CompleteGoalToolCall } from "../CompleteGoalToolCall"; /** @@ -164,6 +166,18 @@ const TOOL_REGISTRY: Record = { component: TaskApplyGitPatchToolCall, schema: TOOL_DEFINITIONS.task_apply_git_patch.schema, }, + workflow_list: { + component: WorkflowListToolCall, + schema: TOOL_DEFINITIONS.workflow_list.schema, + }, + workflow_read: { + component: WorkflowReadToolCall, + schema: TOOL_DEFINITIONS.workflow_read.schema, + }, + workflow_run: { + component: WorkflowRunToolCall, + schema: TOOL_DEFINITIONS.workflow_run.schema, + }, agent_report: { component: AgentReportToolCall, schema: TOOL_DEFINITIONS.agent_report.schema, diff --git a/src/browser/features/Tools/TaskToolCall.tsx b/src/browser/features/Tools/TaskToolCall.tsx index 02d47688d1..46468b0d65 100644 --- a/src/browser/features/Tools/TaskToolCall.tsx +++ b/src/browser/features/Tools/TaskToolCall.tsx @@ -98,6 +98,7 @@ const TaskStatusBadge: React.FC<{ case "reported": return "bg-success/20 text-success"; case "running": + case "backgrounded": return "bg-pending/20 text-pending"; case "awaiting_report": return "bg-warning/20 text-warning"; @@ -208,7 +209,12 @@ interface TaskRowProps { } function isTaskRowElapsedActive(status: string): boolean { - return status === "queued" || status === "running" 
|| status === "awaiting_report"; + return ( + status === "queued" || + status === "running" || + status === "backgrounded" || + status === "awaiting_report" + ); } const TaskRowElapsed: React.FC<{ startedAtMs: number | undefined; status: string }> = (props) => { diff --git a/src/browser/features/Tools/WorkflowDefinitionToolCall.stories.tsx b/src/browser/features/Tools/WorkflowDefinitionToolCall.stories.tsx new file mode 100644 index 0000000000..7c6ebd22e3 --- /dev/null +++ b/src/browser/features/Tools/WorkflowDefinitionToolCall.stories.tsx @@ -0,0 +1,88 @@ +import type { Meta, StoryObj } from "@storybook/react-vite"; + +import { + WorkflowListToolCall, + WorkflowReadToolCall, +} from "@/browser/features/Tools/WorkflowDefinitionToolCall"; +import { lightweightMeta } from "@/browser/stories/meta.js"; + +const source = `export default function workflow({ args, agent, phase, log }) { + phase("review", { artifact: args.artifact }); + log("Starting review loop"); + + const review = agent({ + id: "review", + title: "Review implementation", + prompt: "Review " + args.artifact, + }); + + return { + reportMarkdown: "# Review complete\\n\\n" + review.reportMarkdown, + structuredOutput: { verdict: "clean" }, + }; +} +`; + +const meta = { + ...lightweightMeta, + title: "App/Chat/Tools/WorkflowDefinitions", +} satisfies Meta; + +export default meta; + +type Story = StoryObj; + +export const WorkflowRead: Story = { + render: () => ( + + ), +}; + +export const WorkflowList: Story = { + render: () => ( + + ), +}; diff --git a/src/browser/features/Tools/WorkflowDefinitionToolCall.test.tsx b/src/browser/features/Tools/WorkflowDefinitionToolCall.test.tsx new file mode 100644 index 0000000000..0b0f3e442d --- /dev/null +++ b/src/browser/features/Tools/WorkflowDefinitionToolCall.test.tsx @@ -0,0 +1,104 @@ +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; +import { GlobalWindow } from "happy-dom"; +import { cleanup, render } from "@testing-library/react"; + 
+import type React from "react"; + +import { TooltipProvider } from "@/browser/components/Tooltip/Tooltip"; +import { ThemeProvider } from "@/browser/contexts/ThemeContext"; +import { WorkflowListToolCall, WorkflowReadToolCall } from "./WorkflowDefinitionToolCall"; + +const source = `export default function workflow({ args, agent }) { + const topic = args.topic ?? "workflow UI"; + return agent({ id: "review", prompt: "Review " + topic }); +}`; + +function renderWithTooltip(ui: React.ReactElement) { + return render( + + {ui} + + ); +} + +function expectWorkflowHeaderBadge(view: ReturnType, label: string) { + const workflowBadge = view.getByText("Workflow"); + const headerText = workflowBadge.closest('[data-scroll-intent="ignore"]')?.textContent ?? ""; + expect(headerText.indexOf("Workflow")).toBeLessThan(headerText.indexOf(label)); +} + +describe("WorkflowDefinitionToolCall", () => { + let originalWindow: typeof globalThis.window; + let originalDocument: typeof globalThis.document; + let originalLocalStorage: typeof globalThis.localStorage; + + beforeEach(() => { + originalWindow = globalThis.window; + originalDocument = globalThis.document; + originalLocalStorage = globalThis.localStorage; + globalThis.window = new GlobalWindow() as unknown as Window & typeof globalThis; + globalThis.document = globalThis.window.document; + globalThis.localStorage = globalThis.window.localStorage; + }); + + afterEach(() => { + cleanup(); + globalThis.window = originalWindow; + globalThis.document = originalDocument; + globalThis.localStorage = originalLocalStorage; + }); + + test("renders workflow_read metadata and highlighted source", () => { + const view = renderWithTooltip( + + ); + + expectWorkflowHeaderBadge(view, "deep-research"); + expect(view.getByText("Deep research")).toBeTruthy(); + expect(view.container.textContent).toContain("return agent"); + }); + + test("renders workflow_list as definition cards", () => { + const view = renderWithTooltip( + + ); + + 
expectWorkflowHeaderBadge(view, "list"); + expect(view.getByText("2 definitions")).toBeTruthy(); + expect(view.queryByText("executable")).toBeNull(); + expect(view.getByText("blocked")).toBeTruthy(); + expect(view.getByText("Project is not trusted")).toBeTruthy(); + }); +}); diff --git a/src/browser/features/Tools/WorkflowDefinitionToolCall.tsx b/src/browser/features/Tools/WorkflowDefinitionToolCall.tsx new file mode 100644 index 0000000000..7d753b3911 --- /dev/null +++ b/src/browser/features/Tools/WorkflowDefinitionToolCall.tsx @@ -0,0 +1,297 @@ +import React from "react"; + +import type { WorkflowDefinitionDescriptor } from "@/common/types/workflow"; +import type { + WorkflowListToolArgs, + WorkflowListToolResult, + WorkflowListToolSuccessResult, + WorkflowReadToolArgs, + WorkflowReadToolResult, + WorkflowReadToolSuccessResult, +} from "@/common/types/tools"; +import { cn } from "@/common/lib/utils"; + +import { + DetailSection, + ErrorBox, + ExpandIcon, + LoadingDots, + StatusIndicator, + ToolContainer, + ToolDetails, + ToolHeader, + ToolIcon, + ToolName, +} from "./Shared/ToolPrimitives"; +import { + getStatusDisplay, + isToolErrorResult, + type ToolStatus, + useToolExpansion, +} from "./Shared/toolUtils"; +import { HighlightedCode, JsonHighlight } from "./Shared/HighlightedCode"; + +interface WorkflowListToolCallProps { + args: WorkflowListToolArgs; + result?: WorkflowListToolResult; + status?: ToolStatus; +} + +interface WorkflowReadToolCallProps { + args: WorkflowReadToolArgs; + result?: WorkflowReadToolResult; + status?: ToolStatus; +} + +export const WORKFLOW_ACTION_BUTTON_CLASS = + "text-muted hover:text-foreground border-border rounded border px-2 py-1 disabled:opacity-50 disabled:hover:text-muted"; + +export type WorkflowPromotionTarget = "project" | "global"; + +export function WorkflowKindBadge() { + return ( + + Workflow + + ); +} + +export function WorkflowBadge(props: { + children: React.ReactNode; + tone?: "normal" | "success" | "warning"; +}) { + 
return ( + + {props.children} + + ); +} + +export function WorkflowSection(props: { + title: string; + children: React.ReactNode; + className?: string; +}) { + return ( + +
{props.title}
+ {props.children} +
+ ); +} + +export function WorkflowJsonBlock(props: { value: unknown; className?: string }) { + return ( +
+ +
+ ); +} + +export function WorkflowSourceBlock(props: { + source: string; + title?: string; + className?: string; + maxHeightClassName?: string; +}) { + const source = props.source.trimEnd(); + return ( + +
+ +
+
+ ); +} + +function formatWorkflowDefinitionCount(count: number): string { + return count === 1 ? "1 definition" : `${count} definitions`; +} + +export function formatWorkflowSavedMessage(scope: WorkflowPromotionTarget): string { + return scope === "project" ? "Saved to project workflows" : "Saved to global workflows"; +} + +function WorkflowDefinitionListRow(props: { descriptor: WorkflowDefinitionDescriptor }) { + const descriptor = props.descriptor; + return ( +
+ + {descriptor.name} + + {descriptor.scope} +
+
+ {descriptor.description} +
+ {descriptor.blockedReason && ( +
+ {descriptor.blockedReason} +
+ )} +
+ {!descriptor.executable && blocked} +
+ ); +} + +function WorkflowDefinitionList(props: { workflows: WorkflowDefinitionDescriptor[] }) { + return ( +
+
+ {props.workflows.map((workflow) => ( + + ))} +
+
+ ); +} + +export function WorkflowDefinitionCard(props: { + descriptor: WorkflowDefinitionDescriptor; + compact?: boolean; +}) { + const descriptor = props.descriptor; + return ( +
+
+ {descriptor.name} + {descriptor.scope} + {!descriptor.executable && blocked} +
+ {!props.compact && ( +
{descriptor.description}
+ )} + {descriptor.sourcePath && ( +
+ {descriptor.sourcePath} +
+ )} + {descriptor.blockedReason && ( +
{descriptor.blockedReason}
+ )} +
+ ); +} + +function isWorkflowListSuccessResult( + value: WorkflowListToolResult | undefined +): value is WorkflowListToolSuccessResult { + return value != null && !isToolErrorResult(value); +} + +function isWorkflowReadSuccessResult( + value: WorkflowReadToolResult | undefined +): value is WorkflowReadToolSuccessResult { + return value != null && !isToolErrorResult(value); +} + +function WorkflowLoadingState() { + return ( +
+ Waiting for workflow result + +
+ ); +} + +export const WorkflowListToolCall: React.FC = ({ + result, + status = "pending", +}) => { + const { expanded, toggleExpanded } = useToolExpansion(true); + const errorResult = isToolErrorResult(result) ? result : null; + const successResult = isWorkflowListSuccessResult(result) ? result : null; + const workflows = successResult?.workflows ?? []; + + return ( + + + + + + list + {workflows.length > 0 && ( + + {formatWorkflowDefinitionCount(workflows.length)} + + )} + {getStatusDisplay(status)} + + + {expanded && ( + + {workflows.length > 0 ? ( + + ) : status === "executing" ? ( + + ) : ( +
No workflow definitions returned.
+ )} + {errorResult && {errorResult.error}} +
+ )} +
+ ); +}; + +export const WorkflowReadToolCall: React.FC = ({ + args, + result, + status = "pending", +}) => { + const { expanded, toggleExpanded } = useToolExpansion(true); + const errorResult = isToolErrorResult(result) ? result : null; + const successResult = isWorkflowReadSuccessResult(result) ? result : null; + + return ( + + + + + + {args.name} + {getStatusDisplay(status)} + + + {expanded && ( + + {successResult ? ( + <> + + + + + + ) : status === "executing" ? ( + + ) : null} + {errorResult && {errorResult.error}} + + )} + + ); +}; diff --git a/src/browser/features/Tools/WorkflowRunToolCall.stories.tsx b/src/browser/features/Tools/WorkflowRunToolCall.stories.tsx new file mode 100644 index 0000000000..d09375d831 --- /dev/null +++ b/src/browser/features/Tools/WorkflowRunToolCall.stories.tsx @@ -0,0 +1,150 @@ +import type { ReactNode } from "react"; +import type { Meta, StoryObj } from "@storybook/react-vite"; + +import { APIContext } from "@/browser/contexts/API"; +import { WorkflowRunToolCall } from "@/browser/features/Tools/WorkflowRunToolCall"; +import { lightweightMeta } from "@/browser/stories/meta.js"; + +const storyApi = { + workflows: { + promoteScratch: (input: { + name: string; + description: string; + location: "project" | "global"; + }) => + Promise.resolve({ + name: input.name, + description: input.description, + scope: input.location, + sourcePath: + input.location === "project" + ? 
`/repo/.mux/workflows/${input.name}.js` + : `~/.mux/workflows/${input.name}.js`, + executable: true, + }), + }, +}; + +function StoryAPIProvider(props: { children: ReactNode }) { + return ( + undefined, + retry: () => undefined, + }} + > + {props.children} + + ); +} + +const meta = { + ...lightweightMeta, + title: "App/Chat/Tools/WorkflowRun", + component: WorkflowRunToolCall, +} satisfies Meta; + +export default meta; + +type Story = StoryObj; + +export const CompletedDeepResearch: Story = { + args: { + args: { + name: "deep-research", + args: { topic: "workflow run cards" }, + run_in_background: false, + }, + status: "completed", + result: { + status: "completed", + runId: "wfr_story", + result: { + reportMarkdown: + "# Deep Research\n\nWorkflow run cards should show phases, tasks, and final synthesis.", + structuredOutput: { confidence: "medium", gaps: ["Dogfood in full app"] }, + }, + run: { + id: "wfr_story", + workspaceId: "workspace-1", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:story", + args: { topic: "workflow run cards" }, + status: "completed", + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:02.000Z", + events: [ + { sequence: 1, type: "status", at: "2026-05-29T00:00:00.000Z", status: "running" }, + { sequence: 2, type: "phase", at: "2026-05-29T00:00:00.000Z", name: "scope" }, + { + sequence: 3, + type: "task", + at: "2026-05-29T00:00:01.000Z", + stepId: "scope-topic", + taskId: "task_scope", + status: "completed", + }, + { + sequence: 4, + type: "phase", + at: "2026-05-29T00:00:01.000Z", + name: "adversarial-verification", + }, + { sequence: 5, type: "status", at: "2026-05-29T00:00:02.000Z", status: "completed" }, + ], + steps: [], + }, + }, + }, +}; + +export const ScratchPromotable: Story = { + render: (args) => ( + + + + ), + args: { + args: { + name: 
"scratch", + args: { topic: "promote this workflow" }, + run_in_background: true, + }, + status: "completed", + result: { + status: "completed", + runId: "wfr_scratch_story", + result: { reportMarkdown: "# Scratch workflow\n\nThis one-off workflow can be promoted." }, + run: { + id: "wfr_scratch_story", + workspaceId: "workspace-1", + definition: { + name: "scratch", + description: "Scratch workflow", + scope: "scratch", + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:scratch-story", + args: { topic: "promote this workflow" }, + status: "completed", + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:02.000Z", + events: [ + { sequence: 1, type: "phase", at: "2026-05-29T00:00:00.000Z", name: "draft" }, + { sequence: 2, type: "status", at: "2026-05-29T00:00:02.000Z", status: "completed" }, + ], + steps: [], + }, + }, + }, +}; diff --git a/src/browser/features/Tools/WorkflowRunToolCall.test.tsx b/src/browser/features/Tools/WorkflowRunToolCall.test.tsx new file mode 100644 index 0000000000..bb877ae1b2 --- /dev/null +++ b/src/browser/features/Tools/WorkflowRunToolCall.test.tsx @@ -0,0 +1,985 @@ +/* eslint-disable @typescript-eslint/require-await */ +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; +import { GlobalWindow } from "happy-dom"; +import { cleanup, fireEvent, render, waitFor } from "@testing-library/react"; +import { useEffect, type ReactNode } from "react"; + +import { APIContext } from "@/browser/contexts/API"; +import { + CommandRegistryProvider, + useCommandRegistry, + type CommandAction, +} from "@/browser/contexts/CommandRegistryContext"; +import { TooltipProvider } from "@/browser/components/Tooltip/Tooltip"; +import { ThemeProvider } from "@/browser/contexts/ThemeContext"; +import { WorkflowRunToolCall } from "./WorkflowRunToolCall"; + +function APIHarness(props: { client: unknown; children: ReactNode }) { + return ( + undefined, + 
retry: () => undefined, + }} + > + {props.children} + + ); +} + +function CommandActionCapture(props: { onActions: (actions: CommandAction[]) => void }) { + const registry = useCommandRegistry(); + useEffect(() => { + props.onActions(registry.getActions()); + }); + return null; +} + +function getWorkflowHeader(view: ReturnType): HTMLElement { + const workflowBadge = view.getByText("Workflow"); + const header = workflowBadge.closest('[data-scroll-intent="ignore"]'); + if (header == null) { + throw new Error("Workflow header not found"); + } + return header as HTMLElement; +} + +describe("WorkflowRunToolCall", () => { + let originalWindow: typeof globalThis.window; + let originalDocument: typeof globalThis.document; + let originalLocalStorage: typeof globalThis.localStorage; + + beforeEach(() => { + originalWindow = globalThis.window; + originalDocument = globalThis.document; + originalLocalStorage = globalThis.localStorage; + globalThis.window = new GlobalWindow() as unknown as Window & typeof globalThis; + globalThis.document = globalThis.window.document; + globalThis.localStorage = globalThis.window.localStorage; + }); + + afterEach(() => { + cleanup(); + globalThis.window = originalWindow; + globalThis.document = originalDocument; + globalThis.localStorage = originalLocalStorage; + }); + + test("renders workflow run phases, linked task ids, and final report", async () => { + const view = render( + + + + + + ); + + expect(view.getAllByText("deep-research").length).toBeGreaterThan(0); + const workflowHeader = getWorkflowHeader(view); + expect(workflowHeader.textContent?.indexOf("Workflow") ?? -1).toBeLessThan( + workflowHeader.textContent?.indexOf("deep-research") ?? 
-1 + ); + expect(view.queryByText("wfr_123")).toBeNull(); + + fireEvent.click(workflowHeader); + + expect(view.getByText("wfr_123")).toBeTruthy(); + const getDisclosureForTitle = (title: string) => view.getByText(title).closest("details"); + expect(getDisclosureForTitle("Arguments")?.hasAttribute("open")).toBe(false); + expect(getDisclosureForTitle("Definition source")?.hasAttribute("open")).toBe(false); + expect(getDisclosureForTitle("Structured output")?.hasAttribute("open")).toBe(false); + expect(view.container.textContent).toContain("workflow cards"); + expect(view.container.textContent).toContain("scope"); + expect(view.container.textContent).toContain("adversarial-verification"); + expect(view.container.textContent).toContain("task_scope"); + const firstEventIndex = view.getByText("#1"); + expect(firstEventIndex).toBeTruthy(); + expect(firstEventIndex.getAttribute("title")).toBeNull(); + expect(firstEventIndex.getAttribute("aria-label")).toBe("Raw event #2"); + expect(view.getByText("scope").closest("div")?.className).toContain("bg-plan-mode-alpha"); + expect(view.getByText("Scoped topic").closest("div")?.className).not.toContain( + "bg-plan-mode-alpha" + ); + expect(view.getByText("Workflow events (5)")).toBeTruthy(); + const taskEventRow = view.getByText("scope-topic / task_scope / completed"); + const taskEventIndex = view.getByText("#3"); + expect(taskEventIndex.className).toContain("cursor-help"); + expect(taskEventRow.className).toContain("cursor-pointer"); + expect(taskEventRow.getAttribute("title")).toBeNull(); + const taskEventSummary = taskEventRow.closest("summary"); + const taskEventDetails = taskEventRow.closest("details"); + if (taskEventSummary == null) { + throw new Error("Expected task event summary"); + } + expect(taskEventDetails?.hasAttribute("open")).toBe(false); + + fireEvent.click(taskEventSummary); + + expect(taskEventDetails?.hasAttribute("open")).toBe(true); + await waitFor( + () => 
expect(taskEventDetails?.textContent).toContain("Child task report body."), + { + timeout: 5_000, + } + ); + await waitFor(() => expect(view.container.textContent).toContain("Workflow result body")); + const renderedText = view.container.textContent ?? ""; + expect(renderedText.indexOf("confidence")).toBeLessThan( + renderedText.indexOf("Workflow result body") + ); + expect(renderedText).toContain("confidence"); + }); + + test("refreshes a running workflow from the API and shows the completed result", async () => { + const api = { + workflows: { + getRun: async () => ({ + id: "wfr_live", + workspaceId: "workspace-1", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:test", + args: { topic: "workflow cards" }, + status: "completed", + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:02.000Z", + events: [ + { sequence: 1, type: "status", at: "2026-05-29T00:00:00.000Z", status: "running" }, + { sequence: 2, type: "phase", at: "2026-05-29T00:00:00.000Z", name: "scope" }, + { + sequence: 3, + type: "result", + at: "2026-05-29T00:00:02.000Z", + result: { reportMarkdown: "done live" }, + }, + { sequence: 4, type: "status", at: "2026-05-29T00:00:02.000Z", status: "completed" }, + ], + steps: [], + }), + }, + }; + + const view = render( + + + + + + + + ); + + await waitFor(() => expect(view.getAllByText("completed").length).toBeGreaterThan(0)); + expect(view.queryByText("done live")).toBeNull(); + + fireEvent.click(getWorkflowHeader(view)); + + expect(view.getByText("done live")).toBeTruthy(); + }); + + test("keeps completed workflow runs expanded after the user toggles the card", async () => { + const runningRun = { + id: "wfr_manual", + workspaceId: "workspace-1", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in" as const, + executable: true, 
+ }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:test", + args: { topic: "workflow cards" }, + status: "running" as const, + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:01.000Z", + events: [ + { + sequence: 1, + type: "status" as const, + at: "2026-05-29T00:00:00.000Z", + status: "running" as const, + }, + ], + steps: [], + }; + const completedRun = { + ...runningRun, + status: "completed" as const, + updatedAt: "2026-05-29T00:00:02.000Z", + events: [ + ...runningRun.events, + { + sequence: 2, + type: "result" as const, + at: "2026-05-29T00:00:02.000Z", + result: { reportMarkdown: "manual result" }, + }, + { + sequence: 3, + type: "status" as const, + at: "2026-05-29T00:00:02.000Z", + status: "completed" as const, + }, + ], + }; + const pendingRefresh: { resolve?: (run: typeof completedRun) => void } = {}; + const api = { + workflows: { + getRun: async () => + await new Promise((resolve) => { + pendingRefresh.resolve = resolve; + }), + }, + }; + + const view = render( + + + + + + + + ); + + await waitFor(() => expect(pendingRefresh.resolve).toBeDefined()); + const workflowHeader = getWorkflowHeader(view); + fireEvent.click(workflowHeader); + expect(view.queryByText("wfr_manual")).toBeNull(); + fireEvent.click(workflowHeader); + expect(view.getByText("wfr_manual")).toBeTruthy(); + + const completeRefresh = pendingRefresh.resolve; + if (completeRefresh == null) { + throw new Error("Expected workflow refresh to be pending"); + } + completeRefresh(completedRun); + + await waitFor(() => expect(view.getByText("manual result")).toBeTruthy()); + expect(view.getAllByText("completed").length).toBeGreaterThan(0); + }); + + test("shows interrupt action for running workflows and updates with the returned run", async () => { + let interrupted = false; + const api = { + workflows: { + getRun: async () => null, + interrupt: async () => { + interrupted = true; + return { + id: "wfr_interrupt", + 
workspaceId: "workspace-1", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:test", + args: { topic: "workflow cards" }, + status: "interrupted", + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:02.000Z", + events: [ + { sequence: 1, type: "status", at: "2026-05-29T00:00:00.000Z", status: "running" }, + { + sequence: 2, + type: "status", + at: "2026-05-29T00:00:02.000Z", + status: "interrupted", + }, + ], + steps: [], + }; + }, + }, + }; + + const view = render( + + + + + + + + ); + + fireEvent.click(view.getByRole("button", { name: "Interrupt workflow" })); + + await waitFor(() => expect(interrupted).toBe(true)); + await waitFor(() => expect(view.getAllByText("interrupted").length).toBeGreaterThan(0)); + }); + + test("registers workflow run actions with the command palette", async () => { + let interrupted = false; + let actions: CommandAction[] = []; + const api = { + workflows: { + getRun: async () => null, + interrupt: async () => { + interrupted = true; + return { + id: "wfr_palette", + workspaceId: "workspace-1", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:test", + args: { topic: "workflow cards" }, + status: "interrupted", + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:02.000Z", + events: [ + { sequence: 1, type: "status", at: "2026-05-29T00:00:00.000Z", status: "running" }, + { + sequence: 2, + type: "status", + at: "2026-05-29T00:00:02.000Z", + status: "interrupted", + }, + ], + steps: [], + }; + }, + }, + }; + + render( + + + + + + (actions = nextActions)} /> + + + + + ); + + await waitFor(() => + expect(actions.some((action) => action.id === 
"workflow:wfr_palette:interrupt")).toBe(true) + ); + const interruptAction = actions.find( + (action) => action.id === "workflow:wfr_palette:interrupt" + ); + expect(interruptAction).toBeDefined(); + await interruptAction?.run(); + + await waitFor(() => expect(interrupted).toBe(true)); + }); + + test("shows resume action for interrupted workflows and refreshes after resume", async () => { + let resumed = false; + let getRunCalls = 0; + const interruptedRun = { + id: "wfr_resume", + workspaceId: "workspace-1", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in" as const, + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:test", + args: { topic: "workflow cards" }, + status: "interrupted" as const, + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:01.000Z", + events: [ + { + sequence: 1, + type: "status" as const, + at: "2026-05-29T00:00:00.000Z", + status: "interrupted" as const, + }, + ], + steps: [], + }; + const completedRun = { + ...interruptedRun, + status: "completed" as const, + updatedAt: "2026-05-29T00:00:02.000Z", + events: [ + ...interruptedRun.events, + { + sequence: 2, + type: "result" as const, + at: "2026-05-29T00:00:02.000Z", + result: { reportMarkdown: "resumed" }, + }, + { + sequence: 3, + type: "status" as const, + at: "2026-05-29T00:00:02.000Z", + status: "completed" as const, + }, + ], + }; + const api = { + workflows: { + resume: async () => { + resumed = true; + return { + runId: "wfr_resume", + status: "running" as const, + result: null, + }; + }, + getRun: async () => { + getRunCalls += 1; + return getRunCalls === 1 ? 
interruptedRun : completedRun; + }, + }, + }; + + const view = render( + + + + + + + + ); + + fireEvent.click(view.getByRole("button", { name: "Resume workflow" })); + + await waitFor(() => expect(resumed).toBe(true)); + await waitFor(() => expect(view.getAllByText("completed").length).toBeGreaterThan(0)); + expect(view.queryByText("resumed")).toBeNull(); + + fireEvent.click(getWorkflowHeader(view)); + + expect(view.getByText("resumed")).toBeTruthy(); + }); + + test("clears resume polling when resume fails", async () => { + let getRunCalls = 0; + const api = { + workflows: { + resume: async () => { + throw new Error("Project trust is required"); + }, + getRun: async () => { + getRunCalls += 1; + return null; + }, + }, + }; + + const view = render( + + + + + + + + ); + + fireEvent.click(view.getByRole("button", { name: "Resume workflow" })); + + await waitFor(() => expect(view.getByText("Project trust is required")).toBeTruthy()); + await new Promise((resolve) => setTimeout(resolve, 0)); + expect(getRunCalls).toBe(0); + }); + + test("saves scratch workflow runs directly to project workflows", async () => { + const promotions: unknown[] = []; + const api = { + workflows: { + promoteScratch: async (input: unknown) => { + promotions.push(input); + return { + name: "scratch", + description: "Scratch workflow", + scope: "project", + sourcePath: "/repo/.mux/workflows/scratch.js", + executable: true, + }; + }, + }, + }; + + const view = render( + + + + + + + + ); + + fireEvent.click(getWorkflowHeader(view)); + + expect(view.queryByRole("button", { name: "Promote workflow" })).toBeNull(); + expect(view.getByRole("button", { name: "Save to global workflows" })).toBeTruthy(); + + fireEvent.click(view.getByRole("button", { name: "Save to project workflows" })); + + await waitFor(() => expect(promotions).toHaveLength(1)); + expect(promotions[0]).toEqual({ + workspaceId: "workspace-1", + runId: "wfr_scratch", + name: "scratch", + description: "Scratch workflow", + location: 
"project", + overwrite: false, + }); + await waitFor(() => expect(view.getByText("Saved to project workflows")).toBeTruthy()); + expect(view.container.textContent).toContain("/repo/.mux/workflows/scratch.js"); + expect(view.queryByRole("button", { name: "Save to project workflows" })).toBeNull(); + expect(view.queryByRole("button", { name: "Save to global workflows" })).toBeNull(); + }); + + test("saves scratch workflow runs directly to global workflows", async () => { + const promotions: unknown[] = []; + const api = { + workflows: { + promoteScratch: async (input: unknown) => { + promotions.push(input); + return { + name: "scratch", + description: "Scratch workflow", + scope: "global", + sourcePath: "/home/user/.mux/workflows/scratch.js", + executable: true, + }; + }, + }, + }; + + const view = render( + + + + + + + + ); + + fireEvent.click(getWorkflowHeader(view)); + + fireEvent.click(view.getByRole("button", { name: "Save to global workflows" })); + + await waitFor(() => expect(promotions).toHaveLength(1)); + expect(promotions[0]).toEqual({ + workspaceId: "workspace-1", + runId: "wfr_scratch", + name: "scratch", + description: "Scratch workflow", + location: "global", + overwrite: false, + }); + await waitFor(() => expect(view.getByText("Saved to global workflows")).toBeTruthy()); + expect(view.container.textContent).toContain("/home/user/.mux/workflows/scratch.js"); + expect(view.queryByRole("button", { name: "Save to project workflows" })).toBeNull(); + expect(view.queryByRole("button", { name: "Save to global workflows" })).toBeNull(); + }); + + test("uses live workflow run status for the header instead of stale tool completion state", () => { + const view = render( + + + + + + ); + + expect(view.getByText("executing")).toBeTruthy(); + expect(view.queryByText("completed")).toBeNull(); + }); +}); diff --git a/src/browser/features/Tools/WorkflowRunToolCall.tsx b/src/browser/features/Tools/WorkflowRunToolCall.tsx new file mode 100644 index 
0000000000..74085be8b0 --- /dev/null +++ b/src/browser/features/Tools/WorkflowRunToolCall.tsx @@ -0,0 +1,770 @@ +import React, { useContext, useEffect, useLayoutEffect, useRef, useState } from "react"; + +import { APIContext, type APIClient } from "@/browser/contexts/API"; +import { TooltipIfPresent } from "@/browser/components/Tooltip/Tooltip"; +import { + useOptionalCommandRegistry, + type CommandAction, +} from "@/browser/contexts/CommandRegistryContext"; +import type { + WorkflowDefinitionDescriptor, + WorkflowRunEvent, + WorkflowRunRecord, + WorkflowStepRecord, +} from "@/common/types/workflow"; +import type { + WorkflowRunToolArgs, + WorkflowRunToolResult, + WorkflowRunToolSuccessResult, +} from "@/common/types/tools"; +import assert from "@/common/utils/assert"; + +import { + ToolContainer, + ToolHeader, + ExpandIcon, + ToolName, + StatusIndicator, + ToolDetails, + ToolIcon, + ErrorBox, +} from "./Shared/ToolPrimitives"; +import { + getStatusDisplay, + isToolErrorResult, + type ToolStatus, + useToolExpansion, +} from "./Shared/toolUtils"; +import { HighlightedCode } from "./Shared/HighlightedCode"; +import { + WorkflowDefinitionCard, + WorkflowJsonBlock, + WorkflowKindBadge, + WorkflowSection, + WORKFLOW_ACTION_BUTTON_CLASS, + formatWorkflowSavedMessage, + type WorkflowPromotionTarget, +} from "./WorkflowDefinitionToolCall"; +import { MarkdownRenderer } from "../Messages/MarkdownRenderer"; + +interface WorkflowRunToolCallProps { + args: WorkflowRunToolArgs; + result?: WorkflowRunToolResult; + status?: ToolStatus; +} + +type WorkflowRunAction = "interrupt" | "resume"; + +async function updateWorkflowRunFromAction(input: { + api: APIClient; + workspaceId: string; + runId: string; + action: WorkflowRunAction; + setActionError: React.Dispatch>; + setRefreshedRun: React.Dispatch>; + setResumingRunId: React.Dispatch>; +}) { + input.setActionError(null); + let resumeRequestAccepted = false; + try { + const nextRun = + input.action === "interrupt" + ? 
await input.api.workflows.interrupt({ + workspaceId: input.workspaceId, + runId: input.runId, + }) + : await input.api.workflows.resume({ + workspaceId: input.workspaceId, + runId: input.runId, + }); + if (input.action === "resume") { + resumeRequestAccepted = true; + input.setResumingRunId(input.runId); + } + if ("id" in nextRun) { + input.setRefreshedRun(nextRun); + if (nextRun.status !== "interrupted") { + input.setResumingRunId(null); + } + return; + } + const refreshed = await input.api.workflows.getRun({ + workspaceId: input.workspaceId, + runId: input.runId, + }); + if (refreshed != null) { + input.setRefreshedRun(refreshed); + if (refreshed.status !== "interrupted") { + input.setResumingRunId(null); + } + } + } catch (error) { + if (input.action === "resume" && !resumeRequestAccepted) { + input.setResumingRunId(null); + } + input.setActionError( + error instanceof Error ? error.message : `Failed to ${input.action} workflow` + ); + } +} + +function isWorkflowRunSuccessResult( + value: WorkflowRunToolResult | undefined +): value is WorkflowRunToolSuccessResult { + return value != null && !isToolErrorResult(value); +} + +function getReportMarkdown(value: unknown): string | null { + if (value != null && typeof value === "object") { + const reportMarkdown = (value as Record).reportMarkdown; + if (typeof reportMarkdown === "string" && reportMarkdown.trim().length > 0) { + return reportMarkdown; + } + } + return null; +} + +function getStructuredOutput(value: unknown): unknown { + if (value != null && typeof value === "object") { + return (value as Record).structuredOutput; + } + return undefined; +} + +function getEventKey(event: WorkflowRunEvent): string { + return `${event.sequence}:${event.type}`; +} + +function getWorkflowEventLabel(event: WorkflowRunEvent): string { + switch (event.type) { + case "phase": + return event.name; + case "log": + return event.message; + case "task": + return `${event.stepId} / ${event.taskId} / ${event.status}`; + case "patch": + 
return `${event.stepId} / ${event.sourceTaskId} / ${event.status}`; + case "validation": { + const verdict = event.success ? "passed" : "failed"; + return event.message + ? `${event.stepId} validation ${verdict}: ${event.message}` + : `${event.stepId} validation ${verdict}`; + } + case "error": + return event.message; + case "status": + return event.status; + case "result": + return "Result recorded"; + } +} + +function getWorkflowEventDetail(event: WorkflowRunEvent): unknown { + switch (event.type) { + case "phase": + return event.details; + case "log": + return event.data; + case "result": + return event.result; + case "patch": + return event.details; + case "task": + case "validation": + case "error": + case "status": + return undefined; + } +} + +function getEventTone(event: WorkflowRunEvent): "normal" | "success" | "warning" { + if (event.type === "error") { + return "warning"; + } + if (event.type === "validation") { + return event.success ? "success" : "warning"; + } + if (event.type === "patch") { + return event.status === "applied" + ? "success" + : event.status === "started" + ? "normal" + : "warning"; + } + if (event.type === "result") { + return "success"; + } + return "normal"; +} + +function getEventToneClass(event: WorkflowRunEvent): string { + switch (getEventTone(event)) { + case "success": + return "text-success"; + case "warning": + return "text-warning"; + case "normal": + return "text-muted"; + } +} + +function WorkflowDisclosureSection(props: { + title: string; + children: React.ReactNode; + className?: string; +}) { + return ( +
+ + + {props.title} + +
{props.children}
+
+ ); +} + +function getEventRowClass(event: WorkflowRunEvent): string { + if (event.type === "phase") { + return "border-l-2 border-plan-mode/70 bg-plan-mode-alpha"; + } + return "border-l-2 border-transparent"; +} + +function getEventTypeClass(event: WorkflowRunEvent): string { + if (event.type === "phase") { + return "rounded border border-plan-mode/40 bg-plan-mode-alpha px-1 py-0.5 text-plan-mode-light"; + } + return getEventToneClass(event); +} + +function findTaskStepForEvent( + event: WorkflowRunEvent, + steps: readonly WorkflowStepRecord[] +): WorkflowStepRecord | null { + if (event.type !== "task") { + return null; + } + + const byTaskId = steps.find((step) => step.taskId === event.taskId); + if (byTaskId != null) { + return byTaskId; + } + return steps.find((step) => step.stepId === event.stepId) ?? null; +} + +function getTaskReportMarkdown( + event: WorkflowRunEvent, + steps: readonly WorkflowStepRecord[] +): string | null { + const step = findTaskStepForEvent(event, steps); + const reportMarkdown = step?.result?.reportMarkdown; + return typeof reportMarkdown === "string" && reportMarkdown.trim().length > 0 + ? reportMarkdown + : null; +} + +function WorkflowEventTooltip(props: { + event: WorkflowRunEvent; + displayIndex: number; + label: string; +}) { + return ( +
+
+ Display #{props.displayIndex} · Raw event #{props.event.sequence} +
+
{props.label}
+
+ ); +} + +function WorkflowEventRow(props: { + event: WorkflowRunEvent; + displayIndex: number; + steps: readonly WorkflowStepRecord[]; +}) { + const event = props.event; + const detail = getWorkflowEventDetail(event); + const taskReportMarkdown = getTaskReportMarkdown(event, props.steps); + const isExpandable = detail !== undefined || taskReportMarkdown != null; + const clickableCursorClass = isExpandable ? "cursor-pointer" : ""; + const label = getWorkflowEventLabel(event); + const row = ( +
+ {/* Keep the tooltip trigger on the sequence cell so expandable row text still advertises clickability. */} + + } + side="top" + align="start" + > + + #{props.displayIndex} + + + + {event.type} + + {label} +
+ ); + + if (!isExpandable) { + return
  • {row}
  • ; + } + + return ( +
  • +
    + + {row} + + {taskReportMarkdown != null ? ( +
    + +
    + ) : ( + + )} +
    +
  • + ); +} + +const AUTO_COLLAPSE_WORKFLOW_STATUSES = new Set(["completed"]); + +const REFRESHING_WORKFLOW_STATUSES = new Set(["pending", "running", "backgrounded"]); + +function getLatestResultEvent(run: WorkflowRunRecord | null | undefined): unknown { + return run?.events.findLast((event) => event.type === "result")?.result; +} + +function shouldRefreshWorkflow(status: string): boolean { + return REFRESHING_WORKFLOW_STATUSES.has(status); +} + +function toToolStatus(status: string): ToolStatus { + if (status === "running") { + return "executing"; + } + if ( + status === "pending" || + status === "completed" || + status === "failed" || + status === "interrupted" || + status === "backgrounded" + ) { + return status; + } + return "pending"; +} + +export const WorkflowRunToolCall: React.FC = ({ + args, + result, + status = "pending", +}) => { + const apiState = useContext(APIContext); + const commandRegistry = useOptionalCommandRegistry(); + const { expanded, setExpanded, toggleExpanded } = useToolExpansion(true); + const userToggledExpansionRef = useRef(false); + const autoCollapseRunIdRef = useRef(undefined); + const registerCommandSource = commandRegistry?.registerSource; + const errorResult = isToolErrorResult(result) ? result : null; + const successResult = isWorkflowRunSuccessResult(result) ? result : null; + const [refreshedRun, setRefreshedRun] = useState(null); + const [resumingRunId, setResumingRunId] = useState(null); + const baseRun = successResult?.run; + const runId = successResult?.runId ?? baseRun?.id; + const run = refreshedRun?.id === runId ? refreshedRun : baseRun; + const displayStatus = run?.status ?? successResult?.status ?? status; + const resultValue = successResult?.result ?? getLatestResultEvent(run); + const reportMarkdown = getReportMarkdown(resultValue); + const structuredOutput = getStructuredOutput(resultValue); + const invocationArgs = run?.args ?? args.args ?? {}; + const events = run?.events ?? 
[]; + const interestingEvents = events.filter( + (event) => event.type !== "status" && event.type !== "result" + ); + const headerStatus = toToolStatus(displayStatus); + + const toggleWorkflowExpanded = () => { + userToggledExpansionRef.current = true; + toggleExpanded(); + }; + useLayoutEffect(() => { + if (autoCollapseRunIdRef.current !== runId) { + autoCollapseRunIdRef.current = runId; + userToggledExpansionRef.current = false; + } + // Completed workflow runs can contain large reports and event logs. Collapse them once for + // scanability, but never override an explicit user expansion/collapse choice. + if (AUTO_COLLAPSE_WORKFLOW_STATUSES.has(displayStatus) && !userToggledExpansionRef.current) { + setExpanded(false); + } + }, [displayStatus, runId, setExpanded]); + + const [actionError, setActionError] = useState(null); + const [promotedDefinition, setPromotedDefinition] = useState( + null + ); + const [savingPromotionTarget, setSavingPromotionTarget] = + useState(null); + const savingPromotionTargetRef = useRef(null); + const displayDefinition = promotedDefinition ?? 
run?.definition; + const canInterrupt = + apiState?.api != null && + run?.workspaceId != null && + (displayStatus === "running" || displayStatus === "backgrounded"); + const canResume = + apiState?.api != null && run?.workspaceId != null && displayStatus === "interrupted"; + const canPromote = + run?.workspaceId != null && run.definition.scope === "scratch" && promotedDefinition == null; + const canSavePromotedWorkflow = + apiState?.api != null && + runId != null && + canPromote && + savingPromotionTarget == null && + savingPromotionTargetRef.current == null; + + const updateRunFromAction = async (action: WorkflowRunAction) => { + if (apiState?.api == null || run?.workspaceId == null || runId == null) { + return; + } + await updateWorkflowRunFromAction({ + api: apiState.api, + workspaceId: run.workspaceId, + runId, + action, + setActionError, + setRefreshedRun, + setResumingRunId, + }); + }; + + const saveScratchWorkflow = (location: WorkflowPromotionTarget) => { + const api = apiState?.api; + const sourceDefinition = run?.definition; + if ( + api == null || + run?.workspaceId == null || + runId == null || + sourceDefinition == null || + !canPromote || + savingPromotionTargetRef.current != null + ) { + return; + } + + assert(sourceDefinition.scope === "scratch", "Only scratch workflow runs can be saved"); + setActionError(null); + setSavingPromotionTarget(location); + savingPromotionTargetRef.current = location; + api.workflows + .promoteScratch({ + workspaceId: run.workspaceId, + runId, + name: sourceDefinition.name, + description: sourceDefinition.description, + location, + overwrite: false, + }) + .then((descriptor) => { + assert( + descriptor.scope === location, + "promoteScratch returned a descriptor for a different location" + ); + setPromotedDefinition(descriptor); + }) + .catch((error: unknown) => { + setActionError(error instanceof Error ? 
error.message : "Failed to save workflow"); + }) + .finally(() => { + savingPromotionTargetRef.current = null; + setSavingPromotionTarget(null); + }); + }; + + const saveScratchWorkflowRef = useRef(saveScratchWorkflow); + saveScratchWorkflowRef.current = saveScratchWorkflow; + + useEffect(() => { + if (registerCommandSource == null || runId == null || run?.workspaceId == null) { + return; + } + + const workflowApi = apiState?.api; + const workspaceId = run.workspaceId; + const runWorkflowAction = async (action: WorkflowRunAction) => { + if (workflowApi == null) { + return; + } + await updateWorkflowRunFromAction({ + api: workflowApi, + workspaceId, + runId, + action, + setActionError, + setRefreshedRun, + setResumingRunId, + }); + }; + + const unregister = registerCommandSource(() => { + const subtitle = `${args.name} • ${runId}`; + const actions: CommandAction[] = []; + if (canInterrupt) { + actions.push({ + id: `workflow:${runId}:interrupt`, + title: `Interrupt workflow: ${args.name}`, + subtitle, + section: "Workflows", + keywords: ["workflow", "interrupt", "stop", args.name, runId], + run: () => runWorkflowAction("interrupt"), + }); + } + if (canResume) { + actions.push({ + id: `workflow:${runId}:resume`, + title: `Resume workflow: ${args.name}`, + subtitle, + section: "Workflows", + keywords: ["workflow", "resume", "continue", args.name, runId], + run: () => runWorkflowAction("resume"), + }); + } + if (canPromote) { + actions.push( + { + id: `workflow:${runId}:save-project`, + title: `Save workflow to project workflows: ${args.name}`, + subtitle, + section: "Workflows", + keywords: ["workflow", "save", "project", "scratch", args.name, runId], + run: () => { + userToggledExpansionRef.current = true; + setExpanded(true); + saveScratchWorkflowRef.current("project"); + }, + }, + { + id: `workflow:${runId}:save-global`, + title: `Save workflow to global workflows: ${args.name}`, + subtitle, + section: "Workflows", + keywords: ["workflow", "save", "global", 
"scratch", args.name, runId], + run: () => { + userToggledExpansionRef.current = true; + setExpanded(true); + saveScratchWorkflowRef.current("global"); + }, + } + ); + } + return actions; + }); + + return unregister; + }, [ + apiState?.api, + args.name, + canInterrupt, + canPromote, + canResume, + registerCommandSource, + run?.workspaceId, + runId, + setExpanded, + ]); + + useEffect(() => { + if ( + apiState?.api == null || + runId == null || + run?.workspaceId == null || + (!shouldRefreshWorkflow(displayStatus) && resumingRunId !== runId) + ) { + return; + } + + let ignore = false; + const refresh = async () => { + try { + const nextRun = await apiState.api.workflows.getRun({ + workspaceId: run.workspaceId, + runId, + }); + if (!ignore && nextRun != null) { + setRefreshedRun(nextRun); + if (nextRun.status !== "interrupted") { + setResumingRunId(null); + } + } + } catch (error) { + console.error("Failed to refresh workflow run:", error); + } + }; + + void refresh(); + const interval = window.setInterval(() => { + void refresh(); + }, 2_000); + return () => { + ignore = true; + window.clearInterval(interval); + }; + }, [apiState?.api, displayStatus, resumingRunId, run?.workspaceId, runId]); + + return ( + + + + + + {args.name} + {getStatusDisplay(headerStatus)} + + + {expanded && ( + +
    + {runId && {runId}} + {displayStatus} + {displayDefinition?.scope && {displayDefinition.scope}} +
    + + {displayDefinition && ( + + + + )} + + {/* Large workflow payloads stay collapsed so completed runs remain scannable. */} + + + + + {run?.definitionSource && ( + +
    + +
    +
    + )} + + {(canInterrupt || canResume || canPromote) && ( +
    + {canInterrupt && ( + + )} + {canResume && ( + + )} + {canPromote && ( + <> + + + + )} +
    + )} + + {(promotedDefinition?.scope === "project" || promotedDefinition?.scope === "global") && ( +
    + {formatWorkflowSavedMessage(promotedDefinition.scope)} +
    + )} + + {actionError && {actionError}} + + {interestingEvents.length > 0 && ( + +
    +
      + {interestingEvents.map((event, index) => ( + + ))} +
    +
    +
    + )} + + {structuredOutput !== undefined && ( + + + + )} + + {reportMarkdown && ( +
    + +
    + )} + + {errorResult && {errorResult.error}} +
    + )} +
    + ); +}; diff --git a/src/browser/hooks/useSendMessageOptions.ts b/src/browser/hooks/useSendMessageOptions.ts index 3426e7042d..53664d1061 100644 --- a/src/browser/hooks/useSendMessageOptions.ts +++ b/src/browser/hooks/useSendMessageOptions.ts @@ -61,6 +61,8 @@ export function useSendMessageOptions(workspaceId: string): SendMessageOptionsWi const execSubagentHardRestart = useExperimentOverrideValue( EXPERIMENT_IDS.EXEC_SUBAGENT_HARD_RESTART ); + const dynamicWorkflows = useExperimentOverrideValue(EXPERIMENT_IDS.DYNAMIC_WORKFLOWS); + const subagentFileReports = useExperimentOverrideValue(EXPERIMENT_IDS.SUBAGENT_FILE_REPORTS); // Prefer metadata over the global default until workspace localStorage seeding catches up. const metadataSettings = getWorkspaceAiSettingsFromMetadata( @@ -82,6 +84,8 @@ export function useSendMessageOptions(workspaceId: string): SendMessageOptionsWi programmaticToolCallingExclusive, advisorTool, execSubagentHardRestart, + dynamicWorkflows, + subagentFileReports, }, disableWorkspaceAgents, }); diff --git a/src/browser/utils/chatCommands.test.ts b/src/browser/utils/chatCommands.test.ts index de8a11775d..7c9a8b2222 100644 --- a/src/browser/utils/chatCommands.test.ts +++ b/src/browser/utils/chatCommands.test.ts @@ -145,6 +145,374 @@ function createGoalCommandContext(api: SlashCommandContext["api"]): SlashCommand }); } +describe("processSlashCommand - workflow", () => { + test("rejects workflow execution when dynamic workflows are disabled", async () => { + const start = mock(() => + Promise.resolve({ runId: "wfr_123", status: "running", result: null }) + ); + const context = createSlashCommandContext({ + api: { + workflows: { start }, + } as unknown as SlashCommandContext["api"], + dynamicWorkflowsEnabled: false, + }); + + const result = await processSlashCommand( + { type: "workflow-run", name: "deep-research", argsText: "mux" }, + context + ); + + expect(result).toEqual({ clearInput: false, toastShown: true }); + 
expect(start).not.toHaveBeenCalled(); + expect(context.setToast).toHaveBeenCalledWith( + expect.objectContaining({ type: "error", message: "Dynamic workflows are disabled" }) + ); + }); + + test("sends completed workflow slash output to the main agent as hidden context", async () => { + const workflowResult = { + reportMarkdown: "# Research\n\nFindings", + structuredOutput: { confidence: "high" }, + }; + const start = mock(() => + Promise.resolve({ + runId: "wfr_123", + status: "completed", + result: workflowResult, + }) + ); + const getRun = mock(() => + Promise.resolve({ + id: "wfr_123", + workspaceId: "test-ws", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:test", + args: { input: "mux" }, + status: "completed", + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:01.000Z", + events: [ + { + sequence: 1, + type: "result", + at: "2026-05-29T00:00:01.000Z", + result: workflowResult, + }, + ], + steps: [], + }) + ); + interface SentWorkflowMessage { + message: string; + options: { muxMetadata?: { type?: string; rawCommand?: string; commandPrefix?: string } }; + } + const sentMessages: SentWorkflowMessage[] = []; + const sendMessage = mock((input: SentWorkflowMessage) => { + sentMessages.push(input); + return Promise.resolve({ success: true }); + }); + const onMessageSent = mock(() => undefined); + const context = createSlashCommandContext({ + api: { + workflows: { start, getRun }, + workspace: { sendMessage }, + } as unknown as SlashCommandContext["api"], + rawInput: "/deep-research mux", + dynamicWorkflowsEnabled: true, + onMessageSent, + }); + + const result = await processSlashCommand( + { type: "workflow-run", name: "deep-research", argsText: "mux" }, + context + ); + + expect(result).toEqual({ clearInput: true, toastShown: true }); + expect(start).toHaveBeenCalledWith({ 
+ workspaceId: "test-ws", + name: "deep-research", + runInBackground: true, + args: { input: "mux" }, + rawCommand: "/deep-research mux", + continuationOptions: context.sendMessageOptions, + }); + expect(getRun).toHaveBeenCalledWith({ workspaceId: "test-ws", runId: "wfr_123" }); + expect(sendMessage).toHaveBeenCalledTimes(1); + const sendInput = sentMessages[0]; + expect(sendInput).toBeDefined(); + expect(sendInput.message).toContain("/deep-research mux"); + expect(sendInput.message).toContain(""); + expect(sendInput.message).toContain("Findings"); + expect(sendInput.message).toContain("confidence"); + expect(sendInput.options.muxMetadata?.type).toBe("workflow-result"); + expect(sendInput.options.muxMetadata?.rawCommand).toBe("/deep-research mux"); + expect(sendInput.options.muxMetadata?.commandPrefix).toBe("/deep-research"); + expect(context.setSendingState).toHaveBeenNthCalledWith(1, true); + expect(context.setSendingState).toHaveBeenNthCalledWith(2, false); + expect(context.setSendingState).toHaveBeenNthCalledWith(3, true); + expect(context.setSendingState).toHaveBeenNthCalledWith(4, false); + expect(onMessageSent).toHaveBeenCalledWith("tool-end"); + }); + + test("leaves slash workflow continuation to backend when invocation is persisted", async () => { + const start = mock(() => + Promise.resolve({ + runId: "wfr_123", + status: "running", + result: null, + invocationMessagePersisted: true, + }) + ); + const getRun = mock(() => Promise.resolve(null)); + const sendMessage = mock(() => Promise.resolve({ success: true })); + const context = createSlashCommandContext({ + api: { + workflows: { start, getRun }, + workspace: { sendMessage }, + } as unknown as SlashCommandContext["api"], + rawInput: "/deep-research mux", + dynamicWorkflowsEnabled: true, + }); + + const result = await processSlashCommand( + { type: "workflow-run", name: "deep-research", argsText: "mux" }, + context + ); + + expect(result).toEqual({ clearInput: true, toastShown: true }); + 
expect(start).toHaveBeenCalledWith({ + workspaceId: "test-ws", + name: "deep-research", + runInBackground: true, + args: { input: "mux" }, + rawCommand: "/deep-research mux", + continuationOptions: context.sendMessageOptions, + }); + expect(getRun).not.toHaveBeenCalled(); + expect(sendMessage).not.toHaveBeenCalled(); + expect(context.setToast).toHaveBeenCalledWith( + expect.objectContaining({ type: "success", message: "Workflow deep-research started" }) + ); + expect(context.setSendingState).toHaveBeenNthCalledWith(1, true); + expect(context.setSendingState).toHaveBeenNthCalledWith(2, false); + }); + + test("does not send terminal workflow results for superseded slash commands", async () => { + const workflowResult = { reportMarkdown: "done" }; + const start = mock(() => + Promise.resolve({ + runId: "wfr_completed", + status: "completed", + result: workflowResult, + }) + ); + const getRun = mock(() => + Promise.resolve({ + id: "wfr_completed", + workspaceId: "test-ws", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:test", + args: { input: "mux" }, + status: "completed", + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:01.000Z", + events: [ + { sequence: 1, type: "result", at: "2026-05-29T00:00:01.000Z", result: workflowResult }, + ], + steps: [], + }) + ); + const sendMessage = mock(() => Promise.resolve({ success: true })); + const context = createSlashCommandContext({ + api: { + workflows: { start, getRun }, + workspace: { sendMessage }, + } as unknown as SlashCommandContext["api"], + rawInput: "/deep-research mux", + dynamicWorkflowsEnabled: true, + asyncCommandToken: 1, + isAsyncCommandCurrent: mock(() => false), + }); + + const result = await processSlashCommand( + { type: "workflow-run", name: "deep-research", argsText: "mux" }, + context + ); + + 
expect(result).toEqual({ clearInput: true, toastShown: false }); + expect(sendMessage).not.toHaveBeenCalled(); + expect(context.setToast).not.toHaveBeenCalled(); + }); + + test("does not restore a superseded workflow slash command", async () => { + const start = mock(() => + Promise.resolve({ + runId: "wfr_running", + status: "running", + result: null, + }) + ); + const getRun = mock(() => + Promise.resolve({ + id: "wfr_running", + workspaceId: "test-ws", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:test", + args: { input: "mux" }, + status: "running", + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:01.000Z", + events: [], + steps: [], + }) + ); + const sendMessage = mock(() => Promise.resolve({ success: true })); + const context = createSlashCommandContext({ + api: { + workflows: { start, getRun }, + workspace: { sendMessage }, + } as unknown as SlashCommandContext["api"], + rawInput: "/deep-research mux", + dynamicWorkflowsEnabled: true, + asyncCommandToken: 1, + isAsyncCommandCurrent: mock(() => false), + }); + + const result = await processSlashCommand( + { type: "workflow-run", name: "deep-research", argsText: "mux" }, + context + ); + + expect(result).toEqual({ clearInput: true, toastShown: false }); + expect(sendMessage).not.toHaveBeenCalled(); + expect(context.setToast).not.toHaveBeenCalled(); + }); + + test("does not restore failed workflow slash commands over newer drafts", async () => { + const start = mock(() => + Promise.resolve({ + runId: "wfr_failed_send", + status: "completed", + result: { reportMarkdown: "done" }, + }) + ); + const getRun = mock(() => + Promise.resolve({ + id: "wfr_failed_send", + workspaceId: "test-ws", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + 
definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:test", + args: { input: "mux" }, + status: "completed", + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:01.000Z", + events: [], + steps: [], + }) + ); + const sendMessage = mock(() => Promise.resolve({ success: false })); + const context = createSlashCommandContext({ + api: { + workflows: { start, getRun }, + workspace: { sendMessage }, + } as unknown as SlashCommandContext["api"], + rawInput: "/deep-research mux", + dynamicWorkflowsEnabled: true, + getInput: mock(() => "newer draft"), + }); + + const result = await processSlashCommand( + { type: "workflow-run", name: "deep-research", argsText: "mux" }, + context + ); + + expect(result).toEqual({ clearInput: true, toastShown: true }); + expect(context.setToast).toHaveBeenCalledWith( + expect.objectContaining({ + type: "error", + message: "Failed to send workflow result to the agent", + }) + ); + }); + + test("does not continue the agent after an interrupted workflow slash run", async () => { + const start = mock(() => + Promise.resolve({ + runId: "wfr_interrupted", + status: "interrupted", + result: null, + }) + ); + const getRun = mock(() => + Promise.resolve({ + id: "wfr_interrupted", + workspaceId: "test-ws", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:test", + args: { input: "mux" }, + status: "interrupted", + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:01.000Z", + events: [], + steps: [], + }) + ); + const sendMessage = mock(() => Promise.resolve({ success: true })); + const onMessageSent = mock(() => undefined); + const context = createSlashCommandContext({ + api: { + workflows: { start, getRun }, + workspace: { sendMessage }, + } as unknown as SlashCommandContext["api"], + rawInput: 
"/deep-research mux", + dynamicWorkflowsEnabled: true, + onMessageSent, + }); + + const result = await processSlashCommand( + { type: "workflow-run", name: "deep-research", argsText: "mux" }, + context + ); + + expect(result).toEqual({ clearInput: true, toastShown: true }); + expect(sendMessage).not.toHaveBeenCalled(); + expect(onMessageSent).not.toHaveBeenCalled(); + expect(context.setToast).toHaveBeenCalledWith( + expect.objectContaining({ type: "success", message: "Workflow deep-research interrupted" }) + ); + }); +}); + describe("processSlashCommand - side-question", () => { function createSideQuestionContext( sideQuestion: (input: { diff --git a/src/browser/utils/chatCommands.ts b/src/browser/utils/chatCommands.ts index 4f9297cc8a..30d4784abb 100644 --- a/src/browser/utils/chatCommands.ts +++ b/src/browser/utils/chatCommands.ts @@ -23,6 +23,7 @@ import { } from "@/common/types/message"; import type { GoalRecordV1, GoalSetError, GoalStatus } from "@/common/types/goal"; import type { ReviewNoteData } from "@/common/types/review"; +import type { WorkflowRunRecord } from "@/common/types/workflow"; import type { FrontendWorkspaceMetadata } from "@/common/types/workspace"; import type { RuntimeConfig } from "@/common/types/runtime"; import { RUNTIME_MODE, parseRuntimeModeAndHost } from "@/common/types/runtime"; @@ -78,6 +79,10 @@ import { addEphemeralMessage } from "@/browser/stores/WorkspaceStore"; import { setGoalWithConflictRetry } from "@/browser/utils/goals/setGoalWithConflictRetry"; import { loadGoalDefaults, resolveGoalSetIntent } from "@/browser/utils/goals/resolveGoalSetIntent"; import { SIDE_QUESTION_COMMAND } from "@/common/utils/messages/sideQuestion"; +import { + WORKFLOW_RESULT_METADATA_TYPE, + buildWorkflowResultContextMessage, +} from "@/common/utils/workflowRunMessages"; const BUILT_IN_MODEL_SET = new Set(Object.values(KNOWN_MODELS).map((model) => model.id)); @@ -158,6 +163,12 @@ export interface SlashCommandContext extends Omit void; + /** Original 
slash command text as typed, for durable command display. */ + rawInput?: string; + + /** Current dynamic-workflows experiment assignment for executable workflow commands. */ + dynamicWorkflowsEnabled?: boolean; + // Global Actions setPreferredModel: (model: string) => void; setVimEnabled: (cb: (prev: boolean) => boolean) => void; @@ -182,6 +193,63 @@ export interface SlashCommandContext extends Omit { + return new Promise((resolve) => window.setTimeout(resolve, ms)); +} + +async function waitForWorkflowTerminalRun(input: { + client: RouterClient; + workspaceId: string; + runId: string; + initialStatus: string; + isCurrent?: () => boolean; +}): Promise { + let run = await input.client.workflows.getRun({ + workspaceId: input.workspaceId, + runId: input.runId, + }); + let status = run?.status ?? input.initialStatus; + + while (!isWorkflowTerminalStatus(status)) { + if (input.isCurrent?.() === false) { + throw new Error(WORKFLOW_COMMAND_SUPERSEDED_MESSAGE); + } + await delay(WORKFLOW_POLL_INTERVAL_MS); + run = await input.client.workflows.getRun({ + workspaceId: input.workspaceId, + runId: input.runId, + }); + status = run?.status ?? status; + } + + if (input.isCurrent?.() === false) { + throw new Error(WORKFLOW_COMMAND_SUPERSEDED_MESSAGE); + } + + return run; +} + +function parseWorkflowSlashArgs(argsText: string | undefined): unknown { + const trimmed = argsText?.trim(); + if (!trimmed) { + return {}; + } + + if (trimmed.startsWith("{") || trimmed.startsWith("[")) { + return JSON.parse(trimmed) as unknown; + } + + return { input: trimmed }; +} + // ============================================================================ // Command Dispatcher // ============================================================================ @@ -318,6 +386,155 @@ export async function processSlashCommand( return { clearInput: false, toastShown: true }; } + if (parsed.type === "workflow-run") { + const workflowsEnabled = + context.dynamicWorkflowsEnabled ?? 
+ isExperimentEnabled(EXPERIMENT_IDS.DYNAMIC_WORKFLOWS) === true; + if (!workflowsEnabled) { + setToast({ + id: Date.now().toString(), + type: "error", + message: "Dynamic workflows are disabled", + }); + return { clearInput: false, toastShown: true }; + } + + const activeClient = requireClient(); + if (!activeClient) { + return { clearInput: false, toastShown: true }; + } + if (!context.workspaceId) { + setToast({ + id: Date.now().toString(), + type: "error", + message: "No workspace selected", + }); + return { clearInput: false, toastShown: true }; + } + + let args: unknown; + try { + args = parseWorkflowSlashArgs(parsed.argsText); + } catch (error) { + setToast({ + id: Date.now().toString(), + type: "error", + message: error instanceof Error ? error.message : "Invalid workflow arguments", + }); + return { clearInput: false, toastShown: true }; + } + + const workspaceId = context.workspaceId; + const rawInput = context.rawInput?.trim(); + const rawCommand = rawInput && rawInput.length > 0 ? rawInput : `/${parsed.name}`; + const commandPrefix = rawCommand.split(/\s+/u)[0] ?? `/${parsed.name}`; + const isCurrent = + context.asyncCommandToken != null && context.isAsyncCommandCurrent != null + ? () => context.isAsyncCommandCurrent?.(context.asyncCommandToken!, workspaceId) !== false + : undefined; + + setInput(""); + let sendingStateActive = false; + const setWorkflowSendingState = (active: boolean) => { + if (sendingStateActive === active) { + return; + } + sendingStateActive = active; + context.setSendingState(active); + }; + + setWorkflowSendingState(true); + try { + const result = await activeClient.workflows.start({ + workspaceId, + name: parsed.name, + runInBackground: true, + args, + continuationOptions: context.sendMessageOptions, + rawCommand, + }); + // The workflow is durable and backgrounded; do not pin the composer while polling for + // completion, otherwise the user cannot supersede a long-running slash workflow. 
+ setWorkflowSendingState(false); + if (result.invocationMessagePersisted === true) { + trackCommandUsed("workflow"); + setToast({ + id: Date.now().toString(), + type: "success", + message: `Workflow ${parsed.name} started`, + }); + return { clearInput: true, toastShown: true }; + } + const run = await waitForWorkflowTerminalRun({ + client: activeClient, + workspaceId, + runId: result.runId, + initialStatus: result.status, + isCurrent, + }); + const terminalStatus = run?.status ?? result.status; + if (terminalStatus === "interrupted") { + trackCommandUsed("workflow"); + setToast({ + id: Date.now().toString(), + type: "success", + message: `Workflow ${parsed.name} interrupted`, + }); + return { clearInput: true, toastShown: true }; + } + const workflowResultMessage = buildWorkflowResultContextMessage({ + rawCommand, + name: parsed.name, + runId: result.runId, + status: terminalStatus, + result: result.result, + run, + }); + // Keep workflow outputs model-visible but UI-hidden: rawCommand drives transcript display, + // while the XML block below gives the main agent the completed workflow result. + setWorkflowSendingState(true); + const sendResult = await activeClient.workspace.sendMessage({ + workspaceId, + message: workflowResultMessage, + options: { + ...context.sendMessageOptions, + muxMetadata: { + type: WORKFLOW_RESULT_METADATA_TYPE, + rawCommand, + commandPrefix, + runId: result.runId, + requestedModel: context.sendMessageOptions.model, + }, + }, + }); + if (!sendResult.success) { + throw new Error("Failed to send workflow result to the agent"); + } + context.onMessageSent?.(context.sendMessageOptions.queueDispatchMode ?? 
"tool-end"); + trackCommandUsed("workflow"); + setToast({ + id: Date.now().toString(), + type: "success", + message: `Workflow ${parsed.name} ${terminalStatus}`, + }); + return { clearInput: true, toastShown: true }; + } catch (error) { + if (error instanceof Error && error.message === WORKFLOW_COMMAND_SUPERSEDED_MESSAGE) { + return { clearInput: true, toastShown: false }; + } + setToast({ + id: Date.now().toString(), + type: "error", + message: error instanceof Error ? error.message : "Failed to run workflow", + }); + const currentInput = context.getInput?.(); + const shouldRestoreCommand = currentInput === undefined || currentInput.trim().length === 0; + return { clearInput: !shouldRestoreCommand, toastShown: true }; + } finally { + setWorkflowSendingState(false); + } + } + if (parsed.type === "debug-llm-request") { setInput(""); window.dispatchEvent(createCustomEvent(CUSTOM_EVENTS.OPEN_DEBUG_LLM_REQUEST)); diff --git a/src/browser/utils/messages/StreamingMessageAggregator.test.ts b/src/browser/utils/messages/StreamingMessageAggregator.test.ts index b8e8012373..bcc5790270 100644 --- a/src/browser/utils/messages/StreamingMessageAggregator.test.ts +++ b/src/browser/utils/messages/StreamingMessageAggregator.test.ts @@ -1,6 +1,7 @@ import { describe, test, expect } from "bun:test"; import { CONTEXT_BOUNDARY_KINDS } from "@/common/constants/contextBoundary"; import { createMuxMessage, type DisplayedMessage } from "@/common/types/message"; +import { buildWorkflowRunCardMessage } from "@/common/utils/workflowRunMessages"; import { shouldNotifyOnResponseComplete } from "./responseCompletionMetadata"; import { MAX_HISTORY_HIDDEN_SEGMENTS } from "./transcriptTruncationPlan"; import { StreamingMessageAggregator } from "./StreamingMessageAggregator"; @@ -448,6 +449,72 @@ describe("StreamingMessageAggregator", () => { expect(userMessages[1]?.isSynthetic).toBeUndefined(); }); + test("renders persisted workflow slash invocation before workflow card", () => { + const aggregator 
= new StreamingMessageAggregator(TEST_CREATED_AT); + const command = createMuxMessage("workflow-command", "user", "/deep-research mux", { + timestamp: 1, + historySequence: 1, + muxMetadata: { + type: "workflow-trigger-display", + rawCommand: "/deep-research mux", + commandPrefix: "/deep-research", + runId: "wfr_123", + }, + }); + const card = buildWorkflowRunCardMessage( + { name: "deep-research", args: { input: "mux" } }, + { runId: "wfr_123", status: "running", result: null }, + 2 + ); + card.metadata = { + timestamp: 2, + historySequence: 2, + synthetic: true, + uiVisible: true, + muxMetadata: { type: "workflow-run-card-display", runId: "wfr_123" }, + }; + const hiddenWorkflowResult = createMuxMessage( + "workflow-result", + "user", + '/deep-research mux\n\n{"reportMarkdown":"hidden"}', + { + timestamp: 3, + historySequence: 3, + muxMetadata: { + type: "workflow-result", + rawCommand: "/deep-research mux", + commandPrefix: "/deep-research", + runId: "wfr_123", + }, + } + ); + const assistant = createMuxMessage("assistant-1", "assistant", "Done", { + timestamp: 4, + historySequence: 4, + }); + + aggregator.loadHistoricalMessages([command, card, hiddenWorkflowResult, assistant], false); + + const displayed = aggregator.getDisplayedMessages(); + expect(displayed.map((message) => message.type)).toEqual(["user", "tool", "assistant"]); + expect(displayed[0]).toMatchObject({ + type: "user", + content: "/deep-research mux", + commandPrefix: "/deep-research", + }); + if (displayed[0]?.type !== "user") { + throw new Error("Expected workflow command to render as a user message"); + } + expect(displayed[0].content).not.toContain("mux_workflow_result"); + expect(displayed.some((message) => message.id === "workflow-result")).toBe(false); + expect(displayed[1]).toMatchObject({ + type: "tool", + toolName: "workflow_run", + args: { name: "deep-research", args: { input: "mux" }, run_in_background: true }, + result: { status: "running", runId: "wfr_123", result: null }, + }); + 
}); + test("should strip legacy goal-cleared label from displayed summaries", () => { const aggregator = new StreamingMessageAggregator(TEST_CREATED_AT); const legacySummary = createMuxMessage( diff --git a/src/browser/utils/messages/StreamingMessageAggregator.ts b/src/browser/utils/messages/StreamingMessageAggregator.ts index 32abb3b06f..e34af82294 100644 --- a/src/browser/utils/messages/StreamingMessageAggregator.ts +++ b/src/browser/utils/messages/StreamingMessageAggregator.ts @@ -80,6 +80,7 @@ import { isSideQuestionAnswerMessage as isSideQuestionAnswerMuxMessage, isSideQuestionUserMessage as isSideQuestionUserMuxMessage, } from "@/common/utils/messages/sideQuestion"; +import { isWorkflowResultMessage } from "@/common/utils/workflowRunMessages"; // Maximum number of messages to display in the DOM for performance // Full history is still maintained internally for token counting and stats @@ -3417,9 +3418,9 @@ export class StreamingMessageAggregator { typeof window !== "undefined" && window.api?.debugLlmRequest === true; const shouldHideMessageFromTranscript = (message: MuxMessage): boolean => - message.metadata?.synthetic === true && !showSyntheticMessages && - message.metadata?.uiVisible !== true; + ((message.metadata?.synthetic === true && message.metadata?.uiVisible !== true) || + isWorkflowResultMessage(message)); // Synthetic agent-skill snapshot messages are hidden from the transcript unless // debugLlmRequest is enabled. 
We still want to surface their content in the UI by diff --git a/src/browser/utils/messages/buildSendMessageOptions.ts b/src/browser/utils/messages/buildSendMessageOptions.ts index a8f7cb62e0..7549bd0054 100644 --- a/src/browser/utils/messages/buildSendMessageOptions.ts +++ b/src/browser/utils/messages/buildSendMessageOptions.ts @@ -8,6 +8,8 @@ export interface ExperimentValues { programmaticToolCallingExclusive: boolean | undefined; advisorTool: boolean | undefined; execSubagentHardRestart: boolean | undefined; + dynamicWorkflows: boolean | undefined; + subagentFileReports: boolean | undefined; } export interface SendMessageOptionsInput { diff --git a/src/browser/utils/messages/sendOptions.ts b/src/browser/utils/messages/sendOptions.ts index 90830fc7fc..4d6cbeac67 100644 --- a/src/browser/utils/messages/sendOptions.ts +++ b/src/browser/utils/messages/sendOptions.ts @@ -84,6 +84,8 @@ export function getSendOptionsFromStorage(workspaceId: string): SendMessageOptio ), advisorTool: isExperimentEnabled(EXPERIMENT_IDS.ADVISOR_TOOL), execSubagentHardRestart: isExperimentEnabled(EXPERIMENT_IDS.EXEC_SUBAGENT_HARD_RESTART), + dynamicWorkflows: isExperimentEnabled(EXPERIMENT_IDS.DYNAMIC_WORKFLOWS), + subagentFileReports: isExperimentEnabled(EXPERIMENT_IDS.SUBAGENT_FILE_REPORTS), }, }); } diff --git a/src/browser/utils/slashCommands/experimentVisibility.ts b/src/browser/utils/slashCommands/experimentVisibility.ts index 1313946fd9..c536f653e0 100644 --- a/src/browser/utils/slashCommands/experimentVisibility.ts +++ b/src/browser/utils/slashCommands/experimentVisibility.ts @@ -2,6 +2,7 @@ import { EXPERIMENT_IDS, type ExperimentId } from "@/common/constants/experiment export interface SlashCommandExperimentSnapshot { workspaceHeartbeats: boolean; + dynamicWorkflows?: boolean; } export function resolveSlashCommandExperimentValue( @@ -11,6 +12,8 @@ export function resolveSlashCommandExperimentValue( switch (experimentId) { case EXPERIMENT_IDS.WORKSPACE_HEARTBEATS: return 
snapshot.workspaceHeartbeats; + case EXPERIMENT_IDS.DYNAMIC_WORKFLOWS: + return snapshot.dynamicWorkflows; default: return undefined; } diff --git a/src/browser/utils/slashCommands/parser.test.ts b/src/browser/utils/slashCommands/parser.test.ts index 2d8fb82b94..427673bc26 100644 --- a/src/browser/utils/slashCommands/parser.test.ts +++ b/src/browser/utils/slashCommands/parser.test.ts @@ -304,6 +304,14 @@ describe("plan commands", () => { }); describe("init command", () => { + it("parses explicit workflow invocation", () => { + expect(parseCommand('/workflow deep-research {"topic":"mux"}')).toEqual({ + type: "workflow-run", + name: "deep-research", + argsText: '{"topic":"mux"}', + }); + }); + it("should parse /init as unknown-command (handled as a skill invocation)", () => { expectParse("/init", { type: "unknown-command", diff --git a/src/browser/utils/slashCommands/registry.ts b/src/browser/utils/slashCommands/registry.ts index f8b9b15d97..03c95adcbc 100644 --- a/src/browser/utils/slashCommands/registry.ts +++ b/src/browser/utils/slashCommands/registry.ts @@ -657,6 +657,44 @@ const btwCommandDefinition: SlashCommandDefinition = { }, }; +const WORKFLOW_COMMAND_USAGE = "/workflow [args]"; + +const workflowCommandDefinition: SlashCommandDefinition = { + key: "workflow", + description: "Run an explicit workflow by name", + experimentGate: EXPERIMENT_IDS.DYNAMIC_WORKFLOWS, + inputHint: WORKFLOW_COMMAND_USAGE, + suggestions: ({ partialToken, context }) => { + const workflows: SuggestionDefinition[] = (context.workflows ?? 
[]) + .filter((workflow) => workflow.executable) + .map((workflow) => ({ + key: workflow.name, + description: `${workflow.description} (${workflow.scope} workflow)`, + })); + return filterAndMapSuggestions(workflows, partialToken, (workflow) => ({ + id: `workflow-explicit:${workflow.key}`, + display: workflow.key, + description: workflow.description, + replacement: `/workflow ${workflow.key} `, + kind: "workflow", + })); + }, + handler: ({ rawInput }): ParsedCommand => { + const trimmed = rawInput.trim(); + if (!trimmed) { + return { type: "command-missing-args", command: "workflow", usage: WORKFLOW_COMMAND_USAGE }; + } + const firstWhitespace = trimmed.search(/\s/u); + const name = firstWhitespace === -1 ? trimmed : trimmed.slice(0, firstWhitespace); + const argsText = firstWhitespace === -1 ? undefined : trimmed.slice(firstWhitespace).trim(); + return { + type: "workflow-run", + name, + ...(argsText ? { argsText } : {}), + }; + }, +}; + const debugLlmRequestCommandDefinition: SlashCommandDefinition = { key: "debug-llm-request", description: "Show the last LLM request sent (debug)", @@ -677,6 +715,7 @@ export const SLASH_COMMAND_DEFINITIONS: readonly SlashCommandDefinition[] = [ heartbeatCommandDefinition, goalCommandDefinition, btwCommandDefinition, + workflowCommandDefinition, debugLlmRequestCommandDefinition, ]; diff --git a/src/browser/utils/slashCommands/suggestions.test.ts b/src/browser/utils/slashCommands/suggestions.test.ts index 5d7b2c3426..d20271d150 100644 --- a/src/browser/utils/slashCommands/suggestions.test.ts +++ b/src/browser/utils/slashCommands/suggestions.test.ts @@ -1,3 +1,4 @@ +/* eslint-disable @typescript-eslint/no-unsafe-argument */ import { describe, it, expect } from "bun:test"; import { EXPERIMENT_IDS, type ExperimentId } from "@/common/constants/experiments"; import { getSlashCommandSuggestions } from "./suggestions"; @@ -97,6 +98,125 @@ describe("getSlashCommandSuggestions", () => { expect(suggestions.map((s) => 
s.display)).toContain("/deep-review"); }); + it("includes workflows distinctly and hides ambiguous top-level workflow shortcuts", () => { + const suggestions = getSlashCommandSuggestions("/deep", { + agentSkills: [ + { + name: "deep-review", + description: "Review deeply", + scope: "project", + }, + ], + workflows: [ + { + name: "deep-research", + description: "Research deeply", + scope: "built-in", + executable: true, + }, + { + name: "deep-review", + description: "Ambiguous workflow", + scope: "project", + executable: true, + }, + ], + }); + + expect(suggestions).toContainEqual( + expect.objectContaining({ + id: "workflow:deep-research", + display: "/deep-research", + kind: "workflow", + replacement: "/deep-research ", + }) + ); + expect(suggestions).not.toContainEqual(expect.objectContaining({ id: "workflow:deep-review" })); + expect(suggestions).toContainEqual(expect.objectContaining({ id: "skill:deep-review" })); + }); + + it("hides top-level workflow shortcuts that collide with model aliases", () => { + const workflow = { + name: "haiku", + description: "Alias collision workflow", + scope: "global" as const, + executable: true, + }; + + const suggestions = getSlashCommandSuggestions("/ha", { workflows: [workflow] }); + + expect(suggestions).toContainEqual(expect.objectContaining({ id: "model-oneshot:haiku" })); + expect(suggestions).not.toContainEqual(expect.objectContaining({ id: "workflow:haiku" })); + + const explicitSuggestions = getSlashCommandSuggestions("/workflow ha", { + workflows: [workflow], + isExperimentEnabled: (experimentId) => experimentId === EXPERIMENT_IDS.DYNAMIC_WORKFLOWS, + }); + + expect(explicitSuggestions).toContainEqual( + expect.objectContaining({ + id: "workflow-explicit:haiku", + display: "haiku", + kind: "workflow", + replacement: "/workflow haiku ", + }) + ); + }); + + it("suggests explicit workflow invocations for ambiguous workflow names", () => { + const suggestions = getSlashCommandSuggestions("/workflow deep", { + 
agentSkills: [ + { + name: "deep-review", + description: "Review deeply", + scope: "project", + }, + ], + workflows: [ + { + name: "deep-review", + description: "Ambiguous workflow", + scope: "project", + executable: true, + }, + ], + isExperimentEnabled: (experimentId) => experimentId === EXPERIMENT_IDS.DYNAMIC_WORKFLOWS, + }); + + expect(suggestions).toEqual([ + expect.objectContaining({ + id: "workflow-explicit:deep-review", + display: "deep-review", + kind: "workflow", + replacement: "/workflow deep-review ", + }), + ]); + }); + + it("suggests workflows while creating a new workspace", () => { + const suggestions = getSlashCommandSuggestions("/workflow deep", { + variant: "creation", + workflows: [ + { + name: "deep-research", + description: "Research deeply", + scope: "built-in", + executable: true, + }, + ], + isExperimentEnabled: (experimentId) => experimentId === EXPERIMENT_IDS.DYNAMIC_WORKFLOWS, + }); + + expect(suggestions).toContainEqual( + expect.objectContaining({ + id: "workflow-explicit:deep-research", + display: "deep-research", + kind: "workflow", + replacement: "/workflow deep-research ", + }) + ); + }); + it("filters top level commands by partial input", () => { const suggestions = getSlashCommandSuggestions("/cl"); expect(suggestions).toHaveLength(1); diff --git a/src/browser/utils/slashCommands/suggestions.ts b/src/browser/utils/slashCommands/suggestions.ts index d2e43901c3..a195563571 100644 --- a/src/browser/utils/slashCommands/suggestions.ts +++ b/src/browser/utils/slashCommands/suggestions.ts @@ -75,10 +75,36 @@ function buildTopLevelSuggestions( id: `skill:${definition.key}`, display: `/${definition.key}`, description: definition.description, + kind: "skill", replacement, }; }); + const skillNames = new Set((context.agentSkills ?? []).map((skill) => skill.name)); + const workflowDefinitions: SuggestionDefinition[] = (context.workflows ?? 
[]) + .filter((workflow) => workflow.executable) + // Known commands, skills, and model one-shot aliases must not execute workflow code through + // ambiguous top-level slash shortcuts. The explicit /workflow command remains available. + .filter((workflow) => !SLASH_COMMAND_DEFINITION_MAP.has(workflow.name)) + .filter((workflow) => !skillNames.has(workflow.name)) + .filter((workflow) => !Object.hasOwn(MODEL_ABBREVIATIONS, workflow.name)) + .map((workflow) => ({ + key: workflow.name, + description: `${workflow.description} (${workflow.scope} workflow)`, + })); + + const workflowSuggestions = filterAndMapSuggestions( + workflowDefinitions, + partial, + (definition) => ({ + id: `workflow:${definition.key}`, + display: `/${definition.key}`, + description: definition.description, + kind: "workflow", + replacement: `/${definition.key} `, + }) + ); + // Model alias one-shot suggestions (e.g., /haiku, /sonnet, /opus+high). // The build callback below hardcodes the trailing space, so `appendSpace` // is intentionally omitted here. 
@@ -100,7 +126,12 @@ function buildTopLevelSuggestions( }) ); - return [...commandSuggestions, ...skillSuggestions, ...modelAliasSuggestions]; + return [ + ...commandSuggestions, + ...skillSuggestions, + ...workflowSuggestions, + ...modelAliasSuggestions, + ]; } function buildSubcommandSuggestions( diff --git a/src/browser/utils/slashCommands/types.ts b/src/browser/utils/slashCommands/types.ts index 0b38a4e0c0..6fc84f8476 100644 --- a/src/browser/utils/slashCommands/types.ts +++ b/src/browser/utils/slashCommands/types.ts @@ -11,6 +11,7 @@ import type { ExperimentId } from "@/common/constants/experiments"; import type { AgentSkillDescriptor } from "@/common/types/agentSkill"; +import type { WorkflowDefinitionDescriptor } from "@/common/types/workflow"; import type { ParsedThinkingInput } from "@/common/types/thinking"; export type ParsedCommand = @@ -31,6 +32,7 @@ export type ParsedCommand = | { type: "vim-toggle" } | { type: "plan-show" } | { type: "plan-open" } + | { type: "workflow-run"; name: string; argsText?: string } | { type: "debug-llm-request" } | { type: "unknown-command"; command: string; subcommand?: string } | { type: "command-unknown-flag"; command: string; flag: string; usage?: string } @@ -94,10 +96,12 @@ export interface SlashSuggestion { id: string; display: string; description: string; + kind?: "command" | "skill" | "workflow" | "model"; replacement: string; } export interface SlashSuggestionContext extends SlashCommandVisibilityContext { + workflows?: WorkflowDefinitionDescriptor[]; agentSkills?: AgentSkillDescriptor[]; } diff --git a/src/browser/utils/workflowRunMessages.test.ts b/src/browser/utils/workflowRunMessages.test.ts new file mode 100644 index 0000000000..95b6593a8b --- /dev/null +++ b/src/browser/utils/workflowRunMessages.test.ts @@ -0,0 +1,199 @@ +import { describe, expect, test } from "bun:test"; + +import { + buildWorkflowRunCardMessage, + filterWorkflowDisplayOnlyMessages, + getWorkflowRunCardProjection, + 
hasWorkflowRunToolCallMessage, +} from "./workflowRunMessages"; +import type { MuxMessage } from "@/common/types/message"; +import type { WorkflowRunRecord } from "@/common/types/workflow"; + +describe("buildWorkflowRunCardMessage", () => { + test("builds a stable workflow_run card message with the current durable run", () => { + const run: WorkflowRunRecord = { + id: "wfr_reload", + workspaceId: "workspace-1", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:test", + args: { topic: "reload" }, + status: "completed", + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:01.000Z", + events: [], + steps: [], + }; + + const message = buildWorkflowRunCardMessage( + { name: run.definition.name, args: run.args }, + { runId: run.id, status: run.status, result: { reportMarkdown: "done" }, run }, + 123 + ); + + expect(message.id).toBe("workflow-run-wfr_reload"); + expect(message.parts[0]).toMatchObject({ + type: "dynamic-tool", + toolName: "workflow_run", + input: { name: "deep-research", args: { topic: "reload" }, run_in_background: true }, + output: { status: "completed", runId: "wfr_reload", result: { reportMarkdown: "done" }, run }, + }); + }); + + test("filters durable workflow UI-only rows while preserving workflow results", () => { + const trigger: MuxMessage = { + id: "workflow-command", + role: "user", + parts: [{ type: "text", text: "/deep-research mux" }], + metadata: { + historySequence: 1, + muxMetadata: { + type: "workflow-trigger-display", + rawCommand: "/deep-research mux", + runId: "wfr_1", + }, + }, + }; + const card = buildWorkflowRunCardMessage( + { name: "deep-research", args: { input: "mux" } }, + { runId: "wfr_1", status: "running", result: null }, + 2 + ); + card.metadata = { + historySequence: 2, + muxMetadata: { type: "workflow-run-card-display", runId: "wfr_1" 
}, + }; + const result: MuxMessage = { + id: "workflow-result", + role: "user", + parts: [{ type: "text", text: "/deep-research mux\n\n" }], + metadata: { + historySequence: 3, + muxMetadata: { type: "workflow-result", rawCommand: "/deep-research mux", runId: "wfr_1" }, + }, + }; + + expect(filterWorkflowDisplayOnlyMessages([trigger, card, result])).toEqual([result]); + }); + + test("detects existing persisted workflow_run tool calls by run id or in-flight input", () => { + const run = { + id: "wfr_existing", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in" as const, + executable: true, + }, + args: { topic: "reload" }, + }; + const completedMessage = buildWorkflowRunCardMessage( + { name: run.definition.name, args: run.args }, + { runId: run.id, status: "completed", result: { reportMarkdown: "done" } }, + 123 + ); + const inFlightMessage: MuxMessage = { + id: "assistant_1", + role: "assistant", + parts: [ + { + type: "dynamic-tool", + toolCallId: "call_1", + toolName: "workflow_run", + state: "input-available", + input: { name: "deep-research", args: { topic: "reload" } }, + }, + ], + }; + + expect(hasWorkflowRunToolCallMessage([completedMessage], run)).toBe(true); + expect(hasWorkflowRunToolCallMessage([inFlightMessage], run)).toBe(true); + expect(hasWorkflowRunToolCallMessage([completedMessage], { ...run, id: "wfr_missing" })).toBe( + false + ); + }); + + test("projects updated terminal workflow cards while preserving the existing card slot", () => { + const run = { + id: "wfr_refresh", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in" as const, + executable: true, + }, + args: { topic: "reload" }, + status: "completed" as const, + }; + const staleMessage = buildWorkflowRunCardMessage( + { name: run.definition.name, args: run.args }, + { runId: run.id, status: "running", result: null }, + 123 + ); + staleMessage.metadata = { + historySequence: 42, + muxMetadata: { type: 
"workflow-run-card-display", runId: run.id }, + }; + + const projection = getWorkflowRunCardProjection([staleMessage], run); + + expect(projection).toEqual({ shouldProject: true, existingMessage: staleMessage }); + }); + + test("does not project cards already owned by assistant workflow tool calls", () => { + const run = { + id: "wfr_running", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in" as const, + executable: true, + }, + args: { topic: "reload" }, + status: "running" as const, + }; + const inFlightMessage: MuxMessage = { + id: "assistant_1", + role: "assistant", + parts: [ + { + type: "dynamic-tool", + toolCallId: "call_1", + toolName: "workflow_run", + state: "input-available", + input: { name: "deep-research", args: { topic: "reload" } }, + }, + ], + }; + const completedAssistantMessage: MuxMessage = { + id: "assistant_2", + role: "assistant", + parts: [ + { + type: "dynamic-tool", + toolCallId: "call_2", + toolName: "workflow_run", + state: "output-available", + input: { name: "deep-research", args: { topic: "reload" } }, + output: { runId: run.id, status: "completed", result: { reportMarkdown: "done" } }, + }, + ], + }; + + expect(getWorkflowRunCardProjection([inFlightMessage], run)).toEqual({ + shouldProject: false, + existingMessage: null, + }); + expect( + getWorkflowRunCardProjection([completedAssistantMessage], { ...run, status: "completed" }) + ).toEqual({ + shouldProject: false, + existingMessage: null, + }); + }); +}); diff --git a/src/browser/utils/workflowRunMessages.ts b/src/browser/utils/workflowRunMessages.ts new file mode 100644 index 0000000000..854c21ef6e --- /dev/null +++ b/src/browser/utils/workflowRunMessages.ts @@ -0,0 +1,136 @@ +import { addEphemeralMessage } from "@/browser/stores/WorkspaceStore"; +import type { MuxMessage } from "@/common/types/message"; +import type { WorkflowRunRecord } from "@/common/types/workflow"; +import assert from "@/common/utils/assert"; +import { + 
buildWorkflowRunCardMessage, + filterWorkflowDisplayOnlyMessages, + type WorkflowRunCardInput, + type WorkflowRunCardResult, +} from "@/common/utils/workflowRunMessages"; + +export { buildWorkflowRunCardMessage, filterWorkflowDisplayOnlyMessages }; +export type { WorkflowRunCardInput, WorkflowRunCardResult }; + +function getLatestWorkflowResult(run: WorkflowRunRecord): unknown { + return run.events.findLast((event) => event.type === "result")?.result ?? null; +} + +function getOutputRunId(output: unknown): string | null { + if (output != null && typeof output === "object") { + const runId = (output as Record).runId; + if (typeof runId === "string" && runId.length > 0) { + return runId; + } + } + return null; +} + +function getWorkflowInput(input: unknown): WorkflowRunCardInput | null { + if (input != null && typeof input === "object") { + const record = input as Record; + if (typeof record.name === "string" && record.name.length > 0) { + return { name: record.name, args: record.args ?? {} }; + } + } + return null; +} + +function jsonEqual(left: unknown, right: unknown): boolean { + try { + return JSON.stringify(left) === JSON.stringify(right); + } catch { + return false; + } +} + +function getProjectedWorkflowRunCardMessageId(runId: string): string { + assert(runId.length > 0, "getProjectedWorkflowRunCardMessageId: run id is required"); + return `workflow-run-${runId}`; +} + +export function findProjectedWorkflowRunCardMessage( + messages: readonly MuxMessage[], + runId: string +): MuxMessage | null { + assert(runId.length > 0, "findProjectedWorkflowRunCardMessage: run id is required"); + const messageId = getProjectedWorkflowRunCardMessageId(runId); + return ( + messages.find( + (message) => + message.id === messageId && + message.parts.some( + (part) => + part.type === "dynamic-tool" && + part.toolName === "workflow_run" && + part.state === "output-available" && + getOutputRunId(part.output) === runId + ) + ) ?? 
null + ); +} + +export function hasWorkflowRunToolCallMessage( + messages: readonly MuxMessage[], + run: Pick +): boolean { + assert(run.id.length > 0, "hasWorkflowRunToolCallMessage: run id is required"); + return messages.some((message) => + message.parts.some((part) => { + if (part.type !== "dynamic-tool" || part.toolName !== "workflow_run") { + return false; + } + if (part.state === "output-available") { + return getOutputRunId(part.output) === run.id; + } + const input = getWorkflowInput(part.input); + return input?.name === run.definition.name && jsonEqual(input.args, run.args); + }) + ); +} + +export function getWorkflowRunCardProjection( + messages: readonly MuxMessage[], + run: Pick +): { shouldProject: boolean; existingMessage: MuxMessage | null } { + assert(run.id.length > 0, "getWorkflowRunCardProjection: run id is required"); + const existingMessage = findProjectedWorkflowRunCardMessage(messages, run.id); + if (existingMessage != null) { + return { shouldProject: true, existingMessage }; + } + + // Normal assistant workflow_run tool calls render and refresh themselves. Only synthetic + // projected cards should be replaced here; otherwise we would duplicate model-started cards. 
+ if (hasWorkflowRunToolCallMessage(messages, run)) { + return { shouldProject: false, existingMessage: null }; + } + + return { shouldProject: true, existingMessage: null }; +} + +export function addWorkflowRunCardMessage( + workspaceId: string, + input: WorkflowRunCardInput, + result: WorkflowRunCardResult, + options?: { existingMessage?: MuxMessage | null } +): void { + assert(workspaceId.length > 0, "addWorkflowRunCardMessage: workspaceId is required"); + const message = buildWorkflowRunCardMessage(input, result); + if (options?.existingMessage?.metadata != null) { + message.metadata = options.existingMessage.metadata; + } + addEphemeralMessage(workspaceId, message); +} + +export function addWorkflowRunCardMessageForRun( + workspaceId: string, + run: WorkflowRunRecord, + options?: { existingMessage?: MuxMessage | null } +): void { + addWorkflowRunCardMessage( + workspaceId, + { name: run.definition.name, args: run.args }, + { runId: run.id, status: run.status, result: getLatestWorkflowResult(run), run }, + options + ); +} diff --git a/src/cli/run.ts b/src/cli/run.ts index 438343312f..b20ff12fa8 100644 --- a/src/cli/run.ts +++ b/src/cli/run.ts @@ -284,6 +284,8 @@ function buildExperimentsObject(experimentIds: string[]): SendMessageOptions["ex programmaticToolCalling: experimentIds.includes("programmatic-tool-calling"), programmaticToolCallingExclusive: experimentIds.includes("programmatic-tool-calling-exclusive"), execSubagentHardRestart: experimentIds.includes("exec-subagent-hard-restart"), + dynamicWorkflows: experimentIds.includes("dynamic-workflows"), + subagentFileReports: experimentIds.includes("subagent-file-reports"), }; } diff --git a/src/common/config/schemas/taskSettings.ts b/src/common/config/schemas/taskSettings.ts index 2ead3a550c..5d6542ccb7 100644 --- a/src/common/config/schemas/taskSettings.ts +++ b/src/common/config/schemas/taskSettings.ts @@ -1,7 +1,7 @@ import { z } from "zod"; export const TASK_SETTINGS_LIMITS = { - maxParallelAgentTasks: { 
min: 1, max: 256, default: 3 }, + maxParallelAgentTasks: { min: 1, max: 256, default: 16 }, maxTaskNestingDepth: { min: 1, max: 5, default: 3 }, } as const; diff --git a/src/common/constants/experiments.ts b/src/common/constants/experiments.ts index eebb22e742..793e7fb189 100644 --- a/src/common/constants/experiments.ts +++ b/src/common/constants/experiments.ts @@ -16,6 +16,8 @@ export const EXPERIMENT_IDS = { ADVISOR_TOOL: "advisor-tool", WORKSPACE_HEARTBEATS: "workspace-heartbeats", PORTABLE_DESKTOP: "portable-desktop", + DYNAMIC_WORKFLOWS: "dynamic-workflows", + SUBAGENT_FILE_REPORTS: "subagent-file-reports", } as const; export type ExperimentId = (typeof EXPERIMENT_IDS)[keyof typeof EXPERIMENT_IDS]; @@ -131,6 +133,23 @@ export const EXPERIMENTS: Record = { platformRestriction: ["linux"], showInSettings: true, }, + [EXPERIMENT_IDS.DYNAMIC_WORKFLOWS]: { + id: EXPERIMENT_IDS.DYNAMIC_WORKFLOWS, + name: "Dynamic Workflows", + description: "Enable durable JavaScript workflow orchestration for delegated agent tasks", + enabledByDefault: false, + userOverridable: true, + showInSettings: true, + }, + [EXPERIMENT_IDS.SUBAGENT_FILE_REPORTS]: { + id: EXPERIMENT_IDS.SUBAGENT_FILE_REPORTS, + name: "Subagent File Reports", + description: + "Submit subagent task reports through workspace files (`report.md` and `structured-output.json`)", + enabledByDefault: false, + userOverridable: true, + showInSettings: true, + }, }; function getPlatformDisplayName(platform: NodeJS.Platform): string { diff --git a/src/common/orpc/schemas.ts b/src/common/orpc/schemas.ts index 70b66f69ab..23a38ba33d 100644 --- a/src/common/orpc/schemas.ts +++ b/src/common/orpc/schemas.ts @@ -45,6 +45,7 @@ export { FrontendWorkspaceMetadataSchema, GitStatusSchema, ProjectRefSchema, + WorkflowTaskMetadataSchema, WorkspaceActivitySnapshotSchema, WorkspaceGoalDefaultsOverrideSchema, WorkspaceHeartbeatSettingsSchema, @@ -103,6 +104,23 @@ export { SkillNameSchema, } from "./schemas/agentSkill"; +// Workflow schemas 
+export { + StructuredTaskOutputSchema, + WorkflowDefinitionDescriptorSchema, + WorkflowDefinitionScopeSchema, + WorkflowEventSequenceSchema, + WorkflowNameSchema, + WorkflowResultSchema, + WorkflowRunEventSchema, + WorkflowRunIdSchema, + WorkflowRunRecordSchema, + WorkflowRunStatusSchema, + WorkflowRunStatusTransitionSchema, + WorkflowStepRecordSchema, + WorkflowStepStatusSchema, +} from "./schemas/workflow"; + // Instruction context schemas (AGENTS.md, CLAUDE.md, …) export { AdditionalSystemContextSchema, @@ -260,6 +278,7 @@ export { menu, agentSkills, agents, + workflows, nameGeneration, projects, mcpOauth, diff --git a/src/common/orpc/schemas/api.ts b/src/common/orpc/schemas/api.ts index e1120c0700..8e20fd0d8a 100644 --- a/src/common/orpc/schemas/api.ts +++ b/src/common/orpc/schemas/api.ts @@ -71,6 +71,13 @@ import { AgentSkillPackageSchema, SkillNameSchema, } from "./agentSkill"; +import { + WorkflowDefinitionDescriptorSchema, + WorkflowNameSchema, + WorkflowRunIdSchema, + WorkflowRunRecordSchema, + WorkflowRunStatusSchema, +} from "./workflow"; import { AgentDefinitionDescriptorSchema, AgentDefinitionPackageSchema, @@ -1713,6 +1720,91 @@ export const agentSkills = { }, }; +const WorkflowDefinitionDiscoveryInputSchema = z + .object({ + projectPath: z.string().min(1).optional(), + workspaceId: z.string().min(1).optional(), + }) + .strict() + .refine((data) => Boolean(data.projectPath ?? 
data.workspaceId), { + message: "Either projectPath or workspaceId must be provided", + }); + +// Workflows +export const workflows = { + listDefinitions: { + input: WorkflowDefinitionDiscoveryInputSchema, + output: z.array(WorkflowDefinitionDescriptorSchema), + }, + readDefinition: { + input: z.object({ workspaceId: z.string().min(1), name: WorkflowNameSchema }).strict(), + output: z.object({ descriptor: WorkflowDefinitionDescriptorSchema, source: z.string().min(1) }), + }, + listRuns: { + input: z.object({ workspaceId: z.string().min(1) }).strict(), + output: z.array(WorkflowRunRecordSchema), + }, + getRun: { + input: z.object({ workspaceId: z.string().min(1), runId: WorkflowRunIdSchema }).strict(), + output: WorkflowRunRecordSchema.nullable(), + }, + interrupt: { + input: z.object({ workspaceId: z.string().min(1), runId: WorkflowRunIdSchema }).strict(), + output: WorkflowRunRecordSchema, + }, + resume: { + input: z.object({ workspaceId: z.string().min(1), runId: WorkflowRunIdSchema }).strict(), + output: z.object({ + runId: WorkflowRunIdSchema, + status: WorkflowRunStatusSchema, + result: z.unknown(), + }), + }, + promoteScratchDefinition: { + input: z + .object({ + workspaceId: z.string().min(1), + name: WorkflowNameSchema, + description: z.string().min(1).max(1024), + location: z.enum(["project", "global"]), + overwrite: z.boolean().optional(), + }) + .strict(), + output: WorkflowDefinitionDescriptorSchema, + }, + promoteScratch: { + input: z + .object({ + workspaceId: z.string().min(1), + runId: WorkflowRunIdSchema, + name: WorkflowNameSchema, + description: z.string().min(1).max(1024), + location: z.enum(["project", "global"]), + overwrite: z.boolean().optional(), + }) + .strict(), + output: WorkflowDefinitionDescriptorSchema, + }, + start: { + input: z + .object({ + workspaceId: z.string().min(1), + name: WorkflowNameSchema, + runInBackground: z.boolean().optional(), + args: z.unknown().optional(), + rawCommand: z.string().min(1).optional(), + 
continuationOptions: SendMessageOptionsSchema.optional(), + }) + .strict(), + output: z.object({ + runId: WorkflowRunIdSchema, + status: WorkflowRunStatusSchema, + result: z.unknown(), + invocationMessagePersisted: z.boolean().optional(), + }), + }, +}; + // Name generation for new workspaces (decoupled from workspace creation) export const nameGeneration = { generate: { diff --git a/src/common/orpc/schemas/stream.ts b/src/common/orpc/schemas/stream.ts index f7c14846e1..92bfb0fa2f 100644 --- a/src/common/orpc/schemas/stream.ts +++ b/src/common/orpc/schemas/stream.ts @@ -685,6 +685,8 @@ export const ExperimentsSchema = z.object({ programmaticToolCalling: z.boolean().optional(), programmaticToolCallingExclusive: z.boolean().optional(), advisorTool: z.boolean().optional(), + dynamicWorkflows: z.boolean().optional(), + subagentFileReports: z.boolean().optional(), execSubagentHardRestart: z.boolean().optional(), }); diff --git a/src/common/orpc/schemas/telemetry.ts b/src/common/orpc/schemas/telemetry.ts index aeb0163a58..0de5c1c660 100644 --- a/src/common/orpc/schemas/telemetry.ts +++ b/src/common/orpc/schemas/telemetry.ts @@ -45,6 +45,7 @@ const TelemetryCommandTypeSchema = z.enum([ "plan", "providers", "goal", + "workflow", "btw", ]); diff --git a/src/common/orpc/schemas/workflow.test.ts b/src/common/orpc/schemas/workflow.test.ts new file mode 100644 index 0000000000..d0a1bada73 --- /dev/null +++ b/src/common/orpc/schemas/workflow.test.ts @@ -0,0 +1,125 @@ +import { describe, expect, test } from "bun:test"; +import { EXPERIMENTS, EXPERIMENT_IDS } from "@/common/constants/experiments"; +import { WorkflowTaskMetadataSchema } from "./workspace"; +import { + WorkflowDefinitionDescriptorSchema, + WorkflowEventSequenceSchema, + WorkflowNameSchema, + WorkflowRunIdSchema, + WorkflowRunRecordSchema, + WorkflowRunStatusTransitionSchema, +} from "./workflow"; + +describe("workflow domain schemas", () => { + test("accepts a durable workflow run record with ordered events", () => 
{ + const run = WorkflowRunRecordSchema.parse({ + id: "wfr_123", + workspaceId: "workspace-1", + definition: { + name: "deep-research", + description: "Research a topic", + scope: "built-in", + executable: true, + }, + definitionSource: "export default async function workflow() { return null; }", + definitionHash: "sha256:abc123", + args: { topic: "workflow replay" }, + status: "running", + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:01.000Z", + events: [ + { + sequence: 1, + type: "status", + at: "2026-05-29T00:00:00.000Z", + status: "running", + }, + { + sequence: 2, + type: "phase", + at: "2026-05-29T00:00:01.000Z", + name: "scope", + }, + { + sequence: 3, + type: "patch", + at: "2026-05-29T00:00:02.000Z", + stepId: "apply-implementation", + sourceTaskId: "task_impl", + status: "applied", + details: { taskId: "task_impl" }, + }, + ], + steps: [], + }); + + expect(run.definition.name).toBe("deep-research"); + expect(run.events.map((event) => event.sequence)).toEqual([1, 2, 3]); + }); + + test("rejects workflow run ids that could escape the run directory", () => { + expect(WorkflowRunIdSchema.safeParse("wfr_123").success).toBe(true); + expect(WorkflowRunIdSchema.safeParse("../wfr_123").success).toBe(false); + expect(WorkflowRunIdSchema.safeParse("wfr_../escape").success).toBe(false); + expect(WorkflowRunIdSchema.safeParse("task_123").success).toBe(false); + }); + + test("rejects invalid workflow names and non-executable untrusted descriptors", () => { + expect(WorkflowNameSchema.safeParse("bad--name").success).toBe(false); + expect(WorkflowNameSchema.safeParse("DeepResearch").success).toBe(false); + + const result = WorkflowDefinitionDescriptorSchema.safeParse({ + name: "local-workflow", + description: "Project local workflow", + scope: "project", + executable: false, + blockedReason: "Project is not trusted", + }); + + expect(result.success).toBe(true); + }); + + test("rejects out-of-order events", () => { + const result = 
WorkflowEventSequenceSchema.safeParse([ + { sequence: 2, type: "log", at: "2026-05-29T00:00:00.000Z", message: "late" }, + { sequence: 1, type: "log", at: "2026-05-29T00:00:01.000Z", message: "early" }, + ]); + + expect(result.success).toBe(false); + }); + + test("rejects impossible status transitions", () => { + expect( + WorkflowRunStatusTransitionSchema.safeParse({ from: "completed", to: "running" }).success + ).toBe(false); + expect( + WorkflowRunStatusTransitionSchema.safeParse({ from: "running", to: "interrupted" }).success + ).toBe(true); + }); +}); + +describe("workflow task metadata schema", () => { + test("accepts workflow task metadata with an output schema", () => { + const parsed = WorkflowTaskMetadataSchema.parse({ + runId: "wfr_123", + stepId: "claims", + outputSchema: { type: "object" }, + }); + + expect(parsed).toEqual({ + runId: "wfr_123", + stepId: "claims", + outputSchema: { type: "object" }, + }); + }); +}); + +describe("workflow experiment gate", () => { + test("keeps dynamic workflows opt-in during rollout", () => { + const experiment = EXPERIMENTS[EXPERIMENT_IDS.DYNAMIC_WORKFLOWS]; + + expect(experiment.enabledByDefault).toBe(false); + expect(experiment.userOverridable).toBe(true); + expect(experiment.showInSettings).toBe(true); + }); +}); diff --git a/src/common/orpc/schemas/workflow.ts b/src/common/orpc/schemas/workflow.ts new file mode 100644 index 0000000000..185b2dfda3 --- /dev/null +++ b/src/common/orpc/schemas/workflow.ts @@ -0,0 +1,187 @@ +import { z } from "zod"; + +export const WorkflowNameSchema = z + .string() + .min(1) + .max(64) + .regex(/^[a-z0-9]+(?:-[a-z0-9]+)*$/); + +export const WorkflowDefinitionScopeSchema = z.enum(["project", "global", "built-in", "scratch"]); + +export const WorkflowRunIdSchema = z + .string() + .min(1) + .max(128) + .regex(/^wfr_[A-Za-z0-9_-]+$/); + +export const WorkflowRunStatusSchema = z.enum([ + "pending", + "running", + "backgrounded", + "interrupted", + "completed", + "failed", +]); + +const 
IsoDateTimeSchema = z.string().datetime({ offset: true }); +const JsonValueSchema: z.ZodType = z.lazy(() => + z.union([ + z.string(), + z.number(), + z.boolean(), + z.null(), + z.array(JsonValueSchema), + z.record(z.string(), JsonValueSchema), + ]) +); + +export const WorkflowDefinitionDescriptorSchema = z + .object({ + name: WorkflowNameSchema, + description: z.string().min(1).max(1024), + scope: WorkflowDefinitionScopeSchema, + sourcePath: z.string().min(1).optional(), + executable: z.boolean(), + blockedReason: z.string().min(1).optional(), + }) + .refine((value) => value.executable || value.blockedReason != null, { + message: "Non-executable workflow definitions must include a blocked reason", + path: ["blockedReason"], + }); + +export const WorkflowResultSchema = z.object({ + reportMarkdown: z.string(), + structuredOutput: JsonValueSchema.optional(), +}); + +export const StructuredTaskOutputSchema = z.object({ + reportMarkdown: z.string(), + title: z.string().min(1).nullable().optional(), + structuredOutput: JsonValueSchema.optional(), + taskId: z.string().min(1).optional(), +}); + +export const WorkflowRunEventSchema = z.discriminatedUnion("type", [ + z.object({ + sequence: z.number().int().positive(), + type: z.literal("status"), + at: IsoDateTimeSchema, + status: WorkflowRunStatusSchema, + }), + z.object({ + sequence: z.number().int().positive(), + type: z.literal("phase"), + at: IsoDateTimeSchema, + name: z.string().min(1), + details: JsonValueSchema.optional(), + }), + z.object({ + sequence: z.number().int().positive(), + type: z.literal("log"), + at: IsoDateTimeSchema, + message: z.string().min(1), + data: JsonValueSchema.optional(), + }), + z.object({ + sequence: z.number().int().positive(), + type: z.literal("task"), + at: IsoDateTimeSchema, + stepId: z.string().min(1), + taskId: z.string().min(1), + status: z.string().min(1), + }), + z.object({ + sequence: z.number().int().positive(), + type: z.literal("patch"), + at: IsoDateTimeSchema, + stepId: 
z.string().min(1), + sourceTaskId: z.string().min(1), + status: z.enum(["started", "applied", "conflict", "failed"]), + details: JsonValueSchema.optional(), + }), + z.object({ + sequence: z.number().int().positive(), + type: z.literal("validation"), + at: IsoDateTimeSchema, + stepId: z.string().min(1), + success: z.boolean(), + message: z.string().min(1).optional(), + }), + z.object({ + sequence: z.number().int().positive(), + type: z.literal("result"), + at: IsoDateTimeSchema, + result: WorkflowResultSchema, + }), + z.object({ + sequence: z.number().int().positive(), + type: z.literal("error"), + at: IsoDateTimeSchema, + message: z.string().min(1), + }), +]); + +export const WorkflowEventSequenceSchema = z + .array(WorkflowRunEventSchema) + .superRefine((events, ctx) => { + let previousSequence = 0; + for (const [index, event] of events.entries()) { + if (event.sequence <= previousSequence) { + ctx.addIssue({ + code: z.ZodIssueCode.custom, + message: "Workflow events must be strictly ordered by increasing sequence", + path: [index, "sequence"], + }); + } + previousSequence = event.sequence; + } + }); + +export const WorkflowStepStatusSchema = z.enum(["started", "completed", "failed", "interrupted"]); + +export const WorkflowStepRecordSchema = z.object({ + stepId: z.string().min(1), + inputHash: z.string().min(1), + status: WorkflowStepStatusSchema, + taskId: z.string().min(1).optional(), + startedAt: IsoDateTimeSchema, + completedAt: IsoDateTimeSchema.optional(), + result: StructuredTaskOutputSchema.optional(), + error: z.string().min(1).optional(), +}); + +const WorkflowRunStatusTransitions: Record< + z.infer, + ReadonlyArray> +> = { + pending: ["running", "backgrounded", "interrupted", "failed"], + running: ["backgrounded", "interrupted", "completed", "failed"], + backgrounded: ["running", "interrupted", "completed", "failed"], + interrupted: ["running", "failed"], + completed: [], + failed: [], +}; + +export const WorkflowRunStatusTransitionSchema = z + 
.object({ + from: WorkflowRunStatusSchema, + to: WorkflowRunStatusSchema, + }) + .refine((transition) => WorkflowRunStatusTransitions[transition.from].includes(transition.to), { + message: "Invalid workflow run status transition", + path: ["to"], + }); + +export const WorkflowRunRecordSchema = z.object({ + id: WorkflowRunIdSchema, + workspaceId: z.string().min(1), + definition: WorkflowDefinitionDescriptorSchema, + definitionSource: z.string().min(1), + definitionHash: z.string().min(1), + args: JsonValueSchema, + status: WorkflowRunStatusSchema, + createdAt: IsoDateTimeSchema, + updatedAt: IsoDateTimeSchema, + events: WorkflowEventSequenceSchema, + steps: z.array(WorkflowStepRecordSchema), +}); diff --git a/src/common/orpc/schemas/workspace.ts b/src/common/orpc/schemas/workspace.ts index 88ba304ca2..6951eea678 100644 --- a/src/common/orpc/schemas/workspace.ts +++ b/src/common/orpc/schemas/workspace.ts @@ -1,6 +1,7 @@ import { z } from "zod"; import { ThinkingLevelSchema } from "../../types/thinking"; import { RuntimeConfigSchema } from "./runtime"; +import { WorkflowRunIdSchema } from "./workflow"; import { WorkspaceAISettingsByAgentSchema, WorkspaceAISettingsSchema } from "./workspaceAiSettings"; import { TASK_GROUP_KIND_VALUES } from "@/common/utils/tools/taskGroups"; import { GoalSnapshotSchema } from "./goal"; @@ -86,6 +87,14 @@ export const WorkspaceHeartbeatSettingsSchema = z.object({ }), }); +export const WorkflowTaskMetadataSchema = z.object({ + runId: WorkflowRunIdSchema.meta({ description: "Workflow run that spawned this task." }), + stepId: z.string().min(1).meta({ description: "Workflow step that spawned this task." 
}), + outputSchema: z.unknown().optional().meta({ + description: "Optional JSON Schema subset required for this task's structured output.", + }), +}); + export const WorkspaceMetadataSchema = z.object({ id: z.string().meta({ description: @@ -143,6 +152,9 @@ export const WorkspaceMetadataSchema = z.object({ description: 'If set, selects an agent definition for this workspace (e.g., "explore" or "exec").', }), + workflowTask: WorkflowTaskMetadataSchema.optional().meta({ + description: "Workflow run/step metadata for workflow-spawned child tasks.", + }), bestOf: BestOfGroupSchema.optional().meta({ description: "Grouping metadata for child tasks spawned from the same parent tool call.", }), diff --git a/src/common/schemas/project.ts b/src/common/schemas/project.ts index 0de08229a5..a9437bd92e 100644 --- a/src/common/schemas/project.ts +++ b/src/common/schemas/project.ts @@ -54,6 +54,14 @@ export const WorktreeArchiveSnapshotSchema = z.object({ }), }); +export const WorkflowTaskMetadataSchema = z.object({ + runId: z.string().min(1).meta({ description: "Workflow run that spawned this task." }), + stepId: z.string().min(1).meta({ description: "Workflow step that spawned this task." 
}), + outputSchema: z.unknown().optional().meta({ + description: "Optional JSON Schema subset required for this task's structured output.", + }), +}); + export const WorkspaceConfigSchema = z.object({ path: z.string().meta({ description: "Absolute path to workspace directory - REQUIRED for backward compatibility", @@ -107,6 +115,9 @@ export const WorkspaceConfigSchema = z.object({ description: 'If set, selects an agent definition for this workspace (e.g., "explore" or "exec").', }), + workflowTask: WorkflowTaskMetadataSchema.optional().meta({ + description: "Workflow run/step metadata for workflow-spawned child tasks.", + }), bestOf: BestOfGroupSchema.optional().meta({ description: "Grouping metadata for child tasks spawned from the same parent tool call.", }), @@ -135,6 +146,8 @@ export const WorkspaceConfigSchema = z.object({ programmaticToolCalling: z.boolean().optional(), programmaticToolCallingExclusive: z.boolean().optional(), advisorTool: z.boolean().optional(), + dynamicWorkflows: z.boolean().optional(), + subagentFileReports: z.boolean().optional(), execSubagentHardRestart: z.boolean().optional(), }) .optional() diff --git a/src/common/telemetry/payload.ts b/src/common/telemetry/payload.ts index 7868afcb87..1d8dbe1371 100644 --- a/src/common/telemetry/payload.ts +++ b/src/common/telemetry/payload.ts @@ -300,7 +300,8 @@ export type TelemetryCommandType = | "plan" | "providers" | "goal" - | "btw"; + | "btw" + | "workflow"; /** * Command usage event - tracks slash command usage patterns diff --git a/src/common/types/message.ts b/src/common/types/message.ts index 863f9bf655..f894eceeff 100644 --- a/src/common/types/message.ts +++ b/src/common/types/message.ts @@ -373,6 +373,26 @@ export type MuxMessageMetadata = MuxMessageMetadataBase & /** Original user input for one-shot overrides (e.g., "/opus+high do something") — used as display content so the command prefix remains visible. 
*/ rawCommand?: string; } + | { + // Durable UI-only row that shows the slash command that launched a workflow run. + // Provider requests filter this out; the completed workflow result is sent later. + type: "workflow-trigger-display"; + rawCommand: string; + runId: string; + } + | { + // Durable UI-only assistant row containing the workflow_run card. + // Provider requests filter this out; the completed workflow result is sent later. + type: "workflow-run-card-display"; + runId: string; + } + | { + // Provider-visible workflow result message. The transcript hides this row because the + // user already sees the original slash command plus the workflow card. + type: "workflow-result"; + rawCommand: string; + runId: string; + } | { // /btw — user-side marker for a side question. // diff --git a/src/common/types/tasks.test.ts b/src/common/types/tasks.test.ts index 1fefdee3e8..8a7c31cf48 100644 --- a/src/common/types/tasks.test.ts +++ b/src/common/types/tasks.test.ts @@ -44,6 +44,11 @@ describe("normalizeTaskSettings", () => { expect(normalizeTaskSettings({})).toEqual(DEFAULT_TASK_SETTINGS); }); + test("uses sixteen parallel agent tasks by default while preserving explicit values", () => { + expect(normalizeTaskSettings(undefined).maxParallelAgentTasks).toBe(16); + expect(normalizeTaskSettings({ maxParallelAgentTasks: 4 }).maxParallelAgentTasks).toBe(4); + }); + test("defaults include preserveSubagentsUntilArchive: false", () => { const normalized = normalizeTaskSettings(undefined); expect(normalized.preserveSubagentsUntilArchive).toBe(false); diff --git a/src/common/types/tools.ts b/src/common/types/tools.ts index fb76014e9f..45a24e6e64 100644 --- a/src/common/types/tools.ts +++ b/src/common/types/tools.ts @@ -30,6 +30,9 @@ import type { TaskTerminateToolResultSchema, TOOL_DEFINITIONS, WebFetchToolResultSchema, + WorkflowListToolResultSchema, + WorkflowReadToolResultSchema, + WorkflowRunToolResultSchema, } from "@/common/utils/tools/toolDefinitions"; // Bash Tool 
Types, derived from schema (avoid drift) @@ -259,6 +262,26 @@ export type TaskTerminateToolArgs = z.infer; +// Workflow Definition Tool Types +export type WorkflowListToolArgs = z.infer; + +export type WorkflowListToolSuccessResult = z.infer; + +export type WorkflowListToolResult = WorkflowListToolSuccessResult | ToolErrorResult; + +export type WorkflowReadToolArgs = z.infer; + +export type WorkflowReadToolSuccessResult = z.infer; + +export type WorkflowReadToolResult = WorkflowReadToolSuccessResult | ToolErrorResult; + +// Workflow Run Tool Types +export type WorkflowRunToolArgs = z.infer; + +export type WorkflowRunToolSuccessResult = z.infer; + +export type WorkflowRunToolResult = WorkflowRunToolSuccessResult | ToolErrorResult; + // Agent Report Tool Types export type AgentReportToolArgs = z.infer; diff --git a/src/common/types/workflow.ts b/src/common/types/workflow.ts new file mode 100644 index 0000000000..af92fdcc59 --- /dev/null +++ b/src/common/types/workflow.ts @@ -0,0 +1,38 @@ +import type { z } from "zod"; +import type { + StructuredTaskOutputSchema, + WorkflowDefinitionDescriptorSchema, + WorkflowDefinitionScopeSchema, + WorkflowNameSchema, + WorkflowResultSchema, + WorkflowRunEventSchema, + WorkflowRunIdSchema, + WorkflowRunRecordSchema, + WorkflowRunStatusSchema, + WorkflowStepRecordSchema, + WorkflowStepStatusSchema, +} from "@/common/orpc/schemas"; +import { WorkflowRunStatusTransitionSchema } from "@/common/orpc/schemas"; +import assert from "@/common/utils/assert"; + +export type WorkflowName = z.infer; +export type WorkflowDefinitionScope = z.infer; +export type WorkflowRunId = z.infer; +export type WorkflowRunStatus = z.infer; +export type WorkflowStepStatus = z.infer; +export type WorkflowDefinitionDescriptor = z.infer; +export type WorkflowResult = z.infer; +export type StructuredTaskOutput = z.infer; +export type WorkflowRunEvent = z.infer; +export type WorkflowStepRecord = z.infer; +export type WorkflowRunRecord = z.infer; + +export function 
assertWorkflowRunStatusTransition( + from: WorkflowRunStatus, + to: WorkflowRunStatus +): void { + assert( + WorkflowRunStatusTransitionSchema.safeParse({ from, to }).success, + `Invalid workflow run status transition: ${from} -> ${to}` + ); +} diff --git a/src/common/utils/jsonSchemaSubset.test.ts b/src/common/utils/jsonSchemaSubset.test.ts new file mode 100644 index 0000000000..9f02ae1c9b --- /dev/null +++ b/src/common/utils/jsonSchemaSubset.test.ts @@ -0,0 +1,127 @@ +import { describe, expect, test } from "bun:test"; +import { validateJsonSchemaSubset, validateJsonSchemaSubsetSchema } from "./jsonSchemaSubset"; + +describe("validateJsonSchemaSubset", () => { + test("validates schemas without requiring an example value", () => { + expect( + validateJsonSchemaSubsetSchema({ + type: "object", + required: ["summary"], + properties: { summary: { type: "string" } }, + additionalProperties: false, + }) + ).toEqual({ success: true }); + + expect(validateJsonSchemaSubsetSchema({ type: ["string", "null"] })).toEqual({ + success: false, + errors: [{ path: "$", message: "Unsupported JSON Schema type union" }], + }); + }); + + test("accepts nested objects that satisfy required properties and primitive types", () => { + const result = validateJsonSchemaSubset( + { + type: "object", + required: ["claims"], + properties: { + claims: { + type: "array", + items: { + type: "object", + required: ["text", "confidence"], + properties: { + text: { type: "string" }, + confidence: { type: "number" }, + }, + additionalProperties: false, + }, + }, + }, + additionalProperties: false, + }, + { claims: [{ text: "Workflow runs are durable", confidence: 0.8 }] } + ); + + expect(result).toEqual({ success: true }); + }); + + test("returns actionable paths for missing required properties and type errors", () => { + const result = validateJsonSchemaSubset( + { + type: "object", + required: ["summary", "sources"], + properties: { + summary: { type: "string" }, + sources: { type: "array", items: { 
type: "string" } }, + }, + }, + { sources: ["one", 2] } + ); + + expect(result).toEqual({ + success: false, + errors: [ + { path: "$.summary", message: "Required property is missing" }, + { path: "$.sources[1]", message: "Expected string, got number" }, + ], + }); + }); + + test("rejects unsupported schema keywords instead of ignoring them", () => { + const result = validateJsonSchemaSubset({ type: "string", pattern: "^ok$" }, "ok"); + + expect(result).toEqual({ + success: false, + errors: [{ path: "$", message: "Unsupported JSON Schema keyword: pattern" }], + }); + }); + + test("rejects JSON Schema type unions instead of skipping type validation", () => { + const result = validateJsonSchemaSubset({ type: ["string", "null"] }, 42); + + expect(result).toEqual({ + success: false, + errors: [{ path: "$", message: "Unsupported JSON Schema type union" }], + }); + }); + + test("rejects schema-valued additionalProperties instead of ignoring extra values", () => { + const result = validateJsonSchemaSubset( + { type: "object", additionalProperties: { type: "string" } }, + { extra: 42 } + ); + + expect(result).toEqual({ + success: false, + errors: [ + { + path: "$.additionalProperties", + message: "Unsupported JSON Schema additionalProperties schema", + }, + ], + }); + }); + + test("supports enum, integer, and additionalProperties false", () => { + const result = validateJsonSchemaSubset( + { + type: "object", + properties: { + status: { enum: ["pass", "fail"] }, + count: { type: "integer" }, + }, + additionalProperties: false, + }, + { status: "maybe", count: 1.5, extra: true } + ); + + expect(result).toEqual({ + success: false, + errors: [ + { path: "$.status", message: "Expected one of: pass, fail" }, + { path: "$.count", message: "Expected integer, got number" }, + { path: "$.extra", message: "Additional property is not allowed" }, + ], + }); + }); +}); diff --git a/src/common/utils/jsonSchemaSubset.ts b/src/common/utils/jsonSchemaSubset.ts new file mode 100644 index 
// Patch framing carried over verbatim from the surrounding diff:
// 0000000000..ab0243227f --- /dev/null +++ b/src/common/utils/jsonSchemaSubset.ts @@ -0,0 +1,217 @@

/** A single validation failure, located by a path rooted at "$" (e.g. "$.sources[1]"). */
export interface JsonSchemaValidationError {
  path: string;
  message: string;
}

/** Outcome of a validation run: success, or the full list of collected errors. */
export type JsonSchemaSubsetValidationResult =
  | { success: true }
  | { success: false; errors: JsonSchemaValidationError[] };

/** The only schema keywords this validator understands; any other key is rejected. */
const SUPPORTED_SCHEMA_KEYWORDS = new Set<string>([
  "type",
  "properties",
  "required",
  "items",
  "additionalProperties",
  "enum",
]);

/**
 * Checks that `schema` only uses the supported JSON Schema subset, without
 * needing a value to validate.
 *
 * @param schema - Candidate schema; anything that is not a plain object is rejected.
 * @returns `{ success: true }`, or every unsupported construct found (nested
 *   `properties` and `items` schemas are inspected recursively).
 */
export function validateJsonSchemaSubsetSchema(schema: unknown): JsonSchemaSubsetValidationResult {
  if (!isPlainRecord(schema)) {
    return { success: false, errors: [{ path: "$", message: "Schema must be an object" }] };
  }

  const errors: JsonSchemaValidationError[] = [];
  collectUnsupportedKeywordErrors(schema, "$", errors);
  return errors.length === 0 ? { success: true } : { success: false, errors };
}

/**
 * Validates `value` against `schema`.
 *
 * The schema itself is checked first; if it uses unsupported constructs those
 * errors are returned and the value is not inspected at all.
 *
 * @param schema - Schema written in the supported subset.
 * @param value - Value to validate.
 * @returns `{ success: true }`, or all value errors in traversal order.
 */
export function validateJsonSchemaSubset(
  schema: unknown,
  value: unknown
): JsonSchemaSubsetValidationResult {
  const schemaValidation = validateJsonSchemaSubsetSchema(schema);
  if (!schemaValidation.success) {
    return schemaValidation;
  }

  const errors: JsonSchemaValidationError[] = [];
  validateValue(schema, value, "$", errors);
  return errors.length === 0 ? { success: true } : { success: false, errors };
}

/** Validates one value against one (sub)schema, appending failures to `errors`. */
function validateValue(
  schema: unknown,
  value: unknown,
  path: string,
  errors: JsonSchemaValidationError[]
): void {
  if (!isPlainRecord(schema)) {
    errors.push({ path, message: "Schema must be an object" });
    return;
  }

  // Enum membership uses Object.is, so only primitives (or the same reference) can match.
  if (Array.isArray(schema.enum) && !schema.enum.some((candidate) => Object.is(candidate, value))) {
    errors.push({ path, message: `Expected one of: ${schema.enum.map(String).join(", ")}` });
  }

  if (typeof schema.type === "string") {
    validateType(schema.type, value, path, errors);
  }

  // Structural checks only run when the value already has the right container shape;
  // a shape mismatch has been reported by validateType above.
  if (schema.type === "object" && isPlainRecord(value)) {
    validateObject(schema, value, path, errors);
  }

  if (schema.type === "array" && Array.isArray(value)) {
    validateArray(schema, value, path, errors);
  }
}

/**
 * Applies `required`, `properties` and `additionalProperties: false` to an object value.
 *
 * Property presence is decided by OWN properties only. The `in` operator would
 * also see inherited members such as `toString` or `constructor`, which made a
 * missing required property look present and validated prototype functions
 * against property schemas.
 */
function validateObject(
  schema: Record<string, unknown>,
  value: Record<string, unknown>,
  path: string,
  errors: JsonSchemaValidationError[]
): void {
  const properties = isPlainRecord(schema.properties) ? schema.properties : {};
  const required: unknown[] = Array.isArray(schema.required) ? schema.required : [];

  for (const property of required) {
    if (typeof property !== "string") {
      errors.push({ path, message: "Required property names must be strings" });
      continue;
    }
    if (!hasOwnProperty(value, property)) {
      errors.push({ path: `${path}.${property}`, message: "Required property is missing" });
    }
  }

  for (const [property, propertySchema] of Object.entries(properties)) {
    if (hasOwnProperty(value, property)) {
      validateValue(propertySchema, value[property], `${path}.${property}`, errors);
    }
  }

  if (schema.additionalProperties === false) {
    const allowedProperties = new Set(Object.keys(properties));
    for (const property of Object.keys(value)) {
      if (!allowedProperties.has(property)) {
        errors.push({ path: `${path}.${property}`, message: "Additional property is not allowed" });
      }
    }
  }
}

/** Validates every array element against `schema.items`; arrays without `items` are unconstrained. */
function validateArray(
  schema: Record<string, unknown>,
  value: unknown[],
  path: string,
  errors: JsonSchemaValidationError[]
): void {
  if (schema.items == null) {
    return;
  }

  for (const [index, item] of value.entries()) {
    validateValue(schema.items, item, `${path}[${index}]`, errors);
  }
}

/** Checks a value against a single primitive/container type name. */
function validateType(
  type: string,
  value: unknown,
  path: string,
  errors: JsonSchemaValidationError[]
): void {
  switch (type) {
    case "object":
      if (!isPlainRecord(value)) {
        errors.push({ path, message: `Expected object, got ${getJsonType(value)}` });
      }
      return;
    case "array":
      if (!Array.isArray(value)) {
        errors.push({ path, message: `Expected array, got ${getJsonType(value)}` });
      }
      return;
    case "string":
      if (typeof value !== "string") {
        errors.push({ path, message: `Expected string, got ${getJsonType(value)}` });
      }
      return;
    case "number":
      // NaN and +/-Infinity are rejected as well as non-numbers.
      if (typeof value !== "number" || !Number.isFinite(value)) {
        errors.push({ path, message: `Expected number, got ${getJsonType(value)}` });
      }
      return;
    case "integer":
      if (typeof value !== "number" || !Number.isInteger(value)) {
        errors.push({ path, message: `Expected integer, got ${getJsonType(value)}` });
      }
      return;
    case "boolean":
      if (typeof value !== "boolean") {
        errors.push({ path, message: `Expected boolean, got ${getJsonType(value)}` });
      }
      return;
    case "null":
      if (value !== null) {
        errors.push({ path, message: `Expected null, got ${getJsonType(value)}` });
      }
      return;
    default:
      errors.push({ path, message: `Unsupported JSON Schema type: ${type}` });
  }
}

/**
 * Recursively records every construct outside the supported subset:
 * unknown keywords, `type` given as an array (union), and
 * `additionalProperties` given as anything other than a boolean.
 * Non-object sub-schemas are skipped here without an error.
 */
function collectUnsupportedKeywordErrors(
  schema: unknown,
  path: string,
  errors: JsonSchemaValidationError[]
): void {
  if (!isPlainRecord(schema)) {
    return;
  }

  for (const key of Object.keys(schema)) {
    if (!SUPPORTED_SCHEMA_KEYWORDS.has(key)) {
      errors.push({ path, message: `Unsupported JSON Schema keyword: ${key}` });
    }
  }

  if (Array.isArray(schema.type)) {
    errors.push({ path, message: "Unsupported JSON Schema type union" });
  }

  if (
    schema.additionalProperties != null &&
    schema.additionalProperties !== true &&
    schema.additionalProperties !== false
  ) {
    errors.push({
      path: `${path}.additionalProperties`,
      message: "Unsupported JSON Schema additionalProperties schema",
    });
  }

  if (isPlainRecord(schema.properties)) {
    for (const [property, propertySchema] of Object.entries(schema.properties)) {
      collectUnsupportedKeywordErrors(propertySchema, `${path}.${property}`, errors);
    }
  }

  if (schema.items != null) {
    collectUnsupportedKeywordErrors(schema.items, `${path}[]`, errors);
  }
}

/** True when `key` is an own (non-inherited) property of `record`. */
function hasOwnProperty(record: Record<string, unknown>, key: string): boolean {
  return Object.prototype.hasOwnProperty.call(record, key);
}

/** True for any non-null, non-array object. */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return value != null && typeof value === "object" && !Array.isArray(value);
}

/** `typeof`, except that `null` reports "null" and arrays report "array". */
function getJsonType(value: unknown): string {
  if (value === null) return "null";
  if (Array.isArray(value)) return "array";
  return typeof value;
}

// Patch framing carried over verbatim from the surrounding diff:
// diff --git a/src/common/utils/tools/toolDefinitions.test.ts b/src/common/utils/tools/toolDefinitions.test.ts index 9ba385c9ed..ce646181cd 100644 --- a/src/common/utils/tools/toolDefinitions.test.ts +++
b/src/common/utils/tools/toolDefinitions.test.ts @@ -410,6 +410,18 @@ describe("TOOL_DEFINITIONS", () => { expect(tools).toContain("skills_catalog_read"); }); + it("only includes workflow tools when dynamic workflows are enabled", () => { + const disabledTools = getAvailableTools("openai:gpt-4o", { enableDynamicWorkflows: false }); + expect(disabledTools).not.toContain("workflow_list"); + expect(disabledTools).not.toContain("workflow_read"); + expect(disabledTools).not.toContain("workflow_run"); + + const enabledTools = getAvailableTools("openai:gpt-4o", { enableDynamicWorkflows: true }); + expect(enabledTools).toContain("workflow_list"); + expect(enabledTools).toContain("workflow_read"); + expect(enabledTools).toContain("workflow_run"); + }); + it("agent_skill_write schema rejects an advertise tool argument (advertise is authored in content)", () => { const parsed = TOOL_DEFINITIONS.agent_skill_write.schema.safeParse({ name: "demo-skill", diff --git a/src/common/utils/tools/toolDefinitions.ts b/src/common/utils/tools/toolDefinitions.ts index c96292cb33..cb728727ad 100644 --- a/src/common/utils/tools/toolDefinitions.ts +++ b/src/common/utils/tools/toolDefinitions.ts @@ -27,7 +27,15 @@ */ import { z } from "zod"; -import { AgentIdSchema, AgentSkillPackageSchema, SkillNameSchema } from "@/common/orpc/schemas"; +import { + AgentIdSchema, + AgentSkillPackageSchema, + SkillNameSchema, + WorkflowDefinitionDescriptorSchema, + WorkflowNameSchema, + WorkflowRunRecordSchema, + WorkflowRunStatusSchema, +} from "@/common/orpc/schemas"; import { RUNTIME_MODE, type RuntimeMode } from "@/common/types/runtime"; import { BASH_HARD_MAX_LINES, @@ -323,6 +331,7 @@ const TaskToolCompletedReportSchema = z taskId: z.string(), reportMarkdown: z.string(), title: z.string().optional(), + structuredOutput: z.unknown().optional(), agentId: z.string().optional(), agentType: z.string().optional(), groupKind: z.enum(TASK_GROUP_KIND_VALUES).optional(), @@ -364,6 +373,7 @@ export const 
TaskToolCompletedResultSchema = z taskIds: z.array(z.string()).min(1).optional(), reportMarkdown: z.string().optional(), title: z.string().optional(), + structuredOutput: z.unknown().optional(), agentId: z.string().optional(), agentType: z.string().optional(), reports: z.array(TaskToolCompletedReportSchema).min(1).optional(), @@ -419,8 +429,8 @@ export const TaskAwaitToolArgsSchema = z .array(z.string().min(1)) .nullish() .describe( - "List of task IDs to await — use only real IDs returned by prior task, bash, or task_list tool results; never fabricate an ID. " + - "When omitted, waits for all active descendant tasks of the current workspace." + "List of task IDs or workflow run IDs to await — use only real IDs returned by prior task, bash, workflow_run, or task_list tool results; never fabricate an ID. " + + "When omitted, waits for all active descendant tasks and workflow runs of the current workspace." ), filter: z .string() @@ -527,22 +537,25 @@ export const TaskAwaitToolCompletedResultSchema = z status: z.literal("completed"), taskId: z.string(), reportMarkdown: z.string(), + structuredOutput: z.unknown().optional(), title: z.string().optional(), output: z.string().optional(), elapsed_ms: z.number().optional(), exitCode: z.number().optional(), note: z.string().optional(), + run: WorkflowRunRecordSchema.optional(), artifacts: TaskAwaitToolArtifactsSchema.optional(), }) .strict(); export const TaskAwaitToolActiveResultSchema = z .object({ - status: z.enum(["queued", "running", "awaiting_report"]), + status: z.enum(["queued", "running", "backgrounded", "awaiting_report", "interrupted"]), taskId: z.string(), output: z.string().optional(), elapsed_ms: z.number().optional(), note: z.string().optional(), + run: WorkflowRunRecordSchema.optional(), }) .strict(); @@ -567,6 +580,7 @@ export const TaskAwaitToolErrorResultSchema = z status: z.literal("error"), taskId: z.string(), error: z.string(), + run: WorkflowRunRecordSchema.optional(), }) .strict(); @@ -770,18 
+784,107 @@ export const TaskListToolResultSchema = z }) .strict(); +// ----------------------------------------------------------------------------- +// workflow_run (durable workflow orchestration) +// ----------------------------------------------------------------------------- + +export const WorkflowListToolArgsSchema = z.object({}).strict(); + +export const WorkflowListToolResultSchema = z + .object({ + workflows: z.array(WorkflowDefinitionDescriptorSchema), + }) + .strict(); + +export const WorkflowReadToolArgsSchema = z + .object({ + name: WorkflowNameSchema, + }) + .strict(); + +export const WorkflowReadToolResultSchema = z + .object({ + descriptor: WorkflowDefinitionDescriptorSchema, + source: z.string().min(1), + }) + .strict(); + +export const WorkflowRunToolArgsSchema = z + .object({ + name: WorkflowNameSchema, + args: z.unknown().nullish(), + run_in_background: z.boolean().nullish().default(false), + }) + .strict(); + +export const WorkflowRunToolResultSchema = z + .object({ + status: WorkflowRunStatusSchema, + runId: z.string().min(1), + result: z.unknown(), + run: WorkflowRunRecordSchema.optional(), + }) + .strict(); + // ----------------------------------------------------------------------------- // agent_report (explicit subagent -> parent report) // ----------------------------------------------------------------------------- -export const AgentReportToolArgsSchema = z +export const AgentReportInlineToolArgsSchema = z .object({ reportMarkdown: z.string().min(1), + structuredOutput: z.unknown().nullish(), + title: z.string().nullish(), + }) + .strict(); + +export const AgentReportFileToolArgsSchema = z + .object({ + reportMarkdownPath: z + .string() + .min(1) + .nullish() + .describe("Path to the markdown report file, usually report.md in the workspace root"), + structuredOutputPath: z + .string() + .min(1) + .nullish() + .describe( + "Path to a JSON file containing the structured output, usually structured-output.json" + ), title: 
z.string().nullish(), }) .strict(); -export const AgentReportToolResultSchema = z.object({ success: z.literal(true) }).strict(); +export const AgentReportToolArgsSchema = z.union([ + AgentReportInlineToolArgsSchema, + AgentReportFileToolArgsSchema, +]); + +export const AgentReportSubmittedReportSchema = z + .object({ + reportMarkdown: z.string().min(1), + structuredOutput: z.unknown().optional(), + title: z.string().min(1).optional(), + }) + .strict(); + +export const AgentReportToolResultSchema = z.discriminatedUnion("success", [ + z + .object({ + success: z.literal(true), + message: z.string().min(1).optional(), + report: AgentReportSubmittedReportSchema.optional(), + }) + .strict(), + z + .object({ + success: z.literal(false), + message: z.string().min(1), + errors: z.array(z.object({ path: z.string().min(1), message: z.string().min(1) })).min(1), + }) + .strict(), +]); const FILE_TOOL_PATH = z .string() .describe("Path to the file to edit (absolute or relative to the current workspace)"); @@ -1407,14 +1510,14 @@ export const TOOL_DEFINITIONS = { }, task_await: { description: - "Wait for one or more tasks to produce output. " + + "Wait for one or more tasks or workflow runs to produce output. " + "\n\nWHEN TO USE: only call task_await when the current user request depends on a task's output, or when synthesis/integration of a previously-spawned task is the next logical step. " + "Do not call task_await solely because active tasks exist; for unrelated user messages, respond directly and let tasks continue in the background. " + "\n\nIMPORTANT: Do not call task_await in the same parallel tool-call batch as task or bash — " + "the taskId is not available until the spawning tool returns. " + "Always wait for the task/bash tool result first, then call task_await in a subsequent step. " + - "When omitting task_ids to await all active tasks, ensure at least one background task was already spawned in a prior step. " + - "\n\nAgent tasks return reports when completed. 
" + + "When omitting task_ids to await all active tasks/workflows, ensure at least one background task or workflow was already spawned in a prior step. " + + "\n\nAgent tasks and workflow runs return reports when completed. " + "Bash tasks return incremental output while running and a final reportMarkdown when they exit. " + "For bash tasks, you may optionally pass filter/filter_exclude to include/exclude output lines by regex. " + "WARNING: when using filter, non-matching lines are permanently discarded. " + @@ -1424,7 +1527,7 @@ export const TOOL_DEFINITIONS = { "Set min_completed higher (up to the number of awaited tasks) when you genuinely need more before proceeding — e.g. best-of-N synthesis that must compare every candidate should pass min_completed equal to the batch size. " + "The result always includes every task complete at the moment it returns, plus current status for the rest; not-yet-completed tasks keep running and stay re-awaitable on a later call. " + "You always get per-task results (like Promise.allSettled), just possibly before every task has finished. " + - "Possible statuses: completed, queued, running, awaiting_report, not_found, invalid_scope, error. " + + "Possible statuses: completed, queued, running, backgrounded, awaiting_report, interrupted, not_found, invalid_scope, error. " + "Bash task outputs may be automatically filtered; when this happens, check each result's note for details and (if available) where the full output was saved.", schema: TaskAwaitToolArgsSchema, }, @@ -1444,6 +1547,21 @@ export const TOOL_DEFINITIONS = { "This is a discovery tool, NOT a waiting mechanism. If the current request actually depends on a task's output, call task_await with the specific task IDs you need; do not await all active tasks just because they appear here.", schema: TaskListToolArgsSchema, }, + workflow_list: { + description: + "List durable workflow definitions available in this workspace. 
Use this before workflow_run when you do not already know the workflow name. Before writing or editing workflow JS, read the built-in workflow-authoring skill. Scratch workflows are workspace files at .mux/workflows/.scratch/.js and should be authored with file_read/file_edit_* tools.", + schema: WorkflowListToolArgsSchema, + }, + workflow_read: { + description: + "Read a durable workflow definition's descriptor and source by name. Use this to inspect expected args or understand a workflow before running it. Before authoring new workflow JS, read the built-in workflow-authoring skill for available globals, schema limits, and replay rules.", + schema: WorkflowReadToolArgsSchema, + }, + workflow_run: { + description: + "Start a durable workflow run by workflow name. Workflows coordinate delegated agent tasks and preserve run state for replay/resume. To create a scratch workflow, first read the built-in workflow-authoring skill, then write .mux/workflows/.scratch/.js with a // description: header and default exported function, then run it by name.", + schema: WorkflowRunToolArgsSchema, + }, agent_report: { description: "Report the final result of a sub-agent task back to the parent workspace. " + @@ -2174,6 +2292,7 @@ export function getAvailableTools( enableAgentReport?: boolean; enableAnalyticsQuery?: boolean; enableAdvisor?: boolean; + enableDynamicWorkflows?: boolean; /** @deprecated Mux global tools are always included. */ enableMuxGlobalAgentsTools?: boolean; } @@ -2182,6 +2301,7 @@ export function getAvailableTools( const enableAgentReport = options?.enableAgentReport ?? true; const enableAnalyticsQuery = options?.enableAnalyticsQuery ?? true; const enableAdvisor = options?.enableAdvisor ?? false; + const enableDynamicWorkflows = options?.enableDynamicWorkflows ?? false; // Base tools available for all models // Note: Tool availability is controlled by agent tool policy (allowlist), not mode checks here. 
@@ -2219,6 +2339,7 @@ export function getAvailableTools( "task_apply_git_patch", "task_terminate", "task_list", + ...(enableDynamicWorkflows ? ["workflow_list", "workflow_read", "workflow_run"] : []), ...(enableAgentReport ? ["agent_report"] : []), "get_goal", "complete_goal", diff --git a/src/common/utils/tools/tools.test.ts b/src/common/utils/tools/tools.test.ts index 1a630f4b37..330417fe9a 100644 --- a/src/common/utils/tools/tools.test.ts +++ b/src/common/utils/tools/tools.test.ts @@ -1,3 +1,4 @@ +/* eslint-disable @typescript-eslint/require-await */ import { describe, expect, mock, test } from "bun:test"; import { z } from "zod"; @@ -89,6 +90,66 @@ describe("getToolsForModel", () => { expect(toolsWithReport.agent_report).toBeDefined(); }); + test("only includes workflow tools when dynamic workflows service and experiment are enabled", async () => { + const runtime = new LocalRuntime(process.cwd()); + const initStateManager = createInitStateManager(); + + const withoutExperiment = await getToolsForModel( + "noop:model", + { + cwd: process.cwd(), + runtime, + runtimeTempDir: "/tmp", + workspaceId: "ws-1", + workflowService: { + listDefinitions: mock(async () => []), + readDefinition: mock(async () => ({ + descriptor: { name: "demo", description: "Demo", scope: "built-in", executable: true }, + source: "export default function workflow() { return null; }", + })), + startNamedWorkflow: mock(async () => ({ + runId: "wfr_1", + status: "completed" as const, + result: null, + })), + }, + }, + "ws-1", + initStateManager + ); + expect(withoutExperiment.workflow_list).toBeUndefined(); + expect(withoutExperiment.workflow_read).toBeUndefined(); + expect(withoutExperiment.workflow_run).toBeUndefined(); + + const withExperiment = await getToolsForModel( + "noop:model", + { + cwd: process.cwd(), + runtime, + runtimeTempDir: "/tmp", + workspaceId: "ws-1", + experiments: { dynamicWorkflows: true }, + workflowService: { + listDefinitions: mock(async () => []), + readDefinition: 
mock(async () => ({ + descriptor: { name: "demo", description: "Demo", scope: "built-in", executable: true }, + source: "export default function workflow() { return null; }", + })), + startNamedWorkflow: mock(async () => ({ + runId: "wfr_1", + status: "completed" as const, + result: null, + })), + }, + }, + "ws-1", + initStateManager + ); + expect(withExperiment.workflow_list).toBeDefined(); + expect(withExperiment.workflow_read).toBeDefined(); + expect(withExperiment.workflow_run).toBeDefined(); + }); + test("includes desktop tools when workspace capability is available", async () => { const runtime = new LocalRuntime(process.cwd()); const initStateManager = createInitStateManager(); diff --git a/src/common/utils/tools/tools.ts b/src/common/utils/tools/tools.ts index a506b59bf0..71541bb892 100644 --- a/src/common/utils/tools/tools.ts +++ b/src/common/utils/tools/tools.ts @@ -40,6 +40,11 @@ import { createMuxAgentsReadTool } from "@/node/services/tools/mux_agents_read"; import { createMuxAgentsWriteTool } from "@/node/services/tools/mux_agents_write"; import { createMuxConfigReadTool } from "@/node/services/tools/mux_config_read"; import { createMuxConfigWriteTool } from "@/node/services/tools/mux_config_write"; +import { + createWorkflowListTool, + createWorkflowReadTool, +} from "@/node/services/tools/workflow_definitions"; +import { createWorkflowRunTool } from "@/node/services/tools/workflow_run"; import { createAgentReportTool } from "@/node/services/tools/agent_report"; import { wrapWithInitWait } from "@/node/services/tools/wrapWithInitWait"; import { withHooks, type HookConfig } from "@/node/services/tools/withHooks"; @@ -138,6 +143,29 @@ export interface ToolConfiguration { reportModelUsage?: (event: ToolModelUsageEvent) => void; /** Task orchestration for sub-agent tasks */ taskService?: TaskService; + /** Durable workflow lifecycle service for dynamic workflow tools. 
*/ + workflowService?: { + listDefinitions(options: { projectTrusted: boolean }): Promise; + readDefinition(input: { + name: string; + projectTrusted: boolean; + }): Promise<{ descriptor: unknown; source: string }>; + getRun?(input: { workspaceId: string; runId: string }): Promise; + listRuns?(input: { workspaceId: string }): Promise; + startNamedWorkflowInBackground?(input: { + name: string; + workspaceId: string; + projectTrusted: boolean; + args: unknown; + }): Promise<{ runId: string; status: string; result: unknown }>; + startNamedWorkflow(input: { + name: string; + workspaceId: string; + projectTrusted: boolean; + args: unknown; + abortSignal?: AbortSignal; + }): Promise<{ runId: string; status: string; result: unknown }>; + }; /** Workspace goal lifecycle service for model-facing goal tools. */ goalService?: WorkspaceGoalService; /** Per-request goal tool gates derived from goal status and agent capabilities. */ @@ -145,6 +173,10 @@ export interface ToolConfiguration { getGoal: boolean; completeGoal: boolean; }; + /** Optional JSON Schema subset required by a workflow-spawned task report. */ + workflowAgentOutputSchema?: unknown; + /** When true, subagent reports are submitted by paths to report.md/structured-output.json. */ + subagentReportFiles?: boolean; /** Enable agent_report tool (only valid for child task workspaces) */ enableAgentReport?: boolean; /** Experiments inherited from parent (for subagent spawning) */ @@ -153,6 +185,8 @@ export interface ToolConfiguration { programmaticToolCallingExclusive?: boolean; advisorTool?: boolean; execSubagentHardRestart?: boolean; + dynamicWorkflows?: boolean; + subagentFileReports?: boolean; }; /** Available sub-agents for the task tool description (dynamic context) */ availableSubagents?: AgentDefinitionDescriptor[]; @@ -455,6 +489,13 @@ export async function getToolsForModel( // (workspaceStatusGenerator.ts), which create the tool inline. 
Exposing // them in the default toolset would let exec-derived agents see their // "call me immediately" descriptions. + ...(config.workflowService && config.experiments?.dynamicWorkflows + ? { + workflow_list: createWorkflowListTool(config), + workflow_read: createWorkflowReadTool(config), + workflow_run: createWorkflowRunTool(config), + } + : {}), ...(config.enableAgentReport ? { agent_report: createAgentReportTool(config) } : {}), ...(config.goalService && config.enableGoalTools?.getGoal ? { get_goal: createGetGoalTool(config) } @@ -566,6 +607,9 @@ export async function getToolsForModel( getAvailableTools(modelString, { enableAgentReport: config.enableAgentReport, enableAnalyticsQuery: Boolean(config.analyticsService), + enableDynamicWorkflows: Boolean( + config.workflowService && config.experiments?.dynamicWorkflows + ), enableAdvisor: Boolean(config.advisorRuntime), // Mux global tools are always created; tool policy (agent frontmatter) // controls which agents can actually use them. diff --git a/src/common/utils/workflowRunMessages.ts b/src/common/utils/workflowRunMessages.ts new file mode 100644 index 0000000000..a40eba169c --- /dev/null +++ b/src/common/utils/workflowRunMessages.ts @@ -0,0 +1,154 @@ +import type { MuxMessage } from "@/common/types/message"; +import type { WorkflowRunRecord } from "@/common/types/workflow"; +import assert from "@/common/utils/assert"; + +export const WORKFLOW_TRIGGER_DISPLAY_METADATA_TYPE = "workflow-trigger-display"; +export const WORKFLOW_RUN_CARD_DISPLAY_METADATA_TYPE = "workflow-run-card-display"; +export const WORKFLOW_RESULT_METADATA_TYPE = "workflow-result"; + +export const WORKFLOW_RESULT_XML_TAG = "mux_workflow_result"; + +function getWorkflowResultValue(result: unknown, run: WorkflowRunRecord | null): unknown { + if (result != null) { + return result; + } + return run?.events.findLast((event) => event.type === "result")?.result ?? 
result; +} + +function getWorkflowError(run: WorkflowRunRecord | null): string | undefined { + return run?.events.findLast((event) => event.type === "error")?.message; +} + +function getWorkflowResultField(value: unknown, field: string): unknown { + if (value != null && typeof value === "object") { + return (value as Record)[field]; + } + return undefined; +} + +function stringifyWorkflowResultPayload(payload: unknown): string { + try { + return JSON.stringify(payload, null, 2); + } catch { + return JSON.stringify({ error: "Workflow result could not be serialized." }, null, 2); + } +} + +export function buildWorkflowResultContextMessage(input: { + rawCommand: string; + name: string; + runId: string; + status: string; + result: unknown; + run: WorkflowRunRecord | null; +}): string { + assert( + input.rawCommand.trim().length > 0, + "buildWorkflowResultContextMessage: rawCommand required" + ); + assert(input.name.length > 0, "buildWorkflowResultContextMessage: workflow name required"); + assert(input.runId.length > 0, "buildWorkflowResultContextMessage: runId required"); + + const resultValue = getWorkflowResultValue(input.result, input.run); + const reportMarkdown = getWorkflowResultField(resultValue, "reportMarkdown"); + const structuredOutput = getWorkflowResultField(resultValue, "structuredOutput"); + const payload = { + workflow: { + name: input.name, + runId: input.runId, + status: input.status, + }, + ...(typeof reportMarkdown === "string" ? { reportMarkdown } : {}), + ...(structuredOutput !== undefined ? { structuredOutput } : {}), + ...(resultValue != null ? { result: resultValue } : {}), + ...(getWorkflowError(input.run) ? { error: getWorkflowError(input.run) } : {}), + }; + + return [ + "The workflow below has finished. Continue the agent turn for the original request using this workflow result. 
Do not merely restate the raw payload; synthesize the next answer or action from it.", + `Original workflow command: ${input.rawCommand}`, + `<${WORKFLOW_RESULT_XML_TAG}>\n${stringifyWorkflowResultPayload(payload)}\n`, + ].join("\n\n"); +} + +export interface WorkflowRunCardInput { + name: string; + args: unknown; +} + +export interface WorkflowRunCardResult { + runId: string; + status: string; + result: unknown; + run?: WorkflowRunRecord; +} + +type WorkflowRunToolPart = Extract; + +export function isWorkflowTriggerDisplayMessage(message: MuxMessage): boolean { + return message.metadata?.muxMetadata?.type === WORKFLOW_TRIGGER_DISPLAY_METADATA_TYPE; +} + +export function isWorkflowRunCardDisplayMessage(message: MuxMessage): boolean { + return message.metadata?.muxMetadata?.type === WORKFLOW_RUN_CARD_DISPLAY_METADATA_TYPE; +} + +export function isWorkflowResultMessage(message: MuxMessage): boolean { + return message.metadata?.muxMetadata?.type === WORKFLOW_RESULT_METADATA_TYPE; +} + +export function isWorkflowDisplayOnlyMessage(message: MuxMessage): boolean { + return isWorkflowTriggerDisplayMessage(message) || isWorkflowRunCardDisplayMessage(message); +} + +export function filterWorkflowDisplayOnlyMessages(messages: MuxMessage[]): MuxMessage[] { + if (!messages.some(isWorkflowDisplayOnlyMessage)) { + return messages; + } + return messages.filter((message) => !isWorkflowDisplayOnlyMessage(message)); +} + +export function buildWorkflowRunToolPart( + input: WorkflowRunCardInput, + result: WorkflowRunCardResult, + now = Date.now() +): WorkflowRunToolPart { + assert(input.name.length > 0, "buildWorkflowRunToolPart: workflow name is required"); + assert(result.runId.length > 0, "buildWorkflowRunToolPart: runId is required"); + + return { + type: "dynamic-tool", + toolCallId: `workflow-run-${result.runId}`, + toolName: "workflow_run", + state: "output-available", + input: { + name: input.name, + args: input.args, + run_in_background: true, + }, + output: { + status: 
result.status, + runId: result.runId, + result: result.result, + ...(result.run != null ? { run: result.run } : {}), + }, + timestamp: now, + }; +} + +export function buildWorkflowRunCardMessage( + input: WorkflowRunCardInput, + result: WorkflowRunCardResult, + now = Date.now() +): MuxMessage { + const toolPart = buildWorkflowRunToolPart(input, result, now); + return { + id: toolPart.toolCallId, + role: "assistant", + parts: [toolPart], + metadata: { + historySequence: Number.MAX_SAFE_INTEGER, + timestamp: now, + }, + }; +} diff --git a/src/constants/slashCommands.ts b/src/constants/slashCommands.ts index c21b8363e4..d5670a6054 100644 --- a/src/constants/slashCommands.ts +++ b/src/constants/slashCommands.ts @@ -35,4 +35,5 @@ export const WORKSPACE_ONLY_COMMAND_TYPES: ReadonlySet = new Set([ "goal-complete", "goal-clear", "side-question", + "workflow-run", ]); diff --git a/src/node/builtinSkills/workflow-authoring.md b/src/node/builtinSkills/workflow-authoring.md new file mode 100644 index 0000000000..63efaf6292 --- /dev/null +++ b/src/node/builtinSkills/workflow-authoring.md @@ -0,0 +1,283 @@ +--- +name: workflow-authoring +description: Author durable JavaScript workflows for repeatable multi-agent orchestration +--- + +# Workflow Authoring + +Use this skill **before writing or editing a workflow definition**. Workflows are durable JavaScript conductors that coordinate sub-agent tasks, validate structured reports, and preserve run state for replay/resume. + +## When to use a workflow + +Prefer a workflow when the task is a repeatable orchestration pattern, especially when it needs several of these: + +- Multiple phases with clear progress reporting (`phase`, `log`). +- Parallel sub-agent fan-out with stable roles or lanes. +- Structured output validation from sub-agents. +- Adversarial verification / cross-checking of candidate findings. +- Durable state so completed work is reused after resume/restart. 
+- A reusable slash-invokable process, like deep research or deep review. + +Do **not** create a workflow for a small one-off edit, a single simple investigation, or work that needs the conductor itself to run shell/filesystem/network operations. The conductor is intentionally limited; delegate those operations to sub-agents. + +## Before authoring + +1. Run `workflow_list` to see existing workflows. +2. If an existing workflow is close, run `workflow_read({ name })` and adapt the pattern. +3. For one-off drafts, write a scratch workflow: + + ```text + .mux/workflows/.scratch/<name>.js + ``` + +4. Use normal file tools (`file_read`, `file_edit_insert`, `file_edit_replace_string`) to author the JavaScript. +5. Run it by name with `workflow_run`. + +Scratch workflows must include a description header and a default exported function: + +```js +// description: Short workflow description +export default function workflow({ args, phase, log, agent, parallelAgents, applyPatch }) { + phase("scope", { input: args.input }); + return { reportMarkdown: "Done" }; +} +``` + +Reusable project workflows live in `.mux/workflows/<name>.js`; global workflows live in `~/.mux/workflows/<name>.js`. Project and scratch workflows require Project Trust. + +## Available workflow globals + +A workflow default export receives one object: + +```js +export default function workflow({ args, phase, log, agent, parallelAgents, applyPatch }) {} +``` + +### `args` + +The invocation payload from `workflow_run`. Plain-text slash args are passed as `{ input: "..." }`, so normalize `args.input` for commands like: + +```text +/workflow my-workflow review PR #123 +``` + +### `phase(name, details?)` + +Records a durable phase event shown in the run card. + +```js +phase("adversarial-verification", { candidateCount: issues.length }); +``` + +### `log(message, data?)` + +Records lightweight progress/details. 
+ +```js +log("Selected lanes", { lanes }); +``` + +### `agent(spec)` + +Runs one workflow-owned sub-agent and waits for its final report. + +Required fields: + +- `id`: stable step ID used for replay; never derive from unstable ordering unless the input ordering is stable. +- `prompt`: child task prompt. + +Optional fields: + +- `title`: UI title. +- `agentId`: sub-agent type/id; defaults to the workflow adapter default (usually `explore`). +- `outputSchema`: JSON Schema subset used to validate `structuredOutput`. + +Returns: + +```ts +{ + taskId: string, + reportMarkdown: string, + title?: string, + structuredOutput?: unknown +} +``` + +`taskId` is a host-issued patch artifact handle for workflow-owned child tasks. Pass the whole agent result as `applyPatch({ source: result })` instead of inventing task IDs. + +### `applyPatch(spec)` + +Applies a workflow-owned child task's git patch artifact to the current parent workspace. The host always dry-runs first in a temporary worktree and only performs the real apply when the dry-run succeeds. The conductor never receives raw patch text and cannot apply arbitrary patches. + +Required fields: + +- `id`: stable replay ID for this mutation step. +- `source` (or `from`): an `agent(...)` result, a `parallelAgents(...)` item, or a workflow-owned `taskId` string. + +Optional fields: + +- `target`: currently only `"parent"`; this is where the existing task patch artifact is applied. +- `projectPath` / `project_path`: limit a multi-project patch artifact to one project. +- `threeWay` / `three_way`: defaults to `true` and maps to `git am --3way`. +- `force`: allow re-apply / dirty-tree behavior exactly like `task_apply_git_patch`. +- `onConflict`: currently only `"return"`. 
+ +Returns structured status instead of throwing on ordinary patch conflicts: + +```ts +{ + success: boolean, + status: "applied" | "conflict" | "failed", + taskId: string, + projectResults?: unknown, + conflictPaths?: string[], + failedPatchSubject?: string, + error?: string, + note?: string +} +``` + +Conflict resolution should follow the old Orchestrator pattern: spawn a dedicated `exec` resolver, include the failing `taskId`, tell it to call `task_apply_git_patch` in its own workspace, resolve `git am` conflicts, commit the resolved result, report, and then call `applyPatch` on the resolver result. + +```js +const implementation = agent({ + id: "implement-auth-fix", + agentId: "exec", + prompt: execBrief, +}); + +let applied = applyPatch({ + id: "apply-auth-fix", + source: implementation, + target: "parent", + onConflict: "return", +}); + +if (applied.status === "conflict") { + const resolver = agent({ + id: "resolve-auth-fix-conflict", + agentId: "exec", + prompt: buildResolverBrief(applied), + }); + applied = applyPatch({ id: "apply-resolved-auth-fix", source: resolver }); +} +``` + +### `parallelAgents(specs)` + +Runs multiple `agent` specs concurrently and returns results in input order. Use this for review lanes, source summarization, claim verification, or other independent slices. + +```js +const laneResults = parallelAgents( + lanes.map((lane) => ({ + id: `review-${lane}`, + title: `Review ${lane}`, + prompt: lanePrompt(lane), + outputSchema: issueListSchema(), + })) +); +``` + +## Structured output schemas + +`outputSchema` supports this JSON Schema subset: + +- `type` +- `properties` +- `required` +- `items` +- `additionalProperties` +- `enum` + +Keep schemas small and strict. Use `additionalProperties: false` for deterministic outputs. 
+ +```js +function issueListSchema() { + return { + type: "object", + required: ["issues"], + additionalProperties: false, + properties: { + issues: { + type: "array", + items: { + type: "object", + required: ["title", "severity", "filePaths", "evidence"], + additionalProperties: false, + properties: { + title: { type: "string" }, + severity: { type: "string", enum: ["P0", "P1", "P2", "P3", "P4"] }, + filePaths: { type: "array", items: { type: "string" } }, + evidence: { type: "string" }, + }, + }, + }, + }, + }; +} +``` + +## Replay rules and gotchas + +- Every `agent` / `parallelAgents` item and every `applyPatch` call must have a stable `id`. +- The replay key includes the step ID and normalized spec, so changing prompts, schemas, patch source IDs, or apply options creates new work. +- `applyPatch` is a durable mutation effect: completed apply/conflict/failed results are replayed from the journal and are not re-applied on resume. +- The workflow conductor cannot call general tools, import modules, access Node, run shell, read files, use timers, or rely on `Date`/`Math.random`. +- Put shell/filesystem/web investigation inside delegated sub-agent prompts. +- Cap model-produced fan-out before calling `parallelAgents`. +- Return `{ reportMarkdown, structuredOutput }` so the parent agent and UI both get useful output. 
+ +## Minimal pattern + +```js +// description: Review a change with parallel lanes and verification +export default function workflow({ args, phase, log, agent, parallelAgents }) { + const target = normalizeTarget(args); + + phase("scope", { target }); + const scope = agent({ + id: "scope", + title: "Scope work", + prompt: "Identify review lanes for: " + target, + outputSchema: { + type: "object", + required: ["lanes"], + additionalProperties: false, + properties: { lanes: { type: "array", items: { type: "string" } } }, + }, + }); + + const lanes = scope.structuredOutput.lanes.slice(0, 6); + log("Running lanes", { lanes }); + + phase("lane-review", { lanes }); + const reviews = parallelAgents( + lanes.map(function (lane) { + return { + id: "review-" + lane, + title: "Review " + lane, + prompt: "Review " + target + " for " + lane + " issues.", + outputSchema: issueListSchema(), + }; + }) + ); + + phase("final-synthesis", { reviewCount: reviews.length }); + const final = agent({ + id: "synthesize", + title: "Synthesize result", + prompt: "Synthesize these structured review outputs: " + JSON.stringify(reviews), + }); + + return { reportMarkdown: final.reportMarkdown }; +} + +function normalizeTarget(args) { + if (typeof args === "string" && args.trim()) return args.trim(); + if (args && typeof args === "object") { + if (typeof args.target === "string" && args.target.trim()) return args.target.trim(); + if (typeof args.input === "string" && args.input.trim()) return args.input.trim(); + } + return "current workspace"; +} +``` diff --git a/src/node/config.ts b/src/node/config.ts index c964edf5ba..36aa2de4f3 100644 --- a/src/node/config.ts +++ b/src/node/config.ts @@ -1552,6 +1552,7 @@ export class Config { parentWorkspaceId: workspace.parentWorkspaceId, agentType: workspace.agentType, agentId: workspace.agentId, + workflowTask: workspace.workflowTask, bestOf: workspace.bestOf, taskStatus: workspace.taskStatus, reportedAt: workspace.reportedAt, @@ -1650,6 +1651,7 @@ 
export class Config { metadata.parentWorkspaceId ??= workspace.parentWorkspaceId; metadata.agentType ??= workspace.agentType; metadata.agentId ??= workspace.agentId; + metadata.workflowTask ??= workspace.workflowTask; metadata.bestOf ??= workspace.bestOf; metadata.taskStatus ??= workspace.taskStatus; metadata.reportedAt ??= workspace.reportedAt; @@ -1719,6 +1721,7 @@ export class Config { parentWorkspaceId: workspace.parentWorkspaceId, agentType: workspace.agentType, agentId: workspace.agentId, + workflowTask: workspace.workflowTask, bestOf: workspace.bestOf, taskStatus: workspace.taskStatus, reportedAt: workspace.reportedAt, @@ -1770,6 +1773,7 @@ export class Config { parentWorkspaceId: workspace.parentWorkspaceId, agentType: workspace.agentType, agentId: workspace.agentId, + workflowTask: workspace.workflowTask, bestOf: workspace.bestOf, taskStatus: workspace.taskStatus, reportedAt: workspace.reportedAt, @@ -1839,6 +1843,7 @@ export class Config { parentWorkspaceId: metadata.parentWorkspaceId, agentType: metadata.agentType, agentId: metadata.agentId, + workflowTask: metadata.workflowTask, bestOf: metadata.bestOf, taskStatus: metadata.taskStatus, reportedAt: metadata.reportedAt, diff --git a/src/node/orpc/context.ts b/src/node/orpc/context.ts index c6d725147b..9c978471f8 100644 --- a/src/node/orpc/context.ts +++ b/src/node/orpc/context.ts @@ -1,3 +1,4 @@ +import type { IJSRuntimeFactory } from "@/node/services/ptc/runtime"; import type { IncomingHttpHeaders } from "http"; import type { Config } from "@/node/config"; import type { AIService } from "@/node/services/aiService"; @@ -89,5 +90,6 @@ export interface ORPCContext { desktopSessionManager: DesktopSessionManager; desktopTokenManager: DesktopTokenManager; desktopBridgeServer: DesktopBridgeServer; + workflowRuntimeFactory: IJSRuntimeFactory; headers?: IncomingHttpHeaders; } diff --git a/src/node/orpc/router.test.ts b/src/node/orpc/router.test.ts index ced3ecaf38..9daa906a92 100644 --- 
a/src/node/orpc/router.test.ts +++ b/src/node/orpc/router.test.ts @@ -1,3 +1,4 @@ +/* eslint-disable @typescript-eslint/await-thenable, @typescript-eslint/no-unsafe-argument, @typescript-eslint/no-unsafe-assignment, @typescript-eslint/require-await, @typescript-eslint/restrict-template-expressions, local/no-sync-fs-methods */ import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test"; import { createRouterClient } from "@orpc/server"; import * as fs from "fs"; @@ -5,6 +6,9 @@ import * as os from "os"; import * as path from "path"; import { DEFAULT_TASK_SETTINGS } from "@/common/types/tasks"; import { Config } from "@/node/config"; +import { QuickJSRuntimeFactory } from "@/node/services/ptc/quickjsRuntime"; +import { ForegroundWaitBackgroundedError } from "@/node/services/taskService"; +import { WorkflowRunStore } from "@/node/services/workflows/WorkflowRunStore"; import type { ORPCContext } from "./context"; import { router } from "./router"; @@ -52,6 +56,340 @@ describe("router workspace goal validation", () => { }); }); +async function waitForRouterCondition( + description: string, + predicate: () => boolean +): Promise { + const deadline = Date.now() + 1_000; + while (Date.now() < deadline) { + if (predicate()) { + return; + } + await new Promise((resolve) => setTimeout(resolve, 10)); + } + throw new Error(`Timed out waiting for ${description}`); +} + +async function waitForRouterWorkflowStatus( + client: { + workflows: { + getRun(input: { workspaceId: string; runId: string }): Promise<{ status: string } | null>; + }; + }, + workspaceId: string, + runId: string, + status: string +): Promise { + const deadline = Date.now() + 1_000; + while (Date.now() < deadline) { + const run = await client.workflows.getRun({ workspaceId, runId }); + if (run?.status === status) { + return; + } + await new Promise((resolve) => setTimeout(resolve, 10)); + } + const run = await client.workflows.getRun({ workspaceId, runId }); + throw new Error(`Timed out waiting 
for ${runId} to become ${status}; got ${run?.status}`); +} + +describe("router workflow routes", () => { + let tempDir: string; + let config: Config; + let projectPath: string; + + beforeEach(async () => { + tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "mux-router-workflows-test-")); + config = new Config(tempDir); + projectPath = path.join(tempDir, "project"); + fs.mkdirSync(path.join(projectPath, ".mux", "workflows"), { recursive: true }); + fs.writeFileSync( + path.join(projectPath, ".mux", "workflows", "demo.js"), + `// description: Demo workflow\nexport default function workflow({ args }) { return { reportMarkdown: args.topic }; }\n` + ); + await config.editConfig((current) => { + current.projects.set(projectPath, { workspaces: [], trusted: true }); + return current; + }); + }); + + afterEach(() => { + fs.rmSync(tempDir, { recursive: true, force: true }); + }); + + function createContext(options: { enabled: boolean }): ORPCContext { + return { + workflowRuntimeFactory: new QuickJSRuntimeFactory(), + config, + aiService: { + waitForInit: mock(async () => undefined), + getWorkspaceMetadata: mock(async () => ({ + success: true, + data: { + id: "workspace-1", + name: "workspace-1", + projectPath, + namedWorkspacePath: projectPath, + runtimeConfig: { type: "local", srcBaseDir: tempDir }, + }, + })), + }, + workspaceService: { + appendWorkflowRunInvocation: mock(async () => true), + }, + taskService: {}, + experimentsService: { + isExperimentEnabled: mock(() => options.enabled), + }, + } as unknown as ORPCContext; + } + + test("lists workflow definitions only when dynamic workflows are enabled", async () => { + const disabledClient = createRouterClient(router(), { + context: createContext({ enabled: false }), + }); + await expect( + disabledClient.workflows.listDefinitions({ workspaceId: "workspace-1" }) + ).rejects.toThrow(/Dynamic workflows are disabled/); + + const enabledClient = createRouterClient(router(), { + context: createContext({ enabled: true }), + 
}); + await expect( + enabledClient.workflows.readDefinition({ workspaceId: "workspace-1", name: "demo" }) + ).resolves.toMatchObject({ + descriptor: expect.objectContaining({ name: "demo", scope: "project" }), + source: expect.stringContaining("reportMarkdown: args.topic"), + }); + await expect(enabledClient.workflows.listDefinitions({ projectPath })).resolves.toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: "demo", scope: "project", executable: true }), + ]) + ); + await expect( + enabledClient.workflows.listDefinitions({ workspaceId: "workspace-1" }) + ).resolves.toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: "demo", scope: "project", executable: true }), + ]) + ); + }); + + test("promotes a scratch workflow run through the API", async () => { + const runStore = new WorkflowRunStore({ sessionDir: config.getSessionDir("workspace-1") }); + await runStore.createRun({ + id: "wfr_scratch_api", + workspaceId: "workspace-1", + definition: { name: "scratch", description: "Scratch", scope: "scratch", executable: true }, + definitionSource: + "export default function workflow() { return { reportMarkdown: 'scratch api' }; }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const client = createRouterClient(router(), { context: createContext({ enabled: true }) }); + + await expect( + client.workflows.promoteScratch({ + workspaceId: "workspace-1", + runId: "wfr_scratch_api", + name: "scratch-api", + description: "Scratch API workflow", + location: "project", + overwrite: false, + }) + ).resolves.toMatchObject({ name: "scratch-api", scope: "project", executable: true }); + expect( + fs.readFileSync(path.join(projectPath, ".mux", "workflows", "scratch-api.js"), "utf-8") + ).toContain("Scratch API workflow"); + }); + + test("promotes a workspace scratch workflow definition through the API without a run", async () => { + const scratchRoot = path.join(projectPath, ".mux", "workflows", ".scratch"); + fs.mkdirSync(scratchRoot, { 
recursive: true }); + fs.writeFileSync( + path.join(scratchRoot, "scratch-draft.js"), + "// description: Scratch draft\nexport default function workflow() { return { reportMarkdown: 'scratch api' }; }\n", + "utf-8" + ); + const client = createRouterClient(router(), { context: createContext({ enabled: true }) }); + + await expect( + client.workflows.promoteScratchDefinition({ + workspaceId: "workspace-1", + name: "scratch-draft", + description: "Reusable draft workflow", + location: "project", + overwrite: false, + }) + ).resolves.toMatchObject({ name: "scratch-draft", scope: "project", executable: true }); + expect( + fs.readFileSync(path.join(projectPath, ".mux", "workflows", "scratch-draft.js"), "utf-8") + ).toContain("Reusable draft workflow"); + }); + + test("interrupts and resumes workflow runs through the API", async () => { + const runStore = new WorkflowRunStore({ sessionDir: config.getSessionDir("workspace-1") }); + await runStore.createRun({ + id: "wfr_api_resume", + workspaceId: "workspace-1", + definition: { name: "demo", description: "Demo", scope: "built-in", executable: true }, + definitionSource: + "export default function workflow() { return { reportMarkdown: 'resumed via api' }; }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + + const client = createRouterClient(router(), { context: createContext({ enabled: true }) }); + + await expect( + client.workflows.interrupt({ workspaceId: "workspace-1", runId: "wfr_api_resume" }) + ).resolves.toMatchObject({ id: "wfr_api_resume", status: "interrupted" }); + await expect( + client.workflows.resume({ workspaceId: "workspace-1", runId: "wfr_api_resume" }) + ).resolves.toEqual({ + runId: "wfr_api_resume", + status: "running", + result: null, + }); + await waitForRouterWorkflowStatus(client, "workspace-1", "wfr_api_resume", "completed"); + }); + + test("starts a trusted project-local workflow through the API", async () => { + const client = createRouterClient(router(), { context: createContext({ 
enabled: true }) }); + + const result = await client.workflows.start({ + workspaceId: "workspace-1", + name: "demo", + args: { topic: "workflow routes" }, + }); + + expect(result.status).toBe("completed"); + expect(result.runId).toMatch(/^wfr_/); + expect(result.result).toEqual({ reportMarkdown: "workflow routes" }); + + await expect( + client.workflows.getRun({ workspaceId: "workspace-1", runId: result.runId }) + ).resolves.toMatchObject({ + id: result.runId, + workspaceId: "workspace-1", + definition: expect.objectContaining({ name: "demo" }), + status: "completed", + }); + await expect(client.workflows.listRuns({ workspaceId: "workspace-1" })).resolves.toEqual([ + expect.objectContaining({ id: result.runId, status: "completed" }), + ]); + }); + + test("persists workflow slash invocations before returning", async () => { + const context = createContext({ enabled: true }); + const workspaceService = context.workspaceService as unknown as { + appendWorkflowRunInvocation: ReturnType; + }; + const client = createRouterClient(router(), { context }); + + const result = await client.workflows.start({ + workspaceId: "workspace-1", + name: "demo", + runInBackground: true, + args: { input: "workflow routes" }, + rawCommand: "/demo workflow routes", + }); + + expect(result).toMatchObject({ + status: "running", + invocationMessagePersisted: true, + }); + expect(workspaceService.appendWorkflowRunInvocation).toHaveBeenCalledWith( + expect.objectContaining({ + workspaceId: "workspace-1", + rawCommand: "/demo workflow routes", + name: "demo", + args: { input: "workflow routes" }, + runId: result.runId, + status: "running", + }) + ); + }); + + test("waits for foreground slash invocation persistence before terminal continuation", async () => { + fs.writeFileSync( + path.join(projectPath, ".mux", "workflows", "backgroundable.js"), + "// description: Backgroundable workflow\nexport default function workflow({ agent }) { return agent({ id: 'slow-step', prompt: 'slow' }); }\n" + ); + 
const context = createContext({ enabled: true }); + const workspaceService = context.workspaceService as unknown as { + appendWorkflowRunInvocation: ReturnType; + isWorkflowInvocationCurrent: ReturnType; + sendMessage: ReturnType; + }; + let releaseInvocationPersistence: (() => void) | undefined; + workspaceService.appendWorkflowRunInvocation = mock(async () => { + await new Promise((resolve) => { + releaseInvocationPersistence = resolve; + }); + return true; + }); + workspaceService.isWorkflowInvocationCurrent = mock(async () => true); + workspaceService.sendMessage = mock(async () => ({ success: true, data: {} })); + + let waitCalls = 0; + context.taskService = { + create: mock(async () => ({ success: true, data: { taskId: "task_slow" } })), + waitForAgentReport: mock(async () => { + waitCalls += 1; + if (waitCalls === 1) { + throw new ForegroundWaitBackgroundedError(); + } + return { reportMarkdown: "done" }; + }), + } as unknown as ORPCContext["taskService"]; + + const client = createRouterClient(router(), { context }); + const startPromise = client.workflows.start({ + workspaceId: "workspace-1", + name: "backgroundable", + args: { input: "slow" }, + rawCommand: "/backgroundable slow", + continuationOptions: { model: "test:model", agentId: "exec" }, + }); + + await waitForRouterCondition( + "foreground invocation persistence to start", + () => workspaceService.appendWorkflowRunInvocation.mock.calls.length === 1 + ); + await waitForRouterCondition( + "background resume to finish its agent wait", + () => waitCalls === 2 + ); + expect(workspaceService.sendMessage).not.toHaveBeenCalled(); + releaseInvocationPersistence?.(); + + const result = await startPromise; + expect(result).toMatchObject({ status: "backgrounded", invocationMessagePersisted: true }); + await waitForRouterCondition( + "workflow terminal continuation to send after invocation persistence", + () => workspaceService.sendMessage.mock.calls.length === 1 + ); + 
expect(workspaceService.sendMessage.mock.calls[0]?.[0]).toBe("workspace-1"); + expect(workspaceService.sendMessage.mock.calls[0]?.[1]).toContain(""); + }); + + test("starts a workflow in the background when requested through the API", async () => { + const client = createRouterClient(router(), { context: createContext({ enabled: true }) }); + + const result = await client.workflows.start({ + workspaceId: "workspace-1", + name: "demo", + runInBackground: true, + args: { topic: "background workflow routes" }, + }); + + expect(result.status).toBe("running"); + expect(result.runId).toMatch(/^wfr_/); + expect(result.result).toBeNull(); + await waitForRouterWorkflowStatus(client, "workspace-1", result.runId, "completed"); + }); +}); + describe("router config.saveConfig", () => { let tempDir: string; let config: Config; diff --git a/src/node/orpc/router.ts b/src/node/orpc/router.ts index 55ab8a8ab7..6e3fbf5e66 100644 --- a/src/node/orpc/router.ts +++ b/src/node/orpc/router.ts @@ -39,8 +39,9 @@ import type { LogEntry } from "@/node/services/logBuffer"; import { clearLogEntries, subscribeLogFeed } from "@/node/services/logBuffer"; import { createReplayBufferedStreamMessageRelay } from "./replayBufferedStreamMessageRelay"; +import { getRuntimeType } from "@/node/runtime/initHook"; import { createRuntime, checkRuntimeAvailability } from "@/node/runtime/runtimeFactory"; -import { createRuntimeForWorkspace } from "@/node/runtime/runtimeHelpers"; +import { createRuntimeForWorkspace, resolveWorkspaceRootPath } from "@/node/runtime/runtimeHelpers"; import { readPlanFile } from "@/node/utils/runtime/helpers"; import { secretsToRecord } from "@/common/types/secrets"; import { roundToBase2 } from "@/common/telemetry/utils"; @@ -94,8 +95,24 @@ import { type SubagentTranscriptArtifactIndexEntry, } from "@/node/services/subagentTranscriptArtifacts"; import { getErrorMessage } from "@/common/utils/errors"; +import { + shouldUseRuntimeWorkflowProjectIO, + WorkflowDefinitionStore, +} from 
"@/node/services/workflows/WorkflowDefinitionStore";
+import { WorkflowRunStore } from "@/node/services/workflows/WorkflowRunStore";
+import {
+  WorkflowService,
+  type WorkflowBackgroundRunTerminalEvent,
+} from "@/node/services/workflows/WorkflowService";
+import { WorkflowTaskServiceAdapter } from "@/node/services/workflows/WorkflowTaskServiceAdapter";
+import { resolveWorkflowScratchRoots } from "@/node/services/workflows/workflowScratchRoots";
 import { isProjectTrusted } from "@/node/utils/projectTrust";
 
+import {
+  WORKFLOW_RESULT_METADATA_TYPE,
+  buildWorkflowResultContextMessage,
+} from "@/common/utils/workflowRunMessages";
+
 const RAW_QUERY_USER_ERROR_PATTERNS = [
   /^parser error:/i,
   /^binder error:/i,
@@ -158,6 +175,107 @@ function isTrustedProjectPath(context: ORPCContext, projectPath?: string | null)
   return isProjectTrusted(context.config, projectPath);
 }
 
+/**
+ * Route guard: rejects the request unless the DYNAMIC_WORKFLOWS experiment is enabled.
+ *
+ * @throws ORPCError with code BAD_REQUEST when the experiment is off.
+ */
+function assertDynamicWorkflowsEnabled(context: ORPCContext): void {
+  if (!context.experimentsService.isExperimentEnabled(EXPERIMENT_IDS.DYNAMIC_WORKFLOWS)) {
+    throw new ORPCError("BAD_REQUEST", {
+      message: "Dynamic workflows are disabled",
+    });
+  }
+}
+
+/**
+ * Builds a WorkflowService bound to one workspace.
+ *
+ * Verifies the dynamic-workflows experiment, waits for the workspace's AI service init,
+ * loads the workspace metadata, then wires the definition store, run store and task adapter
+ * from the workspace runtime, root path and session directory.
+ *
+ * @param context - Request context providing config and services.
+ * @param workspaceId - Target workspace; must be non-empty (asserted).
+ * @param options - `onBackgroundRunTerminal` is forwarded to the service only when provided.
+ * @returns The service and the project's trust state as evaluated during this call.
+ * @throws ORPCError (BAD_REQUEST) when dynamic workflows are disabled.
+ * @throws Error carrying the metadata error when workspace metadata cannot be loaded.
+ */
+async function resolveWorkflowContext(
+  context: ORPCContext,
+  workspaceId: string,
+  options: {
+    onBackgroundRunTerminal?: (event: WorkflowBackgroundRunTerminalEvent) => Promise<void> | void;
+  } = {}
+): Promise<{ service: WorkflowService; projectTrusted: boolean }> {
+  assert(workspaceId.length > 0, "resolveWorkflowContext: workspaceId is required");
+  assertDynamicWorkflowsEnabled(context);
+  await context.aiService.waitForInit(workspaceId);
+  const metadataResult = await context.aiService.getWorkspaceMetadata(workspaceId);
+  if (!metadataResult.success) {
+    throw new Error(metadataResult.error);
+  }
+  const metadata = metadataResult.data;
+  const projectTrusted = isTrustedProjectPath(context, metadata.projectPath);
+  const runtime = createRuntimeForWorkspace(metadata);
+  const workspacePath = resolveWorkspaceRootPath(metadata, runtime);
+  const runtimeType = getRuntimeType(metadata.runtimeConfig);
+  const useRuntimeProjectIO = shouldUseRuntimeWorkflowProjectIO(runtimeType);
+  const workflowScratchRoots = resolveWorkflowScratchRoots(context.config, workspaceId, {
+    workspaceRootPath: workspacePath,
+    normalizePath: runtime.normalizePath.bind(runtime),
+  });
+
+  const subagentFileReportsExperimentEnabled = context.experimentsService.isExperimentEnabled(
+    EXPERIMENT_IDS.SUBAGENT_FILE_REPORTS
+  );
+
+  const workflowRuntimeTempDir = runtime.normalizePath(".mux/tmp", workspacePath);
+
+  return {
+    projectTrusted,
+    service: new WorkflowService({
+      definitionStore: new WorkflowDefinitionStore({
+        projectRoot: runtime.normalizePath(".mux/workflows", workspacePath),
+        globalRoot: path.join(context.config.rootDir, "workflows"),
+        scratchRoot: workflowScratchRoots.scratchRoot,
+        // Runtime-backed project IO is only wired in when the runtime type calls for it.
+        projectRuntime: useRuntimeProjectIO ? runtime : undefined,
+        projectCwd: useRuntimeProjectIO ? workspacePath : undefined,
+      }),
+      runStore: new WorkflowRunStore({ sessionDir: context.config.getSessionDir(workspaceId) }),
+      runtimeFactory: context.workflowRuntimeFactory,
+      taskAdapterFactory: (runId) =>
+        new WorkflowTaskServiceAdapter({
+          taskService: context.taskService,
+          parentWorkspaceId: workspaceId,
+          workflowRunId: runId,
+          defaultAgentId: "explore",
+          patchToolConfig: {
+            workspaceId,
+            cwd: workspacePath,
+            runtime,
+            runtimeTempDir: workflowRuntimeTempDir,
+            workspaceSessionDir: context.config.getSessionDir(workspaceId),
+            // Snapshot taken above; getProjectTrusted below re-evaluates trust on each call.
+            trusted: projectTrusted,
+          },
+          getProjectTrusted: () => isTrustedProjectPath(context, metadata.projectPath),
+          experiments: {
+            dynamicWorkflows: true,
+            subagentFileReports: subagentFileReportsExperimentEnabled,
+          },
+        }),
+      ...(options.onBackgroundRunTerminal != null
+        ? { onBackgroundRunTerminal: options.onBackgroundRunTerminal }
+        : {}),
+      getCurrentProjectTrusted: () => isTrustedProjectPath(context, metadata.projectPath),
+      runnerId: `workflow-runner:${workspaceId}`,
+    }),
+  };
+}
+
 function normalizeOptionalConfigString(value: string | null | undefined): string | undefined {
   const trimmedValue = value?.trim();
   if (!trimmedValue) {
@@ -1564,6 +1682,284 @@ export const router = (authToken?: string) => {
           return result.package;
         }),
     },
+    workflows: {
+      // Lists definitions through the workspace's WorkflowService when workspaceId is given;
+      // otherwise uses a WorkflowDefinitionStore rooted at the supplied projectPath.
+      listDefinitions: t
+        .input(schemas.workflows.listDefinitions.input)
+        .output(schemas.workflows.listDefinitions.output)
+        .handler(async ({ context, input }) => {
+          if (input.workspaceId != null) {
+            const { service, projectTrusted } = await resolveWorkflowContext(
+              context,
+              input.workspaceId
+            );
+            return service.listDefinitions({ projectTrusted });
+          }
+
+          assertDynamicWorkflowsEnabled(context);
+          assert(
+            input.projectPath != null,
+            "Workflow definition discovery requires a project path"
+          );
+          const definitionStore = new WorkflowDefinitionStore({
+            projectRoot: path.join(input.projectPath, ".mux", "workflows"),
+            globalRoot: path.join(context.config.rootDir, "workflows"),
+          });
+          return definitionStore.listDefinitions({
+            projectTrusted: isTrustedProjectPath(context, input.projectPath),
+          });
+        }),
+      // Delegates to service.readDefinition for the named workflow.
+      readDefinition: t
+        .input(schemas.workflows.readDefinition.input)
+        .output(schemas.workflows.readDefinition.output)
+        .handler(async ({ context, input }) => {
+          const { service, projectTrusted } = await resolveWorkflowContext(
+            context,
+            input.workspaceId
+          );
+          return service.readDefinition({ name: input.name, projectTrusted });
+        }),
+      // Delegates to service.listRuns for the workspace.
+      listRuns: t
+        .input(schemas.workflows.listRuns.input)
+        .output(schemas.workflows.listRuns.output)
+        .handler(async ({ context, input }) => {
+          const { service, projectTrusted } = await resolveWorkflowContext(
+            context,
+            input.workspaceId
+          );
+          // Crashed runs are resumed before the run list is read.
+          await service.resumeCrashedRuns({ workspaceId: input.workspaceId, projectTrusted });
+          return service.listRuns({ workspaceId: input.workspaceId });
+        }),
+      // Delegates to service.getRun for a single run id.
+      getRun: t
+        .input(schemas.workflows.getRun.input)
+        .output(schemas.workflows.getRun.output)
+        .handler(async ({ context, input }) => {
+          const { service } = await resolveWorkflowContext(context, input.workspaceId);
+          return service.getRun({ workspaceId: input.workspaceId, runId: input.runId });
+        }),
+      // Delegates to service.interruptRun for a single run id.
+      interrupt: t
+        .input(schemas.workflows.interrupt.input)
+        .output(schemas.workflows.interrupt.output)
+        .handler(async ({ context, input }) => {
+          const { service } = await resolveWorkflowContext(context, input.workspaceId);
+          return service.interruptRun({ workspaceId: input.workspaceId, runId: input.runId });
+        }),
+      // Delegates to service.resumeRunInBackground for a single run id.
+      resume: t
+        .input(schemas.workflows.resume.input)
+        .output(schemas.workflows.resume.output)
+        .handler(async ({ context, input }) => {
+          const { service, projectTrusted } = await resolveWorkflowContext(
+            context,
+            input.workspaceId
+          );
+          return service.resumeRunInBackground({
+            workspaceId: input.workspaceId,
+            runId: input.runId,
+            projectTrusted,
+          });
+        }),
+      // Delegates to service.promoteScratchDefinition (by name); overwrite defaults to false.
+      promoteScratchDefinition: t
+        .input(schemas.workflows.promoteScratchDefinition.input)
+        .output(schemas.workflows.promoteScratchDefinition.output)
+        .handler(async ({ context, input }) => {
+          const { service, projectTrusted } = await resolveWorkflowContext(
+            context,
+            input.workspaceId
+          );
+          return service.promoteScratchDefinition({
+            workspaceId: input.workspaceId,
+            name: input.name,
+            description: input.description,
+            location: input.location,
+            overwrite: input.overwrite ?? false,
+            projectTrusted,
+          });
+        }),
+      // Delegates to service.promoteScratchWorkflow (by runId); overwrite defaults to false.
+      promoteScratch: t
+        .input(schemas.workflows.promoteScratch.input)
+        .output(schemas.workflows.promoteScratch.output)
+        .handler(async ({ context, input }) => {
+          const { service, projectTrusted } = await resolveWorkflowContext(
+            context,
+            input.workspaceId
+          );
+          return service.promoteScratchWorkflow({
+            workspaceId: input.workspaceId,
+            runId: input.runId,
+            name: input.name,
+            description: input.description,
+            location: input.location,
+            overwrite: input.overwrite ?? false,
+            projectTrusted,
+          });
+        }),
+      // Starts a named workflow, in the foreground unless runInBackground is true. When rawCommand
+      // is present the invocation is recorded through workspaceService.appendWorkflowRunInvocation
+      // and the response additionally carries invocationMessagePersisted.
+      start: t
+        .input(schemas.workflows.start.input)
+        .output(schemas.workflows.start.output)
+        .handler(async ({ context, input, signal }) => {
+          assertDynamicWorkflowsEnabled(context);
+          let invocationMessagePersisted: boolean | undefined;
+          let resolveInvocationPersistence: (persisted: boolean) => void = () => undefined;
+          // Gate awaited by the terminal continuation below; it is settled once invocation
+          // persistence has been attempted (true only when the invocation was persisted).
+          const invocationPersistence = new Promise<boolean>((resolve) => {
+            resolveInvocationPersistence = resolve;
+          });
+          const rawCommandForContinuation = input.rawCommand;
+          const continuationOptions = input.continuationOptions;
+          // Continuation for runs that reach a terminal state in the background: sends the workflow
+          // result to the workspace, but only if the invocation was persisted and is still current.
+          const onBackgroundRunTerminal =
+            rawCommandForContinuation != null && continuationOptions != null
+              ? async ({ runId, status, result, run }: WorkflowBackgroundRunTerminalEvent) => {
+                  const persistedInvocation =
+                    invocationMessagePersisted === true ? true : await invocationPersistence;
+                  if (persistedInvocation !== true) {
+                    log.warn("Skipping slash workflow continuation without persisted invocation", {
+                      workspaceId: input.workspaceId,
+                      runId,
+                    });
+                    return;
+                  }
+                  const invocationCurrent =
+                    await context.workspaceService.isWorkflowInvocationCurrent(
+                      input.workspaceId,
+                      runId
+                    );
+                  if (!invocationCurrent) {
+                    log.debug("Skipping superseded slash workflow continuation", {
+                      workspaceId: input.workspaceId,
+                      runId,
+                    });
+                    return;
+                  }
+                  // split() on a blank or whitespace-led command yields "" first, and `??` keeps "";
+                  // trim and treat an empty token as missing so we fall back to the workflow name.
+                  const firstToken = rawCommandForContinuation.trim().split(/\s+/u)[0];
+                  const commandPrefix =
+                    firstToken != null && firstToken.length > 0 ? firstToken : input.name;
+                  const workflowResultMessage = buildWorkflowResultContextMessage({
+                    rawCommand: rawCommandForContinuation,
+                    name: input.name,
+                    runId,
+                    status,
+                    result,
+                    run,
+                  });
+                  const sendResult = await context.workspaceService.sendMessage(
+                    input.workspaceId,
+                    workflowResultMessage,
+                    {
+                      ...continuationOptions,
+                      skipAiSettingsPersistence: true,
+                      muxMetadata: {
+                        type: WORKFLOW_RESULT_METADATA_TYPE,
+                        rawCommand: rawCommandForContinuation,
+                        commandPrefix,
+                        runId,
+                        requestedModel: continuationOptions.model,
+                      },
+                    },
+                    {
+                      skipAutoResumeReset: true,
+                      synthetic: true,
+                      agentInitiated: true,
+                      requireIdle: true,
+                      startStreamInBackground: true,
+                    }
+                  );
+                  if (!sendResult.success) {
+                    log.warn("Failed to continue slash workflow after completion", {
+                      workspaceId: input.workspaceId,
+                      runId,
+                      error: sendResult.error,
+                    });
+                  }
+                }
+              : undefined;
+          const { service, projectTrusted } = await resolveWorkflowContext(
+            context,
+            input.workspaceId,
+            {
+              ...(onBackgroundRunTerminal != null ? { onBackgroundRunTerminal } : {}),
+            }
+          );
+          const workflowStartArgs = {
+            name: input.name,
+            workspaceId: input.workspaceId,
+            projectTrusted,
+            args: input.args ?? {},
+          };
+          const persistInvocation = async (details: {
+            runId: string;
+            status: string;
+            result: unknown;
+            run?: NonNullable<Awaited<ReturnType<typeof service.getRun>>>;
+          }) => {
+            assert(input.rawCommand != null, "Workflow invocation persistence requires rawCommand");
+            try {
+              invocationMessagePersisted =
+                await context.workspaceService.appendWorkflowRunInvocation({
+                  workspaceId: input.workspaceId,
+                  rawCommand: input.rawCommand,
+                  name: input.name,
+                  args: workflowStartArgs.args,
+                  runId: details.runId,
+                  status: details.status,
+                  result: details.result,
+                  ...(details.run != null ? { run: details.run } : {}),
+                });
+            } finally {
+              resolveInvocationPersistence(invocationMessagePersisted === true);
+            }
+          };
+          try {
+            const result =
+              input.runInBackground === true
+                ? await service.startNamedWorkflowInBackground({
+                    ...workflowStartArgs,
+                    ...(input.rawCommand != null
+                      ? { onBackgroundRunCreated: persistInvocation }
+                      : {}),
+                  })
+                : await service.startNamedWorkflow({ ...workflowStartArgs, abortSignal: signal });
+            if (input.rawCommand == null) {
+              return result;
+            }
+            // Foreground runs persist the invocation here, after the start call has returned.
+            if (input.runInBackground !== true) {
+              const run = await service.getRun({
+                workspaceId: input.workspaceId,
+                runId: result.runId,
+              });
+              await persistInvocation({
+                runId: result.runId,
+                status: result.status,
+                result: result.result,
+                ...(run != null ? { run } : {}),
+              });
+            }
+            return { ...result, invocationMessagePersisted };
+          } catch (error) {
+            // Settle the gate on failure so a pending terminal continuation stops waiting;
+            // this is a no-op when persistence already settled it.
+            resolveInvocationPersistence(false);
+            throw error;
+          }
+        }),
+    },
     providers: {
       list: t
         .input(schemas.providers.list.input)
diff --git a/src/node/services/agentSession.goalAutoPause.test.ts b/src/node/services/agentSession.goalAutoPause.test.ts
index 2ee2f1c8b3..d3b7e37676 100644
--- a/src/node/services/agentSession.goalAutoPause.test.ts
+++ b/src/node/services/agentSession.goalAutoPause.test.ts
@@ -639,7 +639,10 @@ describe("AgentSession goal safety hooks", () => {
       });
 
       expect(result.success).toBe(true);
-      await new Promise((resolve) => setTimeout(resolve, 50));
+      await waitForCondition(
+        async () => (await goalService.getGoal(workspaceId))?.status === "complete",
+        { timeoutMs: 5_000 }
+      );
       expect(await goalService.getGoal(workspaceId)).toMatchObject({
         status: "complete",
         completionSummary: "All wrapped up.",
diff --git a/src/node/services/agentSkills/agentSkillsService.test.ts b/src/node/services/agentSkills/agentSkillsService.test.ts
index 54e2af3b00..1cc695a2cc 100644
--- a/src/node/services/agentSkills/agentSkillsService.test.ts
+++ b/src/node/services/agentSkills/agentSkillsService.test.ts
@@ -257,6 +257,7 @@ describe("agentSkillsService", () => {
         "mux-docs",
         "orchestrate",
         "spawn",
+        "workflow-authoring",
       ]);
 
       const foo = skills.find((s) => s.name === "foo");
@@ -664,6 +665,7 @@ describe("agentSkillsService", () => {
         "mux-docs",
         "orchestrate",
         "spawn",
+        "workflow-authoring",
       ]);
const invalidNames = diagnostics.invalidSkills.map((issue) => issue.directoryName).sort(); diff --git a/src/node/services/agentSkills/builtInSkillContent.generated.ts b/src/node/services/agentSkills/builtInSkillContent.generated.ts index 7171e1d1b9..1e773bf8dd 100644 --- a/src/node/services/agentSkills/builtInSkillContent.generated.ts +++ b/src/node/services/agentSkills/builtInSkillContent.generated.ts @@ -4026,12 +4026,15 @@ export const BUILTIN_SKILL_FILES: Record> = { "", "", "
    ", - "agent_report (2)", + "agent_report (5)", "", - "| Env var | JSON path | Type | Description |", - "| -------------------------------- | ---------------- | ------ | ----------- |", - "| `MUX_TOOL_INPUT_REPORT_MARKDOWN` | `reportMarkdown` | string | — |", - "| `MUX_TOOL_INPUT_TITLE` | `title` | string | — |", + "| Env var | JSON path | Type | Description |", + "| --------------------------------------- | ---------------------- | ------- | ------------------------------------------------------------------------------------ |", + "| `MUX_TOOL_INPUT_REPORT_MARKDOWN` | `reportMarkdown` | string | — |", + "| `MUX_TOOL_INPUT_REPORT_MARKDOWN_PATH` | `reportMarkdownPath` | string | Path to the markdown report file, usually report.md in the workspace root |", + "| `MUX_TOOL_INPUT_STRUCTURED_OUTPUT` | `structuredOutput` | unknown | — |", + "| `MUX_TOOL_INPUT_STRUCTURED_OUTPUT_PATH` | `structuredOutputPath` | string | Path to a JSON file containing the structured output, usually structured-output.json |", + "| `MUX_TOOL_INPUT_TITLE` | `title` | string | — |", "", "
    ", "", @@ -4432,8 +4435,8 @@ export const BUILTIN_SKILL_FILES: Record> = { "| `MUX_TOOL_INPUT_FILTER` | `filter` | string | Optional regex to filter bash task output lines. By default, only matching lines are returned. When filter_exclude is true, matching lines are excluded instead. Non-matching lines are discarded and cannot be retrieved later. |", "| `MUX_TOOL_INPUT_FILTER_EXCLUDE` | `filter_exclude` | boolean | When true, lines matching 'filter' are excluded instead of kept. Requires 'filter' to be set. |", "| `MUX_TOOL_INPUT_MIN_COMPLETED` | `min_completed` | number | Number of awaited tasks that must complete before this call returns. Defaults to 1, so by default task_await returns as soon as the FIRST awaited task completes, letting you act on it while the rest keep running. The result still includes every task complete at that moment plus current status (running/queued) for the rest. Tasks that have not yet completed keep running and remain re-awaitable on a later task_await call. Raise this (e.g. set it to the total number of awaited tasks) when you genuinely need more before proceeding — for example best-of-N synthesis that must compare every candidate. Clamped to the number of awaited tasks; values above that behave like 'wait for all'. |", - "| `MUX_TOOL_INPUT_TASK_IDS_` | `task_ids[]` | string | List of task IDs to await — use only real IDs returned by prior task, bash, or task_list tool results; never fabricate an ID. When omitted, waits for all active descendant tasks of the current workspace. |", - "| `MUX_TOOL_INPUT_TASK_IDS_COUNT` | `task_ids.length` | number | Number of elements in task_ids (List of task IDs to await — use only real IDs returned by prior task, bash, or task_list tool results; never fabricate an ID. When omitted, waits for all active descendant tasks of the current workspace.) 
|", + "| `MUX_TOOL_INPUT_TASK_IDS_` | `task_ids[]` | string | List of task IDs or workflow run IDs to await — use only real IDs returned by prior task, bash, workflow_run, or task_list tool results; never fabricate an ID. When omitted, waits for all active descendant tasks and workflow runs of the current workspace. |", + "| `MUX_TOOL_INPUT_TASK_IDS_COUNT` | `task_ids.length` | number | Number of elements in task_ids (List of task IDs or workflow run IDs to await — use only real IDs returned by prior task, bash, workflow_run, or task_list tool results; never fabricate an ID. When omitted, waits for all active descendant tasks and workflow runs of the current workspace.) |", "| `MUX_TOOL_INPUT_TIMEOUT_SECS` | `timeout_secs` | number | Maximum time to wait in seconds for each task. For bash tasks, this waits for NEW output (or process exit). If exceeded, the result returns status=queued\\|running\\|awaiting_report (task is still active). Defaults to 600 seconds (10 minutes) if not specified. Set to 0 for a non-blocking status check. |", "", "", @@ -4478,6 +4481,26 @@ export const BUILTIN_SKILL_FILES: Record> = { "", "", "", + "
    ", + "workflow_read (1)", + "", + "| Env var | JSON path | Type | Description |", + "| --------------------- | --------- | ------ | ----------- |", + "| `MUX_TOOL_INPUT_NAME` | `name` | string | — |", + "", + "
    ", + "", + "
    ", + "workflow_run (3)", + "", + "| Env var | JSON path | Type | Description |", + "| ---------------------------------- | ------------------- | ------- | ----------- |", + "| `MUX_TOOL_INPUT_ARGS` | `args` | unknown | — |", + "| `MUX_TOOL_INPUT_NAME` | `name` | string | — |", + "| `MUX_TOOL_INPUT_RUN_IN_BACKGROUND` | `run_in_background` | boolean | — |", + "", + "
    ", + "", "{/* END TOOL_HOOK_ENV_VARS */}", "", "", @@ -6643,4 +6666,292 @@ export const BUILTIN_SKILL_FILES: Record> = { "", ].join("\n"), }, + "workflow-authoring": { + "SKILL.md": [ + "---", + "name: workflow-authoring", + "description: Author durable JavaScript workflows for repeatable multi-agent orchestration", + "---", + "", + "# Workflow Authoring", + "", + "Use this skill **before writing or editing a workflow definition**. Workflows are durable JavaScript conductors that coordinate sub-agent tasks, validate structured reports, and preserve run state for replay/resume.", + "", + "## When to use a workflow", + "", + "Prefer a workflow when the task is a repeatable orchestration pattern, especially when it needs several of these:", + "", + "- Multiple phases with clear progress reporting (`phase`, `log`).", + "- Parallel sub-agent fan-out with stable roles or lanes.", + "- Structured output validation from sub-agents.", + "- Adversarial verification / cross-checking of candidate findings.", + "- Durable state so completed work is reused after resume/restart.", + "- A reusable slash-invokable process, like deep research or deep review.", + "", + "Do **not** create a workflow for a small one-off edit, a single simple investigation, or work that needs the conductor itself to run shell/filesystem/network operations. The conductor is intentionally limited; delegate those operations to sub-agents.", + "", + "## Before authoring", + "", + "1. Run `workflow_list` to see existing workflows.", + "2. If an existing workflow is close, run `workflow_read({ name })` and adapt the pattern.", + "3. For one-off drafts, write a scratch workflow:", + "", + " ```text", + " .mux/workflows/.scratch/.js", + " ```", + "", + "4. Use normal file tools (`file_read`, `file_edit_insert`, `file_edit_replace_string`) to author the JavaScript.", + "5. 
Run it by name with `workflow_run`.", + "", + "Scratch workflows must include a description header and a default exported function:", + "", + "```js", + "// description: Short workflow description", + "export default function workflow({ args, phase, log, agent, parallelAgents, applyPatch }) {", + ' phase("scope", { input: args.input });', + ' return { reportMarkdown: "Done" };', + "}", + "```", + "", + "Reusable project workflows live in `.mux/workflows/.js`; global workflows live in `~/.mux/workflows/.js`. Project and scratch workflows require Project Trust.", + "", + "## Available workflow globals", + "", + "A workflow default export receives one object:", + "", + "```js", + "export default function workflow({ args, phase, log, agent, parallelAgents, applyPatch }) {}", + "```", + "", + "### `args`", + "", + 'The invocation payload from `workflow_run`. Plain-text slash args are passed as `{ input: "..." }`, so normalize `args.input` for commands like:', + "", + "```text", + "/workflow my-workflow review PR #123", + "```", + "", + "### `phase(name, details?)`", + "", + "Records a durable phase event shown in the run card.", + "", + "```js", + 'phase("adversarial-verification", { candidateCount: issues.length });', + "```", + "", + "### `log(message, data?)`", + "", + "Records lightweight progress/details.", + "", + "```js", + 'log("Selected lanes", { lanes });', + "```", + "", + "### `agent(spec)`", + "", + "Runs one workflow-owned sub-agent and waits for its final report.", + "", + "Required fields:", + "", + "- `id`: stable step ID used for replay; never derive from unstable ordering unless the input ordering is stable.", + "- `prompt`: child task prompt.", + "", + "Optional fields:", + "", + "- `title`: UI title.", + "- `agentId`: sub-agent type/id; defaults to the workflow adapter default (usually `explore`).", + "- `outputSchema`: JSON Schema subset used to validate `structuredOutput`.", + "", + "Returns:", + "", + "```ts", + "{", + " taskId: string,", + " 
reportMarkdown: string,", + " title?: string,", + " structuredOutput?: unknown", + "}", + "```", + "", + "`taskId` is a host-issued patch artifact handle for workflow-owned child tasks. Pass the whole agent result as `applyPatch({ source: result })` instead of inventing task IDs.", + "", + "### `applyPatch(spec)`", + "", + "Applies a workflow-owned child task's git patch artifact to the current parent workspace. The host always dry-runs first in a temporary worktree and only performs the real apply when the dry-run succeeds. The conductor never receives raw patch text and cannot apply arbitrary patches.", + "", + "Required fields:", + "", + "- `id`: stable replay ID for this mutation step.", + "- `source` (or `from`): an `agent(...)` result, a `parallelAgents(...)` item, or a workflow-owned `taskId` string.", + "", + "Optional fields:", + "", + '- `target`: currently only `"parent"`; this is where the existing task patch artifact is applied.', + "- `projectPath` / `project_path`: limit a multi-project patch artifact to one project.", + "- `threeWay` / `three_way`: defaults to `true` and maps to `git am --3way`.", + "- `force`: allow re-apply / dirty-tree behavior exactly like `task_apply_git_patch`.", + '- `onConflict`: currently only `"return"`.', + "", + "Returns structured status instead of throwing on ordinary patch conflicts:", + "", + "```ts", + "{", + " success: boolean,", + ' status: "applied" | "conflict" | "failed",', + " taskId: string,", + " projectResults?: unknown,", + " conflictPaths?: string[],", + " failedPatchSubject?: string,", + " error?: string,", + " note?: string", + "}", + "```", + "", + "Conflict resolution should follow the old Orchestrator pattern: spawn a dedicated `exec` resolver, include the failing `taskId`, tell it to call `task_apply_git_patch` in its own workspace, resolve `git am` conflicts, commit the resolved result, report, and then call `applyPatch` on the resolver result.", + "", + "```js", + "const implementation = agent({", 
+ ' id: "implement-auth-fix",', + ' agentId: "exec",', + " prompt: execBrief,", + "});", + "", + "let applied = applyPatch({", + ' id: "apply-auth-fix",', + " source: implementation,", + ' target: "parent",', + ' onConflict: "return",', + "});", + "", + 'if (applied.status === "conflict") {', + " const resolver = agent({", + ' id: "resolve-auth-fix-conflict",', + ' agentId: "exec",', + " prompt: buildResolverBrief(applied),", + " });", + ' applied = applyPatch({ id: "apply-resolved-auth-fix", source: resolver });', + "}", + "```", + "", + "### `parallelAgents(specs)`", + "", + "Runs multiple `agent` specs concurrently and returns results in input order. Use this for review lanes, source summarization, claim verification, or other independent slices.", + "", + "```js", + "const laneResults = parallelAgents(", + " lanes.map((lane) => ({", + " id: `review-${lane}`,", + " title: `Review ${lane}`,", + " prompt: lanePrompt(lane),", + " outputSchema: issueListSchema(),", + " }))", + ");", + "```", + "", + "## Structured output schemas", + "", + "`outputSchema` supports this JSON Schema subset:", + "", + "- `type`", + "- `properties`", + "- `required`", + "- `items`", + "- `additionalProperties`", + "- `enum`", + "", + "Keep schemas small and strict. 
Use `additionalProperties: false` for deterministic outputs.", + "", + "```js", + "function issueListSchema() {", + " return {", + ' type: "object",', + ' required: ["issues"],', + " additionalProperties: false,", + " properties: {", + " issues: {", + ' type: "array",', + " items: {", + ' type: "object",', + ' required: ["title", "severity", "filePaths", "evidence"],', + " additionalProperties: false,", + " properties: {", + ' title: { type: "string" },', + ' severity: { type: "string", enum: ["P0", "P1", "P2", "P3", "P4"] },', + ' filePaths: { type: "array", items: { type: "string" } },', + ' evidence: { type: "string" },', + " },", + " },", + " },", + " },", + " };", + "}", + "```", + "", + "## Replay rules and gotchas", + "", + "- Every `agent` / `parallelAgents` item and every `applyPatch` call must have a stable `id`.", + "- The replay key includes the step ID and normalized spec, so changing prompts, schemas, patch source IDs, or apply options creates new work.", + "- `applyPatch` is a durable mutation effect: completed apply/conflict/failed results are replayed from the journal and are not re-applied on resume.", + "- The workflow conductor cannot call general tools, import modules, access Node, run shell, read files, use timers, or rely on `Date`/`Math.random`.", + "- Put shell/filesystem/web investigation inside delegated sub-agent prompts.", + "- Cap model-produced fan-out before calling `parallelAgents`.", + "- Return `{ reportMarkdown, structuredOutput }` so the parent agent and UI both get useful output.", + "", + "## Minimal pattern", + "", + "```js", + "// description: Review a change with parallel lanes and verification", + "export default function workflow({ args, phase, log, agent, parallelAgents }) {", + " const target = normalizeTarget(args);", + "", + ' phase("scope", { target });', + " const scope = agent({", + ' id: "scope",', + ' title: "Scope work",', + ' prompt: "Identify review lanes for: " + target,', + " outputSchema: {", + ' type: 
"object",', + ' required: ["lanes"],', + " additionalProperties: false,", + ' properties: { lanes: { type: "array", items: { type: "string" } } },', + " },", + " });", + "", + " const lanes = scope.structuredOutput.lanes.slice(0, 6);", + ' log("Running lanes", { lanes });', + "", + ' phase("lane-review", { lanes });', + " const reviews = parallelAgents(", + " lanes.map(function (lane) {", + " return {", + ' id: "review-" + lane,', + ' title: "Review " + lane,', + ' prompt: "Review " + target + " for " + lane + " issues.",', + " outputSchema: issueListSchema(),", + " };", + " })", + " );", + "", + ' phase("final-synthesis", { reviewCount: reviews.length });', + " const final = agent({", + ' id: "synthesize",', + ' title: "Synthesize result",', + ' prompt: "Synthesize these structured review outputs: " + JSON.stringify(reviews),', + " });", + "", + " return { reportMarkdown: final.reportMarkdown };", + "}", + "", + "function normalizeTarget(args) {", + ' if (typeof args === "string" && args.trim()) return args.trim();', + ' if (args && typeof args === "object") {', + ' if (typeof args.target === "string" && args.target.trim()) return args.target.trim();', + ' if (typeof args.input === "string" && args.input.trim()) return args.input.trim();', + " }", + ' return "current workspace";', + "}", + "```", + "", + ].join("\n"), + }, }; diff --git a/src/node/services/aiService.test.ts b/src/node/services/aiService.test.ts index 6d77aa22e9..7d066fee50 100644 --- a/src/node/services/aiService.test.ts +++ b/src/node/services/aiService.test.ts @@ -37,6 +37,7 @@ import type { CodexOauthService } from "@/node/services/codexOauthService"; import { MULTI_PROJECT_CONFIG_KEY } from "@/common/constants/multiProject"; import { CODEX_ENDPOINT } from "@/common/constants/codexOAuth"; +import { buildWorkflowRunCardMessage } from "@/common/utils/workflowRunMessages"; import type { LanguageModel, Tool } from "ai"; import { createMuxMessage } from "@/common/types/message"; import type { 
ModelMessage, MuxMessage } from "@/common/types/message"; @@ -434,6 +435,61 @@ describe("prepareProviderRequestMessages", () => { "next-user", ]); }); + + it("filters workflow display rows while keeping provider-visible workflow results", () => { + const trigger = createMuxMessage("workflow-command", "user", "/shallow-review mux", { + historySequence: 1, + muxMetadata: { + type: "workflow-trigger-display", + rawCommand: "/shallow-review mux", + commandPrefix: "/shallow-review", + runId: "wfr_1", + }, + }); + const card = buildWorkflowRunCardMessage( + { name: "shallow-review", args: { input: "mux" } }, + { runId: "wfr_1", status: "running", result: null }, + 2 + ); + card.metadata = { + historySequence: 2, + synthetic: true, + uiVisible: true, + muxMetadata: { type: "workflow-run-card-display", runId: "wfr_1" }, + }; + const result = createMuxMessage( + "workflow-result", + "user", + "/shallow-review mux\n\n{}", + { + historySequence: 3, + muxMetadata: { + type: "workflow-result", + rawCommand: "/shallow-review mux", + commandPrefix: "/shallow-review", + runId: "wfr_1", + }, + } + ); + const nextUser = createMuxMessage("next-user", "user", "continue normal work", { + historySequence: 4, + }); + + const prepared = prepareProviderRequestMessages( + [trigger, card, result, nextUser], + "openai", + "off" + ); + + expect(prepared.activeContextMessages.map((message) => message.id)).toEqual([ + "workflow-result", + "next-user", + ]); + expect(prepared.providerRequestMessages.map((message) => message.id)).toEqual([ + "workflow-result", + "next-user", + ]); + }); }); describe("AIService", () => { diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts index a3d168ddbf..79653382b5 100644 --- a/src/node/services/aiService.ts +++ b/src/node/services/aiService.ts @@ -1,3 +1,4 @@ +import * as path from "node:path"; import * as fs from "fs/promises"; import { EventEmitter } from "events"; @@ -119,6 +120,20 @@ import { import { applyToolPolicyAndExperiments, 
captureMcpToolTelemetry } from "./toolAssembly"; import { getErrorMessage } from "@/common/utils/errors"; import { filterSideQuestionMessages } from "@/common/utils/messages/sideQuestion"; +import { + WORKFLOW_RESULT_METADATA_TYPE, + buildWorkflowResultContextMessage, + filterWorkflowDisplayOnlyMessages, +} from "@/common/utils/workflowRunMessages"; +import { QuickJSRuntimeFactory } from "@/node/services/ptc/quickjsRuntime"; +import { + shouldUseRuntimeWorkflowProjectIO, + WorkflowDefinitionStore, +} from "@/node/services/workflows/WorkflowDefinitionStore"; +import { WorkflowRunStore } from "@/node/services/workflows/WorkflowRunStore"; +import { WorkflowService } from "@/node/services/workflows/WorkflowService"; +import { WorkflowTaskServiceAdapter } from "@/node/services/workflows/WorkflowTaskServiceAdapter"; +import { resolveWorkflowScratchRoots } from "@/node/services/workflows/workflowScratchRoots"; import { isProjectTrusted } from "@/node/utils/projectTrust"; const STREAM_STARTUP_DIAGNOSTIC_THRESHOLD_MS = 1_000; @@ -133,16 +148,18 @@ export function prepareProviderRequestMessages( sideQuestionFilteredCount: number; contextBoundarySlicedCount: number; } { - // /btw side questions are durable UI history, not main-agent context. - // Filter them before boundary slicing so future normal turns don't see - // side-question Q/A pairs and accidentally continue from an aside. + // /btw side questions and workflow display rows are durable UI history, not main-agent context. + // Filter them before boundary slicing so future normal turns don't see UI-only artifacts. 
const messagesWithoutSideQuestions = filterSideQuestionMessages(messages); + const messagesWithoutWorkflowDisplay = filterWorkflowDisplayOnlyMessages( + messagesWithoutSideQuestions + ); const sideQuestionFilteredCount = messages.length - messagesWithoutSideQuestions.length; const activeContextMessages = sliceMessagesForProviderFromLatestContextBoundary( - messagesWithoutSideQuestions + messagesWithoutWorkflowDisplay ); const contextBoundarySlicedCount = - messagesWithoutSideQuestions.length - activeContextMessages.length; + messagesWithoutWorkflowDisplay.length - activeContextMessages.length; const preserveReasoningOnly = canonicalProviderName === "anthropic" && effectiveThinkingLevel !== "off"; return { @@ -301,6 +318,23 @@ function derivePromptCacheScope(metadata: WorkspaceMetadata): string { return `${metadata.projectName}-${uniqueSuffix([metadata.projectPath])}`; } +interface WorkflowResultContinuationSender { + isWorkflowInvocationCurrent(workspaceId: string, runId: string): Promise; + sendMessage( + workspaceId: string, + message: string, + options: SendMessageOptions, + internal?: { + skipAutoResumeReset?: boolean; + synthetic?: boolean; + agentInitiated?: boolean; + /** When true, reject instead of queueing if the workspace is busy. 
*/ + requireIdle?: boolean; + startStreamInBackground?: boolean; + } + ): Promise>; +} + export class AIService extends EventEmitter { private readonly streamManager: StreamManager; private readonly historyService: HistoryService; @@ -345,6 +379,7 @@ export class AIService extends EventEmitter { private lastLlmRequestByWorkspace = new Map(); private taskService?: TaskService; private extraTools?: Record; + private workflowResultContinuationSender?: WorkflowResultContinuationSender; private analyticsService?: { executeRawQuery(sql: string): Promise }; private desktopSessionManager?: DesktopSessionManager; @@ -412,6 +447,10 @@ export class AIService extends EventEmitter { this.taskService = taskService; } + setWorkflowResultContinuationSender(sender: WorkflowResultContinuationSender): void { + this.workflowResultContinuationSender = sender; + } + setAnalyticsService(service: { executeRawQuery(sql: string): Promise }): void { this.analyticsService = service; } @@ -1129,6 +1168,12 @@ export class AIService extends EventEmitter { const advisorExperimentEnabled = experiments?.advisorTool ?? this.experimentsService?.isExperimentEnabled(EXPERIMENT_IDS.ADVISOR_TOOL) === true; + const dynamicWorkflowsExperimentEnabled = + experiments?.dynamicWorkflows ?? + this.experimentsService?.isExperimentEnabled(EXPERIMENT_IDS.DYNAMIC_WORKFLOWS) === true; + const subagentFileReportsExperimentEnabled = + experiments?.subagentFileReports ?? + this.experimentsService?.isExperimentEnabled(EXPERIMENT_IDS.SUBAGENT_FILE_REPORTS) === true; emitStartupBreadcrumb("loading_workspace_context"); const resolveAgentForStreamStartedAt = Date.now(); const agentResult = await resolveAgentForStream({ @@ -1473,17 +1518,137 @@ export class AIService extends EventEmitter { advisorModelString, cfg.advisorThinkingLevel ?? 
THINKING_LEVEL_OFF ); - const muxEnv = getMuxEnv( - metadata.projectPath, - getRuntimeType(metadata.runtimeConfig), - metadata.name, - { - workspaceId, - modelString, - thinkingLevel: thinkingLevel ?? "off", - costsUsd: sessionCostsUsd, - } - ); + const runtimeType = getRuntimeType(metadata.runtimeConfig); + const useRuntimeProjectWorkflowIO = shouldUseRuntimeWorkflowProjectIO(runtimeType); + const workflowScratchRoots = resolveWorkflowScratchRoots(this.config, workspaceId, { + workspaceRootPath: workspacePath, + normalizePath: runtime.normalizePath.bind(runtime), + }); + const muxEnv = getMuxEnv(metadata.projectPath, runtimeType, metadata.name, { + workspaceId, + modelString, + thinkingLevel: thinkingLevel ?? "off", + costsUsd: sessionCostsUsd, + }); + + const workflowService = + dynamicWorkflowsExperimentEnabled && this.taskService != null + ? new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot: runtime.normalizePath(".mux/workflows", workspacePath), + globalRoot: path.join(this.config.rootDir, "workflows"), + scratchRoot: workflowScratchRoots.scratchRoot, + projectRuntime: useRuntimeProjectWorkflowIO ? runtime : undefined, + projectCwd: useRuntimeProjectWorkflowIO ? 
workspacePath : undefined, + }), + runStore: new WorkflowRunStore({ + sessionDir: this.config.getSessionDir(workspaceId), + }), + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapterFactory: (runId) => + new WorkflowTaskServiceAdapter({ + taskService: this.taskService!, + parentWorkspaceId: workspaceId, + workflowRunId: runId, + defaultAgentId: "explore", + patchToolConfig: { + workspaceId, + cwd: workspacePath, + runtime, + runtimeTempDir, + workspaceSessionDir: this.config.getSessionDir(workspaceId), + trusted: isProjectTrusted(this.config, metadata.projectPath), + }, + getProjectTrusted: () => isProjectTrusted(this.config, metadata.projectPath), + experiments: { + ...experiments, + dynamicWorkflows: dynamicWorkflowsExperimentEnabled, + subagentFileReports: subagentFileReportsExperimentEnabled, + }, + }), + // Background workflow tools outlive the model turn that started them. Feed the + // terminal result back as a hidden user turn so the parent agent continues + // instead of leaving the user staring at the workflow report payload. 
+ onBackgroundRunTerminal: async ({ runId, status, result, run }) => { + const continuationSender = this.workflowResultContinuationSender; + if (continuationSender == null) { + log.warn("Workflow completed but no continuation sender is configured", { + workspaceId, + runId, + }); + return; + } + + let invocationCurrent = await continuationSender.isWorkflowInvocationCurrent( + workspaceId, + runId + ); + while (!invocationCurrent && this.isStreaming(workspaceId)) { + await new Promise((resolve) => setTimeout(resolve, 1_000)); + invocationCurrent = await continuationSender.isWorkflowInvocationCurrent( + workspaceId, + runId + ); + } + if (!invocationCurrent) { + log.debug("Skipping superseded workflow continuation", { workspaceId, runId }); + return; + } + + const rawCommand = `workflow_run ${run.definition.name}`; + const workflowResultMessage = buildWorkflowResultContextMessage({ + rawCommand, + name: run.definition.name, + runId, + status, + result, + run, + }); + const sendResult = await continuationSender.sendMessage( + workspaceId, + workflowResultMessage, + { + model: modelString, + thinkingLevel: effectiveThinkingLevel, + agentId: effectiveAgentId, + toolPolicy: effectiveToolPolicy, + additionalSystemInstructions: scratchpadAdditionalSystemInstructions, + maxOutputTokens, + providerOptions: effectiveMuxProviderOptions, + experiments: { + ...experiments, + dynamicWorkflows: dynamicWorkflowsExperimentEnabled, + subagentFileReports: subagentFileReportsExperimentEnabled, + }, + skipAiSettingsPersistence: true, + muxMetadata: { + type: WORKFLOW_RESULT_METADATA_TYPE, + rawCommand, + commandPrefix: "workflow_run", + runId, + requestedModel: modelString, + }, + }, + { + skipAutoResumeReset: true, + synthetic: true, + agentInitiated: true, + requireIdle: true, + startStreamInBackground: true, + } + ); + if (!sendResult.success) { + log.warn("Failed to continue agent after workflow completion", { + workspaceId, + runId, + error: sendResult.error, + }); + } + }, + 
getCurrentProjectTrusted: () => isProjectTrusted(this.config, metadata.projectPath), + runnerId: `workflow-runner:${workspaceId}`, + }) + : undefined; // Create assistant message ID early so tool-side usage reporting and nested tool events // stay scoped to this specific assistant turn. The placeholder is appended to history below @@ -1576,10 +1741,14 @@ export class AIService extends EventEmitter { ancestorPlanFilePaths, workspaceId, muxScope, + workflowService, goalService: workspaceGoalService, enableGoalTools: goalToolAvailability, // Only child workspaces (tasks) can report to a parent. enableAgentReport: Boolean(metadata.parentWorkspaceId), + workflowAgentOutputSchema: metadata.workflowTask?.outputSchema, + subagentReportFiles: + subagentFileReportsExperimentEnabled && metadata.parentWorkspaceId != null, // External edit detection callback recordFileState, reportModelUsage: (event) => { @@ -1654,8 +1823,12 @@ export class AIService extends EventEmitter { taskService: this.taskService, analyticsService: this.analyticsService, desktopSessionManager: this.desktopSessionManager, - // Experiments for inheritance to subagents. - experiments, + // Experiments for inheritance to subagents and workflow tool gating. 
+ experiments: { + ...experiments, + dynamicWorkflows: dynamicWorkflowsExperimentEnabled, + subagentFileReports: subagentFileReportsExperimentEnabled, + }, // Dynamic context for tool descriptions (moved from system prompt for better model attention) availableSubagents: agentDefinitions, availableSkills, diff --git a/src/node/services/coreServices.ts b/src/node/services/coreServices.ts index 980f5a2f3a..23300e0f90 100644 --- a/src/node/services/coreServices.ts +++ b/src/node/services/coreServices.ts @@ -127,6 +127,7 @@ export function createCoreServices(opts: CoreServicesOptions): CoreServices { opts.sessionTimingService, opts.opResolver ); + aiService.setWorkflowResultContinuationSender(workspaceService); workspaceService.setMCPServerManager(mcpServerManager); workspaceService.setWorkspaceGoalService(workspaceGoalService); workspaceGoalService.setOnActivityChange((workspaceId, snapshot) => { diff --git a/src/node/services/ptc/quickjsRuntime.test.ts b/src/node/services/ptc/quickjsRuntime.test.ts index 26ad335230..2b65bca485 100644 --- a/src/node/services/ptc/quickjsRuntime.test.ts +++ b/src/node/services/ptc/quickjsRuntime.test.ts @@ -51,6 +51,12 @@ describe("QuickJSRuntime", () => { expect(result.result).toBeNull(); }); + it("resolves returned promises", async () => { + const result = await runtime.eval("return (async () => ({ ok: true }))();"); + expect(result.success).toBe(true); + expect(result.result).toEqual({ ok: true }); + }); + it("handles syntax errors", async () => { const result = await runtime.eval("return {{{;"); expect(result.success).toBe(false); @@ -64,7 +70,7 @@ describe("QuickJSRuntime", () => { }); // Note: With asyncify, async host functions appear SYNC to QuickJS. - // Native JS await/Promise is not supported - use sync calls to host functions. + // Call host functions directly unless the evaluated code intentionally returns a Promise. 
it("handles multiple statements", async () => { const result = await runtime.eval(` const x = 10; diff --git a/src/node/services/ptc/quickjsRuntime.ts b/src/node/services/ptc/quickjsRuntime.ts index 0e5a981fa2..422ebb4bfb 100644 --- a/src/node/services/ptc/quickjsRuntime.ts +++ b/src/node/services/ptc/quickjsRuntime.ts @@ -305,14 +305,22 @@ export class QuickJSRuntime implements IJSRuntime { }; } - // With asyncify, evalCodeAsync suspends until async host functions complete. - // The result is already resolved - no need to resolve the promise. - const value: unknown = this.ctx.dump(evalResult.value) as unknown; + const resolvedValue = this.resolveReturnedValue(evalResult.value, deadline, timeoutMs); evalResult.value.dispose(); + if (!resolvedValue.success) { + return { + success: false, + error: resolvedValue.error, + toolCalls: this.toolCalls, + consoleOutput: this.consoleOutput, + duration_ms: Date.now() - execStartTime, + }; + } + return { success: true, - result: value, + result: resolvedValue.value, toolCalls: this.toolCalls, consoleOutput: this.consoleOutput, duration_ms: Date.now() - execStartTime, @@ -361,6 +369,44 @@ export class QuickJSRuntime implements IJSRuntime { } } + private resolveReturnedValue( + handle: QuickJSHandle, + deadline: number, + timeoutMs: number + ): { success: true; value: unknown } | { success: false; error: string } { + let promiseState = this.ctx.getPromiseState(handle); + while (promiseState.type === "pending" && this.ctx.runtime.hasPendingJob()) { + const pendingJobs = this.ctx.runtime.executePendingJobs(); + if (pendingJobs.error) { + const errorObj: unknown = pendingJobs.error.context.dump(pendingJobs.error) as unknown; + const error = this.getErrorMessage(errorObj, deadline, timeoutMs); + pendingJobs.dispose(); + return { success: false, error }; + } + pendingJobs.dispose(); + promiseState = this.ctx.getPromiseState(handle); + } + + if (promiseState.type === "pending") { + return { success: false, error: "Execution returned a 
pending Promise" }; + } + if (promiseState.type === "rejected") { + const errorObj: unknown = this.ctx.dump(promiseState.error) as unknown; + promiseState.error.dispose(); + return { success: false, error: this.getErrorMessage(errorObj, deadline, timeoutMs) }; + } + + try { + const valueHandle = promiseState.notAPromise ? handle : promiseState.value; + const value: unknown = this.ctx.dump(valueHandle) as unknown; + return { success: true, value }; + } finally { + if (!promiseState.notAPromise) { + promiseState.value.dispose(); + } + } + } + /** * Format a QuickJS error object into a readable error message. */ diff --git a/src/node/services/ptc/typeValidator.test.ts b/src/node/services/ptc/typeValidator.test.ts index b1c837cf59..cbe2b87232 100644 --- a/src/node/services/ptc/typeValidator.test.ts +++ b/src/node/services/ptc/typeValidator.test.ts @@ -1,7 +1,11 @@ +import * as fs from "node:fs/promises"; +import * as path from "node:path"; + import { describe, test, expect, beforeAll } from "bun:test"; import { z } from "zod"; import type { Tool } from "ai"; -import { validateTypes } from "./typeValidator"; +import { DisposableTempDir } from "@/node/services/tempDir"; +import { findBundledTypeScriptLibDir, validateTypes } from "./typeValidator"; import { generateMuxTypes } from "./typeGenerator"; /** @@ -40,6 +44,17 @@ describe("validateTypes", () => { muxTypes = await generateMuxTypes(tools); }); + test("finds bundled TypeScript libs from Docker server bundle layout", async () => { + using tmp = new DisposableTempDir("type-validator"); + const runtimeDir = path.join(tmp.path, "dist", "runtime"); + const libDir = path.join(tmp.path, "dist", "typescript-lib"); + await fs.mkdir(runtimeDir, { recursive: true }); + await fs.mkdir(libDir, { recursive: true }); + await fs.writeFile(path.join(libDir, "lib.es2023.d.ts.txt"), ""); + + expect(findBundledTypeScriptLibDir(runtimeDir)).toBe(libDir); + }); + test("accepts valid code with correct property names", () => { const result 
= validateTypes( ` diff --git a/src/node/services/ptc/typeValidator.ts b/src/node/services/ptc/typeValidator.ts index 30bae51318..46f946c407 100644 --- a/src/node/services/ptc/typeValidator.ts +++ b/src/node/services/ptc/typeValidator.ts @@ -19,13 +19,13 @@ import ts from "typescript"; * because electron-builder ignores .d.ts files by default (hardcoded, cannot override): * https://github.com/electron-userland/electron-builder/issues/5064 * - * These constants are computed once at module load time. + * These constants are computed once at module load time. The Docker server bundle runs from + * dist/runtime, while the unbundled Electron/main build runs from dist/node/services/ptc, so + * probe both relative layouts before falling back to the TypeScript package in development. */ -const BUNDLED_LIB_DIR = path.resolve(__dirname, "../../../typescript-lib"); -const IS_PRODUCTION = fs.existsSync(path.join(BUNDLED_LIB_DIR, "lib.es2023.d.ts.txt")); -const LIB_DIR = IS_PRODUCTION - ? BUNDLED_LIB_DIR - : path.dirname(require.resolve("typescript/lib/lib.d.ts")); +const BUNDLED_LIB_DIR = findBundledTypeScriptLibDir(__dirname); +const IS_PRODUCTION = BUNDLED_LIB_DIR != null; +const LIB_DIR = BUNDLED_LIB_DIR ?? path.dirname(require.resolve("typescript/lib/lib.d.ts")); export const WRAPPER_PREFIX = "function __agent__() {\n"; const MUX_TYPES_FILE = "mux.d.ts"; @@ -151,6 +151,20 @@ function createProgramForCode( return { program, host, getSourceFile: () => sourceFile, setSourceFile }; } +function hasBundledTypeScriptLib(dir: string): boolean { + return fs.existsSync(path.join(dir, toProductionLibName("lib.es2023.d.ts"))); +} + +export function findBundledTypeScriptLibDir(baseDir: string): string | null { + const candidates = [ + // Unbundled main/Electron build: dist/node/services/ptc -> dist/typescript-lib. + path.resolve(baseDir, "../../../typescript-lib"), + // Docker server bundle: dist/runtime -> dist/typescript-lib. 
+ path.resolve(baseDir, "../typescript-lib"), + ]; + return candidates.find(hasBundledTypeScriptLib) ?? null; +} + /** Convert lib filename for production: lib.X.d.ts → lib.X.d.ts.txt */ function toProductionLibName(fileName: string): string { return fileName + ".txt"; diff --git a/src/node/services/serviceContainer.ts b/src/node/services/serviceContainer.ts index 9ae48bdec0..1b19818376 100644 --- a/src/node/services/serviceContainer.ts +++ b/src/node/services/serviceContainer.ts @@ -57,6 +57,7 @@ import { createCoderUnarchiveHook, } from "@/node/runtime/coderLifecycleHooks"; import { createWorktreeArchiveHook } from "@/node/runtime/worktreeLifecycleHooks"; +import { QuickJSRuntimeFactory } from "@/node/services/ptc/quickjsRuntime"; import { setGlobalCoderService } from "@/node/runtime/runtimeFactory"; import { setSshPromptService } from "@/node/runtime/sshConnectionPool"; import { setSshPromptService as setSSH2SshPromptService } from "@/node/runtime/SSH2ConnectionPool"; @@ -79,6 +80,7 @@ import type { ExternalSecretResolver } from "@/common/types/secrets"; * Services are accessed via the ORPC context object. 
*/ export class ServiceContainer { + public readonly workflowRuntimeFactory = new QuickJSRuntimeFactory(); public readonly config: Config; // Core services — instantiated by createCoreServices (shared with `mux run` CLI) private readonly historyService: CoreServices["historyService"]; @@ -484,6 +486,7 @@ export class ServiceContainer { const resolveOnePasswordService = () => this.onePasswordService; return { + workflowRuntimeFactory: this.workflowRuntimeFactory, config: this.config, aiService: this.aiService, projectService: this.projectService, diff --git a/src/node/services/subagentReportArtifacts.test.ts b/src/node/services/subagentReportArtifacts.test.ts index 8c43642a0b..b7c07debc1 100644 --- a/src/node/services/subagentReportArtifacts.test.ts +++ b/src/node/services/subagentReportArtifacts.test.ts @@ -4,6 +4,7 @@ import * as os from "os"; import * as path from "path"; import { + readSubagentReportArtifact, readSubagentReportArtifactsFile, upsertSubagentReportArtifact, } from "@/node/services/subagentReportArtifacts"; @@ -41,4 +42,25 @@ describe("subagentReportArtifacts", () => { expect(entry).toBeDefined(); expect(entry?.reportTokenEstimate).toBe(100); }); + + test("upsertSubagentReportArtifact preserves structured output", async () => { + const workspaceId = "parent-1"; + const childTaskId = "child-structured"; + const structuredOutput = { claims: ["durable"], confidence: 0.8 }; + + await upsertSubagentReportArtifact({ + workspaceId, + workspaceSessionDir: testDir, + childTaskId, + parentWorkspaceId: workspaceId, + ancestorWorkspaceIds: [workspaceId], + reportMarkdown: "structured report", + structuredOutput, + nowMs: Date.now(), + }); + + const artifact = await readSubagentReportArtifact(testDir, childTaskId); + + expect(artifact?.structuredOutput).toEqual(structuredOutput); + }); }); diff --git a/src/node/services/subagentReportArtifacts.ts b/src/node/services/subagentReportArtifacts.ts index 2bd20a9810..54da7b326e 100644 --- 
a/src/node/services/subagentReportArtifacts.ts +++ b/src/node/services/subagentReportArtifacts.ts @@ -26,6 +26,7 @@ export interface SubagentReportArtifactIndexEntry { title?: string; /** Full ancestor chain (parent first). Used for descendant scope checks after cleanup. */ ancestorWorkspaceIds: string[]; + structuredOutput?: unknown; /** Estimated token count of delivered report markdown (~4 chars/token). */ reportTokenEstimate?: number; } @@ -135,6 +136,7 @@ export async function readSubagentReportArtifact( thinkingLevel?: unknown; title?: unknown; ancestorWorkspaceIds?: unknown; + structuredOutput?: unknown; reportMarkdown?: unknown; }; @@ -159,6 +161,7 @@ export async function readSubagentReportArtifact( : undefined, thinkingLevel: coerceThinkingLevel(meta.thinkingLevel), title: title ?? meta.title, + structuredOutput: obj.structuredOutput, reportMarkdown, }; } @@ -185,6 +188,7 @@ export async function readSubagentReportArtifact( thinkingLevel, title, ancestorWorkspaceIds, + structuredOutput: obj.structuredOutput, reportMarkdown, }; } catch (error) { @@ -230,6 +234,7 @@ export async function upsertSubagentReportArtifact(params: { model?: string; /** Task-level thinking/reasoning level used when running the sub-agent (optional for legacy entries). 
*/ thinkingLevel?: ThinkingLevel; + structuredOutput?: unknown; title?: string; nowMs?: number; }): Promise { @@ -267,6 +272,7 @@ export async function upsertSubagentReportArtifact(params: { thinkingLevel, title: params.title, ancestorWorkspaceIds: params.ancestorWorkspaceIds, + structuredOutput: params.structuredOutput, reportMarkdown: params.reportMarkdown, }, null, @@ -290,6 +296,7 @@ export async function upsertSubagentReportArtifact(params: { model, thinkingLevel, title: params.title, + structuredOutput: params.structuredOutput, ancestorWorkspaceIds: params.ancestorWorkspaceIds, }; updated.reportTokenEstimate = Math.ceil( diff --git a/src/node/services/taskService.test.ts b/src/node/services/taskService.test.ts index 13294c4a8f..fe497e98e2 100644 --- a/src/node/services/taskService.test.ts +++ b/src/node/services/taskService.test.ts @@ -1340,6 +1340,107 @@ describe("TaskService", () => { expect(childEntry?.taskThinkingLevel).toBe("medium"); }, 20_000); + test("appends file-backed report instructions to ordinary subagent prompts", async () => { + const config = await createTestConfig(rootDir); + stubStableIds(config, ["aaaaaaaaaa"], "bbbbbbbbbb"); + + const projectPath = await createTestProject(rootDir, "repo", { initGit: false }); + const parentId = "1111111111"; + await saveWorkspaces( + config, + projectPath, + [ + { + path: projectPath, + id: parentId, + name: "parent", + createdAt: new Date().toISOString(), + runtimeConfig: { type: "local" }, + aiSettings: { model: "openai:gpt-5.2", thinkingLevel: "medium" }, + }, + ], + testTaskSettings() + ); + const { workspaceService, sendMessage } = createWorkspaceServiceMocks(); + const { taskService } = createTaskServiceHarness(config, { workspaceService }); + + const created = await createAgentTask(taskService, parentId, "do the thing", { + experiments: { subagentFileReports: true }, + }); + + expect(created.success).toBe(true); + expect(sendMessage).toHaveBeenCalledWith( + "aaaaaaaaaa", + expect.any(String), + 
expect.objectContaining({ experiments: { subagentFileReports: true } }), + expect.anything() + ); + const sentPrompt = (sendMessage as unknown as { mock: { calls: Array<[string, string]> } }).mock + .calls[0]?.[1]; + assert(typeof sentPrompt === "string", "sendMessage prompt is required"); + expect(sentPrompt.startsWith("do the thing")).toBe(true); + expect(sentPrompt).toContain("report.md"); + expect(sentPrompt).toContain("agent_report"); + expect(sentPrompt).toContain("reportMarkdownPath"); + expect(sentPrompt).toContain("structuredOutputPath"); + expect(sentPrompt).toContain("title"); + expect(sentPrompt).not.toContain("structured-output.json"); + }, 20_000); + + test("passes workflow output schema through file-backed report instructions", async () => { + const config = await createTestConfig(rootDir); + stubStableIds(config, ["aaaaaaaaaa"], "bbbbbbbbbb"); + + const projectPath = await createTestProject(rootDir, "repo", { initGit: false }); + const parentId = "1111111111"; + await saveWorkspaces( + config, + projectPath, + [ + { + path: projectPath, + id: parentId, + name: "parent", + createdAt: new Date().toISOString(), + runtimeConfig: { type: "local" }, + aiSettings: { model: "openai:gpt-5.2", thinkingLevel: "medium" }, + }, + ], + testTaskSettings() + ); + const { workspaceService, sendMessage } = createWorkspaceServiceMocks(); + const { taskService } = createTaskServiceHarness(config, { workspaceService }); + const outputSchema = { + type: "object", + required: ["claims"], + properties: { + claims: { type: "array", items: { type: "string" } }, + }, + }; + + const created = await createAgentTask(taskService, parentId, "collect claims", { + experiments: { subagentFileReports: true }, + workflowTask: { + runId: "wfr_123", + stepId: "collect-claims", + outputSchema, + }, + }); + + expect(created.success).toBe(true); + const sentPrompt = (sendMessage as unknown as { mock: { calls: Array<[string, string]> } }).mock + .calls[0]?.[1]; + assert(typeof sentPrompt === 
"string", "sendMessage prompt is required"); + const schemaStart = sentPrompt.indexOf("{"); + const schemaEnd = sentPrompt.lastIndexOf("}"); + assert( + schemaStart >= 0 && schemaEnd > schemaStart, + "file-report prompt must include a JSON schema" + ); + expect(JSON.parse(sentPrompt.slice(schemaStart, schemaEnd + 1))).toEqual(outputSchema); + expect(sentPrompt).toContain("structured-output.json"); + }, 20_000); + test("inherits parent model + thinking when target agent has no global defaults", async () => { const config = await createTestConfig(rootDir); stubStableIds(config, ["aaaaaaaaaa"], "bbbbbbbbbb"); @@ -2052,6 +2153,98 @@ describe("TaskService", () => { ); }, 20_000); + test("Task.create persists workflow task metadata for report validation", async () => { + const config = await createTestConfig(rootDir); + stubStableIds(config, ["taskflow01"]); + const { parentId } = await saveLocalParentWorkspace(config, rootDir); + const { taskService } = createTaskServiceHarness(config); + + const outputSchema = { + type: "object", + required: ["claims"], + properties: { claims: { type: "array", items: { type: "string" } } }, + additionalProperties: false, + }; + + const result = await createAgentTask(taskService, parentId, "extract claims", { + workflowTask: { + runId: "wfr_123", + stepId: "claims", + outputSchema, + }, + }); + + expect(result.success).toBe(true); + const task = findWorkspaceInConfig(config, "taskflow01"); + expect(task?.workflowTask).toEqual({ + runId: "wfr_123", + stepId: "claims", + outputSchema, + }); + }); + + test("TaskService extracts file-backed agent_report payloads from tool output", async () => { + const config = await createTestConfig(rootDir); + const { taskService } = createTaskServiceHarness(config); + const reportReader = taskService as unknown as { + findAgentReportArgsInParts(parts: readonly unknown[]): { + reportMarkdown: string; + title?: string; + structuredOutput?: unknown; + } | null; + }; + + const report = 
reportReader.findAgentReportArgsInParts([ + { + type: "dynamic-tool", + toolName: "agent_report", + state: "output-available", + input: { reportMarkdownPath: "report.md", structuredOutputPath: "structured-output.json" }, + output: { + success: true, + report: { + reportMarkdown: "# Done", + title: "Done", + structuredOutput: { claims: ["durable"] }, + }, + }, + }, + ]); + + expect(report).toEqual({ + reportMarkdown: "# Done", + title: "Done", + structuredOutput: { claims: ["durable"] }, + }); + }); + + test("TaskService preserves null structuredOutput from inline agent_report args", async () => { + const config = await createTestConfig(rootDir); + const { taskService } = createTaskServiceHarness(config); + const reportReader = taskService as unknown as { + findAgentReportArgsInParts(parts: readonly unknown[]): { + reportMarkdown: string; + title?: string; + structuredOutput?: unknown; + } | null; + }; + + const report = reportReader.findAgentReportArgsInParts([ + { + type: "dynamic-tool", + toolName: "agent_report", + state: "output-available", + input: { reportMarkdown: "# Done", structuredOutput: null, title: null }, + output: { success: true }, + }, + ]); + + expect(report).toEqual({ + reportMarkdown: "# Done", + structuredOutput: null, + }); + }); + test("created task metadata is not recomputed after defaults change", async () => { const config = await createTestConfig(rootDir); stubStableIds(config, ["aaaaaaaaaa"], "bbbbbbbbbb"); @@ -2139,6 +2332,49 @@ describe("TaskService", () => { ); }); + test("does not auto-resume a parent for workflow-owned descendants", async () => { + const config = await createTestConfig(rootDir); + + const projectPath = path.join(rootDir, "repo"); + const rootWorkspaceId = "root-111"; + const workflowTaskId = "task-workflow"; + const workflowChildTaskId = "task-workflow-child"; + + await saveWorkspaces( + config, + projectPath, + [ + projectWorkspace(projectPath, "root", rootWorkspaceId), + projectWorkspace(projectPath, 
"workflow-task", workflowTaskId, { + parentWorkspaceId: rootWorkspaceId, + agentType: "exec", + taskStatus: "running", + workflowTask: { runId: "wfr_target", stepId: "scope" }, + }), + projectWorkspace(projectPath, "workflow-child", workflowChildTaskId, { + parentWorkspaceId: workflowTaskId, + agentType: "explore", + taskStatus: "running", + }), + ], + testTaskSettings() + ); + + const { aiService } = createAIServiceMocks(config); + const { workspaceService, sendMessage } = createWorkspaceServiceMocks(); + const { taskService } = createTaskServiceHarness(config, { aiService, workspaceService }); + + await handleTaskServiceStreamEndForTest(taskService, { + type: "stream-end", + workspaceId: rootWorkspaceId, + messageId: "assistant-root", + metadata: { model: "openai:gpt-5.2" }, + parts: [], + }); + + expect(sendMessage).not.toHaveBeenCalled(); + }); + test("does not auto-resume a parent while a follow-up turn is already queued or preparing", async () => { const config = await createTestConfig(rootDir); @@ -2806,7 +3042,11 @@ describe("TaskService", () => { type: "dynamic-tool", toolCallId: "agent-report-call-1", toolName: "agent_report", - input: { reportMarkdown: "Hello from child", title: "Result" }, + input: { + reportMarkdown: "Hello from child", + title: "Result", + structuredOutput: { claims: ["fast handoff"] }, + }, state: "output-available", output: { success: true }, }, @@ -2814,14 +3054,83 @@ describe("TaskService", () => { }); expect(sendMessage).toHaveBeenCalledTimes(1); + const handoffPrompt = (sendMessage as unknown as { mock: { calls: Array<[string, string]> } }) + .mock.calls[0]?.[1]; + assert(typeof handoffPrompt === "string", "tasks-completed handoff prompt is required"); + expect(handoffPrompt).toContain("structured outputs"); + expect(handoffPrompt).not.toContain("task_await"); expect(sendMessage).toHaveBeenCalledWith( parentWorkspaceId, - expect.stringContaining("sub-agent task(s) have completed"), + expect.stringContaining("Background sub-agent 
task(s) have completed"), expect.objectContaining({ agentId: "plan", }), expect.objectContaining({ skipAutoResumeReset: true, synthetic: true }) ); + + const parentHistory = await collectFullHistory(historyService, parentWorkspaceId); + const serializedParentHistory = JSON.stringify(parentHistory); + expect(serializedParentHistory).toContain(""); + expect(serializedParentHistory).toContain(""); + expect(serializedParentHistory).toContain("claims"); + }); + + test("workflow-owned child reports do not trigger generic parent handoff", async () => { + const config = await createTestConfig(rootDir); + + const projectPath = path.join(rootDir, "repo"); + const parentWorkspaceId = "parent-workflow-report"; + const childTaskId = "task-workflow-report"; + + await saveWorkspaces( + config, + projectPath, + [ + projectWorkspace(projectPath, "parent", parentWorkspaceId, { + aiSettings: { model: "openai:gpt-5.2", thinkingLevel: "medium" }, + }), + projectWorkspace(projectPath, "workflow-child", childTaskId, { + parentWorkspaceId, + agentType: "explore", + taskStatus: "running", + taskModelString: "openai:gpt-5.2", + taskThinkingLevel: "medium", + workflowTask: { runId: "wfr_report_handoff", stepId: "collect" }, + }), + ], + testTaskSettings() + ); + + const { aiService } = createAIServiceMocks(config); + const { workspaceService, sendMessage } = createWorkspaceServiceMocks(); + const { historyService, taskService } = createTaskServiceHarness(config, { + aiService, + workspaceService, + }); + + await handleTaskServiceStreamEndForTest(taskService, { + type: "stream-end", + workspaceId: childTaskId, + messageId: "assistant-workflow-child-output", + metadata: { model: "openai:gpt-5.2" }, + parts: [ + { + type: "dynamic-tool", + toolCallId: "agent-report-call-1", + toolName: "agent_report", + input: { + reportMarkdown: "Workflow step report", + title: "Workflow Step", + }, + state: "output-available", + output: { success: true }, + }, + ], + }); + + 
expect(sendMessage).not.toHaveBeenCalled(); + const parentHistory = await collectFullHistory(historyService, parentWorkspaceId); + expect(JSON.stringify(parentHistory)).not.toContain(""); }); test("foreground waiter suppresses tasks-completed auto-resume notification", async () => { @@ -2887,7 +3196,7 @@ describe("TaskService", () => { expect(sendMessage).not.toHaveBeenCalledWith( parentWorkspaceId, - expect.stringContaining("background sub-agent task(s) have completed"), + expect.stringContaining("task(s) have completed"), expect.anything(), expect.anything() ); @@ -3117,6 +3426,107 @@ describe("TaskService", () => { expect(childTask?.taskStatus).toBe("interrupted"); }); + test("terminateAllDescendantAgentTasks can scope interrupts to one workflow run", async () => { + const config = await createTestConfig(rootDir); + const projectPath = path.join(rootDir, "repo"); + const rootWorkspaceId = "root-111"; + const workflowTaskId = "task-workflow"; + const workflowChildTaskId = "task-workflow-child"; + const otherTaskId = "task-other"; + + await saveWorkspaces( + config, + projectPath, + [ + projectWorkspace(projectPath, "root", rootWorkspaceId), + projectWorkspace(projectPath, "workflow-task", workflowTaskId, { + parentWorkspaceId: rootWorkspaceId, + agentType: "exec", + taskStatus: "running", + workflowTask: { runId: "wfr_target", stepId: "scope" }, + }), + projectWorkspace(projectPath, "workflow-child", workflowChildTaskId, { + parentWorkspaceId: workflowTaskId, + agentType: "explore", + taskStatus: "running", + }), + projectWorkspace(projectPath, "other-task", otherTaskId, { + parentWorkspaceId: rootWorkspaceId, + agentType: "exec", + taskStatus: "running", + workflowTask: { runId: "wfr_other", stepId: "scope" }, + }), + ], + testTaskSettings() + ); + + const { aiService } = createAIServiceMocks(config); + const { workspaceService } = createWorkspaceServiceMocks(); + const { taskService } = createTaskServiceHarness(config, { aiService, workspaceService }); + + 
const interruptedTaskIds = await taskService.terminateAllDescendantAgentTasks(rootWorkspaceId, { + workflowRunId: "wfr_target", + }); + + expect(interruptedTaskIds).toEqual([workflowChildTaskId, workflowTaskId]); + const saved = config.loadConfigOrDefault(); + const tasks = saved.projects.get(projectPath)?.workspaces ?? []; + expect(tasks.find((workspace) => workspace.id === workflowTaskId)?.taskStatus).toBe( + "interrupted" + ); + expect(tasks.find((workspace) => workspace.id === workflowChildTaskId)?.taskStatus).toBe( + "interrupted" + ); + expect(tasks.find((workspace) => workspace.id === otherTaskId)?.taskStatus).toBe("running"); + }); + + test("listActiveDescendantAgentTaskIds can exclude workflow-owned descendants", async () => { + const config = await createTestConfig(rootDir); + const projectPath = path.join(rootDir, "repo"); + const rootWorkspaceId = "root-111"; + const workflowTaskId = "task-workflow"; + const workflowChildTaskId = "task-workflow-child"; + const regularTaskId = "task-regular"; + + await saveWorkspaces( + config, + projectPath, + [ + projectWorkspace(projectPath, "root", rootWorkspaceId), + projectWorkspace(projectPath, "workflow-task", workflowTaskId, { + parentWorkspaceId: rootWorkspaceId, + agentType: "exec", + taskStatus: "running", + workflowTask: { runId: "wfr_target", stepId: "scope" }, + }), + projectWorkspace(projectPath, "workflow-child", workflowChildTaskId, { + parentWorkspaceId: workflowTaskId, + agentType: "explore", + taskStatus: "running", + }), + projectWorkspace(projectPath, "regular-task", regularTaskId, { + parentWorkspaceId: rootWorkspaceId, + agentType: "exec", + taskStatus: "running", + }), + ], + testTaskSettings() + ); + + const { aiService } = createAIServiceMocks(config); + const { workspaceService } = createWorkspaceServiceMocks(); + const { taskService } = createTaskServiceHarness(config, { aiService, workspaceService }); + + expect(new 
Set(taskService.listActiveDescendantAgentTaskIds(rootWorkspaceId))).toEqual( + new Set([regularTaskId, workflowChildTaskId, workflowTaskId]) + ); + expect( + taskService.listActiveDescendantAgentTaskIds(rootWorkspaceId, { + excludeWorkflowTasks: true, + }) + ).toEqual([regularTaskId]); + }); + test("terminateAllDescendantAgentTasks preserves already-completed descendants", async () => { const config = await createTestConfig(rootDir); @@ -4401,7 +4811,11 @@ describe("TaskService", () => { type: "dynamic-tool", toolCallId: "agent-report-call-1", toolName: "agent_report", - input: { reportMarkdown: "Hello from child", title: "Result" }, + input: { + reportMarkdown: "Hello from child", + title: "Result", + structuredOutput: { claims: ["durable"] }, + }, state: "output-available", output: { success: true }, }, @@ -4460,8 +4874,19 @@ describe("TaskService", () => { expect.objectContaining({ workspaceId: childId }) ); + const reportArtifact = await readSubagentReportArtifact( + config.getSessionDir(parentId), + childId + ); + expect(reportArtifact?.structuredOutput).toEqual({ claims: ["durable"] }); + expect(remove).toHaveBeenCalledTimes(1); expect(remove).toHaveBeenCalledWith(childId, true); + const childReportHandoffPrompt = ( + sendMessage as unknown as { mock: { calls: Array<[string, string]> } } + ).mock.calls[0]?.[1]; + assert(typeof childReportHandoffPrompt === "string", "child report handoff prompt is required"); + expect(childReportHandoffPrompt).not.toContain("task_await"); expect(sendMessage).toHaveBeenCalledWith( parentId, expect.stringContaining("sub-agent task(s) have completed"), @@ -5905,6 +6330,11 @@ describe("TaskService", () => { expect(remove).toHaveBeenCalledTimes(1); expect(remove).toHaveBeenCalledWith(childId, true); + const fallbackHandoffPrompt = ( + sendMessageMock as unknown as { mock: { calls: Array<[string, string]> } } + ).mock.calls[0]?.[1]; + assert(typeof fallbackHandoffPrompt === "string", "fallback handoff prompt is required"); + 
expect(fallbackHandoffPrompt).not.toContain("task_await"); expect(sendMessageMock).toHaveBeenCalledWith( parentId, expect.stringContaining("sub-agent task(s) have completed"), @@ -7130,6 +7560,7 @@ describe("TaskService", () => { ancestorWorkspaceIds: [parentId], reportMarkdown: "Report from child one", title: "Option one", + structuredOutput: { score: 1 }, nowMs: Date.now(), }); @@ -7158,6 +7589,8 @@ describe("TaskService", () => { const serializedParentHistory = JSON.stringify(parentHistory); expect(serializedParentHistory).toContain(""); expect(serializedParentHistory).toContain("Report from child one"); + expect(serializedParentHistory).toContain(""); + expect(serializedParentHistory).toContain("score"); expect( serializedParentHistory.match( /child-best-of-concurrent-deferred-fallback-1<\/task_id>/g diff --git a/src/node/services/taskService.ts b/src/node/services/taskService.ts index e240da89c0..36581dcf1a 100644 --- a/src/node/services/taskService.ts +++ b/src/node/services/taskService.ts @@ -67,7 +67,8 @@ import type { ThinkingLevel } from "@/common/types/thinking"; import type { ErrorEvent, StreamEndEvent } from "@/common/types/stream"; import { isDynamicToolPart, type DynamicToolPart } from "@/common/types/toolParts"; import { - AgentReportToolArgsSchema, + AgentReportInlineToolArgsSchema, + AgentReportSubmittedReportSchema, TaskToolResultSchema, TaskToolArgsSchema, } from "@/common/utils/tools/toolDefinitions"; @@ -133,15 +134,127 @@ export interface TaskCreateArgs { kind?: TaskGroupKind; label?: string; }; + workflowTask?: { + runId: string; + stepId: string; + outputSchema?: unknown; + }; /** Experiments to inherit to subagent */ experiments?: { programmaticToolCalling?: boolean; programmaticToolCallingExclusive?: boolean; advisorTool?: boolean; execSubagentHardRestart?: boolean; + dynamicWorkflows?: boolean; + subagentFileReports?: boolean; }; } +function appendSubagentFileReportInstructions( + prompt: string, + workflowTask: 
TaskCreateArgs["workflowTask"] +): string { + assert(prompt.trim().length > 0, "appendSubagentFileReportInstructions requires prompt"); + const outputSchema = workflowTask?.outputSchema; + let schemaInstruction = ""; + if (outputSchema !== undefined) { + const schemaJson = JSON.stringify(outputSchema, null, 2); + assert( + schemaJson !== undefined, + "appendSubagentFileReportInstructions requires JSON output schema" + ); + schemaInstruction = [ + "Write the required structured output as valid JSON to `structured-output.json`.", + // File-backed report mode only exposes file paths in the tool schema, so the prompt must carry + // the workflow output contract that inline `agent_report` arguments would otherwise describe. + "The structured output must match this JSON Schema:", + "```json", + schemaJson, + "```", + ].join("\n"); + } + + return [ + prompt, + "Subagent file-backed report mode is enabled for this task. Before reporting, create or update `report.md` in the workspace root with your final markdown report.", + schemaInstruction, + "When complete, call agent_report with `reportMarkdownPath: null`, `structuredOutputPath: null`, and `title: null` so Mux uses the default report files.", + ] + .filter((instruction) => instruction.length > 0) + .join("\n\n"); +} + +function stringifyStructuredOutputForSubagentReport(structuredOutput: unknown): string { + const json = JSON.stringify(structuredOutput, null, 2); + assert( + json !== undefined, + "stringifyStructuredOutputForSubagentReport requires JSON-serializable structured output" + ); + return json; +} + +function formatSubagentReportUserMessage(params: { + childWorkspaceId: string; + agentType: string; + title: string; + reportMarkdown: string; + structuredOutput?: unknown; +}): string { + assert(params.childWorkspaceId.length > 0, "subagent report message requires child id"); + assert(params.agentType.length > 0, "subagent report message requires agent type"); + assert(params.title.length > 0, "subagent report 
message requires title"); + assert(params.reportMarkdown.length > 0, "subagent report message requires markdown"); + + const lines = [ + "", + `${params.childWorkspaceId}`, + `${params.agentType}`, + `${params.title}`, + "", + params.reportMarkdown, + "", + ]; + + if (params.structuredOutput !== undefined) { + lines.push( + "", + "```json", + stringifyStructuredOutputForSubagentReport(params.structuredOutput), + "```", + "" + ); + } + + lines.push(""); + return lines.join("\n"); +} + +// Completed background reports are already persisted into the parent context; asking the parent +// to call task_await burns an extra model/tool turn before it can synthesize the final answer. +const COMPLETED_BACKGROUND_SUBAGENT_HANDOFF_PROMPT = + "Background sub-agent task(s) have completed. Their accepted reports and any structured outputs " + + "are already injected into this workspace context as task tool results or synthetic user report " + + "messages. Write the final response now, integrating those results. If a required report appears " + + "missing, explain the missing context instead of waiting for another handoff."; + +function getTaskCompletionInstruction(params: { + completionToolName: "agent_report" | "propose_plan"; + subagentFileReports: boolean; +}): string { + if (params.completionToolName === "propose_plan") { + return "Call propose_plan exactly once now. Base it only on the planning work already completed in this workspace."; + } + + if (params.subagentFileReports) { + return ( + "Create or update report.md with your final report, then call agent_report exactly once now with reportMarkdownPath, structuredOutputPath, and title all set to null. " + + "Base it only on the work already completed in this workspace." + ); + } + + return "Call agent_report exactly once now with your final report. 
Base it only on the work already completed in this workspace."; +} + export interface TaskCreateResult { taskId: string; kind: TaskKind; @@ -185,7 +298,7 @@ interface AgentTaskIndex { interface PendingTaskWaiter { taskId: string; - resolve: (report: { reportMarkdown: string; title?: string }) => void; + resolve: (report: { reportMarkdown: string; title?: string; structuredOutput?: unknown }) => void; reject: (error: Error) => void; cleanup: () => void; requestingWorkspaceId?: string; @@ -199,6 +312,7 @@ interface PendingTaskStartWaiter { interface CompletedAgentReportCacheEntry { reportMarkdown: string; + structuredOutput?: unknown; title?: string; // Ancestor workspace IDs captured when the report was cached. // Used to keep descendant-scope checks working even if the task workspace is cleaned up. @@ -700,13 +814,15 @@ export class TaskService { isPlanLike, }); const resumeStartedAt = Date.now(); + const restartCompletionInstruction = isPlanLike + ? "When you have a final plan, call propose_plan exactly once." + : task.taskExperiments?.subagentFileReports === true + ? "When you have a final answer, create or update report.md, then call agent_report with reportMarkdownPath, structuredOutputPath, and title all set to null." + : "When you have a final answer, call agent_report exactly once."; const sendResult = await this.workspaceService.sendMessage( task.id, - isPlanLike - ? "Mux restarted while this task was running. Continue where you left off. " + - "When you have a final plan, call propose_plan exactly once." - : "Mux restarted while this task was running. Continue where you left off. " + - "When you have a final answer, call agent_report exactly once.", + "Mux restarted while this task was running. Continue where you left off. 
" + + restartCompletionInstruction, { model, agentId, @@ -841,10 +957,14 @@ export class TaskService { return Err("Task.create: unsupported kind"); } - const prompt = coerceNonEmptyString(args.prompt); - if (!prompt) { + const basePrompt = coerceNonEmptyString(args.prompt); + if (!basePrompt) { return Err("Task.create: prompt is required"); } + const prompt = + args.experiments?.subagentFileReports === true + ? appendSubagentFileReportInstructions(basePrompt, args.workflowTask) + : basePrompt; const agentIdRaw = coerceNonEmptyString(args.agentId ?? args.agentType); if (!agentIdRaw) { @@ -1048,6 +1168,8 @@ export class TaskService { maxParallelAgentTasks: taskSettings.maxParallelAgentTasks, shouldQueue, runtimeType: taskRuntimeConfig.type, + workflowRunId: args.workflowTask?.runId, + workflowStepId: args.workflowTask?.stepId, promptLength: prompt.length, model: taskModelString, thinkingLevel: effectiveThinkingLevel, @@ -1090,6 +1212,7 @@ export class TaskService { parentWorkspaceId, agentId, agentType, + workflowTask: args.workflowTask, bestOf: normalizedBestOf, taskStatus: "queued", taskPrompt: prompt, @@ -1207,6 +1330,7 @@ export class TaskService { agentId, parentWorkspaceId, agentType, + workflowTask: args.workflowTask, bestOf: normalizedBestOf, taskStatus: "running", taskTrunkBranch: trunkBranch, @@ -1358,7 +1482,10 @@ export class TaskService { * Legacy naming note: this method retains the original "terminate" name for * compatibility with existing call sites. 
*/ - async terminateAllDescendantAgentTasks(workspaceId: string): Promise { + async terminateAllDescendantAgentTasks( + workspaceId: string, + options?: { workflowRunId?: string } + ): Promise { assert( workspaceId.length > 0, "terminateAllDescendantAgentTasks: workspaceId must be non-empty" @@ -1371,7 +1498,11 @@ export class TaskService { const cfg = this.config.loadConfigOrDefault(); const index = this.buildAgentTaskIndex(cfg); - const descendants = this.listDescendantAgentTaskIdsFromIndex(index, workspaceId); + const descendants = this.listDescendantAgentTaskIdsFromIndex(index, workspaceId).filter( + (taskId) => + options?.workflowRunId == null || + this.isWorkflowRunDescendant(index, taskId, options.workflowRunId) + ); if (descendants.length === 0) { return interruptedTaskIds; } @@ -1647,14 +1778,18 @@ export class TaskService { requestingWorkspaceId?: string; backgroundOnMessageQueued?: boolean; } - ): Promise<{ reportMarkdown: string; title?: string }> { + ): Promise<{ reportMarkdown: string; title?: string; structuredOutput?: unknown }> { assert(taskId.length > 0, "waitForAgentReport: taskId must be non-empty"); // Report monotonicity invariant: check the in-memory cache before any status-based // interruption handling so a finalized report stays awaitable once observed. const cached = this.completedReportsByTaskId.get(taskId); if (cached) { - return { reportMarkdown: cached.reportMarkdown, title: cached.title }; + return { + reportMarkdown: cached.reportMarkdown, + title: cached.title, + structuredOutput: cached.structuredOutput, + }; } const timeoutMs = options?.timeoutMs ?? 
10 * 60 * 1000; // 10 minutes @@ -1668,6 +1803,7 @@ export class TaskService { const tryReadPersistedReport = async (): Promise<{ reportMarkdown: string; + structuredOutput?: unknown; title?: string; } | null> => { if (!requestingWorkspaceId) { @@ -1684,11 +1820,16 @@ export class TaskService { this.completedReportsByTaskId.set(taskId, { reportMarkdown: artifact.reportMarkdown, title: artifact.title, + structuredOutput: artifact.structuredOutput, ancestorWorkspaceIds: artifact.ancestorWorkspaceIds, }); this.enforceCompletedReportCacheLimit(); - return { reportMarkdown: artifact.reportMarkdown, title: artifact.title }; + return { + reportMarkdown: artifact.reportMarkdown, + title: artifact.title, + structuredOutput: artifact.structuredOutput, + }; }; // Fast-path: if the task is already gone (cleanup) or already reported (restart), return the @@ -1719,7 +1860,11 @@ export class TaskService { } } - return await new Promise<{ reportMarkdown: string; title?: string }>((resolve, reject) => { + return await new Promise<{ + reportMarkdown: string; + title?: string; + structuredOutput?: unknown; + }>((resolve, reject) => { void (async () => { // Validate existence early to avoid waiting on never-resolving task IDs. const cfg = this.config.loadConfigOrDefault(); @@ -2006,7 +2151,10 @@ export class TaskService { return this.listCompletedDescendantAgentTaskIds(index, workspaceId).length > 0; } - listActiveDescendantAgentTaskIds(workspaceId: string): string[] { + listActiveDescendantAgentTaskIds( + workspaceId: string, + options: { excludeWorkflowTasks?: boolean } = {} + ): string[] { assert( workspaceId.length > 0, "listActiveDescendantAgentTaskIds: workspaceId must be non-empty" @@ -2017,17 +2165,28 @@ export class TaskService { const activeStatuses = new Set(["queued", "running", "awaiting_report"]); const result: string[] = []; - const stack: string[] = [...(index.childrenByParent.get(workspaceId) ?? 
[])]; + const stack: Array<{ taskId: string; workflowOwned: boolean }> = [ + ...(index.childrenByParent.get(workspaceId) ?? []).map((taskId) => ({ + taskId, + workflowOwned: false, + })), + ]; while (stack.length > 0) { const next = stack.pop()!; - const status = index.byId.get(next)?.taskStatus; - if (status && activeStatuses.has(status)) { - result.push(next); + const entry = index.byId.get(next.taskId); + const workflowOwned = next.workflowOwned || entry?.workflowTask != null; + const status = entry?.taskStatus; + if ( + status && + activeStatuses.has(status) && + !(options.excludeWorkflowTasks && workflowOwned) + ) { + result.push(next.taskId); } - const children = index.childrenByParent.get(next); + const children = index.childrenByParent.get(next.taskId); if (children) { for (const child of children) { - stack.push(child); + stack.push({ taskId: child, workflowOwned }); } } } @@ -2170,6 +2329,22 @@ export class TaskService { return result; } + private isWorkflowRunDescendant( + index: AgentTaskIndex, + taskId: string, + workflowRunId: string + ): boolean { + let current: string | undefined = taskId; + for (let i = 0; current != null && i < 32; i++) { + const entry = index.byId.get(current); + if (entry?.workflowTask?.runId === workflowRunId) { + return true; + } + current = index.parentById.get(current); + } + return false; + } + private listCompletedDescendantAgentTaskIds( index: AgentTaskIndex, workspaceId: string @@ -2992,14 +3167,15 @@ export class TaskService { options?: { reason?: "startup" | "stream_end" | "error"; error?: Pick; + subagentFileReports?: boolean; } ): string { const completionToolLabel = completionToolName === "propose_plan" ? "propose_plan" : "agent_report"; - const completionInstruction = - completionToolName === "propose_plan" - ? "Call propose_plan exactly once now. Base it only on the planning work already completed in this workspace." - : "Call agent_report exactly once now with your final report. 
Base it only on the work already completed in this workspace."; + const completionInstruction = getTaskCompletionInstruction({ + completionToolName, + subagentFileReports: options?.subagentFileReports === true, + }); const noExtraWorkInstruction = completionToolName === "propose_plan" ? "Do not continue planning or call other tools." @@ -3054,11 +3230,15 @@ export class TaskService { const startedAt = Date.now(); const sendResult = await this.workspaceService.sendMessage( workspaceId, - this.buildCompletionToolRecoveryMessage(completionToolName, options), + this.buildCompletionToolRecoveryMessage(completionToolName, { + ...options, + subagentFileReports: entry.workspace.taskExperiments?.subagentFileReports === true, + }), { model, agentId, thinkingLevel: entry.workspace.taskThinkingLevel, + experiments: entry.workspace.taskExperiments, toolPolicy: [{ regex_match: `^${completionToolName}$`, action: "require" }], }, { synthetic: true, agentInitiated: true } @@ -3123,9 +3303,12 @@ export class TaskService { return; } - // Foreground waits can be backgrounded at runtime when users queue another message. - // Those task IDs are tracked in-memory and excluded from parent auto-resume nudges. - const activeTaskIds = this.listActiveDescendantAgentTaskIds(workspaceId); + // Workflow-owned descendants report through the workflow runner; parent nudges must not + // bypass that journal/final-result path by asking the model to task_await them directly. + // Foreground waits can also be backgrounded at runtime when users queue another message. + const activeTaskIds = this.listActiveDescendantAgentTaskIds(workspaceId, { + excludeWorkflowTasks: true, + }); const blockingTaskIds = activeTaskIds.filter((id) => !this.isTaskQueueBackgrounded(id)); // One-shot semantics: consume exemptions after this stream-end's decision. 
@@ -3309,7 +3492,7 @@ export class TaskService { private async settleInterruptedTaskAtStreamEnd( workspaceId: string, entry: { projectPath: string; workspace: WorkspaceConfigEntry }, - reportArgs: { reportMarkdown: string; title?: string } | null + reportArgs: { reportMarkdown: string; title?: string; structuredOutput?: unknown } | null ): Promise { if (reportArgs) { await this.finalizeAgentTaskReport(workspaceId, entry, reportArgs); @@ -3773,7 +3956,13 @@ export class TaskService { params.parentWorkspaceId, sibling.taskId, findWorkspaceEntry(cfg, sibling.taskId), - { reportMarkdown: artifact.reportMarkdown, title: artifact.title } + { + reportMarkdown: artifact.reportMarkdown, + ...(artifact.title !== undefined ? { title: artifact.title } : {}), + ...(artifact.structuredOutput !== undefined + ? { structuredOutput: artifact.structuredOutput } + : {}), + } ); for (const taskId of siblingCleanupTaskIds) { cleanupTaskIds.add(taskId); @@ -3846,7 +4035,7 @@ export class TaskService { private async finalizeAgentTaskReport( childWorkspaceId: string, childEntry: { projectPath: string; workspace: WorkspaceConfigEntry } | null | undefined, - reportArgs: { reportMarkdown: string; title?: string } + reportArgs: { reportMarkdown: string; title?: string; structuredOutput?: unknown } ): Promise { this.markTaskForegroundRelevant(childWorkspaceId); @@ -3901,6 +4090,8 @@ export class TaskService { return; } + const isWorkflowOwnedChildReport = latestChildEntry?.workspace.workflowTask != null; + const parentById = this.buildAgentTaskIndex(cfgAfterReport).parentById; const ancestorWorkspaceIds = this.listAncestorWorkspaceIdsUsingParentById( parentById, @@ -3923,6 +4114,7 @@ export class TaskService { model: latestChildEntry?.workspace.taskModelString, thinkingLevel: latestChildEntry?.workspace.taskThinkingLevel, title: reportArgs.title, + structuredOutput: reportArgs.structuredOutput, nowMs: persistedAtMs, }); } catch (error: unknown) { @@ -3992,6 +4184,16 @@ export class TaskService 
{ }); } + if (isWorkflowOwnedChildReport) { + // Workflow-owned tasks report through WorkflowRunner's journal/final-result path. Do not + // also nudge the parent model with a generic background-subagent handoff. + log.debug("Skipping post-report parent auto-resume for workflow-owned child", { + parentWorkspaceId, + childWorkspaceId, + }); + return; + } + if ( !hadForegroundWaiters && !hasActiveDescendants && @@ -4004,7 +4206,7 @@ export class TaskService { ); const sendResult = await this.workspaceService.sendMessage( parentWorkspaceId, - "Your background sub-agent task(s) have completed. Use task_await to retrieve their reports and integrate the results.", + COMPLETED_BACKGROUND_SUBAGENT_HANDOFF_PROMPT, { model: resumeOptions.model, agentId: resumeOptions.agentId, @@ -4032,7 +4234,7 @@ export class TaskService { private resolveWaiters( taskId: string, - report: { reportMarkdown: string; title?: string } + report: { reportMarkdown: string; title?: string; structuredOutput?: unknown } ): boolean { this.markTaskForegroundRelevant(taskId); @@ -4043,6 +4245,7 @@ export class TaskService { this.completedReportsByTaskId.set(taskId, { reportMarkdown: report.reportMarkdown, title: report.title, + structuredOutput: report.structuredOutput, ancestorWorkspaceIds, }); this.enforceCompletedReportCacheLimit(); @@ -4125,18 +4328,34 @@ export class TaskService { private findAgentReportArgsInParts( parts: readonly unknown[] - ): { reportMarkdown: string; title?: string } | null { + ): { reportMarkdown: string; title?: string; structuredOutput?: unknown } | null { for (let i = parts.length - 1; i >= 0; i--) { const part = parts[i]; if (!isDynamicToolPart(part)) continue; if (part.toolName !== "agent_report") continue; if (part.state !== "output-available") continue; if (!isSuccessfulToolResult(part.output)) continue; - const parsed = AgentReportToolArgsSchema.safeParse(part.input); + const outputReport = AgentReportSubmittedReportSchema.safeParse( + typeof part.output === "object" 
&& part.output !== null && "report" in part.output + ? (part.output as { report?: unknown }).report + : undefined + ); + if (outputReport.success) { + return outputReport.data; + } + + const parsed = AgentReportInlineToolArgsSchema.safeParse(part.input); if (!parsed.success) continue; // Normalize null → undefined at the schema boundary so downstream // code that expects `title?: string` doesn't need to handle null. - return { reportMarkdown: parsed.data.reportMarkdown, title: parsed.data.title ?? undefined }; + const report: { reportMarkdown: string; title?: string; structuredOutput?: unknown } = { + reportMarkdown: parsed.data.reportMarkdown, + title: parsed.data.title ?? undefined, + }; + if (Object.prototype.hasOwnProperty.call(parsed.data, "structuredOutput")) { + report.structuredOutput = parsed.data.structuredOutput; + } + return report; } return null; } @@ -4226,6 +4445,7 @@ export class TaskService { const reports: Array<{ taskId: string; reportMarkdown: string; + structuredOutput?: unknown; title?: string; agentId?: string; agentType?: string; @@ -4243,6 +4463,7 @@ export class TaskService { taskId: sibling.taskId, reportMarkdown: artifact.reportMarkdown, title: artifact.title, + structuredOutput: artifact.structuredOutput, agentId: sibling.agentId, agentType: sibling.agentType, groupKind: sibling.kind, @@ -4376,7 +4597,7 @@ export class TaskService { parentWorkspaceId: string, childWorkspaceId: string, childEntry: { projectPath: string; workspace: WorkspaceConfigEntry } | null | undefined, - report: { reportMarkdown: string; title?: string } + report: { reportMarkdown: string; title?: string; structuredOutput?: unknown } ): Promise { assert( childWorkspaceId.length > 0, @@ -4412,15 +4633,16 @@ export class TaskService { parentWorkspaceId: string, childWorkspaceId: string, childEntry: { projectPath: string; workspace: WorkspaceConfigEntry } | null | undefined, - report: { reportMarkdown: string; title?: string } + report: { reportMarkdown: string; title?: 
string; structuredOutput?: unknown } ): Promise { - const agentType = childEntry?.workspace.agentType ?? "agent"; + const agentType = coerceNonEmptyString(childEntry?.workspace.agentType) ?? "agent"; const output = { status: "completed" as const, taskId: childWorkspaceId, reportMarkdown: report.reportMarkdown, title: report.title, + structuredOutput: report.structuredOutput, agentType, }; const parsedOutput = TaskToolResultSchema.safeParse(output); @@ -4438,6 +4660,14 @@ export class TaskService { } } + if (childEntry?.workspace.workflowTask != null) { + log.debug("Skipping generic parent report delivery for workflow-owned child", { + parentWorkspaceId, + childWorkspaceId, + }); + return []; + } + // Restart-safe: if the parent has a pending task tool call in partial.json (interrupted stream), // finalize it with the report. Avoid rewriting persisted history to keep earlier messages immutable. if (!this.aiService.isStreaming(parentWorkspaceId)) { @@ -4476,20 +4706,22 @@ export class TaskService { // Background tasks: append a synthetic user message containing the report so earlier history // remains immutable (append-only) and prompt caches can still reuse the prefix. - const titlePrefix = report.title ?? `Subagent (${agentType}) report`; - const xml = [ - "", - `${childWorkspaceId}`, - `${agentType}`, - `${titlePrefix}`, - "", - report.reportMarkdown, - "", - "", - ].join("\n"); + const titlePrefix = + typeof report.title === "string" && report.title.trim().length > 0 + ? report.title + : `Subagent (${agentType}) report`; + const reportContent = formatSubagentReportUserMessage({ + childWorkspaceId, + agentType, + title: titlePrefix, + reportMarkdown: report.reportMarkdown, + ...(report.structuredOutput !== undefined + ? 
{ structuredOutput: report.structuredOutput } + : {}), + }); const messageId = createTaskReportMessageId(); - const reportMessage = createMuxMessage(messageId, "user", xml, { + const reportMessage = createMuxMessage(messageId, "user", reportContent, { timestamp: Date.now(), synthetic: true, }); diff --git a/src/node/services/testDispatchHelpers.ts b/src/node/services/testDispatchHelpers.ts index b39b8ff7b0..cab0fe9de8 100644 --- a/src/node/services/testDispatchHelpers.ts +++ b/src/node/services/testDispatchHelpers.ts @@ -36,7 +36,7 @@ export async function drainPendingDispatches(): Promise { * `workspaceGoalService.test.ts` and `idleDispatcher.test.ts`. */ export async function waitForCondition( - condition: () => boolean, + condition: () => boolean | Promise, options?: { timeoutMs?: number; intervalMs?: number } ): Promise { const timeoutMs = options?.timeoutMs ?? 1_000; @@ -44,7 +44,7 @@ export async function waitForCondition( const deadline = Date.now() + timeoutMs; while (Date.now() < deadline) { - if (condition()) { + if (await condition()) { return; } await new Promise((resolve) => setTimeout(resolve, intervalMs)); diff --git a/src/node/services/tools/agent_report.test.ts b/src/node/services/tools/agent_report.test.ts index 7b4d98388b..bdaf005f25 100644 --- a/src/node/services/tools/agent_report.test.ts +++ b/src/node/services/tools/agent_report.test.ts @@ -1,4 +1,6 @@ import { describe, it, expect, mock } from "bun:test"; +import * as fs from "node:fs/promises"; +import * as path from "node:path"; import type { ToolExecutionOptions } from "ai"; import { createAgentReportTool } from "./agent_report"; @@ -36,6 +38,196 @@ describe("agent_report tool", () => { } }); + it("exposes workflow output schema directly in inline agent_report input", () => { + using tempDir = new TestTempDir("test-agent-report-tool-schema"); + const outputSchema = { + type: "object", + required: ["claims"], + properties: { claims: { type: "array", items: { type: "string" } } }, + 
additionalProperties: false, + }; + const tool = createAgentReportTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }), + taskService: { + hasActiveDescendantAgentTasksForWorkspace: mock(() => false), + } as unknown as TaskService, + workflowAgentOutputSchema: outputSchema, + }); + + const inputSchema = tool.inputSchema as { jsonSchema?: unknown }; + expect(inputSchema.jsonSchema).toEqual({ + type: "object", + properties: { + reportMarkdown: { type: "string", minLength: 1 }, + structuredOutput: outputSchema, + title: { anyOf: [{ type: "string" }, { type: "null" }] }, + }, + required: ["reportMarkdown", "structuredOutput", "title"], + additionalProperties: false, + }); + }); + + it("returns validation failure without finalizing when structured output does not match workflow schema", async () => { + using tempDir = new TestTempDir("test-agent-report-tool-structured-invalid"); + const baseConfig = createTestToolConfig(tempDir.path, { + workspaceId: "task-workspace", + }); + + const taskService = { + hasActiveDescendantAgentTasksForWorkspace: mock(() => false), + } as unknown as TaskService; + + const tool = createAgentReportTool({ + ...baseConfig, + taskService, + workflowAgentOutputSchema: { + type: "object", + required: ["claims"], + properties: { claims: { type: "array", items: { type: "string" } } }, + additionalProperties: false, + }, + }); + + const result: unknown = await Promise.resolve( + tool.execute!( + { reportMarkdown: "done", structuredOutput: { claims: [1] } }, + mockToolCallOptions + ) + ); + + expect(result).toEqual({ + success: false, + message: "Structured output failed schema validation.", + errors: [{ path: "$.claims[0]", message: "Expected string, got number" }], + }); + }); + + it("returns success when structured output satisfies workflow schema", async () => { + using tempDir = new TestTempDir("test-agent-report-tool-structured-ok"); + const baseConfig = createTestToolConfig(tempDir.path, { + workspaceId: 
"task-workspace", + }); + + const taskService = { + hasActiveDescendantAgentTasksForWorkspace: mock(() => false), + } as unknown as TaskService; + + const tool = createAgentReportTool({ + ...baseConfig, + taskService, + workflowAgentOutputSchema: { + type: "object", + required: ["claims"], + properties: { claims: { type: "array", items: { type: "string" } } }, + additionalProperties: false, + }, + }); + + const result: unknown = await Promise.resolve( + tool.execute!( + { reportMarkdown: "done", structuredOutput: { claims: ["a"] } }, + mockToolCallOptions + ) + ); + + expect(result).toEqual({ + success: true, + message: "Report submitted successfully.", + }); + }); + + it("submits a subagent file-backed report from report.md and structured-output.json", async () => { + using tempDir = new TestTempDir("test-agent-report-tool-file-backed"); + await fs.writeFile(path.join(tempDir.path, "report.md"), "# Done\n\nFindings.", "utf-8"); + await fs.writeFile( + path.join(tempDir.path, "structured-output.json"), + JSON.stringify({ claims: ["durable"] }), + "utf-8" + ); + const taskService = { + hasActiveDescendantAgentTasksForWorkspace: mock(() => false), + } as unknown as TaskService; + const tool = createAgentReportTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }), + taskService, + subagentReportFiles: true, + workflowAgentOutputSchema: { + type: "object", + required: ["claims"], + properties: { claims: { type: "array", items: { type: "string" } } }, + additionalProperties: false, + }, + }); + + const inputSchema = tool.inputSchema as { jsonSchema?: unknown }; + expect(inputSchema.jsonSchema).toEqual( + expect.objectContaining({ + required: ["reportMarkdownPath", "structuredOutputPath", "title"], + }) + ); + + const result: unknown = await Promise.resolve(tool.execute!(undefined, mockToolCallOptions)); + + expect(result).toEqual({ + success: true, + message: "Report submitted successfully.", + report: { + reportMarkdown: "# 
Done\n\nFindings.", + structuredOutput: { claims: ["durable"] }, + }, + }); + }); + + it("submits a subagent file-backed markdown report with empty arguments", async () => { + using tempDir = new TestTempDir("test-agent-report-tool-file-backed-empty-args"); + await fs.writeFile(path.join(tempDir.path, "report.md"), "# Done", "utf-8"); + const tool = createAgentReportTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }), + taskService: { + hasActiveDescendantAgentTasksForWorkspace: mock(() => false), + } as unknown as TaskService, + subagentReportFiles: true, + }); + + const result: unknown = await Promise.resolve(tool.execute!({}, mockToolCallOptions)); + + expect(result).toEqual({ + success: true, + message: "Report submitted successfully.", + report: { reportMarkdown: "# Done" }, + }); + }); + + it("rejects file-backed structured output that fails workflow schema validation", async () => { + using tempDir = new TestTempDir("test-agent-report-tool-file-backed-invalid"); + await fs.writeFile(path.join(tempDir.path, "report.md"), "done", "utf-8"); + await fs.writeFile( + path.join(tempDir.path, "structured-output.json"), + '{"claims":[1]}', + "utf-8" + ); + const tool = createAgentReportTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }), + taskService: { + hasActiveDescendantAgentTasksForWorkspace: mock(() => false), + } as unknown as TaskService, + subagentReportFiles: true, + workflowAgentOutputSchema: { + type: "object", + required: ["claims"], + properties: { claims: { type: "array", items: { type: "string" } } }, + }, + }); + + const result: unknown = await Promise.resolve(tool.execute!({}, mockToolCallOptions)); + + expect(result).toEqual({ + success: false, + message: "Structured output failed schema validation.", + errors: [{ path: "$.claims[0]", message: "Expected string, got number" }], + }); + }); + it("returns success when the task has no active descendants", async () => { using tempDir = new 
TestTempDir("test-agent-report-tool-ok"); const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }); diff --git a/src/node/services/tools/agent_report.ts b/src/node/services/tools/agent_report.ts index 99f949a1cb..0b8e309343 100644 --- a/src/node/services/tools/agent_report.ts +++ b/src/node/services/tools/agent_report.ts @@ -1,15 +1,321 @@ -import { tool } from "ai"; +import { jsonSchema, tool } from "ai"; +import type { JSONSchema7 } from "@ai-sdk/provider"; +import { getErrorMessage } from "@/common/utils/errors"; +import { + validateJsonSchemaSubset, + validateJsonSchemaSubsetSchema, + type JsonSchemaValidationError, +} from "@/common/utils/jsonSchemaSubset"; import type { ToolConfiguration, ToolFactory } from "@/common/utils/tools/tools"; -import { TOOL_DEFINITIONS } from "@/common/utils/tools/toolDefinitions"; +import { + AgentReportFileToolArgsSchema, + AgentReportInlineToolArgsSchema, + TOOL_DEFINITIONS, +} from "@/common/utils/tools/toolDefinitions"; +import { RuntimeError } from "@/node/runtime/Runtime"; +import { readFileString } from "@/node/utils/runtime/helpers"; +import { validateFileSize, validatePathInCwd } from "./fileCommon"; import { requireTaskService, requireWorkspaceId } from "./toolUtils"; +const DEFAULT_REPORT_MARKDOWN_PATH = "report.md"; +const DEFAULT_STRUCTURED_OUTPUT_PATH = "structured-output.json"; + +const REPORT_MARKDOWN_MAX_BYTES = 256 * 1024; +const STRUCTURED_OUTPUT_MAX_BYTES = 64 * 1024; + +interface AgentReportSuccessResult { + success: true; + message: string; + report?: { + reportMarkdown: string; + title?: string; + structuredOutput?: unknown; + }; +} + +interface AgentReportFailureResult { + success: false; + message: string; + errors: JsonSchemaValidationError[]; +} + +type AgentReportResult = AgentReportSuccessResult | AgentReportFailureResult; + +function validationFailure( + message: string, + errors: JsonSchemaValidationError[] +): AgentReportFailureResult { + return { success: false, 
message, errors }; +} + +function zodValidationFailure( + message: string, + error: { issues: Array<{ path: unknown[]; message: string }> } +) { + return validationFailure( + message, + error.issues.map((issue) => ({ + path: issue.path.length > 0 ? `$.${issue.path.join(".")}` : "$", + message: issue.message, + })) + ); +} + +function validateStructuredOutput(config: ToolConfiguration, structuredOutput: unknown) { + if (config.workflowAgentOutputSchema == null) { + return null; + } + + const validation = validateJsonSchemaSubset(config.workflowAgentOutputSchema, structuredOutput); + return validation.success + ? null + : validationFailure("Structured output failed schema validation.", validation.errors); +} + +function buildInlineInputSchema(config: ToolConfiguration) { + const outputSchema = config.workflowAgentOutputSchema; + if (outputSchema == null || !validateJsonSchemaSubsetSchema(outputSchema).success) { + return AgentReportInlineToolArgsSchema; + } + + return jsonSchema( + { + type: "object", + properties: { + reportMarkdown: { type: "string", minLength: 1 }, + structuredOutput: outputSchema as JSONSchema7, + title: { anyOf: [{ type: "string" }, { type: "null" }] }, + }, + required: ["reportMarkdown", "structuredOutput", "title"], + additionalProperties: false, + } satisfies JSONSchema7, + { + validate: (value) => { + const parsed = AgentReportInlineToolArgsSchema.safeParse(value); + if (!parsed.success) { + return { success: false, error: parsed.error }; + } + const validation = validateStructuredOutput(config, parsed.data.structuredOutput); + if (validation) { + return { success: false, error: new Error(validation.message) }; + } + return { success: true, value: parsed.data }; + }, + } + ); +} + +function buildFileInputSchema() { + return jsonSchema( + { + type: "object", + properties: { + reportMarkdownPath: { + anyOf: [{ type: "string", minLength: 1 }, { type: "null" }], + description: + "Optional path to the markdown report file. 
Pass null or omit to submit report.md from the workspace root.", + }, + structuredOutputPath: { + anyOf: [{ type: "string", minLength: 1 }, { type: "null" }], + description: + "Optional path to structured output JSON. Pass null or omit to submit structured-output.json when this task requires structured output.", + }, + title: { anyOf: [{ type: "string" }, { type: "null" }] }, + }, + required: ["reportMarkdownPath", "structuredOutputPath", "title"], + additionalProperties: false, + } satisfies JSONSchema7, + { + validate: (value) => { + const parsed = AgentReportFileToolArgsSchema.safeParse(value ?? {}); + return parsed.success + ? { success: true, value: parsed.data } + : { success: false, error: parsed.error }; + }, + } + ); +} + +function getAgentReportInputSchema(config: ToolConfiguration) { + return config.subagentReportFiles ? buildFileInputSchema() : buildInlineInputSchema(config); +} + +function getAgentReportDescription(config: ToolConfiguration): string { + if (!config.subagentReportFiles) { + return TOOL_DEFINITIONS.agent_report.description; + } + + return ( + TOOL_DEFINITIONS.agent_report.description + + "\n\nSubagent file-backed report mode is enabled for this task. " + + "Write the final human-readable report to `report.md` in the workspace root. " + + (config.workflowAgentOutputSchema != null + ? "Write the required structured output as valid JSON to `structured-output.json`. " + : "") + + "Then call agent_report with reportMarkdownPath, structuredOutputPath, and title all set to null so Mux uses the default files. " + + "Only pass non-null file path arguments if you intentionally used non-default filenames." 
+ ); +} + +async function readReportFile(params: { + config: ToolConfiguration; + filePath: string; + fieldPath: string; + maxBytes: number; +}): Promise<{ success: true; content: string } | AgentReportFailureResult> { + const { config, filePath, fieldPath, maxBytes } = params; + const pathValidation = validatePathInCwd(filePath, config.cwd, config.runtime, [ + config.runtimeTempDir, + ]); + if (pathValidation) { + return validationFailure("Report file submission failed.", [ + { path: fieldPath, message: pathValidation.error }, + ]); + } + + const resolvedPath = config.runtime.normalizePath(filePath, config.cwd); + let fileStat; + try { + fileStat = await config.runtime.stat(resolvedPath); + } catch (error) { + const message = error instanceof RuntimeError ? error.message : getErrorMessage(error); + return validationFailure("Report file submission failed.", [{ path: fieldPath, message }]); + } + + if (fileStat.isDirectory) { + return validationFailure("Report file submission failed.", [ + { path: fieldPath, message: `Path is a directory, not a file: ${resolvedPath}` }, + ]); + } + + const sizeValidation = validateFileSize(fileStat); + if (sizeValidation) { + return validationFailure("Report file submission failed.", [ + { path: fieldPath, message: sizeValidation.error }, + ]); + } + if (fileStat.size > maxBytes) { + return validationFailure("Report file submission failed.", [ + { + path: fieldPath, + message: `File is too large (${fileStat.size} bytes). Maximum allowed is ${maxBytes} bytes.`, + }, + ]); + } + + try { + const content = await readFileString(config.runtime, resolvedPath); + if (Buffer.byteLength(content, "utf-8") > maxBytes) { + return validationFailure("Report file submission failed.", [ + { + path: fieldPath, + message: `File is too large after decoding. Maximum allowed is ${maxBytes} bytes.`, + }, + ]); + } + return { success: true, content }; + } catch (error) { + const message = error instanceof RuntimeError ? 
error.message : getErrorMessage(error); + return validationFailure("Report file submission failed.", [{ path: fieldPath, message }]); + } +} + +async function executeFileBackedReport( + config: ToolConfiguration, + rawArgs: unknown +): Promise { + const parsed = AgentReportFileToolArgsSchema.safeParse(rawArgs ?? {}); + if (!parsed.success) { + return zodValidationFailure("Report file arguments failed validation.", parsed.error); + } + + const reportMarkdownPath = parsed.data.reportMarkdownPath ?? DEFAULT_REPORT_MARKDOWN_PATH; + const structuredOutputPath = + parsed.data.structuredOutputPath ?? + (config.workflowAgentOutputSchema != null ? DEFAULT_STRUCTURED_OUTPUT_PATH : undefined); + + const markdown = await readReportFile({ + config, + filePath: reportMarkdownPath, + fieldPath: "$.reportMarkdownPath", + maxBytes: REPORT_MARKDOWN_MAX_BYTES, + }); + if (!markdown.success) { + return markdown; + } + if (markdown.content.trim().length === 0) { + return validationFailure("Report file submission failed.", [ + { path: "$.reportMarkdownPath", message: "Report markdown must not be empty" }, + ]); + } + + let structuredOutput: unknown; + if (structuredOutputPath != null) { + const structuredOutputFile = await readReportFile({ + config, + filePath: structuredOutputPath, + fieldPath: "$.structuredOutputPath", + maxBytes: STRUCTURED_OUTPUT_MAX_BYTES, + }); + if (!structuredOutputFile.success) { + return structuredOutputFile; + } + try { + structuredOutput = JSON.parse(structuredOutputFile.content) as unknown; + } catch (error) { + return validationFailure("Structured output JSON failed parsing.", [ + { path: "$.structuredOutputPath", message: getErrorMessage(error) }, + ]); + } + } else if (config.workflowAgentOutputSchema != null) { + return validationFailure("Structured output file is required.", [ + { path: "$.structuredOutputPath", message: "Required property is missing" }, + ]); + } + + const structuredValidation = validateStructuredOutput(config, structuredOutput); + if 
(structuredValidation) { + return structuredValidation; + } + + const title = parsed.data.title?.trim(); + return { + success: true, + message: "Report submitted successfully.", + report: { + reportMarkdown: markdown.content, + ...(title ? { title } : {}), + ...(structuredOutput !== undefined ? { structuredOutput } : {}), + }, + }; +} + +function executeInlineReport(config: ToolConfiguration, rawArgs: unknown): AgentReportResult { + const parsed = AgentReportInlineToolArgsSchema.safeParse(rawArgs); + if (!parsed.success) { + return zodValidationFailure("Report arguments failed validation.", parsed.error); + } + + const structuredValidation = validateStructuredOutput(config, parsed.data.structuredOutput); + if (structuredValidation) { + return structuredValidation; + } + + // Intentionally no report payload on success. The backend orchestrator consumes inline + // tool-call args from persisted history once the tool call completes successfully. + return { + success: true, + message: "Report submitted successfully.", + }; +} + export const createAgentReportTool: ToolFactory = (config: ToolConfiguration) => { return tool({ - description: TOOL_DEFINITIONS.agent_report.description, - inputSchema: TOOL_DEFINITIONS.agent_report.schema, - execute: (): { success: true; message: string } => { + description: getAgentReportDescription(config), + inputSchema: getAgentReportInputSchema(config), + execute: async (args: unknown): Promise => { const workspaceId = requireWorkspaceId(config, "agent_report"); const taskService = requireTaskService(config, "agent_report"); @@ -20,14 +326,11 @@ export const createAgentReportTool: ToolFactory = (config: ToolConfiguration) => ); } - // Intentionally no side-effects. The backend orchestrator consumes the tool-call args - // via persisted history/partial state once the tool call completes successfully. 
- // The stream continues after this so the SDK can record usage, while StreamManager - // stops autonomous loops once it observes agent_report with output.success === true. - return { - success: true, - message: "Report submitted successfully.", - }; + if (config.subagentReportFiles) { + return await executeFileBackedReport(config, args); + } + + return executeInlineReport(config, args); }, }); }; diff --git a/src/node/services/tools/task.ts b/src/node/services/tools/task.ts index 18feff807b..7da730fa63 100644 --- a/src/node/services/tools/task.ts +++ b/src/node/services/tools/task.ts @@ -78,6 +78,7 @@ interface PendingTaskInfo { interface CompletedTaskInfo { taskId: string; reportMarkdown: string; + structuredOutput?: unknown; title?: string; agentId: string; agentType: string; @@ -126,6 +127,7 @@ function serializeCompletedReport(report: CompletedTaskInfo) { return { taskId: report.taskId, reportMarkdown: report.reportMarkdown, + structuredOutput: report.structuredOutput, title: report.title, agentId: report.agentId, agentType: report.agentType, @@ -210,6 +212,7 @@ function buildCompletedTaskResult(params: { status: "completed", taskId: report.taskId, reportMarkdown: report.reportMarkdown, + structuredOutput: report.structuredOutput, title: report.title, agentId: report.agentId, agentType: report.agentType, @@ -397,6 +400,7 @@ export const createTaskTool: ToolFactory = (config: ToolConfiguration) => { report: { taskId: createdTask.taskId, reportMarkdown: report.reportMarkdown, + structuredOutput: report.structuredOutput, title: report.title, agentId: requestedAgentId, agentType: requestedAgentId, diff --git a/src/node/services/tools/task_apply_git_patch.ts b/src/node/services/tools/task_apply_git_patch.ts index 1d5bd760fc..58dc4b1313 100644 --- a/src/node/services/tools/task_apply_git_patch.ts +++ b/src/node/services/tools/task_apply_git_patch.ts @@ -2,10 +2,13 @@ import assert from "node:assert/strict"; import * as fsPromises from "fs/promises"; import * as 
path from "node:path"; +import type { z } from "zod"; + import { tool } from "ai"; import type { ToolConfiguration, ToolFactory } from "@/common/utils/tools/tools"; import { + TaskApplyGitPatchToolArgsSchema, TaskApplyGitPatchToolResultSchema, TOOL_DEFINITIONS, } from "@/common/utils/tools/toolDefinitions"; @@ -26,6 +29,14 @@ import { getWorkspaceProjectRepos } from "@/node/services/workspaceProjectRepos" import { parseToolResult, requireWorkspaceId } from "./toolUtils"; +export type TaskApplyGitPatchArgs = z.infer; +export type TaskApplyGitPatchResult = z.infer; + +export type TaskApplyGitPatchConfiguration = Pick< + ToolConfiguration, + "workspaceId" | "cwd" | "runtime" | "runtimeTempDir" | "workspaceSessionDir" | "trusted" +>; + interface AppliedCommit { subject: string; sha?: string; @@ -888,249 +899,272 @@ async function applyProjectPatch(params: { }; } -export const createTaskApplyGitPatchTool: ToolFactory = (config: ToolConfiguration) => { - return tool({ - description: TOOL_DEFINITIONS.task_apply_git_patch.description, - inputSchema: TOOL_DEFINITIONS.task_apply_git_patch.schema, - execute: async (args, { abortSignal }): Promise => { - const workspaceId = requireWorkspaceId(config, "task_apply_git_patch"); - assert(config.cwd, "task_apply_git_patch requires cwd"); - assert(config.runtimeTempDir, "task_apply_git_patch requires runtimeTempDir"); - const workspaceSessionDir = config.workspaceSessionDir; - assert(workspaceSessionDir, "task_apply_git_patch requires workspaceSessionDir"); - - const taskId = args.task_id; - const dryRun = args.dry_run === true; - const threeWay = args.three_way !== false; - const force = args.force === true; - - const artifactLookup = await findGitPatchArtifactInWorkspaceOrAncestors({ - workspaceId, - workspaceSessionDir, - childTaskId: taskId, - }); +export async function applyTaskGitPatchArtifact( + config: TaskApplyGitPatchConfiguration, + args: TaskApplyGitPatchArgs, + options: { abortSignal?: AbortSignal; allowAlreadyApplied?: 
boolean } = {} +): Promise { + const workspaceId = requireWorkspaceId(config, "task_apply_git_patch"); + assert(config.cwd, "task_apply_git_patch requires cwd"); + assert(config.runtimeTempDir, "task_apply_git_patch requires runtimeTempDir"); + const workspaceSessionDir = config.workspaceSessionDir; + assert(workspaceSessionDir, "task_apply_git_patch requires workspaceSessionDir"); + + const parsedArgs = TaskApplyGitPatchToolArgsSchema.parse(args); + const taskId = parsedArgs.task_id; + const dryRun = parsedArgs.dry_run === true; + const threeWay = parsedArgs.three_way !== false; + const force = parsedArgs.force === true; + + await config.runtime.ensureDir(config.runtimeTempDir, options.abortSignal); + + const artifactLookup = await findGitPatchArtifactInWorkspaceOrAncestors({ + workspaceId, + workspaceSessionDir, + childTaskId: taskId, + }); - if (!artifactLookup) { - return parseToolResult( - TaskApplyGitPatchToolResultSchema, - { - success: false as const, - taskId, - dryRun, - error: "No git patch artifact found for this taskId.", - }, - "task_apply_git_patch" - ); - } + if (!artifactLookup) { + return parseToolResult( + TaskApplyGitPatchToolResultSchema, + { + success: false as const, + taskId, + dryRun, + error: "No git patch artifact found for this taskId.", + }, + "task_apply_git_patch" + ); + } - const artifact = artifactLookup.artifact; - const artifactWorkspaceId = artifactLookup.artifactWorkspaceId; - const artifactSessionDir = artifactLookup.artifactSessionDir; - const isReplay = artifactWorkspaceId !== workspaceId; - const artifactLookupNote = artifactLookup.note; - - if (artifact.parentWorkspaceId !== artifactWorkspaceId) { - return parseToolResult( - TaskApplyGitPatchToolResultSchema, - { - success: false as const, - taskId, - dryRun, - error: "This patch artifact belongs to a different parent workspace.", - note: mergeNotes( - artifactLookupNote, - `Expected parent workspace ${artifactWorkspaceId} but artifact metadata says 
${artifact.parentWorkspaceId}.` - ), - }, - "task_apply_git_patch" - ); - } + const artifact = artifactLookup.artifact; + const artifactWorkspaceId = artifactLookup.artifactWorkspaceId; + const artifactSessionDir = artifactLookup.artifactSessionDir; + const isReplay = artifactWorkspaceId !== workspaceId; + const artifactLookupNote = artifactLookup.note; - const requestedProjectPath = args.project_path; - const projectArtifacts = - requestedProjectPath != null - ? artifact.projectArtifacts.filter((projectArtifact) => - matchesProjectArtifactProjectPath(projectArtifact, requestedProjectPath) - ) - : artifact.projectArtifacts; - - if (args.project_path != null && projectArtifacts.length === 0) { - return parseToolResult( - TaskApplyGitPatchToolResultSchema, - { - success: false as const, - taskId, - dryRun, - error: `No project patch artifact found for ${args.project_path}.`, - }, - "task_apply_git_patch" - ); - } + if (artifact.parentWorkspaceId !== artifactWorkspaceId) { + return parseToolResult( + TaskApplyGitPatchToolResultSchema, + { + success: false as const, + taskId, + dryRun, + error: "This patch artifact belongs to a different parent workspace.", + note: mergeNotes( + artifactLookupNote, + `Expected parent workspace ${artifactWorkspaceId} but artifact metadata says ${artifact.parentWorkspaceId}.` + ), + }, + "task_apply_git_patch" + ); + } - if (projectArtifacts.length === 0) { - return parseToolResult( - TaskApplyGitPatchToolResultSchema, - { - success: false as const, - taskId, - dryRun, - error: "This task has no project patch artifacts.", - }, - "task_apply_git_patch" - ); - } + const requestedProjectPath = parsedArgs.project_path; + const projectArtifacts = + requestedProjectPath != null + ? 
artifact.projectArtifacts.filter((projectArtifact) => + matchesProjectArtifactProjectPath(projectArtifact, requestedProjectPath) + ) + : artifact.projectArtifacts; + + if (parsedArgs.project_path != null && projectArtifacts.length === 0) { + return parseToolResult( + TaskApplyGitPatchToolResultSchema, + { + success: false as const, + taskId, + dryRun, + error: `No project patch artifact found for ${parsedArgs.project_path}.`, + }, + "task_apply_git_patch" + ); + } - const repoTargetsByProjectPath = resolveCurrentWorkspaceRepoTargets({ - workspaceId, - workspaceSessionDir, - }); - const projectResults: TaskApplyGitPatchProjectResult[] = []; - - const readyProjectArtifacts = projectArtifacts.filter( - (projectArtifact) => projectArtifact.status === "ready" - ); - if (readyProjectArtifacts.length === 0) { - for (const projectArtifact of projectArtifacts) { - projectResults.push(summarizeNonReadyProjectArtifact({ projectArtifact })); - } + if (projectArtifacts.length === 0) { + return parseToolResult( + TaskApplyGitPatchToolResultSchema, + { + success: false as const, + taskId, + dryRun, + error: "This task has no project patch artifacts.", + }, + "task_apply_git_patch" + ); + } - const legacyFields = toLegacyFields(projectResults); - return parseToolResult( - TaskApplyGitPatchToolResultSchema, - { - success: false as const, - taskId, - dryRun, - projectResults, - error: "This task has no ready project patch artifacts.", - note: artifactLookupNote, - ...legacyFields, - }, - "task_apply_git_patch" - ); - } + const repoTargetsByProjectPath = resolveCurrentWorkspaceRepoTargets({ + workspaceId, + workspaceSessionDir, + }); + const projectResults: TaskApplyGitPatchProjectResult[] = []; - let shouldStopAfterFailure = false; - for (const projectArtifact of projectArtifacts) { - if (shouldStopAfterFailure) { - projectResults.push({ - projectPath: projectArtifact.projectPath, - projectName: projectArtifact.projectName, - status: "skipped", - error: "Not attempted because an 
earlier project apply failed.", - }); - continue; - } + const readyProjectArtifacts = projectArtifacts.filter( + (projectArtifact) => projectArtifact.status === "ready" + ); + if (readyProjectArtifacts.length === 0) { + for (const projectArtifact of projectArtifacts) { + projectResults.push(summarizeNonReadyProjectArtifact({ projectArtifact })); + } - if (projectArtifact.status !== "ready") { - projectResults.push(summarizeNonReadyProjectArtifact({ projectArtifact })); - if (args.project_path != null) { - shouldStopAfterFailure = true; - } - continue; - } + const legacyFields = toLegacyFields(projectResults); + return parseToolResult( + TaskApplyGitPatchToolResultSchema, + { + success: false as const, + taskId, + dryRun, + projectResults, + error: "This task has no ready project patch artifacts.", + note: artifactLookupNote, + ...legacyFields, + }, + "task_apply_git_patch" + ); + } - if (!isReplay && projectArtifact.appliedAtMs && !force && !dryRun) { - projectResults.push({ - projectPath: projectArtifact.projectPath, - projectName: projectArtifact.projectName, - status: "failed", - error: `Patch already applied at ${new Date(projectArtifact.appliedAtMs).toISOString()}.`, - note: "Re-run with force=true to apply again.", - }); - shouldStopAfterFailure = true; - continue; - } + let shouldStopAfterFailure = false; + for (const projectArtifact of projectArtifacts) { + if (shouldStopAfterFailure) { + projectResults.push({ + projectPath: projectArtifact.projectPath, + projectName: projectArtifact.projectName, + status: "skipped", + error: "Not attempted because an earlier project apply failed.", + }); + continue; + } - const repoTarget = repoTargetsByProjectPath.get(projectArtifact.projectPath); - const repoCwd = - repoTarget?.repoCwd ?? (artifact.projectArtifacts.length === 1 ? 
config.cwd : undefined); - if (!repoCwd) { - projectResults.push({ - projectPath: projectArtifact.projectPath, - projectName: projectArtifact.projectName, - status: "failed", - error: "Could not resolve the current workspace repo root for this project.", - }); - shouldStopAfterFailure = true; - continue; - } + if (projectArtifact.status !== "ready") { + projectResults.push(summarizeNonReadyProjectArtifact({ projectArtifact })); + if (parsedArgs.project_path != null) { + shouldStopAfterFailure = true; + } + continue; + } - const applyResult = await applyProjectPatch({ - taskId, - workspaceId, - runtime: config.runtime, - runtimeTempDir: config.runtimeTempDir, - trusted: config.trusted === true, - repoCwd, - projectArtifact, - artifactWorkspaceId, - artifactSessionDir, - artifactLookupNote, - dryRun, - threeWay, - force, - isReplay, - abortSignal, + if (!isReplay && projectArtifact.appliedAtMs && !force) { + const appliedAt = new Date(projectArtifact.appliedAtMs).toISOString(); + if (options.allowAlreadyApplied === true) { + projectResults.push({ + projectPath: projectArtifact.projectPath, + projectName: projectArtifact.projectName, + status: "applied", + note: `Patch already applied at ${appliedAt}; treating as applied for replay-safe workflow integration.`, }); - projectResults.push(applyResult.projectResult); - if (!applyResult.success) { - shouldStopAfterFailure = true; - } + continue; } - - const legacyFields = toLegacyFields(projectResults); - const attemptedReadyCount = projectArtifacts.filter( - (projectArtifact) => projectArtifact.status === "ready" - ).length; - const appliedReadyCount = projectResults.filter( - (projectResult) => projectResult.status === "applied" - ).length; - const hasApplyFailure = projectResults.some( - (projectResult, index) => - projectResult.status === "failed" && projectArtifacts[index]?.status === "ready" - ); - const overallNote = mergeNotes( - artifactLookupNote, - projectResults - .map((projectResult) => projectResult.note) - 
.filter((note): note is string => typeof note === "string") - .join("\n") || undefined - ); - - if (hasApplyFailure) { - const firstFailedProject = projectResults.find( - (projectResult) => projectResult.status === "failed" - ); - return parseToolResult( - TaskApplyGitPatchToolResultSchema, - { - success: false as const, - taskId, - dryRun, - projectResults, - error: - firstFailedProject?.error ?? - `Failed while applying project patches (${appliedReadyCount}/${attemptedReadyCount} ready projects applied).`, - note: overallNote, - ...legacyFields, - }, - "task_apply_git_patch" - ); + if (!dryRun) { + projectResults.push({ + projectPath: projectArtifact.projectPath, + projectName: projectArtifact.projectName, + status: "failed", + error: `Patch already applied at ${appliedAt}.`, + note: "Re-run with force=true to apply again.", + }); + shouldStopAfterFailure = true; + continue; } + } - return parseToolResult( - TaskApplyGitPatchToolResultSchema, - { - success: true as const, - taskId, - projectResults, - dryRun, - note: overallNote, - ...(projectResults.length === 1 ? legacyFields : {}), - }, - "task_apply_git_patch" - ); + const repoTarget = repoTargetsByProjectPath.get(projectArtifact.projectPath); + const repoCwd = + repoTarget?.repoCwd ?? (artifact.projectArtifacts.length === 1 ? 
config.cwd : undefined); + if (!repoCwd) { + projectResults.push({ + projectPath: projectArtifact.projectPath, + projectName: projectArtifact.projectName, + status: "failed", + error: "Could not resolve the current workspace repo root for this project.", + }); + shouldStopAfterFailure = true; + continue; + } + + const applyResult = await applyProjectPatch({ + taskId, + workspaceId, + runtime: config.runtime, + runtimeTempDir: config.runtimeTempDir, + trusted: config.trusted === true, + repoCwd, + projectArtifact, + artifactWorkspaceId, + artifactSessionDir, + artifactLookupNote, + dryRun, + threeWay, + force, + isReplay, + abortSignal: options.abortSignal, + }); + projectResults.push(applyResult.projectResult); + if (!applyResult.success) { + shouldStopAfterFailure = true; + } + } + + const legacyFields = toLegacyFields(projectResults); + const attemptedReadyCount = projectArtifacts.filter( + (projectArtifact) => projectArtifact.status === "ready" + ).length; + const appliedReadyCount = projectResults.filter( + (projectResult) => projectResult.status === "applied" + ).length; + const hasApplyFailure = projectResults.some( + (projectResult, index) => + projectResult.status === "failed" && projectArtifacts[index]?.status === "ready" + ); + const overallNote = mergeNotes( + artifactLookupNote, + projectResults + .map((projectResult) => projectResult.note) + .filter((note): note is string => typeof note === "string") + .join("\n") || undefined + ); + + if (hasApplyFailure) { + const firstFailedProject = projectResults.find( + (projectResult) => projectResult.status === "failed" + ); + return parseToolResult( + TaskApplyGitPatchToolResultSchema, + { + success: false as const, + taskId, + dryRun, + projectResults, + error: + firstFailedProject?.error ?? 
+ `Failed while applying project patches (${appliedReadyCount}/${attemptedReadyCount} ready projects applied).`, + note: overallNote, + ...legacyFields, + }, + "task_apply_git_patch" + ); + } + + return parseToolResult( + TaskApplyGitPatchToolResultSchema, + { + success: true as const, + taskId, + projectResults, + dryRun, + note: overallNote, + ...(projectResults.length === 1 ? legacyFields : {}), + }, + "task_apply_git_patch" + ); +} + +export const createTaskApplyGitPatchTool: ToolFactory = (config: ToolConfiguration) => { + return tool({ + description: TOOL_DEFINITIONS.task_apply_git_patch.description, + inputSchema: TOOL_DEFINITIONS.task_apply_git_patch.schema, + execute: async (args, { abortSignal }): Promise => { + return await applyTaskGitPatchArtifact(config, args, { abortSignal }); }, }); }; diff --git a/src/node/services/tools/task_await.test.ts b/src/node/services/tools/task_await.test.ts index c94a8d3b36..b8fc1544b6 100644 --- a/src/node/services/tools/task_await.test.ts +++ b/src/node/services/tools/task_await.test.ts @@ -3,6 +3,8 @@ import * as fs from "fs"; import { describe, it, expect, mock, spyOn } from "bun:test"; import type { ToolExecutionOptions } from "ai"; +import type { ToolConfiguration } from "@/common/utils/tools/tools"; +import type { WorkflowRunRecord, WorkflowRunStatus } from "@/common/types/workflow"; import { createTaskAwaitTool } from "./task_await"; import { TestTempDir, createTestToolConfig } from "./testHelpers"; import type { BackgroundProcessManager } from "@/node/services/backgroundProcessManager"; @@ -14,6 +16,27 @@ const mockToolCallOptions: ToolExecutionOptions = { messages: [], }; +type TestWorkflowService = NonNullable; + +function createWorkflowRun( + status: WorkflowRunStatus, + events: WorkflowRunRecord["events"] = [] +): WorkflowRunRecord { + return { + id: "wfr_demo", + workspaceId: "parent-workspace", + definition: { name: "demo", description: "Demo workflow", scope: "built-in", executable: true }, + 
definitionSource: "export default function workflow() { return null; }\n", + definitionHash: "sha256:demo", + args: {}, + status, + createdAt: "2026-01-01T00:00:00.000Z", + updatedAt: "2026-01-01T00:00:05.000Z", + events, + steps: [], + }; +} + describe("task_await tool", () => { it("includes gitFormatPatch artifacts written during waitForAgentReport", async () => { using tempDir = new TestTempDir("test-task-await-tool-artifacts"); @@ -554,6 +577,192 @@ describe("task_await tool", () => { expect(waitForAgentReport).toHaveBeenCalledTimes(0); }); + it("awaits workflow run ids and returns the consolidated workflow result", async () => { + using tempDir = new TestTempDir("test-task-await-tool-workflow-completed"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" }); + const completedRun = createWorkflowRun("completed", [ + { + sequence: 1, + type: "status", + at: "2026-01-01T00:00:01.000Z", + status: "running", + }, + { + sequence: 2, + type: "result", + at: "2026-01-01T00:00:04.000Z", + result: { reportMarkdown: "workflow done", structuredOutput: { ok: true } }, + }, + { + sequence: 3, + type: "status", + at: "2026-01-01T00:00:05.000Z", + status: "completed", + }, + ]); + + const taskService = { + listActiveDescendantAgentTaskIds: mock(() => []), + isDescendantAgentTask: mock(() => Promise.resolve(false)), + waitForAgentReport: mock(() => { + throw new Error("workflow run IDs should not be treated as agent tasks"); + }), + } as unknown as TaskService; + const workflowService = { + getRun: mock(() => Promise.resolve(completedRun)), + }; + const tool = createTaskAwaitTool({ + ...baseConfig, + taskService, + workflowService: workflowService as unknown as TestWorkflowService, + }); + + const result: unknown = await Promise.resolve( + tool.execute!({ task_ids: ["wfr_demo"] }, mockToolCallOptions) + ); + + expect(result).toEqual({ + results: [ + { + status: "completed", + taskId: "wfr_demo", + reportMarkdown: "workflow done", + 
structuredOutput: { ok: true }, + title: "demo", + elapsed_ms: 5000, + run: completedRun, + }, + ], + }); + expect(workflowService.getRun).toHaveBeenCalledWith({ + workspaceId: "parent-workspace", + runId: "wfr_demo", + }); + }); + + it("discovers active workflow runs when task_ids is omitted", async () => { + using tempDir = new TestTempDir("test-task-await-tool-workflow-discovery"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" }); + const backgroundedRun = { + ...createWorkflowRun("backgrounded", [ + { + sequence: 1, + type: "status" as const, + at: "2026-01-01T00:00:01.000Z", + status: "running" as const, + }, + { + sequence: 2, + type: "status" as const, + at: "2026-01-01T00:00:02.000Z", + status: "backgrounded" as const, + }, + ]), + id: "wfr_backgrounded", + status: "backgrounded" as const, + }; + + const taskService = { + listActiveDescendantAgentTaskIds: mock(() => []), + isDescendantAgentTask: mock(() => Promise.resolve(false)), + waitForAgentReport: mock(() => { + throw new Error("workflow discovery should not wait for agent reports"); + }), + } as unknown as TaskService; + const workflowService = { + listRuns: mock(() => Promise.resolve([backgroundedRun])), + getRun: mock(() => Promise.resolve(backgroundedRun)), + }; + const tool = createTaskAwaitTool({ + ...baseConfig, + taskService, + workflowService: workflowService as unknown as TestWorkflowService, + }); + + const result: unknown = await Promise.resolve( + tool.execute!({ timeout_secs: 0 }, mockToolCallOptions) + ); + + const workflowResult = result as { + results: Array<{ elapsed_ms?: unknown }>; + }; + expect(typeof workflowResult.results[0]?.elapsed_ms).toBe("number"); + expect(result).toEqual({ + results: [ + { + status: "backgrounded", + taskId: "wfr_backgrounded", + elapsed_ms: workflowResult.results[0]?.elapsed_ms, + note: "Workflow run is backgrounded. 
Use task_await to monitor progress.", + run: backgroundedRun, + }, + ], + }); + }); + + it("polls a backgrounded workflow run until the final result is available", async () => { + using tempDir = new TestTempDir("test-task-await-tool-workflow-poll"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" }); + const backgroundedRun = { + ...createWorkflowRun("backgrounded"), + id: "wfr_poll", + status: "backgrounded" as const, + }; + const completedRun = { + ...createWorkflowRun("completed", [ + { + sequence: 1, + type: "result" as const, + at: "2026-01-01T00:00:05.000Z", + result: { reportMarkdown: "poll complete" }, + }, + { + sequence: 2, + type: "status" as const, + at: "2026-01-01T00:00:05.000Z", + status: "completed" as const, + }, + ]), + id: "wfr_poll", + status: "completed" as const, + }; + let getRunCalls = 0; + + const taskService = { + listActiveDescendantAgentTaskIds: mock(() => []), + isDescendantAgentTask: mock(() => Promise.resolve(false)), + waitForAgentReport: mock(() => { + throw new Error("workflow polling should not wait for agent reports"); + }), + } as unknown as TaskService; + const workflowService = { + getRun: mock(() => Promise.resolve(getRunCalls++ === 0 ? 
backgroundedRun : completedRun)), + }; + const tool = createTaskAwaitTool({ + ...baseConfig, + taskService, + workflowService: workflowService as unknown as TestWorkflowService, + }); + + const result: unknown = await Promise.resolve( + tool.execute!({ task_ids: ["wfr_poll"], timeout_secs: 1 }, mockToolCallOptions) + ); + + expect(result).toEqual({ + results: [ + { + status: "completed", + taskId: "wfr_poll", + reportMarkdown: "poll complete", + title: "demo", + elapsed_ms: 5000, + run: completedRun, + }, + ], + }); + expect(workflowService.getRun).toHaveBeenCalledTimes(2); + }); + it("defaults to waiting on all active descendant tasks when task_ids is omitted", async () => { using tempDir = new TestTempDir("test-task-await-tool-descendants"); const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" }); @@ -572,7 +781,9 @@ describe("task_await tool", () => { const result: unknown = await Promise.resolve(tool.execute!({}, mockToolCallOptions)); - expect(listActiveDescendantAgentTaskIds).toHaveBeenCalledWith("parent-workspace"); + expect(listActiveDescendantAgentTaskIds).toHaveBeenCalledWith("parent-workspace", { + excludeWorkflowTasks: true, + }); expect(result).toEqual({ results: [{ status: "completed", taskId: "t1", reportMarkdown: "ok", title: undefined }], }); diff --git a/src/node/services/tools/task_await.ts b/src/node/services/tools/task_await.ts index 5715f24c43..a57cb3529e 100644 --- a/src/node/services/tools/task_await.ts +++ b/src/node/services/tools/task_await.ts @@ -2,7 +2,9 @@ import { tool } from "ai"; import type { ToolConfiguration, ToolFactory } from "@/common/utils/tools/tools"; import { readSubagentGitPatchArtifact } from "@/node/services/subagentGitPatchArtifacts"; +import { WorkflowRunRecordSchema } from "@/common/orpc/schemas"; import { TaskAwaitToolResultSchema, TOOL_DEFINITIONS } from "@/common/utils/tools/toolDefinitions"; +import type { WorkflowRunRecord, WorkflowRunStatus } from "@/common/types/workflow"; 
import { fromBashTaskId, toBashTaskId } from "./taskId"; import { formatBashOutputReport } from "./bashTaskReport"; @@ -20,6 +22,9 @@ import { type AgentTaskTimestamps, } from "@/node/services/taskService"; +const DEFAULT_TASK_AWAIT_TIMEOUT_MS = 600_000; +const WORKFLOW_AWAIT_POLL_INTERVAL_MS = 250; + // Status values for which task_await still treats an agent task as live and // should surface the live status (plus an `elapsed_ms` field) instead of // awaiting a report. Centralised here so the timeout=0 and "timed out" error @@ -74,6 +79,118 @@ function buildTaskAwaitSequencingError(taskId: string, suggestedTaskIds: string[ }; } +function isWorkflowRunId(taskId: string): boolean { + return taskId.startsWith("wfr_"); +} + +function isWorkflowRunAwaitableStatus(status: WorkflowRunStatus): boolean { + return status === "pending" || status === "running" || status === "backgrounded"; +} + +function isWorkflowRunTerminalStatus(status: WorkflowRunStatus): boolean { + return status === "completed" || status === "failed" || status === "interrupted"; +} + +function parseWorkflowRun(value: unknown): WorkflowRunRecord { + return WorkflowRunRecordSchema.parse(value); +} + +function getWorkflowRunElapsedMs(run: WorkflowRunRecord): number | undefined { + const createdAtMs = parseTimestampMs(run.createdAt); + if (createdAtMs == null) { + return undefined; + } + const updatedAtMs = parseTimestampMs(run.updatedAt); + const endAtMs = isWorkflowRunTerminalStatus(run.status) ? updatedAtMs : Date.now(); + return Math.max(0, (endAtMs ?? 
Date.now()) - createdAtMs); +} + +function getWorkflowRunReport(run: WorkflowRunRecord): { + reportMarkdown: string; + structuredOutput?: unknown; +} { + const result = run.events.findLast((event) => event.type === "result")?.result; + if (result != null) { + return result; + } + return { reportMarkdown: `Workflow ${run.definition.name} completed without a final report.` }; +} + +function getWorkflowRunError(run: WorkflowRunRecord): string { + return ( + run.events.findLast((event) => event.type === "error")?.message ?? + `Workflow ${run.definition.name} failed.` + ); +} + +function buildWorkflowAwaitResult(run: WorkflowRunRecord) { + const base = { + taskId: run.id, + run, + ...withElapsedMs(getWorkflowRunElapsedMs(run)), + }; + + switch (run.status) { + case "completed": { + const result = getWorkflowRunReport(run); + return { + status: "completed" as const, + ...base, + reportMarkdown: result.reportMarkdown, + ...(result.structuredOutput !== undefined + ? { structuredOutput: result.structuredOutput } + : {}), + title: run.definition.name, + }; + } + case "failed": + return { + status: "error" as const, + ...base, + error: getWorkflowRunError(run), + }; + case "interrupted": + return { + status: "interrupted" as const, + ...base, + note: `Workflow ${run.definition.name} was interrupted.`, + }; + case "pending": + return { + status: "queued" as const, + ...base, + }; + case "backgrounded": + return { + status: "backgrounded" as const, + ...base, + note: "Workflow run is backgrounded. 
Use task_await to monitor progress.", + }; + case "running": + return { + status: "running" as const, + ...base, + }; + } +} + +async function waitForDelayOrAbort(delayMs: number, signal: AbortSignal): Promise { + if (signal.aborted) { + return; + } + await new Promise((resolve) => { + const timer = setTimeout(resolve, delayMs); + signal.addEventListener( + "abort", + () => { + clearTimeout(timer); + resolve(); + }, + { once: true } + ); + }); +} + export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => { return tool({ description: TOOL_DEFINITIONS.task_await.description, @@ -90,8 +207,10 @@ export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => { const requestedIds: string[] | null = args.task_ids && args.task_ids.length > 0 ? args.task_ids : null; - const activeDescendantAgentTaskIds = - taskService.listActiveDescendantAgentTaskIds(workspaceId); + const activeDescendantAgentTaskIds = taskService.listActiveDescendantAgentTaskIds( + workspaceId, + { excludeWorkflowTasks: true } + ); const listInScopeBackgroundBashTaskIds = async (): Promise => { if (!config.backgroundProcessManager) { return []; @@ -110,9 +229,26 @@ export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => { return dedupeStrings(bashTaskIds); }; + const listInScopeWorkflowRunIds = async (): Promise => { + if (config.workflowService?.listRuns == null) { + return []; + } + + const workflowRunIds: string[] = []; + const runs = await config.workflowService.listRuns({ workspaceId }); + for (const rawRun of runs) { + const parsed = WorkflowRunRecordSchema.safeParse(rawRun); + if (!parsed.success || !isWorkflowRunAwaitableStatus(parsed.data.status)) { + continue; + } + workflowRunIds.push(parsed.data.id); + } + return dedupeStrings(workflowRunIds); + }; const listInScopeAwaitableTaskIds = async (): Promise => { const awaitableTaskIds = [...activeDescendantAgentTaskIds]; awaitableTaskIds.push(...(await listInScopeBackgroundBashTaskIds())); 
+ awaitableTaskIds.push(...(await listInScopeWorkflowRunIds())); return dedupeStrings(awaitableTaskIds); }; let suggestionBashTaskIdsPromise: Promise | undefined; @@ -120,11 +256,18 @@ export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => { suggestionBashTaskIdsPromise ??= listInScopeBackgroundBashTaskIds().catch(() => []); return await suggestionBashTaskIdsPromise; }; + let suggestionWorkflowRunIdsPromise: Promise | undefined; + const getSuggestionWorkflowRunIds = async (): Promise => { + suggestionWorkflowRunIdsPromise ??= listInScopeWorkflowRunIds().catch(() => []); + return await suggestionWorkflowRunIdsPromise; + }; const uniqueTaskIds = requestedIds ? dedupeStrings(requestedIds) : await listInScopeAwaitableTaskIds(); - const agentTaskIds = uniqueTaskIds.filter((taskId) => !taskId.startsWith("bash:")); + const agentTaskIds = uniqueTaskIds.filter( + (taskId) => !taskId.startsWith("bash:") && !isWorkflowRunId(taskId) + ); const bulkFilter = ( taskService as unknown as { filterDescendantAgentTaskIds?: ( @@ -167,6 +310,54 @@ export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => { ? taskService.getAgentTaskStatuses(rejectedAgentTaskIds) : new Map(); + const getWorkflowRun = async (runId: string): Promise => { + if (config.workflowService?.getRun == null) { + throw new Error("workflowService not available for workflow run awaits"); + } + const run = await config.workflowService.getRun({ workspaceId, runId }); + if (run == null) { + return null; + } + return parseWorkflowRun(run); + }; + + const awaitWorkflowRun = async (runId: string, taskSignal: AbortSignal) => { + let run = await getWorkflowRun(runId); + if (run == null) { + return { status: "not_found" as const, taskId: runId }; + } + if (timeoutMs === 0 || isWorkflowRunTerminalStatus(run.status)) { + return buildWorkflowAwaitResult(run); + } + + const deadline = Date.now() + (timeoutMs ?? 
DEFAULT_TASK_AWAIT_TIMEOUT_MS); + while (!isWorkflowRunTerminalStatus(run.status)) { + if (abortSignal?.aborted) { + return { status: "error" as const, taskId: runId, error: "Interrupted", run }; + } + if (taskSignal.aborted || Date.now() >= deadline) { + return buildWorkflowAwaitResult(run); + } + + const remainingMs = Math.max(1, deadline - Date.now()); + await waitForDelayOrAbort( + Math.min(WORKFLOW_AWAIT_POLL_INTERVAL_MS, remainingMs), + taskSignal + ); + if (taskSignal.aborted) { + return buildWorkflowAwaitResult(run); + } + + const nextRun = await getWorkflowRun(runId); + if (nextRun == null) { + return { status: "not_found" as const, taskId: runId }; + } + run = nextRun; + } + + return buildWorkflowAwaitResult(run); + }; + // task_await resolves once `min_completed` tasks have completed (default 1 = return on the // first completion) rather than always blocking on every awaited task. Each task gets its // own AbortController chained to the tool-call signal so that, once we have enough @@ -240,6 +431,10 @@ export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => { }; } + if (isWorkflowRunId(taskId)) { + return await awaitWorkflowRun(taskId, taskSignal); + } + if (!descendantAgentTaskIdSet.has(taskId)) { const lookup = rejectedAgentTaskStatuses.get(taskId); const activeTaskIds = @@ -248,6 +443,7 @@ export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => { const suggestedTaskIds = dedupeStrings([ ...activeDescendantAgentTaskIds, ...(await getSuggestionBashTaskIds()), + ...(await getSuggestionWorkflowRunIds()), ]); if (suggestedTaskIds.length > 0) { return buildTaskAwaitSequencingError(taskId, suggestedTaskIds); @@ -283,6 +479,7 @@ export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => { status: "completed" as const, taskId, reportMarkdown: report.reportMarkdown, + structuredOutput: report.structuredOutput, title: report.title, ...getAgentTaskElapsedField(taskId), ...(gitFormatPatch ? 
{ artifacts: { gitFormatPatch } } : {}), @@ -309,6 +506,7 @@ export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => { status: "completed" as const, taskId, reportMarkdown: report.reportMarkdown, + structuredOutput: report.structuredOutput, title: report.title, ...getAgentTaskElapsedField(taskId), ...(gitFormatPatch ? { artifacts: { gitFormatPatch } } : {}), diff --git a/src/node/services/tools/workflow_definitions.test.ts b/src/node/services/tools/workflow_definitions.test.ts new file mode 100644 index 0000000000..d26d67bca1 --- /dev/null +++ b/src/node/services/tools/workflow_definitions.test.ts @@ -0,0 +1,74 @@ +/* eslint-disable @typescript-eslint/no-unsafe-assignment, @typescript-eslint/require-await */ +import { describe, expect, mock, test } from "bun:test"; +import type { ToolExecutionOptions } from "ai"; +import { createWorkflowListTool, createWorkflowReadTool } from "./workflow_definitions"; +import { TestTempDir, createTestToolConfig } from "./testHelpers"; + +const mockToolCallOptions: ToolExecutionOptions = { + toolCallId: "test-call-id", + messages: [], +}; + +const descriptor = { + name: "deep-research", + description: "Deep research", + scope: "built-in" as const, + executable: true, +}; + +describe("workflow definition tools", () => { + test("lists available workflows through WorkflowService", async () => { + using tempDir = new TestTempDir("test-workflow-list-tool"); + const listDefinitions = mock(async () => [descriptor]); + const tool = createWorkflowListTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "workspace-1" }), + trusted: true, + workflowService: { + listDefinitions, + readDefinition: mock(async () => ({ + descriptor, + source: "export default function workflow() { return null; }", + })), + startNamedWorkflow: mock(async () => ({ + runId: "wfr_1", + status: "completed" as const, + result: null, + })), + }, + }); + + const result = await tool.execute!({}, mockToolCallOptions); + + 
expect(listDefinitions).toHaveBeenCalledWith({ projectTrusted: true }); + expect(result).toEqual({ workflows: [descriptor] }); + }); + + test("reads a workflow source through WorkflowService", async () => { + using tempDir = new TestTempDir("test-workflow-read-tool"); + const readDefinition = mock(async () => ({ + descriptor, + source: "export default function workflow() { return null; }", + })); + const tool = createWorkflowReadTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "workspace-1" }), + trusted: false, + workflowService: { + listDefinitions: mock(async () => []), + readDefinition, + startNamedWorkflow: mock(async () => ({ + runId: "wfr_1", + status: "completed" as const, + result: null, + })), + }, + }); + + const result = await tool.execute!({ name: "deep-research" }, mockToolCallOptions); + + expect(readDefinition).toHaveBeenCalledWith({ name: "deep-research", projectTrusted: false }); + expect(result).toEqual({ + descriptor, + source: "export default function workflow() { return null; }", + }); + }); +}); diff --git a/src/node/services/tools/workflow_definitions.ts b/src/node/services/tools/workflow_definitions.ts new file mode 100644 index 0000000000..232c01379c --- /dev/null +++ b/src/node/services/tools/workflow_definitions.ts @@ -0,0 +1,47 @@ +import { tool } from "ai"; + +import type { ToolConfiguration, ToolFactory } from "@/common/utils/tools/tools"; +import { + TOOL_DEFINITIONS, + WorkflowListToolResultSchema, + WorkflowReadToolResultSchema, +} from "@/common/utils/tools/toolDefinitions"; +import { parseToolResult } from "./toolUtils"; + +function requireWorkflowService(config: ToolConfiguration, toolName: string) { + if (!config.workflowService) { + throw new Error(`${toolName} requires workflowService`); + } + return config.workflowService; +} + +export const createWorkflowListTool: ToolFactory = (config: ToolConfiguration) => { + return tool({ + description: TOOL_DEFINITIONS.workflow_list.description, + inputSchema: 
TOOL_DEFINITIONS.workflow_list.schema, + execute: async (): Promise => { + const workflowService = requireWorkflowService(config, "workflow_list"); + const workflows = await workflowService.listDefinitions({ + projectTrusted: config.trusted === true, + }); + + return parseToolResult(WorkflowListToolResultSchema, { workflows }, "workflow_list"); + }, + }); +}; + +export const createWorkflowReadTool: ToolFactory = (config: ToolConfiguration) => { + return tool({ + description: TOOL_DEFINITIONS.workflow_read.description, + inputSchema: TOOL_DEFINITIONS.workflow_read.schema, + execute: async (args): Promise => { + const workflowService = requireWorkflowService(config, "workflow_read"); + const result = await workflowService.readDefinition({ + name: args.name, + projectTrusted: config.trusted === true, + }); + + return parseToolResult(WorkflowReadToolResultSchema, result, "workflow_read"); + }, + }); +}; diff --git a/src/node/services/tools/workflow_run.test.ts b/src/node/services/tools/workflow_run.test.ts new file mode 100644 index 0000000000..95b50933c0 --- /dev/null +++ b/src/node/services/tools/workflow_run.test.ts @@ -0,0 +1,161 @@ +/* eslint-disable @typescript-eslint/await-thenable, @typescript-eslint/no-unsafe-assignment, @typescript-eslint/require-await */ +import { describe, expect, mock, test } from "bun:test"; +import type { ToolExecutionOptions } from "ai"; +import { createWorkflowRunTool } from "./workflow_run"; +import { TestTempDir, createTestToolConfig } from "./testHelpers"; + +const mockToolCallOptions: ToolExecutionOptions = { + toolCallId: "test-call-id", + messages: [], +}; + +describe("workflow_run tool", () => { + test("starts a named workflow through WorkflowService", async () => { + using tempDir = new TestTempDir("test-workflow-run-tool"); + const startNamedWorkflow = mock(async () => ({ + runId: "wfr_123", + status: "completed" as const, + result: { reportMarkdown: "done" }, + })); + const getRun = mock(async () => ({ + id: "wfr_123", + 
workspaceId: "workspace-1", + definition: { + name: "deep-research", + description: "Deep research", + scope: "built-in" as const, + executable: true, + }, + definitionSource: "export default function workflow() { return null; }", + definitionHash: "sha256:test", + args: { topic: "workflow tools" }, + status: "completed" as const, + createdAt: "2026-05-29T00:00:00.000Z", + updatedAt: "2026-05-29T00:00:01.000Z", + events: [ + { + sequence: 1, + type: "status" as const, + at: "2026-05-29T00:00:00.000Z", + status: "running" as const, + }, + { sequence: 2, type: "phase" as const, at: "2026-05-29T00:00:00.000Z", name: "scope" }, + { + sequence: 3, + type: "result" as const, + at: "2026-05-29T00:00:01.000Z", + result: { reportMarkdown: "done" }, + }, + { + sequence: 4, + type: "status" as const, + at: "2026-05-29T00:00:01.000Z", + status: "completed" as const, + }, + ], + steps: [], + })); + const abortController = new AbortController(); + const tool = createWorkflowRunTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "workspace-1" }), + trusted: true, + workflowService: { + listDefinitions: mock(async () => []), + readDefinition: mock(async () => ({ + descriptor: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + source: "export default function workflow() { return null; }", + })), + startNamedWorkflow, + getRun, + }, + }); + + const result = await tool.execute!( + { name: "deep-research", args: { topic: "workflow tools" }, run_in_background: false }, + { ...mockToolCallOptions, abortSignal: abortController.signal } + ); + + expect(startNamedWorkflow).toHaveBeenCalledWith({ + name: "deep-research", + workspaceId: "workspace-1", + projectTrusted: true, + args: { topic: "workflow tools" }, + abortSignal: abortController.signal, + }); + expect(getRun).toHaveBeenCalledWith({ workspaceId: "workspace-1", runId: "wfr_123" }); + expect(result).toEqual({ + status: "completed", + runId: "wfr_123", + result: { 
reportMarkdown: "done" }, + run: expect.objectContaining({ + id: "wfr_123", + status: "completed", + events: expect.arrayContaining([expect.objectContaining({ type: "phase", name: "scope" })]), + }), + }); + }); + + test("starts a workflow in background mode", async () => { + using tempDir = new TestTempDir("test-workflow-run-tool-background"); + const startNamedWorkflow = mock(async () => { + throw new Error("foreground start should not be used"); + }); + const startNamedWorkflowInBackground = mock(async () => ({ + runId: "wfr_background", + status: "running" as const, + result: null, + })); + const getRun = mock(async () => null); + const tool = createWorkflowRunTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "workspace-1" }), + trusted: false, + workflowService: { + listDefinitions: mock(async () => []), + readDefinition: mock(async () => ({ + descriptor: { + name: "deep-research", + description: "Deep research", + scope: "built-in", + executable: true, + }, + source: "export default function workflow() { return null; }", + })), + startNamedWorkflow, + startNamedWorkflowInBackground, + getRun, + }, + }); + + const result = await tool.execute!( + { name: "deep-research", args: { topic: "workflow tools" }, run_in_background: true }, + mockToolCallOptions + ); + + expect(startNamedWorkflowInBackground).toHaveBeenCalledWith({ + name: "deep-research", + workspaceId: "workspace-1", + projectTrusted: false, + args: { topic: "workflow tools" }, + }); + expect(startNamedWorkflow).not.toHaveBeenCalled(); + expect(result).toEqual({ status: "running", runId: "wfr_background", result: null }); + }); + + test("requires the workflow service", async () => { + using tempDir = new TestTempDir("test-workflow-run-tool-missing"); + const tool = createWorkflowRunTool({ + ...createTestToolConfig(tempDir.path, { workspaceId: "workspace-1" }), + }); + + await expect( + Promise.resolve( + tool.execute!({ name: "demo", args: {}, run_in_background: false }, 
mockToolCallOptions) + ) + ).rejects.toThrow(/workflowService/); + }); +}); diff --git a/src/node/services/tools/workflow_run.ts b/src/node/services/tools/workflow_run.ts new file mode 100644 index 0000000000..c96d4793d2 --- /dev/null +++ b/src/node/services/tools/workflow_run.ts @@ -0,0 +1,62 @@ +import { tool } from "ai"; + +import type { ToolConfiguration, ToolFactory } from "@/common/utils/tools/tools"; +import { + WorkflowRunToolResultSchema, + TOOL_DEFINITIONS, +} from "@/common/utils/tools/toolDefinitions"; +import { parseToolResult, requireWorkspaceId } from "./toolUtils"; + +function requireWorkflowService(config: ToolConfiguration) { + if (!config.workflowService) { + throw new Error("workflow_run requires workflowService"); + } + return config.workflowService; +} + +function requireBackgroundWorkflowStart( + workflowService: NonNullable +) { + if (workflowService.startNamedWorkflowInBackground == null) { + throw new Error("workflow_run background mode requires startNamedWorkflowInBackground"); + } + return workflowService.startNamedWorkflowInBackground.bind(workflowService); +} + +export const createWorkflowRunTool: ToolFactory = (config: ToolConfiguration) => { + return tool({ + description: TOOL_DEFINITIONS.workflow_run.description, + inputSchema: TOOL_DEFINITIONS.workflow_run.schema, + execute: async (args, options): Promise => { + const workspaceId = requireWorkspaceId(config, "workflow_run"); + const workflowService = requireWorkflowService(config); + + const startInput = { + name: args.name, + workspaceId, + projectTrusted: config.trusted === true, + args: args.args ?? {}, + }; + const result = + args.run_in_background === true + ? await requireBackgroundWorkflowStart(workflowService)(startInput) + : await workflowService.startNamedWorkflow({ + ...startInput, + ...(options.abortSignal != null ? 
{ abortSignal: options.abortSignal } : {}), + }); + + const run = await workflowService.getRun?.({ workspaceId, runId: result.runId }); + + return parseToolResult( + WorkflowRunToolResultSchema, + { + status: result.status, + runId: result.runId, + result: result.result, + ...(run != null ? { run } : {}), + }, + "workflow_run" + ); + }, + }); +}; diff --git a/src/node/services/workflows/WorkflowDefinitionStore.test.ts b/src/node/services/workflows/WorkflowDefinitionStore.test.ts new file mode 100644 index 0000000000..522e3ca8da --- /dev/null +++ b/src/node/services/workflows/WorkflowDefinitionStore.test.ts @@ -0,0 +1,287 @@ +import * as fs from "node:fs/promises"; +import * as path from "node:path"; + +import { describe, expect, test } from "bun:test"; +import { RUNTIME_MODE } from "@/common/types/runtime"; +import { DisposableTempDir } from "@/node/services/tempDir"; +import { TrueRemotePathMappedRuntime } from "@/node/services/tools/testHelpers"; +import { + shouldUseRuntimeWorkflowProjectIO, + WorkflowDefinitionStore, +} from "./WorkflowDefinitionStore"; + +async function writeWorkflow( + root: string, + name: string, + description: string, + body = "return args;" +) { + await fs.mkdir(root, { recursive: true }); + await fs.writeFile( + path.join(root, `${name}.js`), + `// description: ${description}\nexport default async function workflow({ args }) { ${body} }\n`, + "utf-8" + ); +} + +describe("WorkflowDefinitionStore", () => { + test("uses runtime project I/O only when workspace paths are runtime-owned", () => { + expect(shouldUseRuntimeWorkflowProjectIO(RUNTIME_MODE.LOCAL)).toBe(false); + expect(shouldUseRuntimeWorkflowProjectIO(RUNTIME_MODE.WORKTREE)).toBe(false); + expect(shouldUseRuntimeWorkflowProjectIO(RUNTIME_MODE.DEVCONTAINER)).toBe(false); + expect(shouldUseRuntimeWorkflowProjectIO(RUNTIME_MODE.SSH)).toBe(true); + expect(shouldUseRuntimeWorkflowProjectIO(RUNTIME_MODE.DOCKER)).toBe(true); + }); + + test("discovers workflows by project, global, then 
built-in precedence when trusted", async () => { + using tmp = new DisposableTempDir("workflow-definitions"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow(projectRoot, "demo", "Project demo"); + await writeWorkflow(globalRoot, "demo", "Global demo"); + await writeWorkflow(globalRoot, "global-only", "Global only"); + + const store = new WorkflowDefinitionStore({ + projectRoot, + globalRoot, + builtIns: [ + { name: "demo", description: "Built-in demo", source: "export default () => null;" }, + { + name: "deep-research", + description: "Built-in research", + source: "export default () => null;", + }, + ], + }); + + const definitions = await store.listDefinitions({ projectTrusted: true }); + + expect(definitions.map((definition) => [definition.name, definition.scope])).toEqual([ + ["deep-research", "built-in"], + ["demo", "project"], + ["global-only", "global"], + ]); + expect(definitions.find((definition) => definition.name === "demo")?.description).toBe( + "Project demo" + ); + }); + + test("omits project-local workflows when the project is not trusted", async () => { + using tmp = new DisposableTempDir("workflow-definitions"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow(projectRoot, "demo", "Project demo"); + await writeWorkflow(globalRoot, "demo", "Global demo"); + + const store = new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }); + + const definitions = await store.listDefinitions({ projectTrusted: false }); + + expect(definitions).toEqual([ + { + name: "demo", + description: "Global demo", + scope: "global", + sourcePath: path.join(globalRoot, "demo.js"), + executable: true, + }, + ]); + }); + + test("reads the selected reusable definition source", async () => { + using tmp = new 
DisposableTempDir("workflow-definitions"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow(projectRoot, "demo", "Project demo", "return { project: true };"); + + const store = new WorkflowDefinitionStore({ + projectRoot, + globalRoot, + builtIns: [ + { + name: "scratch-example", + description: "Built-in fallback", + source: "export default () => null;", + }, + ], + }); + + const definition = await store.readDefinition("demo", { projectTrusted: true }); + const discovered = await store.listDefinitions({ projectTrusted: true }); + + expect(definition.source).toContain("project: true"); + expect(definition.descriptor.scope).toBe("project"); + expect(discovered.every((candidate) => candidate.scope !== "scratch")).toBe(true); + }); + + test("discovers trusted workspace scratch workflows before reusable definitions and writes .gitignore", async () => { + using tmp = new DisposableTempDir("workflow-definitions"); + const workspaceRoot = path.join(tmp.path, "project"); + const scratchRoot = path.join(workspaceRoot, ".mux", "workflows", ".scratch"); + const projectRoot = path.join(workspaceRoot, ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow(globalRoot, "scratch-demo", "Global fallback"); + await writeWorkflow( + scratchRoot, + "scratch-demo", + "Workspace scratch demo", + "return { reportMarkdown: 'scratch' };" + ); + const store = new WorkflowDefinitionStore({ + scratchRoot, + projectRoot, + globalRoot, + builtIns: [ + { + name: "scratch-demo", + description: "Built-in fallback", + source: "export default () => null;", + }, + ], + }); + + const definitions = await store.listDefinitions({ projectTrusted: true }); + const definition = await store.readDefinition("scratch-demo", { projectTrusted: true }); + const gitignore = await fs.readFile(path.join(scratchRoot, ".gitignore"), "utf-8"); + + 
expect(definitions).toEqual([ + { + name: "scratch-demo", + description: "Workspace scratch demo", + scope: "scratch", + sourcePath: path.join(scratchRoot, "scratch-demo.js"), + executable: true, + }, + ]); + expect(definition.descriptor.scope).toBe("scratch"); + expect(definition.source).toContain("// description: Workspace scratch demo"); + expect(definition.source).toContain("reportMarkdown: 'scratch'"); + expect(gitignore).toBe("*\n!.gitignore\n"); + }); + + test("omits workspace scratch workflows when the project is not trusted", async () => { + using tmp = new DisposableTempDir("workflow-definitions"); + const workspaceRoot = path.join(tmp.path, "project"); + const scratchRoot = path.join(workspaceRoot, ".mux", "workflows", ".scratch"); + const projectRoot = path.join(workspaceRoot, ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow(globalRoot, "scratch-demo", "Global fallback"); + await writeWorkflow(scratchRoot, "scratch-demo", "Untrusted scratch demo"); + const store = new WorkflowDefinitionStore({ + scratchRoot, + projectRoot, + globalRoot, + builtIns: [], + }); + + const definitions = await store.listDefinitions({ projectTrusted: false }); + const definition = await store.readDefinition("scratch-demo", { projectTrusted: false }); + + expect(definitions).toEqual([ + { + name: "scratch-demo", + description: "Global fallback", + scope: "global", + sourcePath: path.join(globalRoot, "scratch-demo.js"), + executable: true, + }, + ]); + expect(definition.descriptor.scope).toBe("global"); + }); + + test("uses runtime I/O for project workflow discovery and promotion", async () => { + using tmp = new DisposableTempDir("workflow-definitions"); + const remoteBase = "/remote-workspaces"; + const workspacePath = path.posix.join(remoteBase, "project", "feature"); + const runtime = new TrueRemotePathMappedRuntime(tmp.path, remoteBase); + const projectRoot = runtime.normalizePath(".mux/workflows", 
workspacePath); + const scratchRoot = runtime.normalizePath(".mux/workflows/.scratch", workspacePath); + const localWorkflowRoot = path.join(tmp.path, "project", "feature", ".mux", "workflows"); + const localScratchRoot = path.join(localWorkflowRoot, ".scratch"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow( + localWorkflowRoot, + "remote-demo", + "Remote project demo", + "return { remote: true };" + ); + await writeWorkflow( + localScratchRoot, + "scratch-remote", + "Remote scratch demo", + "return { scratch: true };" + ); + + const store = new WorkflowDefinitionStore({ + projectRoot, + scratchRoot, + projectRuntime: runtime, + projectCwd: workspacePath, + globalRoot, + builtIns: [], + }); + + const definition = await store.readDefinition("remote-demo", { projectTrusted: true }); + const scratchDefinition = await store.readDefinition("scratch-remote", { + projectTrusted: true, + }); + const promoted = await store.promoteDefinition({ + name: "promoted-demo", + description: "Promoted over runtime", + source: "export default function workflow() { return { reportMarkdown: 'ok' }; }", + location: "project", + overwrite: false, + projectTrusted: true, + }); + + expect(definition.descriptor.sourcePath).toBe(`${projectRoot}/remote-demo.js`); + expect(definition.source).toContain("remote: true"); + expect(scratchDefinition.descriptor.sourcePath).toBe(`${scratchRoot}/scratch-remote.js`); + expect(scratchDefinition.source).toContain("scratch: true"); + const scratchGitignore = await fs.readFile(path.join(localScratchRoot, ".gitignore"), "utf-8"); + expect(scratchGitignore).toBe("*\n!.gitignore\n"); + expect(promoted.sourcePath).toBe(`${projectRoot}/promoted-demo.js`); + const promotedSource = await fs.readFile( + path.join(localWorkflowRoot, "promoted-demo.js"), + "utf-8" + ); + expect(promotedSource).toContain("// description: Promoted over runtime"); + + let duplicateError: unknown; + try { + await store.promoteDefinition({ + 
name: "promoted-demo", + description: "Duplicate", + source: "export default function workflow() { return null; }", + location: "project", + overwrite: false, + projectTrusted: true, + }); + } catch (error) { + duplicateError = error; + } + if (!(duplicateError instanceof Error)) { + throw new Error("Expected duplicate promotion to fail"); + } + expect(duplicateError.message).toMatch(/already exists/); + }); + + test("skips invalid filenames and unreadable descriptors", async () => { + using tmp = new DisposableTempDir("workflow-definitions"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow(projectRoot, "valid-name", "Valid workflow"); + await fs.writeFile(path.join(projectRoot, "BadName.js"), "// description: bad\n", "utf-8"); + await fs.writeFile( + path.join(projectRoot, "missing-description.js"), + "export default () => null;", + "utf-8" + ); + + const store = new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }); + + const definitions = await store.listDefinitions({ projectTrusted: true }); + + expect(definitions.map((definition) => definition.name)).toEqual(["valid-name"]); + }); +}); diff --git a/src/node/services/workflows/WorkflowDefinitionStore.ts b/src/node/services/workflows/WorkflowDefinitionStore.ts new file mode 100644 index 0000000000..c811b118f5 --- /dev/null +++ b/src/node/services/workflows/WorkflowDefinitionStore.ts @@ -0,0 +1,465 @@ +import * as fs from "node:fs/promises"; +import * as path from "node:path"; + +import { WorkflowDefinitionDescriptorSchema, WorkflowNameSchema } from "@/common/orpc/schemas"; +import { RUNTIME_MODE, type RuntimeMode } from "@/common/types/runtime"; +import type { WorkflowDefinitionDescriptor, WorkflowName } from "@/common/types/workflow"; +import assert from "@/common/utils/assert"; +import { getErrorMessage } from "@/common/utils/errors"; +import type { Runtime } from 
"@/node/runtime/Runtime"; +import { log } from "@/node/services/log"; +import { quoteRuntimeProbePath } from "@/node/services/tools/runtimePathShellQuote"; +import { execBuffered, readFileString, writeFileString } from "@/node/utils/runtime/helpers"; +import { + BUILT_IN_WORKFLOW_DEFINITIONS, + type BuiltInWorkflowDefinition, +} from "./builtInWorkflowDefinitions"; + +export interface WorkflowDefinitionStoreOptions { + projectRoot: string; + globalRoot: string; + scratchRoot?: string; + projectRuntime?: Runtime; + projectCwd?: string; + builtIns?: readonly BuiltInWorkflowDefinition[]; +} + +export function shouldUseRuntimeWorkflowProjectIO(runtimeType: RuntimeMode): boolean { + return runtimeType === RUNTIME_MODE.SSH || runtimeType === RUNTIME_MODE.DOCKER; +} + +export type WorkflowPromotionLocation = "project" | "global"; + +export interface PromoteWorkflowDefinitionInput { + name: string; + description: string; + source: string; + location: WorkflowPromotionLocation; + overwrite: boolean; + projectTrusted: boolean; +} + +export interface WorkflowDefinitionReadResult { + descriptor: WorkflowDefinitionDescriptor; + source: string; +} + +interface ScannedWorkflowDefinition { + descriptor: WorkflowDefinitionDescriptor; + source: string; +} + +const DESCRIPTION_PREFIX = "// description:"; +// Workspace scratch workflows are edited through normal file tools, so keep generated drafts out +// of the user's git status while leaving the ignore rule itself visible and reviewable. +export const WORKFLOW_SCRATCH_GITIGNORE_CONTENT = "*\n!.gitignore\n"; + +function parseWorkflowDescription(source: string): string | null { + const firstMeaningfulLine = source + .split("\n") + .map((line) => line.trim()) + .find((line) => line.length > 0); + + if (!firstMeaningfulLine?.startsWith(DESCRIPTION_PREFIX)) { + return null; + } + + const description = firstMeaningfulLine.slice(DESCRIPTION_PREFIX.length).trim(); + return description.length > 0 ? 
description : null; +} + +function descriptorForFile(args: { + name: WorkflowName; + description: string; + scope: "project" | "global" | "scratch"; + sourcePath: string; +}): WorkflowDefinitionDescriptor | null { + const descriptor = { + name: args.name, + description: args.description, + scope: args.scope, + sourcePath: args.sourcePath, + executable: true, + } satisfies WorkflowDefinitionDescriptor; + + const parsed = WorkflowDefinitionDescriptorSchema.safeParse(descriptor); + if (!parsed.success) { + log.warn(`Invalid workflow definition descriptor '${args.name}': ${parsed.error.message}`); + return null; + } + + return parsed.data; +} + +async function scanDirectory( + root: string, + scope: "project" | "global" | "scratch" +): Promise { + let entries: string[]; + try { + entries = await fs.readdir(root); + } catch { + return []; + } + + const definitions: ScannedWorkflowDefinition[] = []; + for (const entry of entries) { + if (!entry.endsWith(".js")) { + continue; + } + + const rawName = entry.slice(0, -".js".length); + const nameResult = WorkflowNameSchema.safeParse(rawName); + if (!nameResult.success) { + log.warn(`Skipping invalid workflow filename '${entry}' in ${root}`); + continue; + } + + const sourcePath = path.join(root, entry); + let source: string; + try { + const stat = await fs.stat(sourcePath); + if (!stat.isFile()) { + continue; + } + source = await fs.readFile(sourcePath, "utf-8"); + } catch (error) { + log.warn(`Skipping unreadable workflow '${sourcePath}': ${getErrorMessage(error)}`); + continue; + } + + const description = parseWorkflowDescription(source); + if (description == null) { + log.warn(`Skipping workflow '${sourcePath}' because it is missing a description header`); + continue; + } + + const descriptor = descriptorForFile({ + name: nameResult.data, + description, + scope, + sourcePath, + }); + if (descriptor == null) { + continue; + } + + definitions.push({ descriptor, source }); + } + + return definitions; +} + +async function 
listRuntimeWorkflowFilenames( + runtime: Runtime, + root: string, + cwd: string +): Promise { + const quotedRoot = quoteRuntimeProbePath(root); + const result = await execBuffered( + runtime, + `if [ ! -d ${quotedRoot} ]; then exit 0; fi +for file in ${quotedRoot}/*.js; do + [ -f "$file" ] || continue + basename "$file" +done`, + { cwd, timeout: 10 } + ); + if (result.exitCode !== 0) { + const details = result.stderr.trim() || result.stdout.trim() || `exit code ${result.exitCode}`; + throw new Error(`Runtime workflow discovery failed: ${details}`); + } + return result.stdout + .split("\n") + .map((entry) => entry.trim()) + .filter((entry) => entry.length > 0); +} + +async function scanRuntimeDirectory( + runtime: Runtime, + root: string, + cwd: string, + scope: "project" | "scratch" +): Promise { + let entries: string[]; + try { + entries = await listRuntimeWorkflowFilenames(runtime, root, cwd); + } catch (error) { + log.warn(`Skipping runtime workflow root '${root}': ${getErrorMessage(error)}`); + return []; + } + + const definitions: ScannedWorkflowDefinition[] = []; + for (const entry of entries) { + if (!entry.endsWith(".js")) { + continue; + } + + const rawName = entry.slice(0, -".js".length); + const nameResult = WorkflowNameSchema.safeParse(rawName); + if (!nameResult.success) { + log.warn(`Skipping invalid workflow filename '${entry}' in ${root}`); + continue; + } + + const sourcePath = runtime.normalizePath(entry, root); + let source: string; + try { + source = await readFileString(runtime, sourcePath); + } catch (error) { + log.warn(`Skipping unreadable runtime workflow '${sourcePath}': ${getErrorMessage(error)}`); + continue; + } + + const description = parseWorkflowDescription(source); + if (description == null) { + log.warn(`Skipping workflow '${sourcePath}' because it is missing a description header`); + continue; + } + + const descriptor = descriptorForFile({ + name: nameResult.data, + description, + scope, + sourcePath, + }); + if (descriptor == 
null) { + continue; + } + + definitions.push({ descriptor, source }); + } + + return definitions; +} + +async function runtimePathExists( + runtime: Runtime, + targetPath: string, + cwd: string +): Promise { + const result = await execBuffered(runtime, `[ -e ${quoteRuntimeProbePath(targetPath)} ]`, { + cwd, + timeout: 5, + }); + if (result.exitCode === 0) { + return true; + } + if (result.exitCode === 1) { + return false; + } + const details = result.stderr.trim() || result.stdout.trim() || `exit code ${result.exitCode}`; + throw new Error(`Runtime workflow path probe failed: ${details}`); +} + +async function ensureLocalScratchGitignore(scratchRoot: string): Promise { + await fs.mkdir(scratchRoot, { recursive: true }); + const gitignorePath = path.join(scratchRoot, ".gitignore"); + await fs.writeFile(gitignorePath, WORKFLOW_SCRATCH_GITIGNORE_CONTENT, "utf-8"); +} + +async function ensureRuntimeScratchGitignore(runtime: Runtime, scratchRoot: string): Promise { + await runtime.ensureDir(scratchRoot); + await writeFileString( + runtime, + runtime.normalizePath(".gitignore", scratchRoot), + WORKFLOW_SCRATCH_GITIGNORE_CONTENT + ); +} + +function readBuiltInDefinitions( + builtIns: readonly BuiltInWorkflowDefinition[] +): ScannedWorkflowDefinition[] { + const definitions: ScannedWorkflowDefinition[] = []; + for (const builtIn of builtIns) { + const descriptor = WorkflowDefinitionDescriptorSchema.parse({ + name: builtIn.name, + description: builtIn.description, + scope: "built-in", + executable: true, + }); + definitions.push({ descriptor, source: builtIn.source }); + } + return definitions; +} + +function normalizePromotionDescription(description: string): string { + const normalized = description.replace(/\s+/gu, " ").trim(); + assert(normalized.length > 0, "Workflow promotion description is required"); + return normalized; +} + +function withDescriptionHeader(source: string, description: string): string { + const lines = source.replace(/^\uFEFF/u, "").split("\n"); + 
const firstMeaningfulIndex = lines.findIndex((line) => line.trim().length > 0); + if ( + firstMeaningfulIndex >= 0 && + lines[firstMeaningfulIndex]?.trim().startsWith(DESCRIPTION_PREFIX) + ) { + lines.splice(firstMeaningfulIndex, 1, `${DESCRIPTION_PREFIX} ${description}`); + return lines.join("\n"); + } + return `${DESCRIPTION_PREFIX} ${description}\n${source}`; +} + +export class WorkflowDefinitionStore { + private readonly projectRoot: string; + private readonly globalRoot: string; + private readonly scratchRoot?: string; + private readonly projectRuntime?: Runtime; + private readonly projectCwd?: string; + private readonly builtIns: readonly BuiltInWorkflowDefinition[]; + + constructor(options: WorkflowDefinitionStoreOptions) { + assert(options.projectRoot.length > 0, "WorkflowDefinitionStore: projectRoot is required"); + assert(options.globalRoot.length > 0, "WorkflowDefinitionStore: globalRoot is required"); + assert( + options.projectRuntime == null || + (options.projectCwd != null && options.projectCwd.length > 0), + "WorkflowDefinitionStore: projectCwd is required with projectRuntime" + ); + + this.projectRoot = options.projectRoot; + this.globalRoot = options.globalRoot; + this.scratchRoot = options.scratchRoot; + this.projectRuntime = options.projectRuntime; + this.projectCwd = options.projectCwd; + this.builtIns = options.builtIns ?? 
BUILT_IN_WORKFLOW_DEFINITIONS; + } + + async listDefinitions(options: { + projectTrusted: boolean; + }): Promise { + const byName = await this.collectDefinitions(options); + return Array.from(byName.values()) + .map((definition) => definition.descriptor) + .sort((a, b) => a.name.localeCompare(b.name)); + } + + async readDefinition( + name: string, + options: { projectTrusted: boolean } + ): Promise { + const parsedName = WorkflowNameSchema.parse(name); + const byName = await this.collectDefinitions(options); + const definition = byName.get(parsedName); + if (definition == null) { + throw new Error(`Workflow definition not found: ${parsedName}`); + } + return { + descriptor: definition.descriptor, + source: definition.source, + }; + } + + async promoteDefinition( + input: PromoteWorkflowDefinitionInput + ): Promise { + const name = WorkflowNameSchema.parse(input.name); + const description = normalizePromotionDescription(input.description); + assert( + input.source.trim().length > 0, + "WorkflowDefinitionStore.promoteDefinition: source is required" + ); + if (input.location === "project" && !input.projectTrusted) { + throw new Error("Project trust is required to promote project-local workflows"); + } + + const root = input.location === "project" ? this.projectRoot : this.globalRoot; + const sourcePath = + this.projectRuntime?.normalizePath(`${name}.js`, root) ?? 
path.join(root, `${name}.js`); + const promotedSource = withDescriptionHeader(input.source, description); + if (input.location === "project" && this.projectRuntime != null) { + assert( + this.projectCwd != null, + "WorkflowDefinitionStore.promoteDefinition: projectCwd missing" + ); + await this.projectRuntime.ensureDir(root); + if ( + !input.overwrite && + (await runtimePathExists(this.projectRuntime, sourcePath, this.projectCwd)) + ) { + throw new Error(`Workflow definition already exists: ${sourcePath}`); + } + await writeFileString(this.projectRuntime, sourcePath, promotedSource); + } else { + await fs.mkdir(root, { recursive: true }); + await fs.writeFile(sourcePath, promotedSource, { + encoding: "utf-8", + flag: input.overwrite ? "w" : "wx", + }); + } + + const descriptor = descriptorForFile({ + name, + description, + scope: input.location, + sourcePath, + }); + assert( + descriptor != null, + "WorkflowDefinitionStore.promoteDefinition: descriptor must be valid" + ); + return descriptor; + } + + private async collectDefinitions(options: { + projectTrusted: boolean; + }): Promise> { + const byName = new Map(); + const sources: ScannedWorkflowDefinition[][] = []; + + if (this.scratchRoot != null && options.projectTrusted) { + // Scratch workflows live under the workspace checkout, so treat them like project-local + // code for trust gating rather than exposing repo-controlled files from untrusted projects. 
+ if (this.projectRuntime != null) { + assert( + this.projectCwd != null, + "WorkflowDefinitionStore.collectDefinitions: projectCwd missing" + ); + await ensureRuntimeScratchGitignore(this.projectRuntime, this.scratchRoot); + sources.push( + await scanRuntimeDirectory( + this.projectRuntime, + this.scratchRoot, + this.projectCwd, + "scratch" + ) + ); + } else { + await ensureLocalScratchGitignore(this.scratchRoot); + sources.push(await scanDirectory(this.scratchRoot, "scratch")); + } + } + if (options.projectTrusted) { + if (this.projectRuntime != null) { + assert( + this.projectCwd != null, + "WorkflowDefinitionStore.collectDefinitions: projectCwd missing" + ); + sources.push( + await scanRuntimeDirectory( + this.projectRuntime, + this.projectRoot, + this.projectCwd, + "project" + ) + ); + } else { + sources.push(await scanDirectory(this.projectRoot, "project")); + } + } + sources.push(await scanDirectory(this.globalRoot, "global")); + sources.push(readBuiltInDefinitions(this.builtIns)); + + for (const source of sources) { + for (const definition of source) { + if (!byName.has(definition.descriptor.name)) { + byName.set(definition.descriptor.name, definition); + } + } + } + + return byName; + } +} diff --git a/src/node/services/workflows/WorkflowRunStore.test.ts b/src/node/services/workflows/WorkflowRunStore.test.ts new file mode 100644 index 0000000000..cba8841f8b --- /dev/null +++ b/src/node/services/workflows/WorkflowRunStore.test.ts @@ -0,0 +1,327 @@ +/* eslint-disable @typescript-eslint/await-thenable */ +import * as fs from "node:fs/promises"; +import * as path from "node:path"; + +import { describe, expect, test } from "bun:test"; +import { DisposableTempDir } from "@/node/services/tempDir"; +import { WorkflowRunStore } from "./WorkflowRunStore"; + +const definition = { + name: "deep-research", + description: "Research a topic", + scope: "built-in" as const, + executable: true, +}; + +const source = "export default async function workflow() { return 'ok'; 
}\n"; + +async function createStore(sessionDir: string, staleLeaseMs = 10) { + const store = new WorkflowRunStore({ sessionDir, staleLeaseMs }); + await store.createRun({ + id: "wfr_123", + workspaceId: "workspace-1", + definition, + definitionSource: source, + args: { topic: "durable runs" }, + now: "2026-05-29T00:00:00.000Z", + }); + return store; +} + +describe("WorkflowRunStore", () => { + test("persists captured definition source and reloads run state", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await store.appendEvent("wfr_123", { + sequence: 1, + type: "status", + at: "2026-05-29T00:00:01.000Z", + status: "running", + }); + + const reloadedStore = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + const run = await reloadedStore.getRun("wfr_123"); + + expect(run.definitionSource).toBe(source); + expect(run.definitionHash).toMatch(/^sha256:/); + expect(run.events.map((event) => event.sequence)).toEqual([1]); + }); + + test("rejects invalid run ids before resolving run file paths", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = new WorkflowRunStore({ sessionDir: tmp.path }); + + await expect(store.getRun("../wfr_escape")).rejects.toThrow(/runId must match/); + await expect(store.acquireLease("wfr_../escape", "runner-a", Date.now())).rejects.toThrow( + /runId must match/ + ); + await expect( + store.createRun({ + id: "task_123", + workspaceId: "workspace-1", + definition, + definitionSource: source, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }) + ).rejects.toThrow(/runId must match/); + }); + + test("ignores malformed journal lines while preserving valid events and steps", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await store.appendEvent("wfr_123", { + sequence: 1, + type: "phase", + at: "2026-05-29T00:00:01.000Z", + name: "scope", + }); + await 
store.recordStepCompleted("wfr_123", { + stepId: "scope-task", + inputHash: "input:1", + taskId: "task_1", + result: { reportMarkdown: "done", structuredOutput: { ok: true } }, + startedAt: "2026-05-29T00:00:01.000Z", + completedAt: "2026-05-29T00:00:02.000Z", + }); + + await fs.appendFile(path.join(tmp.path, "workflows", "wfr_123", "events.jsonl"), "not json\n"); + await fs.appendFile( + path.join(tmp.path, "workflows", "wfr_123", "steps.jsonl"), + '{"bad":true}\n' + ); + + const run = await store.getRun("wfr_123"); + const completed = await store.getCompletedStep("wfr_123", "scope-task", "input:1"); + + expect(run.events).toHaveLength(1); + expect(run.steps).toHaveLength(1); + expect(completed?.result?.structuredOutput).toEqual({ ok: true }); + }); + + test("rejects duplicate or out-of-order event sequence numbers", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await store.appendEvent("wfr_123", { + sequence: 1, + type: "log", + at: "2026-05-29T00:00:01.000Z", + message: "first", + }); + + await expect( + store.appendEvent("wfr_123", { + sequence: 1, + type: "log", + at: "2026-05-29T00:00:02.000Z", + message: "duplicate", + }) + ).rejects.toThrow(/strictly ordered/); + }); + + test("preserves interrupted runs unless explicit resume is allowed", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await store.appendStatus("wfr_123", "interrupted", "2026-05-29T00:00:01.000Z"); + + await expect( + store.appendStatus("wfr_123", "running", "2026-05-29T00:00:02.000Z") + ).rejects.toThrow(/interrupted/); + await expect( + store.appendStatus("wfr_123", "completed", "2026-05-29T00:00:02.000Z") + ).rejects.toThrow(/interrupted/); + await expect( + store.appendEvent("wfr_123", { + sequence: 2, + type: "log", + at: "2026-05-29T00:00:02.000Z", + message: "too late", + }) + ).rejects.toThrow(/interrupted/); + await expect( + 
store.recordStepCompleted("wfr_123", { + stepId: "late-step", + inputHash: "hash:late-step", + taskId: "task_late", + result: { reportMarkdown: "late" }, + startedAt: "2026-05-29T00:00:01.000Z", + completedAt: "2026-05-29T00:00:02.000Z", + }) + ).rejects.toThrow(/interrupted/); + await expect(store.getRun("wfr_123")).resolves.toMatchObject({ status: "interrupted" }); + + await expect( + store.appendStatus("wfr_123", "running", "2026-05-29T00:00:03.000Z", { + allowInterruptedResume: true, + }) + ).resolves.toMatchObject({ status: "running" }); + }); + + test("fences journal and step writes by current lease owner", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await expect(store.acquireLease("wfr_123", "runner-a", 1000)).resolves.toBe(true); + await store.appendStatus("wfr_123", "running", "2026-05-29T00:00:01.000Z", { + expectedLeaseOwnerId: "runner-a", + }); + await expect(store.acquireLease("wfr_123", "runner-b", 1012)).resolves.toBe(true); + + await expect( + store.appendStatus("wfr_123", "completed", "2026-05-29T00:00:02.000Z", { + expectedLeaseOwnerId: "runner-a", + }) + ).rejects.toThrow(/lease lost/); + await expect( + store.recordStepCompleted( + "wfr_123", + { + stepId: "read-source", + inputHash: "source:a", + taskId: "task_1", + result: { reportMarkdown: "source summary" }, + startedAt: "2026-05-29T00:00:01.000Z", + completedAt: "2026-05-29T00:00:02.000Z", + }, + { expectedLeaseOwnerId: "runner-a" } + ) + ).rejects.toThrow(/lease lost/); + await expect(store.getRun("wfr_123")).resolves.toMatchObject({ status: "running" }); + }); + + test("replays terminal status from journal when run file is stale", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + await fs.appendFile( + path.join(tmp.path, "workflows", "wfr_123", "events.jsonl"), + `${JSON.stringify({ + sequence: 1, + type: "status", + at: "2026-05-29T00:00:01.000Z", + 
status: "completed", + })}\n`, + "utf-8" + ); + + await expect(store.getRun("wfr_123")).resolves.toMatchObject({ status: "completed" }); + await expect( + store.appendStatus("wfr_123", "interrupted", "2026-05-29T00:00:02.000Z") + ).rejects.toThrow(/Cannot transition/); + }); + + test("does not overwrite terminal runs with later interrupt status", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await store.appendStatus("wfr_123", "running", "2026-05-29T00:00:01.000Z"); + await store.appendStatus("wfr_123", "completed", "2026-05-29T00:00:02.000Z"); + + await expect( + store.appendStatus("wfr_123", "interrupted", "2026-05-29T00:00:03.000Z") + ).rejects.toThrow(/Cannot transition/); + await expect(store.getRun("wfr_123")).resolves.toMatchObject({ status: "completed" }); + }); + + test("reuses completed steps by stable step id and input hash", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await store.recordStepStarted("wfr_123", { + stepId: "read-source", + inputHash: "source:a", + taskId: "task_1", + startedAt: "2026-05-29T00:00:01.000Z", + }); + await store.recordStepCompleted("wfr_123", { + stepId: "read-source", + inputHash: "source:a", + taskId: "task_1", + result: { reportMarkdown: "source summary" }, + startedAt: "2026-05-29T00:00:01.000Z", + completedAt: "2026-05-29T00:00:02.000Z", + }); + + await expect(store.getCompletedStep("wfr_123", "read-source", "source:b")).resolves.toBeNull(); + await expect( + store.getCompletedStep("wfr_123", "read-source", "source:a") + ).resolves.toMatchObject({ + status: "completed", + result: { reportMarkdown: "source summary" }, + }); + }); + + test("renews active leases so they are not reclaimed as stale", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await expect(store.acquireLease("wfr_123", "runner-a", 
1000)).resolves.toBe(true); + await expect(store.renewLease("wfr_123", "runner-a", 1008)).resolves.toBe(true); + await expect(store.acquireLease("wfr_123", "runner-b", 1012)).resolves.toBe(false); + await expect(store.acquireLease("wfr_123", "runner-b", 1019)).resolves.toBe(true); + }); + + test("does not acquire through an active lease mutation lock", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await expect(store.acquireLease("wfr_123", "runner-a", 1000)).resolves.toBe(true); + const lockDir = path.join(tmp.path, "workflows", "wfr_123", "lease.json.lock"); + await fs.mkdir(lockDir); + + await expect(store.acquireLease("wfr_123", "runner-b", 1012)).resolves.toBe(false); + + await fs.rm(lockDir, { recursive: true, force: true }); + await expect(store.acquireLease("wfr_123", "runner-b", 1012)).resolves.toBe(true); + }); + + test("serializes renewal with lease ownership changes", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await expect(store.acquireLease("wfr_123", "runner-a", 1000)).resolves.toBe(true); + const runDir = path.join(tmp.path, "workflows", "wfr_123"); + const leaseFile = path.join(runDir, "lease.json"); + const lockDir = `${leaseFile}.lock`; + await fs.mkdir(lockDir); + + const renewal = store.renewLease("wfr_123", "runner-a", 1005); + await fs.writeFile(leaseFile, JSON.stringify({ ownerId: "runner-b", acquiredAtMs: 1004 })); + await fs.rm(lockDir, { recursive: true, force: true }); + + await expect(renewal).resolves.toBe(false); + await expect(fs.readFile(leaseFile, "utf-8")).resolves.toContain("runner-b"); + }); + + test("release waits for in-flight lease mutations", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path, 100); + + await expect(store.acquireLease("wfr_123", "runner-a", 1000)).resolves.toBe(true); + const leaseFile = path.join(tmp.path, 
"workflows", "wfr_123", "lease.json"); + const lockDir = `${leaseFile}.lock`; + await fs.mkdir(lockDir); + + const release = store.releaseLease("wfr_123", "runner-a"); + await new Promise((resolve) => setTimeout(resolve, 5)); + await expect(fs.readFile(leaseFile, "utf-8")).resolves.toContain("runner-a"); + + await fs.rm(lockDir, { recursive: true, force: true }); + await release; + + await expect(store.acquireLease("wfr_123", "runner-b", 1001)).resolves.toBe(true); + }); + + test("prevents concurrent runners while allowing stale lease recovery", async () => { + using tmp = new DisposableTempDir("workflow-runs"); + const store = await createStore(tmp.path); + + await expect(store.acquireLease("wfr_123", "runner-a", 1000)).resolves.toBe(true); + await expect(store.acquireLease("wfr_123", "runner-a", 1001)).resolves.toBe(false); + await expect(store.acquireLease("wfr_123", "runner-b", 1001)).resolves.toBe(false); + await expect(store.acquireLease("wfr_123", "runner-b", 1012)).resolves.toBe(true); + }); +}); diff --git a/src/node/services/workflows/WorkflowRunStore.ts b/src/node/services/workflows/WorkflowRunStore.ts new file mode 100644 index 0000000000..5ccf6ad892 --- /dev/null +++ b/src/node/services/workflows/WorkflowRunStore.ts @@ -0,0 +1,678 @@ +import * as crypto from "node:crypto"; +import type { Dirent } from "node:fs"; +import * as fs from "node:fs/promises"; +import * as path from "node:path"; + +import writeFileAtomic from "write-file-atomic"; + +import { + WorkflowEventSequenceSchema, + WorkflowRunEventSchema, + WorkflowRunIdSchema, + WorkflowRunRecordSchema, + WorkflowStepRecordSchema, +} from "@/common/orpc/schemas"; +import type { + StructuredTaskOutput, + WorkflowDefinitionDescriptor, + WorkflowRunEvent, + WorkflowRunRecord, + WorkflowRunStatus, + WorkflowStepRecord, +} from "@/common/types/workflow"; +import assert from "@/common/utils/assert"; +import { getErrorMessage } from "@/common/utils/errors"; +import { log } from "@/node/services/log"; + 
+export interface WorkflowRunStoreOptions { + sessionDir: string; + staleLeaseMs?: number; +} + +export interface CreateWorkflowRunInput { + id: string; + workspaceId: string; + definition: WorkflowDefinitionDescriptor; + definitionSource: string; + args: unknown; + now: string; +} + +export interface AppendWorkflowRunEventOptions { + /** + * Only explicit Resume may reopen an interrupted run; stale active runners must preserve the + * interrupt. + */ + allowInterruptedResume?: boolean; + /** Fence a journal/step mutation so only the current lease owner can write it. */ + expectedLeaseOwnerId?: string; +} + +interface LeaseRecord { + ownerId: string; + acquiredAtMs: number; +} + +interface WorkflowStepLookup { + stepId: string; + inputHash: string; +} + +export class WorkflowRunStore { + private readonly sessionDir: string; + private readonly staleLeaseMs: number; + + constructor(options: WorkflowRunStoreOptions) { + assert(options.sessionDir.length > 0, "WorkflowRunStore: sessionDir is required"); + this.sessionDir = options.sessionDir; + this.staleLeaseMs = options.staleLeaseMs ?? 
30_000; + } + + async createRun(input: CreateWorkflowRunInput): Promise { + assert(input.id.length > 0, "WorkflowRunStore.createRun: id is required"); + assert(input.workspaceId.length > 0, "WorkflowRunStore.createRun: workspaceId is required"); + assert( + input.definitionSource.length > 0, + "WorkflowRunStore.createRun: definitionSource is required" + ); + + const runDir = this.runDir(input.id); + await fs.mkdir(runDir, { recursive: true }); + await fs.writeFile(path.join(runDir, "definition.js"), input.definitionSource, "utf-8"); + await fs.writeFile(path.join(runDir, "events.jsonl"), "", { flag: "a" }); + await fs.writeFile(path.join(runDir, "steps.jsonl"), "", { flag: "a" }); + + const run = WorkflowRunRecordSchema.parse({ + id: input.id, + workspaceId: input.workspaceId, + definition: input.definition, + definitionSource: input.definitionSource, + definitionHash: hashSource(input.definitionSource), + args: input.args, + status: "pending", + createdAt: input.now, + updatedAt: input.now, + events: [], + steps: [], + }); + + await this.writeRunFile(input.id, run); + return run; + } + + async getRun(runId: string): Promise { + const rawRun = JSON.parse(await fs.readFile(this.runFile(runId), "utf-8")) as unknown; + const partial = WorkflowRunRecordSchema.omit({ events: true, steps: true }).parse(rawRun); + const definitionSource = await fs.readFile( + path.join(this.runDir(runId), "definition.js"), + "utf-8" + ); + const events = await this.readEvents(runId); + const steps = await this.readSteps(runId); + + const latestEvent = events.at(-1); + const status = getRunStatusFromEvents(events) ?? partial.status; + return WorkflowRunRecordSchema.parse({ + ...partial, + definitionSource, + definitionHash: hashSource(definitionSource), + status, + updatedAt: latestEvent?.at ?? 
partial.updatedAt, + events, + steps, + }); + } + + async listRuns(): Promise { + let entries: Dirent[]; + try { + entries = await fs.readdir(this.workflowsDir(), { withFileTypes: true }); + } catch { + return []; + } + + const runs = await Promise.all( + entries + .filter((entry) => entry.isDirectory()) + .map(async (entry): Promise => { + try { + return await this.getRun(entry.name); + } catch (error) { + log.warn(`Skipping unreadable workflow run '${entry.name}': ${getErrorMessage(error)}`); + return null; + } + }) + ); + + return runs + .filter((run): run is WorkflowRunRecord => run != null) + .sort((a, b) => a.createdAt.localeCompare(b.createdAt)); + } + + async appendEvent( + runId: string, + event: WorkflowRunEvent, + options: AppendWorkflowRunEventOptions = {} + ): Promise { + const lockDir = `${this.eventsFile(runId)}.lock`; + await acquireWorkflowMutationLock( + lockDir, + this.leaseMutationLockStaleMs(), + this.leaseMutationWaitTimeoutMs() + ); + try { + return await this.withExpectedLeaseOwner( + runId, + options.expectedLeaseOwnerId, + async () => await this.appendEventUnlocked(runId, event, options) + ); + } finally { + await fs.rm(lockDir, { recursive: true, force: true }); + } + } + + async appendStatus( + runId: string, + status: WorkflowRunStatus, + at: string, + options: AppendWorkflowRunEventOptions = {} + ): Promise { + const lockDir = `${this.eventsFile(runId)}.lock`; + await acquireWorkflowMutationLock( + lockDir, + this.leaseMutationLockStaleMs(), + this.leaseMutationWaitTimeoutMs() + ); + try { + return await this.withExpectedLeaseOwner(runId, options.expectedLeaseOwnerId, async () => { + const events = await this.readEvents(runId); + return await this.appendEventUnlocked( + runId, + { + sequence: (events.at(-1)?.sequence ?? 
0) + 1, + type: "status", + at, + status, + }, + options + ); + }); + } finally { + await fs.rm(lockDir, { recursive: true, force: true }); + } + } + + async recordStepStarted( + runId: string, + input: { + stepId: string; + inputHash: string; + taskId?: string; + startedAt: string; + }, + options: AppendWorkflowRunEventOptions = {} + ): Promise { + await this.appendStepRecord( + runId, + { + stepId: input.stepId, + inputHash: input.inputHash, + taskId: input.taskId, + startedAt: input.startedAt, + status: "started", + }, + options + ); + } + + async recordStepCompleted( + runId: string, + input: { + stepId: string; + inputHash: string; + taskId?: string; + result: StructuredTaskOutput; + startedAt: string; + completedAt: string; + }, + options: AppendWorkflowRunEventOptions = {} + ): Promise { + await this.appendStepRecord( + runId, + { + stepId: input.stepId, + inputHash: input.inputHash, + taskId: input.taskId, + result: input.result, + startedAt: input.startedAt, + completedAt: input.completedAt, + status: "completed", + }, + options + ); + } + + async recordStepFailed( + runId: string, + input: { + stepId: string; + inputHash: string; + taskId?: string; + error: string; + startedAt: string; + completedAt: string; + }, + options: AppendWorkflowRunEventOptions = {} + ): Promise { + await this.appendStepRecord( + runId, + { + stepId: input.stepId, + inputHash: input.inputHash, + taskId: input.taskId, + error: input.error, + startedAt: input.startedAt, + completedAt: input.completedAt, + status: "failed", + }, + options + ); + } + + async getStep( + runId: string, + stepId: string, + inputHash: string + ): Promise { + const [step] = await this.getSteps(runId, [{ stepId, inputHash }]); + return step ?? null; + } + + async getCompletedStep( + runId: string, + stepId: string, + inputHash: string + ): Promise { + const step = await this.getStep(runId, stepId, inputHash); + return step?.status === "completed" ? 
step : null; + } + + async getSteps( + runId: string, + lookups: readonly WorkflowStepLookup[] + ): Promise> { + if (lookups.length === 0) { + return []; + } + const requestedKeys = new Set(lookups.map(getWorkflowStepKey)); + const byKey = new Map(); + for (const step of await this.readSteps(runId)) { + const key = getWorkflowStepKey(step); + if (requestedKeys.has(key)) { + byKey.set(key, step); + } + } + return lookups.map((lookup) => byKey.get(getWorkflowStepKey(lookup)) ?? null); + } + + async acquireLease(runId: string, ownerId: string, nowMs = Date.now()): Promise { + assert(ownerId.length > 0, "WorkflowRunStore.acquireLease: ownerId is required"); + const leaseFile = this.leaseFile(runId); + const lockDir = `${leaseFile}.lock`; + if (!(await acquireLeaseMutationLock(lockDir, Date.now(), this.leaseMutationLockStaleMs()))) { + return false; + } + + try { + const existing = await readLease(leaseFile); + if (existing != null && nowMs - existing.acquiredAtMs <= this.staleLeaseMs) { + return false; + } + + await fs.mkdir(this.runDir(runId), { recursive: true }); + await writeJsonAtomic(leaseFile, { ownerId, acquiredAtMs: nowMs } satisfies LeaseRecord); + return true; + } finally { + await fs.rm(lockDir, { recursive: true, force: true }); + } + } + + async getLeaseRetryDelayMs(runId: string, nowMs = Date.now()): Promise { + const lease = await readLease(this.leaseFile(runId)); + if (lease == null) { + return 0; + } + const remainingMs = this.staleLeaseMs - (nowMs - lease.acquiredAtMs); + return Math.max(0, Math.ceil(remainingMs) + 1); + } + + getLeaseRenewalIntervalMs(): number { + return Math.max(1, Math.floor(this.staleLeaseMs / 2)); + } + + private leaseMutationLockStaleMs(): number { + return Math.max(1_000, this.staleLeaseMs); + } + + private leaseMutationWaitTimeoutMs(): number { + return Math.max(4_000, this.leaseMutationLockStaleMs() * 4); + } + + async renewLease(runId: string, ownerId: string, nowMs = Date.now()): Promise { + assert(ownerId.length > 0, 
"WorkflowRunStore.renewLease: ownerId is required"); + const leaseFile = this.leaseFile(runId); + const lockDir = `${leaseFile}.lock`; + try { + await acquireWorkflowMutationLock( + lockDir, + this.leaseMutationLockStaleMs(), + this.leaseMutationWaitTimeoutMs() + ); + } catch { + return false; + } + + try { + const existing = await readLease(leaseFile); + if (existing?.ownerId !== ownerId) { + return false; + } + await writeJsonAtomic(leaseFile, { ownerId, acquiredAtMs: nowMs } satisfies LeaseRecord); + return true; + } finally { + await fs.rm(lockDir, { recursive: true, force: true }); + } + } + + async releaseLease(runId: string, ownerId: string): Promise { + const leaseFile = this.leaseFile(runId); + const lockDir = `${leaseFile}.lock`; + await acquireWorkflowMutationLock( + lockDir, + this.leaseMutationLockStaleMs(), + this.leaseMutationWaitTimeoutMs() + ); + + try { + const existing = await readLease(leaseFile); + if (existing?.ownerId === ownerId) { + await fs.rm(leaseFile, { force: true }); + } + } finally { + await fs.rm(lockDir, { recursive: true, force: true }); + } + } + + private async withExpectedLeaseOwner( + runId: string, + expectedLeaseOwnerId: string | undefined, + mutation: () => Promise + ): Promise { + if (expectedLeaseOwnerId == null) { + return await mutation(); + } + assert( + expectedLeaseOwnerId.length > 0, + "WorkflowRunStore: expected lease owner id must be non-empty" + ); + const leaseFile = this.leaseFile(runId); + const lockDir = `${leaseFile}.lock`; + await acquireWorkflowMutationLock( + lockDir, + this.leaseMutationLockStaleMs(), + this.leaseMutationWaitTimeoutMs() + ); + try { + const lease = await readLease(leaseFile); + if (lease?.ownerId !== expectedLeaseOwnerId) { + throw new Error(`Workflow run lease lost: ${runId}`); + } + return await mutation(); + } finally { + await fs.rm(lockDir, { recursive: true, force: true }); + } + } + + private async appendEventUnlocked( + runId: string, + event: WorkflowRunEvent, + options: 
AppendWorkflowRunEventOptions = {} + ): Promise { + const parsedEvent = WorkflowRunEventSchema.parse(event); + const existingEvents = await this.readEvents(runId); + const ordered = WorkflowEventSequenceSchema.safeParse([...existingEvents, parsedEvent]); + if (!ordered.success) { + throw new Error(`Workflow events must be strictly ordered: ${ordered.error.message}`); + } + + const run = await this.getRun(runId); + const isInterruptedResumeEvent = + parsedEvent.type === "status" && + options.allowInterruptedResume === true && + parsedEvent.status === "running"; + const isRepeatedInterruptedStatus = + parsedEvent.type === "status" && parsedEvent.status === "interrupted"; + if (run.status === "interrupted" && !isInterruptedResumeEvent && !isRepeatedInterruptedStatus) { + throw new Error(`Workflow run interrupted: ${runId}`); + } + if (parsedEvent.type === "status") { + if (isTerminalRunStatus(run.status)) { + throw new Error( + `Cannot transition workflow run from ${run.status} to ${parsedEvent.status}` + ); + } + } + + await appendJsonLine(this.eventsFile(runId), parsedEvent); + const updatedRun = { + ...run, + events: [...run.events, parsedEvent], + status: parsedEvent.type === "status" ? 
parsedEvent.status : run.status, + updatedAt: parsedEvent.at, + } satisfies WorkflowRunRecord; + await this.writeRunFile(runId, updatedRun); + return updatedRun; + } + + private async readEvents(runId: string): Promise { + const events = await readJsonLines(this.eventsFile(runId), WorkflowRunEventSchema); + return WorkflowEventSequenceSchema.parse(events); + } + + private async readSteps(runId: string): Promise { + const records = await readJsonLines(this.stepsFile(runId), WorkflowStepRecordSchema); + const byKey = new Map(); + for (const record of records) { + byKey.set(getWorkflowStepKey(record), record); + } + return Array.from(byKey.values()); + } + + private async appendStepRecord( + runId: string, + record: unknown, + options: AppendWorkflowRunEventOptions = {} + ): Promise { + const lockDir = `${this.eventsFile(runId)}.lock`; + await acquireWorkflowMutationLock( + lockDir, + this.leaseMutationLockStaleMs(), + this.leaseMutationWaitTimeoutMs() + ); + try { + await this.withExpectedLeaseOwner(runId, options.expectedLeaseOwnerId, async () => { + const parsedRecord = WorkflowStepRecordSchema.parse(record); + const run = await this.getRun(runId); + if (run.status === "interrupted") { + throw new Error(`Workflow run interrupted: ${runId}`); + } + await appendJsonLine(this.stepsFile(runId), parsedRecord); + }); + } finally { + await fs.rm(lockDir, { recursive: true, force: true }); + } + } + + private async writeRunFile(runId: string, run: WorkflowRunRecord): Promise { + const runForDisk = WorkflowRunRecordSchema.parse(run); + await writeJsonAtomic(this.runFile(runId), runForDisk); + } + + private workflowsDir(): string { + return path.join(this.sessionDir, "workflows"); + } + + private runDir(runId: string): string { + assertValidWorkflowRunId(runId); + return path.join(this.workflowsDir(), runId); + } + + private runFile(runId: string): string { + return path.join(this.runDir(runId), "run.json"); + } + + private eventsFile(runId: string): string { + return 
path.join(this.runDir(runId), "events.jsonl"); + } + + private stepsFile(runId: string): string { + return path.join(this.runDir(runId), "steps.jsonl"); + } + + private leaseFile(runId: string): string { + return path.join(this.runDir(runId), "lease.json"); + } +} + +// API callers provide run IDs when reading/resuming; validate before path joins so a malformed +// ID cannot escape the workspace-scoped workflows directory. +function assertValidWorkflowRunId(runId: string): void { + assert( + WorkflowRunIdSchema.safeParse(runId).success, + "WorkflowRunStore: runId must match wfr_[A-Za-z0-9_-]+" + ); +} + +function getWorkflowStepKey(step: WorkflowStepLookup): string { + return `${step.stepId}\0${step.inputHash}`; +} + +function hashSource(source: string): string { + return `sha256:${crypto.createHash("sha256").update(source).digest("hex")}`; +} + +async function appendJsonLine(filePath: string, value: unknown): Promise { + await fs.mkdir(path.dirname(filePath), { recursive: true }); + await fs.appendFile(filePath, `${JSON.stringify(value)}\n`, "utf-8"); +} + +async function writeJsonAtomic(filePath: string, value: unknown): Promise { + await fs.mkdir(path.dirname(filePath), { recursive: true }); + await writeFileAtomic(filePath, `${JSON.stringify(value, null, 2)}\n`, "utf-8"); +} + +async function readJsonLines( + filePath: string, + schema: { safeParse(value: unknown): { success: true; data: T } | { success: false } } +): Promise { + let content: string; + try { + content = await fs.readFile(filePath, "utf-8"); + } catch { + return []; + } + + const records: T[] = []; + for (const [index, line] of content.split("\n").entries()) { + if (line.trim().length === 0) { + continue; + } + + try { + const parsedJson = JSON.parse(line) as unknown; + const parsedRecord = schema.safeParse(parsedJson); + if (parsedRecord.success) { + records.push(parsedRecord.data); + } else { + log.warn(`Skipping malformed workflow journal line ${index + 1} in ${filePath}`); + } + } catch 
(error) { + log.warn( + `Skipping malformed workflow journal line ${index + 1} in ${filePath}: ${getErrorMessage(error)}` + ); + } + } + + return records; +} + +function getRunStatusFromEvents( + events: readonly WorkflowRunEvent[] +): WorkflowRunStatus | undefined { + return events.findLast((event) => event.type === "status")?.status; +} + +function isTerminalRunStatus(status: WorkflowRunStatus): boolean { + return status === "completed" || status === "failed"; +} + +async function acquireWorkflowMutationLock( + lockDir: string, + staleLeaseMs: number, + timeoutMs = staleLeaseMs +): Promise { + const deadline = Date.now() + timeoutMs; + while (Date.now() <= deadline) { + if (await acquireLeaseMutationLock(lockDir, Date.now(), staleLeaseMs)) { + return; + } + await new Promise((resolve) => setTimeout(resolve, 5)); + } + throw new Error(`Timed out acquiring workflow mutation lock: ${lockDir}`); +} + +async function acquireLeaseMutationLock( + lockDir: string, + nowMs: number, + staleLeaseMs: number +): Promise { + try { + await fs.mkdir(lockDir); + return true; + } catch (error) { + if (!isErrno(error, "EEXIST")) { + throw error; + } + } + + try { + const stat = await fs.stat(lockDir); + if (nowMs - stat.mtimeMs <= staleLeaseMs) { + return false; + } + await fs.rm(lockDir, { recursive: true, force: true }); + await fs.mkdir(lockDir); + return true; + } catch (error) { + if (isErrno(error, "EEXIST") || isErrno(error, "ENOENT")) { + return false; + } + throw error; + } +} + +function isErrno(error: unknown, code: string): boolean { + return error instanceof Error && "code" in error && error.code === code; +} + +async function readLease(leaseFile: string): Promise { + try { + const raw = JSON.parse(await fs.readFile(leaseFile, "utf-8")) as Partial; + if (typeof raw.ownerId === "string" && typeof raw.acquiredAtMs === "number") { + return { ownerId: raw.ownerId, acquiredAtMs: raw.acquiredAtMs }; + } + } catch { + return null; + } + return null; +} diff --git 
a/src/node/services/workflows/WorkflowRunner.test.ts b/src/node/services/workflows/WorkflowRunner.test.ts new file mode 100644 index 0000000000..34b3c983b9 --- /dev/null +++ b/src/node/services/workflows/WorkflowRunner.test.ts @@ -0,0 +1,1093 @@ +/* eslint-disable @typescript-eslint/await-thenable, @typescript-eslint/no-unsafe-argument, @typescript-eslint/require-await */ +import { describe, expect, test } from "bun:test"; +import { ForegroundWaitBackgroundedError } from "@/node/services/taskService"; +import { DisposableTempDir } from "@/node/services/tempDir"; +import { QuickJSRuntimeFactory } from "@/node/services/ptc/quickjsRuntime"; +import { WorkflowRunStore } from "./WorkflowRunStore"; +import { + WorkflowRunBackgroundedError, + WorkflowRunner, + type WorkflowTaskAdapter, +} from "./WorkflowRunner"; +import { hashWorkflowStepInput } from "./workflowReplayKey"; + +const definition = { + name: "deep-research", + description: "Research a topic", + scope: "built-in" as const, + executable: true, +}; + +const source = `export default function workflow({ args, phase, log, agent }) { + phase("scope", { topic: args.topic }); + log("delegating", { topic: args.topic }); + const summary = agent({ id: "summarize-topic", prompt: "Summarize " + args.topic }); + return { reportMarkdown: "Final: " + summary.reportMarkdown }; +} +`; + +async function createRunStore(sessionDir: string) { + const store = new WorkflowRunStore({ sessionDir, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_123", + workspaceId: "workspace-1", + definition, + definitionSource: source, + args: { topic: "durable workflows" }, + now: "2026-05-29T00:00:00.000Z", + }); + return store; +} + +function createRunner(store: WorkflowRunStore, taskAdapter: WorkflowTaskAdapter) { + return new WorkflowRunner({ + runStore: store, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter, + runnerId: "runner-a", + clock: { + nowIso: () => "2026-05-29T00:00:01.000Z", + nowMs: () => 1_000, + }, + }); +} 
+ +describe("WorkflowRunner", () => { + test("executes conductor primitives and persists run events/results", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = await createRunStore(tmp.path); + const taskCalls: unknown[] = []; + let runTimeoutMs: number | undefined; + let runAbortSignalWasAbortedDuringAgent: boolean | undefined; + const runner = createRunner(store, { + async runAgent(spec, _lifecycle, waitOptions) { + taskCalls.push(spec); + runTimeoutMs = waitOptions?.timeoutMs; + runAbortSignalWasAbortedDuringAgent = waitOptions?.abortSignal?.aborted; + return { + taskId: "task_1", + reportMarkdown: "summary", + structuredOutput: { sources: 3 }, + }; + }, + }); + + const result = await runner.run("wfr_123"); + const run = await store.getRun("wfr_123"); + + expect(result).toEqual({ reportMarkdown: "Final: summary" }); + expect(taskCalls).toEqual([{ id: "summarize-topic", prompt: "Summarize durable workflows" }]); + expect(runTimeoutMs).toBeGreaterThan(5 * 60 * 1000); + expect(runAbortSignalWasAbortedDuringAgent).toBe(false); + expect(run.status).toBe("completed"); + expect(run.events.map((event) => event.type)).toEqual([ + "status", + "phase", + "log", + "task", + "result", + "status", + ]); + expect(run.steps).toHaveLength(1); + expect(run.steps[0]).toMatchObject({ + stepId: "summarize-topic", + status: "completed", + taskId: "task_1", + result: { reportMarkdown: "summary", structuredOutput: { sources: 3 } }, + }); + }); + + test("returns child task IDs to workflow code", async () => { + using tmp = new DisposableTempDir("workflow-runner-task-id"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_task_id", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ agent }) { + const result = agent({ id: "implement", prompt: "Implement" }); + return { reportMarkdown: result.taskId }; + }`, + args: {}, + now: 
"2026-05-29T00:00:00.000Z", + }); + const runner = createRunner(store, { + async runAgent() { + return { taskId: "task_impl", reportMarkdown: "implemented" }; + }, + }); + + await expect(runner.run("wfr_task_id")).resolves.toEqual({ reportMarkdown: "task_impl" }); + const run = await store.getRun("wfr_task_id"); + + expect(run.steps[0]?.result).toMatchObject({ taskId: "task_impl" }); + }); + + test("applies workflow-owned child patches through a durable applyPatch step", async () => { + using tmp = new DisposableTempDir("workflow-runner-apply-patch"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_apply_patch", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ agent, applyPatch }) { + const implementation = agent({ id: "implement", prompt: "Implement" }); + const applied = applyPatch({ id: "apply-implement", source: implementation, target: "parent" }); + return { reportMarkdown: applied.status + ":" + applied.taskId }; + }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const applyCalls: unknown[] = []; + const runner = createRunner(store, { + async runAgent() { + return { taskId: "task_impl", reportMarkdown: "implemented" }; + }, + async applyPatch(spec) { + applyCalls.push(spec); + return { + success: true, + taskId: spec.sourceTaskId, + projectResults: [{ projectPath: "/repo", projectName: "repo", status: "applied" }], + }; + }, + }); + + await expect(runner.run("wfr_apply_patch")).resolves.toEqual({ + reportMarkdown: "applied:task_impl", + }); + const run = await store.getRun("wfr_apply_patch"); + + expect(applyCalls).toEqual([ + { + id: "apply-implement", + sourceTaskId: "task_impl", + target: "parent", + threeWay: true, + force: false, + }, + ]); + expect(run.steps).toEqual( + expect.arrayContaining([ + expect.objectContaining({ stepId: "apply-implement", status: "completed" }), + ]) + ); + expect(run.events).toEqual( + 
expect.arrayContaining([ + expect.objectContaining({ type: "patch", stepId: "apply-implement", status: "started" }), + expect.objectContaining({ type: "patch", stepId: "apply-implement", status: "applied" }), + ]) + ); + }); + + test("replays completed applyPatch steps without reapplying", async () => { + using tmp = new DisposableTempDir("workflow-runner-apply-patch-replay"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + const agentSpec = { id: "implement", prompt: "Implement" }; + const applySpec = { + id: "apply-implement", + sourceTaskId: "task_impl", + target: "parent", + threeWay: true, + force: false, + } as const; + await store.createRun({ + id: "wfr_apply_patch_replay", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ agent, applyPatch }) { + const implementation = agent({ id: "implement", prompt: "Implement" }); + const applied = applyPatch({ id: "apply-implement", source: implementation, target: "parent" }); + return { reportMarkdown: applied.status + ":" + applied.taskId }; + }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + await store.recordStepCompleted("wfr_apply_patch_replay", { + stepId: agentSpec.id, + inputHash: hashWorkflowStepInput(agentSpec.id, agentSpec), + taskId: "task_impl", + result: { taskId: "task_impl", reportMarkdown: "implemented" }, + startedAt: "2026-05-29T00:00:01.000Z", + completedAt: "2026-05-29T00:00:02.000Z", + }); + await store.recordStepCompleted("wfr_apply_patch_replay", { + stepId: applySpec.id, + inputHash: hashWorkflowStepInput(applySpec.id, applySpec), + taskId: "task_impl", + result: { + reportMarkdown: "Patch applied from task task_impl.", + structuredOutput: { success: true, status: "applied", taskId: "task_impl" }, + }, + startedAt: "2026-05-29T00:00:03.000Z", + completedAt: "2026-05-29T00:00:04.000Z", + }); + const runner = createRunner(store, { + async runAgent() { + throw new Error("agent should replay"); + }, + 
async applyPatch() { + throw new Error("patch should replay"); + }, + }); + + await expect(runner.run("wfr_apply_patch_replay")).resolves.toEqual({ + reportMarkdown: "applied:task_impl", + }); + }); + + test("rejects applyPatch sources that are not workflow-owned child tasks", async () => { + using tmp = new DisposableTempDir("workflow-runner-apply-patch-unowned"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_apply_patch_unowned", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ applyPatch }) { + return applyPatch({ id: "apply-external", source: "task_external" }); + }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const runner = createRunner(store, { + async runAgent() { + throw new Error("agent should not run"); + }, + async applyPatch() { + throw new Error("external task patch should not be applied"); + }, + }); + + await expect(runner.run("wfr_apply_patch_unowned")).rejects.toThrow( + /was not produced by a completed workflow agent step/ + ); + }); + + test("marks run failed when runtime setup throws after starting", async () => { + using tmp = new DisposableTempDir("workflow-runner-runtime-setup"); + const store = await createRunStore(tmp.path); + const runner = new WorkflowRunner({ + runStore: store, + runtimeFactory: { + async create() { + throw new Error("runtime unavailable"); + }, + }, + taskAdapter: { + async runAgent() { + throw new Error("should not spawn tasks"); + }, + }, + runnerId: "runner-a", + clock: { + nowIso: () => "2026-05-29T00:00:01.000Z", + nowMs: () => 1_000, + }, + }); + + await expect(runner.run("wfr_123")).rejects.toThrow("runtime unavailable"); + const run = await store.getRun("wfr_123"); + + expect(run.status).toBe("failed"); + expect(run.events).toEqual( + expect.arrayContaining([ + expect.objectContaining({ type: "error", message: "runtime unavailable" }), + expect.objectContaining({ type: "status", 
status: "failed" }), + ]) + ); + }); + + test("requires explicit resume permission to restart interrupted runs", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = await createRunStore(tmp.path); + await store.appendStatus("wfr_123", "interrupted", "2026-05-29T00:00:00.500Z"); + let taskCalls = 0; + const runner = createRunner(store, { + async runAgent() { + taskCalls += 1; + return { taskId: "task_1", reportMarkdown: "summary" }; + }, + }); + + await expect(runner.run("wfr_123")).rejects.toThrow(/interrupted/); + await expect(store.getRun("wfr_123")).resolves.toMatchObject({ status: "interrupted" }); + expect(taskCalls).toBe(0); + + await expect( + runner.run("wfr_123", { allowResumeFromInterrupted: true }) + ).resolves.toMatchObject({ reportMarkdown: "Final: summary" }); + await expect(store.getRun("wfr_123")).resolves.toMatchObject({ status: "completed" }); + expect(taskCalls).toBe(1); + }); + + test("aborts without terminal writes after losing its lease", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = await createRunStore(tmp.path); + let renewCalls = 0; + let resolveTaskStarted!: () => void; + const taskStarted = new Promise((resolve) => { + resolveTaskStarted = resolve; + }); + store.renewLease = async () => { + renewCalls += 1; + if (renewCalls === 1) { + return true; + } + await taskStarted; + return false; + }; + let sawAbort = false; + const runner = createRunner(store, { + async runAgent(_spec, _lifecycle, waitOptions) { + resolveTaskStarted(); + return await new Promise((_resolve, reject) => { + const signal = waitOptions?.abortSignal; + if (signal == null) { + reject(new Error("missing abort signal")); + return; + } + if (signal.aborted) { + sawAbort = true; + reject(new Error("task aborted")); + return; + } + signal.addEventListener( + "abort", + () => { + sawAbort = true; + reject(new Error("task aborted")); + }, + { once: true } + ); + }); + }, + }); + + await 
expect(runner.run("wfr_123")).rejects.toThrow(/lease lost/); + const run = await store.getRun("wfr_123"); + + expect(renewCalls).toBeGreaterThan(0); + expect(sawAbort).toBe(true); + expect(run.status).toBe("running"); + expect( + run.events.some((event) => event.type === "status" && event.status === "completed") + ).toBe(false); + expect(run.events.some((event) => event.type === "status" && event.status === "failed")).toBe( + false + ); + }); + + test("replays completed agent steps without respawning child tasks", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = await createRunStore(tmp.path); + let taskCalls = 0; + const runner = createRunner(store, { + async runAgent() { + taskCalls += 1; + return { taskId: "task_1", reportMarkdown: "summary" }; + }, + }); + + await runner.run("wfr_123"); + await runner.run("wfr_123"); + + expect(taskCalls).toBe(1); + }); + + test("reuses a recorded started task id instead of respawning on resume", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = await createRunStore(tmp.path); + const spec = { id: "summarize-topic", prompt: "Summarize durable workflows" }; + await store.recordStepStarted("wfr_123", { + stepId: spec.id, + inputHash: hashWorkflowStepInput(spec.id, spec), + taskId: "task_existing", + startedAt: "2026-05-29T00:00:00.500Z", + }); + let runAgentCalls = 0; + const waitedFor: string[] = []; + let waitTimeoutMs: number | undefined; + let waitAbortSignal: AbortSignal | undefined; + const runner = createRunner(store, { + async runAgent() { + runAgentCalls += 1; + return { taskId: "task_duplicate", reportMarkdown: "duplicate" }; + }, + async waitForAgentTask(taskId, _spec, waitOptions) { + waitedFor.push(taskId); + waitTimeoutMs = waitOptions?.timeoutMs; + waitAbortSignal = waitOptions?.abortSignal; + return { taskId, reportMarkdown: "summary" }; + }, + }); + + await expect(runner.run("wfr_123")).resolves.toEqual({ reportMarkdown: "Final: summary" }); 
+ + expect(runAgentCalls).toBe(0); + expect(waitedFor).toEqual(["task_existing"]); + expect(waitTimeoutMs).toBeGreaterThan(5 * 60 * 1000); + expect(waitAbortSignal?.aborted).toBe(false); + }); + + test("reruns stale started task ids that no longer have recoverable reports", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = await createRunStore(tmp.path); + const spec = { id: "summarize-topic", prompt: "Summarize durable workflows" }; + await store.recordStepStarted("wfr_123", { + stepId: spec.id, + inputHash: hashWorkflowStepInput(spec.id, spec), + taskId: "task_missing", + startedAt: "2026-05-29T00:00:00.500Z", + }); + let runAgentCalls = 0; + const waitedFor: string[] = []; + const runner = createRunner(store, { + async runAgent() { + runAgentCalls += 1; + return { taskId: "task_recovered", reportMarkdown: "summary" }; + }, + async waitForAgentTask(taskId) { + waitedFor.push(taskId); + throw new Error("Task not found"); + }, + }); + + await expect(runner.run("wfr_123")).resolves.toEqual({ reportMarkdown: "Final: summary" }); + const run = await store.getRun("wfr_123"); + + expect(waitedFor).toEqual(["task_missing"]); + expect(runAgentCalls).toBe(1); + expect(run.steps.at(-1)).toMatchObject({ + stepId: "summarize-topic", + status: "completed", + taskId: "task_recovered", + }); + }); + + test("restarts started task records when resuming a user-interrupted run", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = await createRunStore(tmp.path); + const spec = { id: "summarize-topic", prompt: "Summarize durable workflows" }; + await store.recordStepStarted("wfr_123", { + stepId: spec.id, + inputHash: hashWorkflowStepInput(spec.id, spec), + taskId: "task_interrupted", + startedAt: "2026-05-29T00:00:00.500Z", + }); + await store.appendStatus("wfr_123", "interrupted", "2026-05-29T00:00:00.750Z"); + let runAgentCalls = 0; + const waitedFor: string[] = []; + const runner = createRunner(store, { + async 
runAgent() { + runAgentCalls += 1; + return { taskId: "task_restarted", reportMarkdown: "summary" }; + }, + async waitForAgentTask(taskId) { + waitedFor.push(taskId); + throw new Error("interrupted task should not be awaited"); + }, + }); + + await expect(runner.run("wfr_123", { allowResumeFromInterrupted: true })).resolves.toEqual({ + reportMarkdown: "Final: summary", + }); + + expect(runAgentCalls).toBe(1); + expect(waitedFor).toEqual([]); + }); + + test("runs parallelAgents specs concurrently and returns ordered results", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_parallel", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ parallelAgents }) { + const results = parallelAgents([ + { id: "source-a", prompt: "Read source A" }, + { id: "source-b", prompt: "Read source B" }, + ]); + return { reportMarkdown: results.map((result) => result.reportMarkdown).join(" + ") }; + }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const calls: string[] = []; + let active = 0; + let maxActive = 0; + const runner = createRunner(store, { + async runAgent(spec) { + calls.push(spec.id); + active += 1; + maxActive = Math.max(maxActive, active); + await new Promise((resolve) => setTimeout(resolve, 10)); + active -= 1; + return { taskId: `task_${spec.id}`, reportMarkdown: spec.id }; + }, + }); + + await expect(runner.run("wfr_parallel")).resolves.toEqual({ + reportMarkdown: "source-a + source-b", + }); + + expect(calls).toEqual(["source-a", "source-b"]); + expect(maxActive).toBe(2); + const run = await store.getRun("wfr_parallel"); + expect(run.steps.map((step) => step.stepId).sort()).toEqual(["source-a", "source-b"]); + }); + + test("interrupts sibling parallelAgents when one child task fails", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new 
WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_parallel_failure", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ parallelAgents }) { + parallelAgents([ + { id: "source-a", prompt: "Read source A" }, + { id: "source-b", prompt: "Read source B" }, + ]); + return { reportMarkdown: "unreachable" }; + }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + let interruptRunCalls = 0; + let releaseSourceB!: () => void; + const sourceBInterrupted = new Promise((resolve) => { + releaseSourceB = resolve; + }); + const calls: string[] = []; + const runner = createRunner(store, { + async runAgent(spec) { + calls.push(spec.id); + if (spec.id === "source-a") { + throw new Error("source-a failed"); + } + await sourceBInterrupted; + throw new Error("source-b interrupted"); + }, + async interruptRun() { + interruptRunCalls += 1; + releaseSourceB(); + }, + }); + + await expect(runner.run("wfr_parallel_failure")).rejects.toThrow("source-a failed"); + + expect(calls).toEqual(["source-a", "source-b"]); + expect(interruptRunCalls).toBe(1); + }); + + test("does not interrupt sibling parallelAgents when foreground wait backgrounds", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_parallel_backgrounded", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ parallelAgents }) { + parallelAgents([ + { id: "source-a", prompt: "Read source A" }, + { id: "source-b", prompt: "Read source B" }, + ]); + return { reportMarkdown: "unreachable" }; + }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + let interruptRunCalls = 0; + let sourceBStarted = false; + const runner = createRunner(store, { + async runAgent(spec, _lifecycle, waitOptions) { + if (spec.id === "source-a") { + throw new 
ForegroundWaitBackgroundedError(); + } + sourceBStarted = true; + await new Promise((_resolve, reject) => { + waitOptions?.abortSignal?.addEventListener( + "abort", + () => reject(new Error("Interrupted")), + { once: true } + ); + }); + throw new Error("unreachable"); + }, + async interruptRun() { + interruptRunCalls += 1; + }, + }); + + await expect(runner.run("wfr_parallel_backgrounded")).rejects.toBeInstanceOf( + WorkflowRunBackgroundedError + ); + + await expect(store.getRun("wfr_parallel_backgrounded")).resolves.toMatchObject({ + status: "backgrounded", + }); + + expect(sourceBStarted).toBe(true); + expect(interruptRunCalls).toBe(0); + }); + + test("retries only failed parallelAgents steps after structured output validation errors", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_parallel_retry_validation", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ parallelAgents }) { + const results = parallelAgents([ + { + id: "source-a", + prompt: "Summarize A", + outputSchema: { type: "object", required: ["summary"], properties: { summary: { type: "string" } } }, + }, + { + id: "source-b", + prompt: "Summarize B", + outputSchema: { type: "object", required: ["summary"], properties: { summary: { type: "string" } } }, + }, + ]); + return { reportMarkdown: results.map((result) => result.structuredOutput.summary).join(" + ") }; + }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const calls: string[] = []; + const runner = createRunner(store, { + async runAgent(spec) { + calls.push(spec.id); + if (spec.id === "source-b" && calls.filter((id) => id === "source-b").length === 1) { + return { taskId: "task_source_b_bad", reportMarkdown: "bad" }; + } + return { + taskId: `task_${spec.id}_${calls.length}`, + reportMarkdown: spec.id, + structuredOutput: { summary: spec.id }, + }; 
+ }, + }); + + await expect(runner.run("wfr_parallel_retry_validation")).resolves.toEqual({ + reportMarkdown: "source-a + source-b", + }); + const run = await store.getRun("wfr_parallel_retry_validation"); + + expect(calls).toEqual(["source-a", "source-b", "source-b"]); + expect(run.events).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + type: "task", + stepId: "source-b", + taskId: "task_source_b_bad", + status: "failed", + }), + expect.objectContaining({ + type: "log", + message: "Retrying source-b after validation failure", + }), + ]) + ); + }); + + test("retries workflow agent steps that fail structured output validation", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_retry_validation", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ agent }) { + const result = agent({ + id: "claims", + prompt: "Extract claims", + outputSchema: { + type: "object", + required: ["claims"], + properties: { claims: { type: "array", items: { type: "string" } } }, + additionalProperties: false, + }, + }); + return { reportMarkdown: result.structuredOutput.claims.join(", ") }; + }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const prompts: string[] = []; + const runner = createRunner(store, { + async runAgent(spec) { + prompts.push(spec.prompt); + if (prompts.length === 1) { + return { taskId: "task_bad", reportMarkdown: "bad" }; + } + return { + taskId: "task_good", + reportMarkdown: "good", + structuredOutput: { claims: ["durable"] }, + }; + }, + }); + + await expect(runner.run("wfr_retry_validation")).resolves.toEqual({ + reportMarkdown: "durable", + }); + const run = await store.getRun("wfr_retry_validation"); + + expect(prompts).toHaveLength(2); + expect(prompts[1]).toContain("Previous workflow attempt 1 failed output validation"); + 
expect(run.status).toBe("completed"); + expect(run.steps).toEqual([ + expect.objectContaining({ stepId: "claims", status: "completed", taskId: "task_good" }), + ]); + expect(run.events).toEqual( + expect.arrayContaining([ + expect.objectContaining({ type: "validation", stepId: "claims", success: false }), + expect.objectContaining({ + type: "task", + stepId: "claims", + taskId: "task_bad", + status: "failed", + }), + expect.objectContaining({ + type: "task", + stepId: "claims", + taskId: "task_good", + status: "completed", + }), + expect.objectContaining({ + type: "log", + message: "Retrying claims after validation failure", + }), + ]) + ); + }); + + test("stops retrying workflow agent validation after the maximum attempts", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_retry_exhausted", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ agent }) { + return agent({ + id: "claims", + prompt: "Extract claims", + outputSchema: { type: "object", required: ["claims"], properties: { claims: { type: "array" } } }, + }); + }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + let calls = 0; + const runner = createRunner(store, { + async runAgent() { + calls += 1; + return { taskId: `task_bad_${calls}`, reportMarkdown: "bad" }; + }, + }); + + await expect(runner.run("wfr_retry_exhausted")).rejects.toThrow(/structured output/); + const run = await store.getRun("wfr_retry_exhausted"); + + expect(calls).toBe(3); + expect(run.status).toBe("failed"); + expect(run.steps).toEqual([ + expect.objectContaining({ stepId: "claims", status: "failed", taskId: "task_bad_3" }), + ]); + expect( + run.events.filter( + (event) => event.type === "task" && event.stepId === "claims" && event.status === "failed" + ) + ).toHaveLength(3); + }); + + test("validates workflow agent structured output against 
requested schema", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_schema", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ agent }) { + return agent({ + id: "claims", + prompt: "Extract claims", + outputSchema: { + type: "object", + required: ["claims"], + properties: { claims: { type: "array", items: { type: "string" } } }, + additionalProperties: false, + }, + }); + }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const runner = createRunner(store, { + async runAgent() { + return { taskId: "task_1", reportMarkdown: "bad", structuredOutput: { claims: [1] } }; + }, + }); + + await expect(runner.run("wfr_schema")).rejects.toThrow( + /structured output failed schema validation.*claims\[0\]/ + ); + const run = await store.getRun("wfr_schema"); + expect(run.steps).toEqual([ + expect.objectContaining({ stepId: "claims", status: "failed", taskId: "task_1" }), + ]); + }); + + test("marks foreground-backgrounded agent waits as backgrounded runs", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = await createRunStore(tmp.path); + const runner = createRunner(store, { + async runAgent() { + throw new ForegroundWaitBackgroundedError(); + }, + }); + + await expect(runner.run("wfr_123")).rejects.toBeInstanceOf(WorkflowRunBackgroundedError); + await expect(store.getRun("wfr_123")).resolves.toMatchObject({ status: "backgrounded" }); + }); + + test("applies sandbox limits before evaluating workflow source", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_limits", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow() { return { reportMarkdown: "limited" }; }`, + 
args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + let limitsApplied = false; + let evalSawLimits = false; + let timeoutMs: number | undefined; + const noop = () => undefined; + const runner = new WorkflowRunner({ + runStore: store, + runtimeFactory: { + async create() { + return { + setLimits(limits) { + limitsApplied = true; + timeoutMs = limits.timeoutMs; + }, + registerFunction: noop, + registerObject: noop, + onEvent: noop, + abort: noop, + getAbortSignal() { + return undefined; + }, + async eval() { + evalSawLimits = limitsApplied; + return { + success: true, + result: { reportMarkdown: "limited" }, + toolCalls: [], + consoleOutput: [], + duration_ms: 0, + }; + }, + dispose: noop, + [Symbol.dispose]: noop, + }; + }, + }, + taskAdapter: { + async runAgent() { + throw new Error("agent should not run"); + }, + }, + runnerId: "runner-a", + clock: { + nowIso: () => "2026-05-29T00:00:01.000Z", + nowMs: () => 1_000, + }, + }); + + await expect(runner.run("wfr_limits")).resolves.toEqual({ reportMarkdown: "limited" }); + expect(timeoutMs).toBeGreaterThan(5 * 60 * 1000); + expect(evalSawLimits).toBe(true); + }); + + test("supports async workflow function exports", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_async", + workspaceId: "workspace-1", + definition, + definitionSource: `export default async function workflow() { return { reportMarkdown: "async ok" }; }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const runner = createRunner(store, { + async runAgent() { + throw new Error("agent should not run"); + }, + }); + + await expect(runner.run("wfr_async")).resolves.toEqual({ reportMarkdown: "async ok" }); + }); + + test("returns the normalized workflow result for JSON-serializable values", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: 
tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_normalized_return", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow() { return { summary: "done" }; }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const runner = createRunner(store, { + async runAgent() { + throw new Error("agent should not run"); + }, + }); + + await expect(runner.run("wfr_normalized_return")).resolves.toEqual({ + reportMarkdown: JSON.stringify({ summary: "done" }), + }); + }); + + test("marks empty workflow returns as failed runs", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_empty_return", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow() {}`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const runner = createRunner(store, { + async runAgent() { + throw new Error("agent should not run"); + }, + }); + + await expect(runner.run("wfr_empty_return")).rejects.toThrow(/must return/); + const run = await store.getRun("wfr_empty_return"); + expect(run.status).toBe("failed"); + expect(run.events.some((event) => event.type === "result")).toBe(false); + }); + + test("does not overwrite an interrupted run with completed status", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = await createRunStore(tmp.path); + let releaseAgent!: () => void; + let runPromise!: Promise; + const agentStarted = new Promise((resolve) => { + const runner = createRunner(store, { + async runAgent() { + resolve(); + await new Promise((release) => { + releaseAgent = release; + }); + return { taskId: "task_1", reportMarkdown: "late summary" }; + }, + }); + runPromise = runner.run("wfr_123"); + }); + + await agentStarted; + await store.appendStatus("wfr_123", "interrupted", "2026-05-29T00:00:02.000Z"); + releaseAgent(); + 
await expect(runPromise).rejects.toThrow(/interrupted/); + + const run = await store.getRun("wfr_123"); + expect(run.status).toBe("interrupted"); + expect(run.events.some((event) => event.type === "result")).toBe(false); + }); + + /* This definition source is expected to be rejected with an export-a-default-function error; the run must end failed and contain an error event. */ test("marks compile failures as failed runs", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_compile_error", + workspaceId: "workspace-1", + definition, + definitionSource: `export default () => ({ reportMarkdown: "bad shape" });`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const runner = createRunner(store, { + async runAgent() { + throw new Error("agent should not run"); + }, + }); + + await expect(runner.run("wfr_compile_error")).rejects.toThrow(/export a default function/); + const run = await store.getRun("wfr_compile_error"); + expect(run.status).toBe("failed"); + expect(run.events.map((event) => event.type)).toContain("error"); + }); + + /* An agent call without an id must reject with a stable-id error; the adapter stub throws if runAgent is ever reached. */ test("fails fast when a replay-boundary primitive omits a stable id", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: "wfr_missing_id", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow({ agent }) { return agent({ prompt: "no id" }); }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const runner = createRunner(store, { + async runAgent() { + throw new Error("agent should not run without a stable id"); + }, + }); + + await expect(runner.run("wfr_missing_id")).rejects.toThrow(/stable id/); + }); + + /* The workflow reports typeof for mux, require, setTimeout, Date, and Math.random; every one is expected to be undefined. */ test("does not expose mux tools, filesystem imports, or timers to workflow code", async () => { + using tmp = new DisposableTempDir("workflow-runner"); + const store = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await store.createRun({ + id: 
"wfr_forbidden", + workspaceId: "workspace-1", + definition, + definitionSource: `export default function workflow() { + return { + mux: typeof mux, + require: typeof require, + setTimeout: typeof setTimeout, + Date: typeof Date, + random: typeof Math.random, + }; + }`, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const runner = createRunner(store, { + async runAgent() { + throw new Error("agent should not run"); + }, + }); + + const result = await runner.run("wfr_forbidden"); + + expect(JSON.parse(result.reportMarkdown)).toEqual({ + mux: "undefined", + require: "undefined", + setTimeout: "undefined", + Date: "undefined", + random: "undefined", + }); + }); +}); diff --git a/src/node/services/workflows/WorkflowRunner.ts b/src/node/services/workflows/WorkflowRunner.ts new file mode 100644 index 0000000000..3e4b0362dd --- /dev/null +++ b/src/node/services/workflows/WorkflowRunner.ts @@ -0,0 +1,1249 @@ +import { StructuredTaskOutputSchema, WorkflowResultSchema } from "@/common/orpc/schemas"; +import { TaskApplyGitPatchToolResultSchema } from "@/common/utils/tools/toolDefinitions"; +import type { + StructuredTaskOutput, + WorkflowResult, + WorkflowRunEvent, +} from "@/common/types/workflow"; +import assert from "@/common/utils/assert"; +import { getErrorMessage } from "@/common/utils/errors"; +import { validateJsonSchemaSubset } from "@/common/utils/jsonSchemaSubset"; +import type { IJSRuntime, IJSRuntimeFactory } from "@/node/services/ptc/runtime"; +import type { AppendWorkflowRunEventOptions, WorkflowRunStore } from "./WorkflowRunStore"; +import { assertWorkflowStepId, hashWorkflowStepInput } from "./workflowReplayKey"; + /** Thrown by WorkflowRunner.run when evaluation fails after the run was marked backgrounded. */ +export class WorkflowRunBackgroundedError extends Error { + constructor(runId: string) { + super(`Workflow run backgrounded: ${runId}`); + this.name = "WorkflowRunBackgroundedError"; + } +} + /** Marks agent output that failed validation; isRetryableAgentOutputError checks for this class. */ +class WorkflowAgentOutputValidationError extends Error { + constructor(message: string) { + super(message); + this.name = 
"WorkflowAgentOutputValidationError"; + } +} + /** Agent step spec; id is validated with assertWorkflowStepId and hashed into the step input hash. */ +export interface WorkflowAgentSpec { + id: string; + prompt: string; + title?: string; + agentId?: string; + outputSchema?: unknown; +} + +export interface WorkflowAgentWaitOptions { + abortSignal?: AbortSignal; + timeoutMs?: number; + backgroundOnMessageQueued?: boolean; +} + +export type WorkflowAgentResult = StructuredTaskOutput & { taskId: string }; + +export type WorkflowApplyPatchStatus = "applied" | "conflict" | "failed"; + +export interface WorkflowApplyPatchSpec { + id: string; + sourceTaskId: string; + target: "parent"; + projectPath?: string; + threeWay: boolean; + force: boolean; +} + +export interface WorkflowApplyPatchResult { + success: boolean; + status: WorkflowApplyPatchStatus; + taskId: string; + dryRun?: boolean; + projectResults?: unknown; + appliedCommits?: unknown; + headCommitSha?: string; + conflictPaths?: string[]; + failedPatchSubject?: string; + error?: string; + note?: string; +} + /** Adapter the runner calls to start agents; waitForAgentTask, applyPatch, and interruptRun are optional and null-checked before use. */ +export interface WorkflowTaskAdapter { + runAgent( + spec: WorkflowAgentSpec, + lifecycle?: { onTaskCreated?: (taskId: string) => Promise | void }, + waitOptions?: WorkflowAgentWaitOptions + ): Promise; + waitForAgentTask?( + taskId: string, + spec: WorkflowAgentSpec, + waitOptions?: WorkflowAgentWaitOptions + ): Promise; + applyPatch?( + spec: WorkflowApplyPatchSpec, + options?: { abortSignal?: AbortSignal } + ): Promise; + interruptRun?(): Promise; +} + +export interface WorkflowRunnerRunOptions { + onLeaseAcquired?: () => void; + abortSignal?: AbortSignal; + backgroundOnMessageQueued?: boolean; + allowResumeFromInterrupted?: boolean; +} + /** throwIfLost throws the recorded lease-lost error, if any. */ +interface WorkflowRunnerLeaseGuard { + throwIfLost(): void; +} + /** Time source; DEFAULT_CLOCK backs it with Date when no clock option is given. */ +export interface WorkflowRunnerClock { + nowIso(): string; + nowMs(): number; +} + +export interface WorkflowRunnerOptions { + runStore: WorkflowRunStore; + runtimeFactory: IJSRuntimeFactory; + taskAdapter: WorkflowTaskAdapter; + runnerId: string; + clock?: WorkflowRunnerClock; +} + +const 
WORKFLOW_AGENT_MAX_ATTEMPTS = 3; + /** 24 hours in milliseconds; used as the runtime limit and as the agent wait timeout. */ +const WORKFLOW_RUNTIME_TIMEOUT_MS = 24 * 60 * 60 * 1000; + /** Detects the foreground-wait-backgrounded error by its name property rather than by class identity. */ +function isForegroundWaitBackgroundedError(error: unknown): boolean { + return error instanceof Error && error.name === "ForegroundWaitBackgroundedError"; +} + +function createForegroundWaitBackgroundedError(): Error { + const error = new Error("Workflow foreground wait backgrounded"); + error.name = "ForegroundWaitBackgroundedError"; + return error; +} + /** True only when the error message is exactly Task not found or Task interrupted. */ +function shouldRestartUnrecoverableStartedTask(error: unknown): boolean { + const message = getErrorMessage(error); + return message === "Task not found" || message === "Task interrupted"; +} + +function isRetryableAgentOutputError(error: unknown): boolean { + return error instanceof WorkflowAgentOutputValidationError; +} + /** Returns a non-empty string taskId property from an arbitrary value, otherwise undefined. */ +function getTaskIdFromUnknownAgentResult(result: unknown): string | undefined { + if (result != null && typeof result === "object") { + const taskId = (result as Record).taskId; + if (typeof taskId === "string" && taskId.length > 0) { + return taskId; + } + } + return undefined; +} + /** Copies the spec and appends retry instructions, including the previous validation message, to the prompt. */ +function buildRetryAgentSpec( + spec: WorkflowAgentSpec, + attempt: number, + validationMessage: string +): WorkflowAgentSpec { + return { + ...spec, + prompt: + `${spec.prompt}\n\n` + + `Previous workflow attempt ${attempt} failed output validation: ${validationMessage}\n` + + "Rerun the task from scratch and submit a final report whose structured output satisfies the requested schema. 
" + + "In file-backed report mode, rewrite structured-output.json and call agent_report with reportMarkdownPath, structuredOutputPath, and title all set to null.", + }; +} + +function abortRuntimeOnSignal(runtime: IJSRuntime, abortSignal?: AbortSignal): () => void { + if (abortSignal == null) { + return () => undefined; + } + if (abortSignal.aborted) { + runtime.abort(); + return () => undefined; + } + const abortRuntime = () => runtime.abort(); + abortSignal.addEventListener("abort", abortRuntime, { once: true }); + return () => abortSignal.removeEventListener("abort", abortRuntime); +} + +function getWorkflowAgentWaitOptions( + runtime: IJSRuntime, + options: WorkflowRunnerRunOptions | undefined +): WorkflowAgentWaitOptions { + return { + abortSignal: runtime.getAbortSignal(), + timeoutMs: WORKFLOW_RUNTIME_TIMEOUT_MS, + backgroundOnMessageQueued: options?.backgroundOnMessageQueued ?? true, + }; +} + +const DEFAULT_CLOCK: WorkflowRunnerClock = { + nowIso: () => new Date().toISOString(), + nowMs: () => Date.now(), +}; + +export class WorkflowRunner { + private readonly runStore: WorkflowRunStore; + private readonly runtimeFactory: IJSRuntimeFactory; + private readonly taskAdapter: WorkflowTaskAdapter; + private readonly runnerId: string; + private readonly clock: WorkflowRunnerClock; + + constructor(options: WorkflowRunnerOptions) { + assert(options.runnerId.length > 0, "WorkflowRunner: runnerId is required"); + this.runStore = options.runStore; + this.runtimeFactory = options.runtimeFactory; + this.taskAdapter = options.taskAdapter; + this.runnerId = options.runnerId; + this.clock = options.clock ?? 
DEFAULT_CLOCK; + } + + /** Acquires the run lease (throwing if it is already held), renews it on an interval, evaluates the compiled workflow in a fresh runtime, appends status, error, and result events, and releases the lease in the finally block. A run that is already completed with a result event returns that stored result. */ async run(runId: string, options?: WorkflowRunnerRunOptions): Promise { + assert(runId.length > 0, "WorkflowRunner.run: runId is required"); + const leaseAcquired = await this.runStore.acquireLease( + runId, + this.runnerId, + this.clock.nowMs() + ); + if (!leaseAcquired) { + throw new Error(`Workflow run is already active: ${runId}`); + } + + options?.onLeaseAcquired?.(); + let activeRuntime: IJSRuntime | null = null; + let leaseLostError: Error | null = null; + /* Only the first lease loss is recorded; every call aborts the active runtime if one exists. */ const markLeaseLost = (cause?: unknown) => { + leaseLostError ??= new Error( + cause instanceof Error + ? `Workflow run lease lost: ${runId}: ${cause.message}` + : `Workflow run lease lost: ${runId}` + ); + activeRuntime?.abort(); + }; + const leaseGuard: WorkflowRunnerLeaseGuard = { + throwIfLost() { + if (leaseLostError != null) { + throw leaseLostError; + } + }, + }; + let leaseRenewalInFlight = false; + /* A tick is skipped while the previous renewal is still in flight; a false or rejected renewal marks the lease lost. */ const leaseRenewal = setInterval(() => { + if (leaseRenewalInFlight) { + return; + } + leaseRenewalInFlight = true; + void this.runStore + .renewLease(runId, this.runnerId, this.clock.nowMs()) + .then((renewed) => { + if (!renewed) { + markLeaseLost(); + } + }) + .catch(markLeaseLost) + .finally(() => { + leaseRenewalInFlight = false; + }); + }, this.runStore.getLeaseRenewalIntervalMs()); + + let removeAbortListener: () => void = () => undefined; + try { + const run = await this.runStore.getRun(runId); + const sequence = new WorkflowEventSequence(run.events.at(-1)?.sequence ?? 
0); + /* Replay short-circuit: a completed run with a stored result event returns that result without re-running. */ if (run.status === "completed") { + const completedResult = run.events.findLast((event) => event.type === "result")?.result; + if (completedResult != null) { + return completedResult; + } + } + const resumingInterruptedRun = run.status === "interrupted"; + if (resumingInterruptedRun && options?.allowResumeFromInterrupted !== true) { + throw new Error(`Workflow run interrupted: ${runId}`); + } + const ignoreStartedTaskIds = resumingInterruptedRun; + let backgrounded: Promise | null = null; + /* Appends the backgrounded status event at most once; later calls await the same promise. */ const markBackgrounded = async () => { + leaseGuard.throwIfLost(); + backgrounded ??= this.appendEvent(runId, { + sequence: sequence.next(), + type: "status", + at: this.clock.nowIso(), + status: "backgrounded", + }).then(() => undefined); + await backgrounded; + }; + + leaseGuard.throwIfLost(); + await this.appendEvent( + runId, + { + sequence: sequence.next(), + type: "status", + at: this.clock.nowIso(), + status: "running", + }, + { allowInterruptedResume: resumingInterruptedRun } + ); + + let runtime: IJSRuntime | undefined; + try { + runtime = await this.runtimeFactory.create(); + const setupRuntime = runtime; + activeRuntime = setupRuntime; + /* The lease may have been lost before activeRuntime was assigned, in which case markLeaseLost had nothing to abort. */ if (leaseLostError != null) { + setupRuntime.abort(); + } + removeAbortListener = abortRuntimeOnSignal(setupRuntime, options?.abortSignal); + setupRuntime.setLimits({ timeoutMs: WORKFLOW_RUNTIME_TIMEOUT_MS }); + setupRuntime.registerFunction("__workflowArgs", () => Promise.resolve(run.args)); + setupRuntime.registerFunction("__workflowPhase", async (name, details) => { + assert(typeof name === "string" && name.length > 0, "phase requires a non-empty name"); + leaseGuard.throwIfLost(); + await this.appendEvent(runId, { + sequence: sequence.next(), + type: "phase", + at: this.clock.nowIso(), + name, + details, + }); + return null; + }); + setupRuntime.registerFunction("__workflowLog", async (message, data) => { + assert( + typeof message === "string" && message.length > 0, + "log requires a non-empty message" + ); + 
leaseGuard.throwIfLost(); + await this.appendEvent(runId, { + sequence: sequence.next(), + type: "log", + at: this.clock.nowIso(), + message, + data, + }); + return null; + }); + /* The agent, applyPatch, and parallelAgents bridges share one pattern: on a foreground-wait-backgrounded error, record the backgrounded status, then rethrow. */ setupRuntime.registerFunction("__workflowAgent", async (rawSpec) => { + try { + return await this.runAgentStep(runId, sequence, rawSpec, { + ignoreStartedTaskIds, + waitOptions: getWorkflowAgentWaitOptions(setupRuntime, options), + leaseGuard, + }); + } catch (error) { + if (isForegroundWaitBackgroundedError(error)) { + await markBackgrounded(); + } + throw error; + } + }); + setupRuntime.registerFunction("__workflowApplyPatch", async (rawSpec) => { + try { + return await this.runApplyPatchStep(runId, sequence, rawSpec, { + abortSignal: setupRuntime.getAbortSignal(), + leaseGuard, + }); + } catch (error) { + if (isForegroundWaitBackgroundedError(error)) { + await markBackgrounded(); + } + throw error; + } + }); + setupRuntime.registerFunction("__workflowParallelAgents", async (rawSpecs) => { + try { + return await this.runAgentStepsInParallel(runId, sequence, rawSpecs, { + ignoreStartedTaskIds, + waitOptions: getWorkflowAgentWaitOptions(setupRuntime, options), + leaseGuard, + }); + } catch (error) { + if (isForegroundWaitBackgroundedError(error)) { + await markBackgrounded(); + } + throw error; + } + }); + } catch (error) { + await this.appendFailureStatus(runId, sequence, error, { + leaseGuard, + abortSignal: options?.abortSignal, + }); + throw error; + } + if (runtime == null) { + throw new Error("Workflow runtime setup did not return a runtime"); + } + /* NOTE(review): this using declaration comes after the setup try block, so a throw during setup after the runtime was created exits before it; the runtime does not appear to be disposed on that path - confirm. */ using _runtimeResource = runtime; + + let compiledSource: string; + try { + compiledSource = compileWorkflowSource(run.definitionSource); + } catch (error) { + leaseGuard.throwIfLost(); + await this.appendEvent(runId, { + sequence: sequence.next(), + type: "error", + at: this.clock.nowIso(), + message: error instanceof Error ? 
error.message : "Workflow compilation failed", + }); + await this.appendEvent(runId, { + sequence: sequence.next(), + type: "status", + at: this.clock.nowIso(), + status: "failed", + }); + throw error; + } + + /* On failure the checks run in this order: backgrounded, caller abort, interrupted in the store, lease lost; only then are the error and failed events appended. */ const execution = await runtime.eval(compiledSource); + if (!execution.success) { + if (backgrounded != null) { + throw new WorkflowRunBackgroundedError(runId); + } + if (options?.abortSignal?.aborted === true) { + throw new Error(execution.error ?? "Workflow run aborted"); + } + await this.throwIfInterrupted(runId); + leaseGuard.throwIfLost(); + await this.appendEvent(runId, { + sequence: sequence.next(), + type: "error", + at: this.clock.nowIso(), + message: execution.error ?? "Workflow execution failed", + }); + await this.appendEvent(runId, { + sequence: sequence.next(), + type: "status", + at: this.clock.nowIso(), + status: "failed", + }); + throw new Error(execution.error ?? "Workflow execution failed"); + } + + /* The interrupted check runs before the result event and again before the completed status, so an interrupted run is never marked completed here. */ await this.throwIfInterrupted(runId); + let result: WorkflowResult; + try { + result = normalizeWorkflowResultForEvent(execution.result); + } catch (error) { + leaseGuard.throwIfLost(); + await this.appendEvent(runId, { + sequence: sequence.next(), + type: "error", + at: this.clock.nowIso(), + message: getErrorMessage(error), + }); + await this.appendEvent(runId, { + sequence: sequence.next(), + type: "status", + at: this.clock.nowIso(), + status: "failed", + }); + throw error; + } + leaseGuard.throwIfLost(); + await this.appendEvent(runId, { + sequence: sequence.next(), + type: "result", + at: this.clock.nowIso(), + result, + }); + await this.throwIfInterrupted(runId); + leaseGuard.throwIfLost(); + await this.appendEvent(runId, { + sequence: sequence.next(), + type: "status", + at: this.clock.nowIso(), + status: "completed", + }); + return result; + } finally { + removeAbortListener(); + clearInterval(leaseRenewal); + await this.runStore.releaseLease(runId, this.runnerId); + } + } + + /** Appends an error event followed by a failed status. Returns without writing when the caller abort signal is already aborted; throws instead when the run is interrupted or the lease is lost. */ private async appendFailureStatus( + runId: string, + 
sequence: WorkflowEventSequence, + error: unknown, + options: { leaseGuard: WorkflowRunnerLeaseGuard; abortSignal?: AbortSignal } + ): Promise { + if (options.abortSignal?.aborted === true) { + return; + } + await this.throwIfInterrupted(runId); + options.leaseGuard.throwIfLost(); + await this.appendEvent(runId, { + sequence: sequence.next(), + type: "error", + at: this.clock.nowIso(), + message: getErrorMessage(error), + }); + await this.appendEvent(runId, { + sequence: sequence.next(), + type: "status", + at: this.clock.nowIso(), + status: "failed", + }); + } + + /** Store write wrapper: forwards to the run store with expectedLeaseOwnerId set to this runner id. The recordStep wrappers below do the same. */ private async appendEvent( + runId: string, + event: WorkflowRunEvent, + options: AppendWorkflowRunEventOptions = {} + ) { + return await this.runStore.appendEvent(runId, event, { + ...options, + expectedLeaseOwnerId: this.runnerId, + }); + } + + private async recordStepStarted( + runId: string, + input: Parameters[1] + ): Promise { + await this.runStore.recordStepStarted(runId, input, { expectedLeaseOwnerId: this.runnerId }); + } + + private async recordStepCompleted( + runId: string, + input: Parameters[1] + ): Promise { + await this.runStore.recordStepCompleted(runId, input, { expectedLeaseOwnerId: this.runnerId }); + } + + private async recordStepFailed( + runId: string, + input: Parameters[1] + ): Promise { + await this.runStore.recordStepFailed(runId, input, { expectedLeaseOwnerId: this.runnerId }); + } + + /** Re-reads the run from the store and throws when its status is interrupted. */ private async throwIfInterrupted(runId: string): Promise { + const run = await this.runStore.getRun(runId); + if (run.status === "interrupted") { + throw new Error(`Workflow run interrupted: ${runId}`); + } + } + + /** Replay-aware applyPatch step: parses the raw spec, looks up an existing step by id and input hash, and otherwise applies the patch through the task adapter while recording step state and patch events. */ private async runApplyPatchStep( + runId: string, + sequence: WorkflowEventSequence, + rawSpec: unknown, + options: { + abortSignal?: AbortSignal; + leaseGuard: WorkflowRunnerLeaseGuard; + } + ): Promise { + const spec = parseWorkflowApplyPatchSpec(rawSpec); + assertWorkflowStepId(spec.id, "applyPatch"); + const inputHash = hashWorkflowStepInput(spec.id, spec); + 
options.leaseGuard.throwIfLost(); + const existingStep = await this.runStore.getStep(runId, spec.id, inputHash); + /* A completed step is replayed only when its stored result carries structuredOutput. */ if (existingStep?.status === "completed" && existingStep.result?.structuredOutput != null) { + return normalizeWorkflowApplyPatchResult(existingStep.result.structuredOutput); + } + + options.leaseGuard.throwIfLost(); + /* The patch source must be a task recorded on a completed step of this run. */ await this.assertTaskBelongsToCompletedWorkflowStep(runId, spec.sourceTaskId); + const startedAt = existingStep?.startedAt ?? this.clock.nowIso(); + await this.recordStepStarted(runId, { + stepId: spec.id, + inputHash, + taskId: spec.sourceTaskId, + startedAt, + }); + await this.appendEvent(runId, { + sequence: sequence.next(), + type: "patch", + at: this.clock.nowIso(), + stepId: spec.id, + sourceTaskId: spec.sourceTaskId, + status: "started", + details: + spec.projectPath != null + ? { target: spec.target, projectPath: spec.projectPath } + : { target: spec.target }, + }); + + /* NOTE(review): the catch below records the step as failed for any error thrown in this try block, including a lost lease detected by throwIfLost - confirm that is intended. */ try { + if (this.taskAdapter.applyPatch == null) { + throw new Error("Workflow task adapter does not support applyPatch"); + } + const rawResult = await this.taskAdapter.applyPatch(spec, { + abortSignal: options.abortSignal, + }); + options.leaseGuard.throwIfLost(); + const result = normalizeWorkflowApplyPatchResult(rawResult); + const reportMarkdown = formatWorkflowApplyPatchReport(result); + await this.recordStepCompleted(runId, { + stepId: spec.id, + inputHash, + taskId: spec.sourceTaskId, + result: { + reportMarkdown, + structuredOutput: result, + }, + startedAt, + completedAt: this.clock.nowIso(), + }); + await this.appendEvent(runId, { + sequence: sequence.next(), + type: "patch", + at: this.clock.nowIso(), + stepId: spec.id, + sourceTaskId: spec.sourceTaskId, + status: result.status, + details: result, + }); + return result; + } catch (error) { + const message = getErrorMessage(error); + await this.recordStepFailed(runId, { + stepId: spec.id, + inputHash, + taskId: spec.sourceTaskId, + error: message, + startedAt, + completedAt: this.clock.nowIso(), + }); + 
await this.appendEvent(runId, { + sequence: sequence.next(), + type: "patch", + at: this.clock.nowIso(), + stepId: spec.id, + sourceTaskId: spec.sourceTaskId, + status: "failed", + details: { error: message }, + }); + throw error; + } + } + + /** Asserts that some completed step of the run has this taskId both on the step and on its stored result. */ private async assertTaskBelongsToCompletedWorkflowStep( + runId: string, + taskId: string + ): Promise { + const run = await this.runStore.getRun(runId); + const owningStep = run.steps.find( + (step) => + step.status === "completed" && step.taskId === taskId && step.result?.taskId === taskId + ); + assert( + owningStep != null, + `applyPatch source taskId ${taskId} was not produced by a completed workflow agent step` + ); + } + + /** Runs one agent step with replay: a completed step with the same id and input hash returns its stored result; a started step reuses its taskId unless ignoreStartedTaskIds is set. */ private async runAgentStep( + runId: string, + sequence: WorkflowEventSequence, + rawSpec: unknown, + options: { + ignoreStartedTaskIds: boolean; + waitOptions?: WorkflowAgentWaitOptions; + leaseGuard: WorkflowRunnerLeaseGuard; + } + ): Promise { + const spec = parseWorkflowAgentSpec(rawSpec); + assertWorkflowStepId(spec.id, "agent"); + const inputHash = hashWorkflowStepInput(spec.id, spec); + options.leaseGuard.throwIfLost(); + const existingStep = await this.runStore.getStep(runId, spec.id, inputHash); + if (existingStep?.status === "completed" && existingStep.result != null) { + return existingStep.result; + } + + options.leaseGuard.throwIfLost(); + return await this.runAndRecordAgentStepWithRetries(runId, sequence, { + spec, + inputHash, + startedAt: existingStep?.startedAt ?? this.clock.nowIso(), + taskId: + !options.ignoreStartedTaskIds && existingStep?.status === "started" + ? 
existingStep.taskId + : undefined, + leaseGuard: options.leaseGuard, + waitOptions: options.waitOptions, + }); + } + + /** Runs several agent steps concurrently in batches. Completed steps are replayed from the store; steps whose output fails validation are queued for a later batch, up to WORKFLOW_AGENT_MAX_ATTEMPTS attempts. Results are written at the index of the corresponding input spec. */ private async runAgentStepsInParallel( + runId: string, + sequence: WorkflowEventSequence, + rawSpecs: unknown, + options: { + ignoreStartedTaskIds: boolean; + waitOptions?: WorkflowAgentWaitOptions; + leaseGuard: WorkflowRunnerLeaseGuard; + } + ): Promise { + assert(Array.isArray(rawSpecs), "parallelAgents requires an array of agent specs"); + assert(rawSpecs.length > 0, "parallelAgents requires at least one agent spec"); + + const results = new Array(rawSpecs.length); + /* Every spec is parsed and id-checked before any store lookup or agent start. */ const parsedSteps = rawSpecs.map((rawSpec) => { + const spec = parseWorkflowAgentSpec(rawSpec); + assertWorkflowStepId(spec.id, "parallelAgents"); + return { spec, inputHash: hashWorkflowStepInput(spec.id, spec) }; + }); + options.leaseGuard.throwIfLost(); + const existingSteps = await this.runStore.getSteps( + runId, + parsedSteps.map((step) => ({ stepId: step.spec.id, inputHash: step.inputHash })) + ); + let pending: Array<{ + index: number; + spec: WorkflowAgentSpec; + inputHash: string; + startedAt: string; + taskId?: string; + attempt: number; + retryMessage?: string; + }> = []; + /* existingSteps is read by position, so it is assumed to be index-aligned with parsedSteps - confirm against getSteps. */ for (const [index, step] of parsedSteps.entries()) { + const existingStep = existingSteps[index]; + if (existingStep?.status === "completed" && existingStep.result != null) { + results[index] = existingStep.result; + continue; + } + pending.push({ + index, + spec: step.spec, + inputHash: step.inputHash, + startedAt: existingStep?.startedAt ?? this.clock.nowIso(), + taskId: + !options.ignoreStartedTaskIds && existingStep?.status === "started" + ? 
existingStep.taskId + : undefined, + attempt: 1, + }); + } + + while (pending.length > 0) { + const currentPending = pending; + pending = []; + /* Each batch gets its own abort controller, chained to the upstream signal, so a backgrounded wait can abort the sibling waits of that batch. */ const batchAbortController = new AbortController(); + const upstreamAbortSignal = options.waitOptions?.abortSignal; + const abortBatch = () => batchAbortController.abort(); + if (upstreamAbortSignal?.aborted) { + abortBatch(); + } else { + upstreamAbortSignal?.addEventListener("abort", abortBatch, { once: true }); + } + let foregroundBackgrounded = false; + let interruptPromise: Promise | undefined; + /* Calls taskAdapter.interruptRun at most once per batch and swallows its failure. */ const interruptRemainingTasks = async (): Promise => { + interruptPromise ??= this.taskAdapter.interruptRun?.() ?? Promise.resolve(); + try { + await interruptPromise; + } catch { + // Preserve the original child failure; workflow failure handling will surface that cause. + } + }; + const batchWaitOptions: WorkflowAgentWaitOptions = { + ...options.waitOptions, + abortSignal: batchAbortController.signal, + }; + const pendingRuns = currentPending.map(async (step) => { + return await this.runOrResumeAgentStep(runId, { + spec: + step.attempt === 1 + ? step.spec + : buildRetryAgentSpec( + step.spec, + step.attempt - 1, + step.retryMessage ?? 
"previous attempt failed" + ), + inputHash: step.inputHash, + startedAt: step.startedAt, + taskId: step.taskId, + leaseGuard: options.leaseGuard, + waitOptions: batchWaitOptions, + }); + }); + /* A backgrounded wait aborts the batch; any other failure interrupts the remaining tasks unless a backgrounded wait was already seen. */ const guardedRuns = pendingRuns.map(async (pendingRun) => { + try { + return await pendingRun; + } catch (error) { + if (isForegroundWaitBackgroundedError(error)) { + foregroundBackgrounded = true; + abortBatch(); + } else if (!foregroundBackgrounded) { + await interruptRemainingTasks(); + } + throw error; + } + }); + let rawResults: WorkflowAgentResult[]; + /* On failure every run is allowed to settle before rethrowing; a backgrounded batch always surfaces the backgrounded error. */ try { + rawResults = await Promise.all(guardedRuns); + } catch (error) { + await Promise.allSettled(guardedRuns); + if (foregroundBackgrounded) { + throw createForegroundWaitBackgroundedError(); + } + throw error; + } finally { + upstreamAbortSignal?.removeEventListener("abort", abortBatch); + } + for (const [pendingIndex, rawResult] of rawResults.entries()) { + const step = currentPending[pendingIndex]; + assert(step != null, "WorkflowRunner.runAgentStepsInParallel: missing pending step"); + try { + results[step.index] = await this.recordAgentResult(runId, sequence, { + ...step, + leaseGuard: options.leaseGuard, + rawResult, + }); + } catch (error) { + if (!isRetryableAgentOutputError(error) || step.attempt >= WORKFLOW_AGENT_MAX_ATTEMPTS) { + throw error; + } + options.leaseGuard.throwIfLost(); + await this.recordAgentRetry(runId, sequence, step.spec.id, step.attempt, error); + pending.push({ + ...step, + startedAt: this.clock.nowIso(), + taskId: undefined, + attempt: step.attempt + 1, + retryMessage: getErrorMessage(error), + }); + } + } + } + return results; + } + + /** Sequential counterpart of the parallel path: run or resume the agent, record the result, and on a retryable validation error retry with a rewritten prompt, a fresh startedAt, and no taskId, up to WORKFLOW_AGENT_MAX_ATTEMPTS attempts. */ private async runAndRecordAgentStepWithRetries( + runId: string, + sequence: WorkflowEventSequence, + step: { + spec: WorkflowAgentSpec; + inputHash: string; + startedAt: string; + taskId?: string; + waitOptions?: WorkflowAgentWaitOptions; + leaseGuard: WorkflowRunnerLeaseGuard; + } + ): Promise { + let attempt = 1; + let startedAt = step.startedAt; + let 
taskId = step.taskId; + let spec = step.spec; + while (attempt <= WORKFLOW_AGENT_MAX_ATTEMPTS) { + const rawResult = await this.runOrResumeAgentStep(runId, { + spec, + inputHash: step.inputHash, + startedAt, + taskId, + leaseGuard: step.leaseGuard, + waitOptions: step.waitOptions, + }); + /* The result is recorded against the original spec (step.spec), not the rewritten retry spec. */ try { + return await this.recordAgentResult(runId, sequence, { + spec: step.spec, + inputHash: step.inputHash, + startedAt, + leaseGuard: step.leaseGuard, + rawResult, + }); + } catch (error) { + if (!isRetryableAgentOutputError(error) || attempt >= WORKFLOW_AGENT_MAX_ATTEMPTS) { + throw error; + } + step.leaseGuard.throwIfLost(); + await this.recordAgentRetry(runId, sequence, step.spec.id, attempt, error); + spec = buildRetryAgentSpec(step.spec, attempt, getErrorMessage(error)); + startedAt = this.clock.nowIso(); + taskId = undefined; + attempt += 1; + } + } + throw new Error(`agent ${step.spec.id} exhausted validation retries`); + } + + /** Appends a log event describing the retry: attempt number, maximum attempts, and the error message. */ private async recordAgentRetry( + runId: string, + sequence: WorkflowEventSequence, + stepId: string, + attempt: number, + error: unknown + ): Promise { + await this.appendEvent(runId, { + sequence: sequence.next(), + type: "log", + at: this.clock.nowIso(), + message: `Retrying ${stepId} after validation failure`, + data: { + attempt, + maxAttempts: WORKFLOW_AGENT_MAX_ATTEMPTS, + error: getErrorMessage(error), + }, + }); + } + + /** Waits on an already-started task when a taskId and waitForAgentTask are both available. If that wait fails with Task not found or Task interrupted, or there is nothing to resume, starts a new agent through runAgent and records the step as started. */ private async runOrResumeAgentStep( + runId: string, + step: { + spec: WorkflowAgentSpec; + inputHash: string; + startedAt: string; + taskId?: string; + waitOptions?: WorkflowAgentWaitOptions; + leaseGuard: WorkflowRunnerLeaseGuard; + } + ): Promise { + step.leaseGuard.throwIfLost(); + if (step.taskId != null && this.taskAdapter.waitForAgentTask != null) { + try { + return await this.taskAdapter.waitForAgentTask(step.taskId, step.spec, step.waitOptions); + } catch (error) { + if (!shouldRestartUnrecoverableStartedTask(error)) { + throw error; + } + } + } + + step.leaseGuard.throwIfLost(); + let recordedTaskId: string | 
undefined; + const rawResult = await this.taskAdapter.runAgent( + step.spec, + { + onTaskCreated: async (taskId) => { + step.leaseGuard.throwIfLost(); + recordedTaskId = taskId; + await this.recordStepStarted(runId, { + stepId: step.spec.id, + inputHash: step.inputHash, + taskId, + startedAt: step.startedAt, + }); + }, + }, + step.waitOptions + ); + step.leaseGuard.throwIfLost(); + /* Fallback for adapters that never invoked onTaskCreated: record the start using the taskId from the result. */ if (recordedTaskId == null) { + await this.recordStepStarted(runId, { + stepId: step.spec.id, + inputHash: step.inputHash, + taskId: rawResult.taskId, + startedAt: step.startedAt, + }); + } + return rawResult; + } + + /** Validates the raw agent result against StructuredTaskOutputSchema and then against the optional outputSchema, records the step as completed, and appends a completed task event. Validation failures are recorded as a failed attempt and rethrown as WorkflowAgentOutputValidationError. */ private async recordAgentResult( + runId: string, + sequence: WorkflowEventSequence, + step: { + spec: WorkflowAgentSpec; + inputHash: string; + startedAt: string; + leaseGuard: WorkflowRunnerLeaseGuard; + rawResult: WorkflowAgentResult; + } + ): Promise { + let result: StructuredTaskOutput; + try { + result = StructuredTaskOutputSchema.parse(step.rawResult); + } catch (error) { + const message = `agent ${step.spec.id} returned invalid task output: ${getErrorMessage(error)}`; + await this.recordFailedAgentAttempt(runId, sequence, step, message); + throw new WorkflowAgentOutputValidationError(message); + } + + if (step.spec.outputSchema !== undefined) { + const validation = validateJsonSchemaSubset(step.spec.outputSchema, result.structuredOutput); + if (!validation.success) { + const message = `agent ${step.spec.id} structured output failed schema validation: ${validation.errors + .map((error) => `${error.path}: ${error.message}`) + .join("; ")}`; + await this.recordFailedAgentAttempt(runId, sequence, step, message); + throw new WorkflowAgentOutputValidationError(message); + } + } + step.leaseGuard.throwIfLost(); + const taskId = this.getTaskIdFromAgentResult(step.rawResult, step.spec.id); + await this.recordStepCompleted(runId, { + stepId: step.spec.id, + inputHash: step.inputHash, + taskId, + result, + startedAt: step.startedAt, + completedAt: this.clock.nowIso(), + }); + 
step.leaseGuard.throwIfLost(); + await this.appendEvent(runId, { + sequence: sequence.next(), + type: "task", + at: this.clock.nowIso(), + stepId: step.spec.id, + taskId, + status: "completed", + }); + return result; + } + + /** Appends a failed validation event, records the step as failed, and, when a taskId can be read from the raw result, appends a failed task event. The lease guard is checked before each write. */ private async recordFailedAgentAttempt( + runId: string, + sequence: WorkflowEventSequence, + step: { + spec: WorkflowAgentSpec; + inputHash: string; + startedAt: string; + leaseGuard: WorkflowRunnerLeaseGuard; + rawResult: WorkflowAgentResult; + }, + message: string + ): Promise { + step.leaseGuard.throwIfLost(); + const taskId = getTaskIdFromUnknownAgentResult(step.rawResult); + await this.appendEvent(runId, { + sequence: sequence.next(), + type: "validation", + at: this.clock.nowIso(), + stepId: step.spec.id, + success: false, + message, + }); + step.leaseGuard.throwIfLost(); + await this.recordStepFailed(runId, { + stepId: step.spec.id, + inputHash: step.inputHash, + taskId, + error: message, + startedAt: step.startedAt, + completedAt: this.clock.nowIso(), + }); + if (taskId != null) { + step.leaseGuard.throwIfLost(); + await this.appendEvent(runId, { + sequence: sequence.next(), + type: "task", + at: this.clock.nowIso(), + stepId: step.spec.id, + taskId, + status: "failed", + }); + } + } + + /** Asserts that the result carries a non-empty string taskId and returns it. */ private getTaskIdFromAgentResult(result: WorkflowAgentResult, stepId: string): string { + const maybeTaskId = result.taskId; + assert( + typeof maybeTaskId === "string" && maybeTaskId.length > 0, + `agent ${stepId} returned no taskId` + ); + return maybeTaskId; + } +} + /** Counter for event sequence numbers; next() increments the stored value and then returns it. */ +class WorkflowEventSequence { + constructor(private current: number) {} + + next(): number { + this.current += 1; + return this.current; + } +} + /** Validates a raw applyPatch spec and normalizes its aliases: source, from, task, or taskId for the source task; threeWay or three_way; projectPath or project_path. */ +function parseWorkflowApplyPatchSpec(rawSpec: unknown): WorkflowApplyPatchSpec { + assert(rawSpec != null && typeof rawSpec === "object", "applyPatch requires a spec object"); + const spec = rawSpec as Record; + assert(typeof spec.id === "string", "applyPatch replay boundary requires a stable id"); + + const sourceTaskId = 
/**
 * Extracts the source task id for an applyPatch step.
 *
 * Accepts either the task id itself (a non-empty string) or any object that
 * carries a non-empty string `taskId` property.
 *
 * @param source - raw value supplied by the workflow author.
 * @returns the task id, or `undefined` when `source` is neither a non-empty
 *   string nor an object with a non-empty string `taskId`.
 */
function getApplyPatchSourceTaskId(source: unknown): string | undefined {
  const isNonEmptyString = (value: unknown): value is string =>
    typeof value === "string" && value.length > 0;

  if (isNonEmptyString(source)) {
    return source;
  }
  // `typeof null` is "object", so null has to be excluded explicitly.
  if (typeof source !== "object" || source === null) {
    return undefined;
  }
  const candidate = (source as { taskId?: unknown }).taskId;
  return isNonEmptyString(candidate) ? candidate : undefined;
}
/**
 * Collects every conflicting path reported by a patch-apply result, without
 * duplicates and in first-seen order.
 *
 * Top-level `conflictPaths` are only read when the result is unsuccessful;
 * per-project `conflictPaths` are read regardless of `success`.
 *
 * NOTE(review): the parameter type is presumed to be the parse result of
 * `TaskApplyGitPatchToolResultSchema` (that is what the caller passes) — the
 * generic argument was not legible in the reviewed text; confirm.
 *
 * @param result - parsed patch-apply tool result.
 * @returns de-duplicated conflict paths, top-level entries first.
 */
function getConflictPathsFromPatchResult(
  result: ReturnType<typeof TaskApplyGitPatchToolResultSchema.parse>
): string[] {
  const fromTopLevel = result.success ? [] : (result.conflictPaths ?? []);
  const fromProjects = (result.projectResults ?? []).flatMap(
    (projectResult) => projectResult.conflictPaths ?? []
  );
  // A Set iterates in insertion order, so the first occurrence of a path wins.
  return [...new Set<string>([...fromTopLevel, ...fromProjects])];
}
/**
 * Turns a workflow definition into a script body that invokes it.
 *
 * The first `export default [async] function [name](` in `source` is rewritten
 * to a plain `[async] function __muxWorkflow(` declaration. The result is
 * wrapped in a script that:
 *   - sets `Date` and `Math.random` to `undefined` before the workflow code
 *     (presumably to keep workflow replays deterministic — confirm),
 *   - calls `__muxWorkflow` with the `__workflow*` host bindings, which are
 *     referenced here but must be supplied by whoever evaluates the script,
 *   - returns the promise of that call.
 *
 * @param source - workflow definition source text.
 * @returns the script text to evaluate.
 * @throws if `source` contains no default-exported function declaration
 *   (detected by the rewrite leaving the text unchanged).
 */
function compileWorkflowSource(source: string): string {
  // Optional `async`, optional function name, then the opening parenthesis.
  const defaultExportPattern =
    /export\s+default\s+(async\s+)?function(?:\s+[A-Za-z_$][\w$]*)?\s*\(/u;
  const renameEntryPoint = (_match: string, asyncKeyword: string | undefined): string => {
    // The captured keyword keeps its trailing whitespace, so no separator is added.
    const prefix = asyncKeyword ?? "";
    return `${prefix}function __muxWorkflow(`;
  };

  const compiled = source.replace(defaultExportPattern, renameEntryPoint);
  assert(compiled !== source, "Workflow definition must export a default function");

  return `
Date = undefined;
Math.random = undefined;
${compiled}
return (async () => await __muxWorkflow({
  args: __workflowArgs(),
  phase: __workflowPhase,
  log: __workflowLog,
  agent: __workflowAgent,
  applyPatch: __workflowApplyPatch,
  parallelAgents: __workflowParallelAgents,
}))();
`;
}
/**
 * Test helper: polls the run store until the run reports the expected status.
 *
 * Every read counts, including the one made once the deadline has passed, so
 * the helper never reports "timed out ... got X" when X is the status being
 * waited for.
 *
 * @param runStore - store queried via `getRun(runId)`.
 * @param runId - id of the workflow run to watch.
 * @param status - status value to wait for.
 * @param timeoutMs - how long to keep polling before giving up (default 1s,
 *   overridable like the sibling `waitForCondition` helper).
 * @throws Error naming the run, the expected status and the last observed
 *   status when the deadline passes without a match.
 */
async function waitForWorkflowStatus(
  runStore: WorkflowRunStore,
  runId: string,
  status: string,
  timeoutMs = 1_000
): Promise<void> {
  const deadline = Date.now() + timeoutMs;
  for (;;) {
    const run = await runStore.getRun(runId);
    if (run.status === status) {
      return;
    }
    // Check the deadline after reading so the final read can still succeed.
    if (Date.now() >= deadline) {
      throw new Error(`Timed out waiting for ${runId} to become ${status}; got ${run.status}`);
    }
    await new Promise((resolve) => setTimeout(resolve, 10));
  }
}
+ } + await new Promise((resolve) => setTimeout(resolve, 10)); + } + throw new Error(`Timed out waiting for ${runId} run file to become ${status}`); +} + +describe("WorkflowService", () => { + test("starts a named workflow and persists the captured definition source", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + const source = `// description: Demo workflow +export default function workflow({ args, agent }) { + const child = agent({ id: "summarize", prompt: "Summarize " + args.topic }); + return { reportMarkdown: "Final " + child.reportMarkdown }; +} +`; + await writeWorkflow(globalRoot, "demo", source); + + const taskAdapter: WorkflowTaskAdapter = { + async runAgent() { + return { taskId: "task_1", reportMarkdown: "child summary" }; + }, + }; + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter, + generateRunId: () => "wfr_demo", + runnerId: "runner-a", + clock: { + nowIso: () => "2026-05-29T00:00:00.000Z", + nowMs: () => 1_000, + }, + }); + + const result = await service.startNamedWorkflow({ + name: "demo", + workspaceId: "workspace-1", + projectTrusted: true, + args: { topic: "workflow services" }, + }); + const run = await runStore.getRun("wfr_demo"); + + expect(result).toEqual({ + runId: "wfr_demo", + status: "completed", + result: { reportMarkdown: "Final child summary" }, + }); + expect(run.definitionSource).toBe(source); + expect(run.definition.scope).toBe("global"); + }); + + test("runs workspace scratch workflow definitions authored as files", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const workspaceRoot = path.join(tmp.path, "project"); + 
const scratchRoot = path.join(workspaceRoot, ".mux", "workflows", ".scratch"); + const projectRoot = path.join(workspaceRoot, ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot, + globalRoot, + scratchRoot, + builtIns: [], + }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + throw new Error("agent should not run"); + }, + }, + generateRunId: () => "wfr_scratch_run", + runnerId: "runner-a", + }); + + await writeWorkflow( + scratchRoot, + "scratch-research", + "// description: Scratch research\nexport default function workflow({ args }) { return { reportMarkdown: 'Topic: ' + args.topic }; }\n" + ); + const result = await service.startNamedWorkflow({ + name: "scratch-research", + workspaceId: "workspace-1", + projectTrusted: true, + args: { topic: "drafts" }, + }); + const run = await runStore.getRun("wfr_scratch_run"); + + expect(result).toEqual({ + runId: "wfr_scratch_run", + status: "completed", + result: { reportMarkdown: "Topic: drafts" }, + }); + expect(run.definition.scope).toBe("scratch"); + await expect( + fs.readFile(path.join(scratchRoot, "scratch-research.js"), "utf-8") + ).resolves.toContain("// description: Scratch research"); + await expect(fs.readFile(path.join(scratchRoot, ".gitignore"), "utf-8")).resolves.toBe( + "*\n!.gitignore\n" + ); + }); + + test("lists definitions through the definition store trust gate", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow( + projectRoot, + "demo", + "// description: Project workflow\nexport default function workflow() { return null; }\n" + ); + await writeWorkflow( + 
globalRoot, + "demo", + "// description: Global workflow\nexport default function workflow() { return null; }\n" + ); + + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore: new WorkflowRunStore({ sessionDir: tmp.path }), + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + return { taskId: "task_1", reportMarkdown: "unused" }; + }, + }, + generateRunId: () => "wfr_demo", + runnerId: "runner-a", + }); + + await expect(service.listDefinitions({ projectTrusted: false })).resolves.toEqual([ + expect.objectContaining({ name: "demo", scope: "global" }), + ]); + await expect(service.listDefinitions({ projectTrusted: true })).resolves.toEqual([ + expect.objectContaining({ name: "demo", scope: "project" }), + ]); + }); + + test("interrupts a run without deleting completed step state", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + await runStore.createRun({ + id: "wfr_interrupt", + workspaceId: "workspace-1", + definition: { name: "demo", description: "Demo", scope: "built-in", executable: true }, + definitionSource: + "export default function workflow() { return { reportMarkdown: 'unused' }; }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + await runStore.recordStepCompleted("wfr_interrupt", { + stepId: "done", + inputHash: "hash:done", + taskId: "task_done", + result: { reportMarkdown: "done" }, + startedAt: "2026-05-29T00:00:01.000Z", + completedAt: "2026-05-29T00:00:02.000Z", + }); + + let interruptCalls = 0; + let statusDuringInterrupt: string | undefined; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot: path.join(tmp.path, "project"), + globalRoot: path.join(tmp.path, "global"), + builtIns: [], + }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async 
runAgent() { + throw new Error("unused"); + }, + async interruptRun() { + statusDuringInterrupt = (await runStore.getRun("wfr_interrupt")).status; + interruptCalls += 1; + }, + }, + runnerId: "runner-a", + clock: { + nowIso: () => "2026-05-29T00:00:03.000Z", + nowMs: () => 1_000, + }, + }); + + const interrupted = await service.interruptRun({ + workspaceId: "workspace-1", + runId: "wfr_interrupt", + }); + const completedStep = await runStore.getCompletedStep("wfr_interrupt", "done", "hash:done"); + + expect(interrupted.status).toBe("interrupted"); + expect(interruptCalls).toBe(1); + expect(statusDuringInterrupt).toBe("interrupted"); + expect(completedStep?.result).toEqual({ reportMarkdown: "done" }); + }); + + test("interrupts foreground workflow runs when the caller aborts", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow( + globalRoot, + "abortable", + "// description: Abortable workflow\nexport default function workflow({ agent }) { return agent({ id: 'slow-step', prompt: 'slow' }); }\n" + ); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + let agentWaitStarted = false; + let interruptCalls = 0; + let agentAbortObserved = false; + let abortObservedDuringInterrupt: boolean | undefined; + let statusDuringAbortInterrupt: string | undefined; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(_spec, _lifecycle, waitOptions) { + agentWaitStarted = true; + return await new Promise((_, reject) => { + waitOptions?.abortSignal?.addEventListener( + "abort", + () => { + agentAbortObserved = true; + reject(new Error("Task interrupted")); + }, + { once: true } + ); + }); + }, + async interruptRun() { + 
abortObservedDuringInterrupt = agentAbortObserved; + statusDuringAbortInterrupt = (await runStore.getRun("wfr_abort")).status; + interruptCalls += 1; + }, + }, + generateRunId: () => "wfr_abort", + runnerId: "runner-a", + }); + const abortController = new AbortController(); + + const runPromise = service.startNamedWorkflow({ + name: "abortable", + workspaceId: "workspace-1", + projectTrusted: false, + args: {}, + abortSignal: abortController.signal, + }); + await waitForCondition("foreground agent to start", () => agentWaitStarted); + abortController.abort(); + + await expect(runPromise).rejects.toThrow(/interrupted|aborted/i); + await expect(runStore.getRun("wfr_abort")).resolves.toMatchObject({ status: "interrupted" }); + expect(interruptCalls).toBe(1); + expect(abortObservedDuringInterrupt).toBe(true); + expect(statusDuringAbortInterrupt).toBe("interrupted"); + }); + + test("does not abort a running workflow from another workspace", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow( + globalRoot, + "workspace-owned", + "// description: Workspace-owned workflow\nexport default function workflow({ agent }) { return agent({ id: 'slow-step', prompt: 'slow' }); }\n" + ); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + let releaseAgent: ((value: { taskId: string; reportMarkdown: string }) => void) | undefined; + let agentAbortObserved = false; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(_spec, _lifecycle, waitOptions) { + return await new Promise<{ taskId: string; reportMarkdown: string }>( + (resolve, reject) => { + releaseAgent = resolve; + waitOptions?.abortSignal?.addEventListener( + 
"abort", + () => { + agentAbortObserved = true; + reject(new Error("Task interrupted")); + }, + { once: true } + ); + } + ); + }, + }, + generateRunId: () => "wfr_workspace_owned", + runnerId: "runner-a", + }); + const runPromise = service.startNamedWorkflow({ + name: "workspace-owned", + workspaceId: "workspace-1", + projectTrusted: false, + args: {}, + }); + await waitForCondition("foreground agent to start", () => releaseAgent != null); + + await expect( + service.interruptRun({ workspaceId: "workspace-2", runId: "wfr_workspace_owned" }) + ).rejects.toThrow("Workflow run not found: wfr_workspace_owned"); + + expect(agentAbortObserved).toBe(false); + releaseAgent?.({ taskId: "task_slow", reportMarkdown: "done" }); + await expect(runPromise).resolves.toMatchObject({ + runId: "wfr_workspace_owned", + status: "completed", + }); + }); + + test("interruptRun aborts an active foreground runner from another service instance", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow( + globalRoot, + "interrupt-active", + "// description: Interrupt active\nexport default function workflow({ agent }) { return agent({ id: 'slow-step', prompt: 'slow' }); }\n" + ); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + let agentWaitStarted = false; + let agentAbortObserved = false; + let interruptCalls = 0; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(_spec, _lifecycle, waitOptions) { + agentWaitStarted = true; + return await new Promise((_, reject) => { + waitOptions?.abortSignal?.addEventListener( + "abort", + () => { + agentAbortObserved = true; + reject(new Error("Task interrupted")); + }, + { once: true } + 
); + }); + }, + async interruptRun() { + throw new Error("starter service interruptRun should not be called"); + }, + }, + generateRunId: () => "wfr_interrupt_active", + runnerId: "runner-a", + }); + const interruptService = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + throw new Error("interrupt service runAgent should not be called"); + }, + async interruptRun() { + interruptCalls += 1; + }, + }, + runnerId: "runner-b", + }); + + const runPromise = service.startNamedWorkflow({ + name: "interrupt-active", + workspaceId: "workspace-1", + projectTrusted: false, + args: {}, + }); + const runErrorPromise = runPromise.then( + () => null, + (error: unknown) => error + ); + await waitForCondition("foreground agent to start", () => agentWaitStarted); + + const interrupted = await interruptService.interruptRun({ + workspaceId: "workspace-1", + runId: "wfr_interrupt_active", + }); + + expect(interrupted.status).toBe("interrupted"); + expect(agentAbortObserved).toBe(true); + expect(interruptCalls).toBe(1); + const runError = await runErrorPromise; + expect(runError).toBeInstanceOf(Error); + expect(runError instanceof Error ? 
runError.message : "").toMatch(/interrupted|aborted/i); + }); + + test("moves foreground workflow runs to background when child waits are backgrounded", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow( + globalRoot, + "backgroundable", + "// description: Backgroundable workflow\nexport default function workflow({ agent }) { return agent({ id: 'slow-step', prompt: 'slow' }); }\n" + ); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + let calls = 0; + const backgroundFlags: Array = []; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(_spec, _lifecycle, waitOptions) { + calls += 1; + backgroundFlags.push(waitOptions?.backgroundOnMessageQueued); + if (calls === 1) { + throw new ForegroundWaitBackgroundedError(); + } + return { taskId: "task_slow", reportMarkdown: "done" }; + }, + }, + generateRunId: () => "wfr_backgrounded", + runnerId: "runner-a", + }); + + const result = await service.startNamedWorkflow({ + name: "backgroundable", + workspaceId: "workspace-1", + projectTrusted: false, + args: {}, + }); + + expect(result).toEqual({ runId: "wfr_backgrounded", status: "backgrounded", result: null }); + await waitForWorkflowStatus(runStore, "wfr_backgrounded", "completed"); + await waitForWorkflowRunFileStatus(tmp.path, "wfr_backgrounded", "completed"); + expect(calls).toBe(2); + expect(backgroundFlags).toEqual([true, false]); + }); + + test("resumes the same run id and reuses completed steps", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + const source = `export default function workflow({ agent }) { + 
const first = agent({ id: "first", prompt: "first" }); + const second = agent({ id: "second", prompt: "second" }); + return { reportMarkdown: first.reportMarkdown + " + " + second.reportMarkdown }; +} +`; + await runStore.createRun({ + id: "wfr_resume", + workspaceId: "workspace-1", + definition: { name: "demo", description: "Demo", scope: "built-in", executable: true }, + definitionSource: source, + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + await runStore.recordStepCompleted("wfr_resume", { + stepId: "first", + inputHash: hashWorkflowStepInput("first", { id: "first", prompt: "first" }), + taskId: "task_first", + result: { reportMarkdown: "first done" }, + startedAt: "2026-05-29T00:00:01.000Z", + completedAt: "2026-05-29T00:00:02.000Z", + }); + await runStore.appendEvent("wfr_resume", { + sequence: 1, + type: "status", + at: "2026-05-29T00:00:03.000Z", + status: "interrupted", + }); + + const taskCalls: string[] = []; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot: path.join(tmp.path, "project"), + globalRoot: path.join(tmp.path, "global"), + builtIns: [], + }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(spec) { + taskCalls.push(spec.id); + return { taskId: `task_${spec.id}`, reportMarkdown: `${spec.id} done` }; + }, + }, + runnerId: "runner-a", + clock: { + nowIso: () => "2026-05-29T00:00:04.000Z", + nowMs: () => 1_000, + }, + }); + + const result = await service.resumeRun({ + workspaceId: "workspace-1", + runId: "wfr_resume", + projectTrusted: true, + }); + + expect(result).toEqual({ + runId: "wfr_resume", + status: "completed", + result: { reportMarkdown: "first done + second done" }, + }); + expect(taskCalls).toEqual(["second"]); + }); + + test("keeps resumed workflow running when foreground wait backgrounds", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); 
+ await runStore.createRun({ + id: "wfr_resume_backgrounded", + workspaceId: "workspace-1", + definition: { name: "demo", description: "Demo", scope: "built-in", executable: true }, + definitionSource: + "export default function workflow({ agent }) { return agent({ id: 'slow-step', prompt: 'slow' }); }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + await runStore.appendStatus( + "wfr_resume_backgrounded", + "interrupted", + "2026-05-29T00:00:01.000Z" + ); + + let calls = 0; + const backgroundFlags: Array = []; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot: path.join(tmp.path, "project"), + globalRoot: path.join(tmp.path, "global"), + builtIns: [], + }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(_spec, _lifecycle, waitOptions) { + calls += 1; + backgroundFlags.push(waitOptions?.backgroundOnMessageQueued); + if (calls === 1) { + throw new ForegroundWaitBackgroundedError(); + } + return { taskId: "task_slow", reportMarkdown: "done" }; + }, + }, + runnerId: "runner-a", + }); + + const result = await service.resumeRun({ + workspaceId: "workspace-1", + runId: "wfr_resume_backgrounded", + projectTrusted: true, + }); + + expect(result).toEqual({ + runId: "wfr_resume_backgrounded", + status: "backgrounded", + result: null, + }); + await waitForWorkflowStatus(runStore, "wfr_resume_backgrounded", "completed"); + expect(calls).toBe(2); + expect(backgroundFlags).toEqual([true, false]); + }); + + test("does not mark resume running before the runner acquires the lease", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + await runStore.createRun({ + id: "wfr_busy_resume", + workspaceId: "workspace-1", + definition: { name: "demo", description: "Demo", scope: "built-in", executable: true }, + definitionSource: + "export default function workflow() { return { reportMarkdown: 
'done' }; }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + await runStore.appendStatus("wfr_busy_resume", "interrupted", "2026-05-29T00:00:01.000Z"); + await runStore.acquireLease("wfr_busy_resume", "old-runner", Date.now()); + const originalConsoleError = console.error; + console.error = () => undefined; + try { + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot: path.join(tmp.path, "project"), + globalRoot: path.join(tmp.path, "global"), + builtIns: [], + }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + return { taskId: "task_1", reportMarkdown: "unused" }; + }, + }, + runnerId: "runner-a", + }); + + await expect( + service.resumeRunInBackground({ + workspaceId: "workspace-1", + runId: "wfr_busy_resume", + projectTrusted: true, + }) + ).rejects.toThrow(/already active/); + + await expect(runStore.getRun("wfr_busy_resume")).resolves.toMatchObject({ + status: "interrupted", + }); + } finally { + console.error = originalConsoleError; + await runStore.releaseLease("wfr_busy_resume", "old-runner"); + } + }); + + test("promotes a scratch workflow run to a reusable global definition", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + await runStore.createRun({ + id: "wfr_scratch", + workspaceId: "workspace-1", + definition: { name: "scratch", description: "Scratch", scope: "scratch", executable: true }, + definitionSource: + "export default function workflow() { return { reportMarkdown: 'scratch' }; }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new 
QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + return { taskId: "task_1", reportMarkdown: "unused" }; + }, + }, + runnerId: "runner-a", + }); + + const descriptor = await service.promoteScratchWorkflow({ + workspaceId: "workspace-1", + runId: "wfr_scratch", + name: "promoted-research", + description: "Promoted research workflow", + location: "global", + overwrite: false, + projectTrusted: true, + }); + const promotedSource = await fs.readFile( + path.join(globalRoot, "promoted-research.js"), + "utf-8" + ); + + expect(descriptor).toMatchObject({ + name: "promoted-research", + description: "Promoted research workflow", + scope: "global", + executable: true, + }); + expect(promotedSource).toContain("// description: Promoted research workflow"); + expect(promotedSource).toContain("reportMarkdown: 'scratch'"); + await expect(service.listDefinitions({ projectTrusted: false })).resolves.toEqual([ + expect.objectContaining({ name: "promoted-research", scope: "global" }), + ]); + }); + + test("promotes a scratch workflow definition to a reusable project definition without running it", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const workspaceRoot = path.join(tmp.path, "project"); + const scratchRoot = path.join(workspaceRoot, ".mux", "workflows", ".scratch"); + const projectRoot = path.join(workspaceRoot, ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot, + globalRoot, + scratchRoot, + builtIns: [], + }), + runStore: new WorkflowRunStore({ sessionDir: tmp.path }), + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + throw new Error("agent should not run"); + }, + }, + runnerId: "runner-a", + }); + + await writeWorkflow( + scratchRoot, + "scratch-draft", + "// description: Scratch draft\nexport default function workflow() { return { reportMarkdown: 
'draft' }; }\n" + ); + + const descriptor = await service.promoteScratchDefinition({ + workspaceId: "workspace-1", + name: "scratch-draft", + description: "Reusable scratch draft", + location: "project", + overwrite: false, + projectTrusted: true, + }); + const promotedSource = await fs.readFile(path.join(projectRoot, "scratch-draft.js"), "utf-8"); + + expect(descriptor).toMatchObject({ + name: "scratch-draft", + description: "Reusable scratch draft", + scope: "project", + executable: true, + }); + expect(promotedSource).toContain("// description: Reusable scratch draft"); + expect(promotedSource).toContain("reportMarkdown: 'draft'"); + }); + + test("can start a workflow in the background and persist a running run immediately", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow( + globalRoot, + "background-research", + "// description: Background workflow\nexport default function workflow({ agent }) { return agent({ id: 'slow-step', prompt: 'slow' }); }\n" + ); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + let releaseAgent: ((value: { taskId: string; reportMarkdown: string }) => void) | undefined; + const terminalEvents: Array<{ runId: string; status: string; result: unknown }> = []; + const lifecycleEvents: string[] = []; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + lifecycleEvents.push("agent-started"); + return await new Promise<{ taskId: string; reportMarkdown: string }>((resolve) => { + releaseAgent = resolve; + }); + }, + }, + onBackgroundRunTerminal(event) { + terminalEvents.push({ runId: event.runId, status: event.status, result: event.result }); + }, + generateRunId: () 
=> "wfr_background", + runnerId: "runner-a", + }); + + const started = await service.startNamedWorkflowInBackground({ + name: "background-research", + workspaceId: "workspace-1", + projectTrusted: false, + args: {}, + onBackgroundRunCreated(event) { + lifecycleEvents.push("run-created"); + expect(event).toMatchObject({ + runId: "wfr_background", + status: "running", + result: null, + run: { id: "wfr_background", status: "running" }, + }); + }, + }); + + expect(started).toMatchObject({ runId: "wfr_background", status: "running", result: null }); + await expect(runStore.getRun("wfr_background")).resolves.toMatchObject({ + id: "wfr_background", + status: "running", + }); + + expect(lifecycleEvents).toEqual(["run-created"]); + await waitForCondition("background agent to start", () => releaseAgent != null); + expect(lifecycleEvents).toEqual(["run-created", "agent-started"]); + releaseAgent?.({ taskId: "task_slow", reportMarkdown: "done" }); + await waitForWorkflowStatus(runStore, "wfr_background", "completed"); + await waitForCondition("background terminal callback", () => terminalEvents.length === 1); + await expect(runStore.getRun("wfr_background")).resolves.toMatchObject({ status: "completed" }); + expect(terminalEvents).toEqual([ + { + runId: "wfr_background", + status: "completed", + result: { reportMarkdown: "done", structuredOutput: undefined }, + }, + ]); + }); + + test("does not notify background continuation for interrupted runs", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + await writeWorkflow( + globalRoot, + "interruptable-background", + "// description: Interruptable background workflow\nexport default function workflow({ agent }) { return agent({ id: 'slow-step', prompt: 'slow' }); }\n" + ); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + let agentStarted = false; + 
let agentAbortObserved = false; + let interruptCalls = 0; + const terminalEvents: Array<{ runId: string; status: string; result: unknown }> = []; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(_spec, _lifecycle, waitOptions) { + agentStarted = true; + return await new Promise((_, reject) => { + waitOptions?.abortSignal?.addEventListener( + "abort", + () => { + agentAbortObserved = true; + reject(new Error("Task interrupted")); + }, + { once: true } + ); + }); + }, + async interruptRun() { + interruptCalls += 1; + }, + }, + onBackgroundRunTerminal(event) { + terminalEvents.push({ runId: event.runId, status: event.status, result: event.result }); + }, + generateRunId: () => "wfr_background_interrupt", + runnerId: "runner-a", + }); + + await service.startNamedWorkflowInBackground({ + name: "interruptable-background", + workspaceId: "workspace-1", + projectTrusted: false, + args: {}, + }); + await waitForCondition("background agent to start", () => agentStarted); + + const interrupted = await service.interruptRun({ + workspaceId: "workspace-1", + runId: "wfr_background_interrupt", + }); + + expect(interrupted.status).toBe("interrupted"); + await waitForWorkflowStatus(runStore, "wfr_background_interrupt", "interrupted"); + await waitForCondition("background agent abort", () => agentAbortObserved); + await new Promise((resolve) => setTimeout(resolve, 20)); + expect(interruptCalls).toBe(1); + expect(terminalEvents).toEqual([]); + }); + + test("auto-resumes crash-recovered running runs without resuming user-interrupted runs", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + await runStore.createRun({ + id: "wfr_crash_running", + workspaceId: "workspace-1", + definition: { name: "demo", description: "Demo", 
scope: "built-in", executable: true }, + definitionSource: + "export default function workflow({ agent }) { return agent({ id: 'after-crash', prompt: 'resume' }); }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + await runStore.appendStatus("wfr_crash_running", "running", "2026-05-29T00:00:01.000Z"); + await runStore.createRun({ + id: "wfr_user_interrupted", + workspaceId: "workspace-1", + definition: { name: "demo", description: "Demo", scope: "built-in", executable: true }, + definitionSource: + "export default function workflow({ agent }) { return agent({ id: 'should-not-run', prompt: 'blocked' }); }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + await runStore.appendStatus("wfr_user_interrupted", "interrupted", "2026-05-29T00:00:01.000Z"); + const taskCalls: string[] = []; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot: path.join(tmp.path, "project"), + globalRoot: path.join(tmp.path, "global"), + builtIns: [], + }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(spec) { + taskCalls.push(spec.id); + return { taskId: `task_${spec.id}`, reportMarkdown: "resumed" }; + }, + }, + runnerId: "runner-a", + }); + + await expect( + service.resumeCrashedRuns({ workspaceId: "workspace-1", projectTrusted: true }) + ).resolves.toEqual(["wfr_crash_running"]); + await waitForWorkflowStatus(runStore, "wfr_crash_running", "completed"); + await waitForWorkflowRunFileStatus(tmp.path, "wfr_crash_running", "completed"); + + expect(taskCalls).toEqual(["after-crash"]); + await expect(runStore.getRun("wfr_user_interrupted")).resolves.toMatchObject({ + status: "interrupted", + }); + }); + + test("retries crash recovery after a fresh persisted lease becomes stale", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await runStore.createRun({ + id: 
"wfr_fresh_crash_lease", + workspaceId: "workspace-1", + definition: { name: "demo", description: "Demo", scope: "built-in", executable: true }, + definitionSource: + "export default function workflow({ agent }) { return agent({ id: 'after-lease', prompt: 'resume' }); }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + await runStore.appendStatus("wfr_fresh_crash_lease", "running", "2026-05-29T00:00:01.000Z"); + await runStore.acquireLease("wfr_fresh_crash_lease", "crashed-runner", Date.now()); + const taskCalls: string[] = []; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot: path.join(tmp.path, "project"), + globalRoot: path.join(tmp.path, "global"), + builtIns: [], + }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(spec) { + taskCalls.push(spec.id); + return { taskId: `task_${spec.id}`, reportMarkdown: "resumed" }; + }, + }, + runnerId: "runner-a", + }); + + await expect( + service.resumeCrashedRuns({ workspaceId: "workspace-1", projectTrusted: true }) + ).resolves.toEqual(["wfr_fresh_crash_lease"]); + expect(taskCalls).toEqual([]); + + await waitForCondition("crash recovery retry to acquire stale lease", () => + taskCalls.includes("after-lease") + ); + await waitForWorkflowStatus(runStore, "wfr_fresh_crash_lease", "completed"); + }); + + test("re-checks project trust before delayed crash recovery retry", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 25 }); + await runStore.createRun({ + id: "wfr_project_trust_retry", + workspaceId: "workspace-1", + definition: { + name: "project-flow", + description: "Project", + scope: "project", + executable: true, + }, + definitionSource: + "export default function workflow({ agent }) { return agent({ id: 'after-trust-revoked', prompt: 'blocked' }); }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + await 
runStore.appendStatus("wfr_project_trust_retry", "running", "2026-05-29T00:00:01.000Z"); + await runStore.acquireLease("wfr_project_trust_retry", "crashed-runner", Date.now()); + const taskCalls: string[] = []; + let currentProjectTrusted = true; + let trustChecks = 0; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot: path.join(tmp.path, "project"), + globalRoot: path.join(tmp.path, "global"), + builtIns: [], + }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(spec) { + taskCalls.push(spec.id); + return { taskId: `task_${spec.id}`, reportMarkdown: "should not run" }; + }, + }, + getCurrentProjectTrusted: () => { + trustChecks += 1; + return currentProjectTrusted; + }, + runnerId: "runner-a", + }); + + await expect( + service.resumeCrashedRuns({ workspaceId: "workspace-1", projectTrusted: true }) + ).resolves.toEqual(["wfr_project_trust_retry"]); + currentProjectTrusted = false; + + await waitForCondition( + "delayed crash recovery retry to re-check project trust", + () => trustChecks >= 2 + ); + expect(taskCalls).toEqual([]); + await expect(runStore.getRun("wfr_project_trust_retry")).resolves.toMatchObject({ + status: "running", + }); + }); + + test("uses a fresh lease owner for each runner", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + const source = `// description: Demo workflow +export default function workflow() { + return { reportMarkdown: "ok" }; +} +`; + await writeWorkflow(globalRoot, "demo", source); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + const ownerIds: string[] = []; + const acquireLease = runStore.acquireLease.bind(runStore); + runStore.acquireLease = async (runId, ownerId, nowMs) => { + ownerIds.push(ownerId); + return await acquireLease(runId, ownerId, 
nowMs); + }; + let nextRunId = 0; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + throw new Error("workflow should not spawn tasks"); + }, + }, + generateRunId: () => `wfr_owner_${++nextRunId}`, + runnerId: "runner-a", + clock: { + nowIso: () => "2026-05-29T00:00:00.000Z", + nowMs: () => 1_000, + }, + }); + + await service.startNamedWorkflow({ + name: "demo", + workspaceId: "workspace-1", + projectTrusted: true, + args: {}, + }); + await service.startNamedWorkflow({ + name: "demo", + workspaceId: "workspace-1", + projectTrusted: true, + args: {}, + }); + + expect(ownerIds).toHaveLength(2); + expect(new Set(ownerIds).size).toBe(2); + expect(ownerIds.every((ownerId) => ownerId.startsWith("runner-a:"))).toBe(true); + }); + + test("requires current project trust before resuming project-local workflow runs", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + await runStore.createRun({ + id: "wfr_project_resume", + workspaceId: "workspace-1", + definition: { + name: "project-flow", + description: "Project", + scope: "project", + executable: true, + }, + definitionSource: + "export default function workflow({ agent }) { return agent({ id: 'trusted-step', prompt: 'run' }); }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + await runStore.appendStatus("wfr_project_resume", "interrupted", "2026-05-29T00:00:01.000Z"); + let taskCalls = 0; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot: path.join(tmp.path, "project"), + globalRoot: path.join(tmp.path, "global"), + builtIns: [], + }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + taskCalls += 1; + return { taskId: "task_trusted", reportMarkdown: 
"should not run" }; + }, + }, + runnerId: "runner-a", + }); + + await expect( + service.resumeRunInBackground({ + workspaceId: "workspace-1", + runId: "wfr_project_resume", + projectTrusted: false, + }) + ).rejects.toThrow(/Project trust/); + await runStore.appendStatus("wfr_project_resume", "running", "2026-05-29T00:00:02.000Z", { + allowInterruptedResume: true, + }); + + await expect( + service.resumeCrashedRuns({ workspaceId: "workspace-1", projectTrusted: false }) + ).resolves.toEqual([]); + expect(taskCalls).toBe(0); + }); + + test("requires current project trust before resuming scratch workflow runs", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + await runStore.createRun({ + id: "wfr_scratch_resume", + workspaceId: "workspace-1", + definition: { + name: "scratch-flow", + description: "Scratch", + scope: "scratch", + executable: true, + }, + definitionSource: + "export default function workflow({ agent }) { return agent({ id: 'scratch-step', prompt: 'run' }); }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + await runStore.appendStatus("wfr_scratch_resume", "interrupted", "2026-05-29T00:00:01.000Z"); + let taskCalls = 0; + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ + projectRoot: path.join(tmp.path, "project"), + globalRoot: path.join(tmp.path, "global"), + builtIns: [], + }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + taskCalls += 1; + return { taskId: "task_scratch", reportMarkdown: "should not run" }; + }, + }, + runnerId: "runner-a", + }); + + await expect( + service.resumeRunInBackground({ + workspaceId: "workspace-1", + runId: "wfr_scratch_resume", + projectTrusted: false, + }) + ).rejects.toThrow(/Project trust/); + await runStore.appendStatus("wfr_scratch_resume", "running", "2026-05-29T00:00:02.000Z", { + allowInterruptedResume: true, + }); + + await 
expect( + service.resumeCrashedRuns({ workspaceId: "workspace-1", projectTrusted: false }) + ).resolves.toEqual([]); + expect(taskCalls).toBe(0); + }); + + test("requires project trust before promoting scratch workflow runs", async () => { + using tmp = new DisposableTempDir("workflow-service"); + const projectRoot = path.join(tmp.path, "project", ".mux", "workflows"); + const globalRoot = path.join(tmp.path, "mux-home", "workflows"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path }); + await runStore.createRun({ + id: "wfr_scratch", + workspaceId: "workspace-1", + definition: { name: "scratch", description: "Scratch", scope: "scratch", executable: true }, + definitionSource: "export default function workflow() { return null; }\n", + args: {}, + now: "2026-05-29T00:00:00.000Z", + }); + const service = new WorkflowService({ + definitionStore: new WorkflowDefinitionStore({ projectRoot, globalRoot, builtIns: [] }), + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent() { + return { taskId: "task_1", reportMarkdown: "unused" }; + }, + }, + runnerId: "runner-a", + }); + + await expect( + service.promoteScratchWorkflow({ + workspaceId: "workspace-1", + runId: "wfr_scratch", + name: "global-research", + description: "Global research workflow", + location: "global", + overwrite: false, + projectTrusted: false, + }) + ).rejects.toThrow(/Project trust/); + }); +}); diff --git a/src/node/services/workflows/WorkflowService.ts b/src/node/services/workflows/WorkflowService.ts new file mode 100644 index 0000000000..b651586d23 --- /dev/null +++ b/src/node/services/workflows/WorkflowService.ts @@ -0,0 +1,659 @@ +import * as crypto from "node:crypto"; + +import type { + WorkflowDefinitionDescriptor, + WorkflowRunRecord, + WorkflowRunStatus, +} from "@/common/types/workflow"; +import assert from "@/common/utils/assert"; +import type { IJSRuntimeFactory } from "@/node/services/ptc/runtime"; +import type { + 
WorkflowDefinitionStore,
  WorkflowDefinitionReadResult,
  WorkflowPromotionLocation,
} from "./WorkflowDefinitionStore";
import type { WorkflowRunStore } from "./WorkflowRunStore";
import {
  WorkflowRunBackgroundedError,
  WorkflowRunner,
  type WorkflowRunnerClock,
  type WorkflowRunnerRunOptions,
  type WorkflowTaskAdapter,
} from "./WorkflowRunner";

/** Payload handed to `WorkflowServiceOptions.onBackgroundRunTerminal`. */
export interface WorkflowBackgroundRunTerminalEvent {
  runId: string;
  status: WorkflowRunStatus;
  result: unknown;
  run: WorkflowRunRecord;
}

/** Constructor dependencies and hooks for `WorkflowService`. */
export interface WorkflowServiceOptions {
  definitionStore: WorkflowDefinitionStore;
  runStore: WorkflowRunStore;
  runtimeFactory: IJSRuntimeFactory;
  /** Shared task adapter; the constructor requires this or `taskAdapterFactory`. */
  taskAdapter?: WorkflowTaskAdapter;
  /** Per-run task adapter factory; the constructor requires this or `taskAdapter`. */
  taskAdapterFactory?: (runId: string) => WorkflowTaskAdapter;
  /** Optional hook for background runs; may be synchronous or asynchronous. */
  onBackgroundRunTerminal?: (event: WorkflowBackgroundRunTerminalEvent) => Promise<void> | void;
  /** Overrides run id generation (the service falls back to its own generator). */
  generateRunId?: () => string;
  // Delayed crash-recovery retries must use current trust, not the value captured when scheduled.
  getCurrentProjectTrusted?: () => boolean | Promise<boolean>;
  /** Stable prefix; WorkflowService appends run identity and a nonce for each lease owner. */
  runnerId: string;
  /** Optional clock override; when absent the service uses `Date`. */
  clock?: WorkflowRunnerClock;
}

/** Payload handed to `StartNamedWorkflowInput.onBackgroundRunCreated`. */
export interface WorkflowBackgroundRunCreatedEvent {
  runId: string;
  status: "running";
  result: null;
  run: WorkflowRunRecord;
}

/** Input for starting a named workflow definition, in the foreground or the background. */
export interface StartNamedWorkflowInput {
  name: string;
  workspaceId: string;
  projectTrusted: boolean;
  args: unknown;
  /** Awaited by `startNamedWorkflowInBackground` before the run is handed to a runner. */
  onBackgroundRunCreated?: (event: WorkflowBackgroundRunCreatedEvent) => Promise<void> | void;
  /** When aborted, a foreground `startNamedWorkflow` call interrupts the run. */
  abortSignal?: AbortSignal;
}

/** Input for promoting a scratch definition (looked up by name) to a reusable location. */
export interface PromoteScratchDefinitionInput {
  workspaceId: string;
  name: string;
  description: string;
  location: WorkflowPromotionLocation;
  overwrite: boolean;
  projectTrusted: boolean;
}

/** Input for promoting the source captured by a scratch run to a reusable location. */
export interface PromoteScratchWorkflowInput {
  workspaceId: string;
  runId: string;
  name: string;
  description: string;
  location: WorkflowPromotionLocation;
  overwrite: boolean;
  projectTrusted: boolean;
}

/** Result of the start/resume entry points; `result` is `null` for runs that are still going. */
export interface StartNamedWorkflowResult {
  runId: string;
  status: WorkflowRunStatus;
  result: unknown;
}

// NOTE(review): the consumer of this set is outside this excerpt; presumably it selects the
// terminal statuses that trigger a background continuation notification — confirm.
const WORKFLOW_BACKGROUND_CONTINUATION_STATUSES = new Set<WorkflowRunStatus>([
  "completed",
  "failed",
]);

// oRPC creates a WorkflowService per request, so workflow lifecycle state that spans requests
// needs process-wide registries.
+const pendingCrashResumeTimers = new Map>(); +const activeWorkflowRunnerAbortControllers = new Map(); + +export class WorkflowService { + private readonly definitionStore: WorkflowDefinitionStore; + private readonly runStore: WorkflowRunStore; + private readonly runtimeFactory: IJSRuntimeFactory; + private readonly taskAdapter?: WorkflowTaskAdapter; + private readonly taskAdapterFactory?: (runId: string) => WorkflowTaskAdapter; + private readonly onBackgroundRunTerminal?: ( + event: WorkflowBackgroundRunTerminalEvent + ) => Promise | void; + private readonly generateRunId: () => string; + private readonly getCurrentProjectTrusted?: () => boolean | Promise; + private readonly runnerId: string; + private readonly clock?: WorkflowRunnerClock; + + private readonly backgroundRuns = new Set>(); + + constructor(options: WorkflowServiceOptions) { + assert(options.runnerId.length > 0, "WorkflowService: runnerId is required"); + this.definitionStore = options.definitionStore; + this.runStore = options.runStore; + this.runtimeFactory = options.runtimeFactory; + assert( + options.taskAdapter != null || options.taskAdapterFactory != null, + "WorkflowService: taskAdapter or taskAdapterFactory is required" + ); + this.taskAdapter = options.taskAdapter; + this.taskAdapterFactory = options.taskAdapterFactory; + this.onBackgroundRunTerminal = options.onBackgroundRunTerminal; + this.generateRunId = options.generateRunId ?? 
generateWorkflowRunId; + this.getCurrentProjectTrusted = options.getCurrentProjectTrusted; + this.runnerId = options.runnerId; + this.clock = options.clock; + } + + async listDefinitions(options: { + projectTrusted: boolean; + }): Promise { + return await this.definitionStore.listDefinitions(options); + } + + async readDefinition(input: { + name: string; + projectTrusted: boolean; + }): Promise { + return await this.definitionStore.readDefinition(input.name, { + projectTrusted: input.projectTrusted, + }); + } + + async listRuns(input: { workspaceId: string }): Promise { + assert(input.workspaceId.length > 0, "WorkflowService.listRuns: workspaceId is required"); + const runs = await this.runStore.listRuns(); + return runs.filter((run) => run.workspaceId === input.workspaceId); + } + + async resumeCrashedRuns(input: { + workspaceId: string; + projectTrusted: boolean; + }): Promise { + assert( + input.workspaceId.length > 0, + "WorkflowService.resumeCrashedRuns: workspaceId is required" + ); + const runs = await this.listRuns({ workspaceId: input.workspaceId }); + const resumable = runs.filter( + (run) => run.status === "running" || run.status === "backgrounded" + ); + const resumedRunIds: string[] = []; + for (const run of resumable) { + if ( + await this.resumeCrashRecoveredRun({ + runId: run.id, + projectTrusted: input.projectTrusted, + failureMessage: "Auto-resumed workflow run failed:", + }) + ) { + resumedRunIds.push(run.id); + } + } + return resumedRunIds; + } + + async getRun(input: { workspaceId: string; runId: string }): Promise { + assert(input.workspaceId.length > 0, "WorkflowService.getRun: workspaceId is required"); + assert(input.runId.length > 0, "WorkflowService.getRun: runId is required"); + try { + const run = await this.runStore.getRun(input.runId); + return run.workspaceId === input.workspaceId ? 
run : null; + } catch { + return null; + } + } + + async interruptRun(input: { workspaceId: string; runId: string }): Promise { + const run = await this.requireRunForWorkspace(input); + assertWorkflowRunCanTransition(run.status, "interrupted"); + // Stop the active coordinator only after ownership is validated; child cleanup and status + // writes can block on I/O, but a mis-scoped request must not abort another workspace's run. + this.abortActiveRunner(input.runId); + const interrupted = await this.runStore.appendStatus( + input.runId, + "interrupted", + this.clock?.nowIso() ?? new Date().toISOString() + ); + await (this.taskAdapterFactory?.(input.runId) ?? this.requireTaskAdapter()).interruptRun?.(); + return interrupted; + } + + async resumeRunInBackground(input: { + workspaceId: string; + runId: string; + projectTrusted: boolean; + }): Promise { + const run = await this.requireRunForWorkspace(input); + assertRunCanResumeWithCurrentTrust(run, input.projectTrusted); + assertWorkflowRunCanTransition(run.status, "running"); + await this.runInBackground(input.runId, "Background workflow resume failed:", { + allowResumeFromInterrupted: run.status === "interrupted", + }); + return { runId: input.runId, status: "running", result: null }; + } + + async resumeRun(input: { + workspaceId: string; + runId: string; + projectTrusted: boolean; + }): Promise { + const run = await this.requireRunForWorkspace(input); + assertRunCanResumeWithCurrentTrust(run, input.projectTrusted); + assertWorkflowRunCanTransition(run.status, "running"); + const runnerAbortController = new AbortController(); + let unregisterRunnerAbort: () => void = () => undefined; + try { + const runner = this.createRunner(input.runId); + const result = await runner.run(input.runId, { + abortSignal: runnerAbortController.signal, + onLeaseAcquired: () => { + unregisterRunnerAbort = this.registerActiveRunnerAbortController( + input.runId, + runnerAbortController + ); + }, + allowResumeFromInterrupted: run.status 
=== "interrupted", + }); + return { runId: input.runId, status: "completed", result }; + } catch (error) { + if (error instanceof WorkflowRunBackgroundedError) { + void this.runInBackground(input.runId, "Backgrounded workflow resume failed:").catch( + () => undefined + ); + return { runId: input.runId, status: "backgrounded", result: null }; + } + throw error; + } finally { + unregisterRunnerAbort(); + } + } + + async promoteScratchDefinition( + input: PromoteScratchDefinitionInput + ): Promise { + assert( + input.workspaceId.length > 0, + "WorkflowService.promoteScratchDefinition: workspaceId is required" + ); + if (!input.projectTrusted) { + throw new Error("Project trust is required to promote scratch workflow definitions"); + } + const definition = await this.definitionStore.readDefinition(input.name, { + projectTrusted: input.projectTrusted, + }); + if (definition.descriptor.scope !== "scratch") { + throw new Error("Only scratch workflow definitions can be promoted"); + } + return await this.definitionStore.promoteDefinition({ + name: input.name, + description: input.description, + source: definition.source, + location: input.location, + overwrite: input.overwrite, + projectTrusted: input.projectTrusted, + }); + } + + async promoteScratchWorkflow( + input: PromoteScratchWorkflowInput + ): Promise { + const run = await this.requireRunForWorkspace(input); + if (run.definition.scope !== "scratch") { + throw new Error("Only scratch workflow runs can be promoted"); + } + if (!input.projectTrusted) { + throw new Error("Project trust is required to promote scratch workflow runs"); + } + return await this.definitionStore.promoteDefinition({ + name: input.name, + description: input.description, + source: run.definitionSource, + location: input.location, + overwrite: input.overwrite, + projectTrusted: input.projectTrusted, + }); + } + + async startNamedWorkflowInBackground( + input: StartNamedWorkflowInput + ): Promise { + const runId = await 
this.createNamedWorkflowRun(input); + const run = await this.runStore.appendStatus( + runId, + "running", + this.clock?.nowIso() ?? new Date().toISOString() + ); + await input.onBackgroundRunCreated?.({ runId, status: "running", result: null, run }); + void this.runInBackground(runId, "Background workflow run failed:").catch(() => undefined); + return { runId, status: "running", result: null }; + } + + async startNamedWorkflow(input: StartNamedWorkflowInput): Promise { + const runId = await this.createNamedWorkflowRun(input); + if (input.abortSignal?.aborted === true) { + await this.interruptRun({ workspaceId: input.workspaceId, runId }); + throw new Error(`Workflow run interrupted: ${runId}`); + } + + const runnerAbortController = new AbortController(); + let unregisterRunnerAbort: () => void = () => undefined; + const abortInterrupt = this.interruptRunOnAbort( + input.workspaceId, + runId, + input.abortSignal, + runnerAbortController + ); + try { + const runner = this.createRunner(runId); + const result = await runner.run(runId, { + abortSignal: runnerAbortController.signal, + onLeaseAcquired: () => { + unregisterRunnerAbort = this.registerActiveRunnerAbortController( + runId, + runnerAbortController + ); + }, + }); + return { runId, status: "completed", result }; + } catch (error) { + if (error instanceof WorkflowRunBackgroundedError) { + void this.runInBackground(runId, "Backgrounded workflow run failed:").catch( + () => undefined + ); + return { runId, status: "backgrounded", result: null }; + } + throw error; + } finally { + abortInterrupt.remove(); + await abortInterrupt.wait(); + unregisterRunnerAbort(); + } + } + + private async resumeCrashRecoveredRun(input: { + runId: string; + projectTrusted: boolean; + failureMessage: string; + }): Promise { + const projectTrusted = await this.resolveCurrentProjectTrust(input.projectTrusted); + const run = await this.getCrashRecoverableRun(input.runId); + if (run == null || !canResumeRunWithCurrentTrust(run, 
projectTrusted)) { + return false; + } + + const retryDelayMs = await this.runStore.getLeaseRetryDelayMs( + input.runId, + this.clock?.nowMs() ?? Date.now() + ); + if (retryDelayMs > 0) { + this.scheduleCrashResumeRetry(input, retryDelayMs); + return true; + } + + try { + await this.runInBackground(input.runId, input.failureMessage); + return true; + } catch (error) { + if (isWorkflowRunAlreadyActiveError(error, input.runId)) { + const nextRetryDelayMs = await this.runStore.getLeaseRetryDelayMs( + input.runId, + this.clock?.nowMs() ?? Date.now() + ); + this.scheduleCrashResumeRetry(input, Math.max(1, nextRetryDelayMs)); + return true; + } + console.error(input.failureMessage, error); + return false; + } + } + + private async resolveCurrentProjectTrust(fallback: boolean): Promise { + return (await this.getCurrentProjectTrusted?.()) ?? fallback; + } + + private async getCrashRecoverableRun(runId: string): Promise { + try { + const run = await this.runStore.getRun(runId); + return run.status === "running" || run.status === "backgrounded" ? 
/**
 * Arms a one-shot timer that re-attempts crash recovery for `input.runId` after `delayMs`.
 *
 * At most one pending timer is tracked per run id in `pendingCrashResumeTimers`; when an
 * entry already exists this call is a no-op.
 *
 * @param input - Run id, trust flag, and the log prefix used if the retry rejects.
 * @param delayMs - Delay before the retry; must be strictly positive.
 */
private scheduleCrashResumeRetry(
  input: { runId: string; projectTrusted: boolean; failureMessage: string },
  delayMs: number
): void {
  assert(delayMs > 0, "WorkflowService.scheduleCrashResumeRetry: delayMs must be positive");
  if (pendingCrashResumeTimers.has(input.runId)) {
    return;
  }
  const timer = setTimeout(() => {
    // Drop the bookkeeping entry before retrying so the retry can schedule a follow-up timer.
    pendingCrashResumeTimers.delete(input.runId);
    void this.resumeCrashRecoveredRun(input).catch((error: unknown) => {
      console.error(input.failureMessage, error);
    });
  }, delayMs);
  unrefTimer(timer);
  pendingCrashResumeTimers.set(input.runId, timer);
}

/**
 * Records `controller` as the abort controller of the active runner for `runId`.
 *
 * A different controller already registered for the same run is aborted before being replaced.
 *
 * @returns An unregister callback that removes the entry only while it still points at
 *   `controller`, so a stale runner cannot evict its replacement.
 */
private registerActiveRunnerAbortController(
  runId: string,
  controller: AbortController
): () => void {
  assert(runId.length > 0, "WorkflowService.registerActiveRunnerAbortController: runId required");
  const existing = activeWorkflowRunnerAbortControllers.get(runId);
  if (existing != null && existing !== controller) {
    existing.abort();
  }
  activeWorkflowRunnerAbortControllers.set(runId, controller);
  return () => {
    if (activeWorkflowRunnerAbortControllers.get(runId) === controller) {
      activeWorkflowRunnerAbortControllers.delete(runId);
    }
  };
}

/** Aborts the registered runner controller for `runId`, if one is registered. */
private abortActiveRunner(runId: string): void {
  activeWorkflowRunnerAbortControllers.get(runId)?.abort();
}

/**
 * Ties an external abort signal to interruption of a workflow run.
 *
 * When `abortSignal` aborts, `runnerAbortController` is aborted first and `interruptRun` is
 * then started in the background; errors from `interruptRun` are swallowed on purpose.
 * A signal that is already aborted when this method is called triggers the same sequence
 * immediately.
 *
 * @param abortSignal - Signal to observe; when absent the returned handle is inert.
 * @returns `remove` detaches the listener; `wait` resolves once any interrupt that was
 *   started has finished (immediately when none was started).
 */
private interruptRunOnAbort(
  workspaceId: string,
  runId: string,
  abortSignal: AbortSignal | undefined,
  runnerAbortController: AbortController | undefined
): { remove: () => void; wait: () => Promise<void> } {
  if (abortSignal == null) {
    return { remove: () => undefined, wait: () => Promise.resolve() };
  }
  let interruptPromise: Promise<void> | null = null;
  const interrupt = () => {
    // Cancel the coordinator before interrupt side effects can block on task cleanup or disk I/O.
    runnerAbortController?.abort();
    interruptPromise = (async () => {
      try {
        await this.interruptRun({ workspaceId, runId });
      } catch {
        // The run may have completed or failed before the abort event was delivered.
      }
    })();
  };
  if (abortSignal.aborted) {
    // The "abort" event is dispatched only once, so a listener attached to an already-aborted
    // signal would never run; interrupt right away instead of silently ignoring the abort.
    interrupt();
  } else {
    abortSignal.addEventListener("abort", interrupt, { once: true });
  }
  return {
    // Removing a listener that was never attached is a harmless no-op.
    remove: () => abortSignal.removeEventListener("abort", interrupt),
    wait: async () => {
      await interruptPromise;
    },
  };
}
new Date().toISOString(), + }); + return runId; + } + + private runInBackground( + runId: string, + failureMessage: string, + runnerOptions: Pick = {} + ): Promise { + const runner = this.createRunner(runId); + const runnerAbortController = new AbortController(); + let unregisterRunnerAbort: () => void = () => undefined; + let startedSettled = false; + let resolveStarted: (() => void) | null = null; + let rejectStarted: ((error: unknown) => void) | null = null; + const started = new Promise((resolve, reject) => { + resolveStarted = resolve; + rejectStarted = reject; + }); + const markStarted = () => { + if (startedSettled) { + return; + } + startedSettled = true; + assert(resolveStarted != null, "WorkflowService.runInBackground: resolveStarted missing"); + resolveStarted(); + }; + const markStartFailed = (error: unknown) => { + if (startedSettled) { + return; + } + startedSettled = true; + assert(rejectStarted != null, "WorkflowService.runInBackground: rejectStarted missing"); + rejectStarted(error); + }; + const markLeaseAcquired = () => { + unregisterRunnerAbort = this.registerActiveRunnerAbortController( + runId, + runnerAbortController + ); + markStarted(); + }; + const runPromise = runner + .run(runId, { + abortSignal: runnerAbortController.signal, + onLeaseAcquired: markLeaseAcquired, + backgroundOnMessageQueued: false, + ...runnerOptions, + }) + .then(async (result) => { + await this.notifyBackgroundRunTerminal(runId, result); + }) + .catch(async (error: unknown) => { + const hadStarted = startedSettled; + markStartFailed(error); + if (hadStarted || !isWorkflowRunAlreadyActiveError(error, runId)) { + console.error(failureMessage, error); + await this.notifyBackgroundRunTerminal(runId, null); + } + }); + this.backgroundRuns.add(runPromise); + void runPromise.finally(() => { + unregisterRunnerAbort(); + this.backgroundRuns.delete(runPromise); + }); + return started; + } + + private async notifyBackgroundRunTerminal(runId: string, result: unknown): Promise { + 
/**
 * Tells whether `error` is the "already active" failure for exactly this run.
 * The check is an exact match on the error message, including the run id.
 */
function isWorkflowRunAlreadyActiveError(error: unknown, runId: string): boolean {
  if (!(error instanceof Error)) {
    return false;
  }
  const expectedMessage = `Workflow run is already active: ${runId}`;
  return error.message === expectedMessage;
}

/**
 * Calls `timer.unref()` when the timer handle is an object exposing an `unref` function;
 * any other kind of handle is left untouched.
 */
function unrefTimer(timer: ReturnType<typeof setTimeout>): void {
  if (
    typeof timer === "object" &&
    timer != null &&
    "unref" in timer &&
    typeof timer.unref === "function"
  ) {
    timer.unref();
  }
}

/**
 * Decides whether a run may be resumed under the current trust state.
 * Runs whose definition scope is "project" or "scratch" need a trusted project; every other
 * scope is always resumable.
 */
function canResumeRunWithCurrentTrust(run: WorkflowRunRecord, projectTrusted: boolean): boolean {
  if (projectTrusted) {
    return true;
  }
  const scope = run.definition.scope;
  return scope !== "project" && scope !== "scratch";
}

/**
 * Throwing counterpart of `canResumeRunWithCurrentTrust`.
 *
 * @throws Error when the run needs project trust that is not currently granted.
 */
function assertRunCanResumeWithCurrentTrust(run: WorkflowRunRecord, projectTrusted: boolean): void {
  const allowed = canResumeRunWithCurrentTrust(run, projectTrusted);
  if (allowed) {
    return;
  }
  throw new Error("Project trust is required to resume project-local or scratch workflow runs");
}

/**
 * Rejects any transition that leaves the "completed" or "failed" status.
 *
 * @throws Error naming both statuses when `from` is "completed" or "failed".
 */
function assertWorkflowRunCanTransition(from: WorkflowRunStatus, to: WorkflowRunStatus): void {
  switch (from) {
    case "completed":
    case "failed":
      throw new Error(`Cannot transition workflow run from ${from} to ${to}`);
    default:
      return;
  }
}

/**
 * Builds a lease owner id of the form `<baseRunnerId>:<runId>:<16 hex chars>`.
 * Both inputs must be non-empty.
 */
function generateWorkflowRunnerOwnerId(baseRunnerId: string, runId: string): string {
  assert(baseRunnerId.length > 0, "WorkflowService: base runner id is required");
  assert(runId.length > 0, "WorkflowService: run id is required for runner owner id");
  // Lease ownership must fence individual runner processes, not just the workspace/request that
  // created them, so stale runners cannot renew or release a replacement runner's lease.
  const nonce = crypto.randomBytes(8).toString("hex");
  return [baseRunnerId, runId, nonce].join(":");
}

/** Produces a fresh run id: the `wfr_` prefix followed by 16 random hex characters. */
function generateWorkflowRunId(): string {
  const suffix = crypto.randomBytes(8).toString("hex");
  return "wfr_" + suffix;
}
expect(result).toEqual({ + taskId: "task_1", + reportMarkdown: "child report", + structuredOutput: { claims: ["durable"] }, + }); + }); + + test("inherits experiments for task creation", async () => { + let createArgs: unknown; + const create = mock(async (args: unknown) => { + createArgs = args; + return Ok({ taskId: "task_1", kind: "agent" as const, status: "running" as const }); + }); + const waitForAgentReport = mock(async () => ({ reportMarkdown: "child report" })); + const adapter = new WorkflowTaskServiceAdapter({ + taskService: { create, waitForAgentReport }, + parentWorkspaceId: "parent_1", + workflowRunId: "wfr_123", + defaultAgentId: "explore", + experiments: { dynamicWorkflows: true, subagentFileReports: true }, + }); + + await adapter.runAgent({ + id: "claims", + agentId: "exec", + prompt: "Extract claims", + outputSchema: { type: "object" }, + }); + + expect(createArgs).toMatchObject({ + agentId: "exec", + prompt: "Extract claims", + experiments: { dynamicWorkflows: true, subagentFileReports: true }, + }); + }); + + test("disables file-backed reports for read-only Explore workflow tasks", async () => { + let createArgs: unknown; + const create = mock(async (args: unknown) => { + createArgs = args; + return Ok({ taskId: "task_1", kind: "agent" as const, status: "running" as const }); + }); + const waitForAgentReport = mock(async () => ({ reportMarkdown: "child report" })); + const adapter = new WorkflowTaskServiceAdapter({ + taskService: { create, waitForAgentReport }, + parentWorkspaceId: "parent_1", + workflowRunId: "wfr_123", + defaultAgentId: "explore", + experiments: { dynamicWorkflows: true, subagentFileReports: true }, + }); + + await adapter.runAgent({ id: "source", prompt: "Read source" }); + + expect(createArgs).toMatchObject({ + agentId: "explore", + experiments: { dynamicWorkflows: true, subagentFileReports: false }, + }); + }); + + test("passes workflow wait options into report waits", async () => { + const abortController = new 
AbortController(); + const create = mock(async () => + Ok({ taskId: "task_1", kind: "agent" as const, status: "running" as const }) + ); + const waitForAgentReport = mock(async () => ({ reportMarkdown: "child report" })); + const adapter = new WorkflowTaskServiceAdapter({ + taskService: { create, waitForAgentReport }, + parentWorkspaceId: "parent_1", + workflowRunId: "wfr_123", + defaultAgentId: "explore", + }); + + await adapter.runAgent({ id: "claims", prompt: "Extract claims" }, undefined, { + abortSignal: abortController.signal, + timeoutMs: 1_234, + backgroundOnMessageQueued: false, + }); + + expect(waitForAgentReport).toHaveBeenCalledWith("task_1", { + abortSignal: abortController.signal, + timeoutMs: 1_234, + requestingWorkspaceId: "parent_1", + backgroundOnMessageQueued: false, + }); + }); + + test("dry-runs before applying workflow patch artifacts", async () => { + const create = mock(async () => + Ok({ taskId: "task_1", kind: "agent" as const, status: "running" as const }) + ); + const waitForAgentReport = mock(async () => ({ reportMarkdown: "unused" })); + const calls: unknown[] = []; + const adapter = new WorkflowTaskServiceAdapter({ + taskService: { create, waitForAgentReport }, + parentWorkspaceId: "parent_1", + workflowRunId: "wfr_123", + defaultAgentId: "explore", + getProjectTrusted: () => true, + applyPatchArtifact: async (args) => { + calls.push(args); + return { + success: true, + taskId: args.task_id, + dryRun: args.dry_run === true, + projectResults: [{ projectPath: "/repo", projectName: "repo", status: "applied" }], + }; + }, + }); + + const result = await adapter.applyPatch({ + id: "apply-impl", + sourceTaskId: "task_impl", + target: "parent", + projectPath: "/repo", + threeWay: true, + force: false, + }); + + expect(calls).toEqual([ + { + task_id: "task_impl", + project_path: "/repo", + three_way: true, + force: false, + dry_run: true, + }, + { + task_id: "task_impl", + project_path: "/repo", + three_way: true, + force: false, + dry_run: 
false, + }, + ]); + expect(result).toMatchObject({ success: true, dryRun: false }); + }); + + test("returns dry-run conflicts without applying workflow patches", async () => { + const create = mock(async () => + Ok({ taskId: "task_1", kind: "agent" as const, status: "running" as const }) + ); + const waitForAgentReport = mock(async () => ({ reportMarkdown: "unused" })); + const calls: unknown[] = []; + const adapter = new WorkflowTaskServiceAdapter({ + taskService: { create, waitForAgentReport }, + parentWorkspaceId: "parent_1", + workflowRunId: "wfr_123", + defaultAgentId: "explore", + getProjectTrusted: () => true, + applyPatchArtifact: async (args) => { + calls.push(args); + return { + success: false, + taskId: args.task_id, + dryRun: true, + error: "Patch failed", + conflictPaths: ["src/auth.ts"], + }; + }, + }); + + const result = await adapter.applyPatch({ + id: "apply-impl", + sourceTaskId: "task_impl", + target: "parent", + threeWay: true, + force: false, + }); + + expect(calls).toEqual([{ task_id: "task_impl", three_way: true, force: false, dry_run: true }]); + expect(result).toMatchObject({ success: false, conflictPaths: ["src/auth.ts"] }); + }); + + test("requires live Project Trust before applying workflow patches", async () => { + const create = mock(async () => + Ok({ taskId: "task_1", kind: "agent" as const, status: "running" as const }) + ); + const waitForAgentReport = mock(async () => ({ reportMarkdown: "unused" })); + const applyPatchArtifact = mock(async () => ({ + success: true as const, + taskId: "task_impl", + projectResults: [], + })); + const adapter = new WorkflowTaskServiceAdapter({ + taskService: { create, waitForAgentReport }, + parentWorkspaceId: "parent_1", + workflowRunId: "wfr_123", + defaultAgentId: "explore", + getProjectTrusted: () => false, + applyPatchArtifact, + }); + + await expect( + adapter.applyPatch({ + id: "apply-impl", + sourceTaskId: "task_impl", + target: "parent", + threeWay: true, + force: false, + }) + 
).rejects.toThrow(/Project Trust/); + expect(applyPatchArtifact).not.toHaveBeenCalled(); + }); + + test("interrupts preserved descendant task workspaces for the parent workspace", async () => { + const create = mock(async () => + Ok({ taskId: "task_1", kind: "agent" as const, status: "running" as const }) + ); + const waitForAgentReport = mock(async () => ({ reportMarkdown: "unused" })); + const terminateAllDescendantAgentTasks = mock(async () => ["task_1"]); + const adapter = new WorkflowTaskServiceAdapter({ + taskService: { create, waitForAgentReport, terminateAllDescendantAgentTasks }, + parentWorkspaceId: "parent_1", + workflowRunId: "wfr_123", + defaultAgentId: "explore", + }); + + await adapter.interruptRun(); + + expect(terminateAllDescendantAgentTasks).toHaveBeenCalledWith("parent_1", { + workflowRunId: "wfr_123", + }); + }); + + test("fails fast when task creation fails", async () => { + const create = mock(async () => ({ success: false as const, error: "no runnable agent" })); + const waitForAgentReport = mock(async () => ({ reportMarkdown: "should not wait" })); + const adapter = new WorkflowTaskServiceAdapter({ + taskService: { create, waitForAgentReport }, + parentWorkspaceId: "parent_1", + workflowRunId: "wfr_123", + defaultAgentId: "explore", + }); + + await expect(adapter.runAgent({ id: "claims", prompt: "Extract claims" })).rejects.toThrow( + /no runnable agent/ + ); + expect(waitForAgentReport).not.toHaveBeenCalled(); + }); +}); diff --git a/src/node/services/workflows/WorkflowTaskServiceAdapter.ts b/src/node/services/workflows/WorkflowTaskServiceAdapter.ts new file mode 100644 index 0000000000..069a908c30 --- /dev/null +++ b/src/node/services/workflows/WorkflowTaskServiceAdapter.ts @@ -0,0 +1,245 @@ +import assert from "@/common/utils/assert"; +import { AsyncMutex } from "@/node/utils/concurrency/asyncMutex"; +import type { TaskCreateResult } from "@/node/services/taskService"; +import type { + WorkflowAgentResult, + WorkflowAgentSpec, + 
/**
 * Task adapter that serves a single workflow run by delegating to the task service.
 *
 * - `runAgent` creates a child agent task under the parent workspace and waits for its report.
 * - `applyPatch` applies a task's git patch artifact, always preceded by a dry run.
 * - `interruptRun` asks the task service to terminate descendant agent tasks of this run.
 */
export class WorkflowTaskServiceAdapter implements WorkflowTaskAdapter {
  private readonly taskService: WorkflowTaskServiceLike;
  private readonly parentWorkspaceId: string;
  private readonly workflowRunId: string;
  private readonly defaultAgentId: string;
  private readonly patchToolConfig?: TaskApplyGitPatchConfiguration;
  private readonly applyPatchArtifact?: WorkflowPatchArtifactApplier;
  private readonly getProjectTrusted?: WorkflowTaskServiceAdapterOptions["getProjectTrusted"];
  private readonly patchApplyMutex = new AsyncMutex();
  private readonly experiments?: WorkflowTaskExperiments;

  /** Validates the three required identifiers, then stores every option on the instance. */
  constructor(options: WorkflowTaskServiceAdapterOptions) {
    const { parentWorkspaceId, workflowRunId, defaultAgentId } = options;
    assert(
      parentWorkspaceId.length > 0,
      "WorkflowTaskServiceAdapter: parentWorkspaceId is required"
    );
    assert(workflowRunId.length > 0, "WorkflowTaskServiceAdapter: workflowRunId is required");
    assert(defaultAgentId.length > 0, "WorkflowTaskServiceAdapter: defaultAgentId is required");

    this.taskService = options.taskService;
    this.parentWorkspaceId = parentWorkspaceId;
    this.workflowRunId = workflowRunId;
    this.defaultAgentId = defaultAgentId;
    this.patchToolConfig = options.patchToolConfig;
    this.applyPatchArtifact = options.applyPatchArtifact;
    this.getProjectTrusted = options.getProjectTrusted;
    this.experiments = options.experiments;
  }

  /**
   * Applies the patch artifact of `spec.sourceTaskId`.
   *
   * The patch is first applied with `dry_run: true`; a failed dry run is returned as-is and
   * the real apply is skipped. Otherwise the result of the real apply is returned.
   *
   * @throws Error when `getProjectTrusted` is missing or does not yield `true`, or when
   *   neither an applier nor patch tool configuration was supplied.
   */
  async applyPatch(
    spec: WorkflowApplyPatchSpec,
    options?: { abortSignal?: AbortSignal }
  ): Promise<TaskApplyGitPatchResult> {
    assert(spec.id.length > 0, "WorkflowTaskServiceAdapter.applyPatch: spec.id is required");
    assert(
      spec.sourceTaskId.length > 0,
      "WorkflowTaskServiceAdapter.applyPatch: sourceTaskId is required"
    );
    const trusted = await this.getProjectTrusted?.();
    if (trusted !== true) {
      throw new Error("applyPatch requires Project Trust");
    }

    // Applying one patch mutates HEAD, so complete each dry-run + real apply pair before
    // checking the next patch. This preserves the old Orchestrator conflict model.
    await using _patchLock = await this.patchApplyMutex.acquire();
    const applier = this.resolvePatchArtifactApplier();
    const sharedArgs: TaskApplyGitPatchArgs = {
      task_id: spec.sourceTaskId,
      ...(spec.projectPath != null ? { project_path: spec.projectPath } : {}),
      three_way: spec.threeWay,
      force: spec.force,
    };

    const preview = await applier({ ...sharedArgs, dry_run: true }, options);
    if (!preview.success) {
      return preview;
    }
    return await applier({ ...sharedArgs, dry_run: false }, options);
  }

  /**
   * Picks the function used to apply patch artifacts: the injected applier when present,
   * otherwise a wrapper around `applyTaskGitPatchArtifact` built from `patchToolConfig`.
   */
  private resolvePatchArtifactApplier(): WorkflowPatchArtifactApplier {
    const injected = this.applyPatchArtifact;
    if (injected != null) {
      return injected;
    }
    const config = this.patchToolConfig;
    if (config == null) {
      throw new Error("WorkflowTaskServiceAdapter.applyPatch requires patch tool configuration");
    }
    return async (args, options) => {
      const toolConfig = { ...config, trusted: true };
      return await applyTaskGitPatchArtifact(toolConfig, args, {
        abortSignal: options?.abortSignal,
        allowAlreadyApplied: true,
      });
    };
  }

  /**
   * Requests termination of descendant agent tasks for this workflow run. Does nothing when
   * the task service does not implement `terminateAllDescendantAgentTasks`.
   */
  async interruptRun(): Promise<void> {
    const scope = { workflowRunId: this.workflowRunId };
    await this.taskService.terminateAllDescendantAgentTasks?.(this.parentWorkspaceId, scope);
  }

  /**
   * Creates the agent task for a workflow step and waits for its report.
   *
   * `spec.agentId` falls back to the default agent id and `spec.title` falls back to `spec.id`.
   * `lifecycle.onTaskCreated` is awaited after creation and before the wait starts.
   *
   * @throws Error carrying the task service's error text when task creation fails.
   */
  async runAgent(
    spec: WorkflowAgentSpec,
    lifecycle?: { onTaskCreated?: (taskId: string) => Promise<void> | void },
    waitOptions?: WorkflowAgentWaitOptions
  ): Promise<WorkflowAgentResult> {
    assert(spec.id.length > 0, "WorkflowTaskServiceAdapter.runAgent: spec.id is required");
    assert(spec.prompt.length > 0, "WorkflowTaskServiceAdapter.runAgent: spec.prompt is required");

    // The outputSchema key is only present when the step declared a schema.
    const workflowTask: { runId: string; stepId: string; outputSchema?: unknown } = {
      runId: this.workflowRunId,
      stepId: spec.id,
      ...(spec.outputSchema !== undefined ? { outputSchema: spec.outputSchema } : {}),
    };

    const agentId = spec.agentId ?? this.defaultAgentId;
    const experiments = this.getExperimentsForAgent(agentId);
    const created = await this.taskService.create({
      parentWorkspaceId: this.parentWorkspaceId,
      kind: "agent",
      agentId,
      prompt: spec.prompt,
      title: spec.title ?? spec.id,
      workflowTask,
      ...(experiments !== undefined ? { experiments } : {}),
    });
    if (!created.success) {
      throw new Error(created.error);
    }

    const taskId = created.data.taskId;
    await lifecycle?.onTaskCreated?.(taskId);

    return await this.waitForAgentTask(taskId, spec, waitOptions);
  }

  /**
   * Returns the experiment flags to pass when creating a task for `agentId`, or `undefined`
   * when the adapter was constructed without experiments.
   */
  private getExperimentsForAgent(agentId: string): WorkflowTaskExperiments | undefined {
    const configured = this.experiments;
    if (configured == null) {
      return undefined;
    }

    const isExploreAgent = agentId.trim().toLowerCase() === "explore";
    const fileReportsEnabled = configured.subagentFileReports === true;
    if (isExploreAgent && fileReportsEnabled) {
      // Explore is intentionally read-only and cannot create report.md/structured-output.json.
      // Keep workflow Explore steps compatible when file-backed reporting is enabled globally.
      return { ...configured, subagentFileReports: false };
    }
    return configured;
  }

  /**
   * Waits for the report of an already-created task and converts it to a workflow result.
   *
   * `abortSignal` and `timeoutMs` are forwarded only when set, and
   * `backgroundOnMessageQueued` defaults to `true`. `title` and `structuredOutput` appear in
   * the result only when the report carries them.
   */
  async waitForAgentTask(
    taskId: string,
    _spec: WorkflowAgentSpec,
    waitOptions?: WorkflowAgentWaitOptions
  ): Promise<WorkflowAgentResult> {
    const abortSignal = waitOptions?.abortSignal;
    const timeoutMs = waitOptions?.timeoutMs;
    const { reportMarkdown, title, structuredOutput } = await this.taskService.waitForAgentReport(
      taskId,
      {
        ...(abortSignal != null ? { abortSignal } : {}),
        ...(timeoutMs != null ? { timeoutMs } : {}),
        requestingWorkspaceId: this.parentWorkspaceId,
        backgroundOnMessageQueued: waitOptions?.backgroundOnMessageQueued ?? true,
      }
    );

    return {
      taskId,
      reportMarkdown,
      ...(title != null ? { title } : {}),
      ...(structuredOutput !== undefined ? { structuredOutput } : {}),
    };
  }
}
taskCalls.push(spec); + switch (spec.id) { + case "scope-topic": + return { + taskId: "task_scope", + reportMarkdown: "Research durable orchestration semantics.", + structuredOutput: { + refinedTopic: "durable workflow orchestration", + questions: ["How are runs resumed?", "How are tasks verified?"], + }, + }; + case "discover-sources": + return { + taskId: "task_sources", + reportMarkdown: "Found implementation, RFC, and tests.", + structuredOutput: { + sources: [ + { title: "RFC", url: "rfc/20260529_dynamic-workflows.md", relevance: "design" }, + { + title: "Runner", + url: "src/node/services/workflows/WorkflowRunner.ts", + relevance: "implementation", + }, + ], + }, + }; + case "summarize-source-0": + return { + taskId: "task_summary_0", + reportMarkdown: "RFC describes journal replay and validation.", + structuredOutput: { + source: "RFC", + summary: "Defines durable runs and replay.", + }, + }; + case "summarize-source-1": + return { + taskId: "task_summary_1", + reportMarkdown: "Runner describes replay lookup.", + structuredOutput: { + source: "Runner", + summary: "Replays completed steps by hash.", + }, + }; + case "extract-claims": + return { + taskId: "task_claims", + reportMarkdown: "Extracted two claims.", + structuredOutput: { + claims: [ + { + claim: "Completed steps are reused on resume.", + support: "Runner step lookup", + }, + { + claim: "Structured outputs are validated at report time.", + support: "outputSchema", + }, + ], + }, + }; + case "verify-claim-0": + return { + taskId: "task_verify_0", + reportMarkdown: "Completed-step replay is supported.", + structuredOutput: { + claim: "Completed steps are reused on resume.", + verdict: "supported", + risk: "low", + }, + }; + case "verify-claim-1": + return { + taskId: "task_verify_1", + reportMarkdown: "Structured output validation is supported.", + structuredOutput: { + claim: "Structured outputs are validated at report time.", + verdict: "supported", + risk: "low", + }, + }; + case 
"synthesize-report": + return { + taskId: "task_final", + reportMarkdown: "# Deep Research\nDurable workflows replay completed steps.", + structuredOutput: { confidence: "medium", gaps: ["Needs UI dogfood"] }, + }; + default: + throw new Error(`Unexpected deep-research step: ${spec.id}`); + } + }, + }, + runnerId: "runner-a", + clock: { + nowIso: () => "2026-05-29T00:00:01.000Z", + nowMs: () => 1_000, + }, + }); + + const result = await runner.run("wfr_deep_research"); + const run = await runStore.getRun("wfr_deep_research"); + + expect(taskCalls.map((call) => call.id)).toEqual([ + "scope-topic", + "discover-sources", + "summarize-source-0", + "summarize-source-1", + "extract-claims", + "verify-claim-0", + "verify-claim-1", + "synthesize-report", + ]); + expect(taskCalls.map((call) => call.agentId)).toEqual([ + "explore", + "explore", + "explore", + "explore", + "exec", + "exec", + "exec", + "exec", + ]); + expect(taskCalls.every((call) => call.outputSchema != null)).toBe(true); + expect(run.events.filter((event) => event.type === "phase").map((event) => event.name)).toEqual( + [ + "scope", + "source-discovery", + "source-synthesis", + "claim-extraction", + "adversarial-verification", + "final-synthesis", + ] + ); + expect(result).toEqual({ + reportMarkdown: "# Deep Research\nDurable workflows replay completed steps.", + structuredOutput: { + topic: "durable workflow orchestration", + refinedTopic: "durable workflow orchestration", + sources: [ + { title: "RFC", url: "rfc/20260529_dynamic-workflows.md", relevance: "design" }, + { + title: "Runner", + url: "src/node/services/workflows/WorkflowRunner.ts", + relevance: "implementation", + }, + ], + claims: [ + { claim: "Completed steps are reused on resume.", support: "Runner step lookup" }, + { claim: "Structured outputs are validated at report time.", support: "outputSchema" }, + ], + verification: [ + { claim: "Completed steps are reused on resume.", verdict: "supported", risk: "low" }, + { + claim: "Structured 
outputs are validated at report time.", + verdict: "supported", + risk: "low", + }, + ], + confidence: "medium", + gaps: ["Needs UI dogfood"], + }, + }); + }); + + test("skips empty source and claim fan-out stages", async () => { + if (!deepResearch) { + throw new Error("Expected built-in deep-research workflow"); + } + using tmp = new DisposableTempDir("deep-research-empty-workflow"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await runStore.createRun({ + id: "wfr_deep_research_empty", + workspaceId: "workspace-1", + definition: { + name: deepResearch.name, + description: deepResearch.description, + scope: "built-in", + executable: true, + }, + definitionSource: deepResearch.source, + args: { topic: "obscure empty topic" }, + now: "2026-05-29T00:00:00.000Z", + }); + + const taskCalls: WorkflowAgentSpec[] = []; + const runner = new WorkflowRunner({ + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(spec) { + taskCalls.push(spec); + switch (spec.id) { + case "scope-topic": + return { + taskId: "task_scope", + reportMarkdown: "Scoped obscure topic.", + structuredOutput: { refinedTopic: "obscure empty topic", questions: [] }, + }; + case "discover-sources": + return { + taskId: "task_sources", + reportMarkdown: "No high-signal sources found.", + structuredOutput: { sources: [] }, + }; + case "extract-claims": + return { + taskId: "task_claims", + reportMarkdown: "No claims extracted.", + structuredOutput: { claims: [] }, + }; + case "synthesize-report": + return { + taskId: "task_final", + reportMarkdown: "# Deep Research\nNo sources were found.", + structuredOutput: { confidence: "low", gaps: ["No sources found"] }, + }; + default: + throw new Error(`Unexpected deep-research step: ${spec.id}`); + } + }, + }, + runnerId: "runner-a", + clock: { + nowIso: () => "2026-05-29T00:00:01.000Z", + nowMs: () => 1_000, + }, + }); + + const result = await runner.run("wfr_deep_research_empty"); + 
const run = await runStore.getRun("wfr_deep_research_empty"); + + expect(run.status).toBe("completed"); + expect(taskCalls.map((call) => call.id)).toEqual([ + "scope-topic", + "discover-sources", + "extract-claims", + "synthesize-report", + ]); + expect(taskCalls.map((call) => call.agentId)).toEqual(["explore", "explore", "exec", "exec"]); + expect(result).toMatchObject({ + structuredOutput: { + sources: [], + claims: [], + verification: [], + }, + }); + }); + + test("caps model-produced deep-research fan-out", async () => { + if (!deepResearch) { + throw new Error("Expected built-in deep-research workflow"); + } + using tmp = new DisposableTempDir("deep-research-capped-workflow"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await runStore.createRun({ + id: "wfr_deep_research_capped", + workspaceId: "workspace-1", + definition: { + name: deepResearch.name, + description: deepResearch.description, + scope: "built-in", + executable: true, + }, + definitionSource: deepResearch.source, + args: { topic: "fanout cap" }, + now: "2026-05-29T00:00:00.000Z", + }); + + const taskCalls: WorkflowAgentSpec[] = []; + const runner = new WorkflowRunner({ + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(spec) { + taskCalls.push(spec); + if (spec.id === "scope-topic") { + return { + taskId: "task_scope", + reportMarkdown: "Scoped.", + structuredOutput: { refinedTopic: "fanout cap", questions: ["How much fanout?"] }, + }; + } + if (spec.id === "discover-sources") { + return { + taskId: "task_sources", + reportMarkdown: "Many sources.", + structuredOutput: { + sources: Array.from({ length: 20 }, (_value, index) => ({ + title: `Source ${index}`, + url: `source-${index}.md`, + relevance: "fixture", + })), + }, + }; + } + if (spec.id.startsWith("summarize-source-")) { + return { + taskId: `task_${spec.id}`, + reportMarkdown: spec.id, + structuredOutput: { source: spec.id, summary: "summary" }, + }; + } 
+ if (spec.id === "extract-claims") { + return { + taskId: "task_claims", + reportMarkdown: "Many claims.", + structuredOutput: { + claims: Array.from({ length: 20 }, (_value, index) => ({ + claim: `Claim ${index}`, + support: "fixture", + })), + }, + }; + } + if (spec.id.startsWith("verify-claim-")) { + return { + taskId: `task_${spec.id}`, + reportMarkdown: spec.id, + structuredOutput: { claim: spec.id, verdict: "supported", risk: "low" }, + }; + } + if (spec.id === "synthesize-report") { + return { + taskId: "task_final", + reportMarkdown: "# Capped", + structuredOutput: { confidence: "medium", gaps: [] }, + }; + } + throw new Error(`Unexpected deep-research step: ${spec.id}`); + }, + }, + runnerId: "runner-a", + clock: { + nowIso: () => "2026-05-29T00:00:01.000Z", + nowMs: () => 1_000, + }, + }); + + const result = await runner.run("wfr_deep_research_capped"); + const callIds = taskCalls.map((call) => call.id); + + expect(callIds.filter((id) => id.startsWith("summarize-source-")).length).toBe(16); + expect(callIds.filter((id) => id.startsWith("verify-claim-")).length).toBe(16); + expect(callIds).not.toContain("summarize-source-16"); + expect(callIds).not.toContain("verify-claim-16"); + const structuredOutput = ( + result as { + structuredOutput: { sources: unknown[]; claims: unknown[]; verification: unknown[] }; + } + ).structuredOutput; + expect(structuredOutput.sources).toHaveLength(16); + expect(structuredOutput.claims).toHaveLength(16); + expect(structuredOutput.verification).toHaveLength(16); + }); +}); + +describe("built-in deep-review-workflow", () => { + test("coordinates scoped review lanes, adversarial verification, and final synthesis", async () => { + if (!deepReviewWorkflow) { + throw new Error("Expected built-in deep-review-workflow workflow"); + } + using tmp = new DisposableTempDir("deep-review-workflow"); + const runStore = new WorkflowRunStore({ sessionDir: tmp.path, staleLeaseMs: 10 }); + await runStore.createRun({ + id: 
"wfr_deep_review_workflow", + workspaceId: "workspace-1", + definition: { + name: deepReviewWorkflow.name, + description: deepReviewWorkflow.description, + scope: "built-in", + executable: true, + }, + definitionSource: deepReviewWorkflow.source, + args: { + input: "PR #123", + files: ["src/service.ts"], + instructions: "Focus on correctness.", + maxCandidates: 2, + }, + now: "2026-05-29T00:00:00.000Z", + }); + + const issue = { + id: "correctness-missing-await", + severity: "P1", + category: "correctness", + title: "Missing await drops write failures", + rationale: "The service reports success before persistence completes.", + evidence: "src/service.ts calls persist() without awaiting it.", + filePaths: ["src/service.ts"], + suggestedFix: "Await persist() before returning success.", + validation: "Add a failing persistence regression test.", + confidence: "high", + }; + const taskCalls: WorkflowAgentSpec[] = []; + const runner = new WorkflowRunner({ + runStore, + runtimeFactory: new QuickJSRuntimeFactory(), + taskAdapter: { + async runAgent(spec) { + taskCalls.push(spec); + switch (spec.id) { + case "scope-review-surface": + return { + taskId: "task_scope", + reportMarkdown: "Review service changes.", + structuredOutput: { + summary: "PR touches persistence service code.", + files: ["src/service.ts"], + riskAreas: ["async persistence"], + lanes: ["correctness", "tests", "security-reliability"], + }, + }; + case "review-correctness": + return { + taskId: "task_correctness", + reportMarkdown: "Found missing await.", + structuredOutput: { issues: [issue] }, + }; + case "review-tests": + case "review-security-reliability": + case "review-architecture": + return { + taskId: `task_${spec.id}`, + reportMarkdown: "No additional findings.", + structuredOutput: { issues: [] }, + }; + case "triage-candidate-issues": + return { + taskId: "task_triage", + reportMarkdown: "One actionable issue remains.", + structuredOutput: { issues: [issue] }, + }; + case "verify-issue-0": + 
return { + taskId: "task_verify", + reportMarkdown: "Issue is valid.", + structuredOutput: { + issueId: "correctness-missing-await", + verdict: "valid", + confidence: "high", + rationale: "The code path can return before the write rejects.", + evidence: "The missing await is on the changed path.", + suggestedSeverity: "P1", + }, + }; + case "synthesize-review": + return { + taskId: "task_final", + reportMarkdown: "# Deep Review\n\n- P1 Missing await drops write failures.", + structuredOutput: { + verifiedIssueCount: 1, + risk: "medium", + validationPlan: ["bun test src/service.test.ts"], + discardedIssueCount: 0, + }, + }; + default: + throw new Error(`Unexpected deep-review step: ${spec.id}`); + } + }, + }, + runnerId: "runner-a", + clock: { + nowIso: () => "2026-05-29T00:00:01.000Z", + nowMs: () => 1_000, + }, + }); + + const result = await runner.run("wfr_deep_review_workflow"); + const run = await runStore.getRun("wfr_deep_review_workflow"); + + expect(taskCalls.map((call) => call.id)).toEqual([ + "scope-review-surface", + "review-correctness", + "review-tests", + "review-security-reliability", + "review-architecture", + "triage-candidate-issues", + "verify-issue-0", + "synthesize-review", + ]); + expect(taskCalls.map((call) => call.agentId)).toEqual([ + "explore", + "exec", + "exec", + "exec", + "exec", + "exec", + "exec", + "exec", + ]); + expect( + taskCalls + .filter((call) => call.agentId === "exec") + .every((call) => call.prompt.includes("read-only deep code review task")) + ).toBe(true); + expect(taskCalls.every((call) => call.outputSchema != null)).toBe(true); + expect(run.events.filter((event) => event.type === "phase").map((event) => event.name)).toEqual( + ["scope", "lane-review", "triage-dedupe", "adversarial-verification", "final-synthesis"] + ); + expect(result).toEqual({ + reportMarkdown: "# Deep Review\n\n- P1 Missing await drops write failures.", + structuredOutput: { + target: "PR #123", + scope: { + summary: "PR touches persistence service 
code.", + files: ["src/service.ts"], + riskAreas: ["async persistence"], + lanes: ["correctness", "tests", "security-reliability"], + }, + laneIssues: [issue], + triagedIssues: [issue], + verification: [ + { + issueId: "correctness-missing-await", + verdict: "valid", + confidence: "high", + rationale: "The code path can return before the write rejects.", + evidence: "The missing await is on the changed path.", + suggestedSeverity: "P1", + }, + ], + final: { + verifiedIssueCount: 1, + risk: "medium", + validationPlan: ["bun test src/service.test.ts"], + discardedIssueCount: 0, + }, + }, + }); + }); +}); diff --git a/src/node/services/workflows/builtInWorkflowDefinitions.ts b/src/node/services/workflows/builtInWorkflowDefinitions.ts new file mode 100644 index 0000000000..56d1da0545 --- /dev/null +++ b/src/node/services/workflows/builtInWorkflowDefinitions.ts @@ -0,0 +1,550 @@ +import type { WorkflowName } from "@/common/types/workflow"; + +export interface BuiltInWorkflowDefinition { + name: WorkflowName; + description: string; + source: string; +} + +export const BUILT_IN_WORKFLOW_DEFINITIONS: readonly BuiltInWorkflowDefinition[] = [ + { + name: "deep-research", + description: "Coordinate delegated agents to research, verify, and synthesize a topic.", + source: `export default function deepResearch({ args, phase, log, agent, parallelAgents }) { + const maxFanOut = 16; + const exploreAgentId = "explore"; + const reasoningAgentId = "exec"; + // Some users configure Explore with fast/cheap models; reserve Exec for reasoning-heavy synthesis. + const readOnlyReasoningPrompt = + "This is a read-only deep-research reasoning task. Do not edit files, create commits, apply patches, push branches, or open PRs. 
Inspect evidence only as needed and report findings.\\n\\n"; + const topic = normalizeDeepResearchTopic(args); + + phase("scope", { topic }); + const scope = agent({ + id: "scope-topic", + title: "Scope research topic", + agentId: exploreAgentId, + prompt: + "Refine this deep research topic into a focused investigation. Return concise research questions and the refined topic.\\n\\nTopic: " + + topic, + outputSchema: { + type: "object", + required: ["refinedTopic", "questions"], + additionalProperties: false, + properties: { + refinedTopic: { type: "string" }, + questions: { type: "array", items: { type: "string" } }, + }, + }, + }); + const refinedTopic = scope.structuredOutput.refinedTopic || topic; + log("Scoped deep research topic", { refinedTopic }); + + phase("source-discovery", { refinedTopic }); + const sources = agent({ + id: "discover-sources", + title: "Discover high-signal sources", + agentId: exploreAgentId, + prompt: + "Find high-signal primary or directly relevant sources for this research topic. Prefer repo files, specs, primary docs, and concrete evidence over summaries. Return sources with title, url/path, and relevance.\\n\\nTopic: " + + refinedTopic + + "\\nQuestions: " + + scope.structuredOutput.questions.join("; "), + outputSchema: { + type: "object", + required: ["sources"], + additionalProperties: false, + properties: { + sources: { + type: "array", + items: { + type: "object", + required: ["title", "url", "relevance"], + additionalProperties: false, + properties: { + title: { type: "string" }, + url: { type: "string" }, + relevance: { type: "string" }, + }, + }, + }, + }, + }, + }); + const discoveredSources = sources.structuredOutput.sources.slice(0, maxFanOut); + log("Discovered sources", { count: sources.structuredOutput.sources.length, selectedCount: discoveredSources.length }); + + phase("source-synthesis", { sourceCount: discoveredSources.length }); + const sourceSummaries = discoveredSources.length > 0 + ? 
parallelAgents( + discoveredSources.map(function (source, index) { + return { + id: "summarize-source-" + index, + title: "Read and summarize source " + (index + 1), + agentId: exploreAgentId, + prompt: + "Read or inspect this discovered source and summarize the evidence relevant to the research questions.\\n\\nTopic: " + + refinedTopic + + "\\nSource: " + + JSON.stringify(source), + outputSchema: { + type: "object", + required: ["source", "summary"], + additionalProperties: false, + properties: { + source: { type: "string" }, + summary: { type: "string" }, + }, + }, + }; + }) + ) + : []; + const summaries = { structuredOutput: { summaries: sourceSummaries.map(function (summary) { return summary.structuredOutput; }) } }; + + phase("claim-extraction", { summaryCount: summaries.structuredOutput.summaries.length }); + const claims = agent({ + id: "extract-claims", + title: "Extract claims and support", + agentId: reasoningAgentId, + prompt: + readOnlyReasoningPrompt + + "Extract the most important factual claims and supporting evidence from these source summaries. Return claims with support notes.\\n\\nTopic: " + + refinedTopic + + "\\nSummaries: " + + JSON.stringify(summaries.structuredOutput.summaries), + outputSchema: { + type: "object", + required: ["claims"], + additionalProperties: false, + properties: { + claims: { + type: "array", + items: { + type: "object", + required: ["claim", "support"], + additionalProperties: false, + properties: { + claim: { type: "string" }, + support: { type: "string" }, + }, + }, + }, + }, + }, + }); + + const extractedClaims = claims.structuredOutput.claims.slice(0, maxFanOut); + phase("adversarial-verification", { claimCount: extractedClaims.length }); + const verificationFindings = extractedClaims.length > 0 + ? 
parallelAgents( + extractedClaims.map(function (claim, index) { + return { + id: "verify-claim-" + index, + title: "Adversarially verify claim " + (index + 1), + agentId: reasoningAgentId, + prompt: + readOnlyReasoningPrompt + + "Challenge this claim. Look for contradictions, missing evidence, overreach, and lower-confidence areas. Return verdict and risk.\\n\\nTopic: " + + refinedTopic + + "\\nClaim: " + + JSON.stringify(claim), + outputSchema: { + type: "object", + required: ["claim", "verdict", "risk"], + additionalProperties: false, + properties: { + claim: { type: "string" }, + verdict: { type: "string", enum: ["supported", "mixed", "refuted", "unclear"] }, + risk: { type: "string", enum: ["low", "medium", "high"] }, + }, + }, + }; + }) + ) + : []; + const verification = { structuredOutput: { findings: verificationFindings.map(function (finding) { return finding.structuredOutput; }) } }; + log("Verified claims", { count: verification.structuredOutput.findings.length }); + + phase("final-synthesis", { topic: refinedTopic }); + const final = agent({ + id: "synthesize-report", + title: "Synthesize final deep research report", + agentId: reasoningAgentId, + prompt: + readOnlyReasoningPrompt + + "Write the final deep research report. Include key findings, citations/source references by title or path, uncertainty, and recommendations for follow-up. 
Return confidence and remaining gaps as structured output.\\n\\nTopic: " + + refinedTopic + + "\\nSources: " + + JSON.stringify(discoveredSources) + + "\\nClaims: " + + JSON.stringify(extractedClaims) + + "\\nVerification: " + + JSON.stringify(verification.structuredOutput.findings), + outputSchema: { + type: "object", + required: ["confidence", "gaps"], + additionalProperties: false, + properties: { + confidence: { type: "string", enum: ["low", "medium", "high"] }, + gaps: { type: "array", items: { type: "string" } }, + }, + }, + }); + + return { + reportMarkdown: final.reportMarkdown, + structuredOutput: { + topic, + refinedTopic, + sources: discoveredSources, + claims: extractedClaims, + verification: verification.structuredOutput.findings, + confidence: final.structuredOutput.confidence, + gaps: final.structuredOutput.gaps, + }, + }; +} + +function normalizeDeepResearchTopic(args) { + if (typeof args === "string" && args.trim()) return args.trim(); + if (args && typeof args === "object") { + if (typeof args.topic === "string" && args.topic.trim()) return args.topic.trim(); + if (typeof args.input === "string" && args.input.trim()) return args.input.trim(); + if (typeof args.query === "string" && args.query.trim()) return args.query.trim(); + } + return JSON.stringify(args); +} +`, + }, + // Keep the lightweight /deep-review skill; this workflow is the heavier structured path with + // adversarial verification for review findings. + { + name: "deep-review-workflow", + description: + "Coordinate adversarial review agents to find, verify, and synthesize code review findings.", + source: `export default function deepReviewWorkflow({ args, phase, log, agent, parallelAgents }) { + const exploreAgentId = "explore"; + const reasoningAgentId = "exec"; + // Scope discovery stays on Explore; review judgment uses Exec for users with fast Explore defaults. + const readOnlyReviewPrompt = + "This is a read-only deep code review task. 
Do not edit files, create commits, apply patches, push branches, or open PRs. Inspect repository evidence only as needed and report findings.\\n\\n"; + const input = normalizeDeepReviewArgs(args); + const maxCandidates = input.maxCandidates; + + phase("scope", { + target: input.target, + fileCount: input.files.length, + hasDiffSnapshot: input.diff.length > 0, + }); + const scope = agent({ + id: "scope-review-surface", + title: "Scope review surface", + agentId: exploreAgentId, + prompt: + "Scope this code review. Identify changed files, likely intent, touched layers, highest-risk areas, and which review lanes should run. Use repository evidence; do not assume the diff is complete if refs are provided.\\n\\n" + + renderReviewInput(input), + outputSchema: scopeSchema(), + }); + + const lanes = selectReviewLanes(scope.structuredOutput.lanes); + log("Selected deep review lanes", { lanes: lanes }); + + phase("lane-review", { lanes: lanes }); + const laneReviews = parallelAgents( + lanes.map(function (lane) { + return { + id: "review-" + lane, + title: "Review lane: " + lane, + agentId: reasoningAgentId, + prompt: + readOnlyReviewPrompt + + lanePrompt(lane) + + "\\n\\nReview target:\\n" + + renderReviewInput(input) + + "\\n\\nScoped review surface:\\n" + + JSON.stringify(scope.structuredOutput, null, 2) + + "\\n\\nReturn only concrete, actionable findings with file paths and evidence. 
Prefer an empty issues array over speculative feedback.", + outputSchema: issueListSchema(), + }; + }) + ); + const laneIssues = flatten( + laneReviews.map(function (review) { + return review.structuredOutput.issues || []; + }) + ); + log("Lane review produced candidate issues", { count: laneIssues.length }); + + phase("triage-dedupe", { candidateCount: laneIssues.length }); + const triage = agent({ + id: "triage-candidate-issues", + title: "Triage and dedupe review findings", + agentId: reasoningAgentId, + prompt: + readOnlyReviewPrompt + + "Deduplicate and triage these candidate code review findings. Merge duplicates, drop vague or non-actionable items, normalize severity, and preserve concrete evidence.\\n\\n" + + "Review target:\\n" + + renderReviewInput(input) + + "\\n\\nCandidate issues:\\n" + + JSON.stringify(laneIssues, null, 2), + outputSchema: issueListSchema(), + }); + const candidates = (triage.structuredOutput.issues || []).slice(0, maxCandidates); + log("Triaged candidate issues", { + candidateCount: triage.structuredOutput.issues.length, + selectedCount: candidates.length, + }); + + phase("adversarial-verification", { candidateCount: candidates.length }); + const verificationResults = candidates.length > 0 + ? parallelAgents( + candidates.map(function (issue, index) { + return { + id: "verify-issue-" + index, + title: "Verify review finding " + (index + 1), + agentId: reasoningAgentId, + prompt: + readOnlyReviewPrompt + + "Adversarially verify this code review finding. Try to disprove it. Inspect relevant code paths and tests. 
Decide whether it is valid, duplicate, overstated, not reproducible, or needs more information.\\n\\n" + + "Review target:\\n" + + renderReviewInput(input) + + "\\n\\nFinding:\\n" + + JSON.stringify(issue, null, 2), + outputSchema: verificationSchema(), + }; + }) + ) + : []; + const verifications = verificationResults.map(function (verification) { + return verification.structuredOutput; + }); + log("Verified candidate issues", { count: verifications.length }); + + phase("final-synthesis", { + candidateCount: candidates.length, + verificationCount: verifications.length, + }); + const final = agent({ + id: "synthesize-review", + title: "Synthesize final deep review", + agentId: reasoningAgentId, + prompt: + readOnlyReviewPrompt + + "Write the final code review. Include only findings that remain actionable after adversarial verification. Use severity P0-P4, file paths, and concrete evidence. If there are no verified issues, say so clearly. Include questions and a validation plan.\\n\\n" + + "Scoped review surface:\\n" + + JSON.stringify(scope.structuredOutput, null, 2) + + "\\n\\nTriaged issues:\\n" + + JSON.stringify(candidates, null, 2) + + "\\n\\nVerification results:\\n" + + JSON.stringify(verifications, null, 2), + outputSchema: finalSynthesisSchema(), + }); + + return { + reportMarkdown: final.reportMarkdown, + structuredOutput: { + target: input.target, + scope: scope.structuredOutput, + laneIssues: laneIssues, + triagedIssues: candidates, + verification: verifications, + final: final.structuredOutput, + }, + }; +} + +function normalizeDeepReviewArgs(args) { + const normalized = { + target: "current workspace changes", + baseRef: "", + headRef: "", + diff: "", + files: [], + instructions: "", + maxCandidates: 12, + }; + + if (typeof args === "string" && args.trim()) { + normalized.target = args.trim(); + return normalized; + } + + if (!args || typeof args !== "object") { + return normalized; + } + + if (typeof args.target === "string" && args.target.trim()) 
normalized.target = args.target.trim(); + else if (typeof args.input === "string" && args.input.trim()) normalized.target = args.input.trim(); + else if (typeof args.pr === "string" && args.pr.trim()) normalized.target = args.pr.trim(); + else if (typeof args.branch === "string" && args.branch.trim()) normalized.target = args.branch.trim(); + + if (typeof args.baseRef === "string") normalized.baseRef = args.baseRef.trim(); + else if (typeof args.base === "string") normalized.baseRef = args.base.trim(); + + if (typeof args.headRef === "string") normalized.headRef = args.headRef.trim(); + else if (typeof args.head === "string") normalized.headRef = args.head.trim(); + + if (typeof args.diff === "string") normalized.diff = args.diff; + if (typeof args.instructions === "string") normalized.instructions = args.instructions.trim(); + else if (typeof args.notes === "string") normalized.instructions = args.notes.trim(); + + if (Array.isArray(args.files)) { + normalized.files = args.files.filter(function (file) { + return typeof file === "string" && file.trim().length > 0; + }).map(function (file) { + return file.trim(); + }); + } + + if (typeof args.maxCandidates === "number" && args.maxCandidates > 0) { + normalized.maxCandidates = Math.min(20, Math.max(1, Math.floor(args.maxCandidates))); + } + + return normalized; +} + +function renderReviewInput(input) { + return [ + "Target: " + input.target, + input.baseRef ? "Base ref: " + input.baseRef : "", + input.headRef ? "Head ref: " + input.headRef : "", + input.files.length > 0 ? "Files: " + input.files.join(", ") : "", + input.instructions ? "Reviewer instructions: " + input.instructions : "", + input.diff ? 
"Diff snapshot:\\n~~~diff\\n" + input.diff + "\\n~~~" : "", + ].filter(Boolean).join("\\n"); +} + +function selectReviewLanes(lanes) { + const defaults = ["correctness", "tests", "architecture"]; + const allowed = { + correctness: true, + tests: true, + architecture: true, + "security-reliability": true, + "ux-a11y": true, + "docs-dx": true, + }; + const requested = Array.isArray(lanes) && lanes.length > 0 ? lanes : defaults; + const result = []; + for (const lane of requested) { + if (allowed[lane] && result.indexOf(lane) === -1) { + result.push(lane); + } + } + for (const fallback of defaults) { + if (result.indexOf(fallback) === -1) { + result.push(fallback); + } + } + return result.slice(0, 6); +} + +function lanePrompt(lane) { + const prompts = { + correctness: "Review for logic bugs, edge cases, races, state-machine violations, and broken invariants.", + tests: "Review test coverage, determinism, missing regression tests, and validation commands.", + architecture: "Review consistency with existing architecture, boundaries, naming, abstractions, and maintainability.", + "security-reliability": "Review security, trust boundaries, path traversal, injection, data corruption, reliability, and performance risks.", + "ux-a11y": "Review user-facing behavior, accessibility, keyboard flow, visual consistency, and empty/loading/error states.", + "docs-dx": "Review documentation, developer experience, scripts, public API clarity, and migration concerns.", + }; + return prompts[lane] || prompts.correctness; +} + +function issueListSchema() { + return { + type: "object", + required: ["issues"], + additionalProperties: false, + properties: { + issues: { + type: "array", + items: issueSchema(), + }, + }, + }; +} + +function issueSchema() { + return { + type: "object", + required: ["id", "severity", "category", "title", "rationale", "evidence", "filePaths", "confidence"], + additionalProperties: false, + properties: { + id: { type: "string" }, + severity: { type: "string", 
enum: ["P0", "P1", "P2", "P3", "P4"] }, + category: { type: "string" }, + title: { type: "string" }, + rationale: { type: "string" }, + evidence: { type: "string" }, + filePaths: { type: "array", items: { type: "string" } }, + suggestedFix: { type: "string" }, + validation: { type: "string" }, + confidence: { type: "string", enum: ["low", "medium", "high"] }, + }, + }; +} + +function scopeSchema() { + return { + type: "object", + required: ["summary", "files", "riskAreas", "lanes"], + additionalProperties: false, + properties: { + summary: { type: "string" }, + files: { type: "array", items: { type: "string" } }, + riskAreas: { type: "array", items: { type: "string" } }, + lanes: { + type: "array", + items: { + type: "string", + enum: ["correctness", "tests", "architecture", "security-reliability", "ux-a11y", "docs-dx"], + }, + }, + }, + }; +} + +function verificationSchema() { + return { + type: "object", + required: ["issueId", "verdict", "confidence", "rationale"], + additionalProperties: false, + properties: { + issueId: { type: "string" }, + verdict: { type: "string", enum: ["valid", "duplicate", "overstated", "not-repro", "needs-info"] }, + confidence: { type: "string", enum: ["low", "medium", "high"] }, + rationale: { type: "string" }, + evidence: { type: "string" }, + suggestedSeverity: { type: "string", enum: ["P0", "P1", "P2", "P3", "P4"] }, + }, + }; +} + +function finalSynthesisSchema() { + return { + type: "object", + required: ["verifiedIssueCount", "risk", "validationPlan"], + additionalProperties: false, + properties: { + verifiedIssueCount: { type: "number" }, + risk: { type: "string", enum: ["low", "medium", "high"] }, + validationPlan: { type: "array", items: { type: "string" } }, + discardedIssueCount: { type: "number" }, + }, + }; +} + +function flatten(arrays) { + const out = []; + for (const array of arrays) { + for (const item of array) { + out.push(item); + } + } + return out; +} +`, + }, +]; diff --git 
// Pins the canonicalization and hashing rules that workflow replay keys depend on.
describe("workflow replay keys", () => {
  test("hashes semantically identical object inputs the same regardless of key order", () => {
    const stepId = "source-read";
    const queryFirstInput = {
      query: "mux workflows",
      limits: { maxSources: 5, languages: ["ts", "tsx"] },
    };
    const limitsFirstInput = {
      limits: { languages: ["ts", "tsx"], maxSources: 5 },
      query: "mux workflows",
    };

    const queryFirstHash = hashWorkflowStepInput(stepId, queryFirstInput);
    const limitsFirstHash = hashWorkflowStepInput(stepId, limitsFirstInput);

    // Same content, different key order: the hashes must agree and carry the algorithm prefix.
    expect(queryFirstHash).toBe(limitsFirstHash);
    expect(queryFirstHash).toMatch(/^sha256:/);
  });

  test("keeps array order significant", () => {
    const forwardHash = hashWorkflowStepInput("fanout", ["a", "b"]);
    const reversedHash = hashWorkflowStepInput("fanout", ["b", "a"]);

    expect(forwardHash).not.toBe(reversedHash);
  });

  test("rejects nondeterministic or non-JSON input values instead of silently hashing them", () => {
    // Each case pairs an unsupported input with the fragment expected in the failure message.
    const rejectedCases = [
      { input: { now: new Date("2026-05-29T00:00:00.000Z") }, message: /plain JSON/ },
      { input: { missing: undefined }, message: /JSON value/ },
      { input: { bad: Number.NaN }, message: /finite/ },
    ];

    for (const rejected of rejectedCases) {
      expect(() => canonicalizeWorkflowInput(rejected.input)).toThrow(rejected.message);
    }
  });

  test("requires stable non-empty step ids for replay-boundary primitives", () => {
    const checkEmptyId = () => assertWorkflowStepId("", "agent");
    const checkNamedId = () => assertWorkflowStepId("read-sources", "agent");

    expect(checkEmptyId).toThrow(/stable id/);
    expect(checkNamedId).not.toThrow();
  });
});
/**
 * Guards a replay-boundary primitive against a missing step id.
 *
 * @param stepId - Identifier supplied for the step; must contain at least one non-whitespace character.
 * @param primitiveName - Name of the calling primitive (e.g. "agent"); only used as the message prefix.
 * @throws When `stepId` is empty or whitespace-only (the assertion fails with the message below).
 */
export function assertWorkflowStepId(stepId: string, primitiveName: string): void {
  assert(
    stepId.trim().length > 0,
    `${primitiveName} replay boundary requires a stable id so completed workflow work can be reused`
  );
}

/**
 * Computes the replay key for a workflow step from its id and input.
 *
 * The input is canonicalized first, so objects that differ only in key order produce the same
 * key, while array order stays significant.
 *
 * @param stepId - Non-empty step identifier; validated with {@link assertWorkflowStepId}.
 * @param input - JSON-compatible step input; see {@link canonicalizeWorkflowInput} for what is rejected.
 * @returns `"sha256:"` followed by the hex SHA-256 digest of the serialized `{ stepId, input }` pair.
 * @throws When the step id is blank or the input is not a supported JSON value.
 */
export function hashWorkflowStepInput(stepId: string, input: unknown): string {
  assertWorkflowStepId(stepId, "workflow step");
  const canonical = JSON.stringify({ stepId, input: canonicalizeWorkflowInput(input) });
  return `sha256:${crypto.createHash("sha256").update(canonical).digest("hex")}`;
}

/**
 * Produces a copy of `input` whose object keys are sorted at every nesting level, so that
 * `JSON.stringify` yields the same text for semantically identical inputs.
 *
 * `null`, `undefined` (top level only), strings, booleans and finite numbers are returned as-is.
 * Arrays are mapped element by element, preserving order. Plain objects are rebuilt with sorted keys.
 *
 * @param input - Value to canonicalize.
 * @returns The canonical value.
 * @throws When a number is not finite, an object is not a plain `Object.prototype` object
 *   (e.g. a `Date`), an object property is `undefined`, or the value is of any other type
 *   (bigint, symbol, function).
 */
export function canonicalizeWorkflowInput(input: unknown): unknown {
  if (input == null || typeof input === "string" || typeof input === "boolean") {
    return input;
  }

  if (typeof input === "number") {
    assert(Number.isFinite(input), "Workflow replay input numbers must be finite");
    return input;
  }

  if (Array.isArray(input)) {
    return input.map((value) => canonicalizeWorkflowInput(value));
  }

  if (typeof input === "object") {
    assert(
      Object.getPrototypeOf(input) === Object.prototype,
      "Workflow replay inputs must be plain JSON objects/arrays"
    );

    const record = input as Record<string, unknown>;
    const result: Record<string, unknown> = {};
    for (const key of Object.keys(record).sort()) {
      const value = record[key];
      assert(
        value !== undefined,
        "Workflow replay inputs must not contain non-JSON value undefined"
      );
      // Define an own data property instead of assigning. `result["__proto__"] = v` would invoke
      // the inherited `__proto__` setter, so an own "__proto__" key (as produced by JSON.parse)
      // would vanish from the canonical form and distinct inputs would share one replay key.
      Object.defineProperty(result, key, {
        value: canonicalizeWorkflowInput(value),
        enumerable: true,
        writable: true,
        configurable: true,
      });
    }
    return result;
  }

  throw new Error(`Workflow replay inputs must be JSON values, got ${typeof input}`);
}
// Exercises the three ways a scratch root is resolved: an explicit workspace root,
// a caller-supplied path normalizer, and the workspace path persisted in config.
describe("resolveWorkflowScratchRoots", () => {
  test("resolves scratch workflows under the active workspace root", () => {
    using tempDir = new DisposableTempDir("workflow-scratch-roots");
    const appConfig = new Config(tempDir.path);
    const activeRoot = path.join(tempDir.path, "project", "feature");

    const roots = resolveWorkflowScratchRoots(appConfig, "workspace-1", {
      workspaceRootPath: activeRoot,
    });

    expect(roots).toEqual({
      scratchRoot: path.join(activeRoot, ".mux", "workflows", ".scratch"),
    });
  });

  test("uses runtime path normalization for remote workspace roots", () => {
    using tempDir = new DisposableTempDir("workflow-scratch-roots");
    const appConfig = new Config(tempDir.path);
    // Stand-in for a remote runtime's normalizer: joins with "/" regardless of host platform.
    const joinWithSlash = (relativePath: string, basePath: string) =>
      `${basePath}/${relativePath}`;

    const roots = resolveWorkflowScratchRoots(appConfig, "workspace-1", {
      workspaceRootPath: "/remote/workspace",
      normalizePath: joinWithSlash,
    });

    expect(roots).toEqual({
      scratchRoot: "/remote/workspace/.mux/workflows/.scratch",
    });
  });

  test("falls back to the persisted workspace path when no root is provided", async () => {
    using tempDir = new DisposableTempDir("workflow-scratch-roots");
    const appConfig = new Config(tempDir.path);
    const projectDir = path.join(tempDir.path, "project");
    const persistedWorkspaceDir = path.join(projectDir, "feature");

    // Persist a workspace entry so the resolver has something to look up by id.
    await appConfig.editConfig((current) => {
      current.projects.set(projectDir, {
        workspaces: [{ id: "workspace-1", name: "feature", path: persistedWorkspaceDir }],
      });
      return current;
    });

    const roots = resolveWorkflowScratchRoots(appConfig, "workspace-1");

    expect(roots).toEqual({
      scratchRoot: path.join(persistedWorkspaceDir, ".mux", "workflows", ".scratch"),
    });
  });
});
b/src/node/services/workflows/workflowScratchRoots.ts @@ -0,0 +1,46 @@ +import * as path from "node:path"; + +import type { Config } from "@/node/config"; +import { findWorkspaceEntry } from "@/node/services/taskUtils"; +import assert from "@/common/utils/assert"; + +export const WORKFLOW_SCRATCH_RELATIVE_DIR = ".mux/workflows/.scratch"; + +export interface WorkflowScratchRoots { + scratchRoot: string; +} + +export function resolveWorkflowScratchRoots( + config: Config, + workspaceId: string, + options?: { + workspaceRootPath?: string; + normalizePath?: (relativePath: string, basePath: string) => string; + } +): WorkflowScratchRoots { + const normalizedWorkspaceId = workspaceId.trim(); + assert(normalizedWorkspaceId.length > 0, "resolveWorkflowScratchRoots: workspaceId is required"); + + const optionWorkspaceRootPath = options?.workspaceRootPath?.trim(); + const workspaceRootPath = + optionWorkspaceRootPath != null && optionWorkspaceRootPath.length > 0 + ? optionWorkspaceRootPath + : resolveWorkspaceRootFromConfig(config, normalizedWorkspaceId); + assert( + workspaceRootPath.length > 0, + "resolveWorkflowScratchRoots: workspaceRootPath is required" + ); + + const scratchRoot = options?.normalizePath + ? options.normalizePath(WORKFLOW_SCRATCH_RELATIVE_DIR, workspaceRootPath) + : path.join(workspaceRootPath, ".mux", "workflows", ".scratch"); + assert(scratchRoot.length > 0, "resolveWorkflowScratchRoots: scratchRoot is required"); + + return { scratchRoot }; +} + +function resolveWorkspaceRootFromConfig(config: Config, workspaceId: string): string { + const appConfig = config.loadConfigOrDefault(); + const entry = findWorkspaceEntry(appConfig, workspaceId); + return entry?.workspace.path ?? 
""; +} diff --git a/src/node/services/workspaceService.test.ts b/src/node/services/workspaceService.test.ts index 86ac538600..7317d01318 100644 --- a/src/node/services/workspaceService.test.ts +++ b/src/node/services/workspaceService.test.ts @@ -32,7 +32,12 @@ import type { TerminalService } from "@/node/services/terminalService"; import type { DesktopSessionManager } from "@/node/services/desktop/DesktopSessionManager"; import type { WorktreeArchiveSnapshot } from "@/common/schemas/project"; import type { BashToolResult } from "@/common/types/tools"; +import type { WorkspaceChatMessage } from "@/common/orpc/types"; import { createMuxMessage } from "@/common/types/message"; +import { + WORKFLOW_RUN_CARD_DISPLAY_METADATA_TYPE, + WORKFLOW_TRIGGER_DISPLAY_METADATA_TYPE, +} from "@/common/utils/workflowRunMessages"; import { getPlanFilePath } from "@/common/utils/planStorage"; import * as todoStorageModule from "@/node/services/todos/todoStorage"; import * as runtimeFactory from "@/node/runtime/runtimeFactory"; @@ -208,6 +213,75 @@ function createFrontendWorkspaceMetadata( }; } +describe("WorkspaceService workflow invocation events", () => { + test("emits workflow slash invocation rows through the active session chat stream", async () => { + const { config, historyService, cleanup } = await createTestHistoryService(); + const workspaceId = "workflow-live-events"; + const projectPath = path.join(config.rootDir, "project"); + try { + await config.addWorkspace(projectPath, { + id: workspaceId, + name: "workflow-live-events", + projectName: "project", + projectPath, + runtimeConfig: { type: "local" }, + }); + const workspaceService = createWorkspaceServiceForTest({ + config, + historyService, + aiService: createMockAIService({ + stopStream: mock(() => Promise.resolve(Ok(undefined))), + }), + extensionMetadata: new ExtensionMetadataService( + path.join(config.rootDir, "extensionMetadata.json") + ), + initStateManager: { + ...mockInitStateManager, + off: mock(() => undefined 
as unknown as InitStateManager), + } as unknown as InitStateManager, + }); + const session = workspaceService.getOrCreateSession(workspaceId); + const events: WorkspaceChatMessage[] = []; + const unsubscribe = session.onChatEvent(({ message }) => { + events.push(message); + }); + + try { + const persisted = await workspaceService.appendWorkflowRunInvocation({ + workspaceId, + rawCommand: "/demo investigate live events", + name: "demo", + args: { input: "investigate live events" }, + runId: "wfr_live_events", + status: "running", + result: null, + }); + + expect(persisted).toBe(true); + expect(events).toHaveLength(2); + const triggerMessage = events[0]; + const cardMessage = events[1]; + if (triggerMessage?.type !== "message" || cardMessage?.type !== "message") { + throw new Error("Expected workflow invocation to emit message events"); + } + expect(triggerMessage).toMatchObject({ role: "user", type: "message" }); + expect(triggerMessage.metadata?.muxMetadata).toEqual( + expect.objectContaining({ type: WORKFLOW_TRIGGER_DISPLAY_METADATA_TYPE }) + ); + expect(cardMessage).toMatchObject({ role: "assistant", type: "message" }); + expect(cardMessage.metadata?.muxMetadata).toEqual( + expect.objectContaining({ type: WORKFLOW_RUN_CARD_DISPLAY_METADATA_TYPE }) + ); + } finally { + unsubscribe(); + workspaceService.disposeSession(workspaceId); + } + } finally { + await cleanup(); + } + }); +}); + describe("WorkspaceService truncateHistory goal acknowledgment", () => { async function createServices(aiServiceOverride?: AIService) { const { config, historyService, cleanup } = await createTestHistoryService(); diff --git a/src/node/services/workspaceService.ts b/src/node/services/workspaceService.ts index 0a787ef1fc..453ac64a18 100644 --- a/src/node/services/workspaceService.ts +++ b/src/node/services/workspaceService.ts @@ -120,6 +120,12 @@ import { type MuxMessageMetadata, type MuxMessage, } from "@/common/types/message"; +import type { WorkflowRunRecord } from 
"@/common/types/workflow"; +import { + WORKFLOW_RUN_CARD_DISPLAY_METADATA_TYPE, + WORKFLOW_TRIGGER_DISPLAY_METADATA_TYPE, + buildWorkflowRunCardMessage, +} from "@/common/utils/workflowRunMessages"; import type { RuntimeConfig } from "@/common/types/runtime"; import { hasSrcBaseDir, @@ -256,6 +262,30 @@ type WorktreeArchiveSnapshotLifecycleService = Pick< >; // Trim and normalize a heartbeat message for storage. Accepts `unknown` so it safely handles // both user input (string | undefined) and persisted config values that may have been corrupted. +function isWorkflowInvocationMessage(message: MuxMessage, runId: string): boolean { + if ( + message.metadata?.muxMetadata?.type === WORKFLOW_RUN_CARD_DISPLAY_METADATA_TYPE && + message.metadata.muxMetadata.runId === runId + ) { + return true; + } + + return message.parts.some((part) => { + if (part.type !== "dynamic-tool" || part.toolName !== "workflow_run") { + return false; + } + if (part.state !== "output-available") { + return false; + } + const output = part.output; + return ( + output != null && + typeof output === "object" && + (output as Record).runId === runId + ); + }); +} + function sanitizeHeartbeatMessage(message: unknown): string | undefined { if (typeof message !== "string") { return undefined; @@ -5772,6 +5802,110 @@ export class WorkspaceService extends EventEmitter { } } + async appendWorkflowRunInvocation(input: { + workspaceId: string; + rawCommand: string; + name: string; + args: unknown; + runId: string; + status: string; + result: unknown; + run?: WorkflowRunRecord; + }): Promise { + assert(input.workspaceId.length > 0, "appendWorkflowRunInvocation requires workspaceId"); + assert(input.rawCommand.trim().length > 0, "appendWorkflowRunInvocation requires rawCommand"); + assert(input.name.length > 0, "appendWorkflowRunInvocation requires workflow name"); + assert(input.runId.length > 0, "appendWorkflowRunInvocation requires runId"); + + const now = Date.now(); + void 
this.updateRecencyTimestamp(input.workspaceId, now); + const commandPrefix = input.rawCommand.trim().split(/\s+/u)[0] ?? `/${input.name}`; + const userMessage = createMuxMessage( + `workflow-run-command-${input.runId}`, + "user", + input.rawCommand, + { + timestamp: now, + muxMetadata: { + type: WORKFLOW_TRIGGER_DISPLAY_METADATA_TYPE, + rawCommand: input.rawCommand, + commandPrefix, + runId: input.runId, + }, + } + ); + const workflowMessage = buildWorkflowRunCardMessage( + { name: input.name, args: input.args }, + { + runId: input.runId, + status: input.status, + result: input.result, + ...(input.run != null ? { run: input.run } : {}), + }, + now + ); + workflowMessage.metadata = { + timestamp: now, + synthetic: true, + uiVisible: true, + muxMetadata: { + type: WORKFLOW_RUN_CARD_DISPLAY_METADATA_TYPE, + runId: input.runId, + }, + }; + + const session = this.getOrCreateSession(input.workspaceId); + const userAppend = await this.historyService.appendToHistory(input.workspaceId, userMessage); + if (!userAppend.success) { + log.error("Failed to append workflow slash command to history", { + workspaceId: input.workspaceId, + runId: input.runId, + error: userAppend.error, + }); + return false; + } + session.emitChatEvent({ ...userMessage, type: "message" }); + + const toolAppend = await this.historyService.appendToHistory( + input.workspaceId, + workflowMessage + ); + if (!toolAppend.success) { + log.error("Failed to append workflow run card to history", { + workspaceId: input.workspaceId, + runId: input.runId, + error: toolAppend.error, + }); + return false; + } + session.emitChatEvent({ ...workflowMessage, type: "message" }); + return true; + } + + async isWorkflowInvocationCurrent(workspaceId: string, runId: string): Promise { + assert(workspaceId.length > 0, "isWorkflowInvocationCurrent requires workspaceId"); + assert(runId.length > 0, "isWorkflowInvocationCurrent requires runId"); + + const historyResult = await 
this.historyService.getHistoryFromLatestBoundary(workspaceId); + if (!historyResult.success) { + log.warn("Could not read history before workflow continuation", { + workspaceId, + runId, + error: historyResult.error, + }); + return false; + } + + const runCardIndex = historyResult.data.findIndex((message) => + isWorkflowInvocationMessage(message, runId) + ); + if (runCardIndex === -1) { + return false; + } + + return !historyResult.data.slice(runCardIndex + 1).some((message) => message.role === "user"); + } + async sendMessage( workspaceId: string, message: string,