From f8ab1d87f4f0449cf161a90f7776c341940dcf30 Mon Sep 17 00:00:00 2001 From: Ammar Date: Thu, 28 May 2026 15:28:19 -0500 Subject: [PATCH 1/3] feat: add min_completed to task_await for incremental concurrency task_await now returns once min_completed awaited tasks have completed (default 1 = first completion) instead of always blocking on every task. The parent can act on each result as it lands while the rest keep running, then re-await the remainder. Losing waits are detached via per-task AbortControllers without terminating the children, which stay re-awaitable. Updated task/task_await tool descriptions and the best-of-n/task-variants prelude to steer aggregation flows to pass the batch size. --- docs/agents/system-prompt.mdx | 2 + docs/hooks/tools.mdx | 19 +- src/common/utils/tools/toolDefinitions.ts | 26 +- .../builtInSkillContent.generated.ts | 21 +- src/node/services/systemMessage.ts | 2 + src/node/services/tools/task_await.test.ts | 264 +++++++++++++ src/node/services/tools/task_await.ts | 374 +++++++++++------- 7 files changed, 545 insertions(+), 163 deletions(-) diff --git a/docs/agents/system-prompt.mdx b/docs/agents/system-prompt.mdx index 902045776a..3ab64f8943 100644 --- a/docs/agents/system-prompt.mdx +++ b/docs/agents/system-prompt.mdx @@ -53,6 +53,7 @@ When the user asks for "best of n" work, assume they want the \`task\` tool's \` Before spawning the batch, do a small amount of preliminary analysis to capture shared context, constraints, or evaluation criteria that would otherwise be repeated by every child. Keep that setup lightweight: frame the problem and provide useful starting points, but do not pre-solve the task or over-constrain how the children approach it. Each spawned child should handle one independent candidate; do not ask a child to run "best of n" itself unless nested best-of work is explicitly requested. +Picking the best candidate requires every report, so await the full batch (pass \`task_await\` \`min_completed\` equal to the batch size, or use a foreground grouped spawn) before selecting — but you may start setup-only work (e.g. preparing the evaluation rubric or integration scaffolding) as soon as the first candidate lands. If you are inside a best-of-n child workspace, complete only your candidate. @@ -60,6 +61,7 @@ If you are inside a best-of-n child workspace, complete only your candidate. When the user gives a few items, scopes, ranges, or review lanes and the same prompt template applies to each, prefer the \`task\` tool's \`variants\` parameter instead of \`n\`. Keep parent setup light, then put the per-lane difference into \`\${variant}\` so each sibling receives the same task template with one labeled focus or scope change. Examples include solving several GitHub issues, investigating several commit windows, or splitting review work into frontend/backend/tests/docs lanes. +Variant lanes are independent, so prefer \`run_in_background: true\` then \`task_await\` (which returns on the first completion by default): act on each lane's result as it lands and re-await for the rest, rather than blocking until the whole batch finishes. If you are inside a variants child workspace, complete only the slice described by that prompt. diff --git a/docs/hooks/tools.mdx b/docs/hooks/tools.mdx index 286c25ff03..0921ffa5e6 100644 --- a/docs/hooks/tools.mdx +++ b/docs/hooks/tools.mdx @@ -651,15 +651,16 @@ If a value is too large for the environment, it may be omitted (not set). Mux al
-task_await (5) - -| Env var | JSON path | Type | Description | -| --------------------------------- | ------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `MUX_TOOL_INPUT_FILTER` | `filter` | string | Optional regex to filter bash task output lines. By default, only matching lines are returned. When filter_exclude is true, matching lines are excluded instead. Non-matching lines are discarded and cannot be retrieved later. | -| `MUX_TOOL_INPUT_FILTER_EXCLUDE` | `filter_exclude` | boolean | When true, lines matching 'filter' are excluded instead of kept. Requires 'filter' to be set. | -| `MUX_TOOL_INPUT_TASK_IDS_` | `task_ids[]` | string | List of task IDs to await — use only real IDs returned by prior task, bash, or task_list tool results; never fabricate an ID. When omitted, waits for all active descendant tasks of the current workspace. | -| `MUX_TOOL_INPUT_TASK_IDS_COUNT` | `task_ids.length` | number | Number of elements in task_ids (List of task IDs to await — use only real IDs returned by prior task, bash, or task_list tool results; never fabricate an ID. When omitted, waits for all active descendant tasks of the current workspace.) | -| `MUX_TOOL_INPUT_TIMEOUT_SECS` | `timeout_secs` | number | Maximum time to wait in seconds for each task. For bash tasks, this waits for NEW output (or process exit). If exceeded, the result returns status=queued\|running\|awaiting_report (task is still active). Defaults to 600 seconds (10 minutes) if not specified. Set to 0 for a non-blocking status check. | +task_await (6) + +| Env var | JSON path | Type | Description | +| --------------------------------- | ------------------- | ------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `MUX_TOOL_INPUT_FILTER` | `filter` | string | Optional regex to filter bash task output lines. By default, only matching lines are returned. When filter_exclude is true, matching lines are excluded instead. Non-matching lines are discarded and cannot be retrieved later. | +| `MUX_TOOL_INPUT_FILTER_EXCLUDE` | `filter_exclude` | boolean | When true, lines matching 'filter' are excluded instead of kept. Requires 'filter' to be set. | +| `MUX_TOOL_INPUT_MIN_COMPLETED` | `min_completed` | number | Number of awaited tasks that must complete before this call returns. Defaults to 1, so by default task_await returns as soon as the FIRST awaited task completes, letting you act on it while the rest keep running. The result still includes every task complete at that moment plus current status (running/queued) for the rest. Tasks that have not yet completed keep running and remain re-awaitable on a later task_await call. Raise this (e.g. set it to the total number of awaited tasks) when you genuinely need more before proceeding — for example best-of-N synthesis that must compare every candidate. Clamped to the number of awaited tasks; values above that behave like 'wait for all'. | +| `MUX_TOOL_INPUT_TASK_IDS_` | `task_ids[]` | string | List of task IDs to await — use only real IDs returned by prior task, bash, or task_list tool results; never fabricate an ID. When omitted, waits for all active descendant tasks of the current workspace. | +| `MUX_TOOL_INPUT_TASK_IDS_COUNT` | `task_ids.length` | number | Number of elements in task_ids (List of task IDs to await — use only real IDs returned by prior task, bash, or task_list tool results; never fabricate an ID. When omitted, waits for all active descendant tasks of the current workspace.) | +| `MUX_TOOL_INPUT_TIMEOUT_SECS` | `timeout_secs` | number | Maximum time to wait in seconds for each task. For bash tasks, this waits for NEW output (or process exit). If exceeded, the result returns status=queued\|running\|awaiting_report (task is still active). Defaults to 600 seconds (10 minutes) if not specified. Set to 0 for a non-blocking status check. |
diff --git a/src/common/utils/tools/toolDefinitions.ts b/src/common/utils/tools/toolDefinitions.ts index bab773c4b9..0dea3965ec 100644 --- a/src/common/utils/tools/toolDefinitions.ts +++ b/src/common/utils/tools/toolDefinitions.ts @@ -228,14 +228,15 @@ export function buildTaskToolDescription(runtimeMode: RuntimeMode | undefined): "n and variants are mutually exclusive; omit both for a single task. Leave n and variants unset unless the developer explicitly asks for parallel sibling tasks, and prefer non-interfering sub-agents for grouped runs (for example read-only agents like explore). " + "\n\nWhen the user explicitly asks for best-of-n work, the parent should begin with light preliminary analysis to extract shared context, constraints, or evaluation criteria that would otherwise be duplicated across children. " + "Keep that pre-work lightweight: frame the task and provide useful starting points, but do not pre-solve the problem or over-constrain how the children reason about it. Then delegate the substantive analysis to the spawned sub-agents. " + - "Do not also do a full parallel analysis in the parent. When you are ready to synthesize the child reports, call task_await; do not await reflexively just because tasks are running. " + + "Do not also do a full parallel analysis in the parent. Call task_await when you are ready to act on child output; do not await reflexively just because tasks are running. " + + "task_await returns as soon as the first awaited task completes by default (min_completed), so you can start dependent work on each result as it lands instead of blocking on the whole batch; for best-of-N synthesis that must compare every candidate, pass min_completed equal to the batch size (or use a foreground grouped spawn, below). " + "\n\nWhen delegating, include a compact task brief (Task / Background / Scope / Starting points / Acceptance / Deliverables / Constraints). " + "Avoid telling the sub-agent to read your plan file; child workspaces do not automatically have access to it. " + "\n\nIf run_in_background is false, waits for the sub-agent to finish and returns the completed report. When grouped sibling tasks are requested via n or variants, the completed result includes one report per spawned task. " + "If the foreground wait times out, returns queued/running task metadata with a note (the task continues running); use task_await to monitor progress. " + "If run_in_background is true, returns immediately with queued/running task metadata; use task_await to wait for completion, task_list to rediscover active tasks, and task_terminate to stop it. " + "Prefer run_in_background: false when spawning a single task — it is equivalent to spawning background + immediately awaiting, but saves a round-trip. " + - "Use run_in_background: true when launching multiple tasks in parallel so you can await them as a batch. " + + "Use run_in_background: true when launching multiple tasks in parallel so you can act on each as it completes via task_await (which returns on the first completion by default); a foreground grouped spawn (run_in_background: false) instead blocks until every sibling finishes and returns all reports at once. " + "Do not call task_await in the same parallel tool-call batch; wait for the returned task metadata first. " + "Use the bash tool to run shell commands." ); @@ -453,6 +454,21 @@ export const TaskAwaitToolArgsSchema = z "Defaults to 600 seconds (10 minutes) if not specified. " + "Set to 0 for a non-blocking status check." ), + min_completed: z + .number() + .int() + .min(1) + .nullish() + .describe( + "Number of awaited tasks that must complete before this call returns. " + + "Defaults to 1, so by default task_await returns as soon as the FIRST awaited task completes, " + + "letting you act on it while the rest keep running. " + + "The result still includes every task complete at that moment plus current status (running/queued) for the rest. " + + "Tasks that have not yet completed keep running and remain re-awaitable on a later task_await call. " + + "Raise this (e.g. set it to the total number of awaited tasks) when you genuinely need more before proceeding — " + + "for example best-of-N synthesis that must compare every candidate. " + + "Clamped to the number of awaited tasks; values above that behave like 'wait for all'." + ), }) .strict() .superRefine((args, ctx) => { @@ -1466,7 +1482,11 @@ export const TOOL_DEFINITIONS = { "For bash tasks, you may optionally pass filter/filter_exclude to include/exclude output lines by regex. " + "WARNING: when using filter, non-matching lines are permanently discarded. " + "Use this tool to WAIT; do not poll task_list in a loop to wait for task completion (that is misuse and wastes tool calls). " + - "This is similar to Promise.allSettled(): you always get per-task results. " + + "\n\nBy default (min_completed=1) this returns as soon as the FIRST awaited task completes, so you can begin dependent work on that result while the rest keep running — then call task_await again for the remainder. " + + "This is ideal for independent lanes (variants) or any case where per-result work exists. " + + "Set min_completed higher (up to the number of awaited tasks) when you genuinely need more before proceeding — e.g. best-of-N synthesis that must compare every candidate should pass min_completed equal to the batch size. " + + "The result always includes every task complete at the moment it returns, plus current status for the rest; not-yet-completed tasks keep running and stay re-awaitable on a later call. " + + "You always get per-task results (like Promise.allSettled), just possibly before every task has finished. " + "Possible statuses: completed, queued, running, awaiting_report, not_found, invalid_scope, error. " + "Bash task outputs may be automatically filtered; when this happens, check each result's note for details and (if available) where the full output was saved.", schema: TaskAwaitToolArgsSchema, diff --git a/src/node/services/agentSkills/builtInSkillContent.generated.ts b/src/node/services/agentSkills/builtInSkillContent.generated.ts index 29c32a78e4..8a44125b9c 100644 --- a/src/node/services/agentSkills/builtInSkillContent.generated.ts +++ b/src/node/services/agentSkills/builtInSkillContent.generated.ts @@ -1903,6 +1903,7 @@ export const BUILTIN_SKILL_FILES: Record> = { "Before spawning the batch, do a small amount of preliminary analysis to capture shared context, constraints, or evaluation criteria that would otherwise be repeated by every child.", "Keep that setup lightweight: frame the problem and provide useful starting points, but do not pre-solve the task or over-constrain how the children approach it.", 'Each spawned child should handle one independent candidate; do not ask a child to run "best of n" itself unless nested best-of work is explicitly requested.', + "Picking the best candidate requires every report, so await the full batch (pass \\`task_await\\` \\`min_completed\\` equal to the batch size, or use a foreground grouped spawn) before selecting — but you may start setup-only work (e.g. preparing the evaluation rubric or integration scaffolding) as soon as the first candidate lands.", "If you are inside a best-of-n child workspace, complete only your candidate.", "", "", @@ -1910,6 +1911,7 @@ export const BUILTIN_SKILL_FILES: Record> = { "When the user gives a few items, scopes, ranges, or review lanes and the same prompt template applies to each, prefer the \\`task\\` tool's \\`variants\\` parameter instead of \\`n\\`.", "Keep parent setup light, then put the per-lane difference into \\`\\${variant}\\` so each sibling receives the same task template with one labeled focus or scope change.", "Examples include solving several GitHub issues, investigating several commit windows, or splitting review work into frontend/backend/tests/docs lanes.", + "Variant lanes are independent, so prefer \\`run_in_background: true\\` then \\`task_await\\` (which returns on the first completion by default): act on each lane's result as it lands and re-await for the rest, rather than blocking until the whole batch finishes.", "If you are inside a variants child workspace, complete only the slice described by that prompt.", "", "", @@ -4643,15 +4645,16 @@ export const BUILTIN_SKILL_FILES: Record> = { "", "", "
", - "task_await (5)", - "", - "| Env var | JSON path | Type | Description |", - "| --------------------------------- | ------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |", - "| `MUX_TOOL_INPUT_FILTER` | `filter` | string | Optional regex to filter bash task output lines. By default, only matching lines are returned. When filter_exclude is true, matching lines are excluded instead. Non-matching lines are discarded and cannot be retrieved later. |", - "| `MUX_TOOL_INPUT_FILTER_EXCLUDE` | `filter_exclude` | boolean | When true, lines matching 'filter' are excluded instead of kept. Requires 'filter' to be set. |", - "| `MUX_TOOL_INPUT_TASK_IDS_` | `task_ids[]` | string | List of task IDs to await — use only real IDs returned by prior task, bash, or task_list tool results; never fabricate an ID. When omitted, waits for all active descendant tasks of the current workspace. |", - "| `MUX_TOOL_INPUT_TASK_IDS_COUNT` | `task_ids.length` | number | Number of elements in task_ids (List of task IDs to await — use only real IDs returned by prior task, bash, or task_list tool results; never fabricate an ID. When omitted, waits for all active descendant tasks of the current workspace.) |", - "| `MUX_TOOL_INPUT_TIMEOUT_SECS` | `timeout_secs` | number | Maximum time to wait in seconds for each task. For bash tasks, this waits for NEW output (or process exit). If exceeded, the result returns status=queued\\|running\\|awaiting_report (task is still active). Defaults to 600 seconds (10 minutes) if not specified. Set to 0 for a non-blocking status check. |", + "task_await (6)", + "", + "| Env var | JSON path | Type | Description |", + "| --------------------------------- | ------------------- | ------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |", + "| `MUX_TOOL_INPUT_FILTER` | `filter` | string | Optional regex to filter bash task output lines. By default, only matching lines are returned. When filter_exclude is true, matching lines are excluded instead. Non-matching lines are discarded and cannot be retrieved later. |", + "| `MUX_TOOL_INPUT_FILTER_EXCLUDE` | `filter_exclude` | boolean | When true, lines matching 'filter' are excluded instead of kept. Requires 'filter' to be set. |", + "| `MUX_TOOL_INPUT_MIN_COMPLETED` | `min_completed` | number | Number of awaited tasks that must complete before this call returns. Defaults to 1, so by default task_await returns as soon as the FIRST awaited task completes, letting you act on it while the rest keep running. The result still includes every task complete at that moment plus current status (running/queued) for the rest. Tasks that have not yet completed keep running and remain re-awaitable on a later task_await call. Raise this (e.g. set it to the total number of awaited tasks) when you genuinely need more before proceeding — for example best-of-N synthesis that must compare every candidate. Clamped to the number of awaited tasks; values above that behave like 'wait for all'. |", + "| `MUX_TOOL_INPUT_TASK_IDS_` | `task_ids[]` | string | List of task IDs to await — use only real IDs returned by prior task, bash, or task_list tool results; never fabricate an ID. When omitted, waits for all active descendant tasks of the current workspace. |", + "| `MUX_TOOL_INPUT_TASK_IDS_COUNT` | `task_ids.length` | number | Number of elements in task_ids (List of task IDs to await — use only real IDs returned by prior task, bash, or task_list tool results; never fabricate an ID. When omitted, waits for all active descendant tasks of the current workspace.) |", + "| `MUX_TOOL_INPUT_TIMEOUT_SECS` | `timeout_secs` | number | Maximum time to wait in seconds for each task. For bash tasks, this waits for NEW output (or process exit). If exceeded, the result returns status=queued\\|running\\|awaiting_report (task is still active). Defaults to 600 seconds (10 minutes) if not specified. Set to 0 for a non-blocking status check. |", "", "
", "", diff --git a/src/node/services/systemMessage.ts b/src/node/services/systemMessage.ts index ff7710b213..b9fc94db24 100644 --- a/src/node/services/systemMessage.ts +++ b/src/node/services/systemMessage.ts @@ -86,6 +86,7 @@ When the user asks for "best of n" work, assume they want the \`task\` tool's \` Before spawning the batch, do a small amount of preliminary analysis to capture shared context, constraints, or evaluation criteria that would otherwise be repeated by every child. Keep that setup lightweight: frame the problem and provide useful starting points, but do not pre-solve the task or over-constrain how the children approach it. Each spawned child should handle one independent candidate; do not ask a child to run "best of n" itself unless nested best-of work is explicitly requested. +Picking the best candidate requires every report, so await the full batch (pass \`task_await\` \`min_completed\` equal to the batch size, or use a foreground grouped spawn) before selecting — but you may start setup-only work (e.g. preparing the evaluation rubric or integration scaffolding) as soon as the first candidate lands. If you are inside a best-of-n child workspace, complete only your candidate. @@ -93,6 +94,7 @@ If you are inside a best-of-n child workspace, complete only your candidate. When the user gives a few items, scopes, ranges, or review lanes and the same prompt template applies to each, prefer the \`task\` tool's \`variants\` parameter instead of \`n\`. Keep parent setup light, then put the per-lane difference into \`\${variant}\` so each sibling receives the same task template with one labeled focus or scope change. Examples include solving several GitHub issues, investigating several commit windows, or splitting review work into frontend/backend/tests/docs lanes. +Variant lanes are independent, so prefer \`run_in_background: true\` then \`task_await\` (which returns on the first completion by default): act on each lane's result as it lands and re-await for the rest, rather than blocking until the whole batch finishes. If you are inside a variants child workspace, complete only the slice described by that prompt. diff --git a/src/node/services/tools/task_await.test.ts b/src/node/services/tools/task_await.test.ts index 1df96ede31..2cd9c8aac2 100644 --- a/src/node/services/tools/task_await.test.ts +++ b/src/node/services/tools/task_await.test.ts @@ -713,4 +713,268 @@ describe("task_await tool", () => { }) ); }); + + it("returns after the first completion by default, leaving the rest running", async () => { + using tempDir = new TestTempDir("test-task-await-tool-min-completed-default"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" }); + + let t1Signal: AbortSignal | undefined; + let t2Signal: AbortSignal | undefined; + const waitForAgentReport = mock((taskId: string, opts: { abortSignal?: AbortSignal }) => { + if (taskId === "t1") { + t1Signal = opts.abortSignal; + return Promise.resolve({ reportMarkdown: "report:t1", title: "title:t1" }); + } + // t2 stays pending until its per-task signal is aborted (the early-stop detach), mirroring + // how the real waitForAgentReport rejects with "Interrupted" when its waiter is removed. + t2Signal = opts.abortSignal; + return new Promise((_resolve, reject) => { + opts.abortSignal?.addEventListener("abort", () => reject(new Error("Interrupted")), { + once: true, + }); + }); + }); + + const taskService = { + listActiveDescendantAgentTaskIds: mock(() => ["t1", "t2"]), + isDescendantAgentTask: mock(() => Promise.resolve(true)), + getAgentTaskStatus: mock(() => "running" as const), + waitForAgentReport, + } as unknown as TaskService; + + const tool = createTaskAwaitTool({ ...baseConfig, taskService }); + + const result: unknown = await Promise.resolve( + tool.execute!({ task_ids: ["t1", "t2"] }, mockToolCallOptions) + ); + + expect(result).toEqual({ + results: [ + { status: "completed", taskId: "t1", reportMarkdown: "report:t1", title: "title:t1" }, + { status: "running", taskId: "t2" }, + ], + }); + // The loser's wait is detached (so TaskService can drop its waiter) without terminating it, + // while the winner's wait is left untouched. + expect(t2Signal?.aborted).toBe(true); + expect(t1Signal?.aborted).toBe(false); + }); + + it("waits for every task when min_completed equals the batch size", async () => { + using tempDir = new TestTempDir("test-task-await-tool-min-completed-total"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" }); + + const waitForAgentReport = mock((taskId: string) => { + if (taskId === "t1") { + return Promise.resolve({ reportMarkdown: "report:t1", title: "title:t1" }); + } + // t2 finishes on a later macrotask; min_completed=2 must keep waiting for it rather than + // returning early after t1. + return new Promise((resolve) => + setTimeout(() => resolve({ reportMarkdown: "report:t2", title: "title:t2" }), 5) + ); + }); + + const taskService = { + listActiveDescendantAgentTaskIds: mock(() => ["t1", "t2"]), + isDescendantAgentTask: mock(() => Promise.resolve(true)), + waitForAgentReport, + } as unknown as TaskService; + + const tool = createTaskAwaitTool({ ...baseConfig, taskService }); + + const result: unknown = await Promise.resolve( + tool.execute!({ task_ids: ["t1", "t2"], min_completed: 2 }, mockToolCallOptions) + ); + + expect(result).toEqual({ + results: [ + { status: "completed", taskId: "t1", reportMarkdown: "report:t1", title: "title:t1" }, + { status: "completed", taskId: "t2", reportMarkdown: "report:t2", title: "title:t2" }, + ], + }); + }); + + it("returns after the k-th completion when min_completed=k", async () => { + using tempDir = new TestTempDir("test-task-await-tool-min-completed-k"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" }); + + const waitForAgentReport = mock((taskId: string, opts: { abortSignal?: AbortSignal }) => { + if (taskId === "t1" || taskId === "t2") { + return Promise.resolve({ reportMarkdown: `report:${taskId}`, title: `title:${taskId}` }); + } + return new Promise((_resolve, reject) => { + opts.abortSignal?.addEventListener("abort", () => reject(new Error("Interrupted")), { + once: true, + }); + }); + }); + + const taskService = { + listActiveDescendantAgentTaskIds: mock(() => ["t1", "t2", "t3"]), + isDescendantAgentTask: mock(() => Promise.resolve(true)), + getAgentTaskStatus: mock(() => "running" as const), + waitForAgentReport, + } as unknown as TaskService; + + const tool = createTaskAwaitTool({ ...baseConfig, taskService }); + + const result: unknown = await Promise.resolve( + tool.execute!({ task_ids: ["t1", "t2", "t3"], min_completed: 2 }, mockToolCallOptions) + ); + + expect(result).toEqual({ + results: [ + { status: "completed", taskId: "t1", reportMarkdown: "report:t1", title: "title:t1" }, + { status: "completed", taskId: "t2", reportMarkdown: "report:t2", title: "title:t2" }, + { status: "running", taskId: "t3" }, + ], + }); + }); + + it("clamps min_completed above the awaited count to wait for all", async () => { + using tempDir = new TestTempDir("test-task-await-tool-min-completed-clamp"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" }); + + const waitForAgentReport = mock((taskId: string) => + Promise.resolve({ reportMarkdown: `report:${taskId}`, title: `title:${taskId}` }) + ); + + const taskService = { + listActiveDescendantAgentTaskIds: mock(() => ["t1", "t2"]), + isDescendantAgentTask: mock(() => Promise.resolve(true)), + waitForAgentReport, + } as unknown as TaskService; + + const tool = createTaskAwaitTool({ ...baseConfig, taskService }); + + const result: unknown = await Promise.resolve( + tool.execute!({ task_ids: ["t1", "t2"], min_completed: 50 }, mockToolCallOptions) + ); + + expect(result).toEqual({ + results: [ + { status: "completed", taskId: "t1", reportMarkdown: "report:t1", title: "title:t1" }, + { status: "completed", taskId: "t2", reportMarkdown: "report:t2", title: "title:t2" }, + ], + }); + }); + + it("returns promptly when min_completed can no longer be reached", async () => { + using tempDir = new TestTempDir("test-task-await-tool-min-completed-unreachable"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" }); + + const waitForAgentReport = mock((taskId: string) => { + if (taskId === "t1") { + return Promise.resolve({ reportMarkdown: "report:t1", title: "title:t1" }); + } + // t2 fails outright, so two completions are impossible — the call must still return once + // every task has settled rather than blocking forever. + return Promise.reject(new Error("Boom")); + }); + + const taskService = { + listActiveDescendantAgentTaskIds: mock(() => ["t1", "t2"]), + isDescendantAgentTask: mock(() => Promise.resolve(true)), + getAgentTaskStatus: mock(() => null), + waitForAgentReport, + } as unknown as TaskService; + + const tool = createTaskAwaitTool({ ...baseConfig, taskService }); + + const result: unknown = await Promise.resolve( + tool.execute!({ task_ids: ["t1", "t2"], min_completed: 2 }, mockToolCallOptions) + ); + + expect(result).toEqual({ + results: [ + { status: "completed", taskId: "t1", reportMarkdown: "report:t1", title: "title:t1" }, + { status: "error", taskId: "t2", error: "Boom" }, + ], + }); + }); + + it("keeps a previously-running task awaitable on a later call", async () => { + using tempDir = new TestTempDir("test-task-await-tool-min-completed-reawait"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" }); + + let t2Ready = false; + const waitForAgentReport = mock((taskId: string, opts: { abortSignal?: AbortSignal }) => { + if (taskId === "t1") { + return Promise.resolve({ reportMarkdown: "report:t1", title: "title:t1" }); + } + if (t2Ready) { + // Simulates the cached report becoming available after the child finishes. + return Promise.resolve({ reportMarkdown: "report:t2", title: "title:t2" }); + } + return new Promise((_resolve, reject) => { + opts.abortSignal?.addEventListener("abort", () => reject(new Error("Interrupted")), { + once: true, + }); + }); + }); + + const taskService = { + listActiveDescendantAgentTaskIds: mock(() => ["t1", "t2"]), + isDescendantAgentTask: mock(() => Promise.resolve(true)), + getAgentTaskStatus: mock(() => "running" as const), + waitForAgentReport, + } as unknown as TaskService; + + const tool = createTaskAwaitTool({ ...baseConfig, taskService }); + + const firstResult: unknown = await Promise.resolve( + tool.execute!({ task_ids: ["t1", "t2"] }, mockToolCallOptions) + ); + expect(firstResult).toEqual({ + results: [ + { status: "completed", taskId: "t1", reportMarkdown: "report:t1", title: "title:t1" }, + { status: "running", taskId: "t2" }, + ], + }); + + t2Ready = true; + const secondResult: unknown = await Promise.resolve( + tool.execute!({ task_ids: ["t2"] }, mockToolCallOptions) + ); + expect(secondResult).toEqual({ + results: [ + { status: "completed", taskId: "t2", reportMarkdown: "report:t2", title: "title:t2" }, + ], + }); + }); + + it("treats timeout_secs=0 as non-blocking regardless of min_completed", async () => { + using tempDir = new TestTempDir("test-task-await-tool-min-completed-timeout-zero"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" }); + + const waitForAgentReport = mock(() => { + throw new Error("waitForAgentReport should not be called for timeout_secs=0"); + }); + const getAgentTaskStatus = mock(() => "running" as const); + + const taskService = { + listActiveDescendantAgentTaskIds: mock(() => ["t1", "t2"]), + isDescendantAgentTask: mock(() => Promise.resolve(true)), + getAgentTaskStatus, + waitForAgentReport, + } as unknown as TaskService; + + const tool = createTaskAwaitTool({ ...baseConfig, taskService }); + + const result: unknown = await Promise.resolve( + tool.execute!( + { task_ids: ["t1", "t2"], timeout_secs: 0, min_completed: 5 }, + mockToolCallOptions + ) + ); + + expect(result).toEqual({ + results: [ + { status: "running", taskId: "t1" }, + { status: "running", taskId: "t2" }, + ], + }); + expect(waitForAgentReport).toHaveBeenCalledTimes(0); + }); }); diff --git a/src/node/services/tools/task_await.ts b/src/node/services/tools/task_await.ts index 5d05b28d69..36f28b9cd8 100644 --- a/src/node/services/tools/task_await.ts +++ b/src/node/services/tools/task_await.ts @@ -167,134 +167,113 @@ export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => { ? taskService.getAgentTaskStatuses(rejectedAgentTaskIds) : new Map(); - const results = await Promise.all( - uniqueTaskIds.map(async (taskId) => { - const maybeProcessId = fromBashTaskId(taskId); - if (taskId.startsWith("bash:") && !maybeProcessId) { - return { status: "error" as const, taskId, error: "Invalid bash taskId." }; - } - - if (maybeProcessId) { - if (!config.backgroundProcessManager) { - return { - status: "error" as const, - taskId, - error: "Background process manager not available", - }; - } + // task_await resolves once `min_completed` tasks have completed (default 1 = return on the + // first completion) rather than always blocking on every awaited task. Each task gets its + // own AbortController chained to the tool-call signal so that, once we have enough + // completions, we can detach the still-pending waiters/reads without terminating those + // children — they keep running and remain re-awaitable later (reports stay cached in + // TaskService and the child's bash poll is merely interrupted, not killed). + const awaitOne = async (taskId: string, taskSignal: AbortSignal) => { + const maybeProcessId = fromBashTaskId(taskId); + if (taskId.startsWith("bash:") && !maybeProcessId) { + return { status: "error" as const, taskId, error: "Invalid bash taskId." }; + } - const proc = await config.backgroundProcessManager.getProcess(maybeProcessId); - if (!proc) { - return { status: "not_found" as const, taskId }; - } + if (maybeProcessId) { + if (!config.backgroundProcessManager) { + return { + status: "error" as const, + taskId, + error: "Background process manager not available", + }; + } - const inScope = - proc.workspaceId === workspaceId || - (await taskService.isDescendantAgentTask(workspaceId, proc.workspaceId)); - if (!inScope) { - return { status: "invalid_scope" as const, taskId }; - } + const proc = await config.backgroundProcessManager.getProcess(maybeProcessId); + if (!proc) { + return { status: "not_found" as const, taskId }; + } - const outputResult = await config.backgroundProcessManager.getOutput( - maybeProcessId, - args.filter ?? undefined, - args.filter_exclude ?? undefined, - timeoutSecsForBash, - abortSignal, - workspaceId, - "task_await" - ); - - if (!outputResult.success) { - return { status: "error" as const, taskId, error: outputResult.error }; - } + const inScope = + proc.workspaceId === workspaceId || + (await taskService.isDescendantAgentTask(workspaceId, proc.workspaceId)); + if (!inScope) { + return { status: "invalid_scope" as const, taskId }; + } - if (outputResult.status === "running" || outputResult.status === "interrupted") { - return { - status: "running" as const, - taskId, - output: outputResult.output, - elapsed_ms: outputResult.elapsed_ms, - note: outputResult.note, - }; - } + const outputResult = await config.backgroundProcessManager.getOutput( + maybeProcessId, + args.filter ?? undefined, + args.filter_exclude ?? undefined, + timeoutSecsForBash, + taskSignal, + workspaceId, + "task_await" + ); + + if (!outputResult.success) { + return { status: "error" as const, taskId, error: outputResult.error }; + } + if (outputResult.status === "running" || outputResult.status === "interrupted") { return { - status: "completed" as const, + status: "running" as const, taskId, - title: proc.displayName ?? proc.id, - reportMarkdown: formatBashOutputReport({ - processId: proc.id, - status: outputResult.status, - exitCode: outputResult.exitCode, - output: outputResult.output, - }), + output: outputResult.output, elapsed_ms: outputResult.elapsed_ms, - exitCode: outputResult.exitCode, note: outputResult.note, }; } - if (!descendantAgentTaskIdSet.has(taskId)) { - const lookup = rejectedAgentTaskStatuses.get(taskId); - const activeTaskIds = - activeDescendantAgentTaskIds.length > 0 ? activeDescendantAgentTaskIds : undefined; - if (requestedIds) { - const suggestedTaskIds = dedupeStrings([ - ...activeDescendantAgentTaskIds, - ...(await getSuggestionBashTaskIds()), - ]); - if (suggestedTaskIds.length > 0) { - return buildTaskAwaitSequencingError(taskId, suggestedTaskIds); - } - } - if (!lookup?.exists) { - return { status: "not_found" as const, taskId, activeTaskIds }; - } - return { status: "invalid_scope" as const, taskId, activeTaskIds }; - } + return { + status: "completed" as const, + taskId, + title: proc.displayName ?? proc.id, + reportMarkdown: formatBashOutputReport({ + processId: proc.id, + status: outputResult.status, + exitCode: outputResult.exitCode, + output: outputResult.output, + }), + elapsed_ms: outputResult.elapsed_ms, + exitCode: outputResult.exitCode, + note: outputResult.note, + }; + } - // When timeout_secs=0 (or rounds down to 0ms), task_await should be non-blocking. - // `waitForAgentReport` asserts timeoutMs > 0, so handle 0 explicitly by returning the - // current task status instead of awaiting. - if (timeoutMs === 0) { - const status = taskService.getAgentTaskStatus(taskId); - if (isAgentTaskActiveStatus(status)) { - return { status, taskId, ...getAgentTaskElapsedField(taskId) }; + if (!descendantAgentTaskIdSet.has(taskId)) { + const lookup = rejectedAgentTaskStatuses.get(taskId); + const activeTaskIds = + activeDescendantAgentTaskIds.length > 0 ? activeDescendantAgentTaskIds : undefined; + if (requestedIds) { + const suggestedTaskIds = dedupeStrings([ + ...activeDescendantAgentTaskIds, + ...(await getSuggestionBashTaskIds()), + ]); + if (suggestedTaskIds.length > 0) { + return buildTaskAwaitSequencingError(taskId, suggestedTaskIds); } + } + if (!lookup?.exists) { + return { status: "not_found" as const, taskId, activeTaskIds }; + } + return { status: "invalid_scope" as const, taskId, activeTaskIds }; + } - // Best-effort: the task might already have a cached report (even if its workspace was - // cleaned up). Avoid blocking when it isn't available. - try { - const report = await taskService.waitForAgentReport(taskId, { - timeoutMs: 1, - abortSignal, - requestingWorkspaceId: workspaceId, - backgroundOnMessageQueued: true, - }); - - const gitFormatPatch = await readGitFormatPatchArtifact(taskId); - return { - status: "completed" as const, - taskId, - reportMarkdown: report.reportMarkdown, - title: report.title, - ...getAgentTaskElapsedField(taskId), - ...(gitFormatPatch ? { artifacts: { gitFormatPatch } } : {}), - }; - } catch (error: unknown) { - const message = getErrorMessage(error); - if (/not found/i.test(message)) { - return { status: "not_found" as const, taskId }; - } - return { status: "error" as const, taskId, error: message }; - } + // When timeout_secs=0 (or rounds down to 0ms), task_await should be non-blocking. + // `waitForAgentReport` asserts timeoutMs > 0, so handle 0 explicitly by returning the + // current task status instead of awaiting. + if (timeoutMs === 0) { + const status = taskService.getAgentTaskStatus(taskId); + if (isAgentTaskActiveStatus(status)) { + return { status, taskId, ...getAgentTaskElapsedField(taskId) }; } + // Best-effort: the task might already have a cached report (even if its workspace was + // cleaned up). Avoid blocking when it isn't available. try { const report = await taskService.waitForAgentReport(taskId, { - timeoutMs, - abortSignal, + timeoutMs: 1, + abortSignal: taskSignal, requestingWorkspaceId: workspaceId, backgroundOnMessageQueued: true, }); @@ -309,45 +288,156 @@ export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => { ...(gitFormatPatch ? { artifacts: { gitFormatPatch } } : {}), }; } catch (error: unknown) { - if (error instanceof ForegroundWaitBackgroundedError) { - const currentStatus = taskService.getAgentTaskStatus(taskId); - const normalizedStatus = isAgentTaskActiveStatus(currentStatus) - ? currentStatus - : ("running" as const); - return { - status: normalizedStatus, - taskId, - ...getAgentTaskElapsedField(taskId), - note: "Task sent to background because a new message was queued. Use task_await to monitor progress.", - }; - } - - if (abortSignal?.aborted) { - return { status: "error" as const, taskId, error: "Interrupted" }; - } - const message = getErrorMessage(error); if (/not found/i.test(message)) { return { status: "not_found" as const, taskId }; } - if (/timed out/i.test(message)) { - const status = taskService.getAgentTaskStatus(taskId); - if (isAgentTaskActiveStatus(status)) { - return { status, taskId, ...getAgentTaskElapsedField(taskId) }; - } - if (!status) { - return { status: "not_found" as const, taskId }; - } - return { - status: "error" as const, - taskId, - error: `Task status is '${status}' (not awaitable via task_await).`, - }; - } return { status: "error" as const, taskId, error: message }; } - }) - ); + } + + try { + const report = await taskService.waitForAgentReport(taskId, { + timeoutMs, + abortSignal: taskSignal, + requestingWorkspaceId: workspaceId, + backgroundOnMessageQueued: true, + }); + + const gitFormatPatch = await readGitFormatPatchArtifact(taskId); + return { + status: "completed" as const, + taskId, + reportMarkdown: report.reportMarkdown, + title: report.title, + ...getAgentTaskElapsedField(taskId), + ...(gitFormatPatch ? { artifacts: { gitFormatPatch } } : {}), + }; + } catch (error: unknown) { + if (error instanceof ForegroundWaitBackgroundedError) { + const currentStatus = taskService.getAgentTaskStatus(taskId); + const normalizedStatus = isAgentTaskActiveStatus(currentStatus) + ? currentStatus + : ("running" as const); + return { + status: normalizedStatus, + taskId, + ...getAgentTaskElapsedField(taskId), + note: "Task sent to background because a new message was queued. Use task_await to monitor progress.", + }; + } + + if (abortSignal?.aborted) { + return { status: "error" as const, taskId, error: "Interrupted" }; + } + + // Intentional early-stop: this task's per-task signal was aborted because + // `min_completed` was already satisfied by other tasks (the outer tool-call signal is + // not aborted). The child keeps running and its report stays re-awaitable on a later + // task_await call, so report a live status snapshot instead of an error. + if (taskSignal.aborted) { + const status = taskService.getAgentTaskStatus(taskId); + const normalizedStatus = isAgentTaskActiveStatus(status) + ? status + : ("running" as const); + return { status: normalizedStatus, taskId, ...getAgentTaskElapsedField(taskId) }; + } + + const message = getErrorMessage(error); + if (/not found/i.test(message)) { + return { status: "not_found" as const, taskId }; + } + if (/timed out/i.test(message)) { + const status = taskService.getAgentTaskStatus(taskId); + if (isAgentTaskActiveStatus(status)) { + return { status, taskId, ...getAgentTaskElapsedField(taskId) }; + } + if (!status) { + return { status: "not_found" as const, taskId }; + } + return { + status: "error" as const, + taskId, + error: `Task status is '${status}' (not awaitable via task_await).`, + }; + } + return { status: "error" as const, taskId, error: message }; + } + }; + + const requestedMinCompleted = typeof args.min_completed === "number" ? args.min_completed : 1; + // Clamp to [1, number of awaited tasks]; values above the count behave like "wait for all". + // timeout_secs=0 is an explicit non-blocking snapshot of every task, so never early-return + // there — wait for all per-task results (which all resolve immediately) instead. + const wantCount = + timeoutMs === 0 + ? Math.max(uniqueTaskIds.length, 1) + : Math.min(Math.max(requestedMinCompleted, 1), Math.max(uniqueTaskIds.length, 1)); + + const taskControllers = new Map(); + for (const taskId of uniqueTaskIds) { + const controller = new AbortController(); + // Propagate a real tool-call interrupt to every per-task wait. + if (abortSignal) { + if (abortSignal.aborted) { + controller.abort(); + } else { + abortSignal.addEventListener("abort", () => controller.abort(), { once: true }); + } + } + taskControllers.set(taskId, controller); + } + + const resultsByTaskId = new Map>>(); + let completedCount = 0; + const taskPromises = uniqueTaskIds.map((taskId) => { + const promise = awaitOne(taskId, taskControllers.get(taskId)!.signal); + // Record results as they settle so we can both count completions and assemble the final + // array. Registered before the gate listener below, so recording always runs first for a + // given promise. + void promise.then((res) => { + resultsByTaskId.set(taskId, res); + if (res.status === "completed") { + completedCount += 1; + } + }); + return promise; + }); + + // Resolve once `wantCount` tasks have completed, or every awaited task has otherwise settled + // (failed/interrupted/timed out) — so an unreachable threshold still returns promptly. + await new Promise((resolveGate) => { + if (uniqueTaskIds.length === 0) { + resolveGate(); + return; + } + let gateResolved = false; + const checkGate = () => { + if (gateResolved) return; + if (completedCount >= wantCount || resultsByTaskId.size >= uniqueTaskIds.length) { + gateResolved = true; + resolveGate(); + } + }; + for (const promise of taskPromises) { + void promise.then(checkGate, checkGate); + } + }); + + // Detach the still-pending waiters/reads for tasks we are not returning as completed. + // Aborting only removes the in-memory waiter (and interrupts a bash poll); the child keeps + // running and its report stays cached for a later task_await call. + for (const [taskId, controller] of taskControllers) { + if (!resultsByTaskId.has(taskId)) { + controller.abort(); + } + } + + // Aborted waits resolve to live-status snapshots (agent) or running output (bash); wait for + // those to land so every awaited task has a result before assembling the ordered array. + await Promise.all(taskPromises); + + const results = uniqueTaskIds.map((taskId) => resultsByTaskId.get(taskId)!); return parseToolResult(TaskAwaitToolResultSchema, { results }, "task_await"); }, From 7a5f03860184c51dbff6e4587b605f11694e8f2a Mon Sep 17 00:00:00 2001 From: Ammar Date: Thu, 28 May 2026 15:38:24 -0500 Subject: [PATCH 2/3] fix: preserve buffered bash output when a read is interrupted task_await's early-return aborts still-pending bash getOutput reads once min_completed is satisfied. getOutput's interrupted path advanced outputBytesRead without flushing accumulatedRaw, so an aborted read could drop an unread line fragment and the next read would resume past it. Save the consumed bytes back into incompleteLineBuffer before returning. Adds a deterministic regression test. --- .../services/backgroundProcessManager.test.ts | 37 +++++++++++++++++++ src/node/services/backgroundProcessManager.ts | 6 +++ 2 files changed, 43 insertions(+) diff --git a/src/node/services/backgroundProcessManager.test.ts b/src/node/services/backgroundProcessManager.test.ts index c917520112..b8cb34cab6 100644 --- a/src/node/services/backgroundProcessManager.test.ts +++ b/src/node/services/backgroundProcessManager.test.ts @@ -491,6 +491,43 @@ describe("BackgroundProcessManager", () => { expect(output2.output).not.toContain("line 1"); }); + it("preserves buffered output when a read is interrupted before a newline arrives", async () => { + // Regression: aborting a pending getOutput (e.g. task_await returning early once + // min_completed is satisfied) must not drop bytes already consumed from the log. We append + // to the log directly so the scenario is deterministic and not dependent on stdio flushing. + const result = await manager.spawn(runtime, testWorkspaceId, "sleep 60", { + cwd: process.cwd(), + displayName: "test", + }); + expect(result.success).toBe(true); + if (!result.success) return; + + const outputPath = path.join(result.outputDir, "output.log"); + // A line fragment with no trailing newline yields no "meaningful" (complete) line, so the + // read falls through to the interrupt check after consuming (and advancing past) the bytes. + await fs.appendFile(outputPath, "partial", "utf-8"); + + const controller = new AbortController(); + controller.abort(); + const interrupted = await manager.getOutput( + result.processId, + undefined, + undefined, + 2, + controller.signal + ); + expect(interrupted.success).toBe(true); + if (!interrupted.success) return; + expect(interrupted.status).toBe("interrupted"); + + // Complete the line; the next read must include the previously-consumed fragment. + await fs.appendFile(outputPath, "done\n", "utf-8"); + const resumed = await manager.getOutput(result.processId, undefined, undefined, 1); + expect(resumed.success).toBe(true); + if (!resumed.success) return; + expect(resumed.output).toContain("partialdone"); + }); + it("should return stderr from a running process", async () => { const result = await manager.spawn(runtime, testWorkspaceId, "echo 'error message' >&2", { cwd: process.cwd(), diff --git a/src/node/services/backgroundProcessManager.ts b/src/node/services/backgroundProcessManager.ts index fdf4c5556c..f265d887b0 100644 --- a/src/node/services/backgroundProcessManager.ts +++ b/src/node/services/backgroundProcessManager.ts @@ -625,6 +625,12 @@ export class BackgroundProcessManager extends EventEmitter Date: Thu, 28 May 2026 15:46:25 -0500 Subject: [PATCH 3/3] fix: convert stray task_await waiter rejections into error results If an awaitOne path rejected outside its internal catches (e.g. a bash getProcess/getOutput read throwing), the recording callback never inserted a result, so the min_completed gate could never reach the threshold or all-settled and would stall. Wrap each per-task promise so a stray rejection becomes a recorded error result; the task counts as settled and the call returns promptly. Adds a regression test. --- src/node/services/tools/task_await.test.ts | 34 ++++++++++++++++++++++ src/node/services/tools/task_await.ts | 12 +++++++- 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/src/node/services/tools/task_await.test.ts b/src/node/services/tools/task_await.test.ts index 2cd9c8aac2..c94a8d3b36 100644 --- a/src/node/services/tools/task_await.test.ts +++ b/src/node/services/tools/task_await.test.ts @@ -977,4 +977,38 @@ describe("task_await tool", () => { }); expect(waitForAgentReport).toHaveBeenCalledTimes(0); }); + + it("surfaces a waiter that rejects outside its internal catches without stalling", async () => { + using tempDir = new TestTempDir("test-task-await-tool-min-completed-reject"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "parent-workspace" }); + + const waitForAgentReport = mock(() => + Promise.resolve({ reportMarkdown: "report:t1", title: "title:t1" }) + ); + // A bash read whose getProcess rejects escapes awaitOne's per-path catches; the call must + // still settle that task as an error so min_completed=2 can fall back to "all settled". + const backgroundProcessManager = { + list: mock(() => Promise.resolve([])), + getProcess: mock(() => Promise.reject(new Error("proc boom"))), + } as unknown as BackgroundProcessManager; + + const taskService = { + listActiveDescendantAgentTaskIds: mock(() => ["t1"]), + isDescendantAgentTask: mock(() => Promise.resolve(true)), + waitForAgentReport, + } as unknown as TaskService; + + const tool = createTaskAwaitTool({ ...baseConfig, backgroundProcessManager, taskService }); + + const result: unknown = await Promise.resolve( + tool.execute!({ task_ids: ["t1", "bash:p1"], min_completed: 2 }, mockToolCallOptions) + ); + + expect(result).toEqual({ + results: [ + { status: "completed", taskId: "t1", reportMarkdown: "report:t1", title: "title:t1" }, + { status: "error", taskId: "bash:p1", error: "proc boom" }, + ], + }); + }); }); diff --git a/src/node/services/tools/task_await.ts b/src/node/services/tools/task_await.ts index 36f28b9cd8..5715f24c43 100644 --- a/src/node/services/tools/task_await.ts +++ b/src/node/services/tools/task_await.ts @@ -391,7 +391,17 @@ export const createTaskAwaitTool: ToolFactory = (config: ToolConfiguration) => { const resultsByTaskId = new Map>>(); let completedCount = 0; const taskPromises = uniqueTaskIds.map((taskId) => { - const promise = awaitOne(taskId, taskControllers.get(taskId)!.signal); + // awaitOne resolves to a result object for every documented path, but a few calls (e.g. the + // bash getProcess/getOutput reads) run outside its internal try/catch and could reject. + // Convert any stray rejection into an `error` result so the task still counts as settled — + // otherwise the gate below could never reach `wantCount` or "all settled" and would stall. + const promise = awaitOne(taskId, taskControllers.get(taskId)!.signal).catch( + (error: unknown): Awaited> => ({ + status: "error", + taskId, + error: getErrorMessage(error), + }) + ); // Record results as they settle so we can both count completions and assemble the final // array. Registered before the gate listener below, so recording always runs first for a // given promise.