diff --git a/.github/workflows/approve-contributor.yml b/.github/workflows/approve-contributor.yml index bdd54e0260..6110c88c71 100644 --- a/.github/workflows/approve-contributor.yml +++ b/.github/workflows/approve-contributor.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Open allowlist update PR - uses: actions/github-script@v7 + uses: actions/github-script@v9 with: script: | const comment = context.payload.comment; diff --git a/.github/workflows/auto-close-harvested.yml b/.github/workflows/auto-close-harvested.yml index 1547ab9615..0fd6c82b36 100644 --- a/.github/workflows/auto-close-harvested.yml +++ b/.github/workflows/auto-close-harvested.yml @@ -38,7 +38,7 @@ jobs: close: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v7 with: # We need at least the commits that this push introduced. # fetch-depth: 0 is the simplest correct option; the diff --git a/.github/workflows/auto-tag.yml b/.github/workflows/auto-tag.yml index 80ede91e14..9fddbba9d7 100644 --- a/.github/workflows/auto-tag.yml +++ b/.github/workflows/auto-tag.yml @@ -28,7 +28,7 @@ jobs: tag: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v7 with: fetch-depth: 0 # Prefer PAT so the resulting tag push triggers release.yml. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 701eb32e10..a42c18dfc6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,9 +20,9 @@ jobs: name: Version drift runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v7 - uses: dtolnay/rust-toolchain@stable - - uses: actions/setup-node@v4 + - uses: actions/setup-node@v6 with: node-version: 20 - name: Check version drift @@ -34,7 +34,7 @@ jobs: name: Lint runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v7 with: fetch-depth: 0 - uses: dtolnay/rust-toolchain@master @@ -89,7 +89,7 @@ jobs: # coverage CNB cannot provide. 
os: [ubuntu-latest, macos-latest, windows-latest] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v7 if: runner.os != 'Linux' - uses: dtolnay/rust-toolchain@stable if: runner.os != 'Linux' @@ -118,11 +118,11 @@ jobs: matrix: os: ${{ fromJSON(github.event_name == 'pull_request' && '["ubuntu-latest"]' || '["ubuntu-latest","macos-latest","windows-latest"]') }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v7 if: runner.os != 'Linux' - uses: dtolnay/rust-toolchain@stable if: runner.os != 'Linux' - - uses: actions/setup-node@v4 + - uses: actions/setup-node@v6 if: runner.os != 'Linux' with: node-version: 20 @@ -145,7 +145,7 @@ jobs: if: github.event_name != 'schedule' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v7 - uses: dtolnay/rust-toolchain@stable - name: Install Linux system dependencies run: | @@ -167,7 +167,7 @@ jobs: if: github.event_name == 'schedule' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v7 - uses: dtolnay/rust-toolchain@stable - name: Install Linux system dependencies if: runner.os == 'Linux' diff --git a/.github/workflows/issue-gate.yml b/.github/workflows/issue-gate.yml index 8ca8c40110..f17d497c7b 100644 --- a/.github/workflows/issue-gate.yml +++ b/.github/workflows/issue-gate.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Welcome new external issue reporters - uses: actions/github-script@v7 + uses: actions/github-script@v9 with: script: | const issue = context.payload.issue; diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 035193ef1b..eeba3df758 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -76,7 +76,7 @@ jobs: artifact_name: codewhale-tui-windows-x64.exe runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v7 - uses: dtolnay/rust-toolchain@stable with: targets: ${{ matrix.target }} @@ -141,7 +141,7 @@ jobs: 
artifact=${{ matrix.artifact_name }} INFO echo "name=${{ matrix.artifact_name }}-${short_sha}" >> "${GITHUB_OUTPUT}" - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v7 with: name: ${{ steps.stage.outputs.name }} path: nightly/* diff --git a/.github/workflows/pr-gate.yml b/.github/workflows/pr-gate.yml index a953b3f65b..7ace3d1af8 100644 --- a/.github/workflows/pr-gate.yml +++ b/.github/workflows/pr-gate.yml @@ -19,7 +19,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Gate unapproved external pull requests - uses: actions/github-script@v7 + uses: actions/github-script@v9 with: script: | const pr = context.payload.pull_request; diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 916269434b..8280a0336e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -22,7 +22,7 @@ jobs: if: github.event_name == 'push' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v7 - uses: dtolnay/rust-toolchain@master with: toolchain: '1.88' @@ -69,7 +69,7 @@ jobs: source_ref: ${{ steps.release.outputs.source_ref }} sha: ${{ steps.release.outputs.sha }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v7 with: fetch-depth: 0 - name: Resolve release source @@ -166,7 +166,7 @@ jobs: artifact_name: codewhale-tui-windows-x64.exe runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v7 with: ref: ${{ needs.resolve.outputs.source_ref }} - uses: dtolnay/rust-toolchain@master @@ -251,7 +251,7 @@ jobs: exit 1 fi cp "${BIN_PATH}" "${{ matrix.artifact_name }}" - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v7 with: name: ${{ matrix.artifact_name }} path: ${{ matrix.artifact_name }} @@ -261,7 +261,7 @@ jobs: if: ${{ !cancelled() && needs.build.result == 'success' }} runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v7 with: ref: ${{ needs.resolve.outputs.source_ref }} - uses: 
actions/download-artifact@v8 @@ -351,7 +351,7 @@ jobs: cat "$MANIFEST" - name: Upload bundle artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: codewhale-bundles path: bundles/* @@ -362,7 +362,7 @@ jobs: if: ${{ !cancelled() && needs.build.result == 'success' }} runs-on: windows-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v7 with: ref: ${{ needs.resolve.outputs.source_ref }} - uses: actions/download-artifact@v8 @@ -393,7 +393,7 @@ jobs: throw "CodeWhaleSetup.exe was not produced" } - name: Upload installer artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v7 with: name: CodeWhaleSetup.exe path: scripts/installer/CodeWhaleSetup.exe @@ -408,12 +408,12 @@ jobs: packages: write steps: - name: Checkout release source - uses: actions/checkout@v4 + uses: actions/checkout@v7 with: ref: ${{ needs.resolve.outputs.source_ref }} path: source - name: Checkout release infrastructure - uses: actions/checkout@v4 + uses: actions/checkout@v7 with: path: infra - name: Set up QEMU @@ -432,7 +432,7 @@ jobs: run: echo "name=ghcr.io/${GITHUB_REPOSITORY,,}" >> "$GITHUB_OUTPUT" - name: Extract metadata id: meta - uses: docker/metadata-action@v5 + uses: docker/metadata-action@v6 with: images: | ${{ steps.image.outputs.name }} @@ -476,7 +476,7 @@ jobs: steps: # Checked out into a subdirectory so it cannot clobber the downloaded # artifacts; used for the release-body generator and the CHANGELOG. - - uses: actions/checkout@v4 + - uses: actions/checkout@v7 with: ref: ${{ needs.resolve.outputs.tag }} path: repo @@ -547,7 +547,7 @@ jobs: fi # Checkout main (not the tag) so the release-infra script is always # available, even for tags created before this workflow was added. 
- - uses: actions/checkout@v4 + - uses: actions/checkout@v7 if: steps.homebrew-token.outputs.available == 'true' with: ref: main diff --git a/.github/workflows/spam-lockdown.yml b/.github/workflows/spam-lockdown.yml index 17f11bf0db..1142c01486 100644 --- a/.github/workflows/spam-lockdown.yml +++ b/.github/workflows/spam-lockdown.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Auto-close spam patterns from new accounts - uses: actions/github-script@v7 + uses: actions/github-script@v9 with: script: | const issue = context.payload.issue; diff --git a/.github/workflows/sync-cnb.yml b/.github/workflows/sync-cnb.yml index 33c7cfe1d3..034bc3ef68 100644 --- a/.github/workflows/sync-cnb.yml +++ b/.github/workflows/sync-cnb.yml @@ -49,7 +49,7 @@ jobs: sync: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v7 with: fetch-depth: 0 diff --git a/.github/workflows/triage.yml b/.github/workflows/triage.yml index 4c7ad25b5c..3b47b2576f 100644 --- a/.github/workflows/triage.yml +++ b/.github/workflows/triage.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Auto-label by title and body - uses: actions/github-script@v7 + uses: actions/github-script@v9 with: script: | const issue = context.payload.issue; diff --git a/.github/workflows/web.yml b/.github/workflows/web.yml index a5f0e2d8bb..84fc5ad6ed 100644 --- a/.github/workflows/web.yml +++ b/.github/workflows/web.yml @@ -24,14 +24,19 @@ jobs: run: working-directory: web steps: - - uses: actions/checkout@v4 - - uses: actions/setup-node@v4 + - uses: actions/checkout@v7 + - uses: actions/setup-node@v6 with: node-version: 22 cache: 'npm' cache-dependency-path: web/package-lock.json - name: Install dependencies run: npm ci + - name: Generate derived facts + # facts.generated.ts is gitignored and produced by derive-facts.mjs; + # tsc --noEmit fails without it (TS2307) and downstream inferences + # cascade into spurious TS7006 errors, so regenerate before type check. 
+ run: npm run prebuild - name: Run ESLint run: npm run lint - name: TypeScript type check @@ -49,8 +54,8 @@ jobs: CLOUDFLARE_ACCOUNT_ID: ${{ vars.CLOUDFLARE_ACCOUNT_ID }} CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} steps: - - uses: actions/checkout@v4 - - uses: actions/setup-node@v4 + - uses: actions/checkout@v7 + - uses: actions/setup-node@v6 with: node-version: 22 cache: 'npm' diff --git a/AGENTS.md b/AGENTS.md index cfb291de7d..edf71e9072 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -6,20 +6,21 @@ **not** hard-code a device-specific checkout path here — work in whichever local checkout you have and always **confirm with `git branch --show-current` before editing.** -- **Active branch:** `hunter/0.8.62-glm-subagents` (also at - `origin/hunter/0.8.62-glm-subagents`). 0.8.61 has shipped; all new work lands - here. -- **Workspace version is intentionally still `0.8.61`** in `Cargo.toml` — the - bump to `0.8.62` is deferred until the GLM-5.2 routing is smoke-tested end to - end against live Z.ai + OpenRouter (see CHANGELOG `## [Unreleased]`). Do not - bump it opportunistically. -- **Milestone guidepost:** GitHub milestone `v0.8.62` (id 47). Check live state - with `gh issue list --repo Hmbown/CodeWhale --milestone "v0.8.62" --state open`. -- **Default branch is `main`.** Never commit directly to `main`; always work on - `hunter/0.8.62-glm-subagents` (or a fresh branch off it for an isolated - change). Open a PR into `main` only when a unit of work is reviewable. +- **Active branch:** `codex/v0.8.63-integration` (also at + `origin/codex/v0.8.63-integration`) for the current fix/integration lane. + If a newer handoff or objective file names a different branch, verify with + `git branch --show-current` and follow the live branch. +- **Workspace version is `0.8.63`** in `Cargo.toml`. Do not bump versions + opportunistically; version bumps, tags, release artifacts, publishing, and + GitHub Releases require Hunter's explicit approval. 
+- **Milestone guidepost:** GitHub milestone `v0.8.63`. Check live state with + `gh issue list --repo Hmbown/CodeWhale --milestone "v0.8.63" --state open`. +- **Default branch is `main`.** Never commit directly to `main`; work on the + active integration branch or a fresh `codex/...` branch/worktree off it for + an isolated change. Open a PR into `main` only when a unit of work is + reviewable. - **Always run before pushing a change:** `cargo fmt`, then the targeted tests - for the area (`cargo test -p codewhale-tui --bins `, + for the area (`cargo test -p codewhale-tui --bin codewhale-tui --locked `, `cargo test -p codewhale-config`, `cargo test -p codewhale-protocol`, …). Full gate: `cargo test --workspace`. Release build: `cargo build --release -p codewhale-cli -p codewhale-tui`. @@ -96,7 +97,7 @@ - Close or update issues and PRs only after verifying the landed commit on the relevant branch. If the release branch already contains equivalent behavior, leave a clear note linking the commit and describing any remaining delta. -- For the active release queue, start from the GitHub `v0.8.62` milestone - (`gh issue list --repo Hmbown/CodeWhale --milestone "v0.8.62"`) and refresh +- For the active release queue, start from the GitHub `v0.8.63` milestone + (`gh issue list --repo Hmbown/CodeWhale --milestone "v0.8.63"`) and refresh state before acting. Older per-version triage docs under `docs/` are historical reference only. diff --git a/CHANGELOG.md b/CHANGELOG.md index bd5ec0d6c5..2271782efc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,96 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.8.63] - 2026-06-19 + +### Added + +- **Sub-agent fanout safeguards (#3318, #3319).** High-fanout Workflow runs can + now queue and drain more agents than the instantaneous concurrency cap by + default, with `[subagents] max_admitted` available to tune that bounded + admission population. 
Distinct `agent` calls are no longer capped by the + per-turn loop guard before runtime launch concurrency and provider + rate-limit backoff can apply. `[subagents] token_budget` applies a shared + aggregate token ceiling to a root `agent` run and its descendants. +- **Per-worker sub-agent token enforcement (#3321).** A `token_budget` / + `max_tokens` set on an individual `agent` call now bounds that single worker + mid-run: once its accumulated model tokens exceed the cap it stops cleanly + with a `budget_exhausted` status instead of running to `max_steps`. This + complements the scope-level admission gate (#3319) — the per-worker cap stops + one runaway worker, the scope cap bounds total fan-out — without + double-counting. Harvested from #3321 by @donglovejava. +- **Provider-specific sub-agent fanout config.** `[subagents.providers.<provider>]` + profiles now override `enabled`, `max_concurrent`, `max_admitted`, + `launch_concurrency`, `max_depth`, token budget, API timeout, and heartbeat + timeout for the active provider. Use broad direct-API profiles such as + `[subagents.providers.deepseek]` and tighter subscription profiles such as + `[subagents.providers.glm]`; `/config subagents status` shows both global + and active-provider resolved values. +- **Sub-agent control and isolation.** The single `agent` tool now exposes + status, peek, and cancel actions for running children, and accepts + `worktree: true` to create an isolated git worktree/branch for parallel edit + lanes instead of requiring callers to hand-roll a `cwd`. + +### Fixed + +- **Mode and tool catalog correctness.** Core action tools remain discoverable + in the model-facing catalog/tool search, and a consistency self-check flags + registered handlers that drift out of the advertised catalog. Review-looking + prompts in explicit Agent/YOLO mode now keep the requested mode and tools, + with only an advisory review hint. 
+- **Sub-agent orchestration recovery.** Child agents now retry transient + provider header/SSE timeouts before failing, and parent runs synthesize missed + child completions from terminal child state so orchestration cannot hang on a + lost completion event. +- **DeepSeek thinking tool calls.** DeepSeek chat-completions requests now omit + explicit `tool_choice` whenever reasoning/thinking is enabled, avoiding + provider rejections while leaving no-thinking routes unchanged. +- **Task sidebar shortcuts and attribution.** Ctrl-K stays palette/emacs-kill, + while Ctrl-X is scoped to Tasks-sidebar background shell cancellation. Shell + jobs launched by sub-agents now render with their child-agent owner in the + Tasks sidebar and transcript. +- **Benchmark-turn recovery and context economy.** Repeated read-only search + loop blocks now return guidance instead of fatal tool failures, Python build + failures that are missing `setuptools` include an install/retry hint, long + foreground shell timeouts steer models toward background execution, and noisy + shell/test/web outputs are compacted earlier for large-context routes. +- **Config display redaction.** `codew config get/list` now recursively masks + token-, secret-, password-, credential-, and authorization-like keys inside + unknown `extras` tables and redacts sensitive HTTP header values before + printing config output. +- **Queued follow-up hints and force-steer keys.** The pending-input preview now + advertises `Ctrl+S send now` whenever queued follow-ups exist, and + Ctrl/Cmd+Enter force-steering also accepts the common Ctrl+J terminal + encoding while a turn is running. +- **Sidebar default visibility restored (#3328).** New and upgraded sessions + now use a pinned composed sidebar by default when the terminal is wide + enough, so live Agents and Tasks surface without opting back into idle + auto-collapse. 
Older settings files that captured the v0.8.62 auto-collapse + default now migrate to `pinned` unless `/sidebar auto --save` records an + explicit opt-in. `/sidebar` now reports when width or auto-collapse + suppresses rendering instead of saying the sidebar is visible. Reported by + @dxfq. +- **JavaScript execution proxy env handling (#3273, #3331).** `js_execution` + now enables Node's environment-proxy mode when proxy variables are present, + mirrors lowercase proxy variables for the child process, and backfills + `HTTP_PROXY` / `HTTPS_PROXY` from `ALL_PROXY`. Reported by @lordwedggie and + harvested from #3331 by @cyq1017. +- **Legacy app-server non-loopback auth hardening (#3258).** Bare + `codewhale app-server --host 0.0.0.0` now fails fast unless an explicit + `--auth-token` or `CODEWHALE_APP_SERVER_TOKEN` is supplied, keeping generated + one-time `cwapp_*` tokens loopback-only. +- **Legacy `.deepseek` state write-path migration (#3240).** State subdirectories + (`sessions`, `slop_ledger`, `trophies`, `catalog`) are now always written under + `~/.codewhale/`, and the first write of a subdir relocates any pre-existing + `~/.deepseek/` contents into the primary location so the legacy tree stops + growing while old data is preserved. The read resolver still finds legacy data + for backfill until each subdir migrates. Reported by @Final527; onboarding + marker slice from #3302 by @nightt5879. +- **State subdir validation on Windows (#3240).** State path hardening now + rejects rooted/prefixed subdir strings such as `/etc` before resolving or + migrating state directories, keeping the `.codewhale` write resolver inside + its state root across platforms. + ## [0.8.62] - 2026-06-17 ### Changed @@ -2194,7 +2284,8 @@ overflow report and `/theme` picker edge-wrapping patch in #1814. Older releases (v0.8.39 and earlier) are archived in [docs/CHANGELOG_ARCHIVE.md](docs/CHANGELOG_ARCHIVE.md). 
-[Unreleased]: https://github.com/Hmbown/CodeWhale/compare/v0.8.62...HEAD +[Unreleased]: https://github.com/Hmbown/CodeWhale/compare/v0.8.63...HEAD +[0.8.63]: https://github.com/Hmbown/CodeWhale/compare/v0.8.62...v0.8.63 [0.8.62]: https://github.com/Hmbown/CodeWhale/compare/v0.8.61...v0.8.62 [0.8.61]: https://github.com/Hmbown/CodeWhale/compare/v0.8.60...v0.8.61 [0.8.60]: https://github.com/Hmbown/CodeWhale/compare/v0.8.59...v0.8.60 diff --git a/CLAUDE.md b/CLAUDE.md index e8d4faa907..84b81967de 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -34,14 +34,18 @@ for Claude-based agents working in this repository. inspect diffs, comments, check results, and release-branch conflicts before landing. -## v0.8.62 Release Work +## Current Release Work -- The active branch is `hunter/0.8.62-glm-subagents`. This repo lives on - multiple devices, so do not hard-code a checkout path — work in whichever - local checkout you have and confirm with `git branch --show-current` before - editing. 0.8.61 has shipped; do all new work here, never on `main`. -- Base release triage on the GitHub `v0.8.62` milestone - (`gh issue list --repo Hmbown/CodeWhale --milestone "v0.8.62" --state open`) +- The active branch for this release lane is `codex/v0.8.63-integration` + (also at `origin/codex/v0.8.63-integration`). This repo lives on multiple + devices, so do not hard-code a checkout path; work in whichever local + checkout you have and confirm with `git branch --show-current` before + editing. Never commit directly to `main`. +- The workspace version is `0.8.63`. Do not tag, publish, create a GitHub + Release, push release artifacts, or merge to `main` without Hunter's + explicit approval. +- Base release triage on the GitHub `v0.8.63` milestone + (`gh issue list --repo Hmbown/CodeWhale --milestone "v0.8.63" --state open`) unless Hunter gives a newer branch/milestone. 
- Work the queue in this order: release blockers, recently approved PRs, clean PRs with small scope, blocked PRs with obvious fixes, dirty PRs that can be diff --git a/Cargo.lock b/Cargo.lock index ff816e01e0..67c02dc048 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -160,7 +160,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -171,7 +171,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -777,7 +777,7 @@ checksum = "e9b18233253483ce2f65329a24072ec414db782531bdbb7d0bbc4bd2ce6b7e21" [[package]] name = "codewhale-agent" -version = "0.8.62" +version = "0.8.63" dependencies = [ "codewhale-config", "serde", @@ -785,7 +785,7 @@ dependencies = [ [[package]] name = "codewhale-app-server" -version = "0.8.62" +version = "0.8.63" dependencies = [ "anyhow", "axum", @@ -813,7 +813,7 @@ dependencies = [ [[package]] name = "codewhale-cli" -version = "0.8.62" +version = "0.8.63" dependencies = [ "anyhow", "chrono", @@ -841,7 +841,7 @@ dependencies = [ [[package]] name = "codewhale-config" -version = "0.8.62" +version = "0.8.63" dependencies = [ "anyhow", "codewhale-execpolicy", @@ -857,7 +857,7 @@ dependencies = [ [[package]] name = "codewhale-core" -version = "0.8.62" +version = "0.8.63" dependencies = [ "anyhow", "chrono", @@ -876,7 +876,7 @@ dependencies = [ [[package]] name = "codewhale-execpolicy" -version = "0.8.62" +version = "0.8.63" dependencies = [ "anyhow", "codewhale-protocol", @@ -885,7 +885,7 @@ dependencies = [ [[package]] name = "codewhale-hooks" -version = "0.8.62" +version = "0.8.63" dependencies = [ "anyhow", "async-trait", @@ -899,7 +899,7 @@ dependencies = [ [[package]] name = "codewhale-mcp" -version = "0.8.62" +version = 
"0.8.63" dependencies = [ "anyhow", "serde", @@ -908,7 +908,7 @@ dependencies = [ [[package]] name = "codewhale-protocol" -version = "0.8.62" +version = "0.8.63" dependencies = [ "chrono", "serde", @@ -918,7 +918,7 @@ dependencies = [ [[package]] name = "codewhale-release" -version = "0.8.62" +version = "0.8.63" dependencies = [ "anyhow", "reqwest", @@ -929,7 +929,7 @@ dependencies = [ [[package]] name = "codewhale-secrets" -version = "0.8.62" +version = "0.8.63" dependencies = [ "dirs", "keyring", @@ -942,7 +942,7 @@ dependencies = [ [[package]] name = "codewhale-state" -version = "0.8.62" +version = "0.8.63" dependencies = [ "anyhow", "chrono", @@ -954,7 +954,7 @@ dependencies = [ [[package]] name = "codewhale-tools" -version = "0.8.62" +version = "0.8.63" dependencies = [ "anyhow", "async-trait", @@ -968,7 +968,7 @@ dependencies = [ [[package]] name = "codewhale-tui" -version = "0.8.62" +version = "0.8.63" dependencies = [ "anyhow", "arboard", @@ -1039,7 +1039,7 @@ dependencies = [ [[package]] name = "codewhale-whaleflow" -version = "0.8.62" +version = "0.8.63" dependencies = [ "anyhow", "serde", @@ -1592,7 +1592,7 @@ dependencies = [ "libc", "option-ext", "redox_users 0.5.2", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -1787,7 +1787,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -2700,7 +2700,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -3158,9 +3158,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.1.1" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +checksum = 
"02bd0af71c67b473010cbbc60715ee815645a4dc942899111f494b4b737d6fda" dependencies = [ "libc", "log", @@ -3263,7 +3263,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -4277,7 +4277,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.12.1", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -4333,7 +4333,7 @@ dependencies = [ "security-framework 3.5.1", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -4835,9 +4835,9 @@ checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c" [[package]] name = "socket2" -version = "0.6.1" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51" dependencies = [ "libc", "windows-sys 0.60.2", @@ -5103,7 +5103,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix 1.1.4", - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] @@ -5124,7 +5124,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "230a1b821ccbd75b185820a1f1ff7b14d21da1e442e22c0863ea5f08771a8874" dependencies = [ "rustix 1.1.4", - "windows-sys 0.61.2", + "windows-sys 0.59.0", ] [[package]] @@ -5342,9 +5342,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.49.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" dependencies = [ "bytes", "libc", @@ -5359,9 +5359,9 @@ dependencies = [ 
[[package]] name = "tokio-macros" -version = "2.6.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" dependencies = [ "proc-macro2", "quote", @@ -5641,7 +5641,7 @@ checksum = "f2f6fb2847f6742cd76af783a2a2c49e9375d0a111c7bef6f71cd9e738c72d6e" dependencies = [ "memoffset 0.9.1", "tempfile", - "windows-sys 0.61.2", + "windows-sys 0.60.2", ] [[package]] @@ -6073,7 +6073,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.2", + "windows-sys 0.52.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 3bc4108c62..fdd73ef6c2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ default-members = ["crates/cli", "crates/app-server", "crates/tui"] resolver = "2" [workspace.package] -version = "0.8.62" +version = "0.8.63" edition = "2024" # Rust 1.88 stabilized `let_chains` in `if`/`while` conditions, which the # codebase relies on extensively. 
Cargo enforces this so users on older @@ -46,7 +46,7 @@ serde_json = "1.0.149" semver = "1.0.28" thiserror = "2.0" tempfile = "3.27" -tokio = { version = "1.49.0", features = ["full"] } +tokio = { version = "1.50.0", features = ["full"] } toml = "0.9.7" toml_edit = "0.23.10" sha2 = "0.10" diff --git a/README.ja-JP.md b/README.ja-JP.md index a430b84313..79a7f99fb6 100644 --- a/README.ja-JP.md +++ b/README.ja-JP.md @@ -17,7 +17,7 @@ Rust 製の TUI と CLI、25 のプロバイダ。DeepSeek、OpenRouter、Huggin ```bash npm install -g codewhale -codewhale --version # 0.8.62 +codewhale --version # 0.8.63 ``` npm wrapper(Node 18+)は GitHub Releases から SHA-256 検証済みのバイナリをダウンロードし、`codewhale`、`codew`、`codewhale-tui` をインストールします。ソースからビルドしたい場合は cargo(Rust 1.88+)で: @@ -44,8 +44,8 @@ nix run github:Hmbown/CodeWhale scoop install codewhale # または GitHub Releases の NSIS インストーラ # GitHub に安定して到達できない場合の CNB ミラー -cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.62 codewhale-cli --locked --force -cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.62 codewhale-tui --locked --force +cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.63 codewhale-cli --locked --force +cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.63 codewhale-tui --locked --force # 旧 Homebrew 互換。formula の改名が完了するまで deepseek-tui 名のままです brew tap Hmbown/deepseek-tui @@ -87,12 +87,12 @@ codewhale exec --allowed-tools read_file,exec_shell --max-turns 10 "fix the fail - **承認ゲート付きツールと OS サンドボックス。** ファイル、Shell、Git、Web、MCP、サブエージェントの各ツールは、明示的な承認ゲートとサンドボックスバックエンド(bwrap、Landlock、Seatbelt、seccomp)の背後で動きます。 - **信頼できるロールバック。** side-git スナップショットと `/restore` は、リポジトリの `.git` の外側に置かれます — ターンを取り消しても履歴には一切触れません。 -- **Hooks v2** *(0.8.58)*。`tool_call_before` フックが JSON で `allow`/`deny`/`ask` の判定を返します。deny 優先の優先順位、glob マッチャ、プロジェクトローカルな `.codewhale/hooks.toml` に対応。 -- **プロバイダを認識する並行サブエージェント** *(0.8.58)*。調査と実装を並列に進め、big/cheap のモデル階層はプロバイダごとに解決されます — モデル ID のハードコードはありません。 -- 
**耐久性のあるセッション。** fork、relay 引き継ぎ、そして Plan/Agent/YOLO のモード切り替えをまたいでもバイト単位で安定する、セッション横断のディスク永続プロンプトキャッシュ *(0.8.56)*。ターンはシステムのスリープも生き延びます *(0.8.57)*: ストリーミング中にサスペンドしても、復帰後にリクエストが静かに再発行され、ターンは失敗しません。 -- **ヘッドレスモード。** スクリプトや CI 向けに、`codewhale exec` が `--allowed-tools`、`--disallowed-tools`(deny 優先)、`--max-turns`、`--append-system-prompt` *(0.8.58)* に対応。 +- **Hooks v2**。`tool_call_before` フックが JSON で `allow`/`deny`/`ask` の判定を返します。deny 優先の優先順位、glob マッチャ、プロジェクトローカルな `.codewhale/hooks.toml` に対応。 +- **プロバイダを認識する並行サブエージェント**。調査と実装を並列に進め、big/cheap のモデル階層はプロバイダごとに解決されます — モデル ID のハードコードはありません。 +- **耐久性のあるセッション。** fork、relay 引き継ぎ、そして Plan/Agent/YOLO のモード切り替えをまたいでもバイト単位で安定する、セッション横断のディスク永続プロンプトキャッシュ。ターンはシステムのスリープも生き延びます: ストリーミング中にサスペンドしても、復帰後にリクエストが静かに再発行され、ターンは失敗しません。 +- **ヘッドレスモード。** スクリプトや CI 向けに、`codewhale exec` が `--allowed-tools`、`--disallowed-tools`(deny 優先)、`--max-turns`、`--append-system-prompt` に対応。 - **どこにでも組み込める。** HTTP/SSE と ACP の Runtime API、VS Code 拡張(Phase 0)、Telegram/Feishu ブリッジ(Weixin ブリッジは実験的)。 -- **日常使いの磨き込み。** MCP のクライアント*かつ*サーバー、再利用可能なスキル、7 ロケールのローカライズ(0.8.56 から承認ダイアログも対象)、Xiaomi MiMo による音声合成(TTS)。 +- **日常使いの磨き込み。** MCP のクライアント*かつ*サーバー、再利用可能なスキル、7 ロケールのローカライズ、Xiaomi MiMo による音声合成(TTS)。 ### あらゆるモデル、まずはオープンモデル @@ -100,11 +100,18 @@ codewhale exec --allowed-tools read_file,exec_shell --max-turns 10 "fix the fail - **オープンモデル(ホスト型):** `deepseek`(同格の中の筆頭)、`openrouter`、`huggingface`(Inference Providers)、`moonshot`(Kimi)、`volcengine`(Ark)、`nvidia-nim`、`together`、`fireworks`、`novita`、`siliconflow` / `siliconflow-CN`、`arcee`、`xiaomi-mimo`、`deepinfra`、`atlascloud`、`wanjie-ark`、さらに任意のゲートウェイに使える汎用の `openai` 互換ルート。 - **オープンモデル(セルフホスト型):** `vllm`、`sglang`、`ollama` を自分の localhost エンドポイントに向けて使えます — キーは不要です。 -- **クローズドプロバイダ(ネイティブ対応):** `anthropic` は専用の `/v1/messages` アダプタ *(0.8.58)* 経由で、適応的 thinking、プロンプトキャッシュのブレークポイント、署名付き thinking のリプレイに対応します — OpenAI 方言のシムではありません。`openai-codex` は既存の ChatGPT/Codex CLI ログインを再利用します。 +- **クローズドプロバイダ(ネイティブ対応):** `anthropic` は専用の `/v1/messages` 
アダプタ経由で、適応的 thinking、プロンプトキャッシュのブレークポイント、署名付き thinking のリプレイに対応します — OpenAI 方言のシムではありません。`openai-codex` は既存の ChatGPT/Codex CLI ログインを再利用します。 -ルーティングは base URL の差し替えにとどまりません: `/reasoning` の effort は各プロバイダのワイヤ方言に翻訳され、サブエージェントの階層はプロバイダごとに解決され、システムプロンプト内のモデル情報はハードコードではなくモデルごとにテンプレート化されます *(0.8.58)*。セッション中の切り替えは `/provider` と `/model` で。認証情報、base URL、能力の境界を含む完全なレジストリは [docs/PROVIDERS.md](docs/PROVIDERS.md) にあります。 +ルーティングは base URL の差し替えにとどまりません: `/reasoning` の effort は各プロバイダのワイヤ方言に翻訳され、サブエージェントの階層はプロバイダごとに解決され、システムプロンプト内のモデル情報はハードコードではなくモデルごとにテンプレート化されます。セッション中の切り替えは `/provider` と `/model` で。認証情報、base URL、能力の境界を含む完全なレジストリは [docs/PROVIDERS.md](docs/PROVIDERS.md) にあります。 -上のバージョンタグは、直近 3 リリース(0.8.56 → 0.8.58)で入ったものを示しています。詳細は [CHANGELOG.md](CHANGELOG.md) を参照してください。 +サブエージェントの fanout は設定優先です。`[subagents]` に全体の既定値を置き、 +`[subagents.providers.deepseek]`、`[subagents.providers.glm]`、 +`[subagents.providers.openrouter]` などで API ごとの上限を調整できます。直結の +DeepSeek API は広めに、サブスクリプション型や rate-limit のあるルートは 3–5 +並列に抑える、といった運用を prompt やコード変更なしで行えます。詳しくは +[docs/SUBAGENTS.md](docs/SUBAGENTS.md#concurrency-cap) を参照してください。 + +完全な変更履歴は [CHANGELOG.md](CHANGELOG.md) を参照してください。 ## 考え方 — このバージョンに入れている mission idea diff --git a/README.md b/README.md index 00f44d57df..07e284464d 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ there's a model, endpoint, or feature you don't see that you want, open an issue ```bash npm install -g codewhale -codewhale --version # 0.8.62 +codewhale --version # 0.8.63 ``` The npm wrapper (Node 18+) downloads SHA-256-verified binaries from GitHub @@ -60,8 +60,8 @@ nix run github:Hmbown/CodeWhale scoop install codewhale # or the NSIS installer from GitHub Releases # CNB mirror for users who cannot reliably reach GitHub -cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.62 codewhale-cli --locked --force -cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.62 codewhale-tui --locked --force +cargo install --git 
https://cnb.cool/codewhale.net/codewhale --tag v0.8.63 codewhale-cli --locked --force +cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.63 codewhale-tui --locked --force # Legacy Homebrew compatibility while the formula is renamed brew tap Hmbown/deepseek-tui @@ -137,6 +137,13 @@ Switch mid-session with `/provider` and `/model`. The full registry — credentials, base URLs, capability boundaries — lives in [docs/PROVIDERS.md](docs/PROVIDERS.md). +Sub-agent fanout is config-first. Set global `[subagents]` defaults, then add +`[subagents.providers.deepseek]`, `[subagents.providers.glm]`, +`[subagents.providers.openrouter]`, or other provider profiles to match the API +you are actually using. Direct DeepSeek can stay wide; subscription or +rate-limited routes can stay at 3-5 concurrent agents without changing prompts +or code. See [docs/SUBAGENTS.md](docs/SUBAGENTS.md#concurrency-cap). + Atlas Cloud is included as an OpenAI-compatible hosted route for users who want its curated catalog behind one key: set `DEEPSEEK_PROVIDER=atlascloud`, `ATLASCLOUD_API_KEY`, and optionally `ATLASCLOUD_MODEL`, for example @@ -180,8 +187,8 @@ structure intact. goal is done, it's blocked, or you stop it. No turn cap. `/task` tracks background tasks; the Work sidebar shows live plan and checklist state. - **Sub-agents.** Independent investigations and implementation slices run in - parallel — up to 20 at once — each with its own clean context and - provider-aware model tier (big vs. cheap). + parallel with provider-specific fanout caps, clean context, and + provider-aware model tiers (big vs. cheap). - **25 providers.** DeepSeek, GLM, Claude, GPT, Kimi, MiniMax, OpenRouter, and local vLLM/SGLang/Ollama, all behind the same harness and tools. Switch mid-session with `/provider` and `/model`. diff --git a/README.vi.md b/README.vi.md index 4dac3ae1b4..42e2accc61 100644 --- a/README.vi.md +++ b/README.vi.md @@ -21,7 +21,7 @@ bằng `/restore` cho mọi lượt. 
```bash npm install -g codewhale -codewhale --version # 0.8.62 +codewhale --version # 0.8.63 ``` Wrapper npm (Node 18+) tải binary đã xác minh SHA-256 từ GitHub Releases và @@ -50,8 +50,8 @@ nix run github:Hmbown/CodeWhale scoop install codewhale # hoặc trình cài NSIS từ GitHub Releases # CNB mirror cho người dùng khó truy cập GitHub ổn định -cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.62 codewhale-cli --locked --force -cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.62 codewhale-tui --locked --force +cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.63 codewhale-cli --locked --force +cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.63 codewhale-tui --locked --force # Homebrew legacy trong lúc formula đang được đổi tên brew tap Hmbown/deepseek-tui @@ -107,25 +107,23 @@ toàn là cơ chế runtime, không phải lời dặn mà model phải tự nh sandbox (bwrap, Landlock, Seatbelt, seccomp). - **Rollback đáng tin cậy.** Snapshot side-git và `/restore`, giữ bên ngoài `.git` của repo — hoàn tác một lượt không bao giờ chạm vào lịch sử của bạn. -- **Hooks v2** *(0.8.58)*. Hook `tool_call_before` trả về quyết định JSON +- **Hooks v2**. Hook `tool_call_before` trả về quyết định JSON `allow`/`deny`/`ask` với quy tắc deny thắng, matcher dạng glob, và `.codewhale/hooks.toml` riêng cho từng dự án. -- **Sub-agent chạy song song với định tuyến theo provider** *(0.8.58)*. Điều - tra và triển khai song song, với các tier model lớn/rẻ được phân giải theo - từng provider — không hardcode model id. +- **Sub-agent chạy song song với định tuyến theo provider**. Điều tra và triển + khai song song, với các tier model lớn/rẻ được phân giải theo từng provider — + không hardcode model id. - **Session bền.** Fork, relay handoff, và prompt cache lưu trên đĩa dùng chung giữa các session, ổn định từng byte khi chuyển qua lại giữa chế độ - Plan/Agent/YOLO *(0.8.56)*. 
Lượt chạy sống sót qua sleep hệ thống - *(0.8.57)*: máy ngủ giữa stream, thức dậy, request được âm thầm gửi lại - thay vì làm hỏng lượt. + Plan/Agent/YOLO. Lượt chạy sống sót qua sleep hệ thống: máy ngủ giữa stream, + thức dậy, request được âm thầm gửi lại thay vì làm hỏng lượt. - **Chế độ headless.** `codewhale exec` với `--allowed-tools`, - `--disallowed-tools` (deny thắng), `--max-turns` và - `--append-system-prompt` *(0.8.58)* cho script và CI. + `--disallowed-tools` (deny thắng), `--max-turns` và `--append-system-prompt` + cho script và CI. - **Nhúng được ở mọi nơi.** Runtime API HTTP/SSE và ACP, extension VS Code (Phase 0), và cầu nối Telegram/Feishu (cầu nối Weixin đang thử nghiệm). - **Độ hoàn thiện để dùng hằng ngày.** Vừa là MCP client *vừa* là MCP server, - skill tái sử dụng, bản địa hóa 7 ngôn ngữ (gồm cả hộp thoại phê duyệt từ - 0.8.56), và speech/TTS qua Xiaomi MiMo. + skill tái sử dụng, bản địa hóa 7 ngôn ngữ, và speech/TTS qua Xiaomi MiMo. ### Mọi model, ưu tiên model mở @@ -141,17 +139,24 @@ một bộ công cụ: - **Model mở, tự host:** `vllm`, `sglang` và `ollama` trỏ vào endpoint localhost của riêng bạn — không cần key. - **Provider đóng, hỗ trợ native:** `anthropic` qua adapter `/v1/messages` - chuyên dụng *(0.8.58)* với adaptive thinking, breakpoint prompt-cache và - phát lại signed-thinking — không phải shim giả giọng OpenAI — và - `openai-codex`, tái sử dụng phiên đăng nhập ChatGPT/Codex CLI sẵn có. + chuyên dụng với adaptive thinking, breakpoint prompt-cache và phát lại + signed-thinking — không phải shim giả giọng OpenAI — và `openai-codex`, tái + sử dụng phiên đăng nhập ChatGPT/Codex CLI sẵn có. Định tuyến không chỉ là đổi base URL: mức effort của `/reasoning` được dịch sang phương ngữ wire của từng provider, tier sub-agent phân giải theo provider, và phần facts về model trong system prompt được template theo từng -model thay vì hardcode *(0.8.58)*. Đổi giữa session bằng `/provider` và +model thay vì hardcode. 
Đổi giữa session bằng `/provider` và `/model`. Danh mục đầy đủ — credentials, base URL, ranh giới năng lực — nằm trong [docs/PROVIDERS.md](docs/PROVIDERS.md). +Fanout của sub-agent ưu tiên cấu hình. Đặt mặc định trong `[subagents]`, rồi +thêm `[subagents.providers.deepseek]`, `[subagents.providers.glm]`, +`[subagents.providers.openrouter]` hoặc profile provider khác để khớp API bạn +đang dùng. Direct DeepSeek có thể mở rộng; route subscription hoặc dễ bị rate +limit có thể giữ ở 3–5 agent song song mà không đổi prompt hay code. Xem +[docs/SUBAGENTS.md](docs/SUBAGENTS.md#concurrency-cap). + Các nhãn phiên bản ở trên đánh dấu những gì đã hạ cánh trong ba bản phát hành gần nhất (0.8.56 → 0.8.58). Chi tiết đầy đủ trong [CHANGELOG.md](CHANGELOG.md). diff --git a/README.zh-CN.md b/README.zh-CN.md index 68fcf23432..c8bfde72fa 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -20,7 +20,7 @@ DeepInfra 以及本地 vLLM/SGLang/Ollama 都是一等路由;当你手里是 A ```bash npm install -g codewhale -codewhale --version # 0.8.62 +codewhale --version # 0.8.63 ``` npm wrapper(Node 18+)会从 GitHub Releases 下载经 SHA-256 校验的二进制,并安装 @@ -49,8 +49,8 @@ nix run github:Hmbown/CodeWhale scoop install codewhale # 或使用 GitHub Releases 中的 NSIS 安装包 # CNB 镜像:适合无法稳定访问 GitHub 的用户 -cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.62 codewhale-cli --locked --force -cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.62 codewhale-tui --locked --force +cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.63 codewhale-cli --locked --force +cargo install --git https://cnb.cool/codewhale.net/codewhale --tag v0.8.63 codewhale-tui --locked --force # 旧 Homebrew 兼容路径:formula 改名期间仍沿用 deepseek-tui brew tap Hmbown/deepseek-tui @@ -129,18 +129,22 @@ codewhale exec --allowed-tools read_file,exec_shell --max-turns 10 "fix the fail `deepinfra`、`wanjie-ark`,外加一条通用的 `openai` 兼容路由,可接任意网关。 - **开放模型,自托管:** `vllm`、`sglang`、`ollama` 直连你自己的 localhost 端点——无需任何 key。 -- **闭源 
provider,原生直连:** `anthropic` 走专用的 `/v1/messages` 适配器 - *(0.8.58)*,支持自适应思考、prompt-cache 断点和签名思考重放——不是 - OpenAI 方言的转译垫片;还有 `openai-codex`,复用已有的 ChatGPT/Codex CLI - 登录。 +- **闭源 provider,原生直连:** `anthropic` 走专用的 `/v1/messages` 适配器, + 支持自适应思考、prompt-cache 断点和签名思考重放——不是 OpenAI 方言的转译 + 垫片;还有 `openai-codex`,复用已有的 ChatGPT/Codex CLI 登录。 路由不只是换个 base URL:`/reasoning` 努力档位会翻译成各 provider 的协议方言, -子 Agent 分档按 provider 解析,系统提示中的模型事实也按模型模板化而非写死 -*(0.8.58)*。会话中途用 `/provider` 和 `/model` 即可切换。完整注册表——凭据、 -base URL、能力边界——见 [docs/PROVIDERS.md](docs/PROVIDERS.md)。 +子 Agent 分档按 provider 解析,系统提示中的模型事实也按模型模板化而非写死。 +会话中途用 `/provider` 和 `/model` 即可切换。完整注册表——凭据、base URL、 +能力边界——见 [docs/PROVIDERS.md](docs/PROVIDERS.md)。 -上面的版本标注对应最近三个版本(0.8.56 → 0.8.58)落地的内容。完整细节见 -[CHANGELOG.md](CHANGELOG.md)。 +子 Agent 扇出优先走配置:在 `[subagents]` 写全局默认值,再用 +`[subagents.providers.deepseek]`、`[subagents.providers.glm]`、 +`[subagents.providers.openrouter]` 等按 API 调整。直连 DeepSeek 可以放宽; +订阅或限流 route 可以保持 3–5 个并发,不需要改 prompt 或代码。详见 +[docs/SUBAGENTS.md](docs/SUBAGENTS.md#concurrency-cap)。 + +完整细节见 [CHANGELOG.md](CHANGELOG.md)。 ## 核心想法 —— 这个版本放进来的 mission idea diff --git a/crates/agent/Cargo.toml b/crates/agent/Cargo.toml index f2199a0976..721b656201 100644 --- a/crates/agent/Cargo.toml +++ b/crates/agent/Cargo.toml @@ -7,5 +7,5 @@ repository.workspace = true description = "Model/provider registry and fallback strategy for DeepSeek workspace architecture" [dependencies] -codewhale-config = { path = "../config", version = "0.8.62" } +codewhale-config = { path = "../config", version = "0.8.63" } serde.workspace = true diff --git a/crates/app-server/Cargo.toml b/crates/app-server/Cargo.toml index bb3ca0caa1..0432cfc6aa 100644 --- a/crates/app-server/Cargo.toml +++ b/crates/app-server/Cargo.toml @@ -12,15 +12,15 @@ autobins = false anyhow.workspace = true axum.workspace = true clap.workspace = true -codewhale-agent = { path = "../agent", version = "0.8.62" } -codewhale-config = { path = "../config", version = "0.8.62" } 
-codewhale-core = { path = "../core", version = "0.8.62" } -codewhale-execpolicy = { path = "../execpolicy", version = "0.8.62" } -codewhale-hooks = { path = "../hooks", version = "0.8.62" } -codewhale-mcp = { path = "../mcp", version = "0.8.62" } -codewhale-protocol = { path = "../protocol", version = "0.8.62" } -codewhale-state = { path = "../state", version = "0.8.62" } -codewhale-tools = { path = "../tools", version = "0.8.62" } +codewhale-agent = { path = "../agent", version = "0.8.63" } +codewhale-config = { path = "../config", version = "0.8.63" } +codewhale-core = { path = "../core", version = "0.8.63" } +codewhale-execpolicy = { path = "../execpolicy", version = "0.8.63" } +codewhale-hooks = { path = "../hooks", version = "0.8.63" } +codewhale-mcp = { path = "../mcp", version = "0.8.63" } +codewhale-protocol = { path = "../protocol", version = "0.8.63" } +codewhale-state = { path = "../state", version = "0.8.63" } +codewhale-tools = { path = "../tools", version = "0.8.63" } serde.workspace = true serde_json.workspace = true rustls.workspace = true diff --git a/crates/app-server/src/lib.rs b/crates/app-server/src/lib.rs index 17ef5b47f9..3700d9379e 100644 --- a/crates/app-server/src/lib.rs +++ b/crates/app-server/src/lib.rs @@ -392,6 +392,7 @@ fn resolve_auth_token(options: &AppServerOptions) -> Result> { { bail!("app-server auth token cannot be empty"); } + let has_explicit_token = configured.is_some(); if options.insecure_no_auth { if !options.listen.ip().is_loopback() { @@ -401,10 +402,16 @@ fn resolve_auth_token(options: &AppServerOptions) -> Result> { return Ok(None); } + if !has_explicit_token && !options.listen.ip().is_loopback() { + bail!( + "refusing non-loopback app-server bind without explicit auth token; pass --auth-token or set CODEWHALE_APP_SERVER_TOKEN" + ); + } + let token = configured .map(str::to_string) .unwrap_or_else(|| format!("cwapp_{}", Uuid::new_v4().simple())); - if options.auth_token.is_some() { + if has_explicit_token { 
eprintln!("app-server auth: bearer token required for HTTP routes."); } else { eprintln!("app-server auth: generated bearer token for this process."); @@ -1189,15 +1196,13 @@ mod tests { listen: "0.0.0.0:8787".parse().expect("socket addr"), config_path: None, auth_token: None, - insecure_no_auth: true, + insecure_no_auth: false, cors_origins: Vec::new(), }; - let err = resolve_auth_token(&options).expect_err("non-loopback unauth should fail"); - assert!( - err.to_string() - .contains("refusing unauthenticated app-server bind") - ); + let err = + resolve_auth_token(&options).expect_err("non-loopback generated auth should fail"); + assert!(err.to_string().contains("without explicit auth token")); } #[tokio::test] @@ -1413,6 +1418,19 @@ mod tests { assert_eq!(token.as_deref(), Some("my-secret")); } + #[test] + fn auth_token_explicit_allows_non_loopback_bind() { + let options = AppServerOptions { + listen: "0.0.0.0:8787".parse().expect("socket addr"), + config_path: None, + auth_token: Some("my-secret".to_string()), + insecure_no_auth: false, + cors_origins: Vec::new(), + }; + let token = resolve_auth_token(&options).unwrap(); + assert_eq!(token.as_deref(), Some("my-secret")); + } + #[test] fn insecure_no_auth_on_loopback_returns_none() { let options = AppServerOptions { @@ -1426,6 +1444,23 @@ mod tests { assert!(token.is_none()); } + #[test] + fn insecure_no_auth_on_non_loopback_fails_fast() { + let options = AppServerOptions { + listen: "0.0.0.0:8787".parse().expect("socket addr"), + config_path: None, + auth_token: None, + insecure_no_auth: true, + cors_origins: Vec::new(), + }; + + let err = resolve_auth_token(&options).expect_err("non-loopback unauth should fail"); + assert!( + err.to_string() + .contains("refusing unauthenticated app-server bind") + ); + } + // ── cors_layer ───────────────────────────────────────────────────── #[test] diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index 07f0f83b73..d921ccd32c 100644 --- a/crates/cli/Cargo.toml +++ 
b/crates/cli/Cargo.toml @@ -19,14 +19,14 @@ path = "src/bin/codew_legacy_shim.rs" anyhow.workspace = true clap.workspace = true clap_complete.workspace = true -codewhale-agent = { path = "../agent", version = "0.8.62" } -codewhale-app-server = { path = "../app-server", version = "0.8.62" } -codewhale-config = { path = "../config", version = "0.8.62" } -codewhale-execpolicy = { path = "../execpolicy", version = "0.8.62" } -codewhale-mcp = { path = "../mcp", version = "0.8.62" } -codewhale-release = { path = "../release", version = "0.8.62" } -codewhale-secrets = { path = "../secrets", version = "0.8.62" } -codewhale-state = { path = "../state", version = "0.8.62" } +codewhale-agent = { path = "../agent", version = "0.8.63" } +codewhale-app-server = { path = "../app-server", version = "0.8.63" } +codewhale-config = { path = "../config", version = "0.8.63" } +codewhale-execpolicy = { path = "../execpolicy", version = "0.8.63" } +codewhale-mcp = { path = "../mcp", version = "0.8.63" } +codewhale-release = { path = "../release", version = "0.8.63" } +codewhale-secrets = { path = "../secrets", version = "0.8.63" } +codewhale-state = { path = "../state", version = "0.8.63" } chrono.workspace = true dirs.workspace = true serde.workspace = true diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index a844984075..ac1a40131d 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -1,3 +1,5 @@ +#![allow(clippy::uninlined_format_args)] + mod metrics; mod update; @@ -743,7 +745,9 @@ fn run() -> Result<()> { } Some(Commands::Serve(args)) => { let resolved_runtime = resolve_runtime_for_dispatch(&mut store, &runtime_overrides); - delegate_to_tui(&cli, &resolved_runtime, tui_args("serve", args)) + // `serve` starts a long-running runtime API listener; supervise the + // delegated child so it is torn down with the dispatcher (#3259). 
+ delegate_server_to_tui(&cli, &resolved_runtime, tui_args("serve", args)) } Some(Commands::Completions(args)) => { let resolved_runtime = resolve_runtime_for_dispatch(&mut store, &runtime_overrides); @@ -1619,7 +1623,9 @@ fn run_app_server_command( // canonical `app-server --http`/`--mobile` entrypoint reuses that mature server // by delegating to the sibling TUI binary (the same mechanism `serve` uses). if args.http || args.mobile { - return delegate_to_tui(cli, resolved_runtime, app_server_serve_passthrough(&args)); + // Delegated runtime API listener — supervise it so the child does not + // outlive the dispatcher (#3259). + return delegate_server_to_tui(cli, resolved_runtime, app_server_serve_passthrough(&args)); } let runtime = tokio::runtime::Builder::new_multi_thread() @@ -1750,6 +1756,167 @@ fn delegate_to_tui( exit_with_tui_status(status) } +/// Delegate a long-running server command (`serve --http`/`--mobile`, +/// `app-server --http`/`--mobile`) to the sibling TUI binary, supervising the +/// child so its listener does not outlive the dispatcher (#3259). +/// +/// Plain [`delegate_to_tui`] blocks on `Command::status()`, which reaps the +/// child only on the child's own exit. If the dispatcher is terminated while +/// the delegated server is still running, the child can be reparented and keep +/// its listener bound. Here the child runs under a Tokio supervisor that +/// forwards termination (Ctrl+C / SIGTERM / SIGHUP) by killing and reaping the +/// child before the dispatcher exits, and `kill_on_drop` tears the child down +/// if the dispatcher unwinds. +/// +/// An uncatchable `SIGKILL` of the dispatcher cannot run this path; covering +/// that needs `PR_SET_PDEATHSIG` (Linux) / Job Objects (Windows) and is tracked +/// as follow-up on #3259. 
+fn delegate_server_to_tui( + cli: &Cli, + resolved_runtime: &ResolvedRuntimeOptions, + passthrough: Vec, +) -> Result<()> { + let std_cmd = build_tui_command(cli, resolved_runtime, passthrough)?; + let tui = PathBuf::from(std_cmd.get_program()); + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .context("failed to create server-teardown runtime")?; + runtime.block_on(async move { + let mut cmd = tokio::process::Command::from(std_cmd); + cmd.kill_on_drop(true); + let mut child = cmd + .spawn() + .map_err(|err| anyhow!("{}", tui_spawn_error(&tui, &err)))?; + match supervise_server_child(&mut child, server_shutdown_signal()).await? { + ServerTeardown::Exited(status) => exit_with_tui_status(status), + // The child has been killed and reaped; exit with the conventional + // 128 + signal code for the signal that initiated the shutdown. + ServerTeardown::Signaled(code) => std::process::exit(code), + } + }) +} + +/// Outcome of supervising a delegated server child. +#[derive(Debug)] +enum ServerTeardown { + /// The child exited on its own; its status is carried for propagation. + Exited(std::process::ExitStatus), + /// A shutdown signal fired; the child was killed and reaped. Carries the + /// conventional `128 + signal` exit code to propagate. + Signaled(i32), +} + +/// Wait for the server `child` to exit, or for `shutdown` to fire first. On +/// shutdown, kill the child and reap it so no listener is left reparented. +async fn supervise_server_child( + child: &mut tokio::process::Child, + shutdown: F, +) -> io::Result +where + F: std::future::Future, +{ + tokio::select! { + status = child.wait() => Ok(ServerTeardown::Exited(status?)), + code = shutdown => { + // Send the kill, then wait so the PID is reaped before the + // dispatcher returns and exits. 
+ let _ = child.start_kill(); + let _ = child.wait().await; + Ok(ServerTeardown::Signaled(code)) + } + } +} + +/// Resolve when the dispatcher should tear down a delegated server child, and +/// the conventional `128 + signal` exit code to propagate: Ctrl+C on every +/// platform (130), plus SIGTERM (143) and SIGHUP (129) on Unix (e.g. +/// `kill ` or a service manager stopping the process). A signal source +/// that fails to install simply never fires, leaving Ctrl+C as the floor. +/// Mirrors `wait_for_terminating_signal` in `crates/tui/src/main.rs`. +#[cfg(unix)] +async fn server_shutdown_signal() -> i32 { + use tokio::signal::unix::{SignalKind, signal}; + let mut terminate = signal(SignalKind::terminate()).ok(); + let mut hangup = signal(SignalKind::hangup()).ok(); + let term = async { + match terminate.as_mut() { + Some(s) => { + s.recv().await; + } + None => std::future::pending::<()>().await, + } + }; + let hup = async { + match hangup.as_mut() { + Some(s) => { + s.recv().await; + } + None => std::future::pending::<()>().await, + } + }; + tokio::select! { + _ = tokio::signal::ctrl_c() => 130, + _ = term => 143, + _ = hup => 129, + } +} + +#[cfg(not(unix))] +async fn server_shutdown_signal() -> i32 { + let _ = tokio::signal::ctrl_c().await; + 130 +} + +#[cfg(all(test, unix))] +mod server_teardown_tests { + use super::*; + + #[tokio::test] + async fn supervisor_propagates_child_exit_when_no_shutdown() { + // `true` exits immediately with success; a never-firing shutdown must + // let the child's own exit win. 
+ let mut child = tokio::process::Command::new("true") + .kill_on_drop(true) + .spawn() + .expect("spawn true"); + let outcome = supervise_server_child(&mut child, std::future::pending::()) + .await + .expect("supervise"); + match outcome { + ServerTeardown::Exited(status) => assert!(status.success()), + other => panic!("expected Exited, got {other:?}"), + } + } + + #[tokio::test] + async fn shutdown_signal_kills_and_reaps_long_running_child() { + // A long-lived child stands in for the delegated server listener; the + // regression is that it outlives dispatcher teardown (#3259). + let mut child = tokio::process::Command::new("sleep") + .arg("30") + .kill_on_drop(true) + .spawn() + .expect("spawn sleep"); + assert!( + child.id().is_some(), + "child should be running before shutdown" + ); + // A ready future models an immediate shutdown signal carrying the + // SIGTERM exit code (143). + let outcome = supervise_server_child(&mut child, async { 143 }) + .await + .expect("supervise"); + assert!(matches!(outcome, ServerTeardown::Signaled(143))); + // Once supervise returns the child has been killed AND reaped, so tokio + // drops the recorded pid — no listener is left reparented. 
+ assert!( + child.id().is_none(), + "delegated child must be reaped after dispatcher teardown" + ); + } +} + fn run_resume_command( cli: &Cli, resolved_runtime: &ResolvedRuntimeOptions, diff --git a/crates/config/Cargo.toml b/crates/config/Cargo.toml index 22db010209..f6d67de01a 100644 --- a/crates/config/Cargo.toml +++ b/crates/config/Cargo.toml @@ -8,8 +8,8 @@ description = "Config schema and precedence model for DeepSeek workspace archite [dependencies] anyhow.workspace = true -codewhale-execpolicy = { path = "../execpolicy", version = "0.8.62" } -codewhale-secrets = { path = "../secrets", version = "0.8.62" } +codewhale-execpolicy = { path = "../execpolicy", version = "0.8.63" } +codewhale-secrets = { path = "../secrets", version = "0.8.63" } dirs.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/config/src/lib.rs b/crates/config/src/lib.rs index de9c69094b..2a28bb32fb 100644 --- a/crates/config/src/lib.rs +++ b/crates/config/src/lib.rs @@ -739,6 +739,19 @@ fn get_provider_config_value( } } +fn get_provider_config_display_value( + config: &ProviderConfigToml, + field: ProviderConfigField, +) -> Option { + match field { + ProviderConfigField::ApiKey => config.api_key.as_deref().map(redact_secret), + ProviderConfigField::HttpHeaders => { + serialize_http_headers_for_display(&config.http_headers) + } + _ => get_provider_config_value(config, field), + } +} + fn set_provider_config_value( config: &mut ConfigToml, provider: ProviderKind, @@ -886,7 +899,7 @@ fn insert_provider_config_values( v.to_string(), ); } - if let Some(v) = serialize_http_headers(&config.http_headers) { + if let Some(v) = serialize_http_headers_for_display(&config.http_headers) { out.insert( provider_config_key(provider, ProviderConfigField::HttpHeaders), v, @@ -1327,7 +1340,7 @@ pub struct FleetConfigToml { /// workers so the two cannot drift into "two moving targets": /// - [`DEFAULT_SPAWN_DEPTH`] is the default recursion budget (the sub-agent /// 
runtime's `DEFAULT_MAX_SPAWN_DEPTH` is defined as this value). -/// - [`MAX_SPAWN_DEPTH_CEILING`] is the hard safety cap; every configured +/// - [`MAX_SPAWN_DEPTH_CEILING`] is the opt-in safety cap; every configured /// value (fleet `max_spawn_depth`, the `agent` tool's `max_depth`) clamps to it. /// /// A worker runs at `spawn_depth = 0` and may spawn while @@ -1337,10 +1350,12 @@ pub struct FleetConfigToml { /// depth 0 even when the budget is 0. pub const DEFAULT_SPAWN_DEPTH: u32 = 3; -/// Hard ceiling on recursion depth for any worker/sub-agent. See -/// [`DEFAULT_SPAWN_DEPTH`]. Raising this single constant lifts the limit -/// everywhere (the fleet clamp and `agent` validation both read it). -pub const MAX_SPAWN_DEPTH_CEILING: u32 = 3; +/// Hard ceiling on recursion depth for any worker/sub-agent. The default stays +/// conservative at [`DEFAULT_SPAWN_DEPTH`], while explicit config can opt into +/// deeper trees for direct-API providers that can tolerate the fanout. +/// Raising this single constant lifts the limit everywhere (the fleet clamp +/// and `agent` validation both read it). +pub const MAX_SPAWN_DEPTH_CEILING: u32 = 8; /// Headless worker execution constraints (#3027). 
/// @@ -1661,6 +1676,18 @@ impl ConfigToml { #[must_use] pub fn get_display_value(&self, key: &str) -> Option { + if let Some((provider, field)) = parse_provider_config_key(key) { + return get_provider_config_display_value(self.providers.for_provider(provider), field); + } + + if key == "http_headers" { + return serialize_http_headers_for_display(&self.http_headers); + } + + if let Some(value) = self.extras.get(key) { + return Some(redact_toml_value_for_display(key, value)); + } + self.get_value(key).map(|value| { if is_sensitive_config_key(key) { redact_secret(&value) @@ -1754,7 +1781,7 @@ impl ConfigToml { if let Some(v) = self.base_url.as_ref() { out.insert("base_url".to_string(), v.clone()); } - if let Some(v) = serialize_http_headers(&self.http_headers) { + if let Some(v) = serialize_http_headers_for_display(&self.http_headers) { out.insert("http_headers".to_string(), v); } if let Some(v) = self.default_text_model.as_ref() { @@ -1804,7 +1831,7 @@ impl ConfigToml { } for (k, v) in &self.extras { - out.insert(k.clone(), v.to_string()); + out.insert(k.clone(), redact_toml_value_for_display(k, v)); } out } @@ -3204,6 +3231,38 @@ fn effective_home_dir() -> Option { .or_else(dirs::home_dir) } +/// Reject state subdirs that could escape the state root via path injection. +/// +/// `ensure_state_dir` / `resolve_state_dir` are public APIs taking an arbitrary +/// subdir string; every in-tree caller passes a hardcoded single component +/// (e.g. `"sessions"`, `"."`). This validates defensively so a future caller +/// can never traverse out of the state root via `..` components or an absolute +/// path. Nested relative paths such as `"a/b"` are permitted. 
+fn ensure_safe_state_subdir(subdir: &str) -> Result<()> { + if subdir.is_empty() { + bail!("state subdir must not be empty"); + } + let path = std::path::Path::new(subdir); + if path.is_absolute() { + bail!("state subdir must not be an absolute path: {subdir}"); + } + if path.components().any(|c| { + matches!( + c, + std::path::Component::RootDir | std::path::Component::Prefix(_) + ) + }) { + bail!("state subdir must not contain a root or prefix: {subdir}"); + } + if path + .components() + .any(|c| matches!(c, std::path::Component::ParentDir)) + { + bail!("state subdir must not contain parent-dir (..) components: {subdir}"); + } + Ok(()) +} + /// Resolve a state subdirectory, preferring the CodeWhale root if /// it already exists, otherwise falling back to the legacy root. /// @@ -3211,6 +3270,7 @@ fn effective_home_dir() -> Option { /// migration has occurred or on a fresh install, but keeps reading /// from the legacy path for users who haven't migrated yet. pub fn resolve_state_dir(subdir: &str) -> Result { + ensure_safe_state_subdir(subdir)?; let primary = codewhale_home()?.join(subdir); if primary.exists() { return Ok(primary); @@ -3225,13 +3285,111 @@ pub fn resolve_state_dir(subdir: &str) -> Result { /// Ensure a state subdirectory exists under the primary CodeWhale root, /// creating it if necessary. This is the write-path resolver. +/// +/// On the first creation of a real subdirectory (not the root sentinel `"."`), +/// if a legacy `~/.deepseek/` exists but the primary +/// `~/.codewhale/` does not, the legacy directory is relocated into +/// the primary location so the user keeps their data and the legacy tree +/// stops growing (#3240). After migration, [`resolve_state_dir`] finds the +/// data in the primary location; the read resolver itself is unchanged. 
pub fn ensure_state_dir(subdir: &str) -> Result { + ensure_safe_state_subdir(subdir)?; let dir = codewhale_home()?.join(subdir); + migrate_legacy_state_dir(&dir, subdir)?; std::fs::create_dir_all(&dir) .with_context(|| format!("failed to create {}/", dir.display()))?; Ok(dir) } +/// One-time relocation of a legacy `~/.deepseek/` state directory into +/// the primary `~/.codewhale/` location (#3240). No-op once the primary +/// exists, for the root sentinel `"."` (a whole-tree move is owned by the +/// config-file migration), or when no legacy directory is present. +fn migrate_legacy_state_dir(primary: &Path, subdir: &str) -> Result<()> { + if primary.exists() || subdir == "." || subdir.is_empty() { + return Ok(()); + } + let legacy = match legacy_deepseek_home() { + Ok(home) => home.join(subdir), + Err(_) => return Ok(()), + }; + if !legacy.exists() { + return Ok(()); + } + // The primary's parent (the ~/.codewhale root) must exist for the rename. + if let Some(parent) = primary.parent() { + if let Err(err) = std::fs::create_dir_all(parent) { + tracing::warn!( + target: "config::migration", + "Could not create {} for state migration ({}); writing to primary anyway", + parent.display(), + err + ); + } + } + match std::fs::rename(&legacy, primary) { + Ok(()) => { + tracing::info!( + target: "config::migration", + "Migrated legacy state directory {} -> {} (relocated). The .deepseek copy was removed.", + legacy.display(), + primary.display() + ); + } + Err(err) => { + // Cross-device rename or permission issue: fall back to a + // recursive copy so the user keeps their data. The legacy tree is + // left in place; it stops growing because writes now target the + // primary path. + match copy_dir_recursive(&legacy, primary) { + Ok(()) => { + tracing::info!( + target: "config::migration", + "Migrated legacy state directory {} -> {} (copied; rename failed: {err}). 
\ + The legacy .deepseek copy was left in place.", + legacy.display(), + primary.display() + ); + } + Err(copy_err) => { + tracing::warn!( + target: "config::migration", + "Could not migrate legacy state {} -> {} (rename: {err}; copy: {copy_err}). \ + New data is written to the primary path; the legacy tree remains untouched.", + legacy.display(), + primary.display() + ); + } + } + } + } + Ok(()) +} + +/// Recursively copy a directory tree from `src` to `dst`, creating `dst`. +/// Symlinks and other non-file/non-dir entries are skipped (rare in state dirs). +fn copy_dir_recursive(src: &Path, dst: &Path) -> Result<()> { + std::fs::create_dir_all(dst).with_context(|| format!("failed to create {}", dst.display()))?; + for entry in + std::fs::read_dir(src).with_context(|| format!("failed to read {}", src.display()))? + { + let entry = entry.with_context(|| format!("failed to read entry in {}", src.display()))?; + let path = entry.path(); + let target = dst.join(entry.file_name()); + let file_type = entry + .file_type() + .with_context(|| format!("failed to read file type for {}", path.display()))?; + if file_type.is_dir() { + copy_dir_recursive(&path, &target)?; + } else if file_type.is_file() { + std::fs::copy(&path, &target).with_context(|| { + format!("failed to copy {} -> {}", path.display(), target.display()) + })?; + } + } + Ok(()) +} + /// Resolve a project-local state subdirectory, preferring `.codewhale/` /// when it exists, falling back to `.deepseek/` for legacy projects. 
/// @@ -3500,6 +3658,26 @@ fn serialize_http_headers(headers: &BTreeMap) -> Option ) } +fn serialize_http_headers_for_display(headers: &BTreeMap) -> Option { + if headers.is_empty() { + return None; + } + Some( + headers + .iter() + .map(|(name, value)| { + let display_value = if is_sensitive_config_key(name) { + redact_secret(value) + } else { + value.clone() + }; + format!("{name}={display_value}") + }) + .collect::>() + .join(","), + ) +} + fn redact_secret(secret: &str) -> String { let chars: Vec = secret.chars().collect(); if chars.len() <= 16 { @@ -3519,7 +3697,78 @@ fn redact_secret(secret: &str) -> String { #[must_use] pub fn is_sensitive_config_key(key: &str) -> bool { - key == "api_key" || key.ends_with(".api_key") + let Some(segment) = key.rsplit('.').next() else { + return false; + }; + let normalized = segment + .trim() + .trim_matches('"') + .replace('-', "_") + .to_ascii_lowercase(); + + matches!( + normalized.as_str(), + "api_key" + | "apikey" + | "api_keys" + | "authorization" + | "bearer" + | "client_secret" + | "credential" + | "credentials" + | "id_token" + | "password" + | "passwords" + | "passwd" + | "proxy_authorization" + | "refresh_token" + | "secret" + | "secrets" + | "token" + | "tokens" + ) || normalized.ends_with("_api_key") + || normalized.ends_with("_authorization") + || normalized.ends_with("_password") + || normalized.ends_with("_secret") + || normalized.ends_with("_token") +} + +fn redact_toml_value_for_display(key: &str, value: &toml::Value) -> String { + redact_toml_value_for_display_inner(key, false, value).to_string() +} + +fn redact_toml_value_for_display_inner( + key: &str, + sensitive_ancestor: bool, + value: &toml::Value, +) -> toml::Value { + let sensitive = sensitive_ancestor || is_sensitive_config_key(key); + match value { + toml::Value::String(value) if sensitive => toml::Value::String(redact_secret(value)), + toml::Value::Array(values) => toml::Value::Array( + values + .iter() + .map(|value| 
redact_toml_value_for_display_inner(key, sensitive, value)) + .collect(), + ), + toml::Value::Table(table) => { + let mut redacted = toml::map::Map::new(); + for (child_key, child_value) in table { + let path = if key.is_empty() { + child_key.clone() + } else { + format!("{key}.{child_key}") + }; + redacted.insert( + child_key.clone(), + redact_toml_value_for_display_inner(&path, sensitive, child_value), + ); + } + toml::Value::Table(redacted) + } + _ if sensitive => toml::Value::String("********".to_string()), + _ => value.clone(), + } } fn normalize_config_file_path(path: PathBuf) -> Result { @@ -5073,6 +5322,77 @@ command = "cargo check" ); } + #[test] + fn config_display_redacts_nested_extra_secrets() { + let mut config = ConfigToml::default(); + let mut profile = toml::map::Map::new(); + profile.insert( + "chatgpt_access_token".to_string(), + toml::Value::String("raw-chatgpt-access-token-value".to_string()), + ); + profile.insert( + "safe_label".to_string(), + toml::Value::String("visible".to_string()), + ); + + let mut nested = toml::map::Map::new(); + nested.insert( + "refresh_token".to_string(), + toml::Value::String("raw-refresh-token-value".to_string()), + ); + nested.insert("expires_at".to_string(), toml::Value::Integer(1234)); + profile.insert("session".to_string(), toml::Value::Table(nested)); + + config + .extras + .insert("extras".to_string(), toml::Value::Table(profile)); + + let listed = config.list_values(); + let rendered = listed.get("extras").expect("extras are listed"); + + assert!(rendered.contains("chatgpt_access_token")); + assert!(rendered.contains("refresh_token")); + assert!(rendered.contains("safe_label = \"visible\"")); + assert!(!rendered.contains("raw-chatgpt-access-token-value")); + assert!(!rendered.contains("raw-refresh-token-value")); + + let display = config + .get_display_value("extras") + .expect("extras display value"); + assert!(!display.contains("raw-chatgpt-access-token-value")); + 
assert!(!display.contains("raw-refresh-token-value")); + } + + #[test] + fn config_display_redacts_sensitive_extra_leaf_keys_and_headers() { + let mut config = ConfigToml::default(); + config.extras.insert( + "chatgpt_access_token".to_string(), + toml::Value::String("raw-chatgpt-token-value".to_string()), + ); + config.http_headers.insert( + "Authorization".to_string(), + "Bearer raw-header-token".to_string(), + ); + config + .http_headers + .insert("X-Test".to_string(), "ok".to_string()); + + assert_eq!( + config.get_display_value("chatgpt_access_token").as_deref(), + Some("\"raw-***alue\"") + ); + + let headers = config + .list_values() + .get("http_headers") + .expect("headers are listed") + .clone(); + assert!(headers.contains("Authorization=Bear***oken")); + assert!(headers.contains("X-Test=ok")); + assert!(!headers.contains("raw-header-token")); + } + #[test] fn hook_sinks_config_uses_separate_table_from_lifecycle_hooks() -> Result<()> { let raw = r#" @@ -5744,6 +6064,184 @@ unix_socket_path = "/tmp/cw-hooks.sock" let _ = fs::remove_dir_all(home); } + // ── ensure_state_dir legacy migration (#3240) ─────────────────────── + + /// Saves and restores the env vars that the state-resolvers read. + struct StateEnvRestore { + home: Option, + userprofile: Option, + codewhale_home: Option, + } + + impl Drop for StateEnvRestore { + fn drop(&mut self) { + // Safety: test-only environment mutation is serialized by env_lock(). 
+ unsafe { + match self.home.take() { + Some(value) => env::set_var("HOME", value), + None => env::remove_var("HOME"), + } + match self.userprofile.take() { + Some(value) => env::set_var("USERPROFILE", value), + None => env::remove_var("USERPROFILE"), + } + match self.codewhale_home.take() { + Some(value) => env::set_var("CODEWHALE_HOME", value), + None => env::remove_var("CODEWHALE_HOME"), + } + } + } + } + + /// Points `HOME`/`USERPROFILE`/`CODEWHALE_HOME` at a fresh temp tree so + /// `codewhale_home()` -> `/.codewhale` and `legacy_deepseek_home()` + /// -> `/.deepseek`. Env is restored on drop. + struct StateDirEnv { + home: PathBuf, + _restore: StateEnvRestore, + } + + impl StateDirEnv { + fn install(unique: u128) -> Self { + let home = std::env::temp_dir().join(format!( + "codewhale-state-migration-{}-{unique}", + std::process::id() + )); + let restore = StateEnvRestore { + home: env::var_os("HOME"), + userprofile: env::var_os("USERPROFILE"), + codewhale_home: env::var_os("CODEWHALE_HOME"), + }; + // Safety: test-only environment mutation is serialized by env_lock(). + unsafe { + env::set_var("HOME", &home); + env::set_var("USERPROFILE", &home); + env::set_var("CODEWHALE_HOME", home.join(CODEWHALE_APP_DIR)); + } + Self { + home, + _restore: restore, + } + } + fn legacy(&self, sub: &str) -> PathBuf { + self.home.join(LEGACY_APP_DIR).join(sub) + } + fn primary(&self, sub: &str) -> PathBuf { + self.home.join(CODEWHALE_APP_DIR).join(sub) + } + } + + #[test] + fn ensure_state_dir_relocates_legacy_subdir_on_first_write() { + let _lock = env_lock(); + let unique = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("clock") + .as_nanos(); + let state_env = StateDirEnv::install(unique); + // Seed a legacy subdir; primary must not exist yet. 
+ fs::create_dir_all(state_env.legacy("slop_ledger")).expect("legacy dir"); + fs::write( + state_env.legacy("slop_ledger").join("slop_ledger.json"), + b"legacy", + ) + .expect("legacy file"); + assert!(!state_env.primary("slop_ledger").exists()); + + let dir = ensure_state_dir("slop_ledger").expect("ensure_state_dir"); + assert_eq!(dir, state_env.primary("slop_ledger")); + // Legacy contents relocated into primary. + assert_eq!( + fs::read_to_string(state_env.primary("slop_ledger").join("slop_ledger.json")) + .expect("migrated file"), + "legacy" + ); + // The legacy subdir was relocated (moved), so .deepseek stops growing. + assert!( + !state_env.legacy("slop_ledger").exists(), + "legacy subdir should be removed after relocation" + ); + // Idempotent: a second call is a no-op now that primary exists. + ensure_state_dir("slop_ledger").expect("idempotent ensure"); + let _ = fs::remove_dir_all(&state_env.home); + } + + #[test] + fn ensure_state_dir_writes_to_primary_when_both_exist() { + let _lock = env_lock(); + let unique = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("clock") + .as_nanos(); + let state_env = StateDirEnv::install(unique); + // Migrated user: primary already exists; a legacy orphan also remains. + fs::create_dir_all(state_env.primary("sessions")).expect("primary dir"); + fs::write(state_env.primary("sessions").join("a.json"), b"primary").expect("primary file"); + fs::create_dir_all(state_env.legacy("sessions")).expect("legacy dir"); + fs::write(state_env.legacy("sessions").join("old.json"), b"legacy").expect("legacy file"); + + let dir = ensure_state_dir("sessions").expect("ensure_state_dir"); + assert_eq!(dir, state_env.primary("sessions")); + // Primary untouched; legacy orphan left as-is (not migrated, not deleted). 
+ assert_eq!( + fs::read_to_string(state_env.primary("sessions").join("a.json")).expect("primary"), + "primary" + ); + assert!( + state_env.legacy("sessions").exists(), + "existing legacy orphan must not be deleted when primary exists" + ); + let _ = fs::remove_dir_all(&state_env.home); + } + + #[test] + fn resolve_state_dir_still_finds_legacy_for_backfill() { + let _lock = env_lock(); + let unique = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("clock") + .as_nanos(); + let state_env = StateDirEnv::install(unique); + // Only legacy exists -> read resolver returns legacy (backfill). + fs::create_dir_all(state_env.legacy("catalog")).expect("legacy dir"); + assert_eq!( + resolve_state_dir("catalog").expect("resolve"), + state_env.legacy("catalog") + ); + // After the primary is created (e.g. via a write), the read resolver + // returns primary — legacy is reachable only while primary is absent. + ensure_state_dir("catalog").expect("ensure"); + assert_eq!( + resolve_state_dir("catalog").expect("resolve after migrate"), + state_env.primary("catalog") + ); + let _ = fs::remove_dir_all(&state_env.home); + } + + #[test] + fn state_resolvers_reject_path_traversal_subdirs() { + // Defense against path injection (#3240 hardening): the public state + // resolvers must refuse subdirs that could escape the state root. + for bad in ["..", "../secret", "/etc", "a/../../b"] { + let err = ensure_state_dir(bad) + .err() + .unwrap_or_else(|| panic!("expected {bad:?} to be rejected")); + assert!( + format!("{err:#}").contains("state subdir"), + "expected rejection of {bad:?}, got {err:#}" + ); + assert!( + resolve_state_dir(bad).is_err(), + "read resolver must also reject {bad:?}" + ); + } + // Safe values are accepted (including the root sentinel "."). 
+ assert!(ensure_safe_state_subdir(".").is_ok()); + assert!(ensure_safe_state_subdir("sessions").is_ok()); + assert!(ensure_safe_state_subdir("a/b").is_ok()); + assert!(ensure_safe_state_subdir("").is_err()); + } + #[test] fn normalize_config_file_path_rejects_traversal() { let err = normalize_config_file_path(PathBuf::from("../config.toml")) diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 2049ae7a4f..a43f4ba554 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -9,14 +9,14 @@ description = "Core runtime boundaries for DeepSeek workspace architecture" [dependencies] anyhow.workspace = true chrono.workspace = true -codewhale-agent = { path = "../agent", version = "0.8.62" } -codewhale-config = { path = "../config", version = "0.8.62" } -codewhale-execpolicy = { path = "../execpolicy", version = "0.8.62" } -codewhale-hooks = { path = "../hooks", version = "0.8.62" } -codewhale-mcp = { path = "../mcp", version = "0.8.62" } -codewhale-protocol = { path = "../protocol", version = "0.8.62" } -codewhale-state = { path = "../state", version = "0.8.62" } -codewhale-tools = { path = "../tools", version = "0.8.62" } +codewhale-agent = { path = "../agent", version = "0.8.63" } +codewhale-config = { path = "../config", version = "0.8.63" } +codewhale-execpolicy = { path = "../execpolicy", version = "0.8.63" } +codewhale-hooks = { path = "../hooks", version = "0.8.63" } +codewhale-mcp = { path = "../mcp", version = "0.8.63" } +codewhale-protocol = { path = "../protocol", version = "0.8.63" } +codewhale-state = { path = "../state", version = "0.8.63" } +codewhale-tools = { path = "../tools", version = "0.8.63" } serde_json.workspace = true tracing.workspace = true uuid.workspace = true diff --git a/crates/execpolicy/Cargo.toml b/crates/execpolicy/Cargo.toml index 3e975a45de..000e5e27eb 100644 --- a/crates/execpolicy/Cargo.toml +++ b/crates/execpolicy/Cargo.toml @@ -8,5 +8,5 @@ description = "Execution policy and approval model parity for 
DeepSeek workspace [dependencies] anyhow.workspace = true -codewhale-protocol = { path = "../protocol", version = "0.8.62" } +codewhale-protocol = { path = "../protocol", version = "0.8.63" } serde.workspace = true diff --git a/crates/hooks/Cargo.toml b/crates/hooks/Cargo.toml index b02abc9015..a76e128c0e 100644 --- a/crates/hooks/Cargo.toml +++ b/crates/hooks/Cargo.toml @@ -10,7 +10,7 @@ description = "Hook dispatch and notifications parity for DeepSeek workspace arc anyhow.workspace = true async-trait.workspace = true chrono.workspace = true -codewhale-protocol = { path = "../protocol", version = "0.8.62" } +codewhale-protocol = { path = "../protocol", version = "0.8.63" } reqwest.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/tools/Cargo.toml b/crates/tools/Cargo.toml index 0ff1856b47..49d64f605a 100644 --- a/crates/tools/Cargo.toml +++ b/crates/tools/Cargo.toml @@ -9,7 +9,7 @@ description = "Tool invocation lifecycle, schema validation, and scheduler paral [dependencies] anyhow.workspace = true async-trait.workspace = true -codewhale-protocol = { path = "../protocol", version = "0.8.62" } +codewhale-protocol = { path = "../protocol", version = "0.8.63" } serde.workspace = true serde_json.workspace = true thiserror.workspace = true diff --git a/crates/tui/CHANGELOG.md b/crates/tui/CHANGELOG.md index a116be8440..fbd818d517 100644 --- a/crates/tui/CHANGELOG.md +++ b/crates/tui/CHANGELOG.md @@ -7,6 +7,96 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.8.63] - 2026-06-19 + +### Added + +- **Sub-agent fanout safeguards (#3318, #3319).** High-fanout Workflow runs can + now queue and drain more agents than the instantaneous concurrency cap by + default, with `[subagents] max_admitted` available to tune that bounded + admission population. 
Distinct `agent` calls are no longer capped by the + per-turn loop guard before runtime launch concurrency and provider + rate-limit backoff can apply. `[subagents] token_budget` applies a shared + aggregate token ceiling to a root `agent` run and its descendants. +- **Per-worker sub-agent token enforcement (#3321).** A `token_budget` / + `max_tokens` set on an individual `agent` call now bounds that single worker + mid-run: once its accumulated model tokens exceed the cap it stops cleanly + with a `budget_exhausted` status instead of running to `max_steps`. This + complements the scope-level admission gate (#3319) — the per-worker cap stops + one runaway worker, the scope cap bounds total fan-out — without + double-counting. Harvested from #3321 by @donglovejava. +- **Provider-specific sub-agent fanout config.** `[subagents.providers.<provider>]` + profiles now override `enabled`, `max_concurrent`, `max_admitted`, + `launch_concurrency`, `max_depth`, token budget, API timeout, and heartbeat + timeout for the active provider. Use broad direct-API profiles such as + `[subagents.providers.deepseek]` and tighter subscription profiles such as + `[subagents.providers.glm]`; `/config subagents status` shows both global + and active-provider resolved values. +- **Sub-agent control and isolation.** The single `agent` tool now exposes + status, peek, and cancel actions for running children, and accepts + `worktree: true` to create an isolated git worktree/branch for parallel edit + lanes instead of requiring callers to hand-roll a `cwd`. + +### Fixed + +- **Mode and tool catalog correctness.** Core action tools remain discoverable + in the model-facing catalog/tool search, and a consistency self-check flags + registered handlers that drift out of the advertised catalog. Review-looking + prompts in explicit Agent/YOLO mode now keep the requested mode and tools, + with only an advisory review hint. 
+- **Sub-agent orchestration recovery.** Child agents now retry transient + provider header/SSE timeouts before failing, and parent runs synthesize missed + child completions from terminal child state so orchestration cannot hang on a + lost completion event. +- **DeepSeek thinking tool calls.** DeepSeek chat-completions requests now omit + explicit `tool_choice` whenever reasoning/thinking is enabled, avoiding + provider rejections while leaving no-thinking routes unchanged. +- **Task sidebar shortcuts and attribution.** Ctrl-K stays palette/emacs-kill, + while Ctrl-X is scoped to Tasks-sidebar background shell cancellation. Shell + jobs launched by sub-agents now render with their child-agent owner in the + Tasks sidebar and transcript. +- **Benchmark-turn recovery and context economy.** Repeated read-only search + loop blocks now return guidance instead of fatal tool failures, Python build + failures that are missing `setuptools` include an install/retry hint, long + foreground shell timeouts steer models toward background execution, and noisy + shell/test/web outputs are compacted earlier for large-context routes. +- **Config display redaction.** `codew config get/list` now recursively masks + token-, secret-, password-, credential-, and authorization-like keys inside + unknown `extras` tables and redacts sensitive HTTP header values before + printing config output. +- **Queued follow-up hints and force-steer keys.** The pending-input preview now + advertises `Ctrl+S send now` whenever queued follow-ups exist, and + Ctrl/Cmd+Enter force-steering also accepts the common Ctrl+J terminal + encoding while a turn is running. +- **Sidebar default visibility restored (#3328).** New and upgraded sessions + now use a pinned composed sidebar by default when the terminal is wide + enough, so live Agents and Tasks surface without opting back into idle + auto-collapse. 
Older settings files that captured the v0.8.62 auto-collapse + default now migrate to `pinned` unless `/sidebar auto --save` records an + explicit opt-in. `/sidebar` now reports when width or auto-collapse + suppresses rendering instead of saying the sidebar is visible. Reported by + @dxfq. +- **JavaScript execution proxy env handling (#3273, #3331).** `js_execution` + now enables Node's environment-proxy mode when proxy variables are present, + mirrors lowercase proxy variables for the child process, and backfills + `HTTP_PROXY` / `HTTPS_PROXY` from `ALL_PROXY`. Reported by @lordwedggie and + harvested from #3331 by @cyq1017. +- **Legacy app-server non-loopback auth hardening (#3258).** Bare + `codewhale app-server --host 0.0.0.0` now fails fast unless an explicit + `--auth-token` or `CODEWHALE_APP_SERVER_TOKEN` is supplied, keeping generated + one-time `cwapp_*` tokens loopback-only. +- **Legacy `.deepseek` state write-path migration (#3240).** State subdirectories + (`sessions`, `slop_ledger`, `trophies`, `catalog`) are now always written under + `~/.codewhale/`, and the first write of a subdir relocates any pre-existing + `~/.deepseek/` contents into the primary location so the legacy tree stops + growing while old data is preserved. The read resolver still finds legacy data + for backfill until each subdir migrates. Reported by @Final527; onboarding + marker slice from #3302 by @nightt5879. +- **State subdir validation on Windows (#3240).** State path hardening now + rejects rooted/prefixed subdir strings such as `/etc` before resolving or + migrating state directories, keeping the `.codewhale` write resolver inside + its state root across platforms. + ## [0.8.62] - 2026-06-17 ### Changed @@ -1394,50 +1484,6 @@ also to issue reporters and verification helpers including **@New2Niu** reports and acceptance details that shaped these fixes, plus the WeChat/Chinese UX reports relayed during the final triage pass. 
-## [0.8.49] - 2026-06-01 - -### Added - -- Added the missing `[providers.moonshot]` example block for Moonshot/Kimi, - documented `completion_sound`, and refreshed the tool-surface docs for the - current registry, including `finance`, `web.run`, git history tools, memory, - OCR, and other registered tools. - -### Changed - -- Hardened prefix-cache fingerprints to hash API-visible tool schema details, - not just tool names, so schema and description drift invalidates cached - prefixes before it can confuse model calls (#2264). -- Kept `finance` registered independently from web-search tools and prevented - duplicate web/patch tool registration in agent and YOLO modes. - -### Fixed - -- Fixed the DeepSeek V4-Pro cost estimate after the 2026-05-31 pricing cutoff: - the post-promotion official rate remains one quarter of the original price, - so CodeWhale no longer shows roughly 4x too much after June 1 (#2489). -- Fixed Kimi/Moonshot tool schema normalization by moving parent `type` fields - into `anyOf`/`oneOf` items, with regression coverage for nested schema shapes - that could otherwise still fail Kimi validation (#2438). -- Fixed raw ANSI/SGR fragments leaking into footer, shell-label, and sidebar - activity text during active tool execution (#2481). -- Fixed `[tui]` config parsing when `status_items` is omitted, restoring the - documented default footer order for older and hand-written configs (#2483). -- Fixed a shell env-scrubbing test so it does not depend on the user's default - shell understanding POSIX parameter expansion. -- Removed stale `qwen/qwen3.7-max` references left in `config.example.toml` - after the v0.8.48 preset removal. - -### Community - -Thanks to **@idling11** (#2480, #2485), **@reidliu41** (#2493), -**@hongqitai** (#2495), and **@encyc** (#2477) for the fixes and reliability -work harvested into this release. 
- -Thanks also to reporters and verification helpers whose issues shaped the -release: **@A-Corner** (#2438), **@taiwan988** (#2483), **@AiurArtanis** -(#2489), and **@Hmbown** (#2481). - --- Older releases: [CHANGELOG.md](https://github.com/Hmbown/CodeWhale/blob/main/CHANGELOG.md) and [docs/CHANGELOG_ARCHIVE.md](https://github.com/Hmbown/CodeWhale/blob/main/docs/CHANGELOG_ARCHIVE.md). diff --git a/crates/tui/Cargo.toml b/crates/tui/Cargo.toml index d010123e66..664d8c5331 100644 --- a/crates/tui/Cargo.toml +++ b/crates/tui/Cargo.toml @@ -13,6 +13,7 @@ tui = ["dep:schemaui", "schemaui/tui", "json", "toml"] web = ["dep:schemaui", "schemaui/web", "json", "toml"] json = ["schemaui/json"] toml = ["schemaui/toml"] +long-running-tests = [] [[bin]] name = "codewhale-tui" @@ -20,12 +21,12 @@ path = "src/main.rs" [dependencies] anyhow = "1.0.100" -codewhale-config = { path = "../config", version = "0.8.62" } -codewhale-execpolicy = { path = "../execpolicy", version = "0.8.62" } -codewhale-protocol = { path = "../protocol", version = "0.8.62" } -codewhale-release = { path = "../release", version = "0.8.62" } -codewhale-secrets = { path = "../secrets", version = "0.8.62" } -codewhale-tools = { path = "../tools", version = "0.8.62" } +codewhale-config = { path = "../config", version = "0.8.63" } +codewhale-execpolicy = { path = "../execpolicy", version = "0.8.63" } +codewhale-protocol = { path = "../protocol", version = "0.8.63" } +codewhale-release = { path = "../release", version = "0.8.63" } +codewhale-secrets = { path = "../secrets", version = "0.8.63" } +codewhale-tools = { path = "../tools", version = "0.8.63" } schemaui = { version = "0.12.0", default-features = false, optional = true } async-stream = "0.3.6" async-trait = "0.1" @@ -50,7 +51,7 @@ serde_json = { version = "1.0.149", features = ["preserve_order"] } schemars = { version = "1.2.1", features = ["derive", "preserve_order"] } shellexpand = "3" toml = "0.9.7" -tokio = { version = "1.49.0", features = ["full"] } 
+tokio = { version = "1.50.0", features = ["full"] } tokio-util = { version = "0.7.16", features = ["io"] } unicode-width = "0.2" unicode-segmentation = "1.12" diff --git a/crates/tui/src/client/chat.rs b/crates/tui/src/client/chat.rs index fcd73cc806..a7e29ddf9e 100644 --- a/crates/tui/src/client/chat.rs +++ b/crates/tui/src/client/chat.rs @@ -147,7 +147,8 @@ impl DeepSeekClient { } body["tools"] = json!(chat_tools); } - if let Some(choice) = request.tool_choice.as_ref() + if should_send_tool_choice_for_chat(self.api_provider, request.reasoning_effort.as_deref()) + && let Some(choice) = request.tool_choice.as_ref() && let Some(mapped) = map_tool_choice_for_chat(choice) { body["tool_choice"] = mapped; @@ -270,7 +271,8 @@ impl DeepSeekClient { } body["tools"] = json!(chat_tools); } - if let Some(choice) = request.tool_choice.as_ref() + if should_send_tool_choice_for_chat(self.api_provider, request.reasoning_effort.as_deref()) + && let Some(choice) = request.tool_choice.as_ref() && let Some(mapped) = map_tool_choice_for_chat(choice) { body["tool_choice"] = mapped; @@ -1846,6 +1848,23 @@ fn map_tool_choice_for_chat(choice: &Value) -> Option { } } +fn should_send_tool_choice_for_chat(provider: ApiProvider, effort: Option<&str>) -> bool { + if !matches!(provider, ApiProvider::Deepseek | ApiProvider::DeepseekCN) { + return true; + } + !reasoning_effort_enables_thinking(effort) +} + +fn reasoning_effort_enables_thinking(effort: Option<&str>) -> bool { + let Some(effort) = effort else { + return false; + }; + !matches!( + effort.trim().to_ascii_lowercase().as_str(), + "off" | "disabled" | "none" | "false" + ) +} + /// Final-pass sanitizer over the outgoing chat-completions JSON payload. /// Forces a non-empty `reasoning_content` onto assistant messages that carry /// `tool_calls`, when the model + effort combination requires it. 
DeepSeek's @@ -2666,6 +2685,37 @@ mod stream_diagnostics_tests { ); } + #[test] + fn deepseek_thinking_omits_tool_choice() { + for effort in [Some("high"), Some("max"), Some("medium"), Some("")] { + assert!( + !should_send_tool_choice_for_chat(ApiProvider::Deepseek, effort), + "DeepSeek thinking rejects explicit tool_choice for {effort:?}" + ); + assert!( + !should_send_tool_choice_for_chat(ApiProvider::DeepseekCN, effort), + "DeepSeek CN thinking rejects explicit tool_choice for {effort:?}" + ); + } + + for effort in [ + None, + Some("off"), + Some("disabled"), + Some("none"), + Some("false"), + ] { + assert!(should_send_tool_choice_for_chat( + ApiProvider::Deepseek, + effort + )); + } + assert!(should_send_tool_choice_for_chat( + ApiProvider::Openrouter, + Some("high") + )); + } + #[test] fn format_stream_headers_renders_all_fields_when_present() { let mut headers = HeaderMap::new(); diff --git a/crates/tui/src/client/responses.rs b/crates/tui/src/client/responses.rs index 57e8c509e6..ec4b6d7422 100644 --- a/crates/tui/src/client/responses.rs +++ b/crates/tui/src/client/responses.rs @@ -87,20 +87,21 @@ impl DeepSeekClient { // so it must not be set again here or it would be duplicated. The // ChatGPT backend additionally requires the account id and the // experimental Responses beta opt-in. 
- let mut builder = self - .http_client - .post(&url) - .header("Content-Type", "application/json") - .header("Accept", "text/event-stream") - .header("OpenAI-Beta", "responses=experimental") - .header("originator", "codex_cli_rs"); - if let Some(account_id) = crate::oauth::codex_account_id() { - builder = builder.header("chatgpt-account-id", account_id); - } - - let response = builder - .json(&body) - .send() + let account_id = crate::oauth::codex_account_id(); + let response = self + .send_with_retry(|| { + let mut builder = self + .http_client + .post(&url) + .header("Content-Type", "application/json") + .header("Accept", "text/event-stream") + .header("OpenAI-Beta", "responses=experimental") + .header("originator", "codex_cli_rs"); + if let Some(account_id) = &account_id { + builder = builder.header("chatgpt-account-id", account_id); + } + builder.json(&body) + }) .await .context("Responses API request failed")?; @@ -700,7 +701,114 @@ fn parse_responses_usage(val: &Value) -> Usage { mod tests { use super::*; + use std::sync::Arc; + use std::sync::atomic::{AtomicUsize, Ordering}; + + use futures_util::StreamExt; + + use crate::config::{Config, ProviderConfig, ProvidersConfig, RetryConfig}; use crate::models::Message; + use wiremock::matchers::{method, path}; + use wiremock::{Mock, MockServer, Request, Respond, ResponseTemplate}; + + #[derive(Clone)] + struct RetryThenSuccess { + attempts: Arc, + } + + impl Respond for RetryThenSuccess { + fn respond(&self, _request: &Request) -> ResponseTemplate { + if self.attempts.fetch_add(1, Ordering::SeqCst) == 0 { + return ResponseTemplate::new(429) + .insert_header("Retry-After", "0") + .set_body_string("rate limited"); + } + + ResponseTemplate::new(200) + .insert_header("Content-Type", "text/event-stream") + .set_body_string("data: [DONE]\n\n") + } + } + + fn minimal_responses_request() -> MessageRequest { + MessageRequest { + model: "gpt-5.5".to_string(), + messages: vec![Message { + role: "user".to_string(), + content: 
vec![ContentBlock::Text { + text: "hello".to_string(), + cache_control: None, + }], + }], + max_tokens: 128, + system: None, + tools: None, + tool_choice: None, + metadata: None, + thinking: None, + reasoning_effort: None, + stream: None, + temperature: None, + top_p: None, + } + } + + fn test_codex_config(server: &MockServer) -> Config { + Config { + provider: Some("openai-codex".to_string()), + retry: Some(RetryConfig { + enabled: Some(true), + max_retries: Some(1), + initial_delay: Some(0.0), + max_delay: Some(0.0), + exponential_base: Some(1.0), + }), + providers: Some(ProvidersConfig { + openai_codex: ProviderConfig { + base_url: Some(server.uri()), + ..ProviderConfig::default() + }, + ..ProvidersConfig::default() + }), + ..Config::default() + } + } + + #[tokio::test] + async fn responses_stream_retries_rate_limited_request() { + let server = MockServer::start().await; + let attempts = Arc::new(AtomicUsize::new(0)); + Mock::given(method("POST")) + .and(path(CODEX_RESPONSES_PATH)) + .respond_with(RetryThenSuccess { + attempts: Arc::clone(&attempts), + }) + .mount(&server) + .await; + + let client = { + let _env_lock = crate::test_support::lock_test_env(); + let _codex_token = + crate::test_support::EnvVarGuard::set("OPENAI_CODEX_ACCESS_TOKEN", "test-token"); + let _legacy_codex_token = + crate::test_support::EnvVarGuard::remove("CODEX_ACCESS_TOKEN"); + DeepSeekClient::new(&test_codex_config(&server)).unwrap() + }; + let mut stream = client + .handle_responses_stream(minimal_responses_request()) + .await + .unwrap(); + + tokio::time::timeout(std::time::Duration::from_secs(5), async { + while let Some(event) = stream.next().await { + event.unwrap(); + } + }) + .await + .expect("Responses retry stream should finish after [DONE]"); + + assert_eq!(attempts.load(Ordering::SeqCst), 2); + } #[test] fn codex_reasoning_effort_uses_responses_labels() { diff --git a/crates/tui/src/commands/groups/config/config.rs b/crates/tui/src/commands/groups/config/config.rs index 
2aee8b1d5b..c2a9b1bf88 100644 --- a/crates/tui/src/commands/groups/config/config.rs +++ b/crates/tui/src/commands/groups/config/config.rs @@ -3,13 +3,16 @@ use super::CommandResult; use crate::config::{ ApiProvider, COMMON_DEEPSEEK_MODELS, Config, DEFAULT_STREAM_CHUNK_TIMEOUT_SECS, - DEFAULT_XIAOMI_MIMO_BASE_URL, MAX_STREAM_CHUNK_TIMEOUT_SECS, MIN_STREAM_CHUNK_TIMEOUT_SECS, + DEFAULT_SUBAGENT_API_TIMEOUT_SECS, DEFAULT_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, + DEFAULT_XIAOMI_MIMO_BASE_URL, MAX_STREAM_CHUNK_TIMEOUT_SECS, MAX_SUBAGENT_API_TIMEOUT_SECS, + MAX_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, MAX_SUBAGENTS, MIN_STREAM_CHUNK_TIMEOUT_SECS, + MIN_SUBAGENT_API_TIMEOUT_SECS, MIN_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, SubagentsConfig, XIAOMI_MIMO_PAY_AS_YOU_GO_BASE_URL, clear_active_provider_api_key, normalize_model_name_for_provider, }; use crate::config_persistence::{ persist_provider_base_url_key, persist_root_bool_key, persist_root_string_key, - persist_tui_integer_key, + persist_subagents_bool_key, persist_subagents_integer_key, persist_tui_integer_key, }; use crate::config_ui::{ConfigUiMode, parse_mode}; use crate::localization::resolve_locale; @@ -18,6 +21,7 @@ use crate::tui::app::{ App, AppAction, AppMode, OnboardingState, ReasoningEffort, SidebarFocus, VimMode, }; use crate::tui::approval::ApprovalMode; +use crate::tui::ui::{SidebarRenderState, sidebar_render_state}; use anyhow::Result; use std::path::{Path, PathBuf}; @@ -56,6 +60,14 @@ pub fn config_command(app: &mut App, arg: Option<&str>) -> CommandResult { if raw.is_empty() { return show_config(app, None); } + let mut raw_words = raw.splitn(2, char::is_whitespace); + if raw_words + .next() + .is_some_and(|token| token.eq_ignore_ascii_case("subagents")) + { + let rest = raw_words.next().unwrap_or("").trim(); + return subagents_config_command(app, rest); + } let parts: Vec<&str> = raw.splitn(2, ' ').collect(); if parts.len() == 1 { // Single arg: editor-mode shortcut OR show-value request. 
@@ -87,6 +99,9 @@ pub fn config_command(app: &mut App, arg: Option<&str>) -> CommandResult { /// Show the current value of a single setting. fn show_single_setting(app: &App, key: &str) -> CommandResult { let key = key.to_lowercase(); + if let Some(subagent_key) = key.strip_prefix("subagents.") { + return show_subagents_setting(app, subagent_key); + } fn locale_display(l: crate::localization::Locale) -> &'static str { match l { crate::localization::Locale::En => "en", @@ -320,7 +335,7 @@ pub fn verbose(app: &mut App, arg: Option<&str>) -> CommandResult { /// Toggle or focus the right sidebar. /// -/// Bare `/sidebar` toggles between hidden and auto. Explicit values mirror +/// Bare `/sidebar` toggles between hidden and pinned. Explicit values mirror /// `sidebar_focus` so users have a discoverable copy-friendly path that does /// not depend on terminal-specific key translations. pub fn sidebar(app: &mut App, arg: Option<&str>) -> CommandResult { @@ -334,28 +349,28 @@ pub fn sidebar(app: &mut App, arg: Option<&str>) -> CommandResult { let target = match tokens.as_slice() { [] | ["toggle"] => { if app.sidebar_focus == SidebarFocus::Hidden { - SidebarFocus::Auto + SidebarFocus::Pinned } else { SidebarFocus::Hidden } } [value] => match value.to_ascii_lowercase().as_str() { - "on" | "show" | "visible" => SidebarFocus::Auto, + "on" | "show" | "visible" | "pinned" => SidebarFocus::Pinned, "off" | "hide" | "hidden" | "closed" | "none" => SidebarFocus::Hidden, "auto" => SidebarFocus::Auto, - "work" | "plan" | "todos" => SidebarFocus::Work, + "work" | "plan" | "todos" => SidebarFocus::Pinned, "tasks" => SidebarFocus::Tasks, "agents" | "subagents" | "sub-agents" => SidebarFocus::Agents, "context" | "session" => SidebarFocus::Context, _ => { return CommandResult::error( - "Usage: /sidebar [on|off|auto|work|tasks|agents|context] [--save]", + "Usage: /sidebar [on|off|pinned|auto|tasks|agents|context] [--save]", ); } }, _ => { return CommandResult::error( - "Usage: /sidebar 
[on|off|auto|work|tasks|agents|context] [--save]", + "Usage: /sidebar [on|off|pinned|auto|tasks|agents|context] [--save]", ); } }; @@ -370,15 +385,23 @@ pub fn sidebar(app: &mut App, arg: Option<&str>) -> CommandResult { } app.needs_redraw = true; - let message = sidebar_status_message(target).to_string(); + let message = sidebar_status_message(app); CommandResult::message(message) } -fn sidebar_status_message(focus: SidebarFocus) -> &'static str { - if focus == SidebarFocus::Hidden { - "Sidebar is hidden" - } else { - "Sidebar is visible" +fn sidebar_status_message(app: &mut App) -> String { + match sidebar_render_state(app) { + SidebarRenderState::Hidden => "Sidebar is hidden".to_string(), + SidebarRenderState::SuppressedByWidth { + available_width, + min_width, + } => format!( + "Sidebar is on, but hidden because the terminal is too narrow ({available_width} cols; needs at least {min_width})" + ), + SidebarRenderState::AutoCollapsed => { + "Sidebar auto mode is on, but currently collapsed while idle".to_string() + } + SidebarRenderState::Visible => "Sidebar is visible".to_string(), } } @@ -427,9 +450,460 @@ fn stream_chunk_timeout_value_label(raw: u64, resolved: u64) -> String { } } +fn subagents_config_command(app: &mut App, raw: &str) -> CommandResult { + let mut tokens = raw.split_whitespace().collect::>(); + let persist = matches!(tokens.last(), Some(&"--save" | &"-s")); + if persist { + tokens.pop(); + } + + match tokens.as_slice() { + [] | ["status"] => subagents_status(app), + ["on"] | ["enable"] | ["enabled"] => { + set_subagents_config_value(app, "enabled", "true", persist) + } + ["off"] | ["disable"] | ["disabled"] => { + set_subagents_config_value(app, "enabled", "false", persist) + } + [key] => show_subagents_setting(app, key), + [key, value] => set_subagents_config_value(app, key, value, persist), + _ => CommandResult::error( + "Usage: /config subagents 
[status|on|off|enabled|max_concurrent|max_depth|launch_concurrency|api_timeout_secs|heartbeat_timeout_secs ] [--save]", + ), + } +} + +fn load_command_config(app: &App) -> Result { + Config::load(app.config_path.clone(), app.config_profile.as_deref()) + .map_err(|err| format!("Failed to load config: {err}")) +} + +fn subagents_status(app: &App) -> CommandResult { + let config = match load_command_config(app) { + Ok(config) => config, + Err(err) => return CommandResult::error(err), + }; + let path = crate::config_persistence::config_toml_path(app.config_path.as_deref()) + .map(|path| path.display().to_string()) + .unwrap_or_else(|_| "(unresolved)".to_string()); + let disabled_reason = config.subagents_disabled_reason(); + let active_provider = app.api_provider; + let subagents = config.subagents.as_ref(); + let provider_subagents = config.subagent_provider_config(active_provider); + let explicit_enabled = subagents.and_then(|cfg| cfg.enabled); + let raw_max_concurrent = subagents.and_then(|cfg| cfg.max_concurrent); + let raw_max_depth = subagents.and_then(|cfg| cfg.max_depth); + let raw_launch = subagents.and_then(|cfg| cfg.launch_concurrency); + let raw_api = subagents.and_then(|cfg| cfg.api_timeout_secs); + let raw_heartbeat = subagents.and_then(|cfg| cfg.heartbeat_timeout_secs); + let mut lines = Vec::new(); + lines.push(format!( + "Sub-agents: {}", + disabled_reason + .map(|reason| format!("disabled ({reason})")) + .unwrap_or_else(|| "enabled".to_string()) + )); + lines.push(format!("Config path: {path}")); + lines.push(format!( + "Active provider: {} ({})", + active_provider.as_str(), + active_provider.display_name() + )); + lines.push(format!( + "subagents.enabled = {}", + explicit_enabled + .map(|value| value.to_string()) + .unwrap_or_else(|| "default true".to_string()) + )); + lines.push(format!( + "subagents.max_concurrent = {} (resolved global {}; active provider {})", + option_display(raw_max_concurrent), + config.max_subagents(), + 
config.max_subagents_for_provider(active_provider) + )); + lines.push(format!( + "subagents.max_depth = {} (resolved global {}; active provider {})", + option_display(raw_max_depth), + config.subagent_max_spawn_depth(), + config.subagent_max_spawn_depth_for_provider(active_provider) + )); + lines.push(format!( + "subagents.launch_concurrency = {} (resolved global {}; active provider {})", + option_display(raw_launch), + config.launch_concurrency(), + config.launch_concurrency_for_provider(active_provider) + )); + lines.push(format!( + "subagents.api_timeout_secs = {} (resolved global {}; active provider {})", + option_display(raw_api), + config.subagent_api_timeout_secs(), + config.subagent_api_timeout_secs_for_provider(active_provider) + )); + lines.push(format!( + "subagents.heartbeat_timeout_secs = {} (resolved global {}; active provider {})", + option_display(raw_heartbeat), + config.subagent_heartbeat_timeout_secs(), + config.subagent_heartbeat_timeout_secs_for_provider(active_provider) + )); + if let Some(provider_subagents) = provider_subagents { + lines.push(format!( + "subagents.providers.{}.enabled = {}", + active_provider.as_str(), + provider_subagents + .enabled + .map(|value| value.to_string()) + .unwrap_or_else(|| "inherits".to_string()) + )); + lines.push(format!( + "subagents.providers.{}.max_concurrent = {}", + active_provider.as_str(), + option_display(provider_subagents.max_concurrent) + )); + lines.push(format!( + "subagents.providers.{}.max_depth = {}", + active_provider.as_str(), + option_display(provider_subagents.max_depth) + )); + lines.push(format!( + "subagents.providers.{}.launch_concurrency = {}", + active_provider.as_str(), + option_display(provider_subagents.launch_concurrency) + )); + lines.push(format!( + "subagents.providers.{}.max_admitted = {}", + active_provider.as_str(), + option_display(provider_subagents.max_admitted) + )); + } else { + lines.push(format!( + "subagents.providers.{} = inherits global", + 
active_provider.as_str() + )); + } + CommandResult::message(lines.join("\n")) +} + +fn show_subagents_setting(app: &App, key: &str) -> CommandResult { + let config = match load_command_config(app) { + Ok(config) => config, + Err(err) => return CommandResult::error(err), + }; + let Some(key) = canonical_subagents_key(key) else { + return CommandResult::error(format!( + "Unknown subagents setting '{key}'. Use `/config subagents status`." + )); + }; + let active_provider = app.api_provider; + let subagents = config.subagents.as_ref(); + let value = match key { + "enabled" => subagents + .and_then(|cfg| cfg.enabled) + .map(|value| value.to_string()) + .unwrap_or_else(|| "default true".to_string()), + "max_concurrent" => format!( + "{} (resolved global {}; active provider {})", + option_display(subagents.and_then(|cfg| cfg.max_concurrent)), + config.max_subagents(), + config.max_subagents_for_provider(active_provider) + ), + "max_depth" => format!( + "{} (resolved global {}; active provider {})", + option_display(subagents.and_then(|cfg| cfg.max_depth)), + config.subagent_max_spawn_depth(), + config.subagent_max_spawn_depth_for_provider(active_provider) + ), + "launch_concurrency" => format!( + "{} (resolved global {}; active provider {})", + option_display(subagents.and_then(|cfg| cfg.launch_concurrency)), + config.launch_concurrency(), + config.launch_concurrency_for_provider(active_provider) + ), + "api_timeout_secs" => format!( + "{} (resolved global {}; active provider {})", + option_display(subagents.and_then(|cfg| cfg.api_timeout_secs)), + config.subagent_api_timeout_secs(), + config.subagent_api_timeout_secs_for_provider(active_provider) + ), + "heartbeat_timeout_secs" => format!( + "{} (resolved global {}; active provider {})", + option_display(subagents.and_then(|cfg| cfg.heartbeat_timeout_secs)), + config.subagent_heartbeat_timeout_secs(), + config.subagent_heartbeat_timeout_secs_for_provider(active_provider) + ), + _ => unreachable!("canonical subagent 
key"), + }; + CommandResult::message(format!("subagents.{key} = {value}")) +} + +fn option_display(value: Option) -> String { + value + .map(|value| value.to_string()) + .unwrap_or_else(|| "default".to_string()) +} + +fn canonical_subagents_key(key: &str) -> Option<&'static str> { + let normalized = key.trim().to_ascii_lowercase(); + let key = normalized + .strip_prefix("subagents.") + .unwrap_or(normalized.as_str()); + match key { + "enabled" | "enable" => Some("enabled"), + "max_concurrent" | "max_subagents" | "concurrency" | "cap" => Some("max_concurrent"), + "max_depth" | "depth" | "spawn_depth" => Some("max_depth"), + "launch_concurrency" | "launches" | "launch" => Some("launch_concurrency"), + "api_timeout_secs" | "api_timeout" | "step_timeout_secs" => Some("api_timeout_secs"), + "heartbeat_timeout_secs" | "heartbeat_timeout" | "heartbeat" => { + Some("heartbeat_timeout_secs") + } + _ => None, + } +} + +fn set_subagents_config_value( + app: &mut App, + key: &str, + value: &str, + persist: bool, +) -> CommandResult { + let Some(key) = canonical_subagents_key(key) else { + return CommandResult::error(format!( + "Unknown subagents setting '{key}'. Use `/config subagents status`." 
+ )); + }; + let mut config = match load_command_config(app) { + Ok(config) => config, + Err(err) => return CommandResult::error(err), + }; + let current_max_subagents = config.max_subagents() as u64; + let subagents = config + .subagents + .get_or_insert_with(SubagentsConfig::default); + + let mut note = None; + let save_result = match key { + "enabled" => { + let enabled = match parse_config_bool(value) { + Ok(enabled) => enabled, + Err(err) => return CommandResult::error(err), + }; + subagents.enabled = Some(enabled); + if persist { + Some(persist_subagents_bool_key( + app.config_path.as_deref(), + "enabled", + enabled, + )) + } else { + None + } + } + "max_concurrent" => { + let raw = match parse_subagents_u64(key, value) { + Ok(raw) => raw, + Err(err) => return CommandResult::error(err), + }; + let clamped = raw.min(MAX_SUBAGENTS as u64); + if clamped != raw { + note = Some(format!("clamped from {raw} to {clamped}")); + } + subagents.max_concurrent = Some(clamped as usize); + if persist { + Some(persist_subagents_integer_key( + app.config_path.as_deref(), + "max_concurrent", + clamped, + )) + } else { + None + } + } + "max_depth" => { + let raw = match parse_subagents_u64(key, value) { + Ok(raw) => raw, + Err(err) => return CommandResult::error(err), + }; + let ceiling = u64::from(codewhale_config::MAX_SPAWN_DEPTH_CEILING); + let clamped = raw.min(ceiling); + if clamped != raw { + note = Some(format!("clamped from {raw} to {clamped}")); + } + subagents.max_depth = Some(clamped as u32); + if persist { + Some(persist_subagents_integer_key( + app.config_path.as_deref(), + "max_depth", + clamped, + )) + } else { + None + } + } + "launch_concurrency" => { + let raw = match parse_subagents_u64(key, value) { + Ok(raw) => raw, + Err(err) => return CommandResult::error(err), + }; + let clamped = raw.clamp(1, current_max_subagents); + if clamped != raw { + note = Some(format!("clamped from {raw} to {clamped}")); + } + subagents.launch_concurrency = Some(clamped as 
usize); + if persist { + Some(persist_subagents_integer_key( + app.config_path.as_deref(), + "launch_concurrency", + clamped, + )) + } else { + None + } + } + "api_timeout_secs" => { + let raw = match parse_subagents_u64(key, value) { + Ok(raw) => raw, + Err(err) => return CommandResult::error(err), + }; + let stored = if raw == 0 { + 0 + } else { + raw.clamp(MIN_SUBAGENT_API_TIMEOUT_SECS, MAX_SUBAGENT_API_TIMEOUT_SECS) + }; + if stored != raw { + note = Some(format!("clamped from {raw} to {stored}")); + } + subagents.api_timeout_secs = Some(stored); + if persist { + Some(persist_subagents_integer_key( + app.config_path.as_deref(), + "api_timeout_secs", + stored, + )) + } else { + None + } + } + "heartbeat_timeout_secs" => { + let raw = match parse_subagents_u64(key, value) { + Ok(raw) => raw, + Err(err) => return CommandResult::error(err), + }; + let stored = if raw == 0 { + 0 + } else { + raw.clamp( + MIN_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, + MAX_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, + ) + }; + if stored != raw { + note = Some(format!("clamped from {raw} to {stored}")); + } + subagents.heartbeat_timeout_secs = Some(stored); + if persist { + Some(persist_subagents_integer_key( + app.config_path.as_deref(), + "heartbeat_timeout_secs", + stored, + )) + } else { + None + } + } + _ => unreachable!("canonical subagent key"), + }; + + let save_suffix = if let Some(result) = save_result { + match result { + Ok(path) => format!("saved to {}", path.display()), + Err(err) => return CommandResult::error(format!("Failed to save: {err}")), + } + } else { + "session only, add --save to persist".to_string() + }; + + if key == "max_concurrent" { + app.max_subagents = config.max_subagents_for_provider(app.api_provider); + } + let display_value = subagents_config_display_value(&config, key); + let note = note.map(|note| format!("; {note}")).unwrap_or_default(); + CommandResult::with_message_and_action( + format!( + "subagents.{key} = {display_value} ({save_suffix}; runtime updated for 
subsequent turns{note})" + ), + subagents_runtime_action(app, &config), + ) +} + +fn parse_subagents_u64(key: &str, value: &str) -> Result { + value + .trim() + .parse::() + .map_err(|_| format!("subagents.{key} must be a whole number")) +} + +fn subagents_config_display_value(config: &Config, key: &str) -> String { + let subagents = config.subagents.as_ref(); + match key { + "enabled" => subagents + .and_then(|cfg| cfg.enabled) + .map(|value| value.to_string()) + .unwrap_or_else(|| "default true".to_string()), + "max_concurrent" => { + if subagents.and_then(|cfg| cfg.max_concurrent) == Some(0) { + "0 (disabled)".to_string() + } else { + config.max_subagents().to_string() + } + } + "max_depth" => { + if subagents.and_then(|cfg| cfg.max_depth) == Some(0) { + "0 (agent tool disabled)".to_string() + } else { + config.subagent_max_spawn_depth().to_string() + } + } + "launch_concurrency" => config.launch_concurrency().to_string(), + "api_timeout_secs" => { + let raw = subagents.and_then(|cfg| cfg.api_timeout_secs); + if raw == Some(0) { + format!("0 (default {DEFAULT_SUBAGENT_API_TIMEOUT_SECS})") + } else { + config.subagent_api_timeout_secs().to_string() + } + } + "heartbeat_timeout_secs" => { + let raw = subagents.and_then(|cfg| cfg.heartbeat_timeout_secs); + if raw == Some(0) { + format!("0 (default {DEFAULT_SUBAGENT_HEARTBEAT_TIMEOUT_SECS})") + } else { + config.subagent_heartbeat_timeout_secs().to_string() + } + } + _ => unreachable!("canonical subagent key"), + } +} + +fn subagents_runtime_action(app: &App, config: &Config) -> AppAction { + let provider = app.api_provider; + let max_subagents = config + .max_subagents_for_provider(provider) + .clamp(1, MAX_SUBAGENTS); + AppAction::UpdateSubagentRuntimeConfig { + enabled: config.subagents_enabled_for_provider(provider), + max_subagents, + launch_concurrency: config.launch_concurrency_for_provider(provider), + max_spawn_depth: config.subagent_max_spawn_depth_for_provider(provider), + api_timeout_secs: 
config.subagent_api_timeout_secs_for_provider(provider), + heartbeat_timeout_secs: config.subagent_heartbeat_timeout_secs_for_provider(provider), + } +} + /// Modify a setting at runtime pub fn set_config_value(app: &mut App, key: &str, value: &str, persist: bool) -> CommandResult { let key = key.to_lowercase(); + if let Some(subagent_key) = key.strip_prefix("subagents.") { + return set_subagents_config_value(app, subagent_key, value, persist); + } match key.as_str() { "model" => { @@ -1296,6 +1770,53 @@ mod tests { app } + #[test] + fn sidebar_config_command_restores_pinned_sidebar_by_default() { + let mut app = create_test_app(); + app.sidebar_focus = SidebarFocus::Hidden; + app.last_sidebar_host_width = Some(120); + + let result = sidebar(&mut app, Some("on")); + + assert!(!result.is_error); + assert_eq!(app.sidebar_focus, SidebarFocus::Pinned); + assert_eq!(result.message.as_deref(), Some("Sidebar is visible")); + } + + #[test] + fn sidebar_config_command_reports_width_suppression() { + let mut app = create_test_app(); + app.sidebar_focus = SidebarFocus::Hidden; + app.last_sidebar_host_width = Some(80); + + let result = sidebar(&mut app, Some("on")); + + assert!(!result.is_error); + assert_eq!(app.sidebar_focus, SidebarFocus::Pinned); + assert_eq!( + result.message.as_deref(), + Some( + "Sidebar is on, but hidden because the terminal is too narrow (80 cols; needs at least 100)" + ) + ); + } + + #[test] + fn sidebar_config_command_reports_auto_idle_collapse() { + let mut app = create_test_app(); + app.sidebar_focus = SidebarFocus::Hidden; + app.last_sidebar_host_width = Some(120); + + let result = sidebar(&mut app, Some("auto")); + + assert!(!result.is_error); + assert_eq!(app.sidebar_focus, SidebarFocus::Auto); + assert_eq!( + result.message.as_deref(), + Some("Sidebar auto mode is on, but currently collapsed while idle") + ); + } + #[test] fn test_mode_yolo_sets_all_flags() { let mut app = create_test_app(); @@ -1651,6 +2172,111 @@ mod tests { 
assert!(msg.contains("Failed to parse boolean 'maybe'")); } + #[test] + fn config_command_subagents_off_save_persists_and_updates_runtime() { + let temp_root = env::temp_dir().join(format!( + "codewhale-subagents-off-save-test-{}", + std::process::id() + )); + fs::create_dir_all(&temp_root).unwrap(); + let config_path = temp_root.join("custom-config.toml"); + + let mut app = create_test_app(); + app.config_path = Some(config_path.clone()); + let result = config_command(&mut app, Some("subagents off --save")); + let msg = result.message.unwrap(); + let saved = fs::read_to_string(&config_path).unwrap(); + + assert!(!result.is_error); + assert!(msg.contains("subagents.enabled = false")); + assert!(msg.contains("saved to")); + assert!(saved.contains("[subagents]")); + assert!(saved.contains("enabled = false")); + match result.action { + Some(AppAction::UpdateSubagentRuntimeConfig { enabled, .. }) => { + assert!(!enabled); + } + other => panic!("expected subagent runtime update, got {other:?}"), + } + } + + #[test] + fn config_command_subagents_depth_save_clamps_to_ceiling() { + let temp_root = env::temp_dir().join(format!( + "codewhale-subagents-depth-save-test-{}", + std::process::id() + )); + fs::create_dir_all(&temp_root).unwrap(); + let config_path = temp_root.join("custom-config.toml"); + + let mut app = create_test_app(); + app.config_path = Some(config_path.clone()); + let result = config_command(&mut app, Some("subagents max_depth 99 --save")); + let msg = result.message.unwrap(); + let saved = fs::read_to_string(&config_path).unwrap(); + let ceiling = codewhale_config::MAX_SPAWN_DEPTH_CEILING; + + assert!(!result.is_error); + assert!(msg.contains(&format!("subagents.max_depth = {ceiling}"))); + assert!(msg.contains(&format!("clamped from 99 to {ceiling}"))); + assert!(saved.contains(&format!("max_depth = {ceiling}"))); + match result.action { + Some(AppAction::UpdateSubagentRuntimeConfig { + max_spawn_depth, .. 
+ }) => { + assert_eq!(max_spawn_depth, ceiling); + } + other => panic!("expected subagent runtime update, got {other:?}"), + } + } + + #[test] + fn config_command_subagents_status_shows_raw_and_resolved_values() { + let temp_root = env::temp_dir().join(format!( + "codewhale-subagents-status-test-{}", + std::process::id() + )); + fs::create_dir_all(&temp_root).unwrap(); + let config_path = temp_root.join("custom-config.toml"); + fs::write( + &config_path, + r#" +[subagents] +enabled = true +max_concurrent = 2 +max_depth = 0 +launch_concurrency = 5 +api_timeout_secs = 0 +heartbeat_timeout_secs = 1 +"#, + ) + .unwrap(); + + let mut app = create_test_app(); + app.config_path = Some(config_path); + let result = config_command(&mut app, Some("subagents status")); + let msg = result.message.unwrap(); + + assert!(!result.is_error); + assert!(msg.contains("Sub-agents: disabled (subagents.max_depth=0)")); + assert!(msg.contains("Active provider: deepseek")); + assert!( + msg.contains("subagents.max_concurrent = 2 (resolved global 2; active provider 2)") + ); + assert!( + msg.contains("subagents.launch_concurrency = 5 (resolved global 2; active provider 2)") + ); + assert!( + msg.contains( + "subagents.api_timeout_secs = 0 (resolved global 120; active provider 120)" + ) + ); + assert!(msg.contains( + "subagents.heartbeat_timeout_secs = 1 (resolved global 150; active provider 150)" + )); + assert!(msg.contains("subagents.providers.deepseek = inherits global")); + } + #[test] fn config_command_base_url_without_save_requires_save() { let _lock = lock_test_env(); diff --git a/crates/tui/src/commands/groups/core/acceptance.rs b/crates/tui/src/commands/groups/core/acceptance.rs new file mode 100644 index 0000000000..c2dfa21695 --- /dev/null +++ b/crates/tui/src/commands/groups/core/acceptance.rs @@ -0,0 +1,198 @@ +//! Gherkin acceptance coverage for visible core command surfaces. 
+ +use cucumber::{World as _, given, then, when, writer::Stats as _}; +use tempfile::TempDir; + +use crate::commands::{self, CommandResult}; +use crate::config::{ApiProvider, Config}; +use crate::test_support::{EnvVarGuard, lock_test_env}; +use crate::tui::app::{App, TuiOptions}; +use crate::tui::history::HistoryCell; + +const FEATURE_NAME: &str = "Core command visible surfaces"; +const FEATURE_PATH: &str = concat!( + env!("CARGO_MANIFEST_DIR"), + "/tests/features/core_command_surfaces.feature" +); +const INFORMATIONAL_SCENARIO: &str = + "Core informational commands write visible transcript messages"; +const STATE_SCENARIO: &str = "Core state commands report visible changes"; +const CLEAR_SCENARIO: &str = "Clear replaces prior transcript with visible confirmation"; +const PERSISTENT_WORK_SCENARIO: &str = "Persistent work commands report visible dispatch requests"; + +#[derive(Default, cucumber::World)] +struct CoreCommandWorld { + tmpdir: Option, + app: Option>, + home_path: Option, + last_message: Option, + last_result_is_error: Option, +} + +impl std::fmt::Debug for CoreCommandWorld { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CoreCommandWorld") + .field("has_tmpdir", &self.tmpdir.is_some()) + .field("has_app", &self.app.is_some()) + .field("home_path", &self.home_path) + .field("last_message", &self.last_message) + .field("last_result_is_error", &self.last_result_is_error) + .finish() + } +} + +#[given("a CodeWhale core command workspace")] +fn core_command_workspace(world: &mut CoreCommandWorld) { + let tmpdir = TempDir::new().expect("core command TempDir"); + let mut app = create_test_app_with_tmpdir(&tmpdir); + app.ui_locale = crate::localization::Locale::En; + app.api_provider = ApiProvider::Deepseek; + app.model = "deepseek-v4-pro".to_string(); + app.auto_model = false; + app.model_ids_passthrough = false; + + world.home_path = Some(tmpdir.path().join("home")); + world.app = Some(Box::new(app)); + world.tmpdir = 
Some(tmpdir); +} + +#[given("a CodeWhale core command workspace with one visible user message")] +fn core_command_workspace_with_one_visible_user_message(world: &mut CoreCommandWorld) { + core_command_workspace(world); + let app = world.app.as_deref_mut().expect("app should exist"); + app.add_message(HistoryCell::User { + content: "Remember the whale migration".to_string(), + }); +} + +#[when(regex = r#"^the user runs the core command "([^"]+)"$"#)] +fn user_runs_core_command(world: &mut CoreCommandWorld, command: String) { + let result = execute_isolated(world, &command); + record_visible_result(world, result); +} + +#[then(regex = r#"^the message window should include "([^"]+)"$"#)] +fn message_window_should_include(world: &mut CoreCommandWorld, expected: String) { + let visible = visible_message_window(world); + + assert!( + visible.contains(&expected), + "message window should include {expected:?}\nvisible transcript:\n{visible}" + ); +} + +#[then(regex = r#"^the message window should not include "([^"]+)"$"#)] +fn message_window_should_not_include(world: &mut CoreCommandWorld, forbidden: String) { + let visible = visible_message_window(world); + + assert!( + !visible.contains(&forbidden), + "message window should not include {forbidden:?}\nvisible transcript:\n{visible}" + ); +} + +#[tokio::test(flavor = "current_thread")] +async fn core_informational_commands_write_visible_transcript_messages() { + run_scenario(INFORMATIONAL_SCENARIO, 11).await; +} + +#[tokio::test(flavor = "current_thread")] +async fn core_state_commands_report_visible_changes() { + run_scenario(STATE_SCENARIO, 8).await; +} + +#[tokio::test(flavor = "current_thread")] +async fn clear_replaces_prior_transcript_with_visible_confirmation() { + run_scenario(CLEAR_SCENARIO, 4).await; +} + +#[tokio::test(flavor = "current_thread")] +async fn persistent_work_commands_report_visible_dispatch_requests() { + run_scenario(PERSISTENT_WORK_SCENARIO, 7).await; +} + +async fn run_scenario(name: &'static 
str, expected_steps: usize) { + let writer = CoreCommandWorld::cucumber() + .fail_on_skipped() + .with_default_cli() + .filter_run(FEATURE_PATH, move |feature, _, scenario| { + feature.name == FEATURE_NAME && scenario.name == name + }) + .await; + assert_eq!(writer.failed_steps(), 0, "scenario failed: {name}"); + assert_eq!(writer.skipped_steps(), 0, "scenario skipped steps: {name}"); + assert_eq!( + writer.passed_steps(), + expected_steps, + "scenario did not run: {name}" + ); +} + +fn create_test_app_with_tmpdir(tmpdir: &TempDir) -> App { + let options = TuiOptions { + model: "deepseek-v4-pro".to_string(), + workspace: tmpdir.path().to_path_buf(), + config_path: None, + config_profile: None, + allow_shell: false, + use_alt_screen: true, + use_mouse_capture: false, + use_bracketed_paste: true, + max_subagents: 1, + skills_dir: tmpdir.path().join("skills"), + memory_path: tmpdir.path().join("memory.md"), + notes_path: tmpdir.path().join("notes.txt"), + mcp_config_path: tmpdir.path().join("mcp.json"), + use_memory: false, + start_in_agent_mode: false, + skip_onboarding: true, + yolo: false, + resume_session_id: None, + initial_input: None, + }; + App::new(options, &Config::default()) +} + +fn execute_isolated(world: &mut CoreCommandWorld, command: &str) -> CommandResult { + let home = world + .home_path + .as_ref() + .expect("test home should exist") + .clone(); + std::fs::create_dir_all(&home).expect("create isolated test home"); + + let _lock = lock_test_env(); + let _home = EnvVarGuard::set("HOME", &home); + let _codewhale_home = EnvVarGuard::set("CODEWHALE_HOME", home.join(".codewhale")); + + let app = world.app.as_deref_mut().expect("app should exist"); + commands::user_registry::reload(Some(&app.workspace)); + commands::execute(command, app) +} + +fn record_visible_result(world: &mut CoreCommandWorld, result: CommandResult) { + world.last_result_is_error = Some(result.is_error); + world.last_message = result.message.clone(); + + if let Some(message) = 
result.message { + let app = world.app.as_deref_mut().expect("app should exist"); + app.add_message(HistoryCell::System { content: message }); + } +} + +fn visible_message_window(world: &CoreCommandWorld) -> String { + let app = world.app.as_deref().expect("app should exist"); + app.history + .iter() + .filter_map(|cell| match cell { + HistoryCell::User { content } + | HistoryCell::Assistant { content, .. } + | HistoryCell::System { content } + | HistoryCell::Thinking { content, .. } => Some(content.as_str()), + HistoryCell::Error { message, .. } => Some(message.as_str()), + HistoryCell::ArchivedContext { summary, .. } => Some(summary.as_str()), + HistoryCell::Tool(_) | HistoryCell::SubAgent(_) => None, + }) + .collect::>() + .join("\n") +} diff --git a/crates/tui/src/commands/groups/core/agent.rs b/crates/tui/src/commands/groups/core/agent.rs new file mode 100644 index 0000000000..6714de1355 --- /dev/null +++ b/crates/tui/src/commands/groups/core/agent.rs @@ -0,0 +1,49 @@ +//! `/agent` command. 
+ +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::{App, AppAction}; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "agent", + aliases: &["daili"], + usage: "/agent [N] ", + description_id: MessageId::CmdAgentDescription, +}; + +pub(in crate::commands) struct AgentCmd; + +impl RegisterCommand for AgentCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + agent(app, arg) + } +} + +pub fn agent(_app: &mut App, arg: Option<&str>) -> CommandResult { + let (max_depth, task) = match super::util::parse_depth_prefixed_arg(arg, 1) { + Ok(parsed) => parsed, + Err(message) => return CommandResult::error(message), + }; + let task = match task { + Some(task) if !task.trim().is_empty() => task.trim().to_string(), + _ => { + return CommandResult::error( + "Usage: /agent [N] \n\n\ + Opens a persistent sub-agent session with recursive agent depth N (0-3, default 1).", + ); + } + }; + let message = format!( + "Launch one sub-agent for this task by calling `agent` with name `slash_agent`, `prompt: {task:?}`, and `max_depth: {max_depth}`. Use `handle_read` on the returned transcript_handle if you need more detail. Verify any claimed side effects before reporting success." + ); + CommandResult::with_message_and_action( + format!("Opening persistent sub-agent at depth {max_depth}..."), + AppAction::SendMessage(message), + ) +} diff --git a/crates/tui/src/commands/groups/core/anchor.rs b/crates/tui/src/commands/groups/core/anchor.rs index 7ba66d7a1a..f47fe3f7f1 100644 --- a/crates/tui/src/commands/groups/core/anchor.rs +++ b/crates/tui/src/commands/groups/core/anchor.rs @@ -5,14 +5,36 @@ //! preserve invariants like "This API's status field is unreliable" or //! ".ssh/ must never be touched". 
-use crate::tui::app::App; use std::fs; use std::io::Write; +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + use super::CommandResult; const USAGE: &str = "/anchor | /anchor list | /anchor remove "; +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "anchor", + aliases: &["maodian"], + usage: USAGE, + description_id: MessageId::CmdAnchorDescription, +}; + +pub(in crate::commands) struct AnchorCmd; + +impl RegisterCommand for AnchorCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + anchor(app, arg) + } +} + /// Handle the `/anchor` command with subcommands: /// - `/anchor ` — add a new anchor /// - `/anchor list` — list all anchors diff --git a/crates/tui/src/commands/groups/core/clear.rs b/crates/tui/src/commands/groups/core/clear.rs new file mode 100644 index 0000000000..46666df325 --- /dev/null +++ b/crates/tui/src/commands/groups/core/clear.rs @@ -0,0 +1,26 @@ +//! `/clear` command. + +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "clear", + aliases: &["qingping"], + usage: "/clear", + description_id: MessageId::CmdClearDescription, +}; + +pub(in crate::commands) struct ClearCmd; + +impl RegisterCommand for ClearCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, _arg: Option<&str>) -> CommandResult { + super::core::clear(app) + } +} diff --git a/crates/tui/src/commands/groups/core/exit.rs b/crates/tui/src/commands/groups/core/exit.rs new file mode 100644 index 0000000000..30c8491f7d --- /dev/null +++ b/crates/tui/src/commands/groups/core/exit.rs @@ -0,0 +1,26 @@ +//! `/exit` command. 
+ +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "exit", + aliases: &["quit", "q", "tuichu"], + usage: "/exit", + description_id: MessageId::CmdExitDescription, +}; + +pub(in crate::commands) struct ExitCmd; + +impl RegisterCommand for ExitCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(_app: &mut App, _arg: Option<&str>) -> CommandResult { + super::core::exit() + } +} diff --git a/crates/tui/src/commands/groups/core/feedback.rs b/crates/tui/src/commands/groups/core/feedback.rs index fc968c73a0..c8f27ca254 100644 --- a/crates/tui/src/commands/groups/core/feedback.rs +++ b/crates/tui/src/commands/groups/core/feedback.rs @@ -1,8 +1,29 @@ use super::CommandResult; +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; use crate::tui::app::{App, AppAction}; const SECURITY_POLICY_URL: &str = "https://github.com/Hmbown/CodeWhale/security/policy"; +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "feedback", + aliases: &[], + usage: "/feedback [bug|feature|security]", + description_id: MessageId::CmdFeedbackDescription, +}; + +pub(in crate::commands) struct FeedbackCmd; + +impl RegisterCommand for FeedbackCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + feedback(app, arg) + } +} + pub fn feedback(_app: &mut App, arg: Option<&str>) -> CommandResult { let raw = arg.map(str::trim).unwrap_or(""); if raw.is_empty() { diff --git a/crates/tui/src/commands/groups/core/help.rs b/crates/tui/src/commands/groups/core/help.rs new file mode 100644 index 0000000000..d15589ffee --- /dev/null +++ b/crates/tui/src/commands/groups/core/help.rs @@ -0,0 +1,26 @@ +//! `/help` command. 
+ +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "help", + aliases: &["?", "bangzhu", "帮助"], + usage: "/help [command]", + description_id: MessageId::CmdHelpDescription, +}; + +pub(in crate::commands) struct HelpCmd; + +impl RegisterCommand for HelpCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + super::core::help(app, arg) + } +} diff --git a/crates/tui/src/commands/groups/core/hf.rs b/crates/tui/src/commands/groups/core/hf.rs index 0d2a7230e6..9934ca1036 100644 --- a/crates/tui/src/commands/groups/core/hf.rs +++ b/crates/tui/src/commands/groups/core/hf.rs @@ -1,10 +1,31 @@ //! `/hf` - Hugging Face MCP and provider concept helpers. +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; use crate::mcp::{McpConfig, McpServerConfig}; use crate::tui::app::App; use super::CommandResult; +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "hf", + aliases: &["huggingface"], + usage: "/hf [mcp |concepts]", + description_id: MessageId::CmdHfDescription, +}; + +pub(in crate::commands) struct HfCmd; + +impl RegisterCommand for HfCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + hf(app, arg) + } +} + const HF_MCP_SETTINGS_URL: &str = "https://huggingface.co/settings/mcp"; const HF_MCP_DOCS_URL: &str = "https://huggingface.co/docs/hub/hf-mcp-server"; const HF_MCP_SERVER_URL: &str = "https://huggingface.co/mcp"; diff --git a/crates/tui/src/commands/groups/core/home.rs b/crates/tui/src/commands/groups/core/home.rs new file mode 100644 index 0000000000..0900c97690 --- /dev/null +++ b/crates/tui/src/commands/groups/core/home.rs @@ -0,0 +1,26 @@ +//! 
`/home` command. + +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "home", + aliases: &["stats", "overview", "zhuye", "shouye"], + usage: "/home", + description_id: MessageId::CmdHomeDescription, +}; + +pub(in crate::commands) struct HomeCmd; + +impl RegisterCommand for HomeCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, _arg: Option<&str>) -> CommandResult { + super::core::home_dashboard(app) + } +} diff --git a/crates/tui/src/commands/groups/core/hooks.rs b/crates/tui/src/commands/groups/core/hooks.rs index d01a52ca49..e4beaeadce 100644 --- a/crates/tui/src/commands/groups/core/hooks.rs +++ b/crates/tui/src/commands/groups/core/hooks.rs @@ -6,11 +6,32 @@ //! actually configured in `~/.codewhale/config.toml`'s `[hooks]` //! table — the most-asked question once hooks start firing. +use crate::commands::traits::{CommandInfo, RegisterCommand}; use crate::hooks::HookEvent; +use crate::localization::MessageId; use crate::tui::app::App; use super::CommandResult; +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "hooks", + aliases: &["hook", "gouzi"], + usage: "/hooks [list|events]", + description_id: MessageId::CmdHooksDescription, +}; + +pub(in crate::commands) struct HooksCmd; + +impl RegisterCommand for HooksCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + hooks(app, arg) + } +} + /// Top-level dispatch for `/hooks`. Subcommands: /// /// * `/hooks` — same as `/hooks list`. diff --git a/crates/tui/src/commands/groups/core/links.rs b/crates/tui/src/commands/groups/core/links.rs new file mode 100644 index 0000000000..473016a8fd --- /dev/null +++ b/crates/tui/src/commands/groups/core/links.rs @@ -0,0 +1,26 @@ +//! 
`/links` command. + +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "links", + aliases: &["dashboard", "api", "lianjie"], + usage: "/links", + description_id: MessageId::CmdLinksDescription, +}; + +pub(in crate::commands) struct LinksCmd; + +impl RegisterCommand for LinksCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, _arg: Option<&str>) -> CommandResult { + super::core::deepseek_links(app) + } +} diff --git a/crates/tui/src/commands/groups/core/mod.rs b/crates/tui/src/commands/groups/core/mod.rs index 0151e72ec4..5eff7fd844 100644 --- a/crates/tui/src/commands/groups/core/mod.rs +++ b/crates/tui/src/commands/groups/core/mod.rs @@ -1,481 +1,140 @@ //! Core command area: model/provider selection, help, navigation, and the //! persistent RLM / sub-agent entry points. +#[cfg(all(test, feature = "long-running-tests"))] +mod acceptance; +mod agent; mod anchor; +mod clear; // This group dir intentionally has a `core.rs` child module with the same // name. The module_inception allow is a permanent structure rationale, not // migration scaffolding; see docs/architecture/command-dispatch.md. 
#[allow(clippy::module_inception)] mod core; +mod exit; mod feedback; +mod help; mod hf; +mod home; mod hooks; +mod links; +mod model; +mod models; +mod profile; mod provider; mod queue; +mod rlm; mod stash; +mod subagents; +mod swarm; +mod translate; +pub mod util; pub mod voice; +mod workspace; pub(in crate::commands) use self::core::reset_conversation_state; use crate::commands::CommandResult; -use crate::commands::traits::{Command, CommandGroup, CommandInfo, FunctionCommand}; -use crate::localization::MessageId; -use crate::tui::app::{App, AppAction}; +use crate::commands::traits::{Command, CommandGroup, FunctionCommand, RegisterCommand}; pub struct CoreCommands; impl CommandGroup for CoreCommands { fn commands(&self) -> Vec> { vec![ - Box::new(FunctionCommand::new(&ANCHOR_INFO, run_anchor)), - Box::new(FunctionCommand::new(&HELP_INFO, run_help)), - Box::new(FunctionCommand::new(&CLEAR_INFO, run_clear)), - Box::new(FunctionCommand::new(&EXIT_INFO, run_exit)), - Box::new(FunctionCommand::new(&MODEL_INFO, run_model)), - Box::new(FunctionCommand::new(&MODELS_INFO, run_models)), - Box::new(FunctionCommand::new(&PROVIDER_INFO, run_provider)), - Box::new(FunctionCommand::new(&QUEUE_INFO, run_queue)), - Box::new(FunctionCommand::new(&STASH_INFO, run_stash)), - Box::new(FunctionCommand::new(&HOOKS_INFO, run_hooks)), - Box::new(FunctionCommand::new(&SUBAGENTS_INFO, run_subagents)), - Box::new(FunctionCommand::new(&AGENT_INFO, run_agent)), - Box::new(FunctionCommand::new(&SWARM_INFO, run_swarm)), - Box::new(FunctionCommand::new(&LINKS_INFO, run_links)), - Box::new(FunctionCommand::new(&FEEDBACK_INFO, run_feedback)), - Box::new(FunctionCommand::new(&HF_INFO, run_hf)), - Box::new(FunctionCommand::new(&HOME_INFO, run_home)), - Box::new(FunctionCommand::new(&WORKSPACE_INFO, run_workspace)), - Box::new(FunctionCommand::new(&PROFILE_INFO, run_profile)), - Box::new(FunctionCommand::new(&RLM_INFO, run_rlm)), - Box::new(FunctionCommand::new(&TRANSLATE_INFO, run_translate)), - 
Box::new(FunctionCommand::new(&VOICE_INFO, run_voice)), - Box::new(FunctionCommand::new(&VOICE_SEND_INFO, run_voice_send)), - Box::new(FunctionCommand::new(&VOICE_CONTROL_INFO, run_voice_control)), + Box::new(FunctionCommand::new( + anchor::AnchorCmd::info(), + anchor::AnchorCmd::execute, + )), + Box::new(FunctionCommand::new( + help::HelpCmd::info(), + help::HelpCmd::execute, + )), + Box::new(FunctionCommand::new( + clear::ClearCmd::info(), + clear::ClearCmd::execute, + )), + Box::new(FunctionCommand::new( + exit::ExitCmd::info(), + exit::ExitCmd::execute, + )), + Box::new(FunctionCommand::new( + model::ModelCmd::info(), + model::ModelCmd::execute, + )), + Box::new(FunctionCommand::new( + models::ModelsCmd::info(), + models::ModelsCmd::execute, + )), + Box::new(FunctionCommand::new( + provider::ProviderCmd::info(), + provider::ProviderCmd::execute, + )), + Box::new(FunctionCommand::new( + queue::QueueCmd::info(), + queue::QueueCmd::execute, + )), + Box::new(FunctionCommand::new( + stash::StashCmd::info(), + stash::StashCmd::execute, + )), + Box::new(FunctionCommand::new( + hooks::HooksCmd::info(), + hooks::HooksCmd::execute, + )), + Box::new(FunctionCommand::new( + subagents::SubagentsCmd::info(), + subagents::SubagentsCmd::execute, + )), + Box::new(FunctionCommand::new( + agent::AgentCmd::info(), + agent::AgentCmd::execute, + )), + Box::new(FunctionCommand::new( + swarm::SwarmCmd::info(), + swarm::SwarmCmd::execute, + )), + Box::new(FunctionCommand::new( + links::LinksCmd::info(), + links::LinksCmd::execute, + )), + Box::new(FunctionCommand::new( + feedback::FeedbackCmd::info(), + feedback::FeedbackCmd::execute, + )), + Box::new(FunctionCommand::new(hf::HfCmd::info(), hf::HfCmd::execute)), + Box::new(FunctionCommand::new( + home::HomeCmd::info(), + home::HomeCmd::execute, + )), + Box::new(FunctionCommand::new( + workspace::WorkspaceCmd::info(), + workspace::WorkspaceCmd::execute, + )), + Box::new(FunctionCommand::new( + profile::ProfileCmd::info(), + 
profile::ProfileCmd::execute, + )), + Box::new(FunctionCommand::new( + rlm::RlmCmd::info(), + rlm::RlmCmd::execute, + )), + Box::new(FunctionCommand::new( + translate::TranslateCmd::info(), + translate::TranslateCmd::execute, + )), + Box::new(FunctionCommand::new( + voice::VoiceCmd::info(), + voice::VoiceCmd::execute, + )), + Box::new(FunctionCommand::new( + voice::VoiceSendCmd::info(), + voice::VoiceSendCmd::execute, + )), + Box::new(FunctionCommand::new( + voice::VoiceControlCmd::info(), + voice::VoiceControlCmd::execute, + )), ] } } - -static ANCHOR_INFO: CommandInfo = CommandInfo { - name: "anchor", - aliases: &["maodian"], - usage: "/anchor | /anchor list | /anchor remove ", - description_id: MessageId::CmdAnchorDescription, -}; -static HELP_INFO: CommandInfo = CommandInfo { - name: "help", - aliases: &["?", "bangzhu", "帮助"], - usage: "/help [command]", - description_id: MessageId::CmdHelpDescription, -}; -static CLEAR_INFO: CommandInfo = CommandInfo { - name: "clear", - aliases: &["qingping"], - usage: "/clear", - description_id: MessageId::CmdClearDescription, -}; -static EXIT_INFO: CommandInfo = CommandInfo { - name: "exit", - aliases: &["quit", "q", "tuichu"], - usage: "/exit", - description_id: MessageId::CmdExitDescription, -}; -static MODEL_INFO: CommandInfo = CommandInfo { - name: "model", - aliases: &["moxing"], - usage: "/model [name]", - description_id: MessageId::CmdModelDescription, -}; -static MODELS_INFO: CommandInfo = CommandInfo { - name: "models", - aliases: &["moxingliebiao"], - usage: "/models", - description_id: MessageId::CmdModelsDescription, -}; -static PROVIDER_INFO: CommandInfo = CommandInfo { - name: "provider", - aliases: &[], - usage: "/provider [name] [model]", - description_id: MessageId::CmdProviderDescription, -}; -static QUEUE_INFO: CommandInfo = CommandInfo { - name: "queue", - aliases: &["queued"], - usage: "/queue [list|send |edit |drop |clear]", - description_id: MessageId::CmdQueueDescription, -}; -static STASH_INFO: 
CommandInfo = CommandInfo { - name: "stash", - aliases: &["park"], - usage: "/stash [list|pop|clear]", - description_id: MessageId::CmdStashDescription, -}; -static HOOKS_INFO: CommandInfo = CommandInfo { - name: "hooks", - aliases: &["hook", "gouzi"], - usage: "/hooks [list|events]", - description_id: MessageId::CmdHooksDescription, -}; -static SUBAGENTS_INFO: CommandInfo = CommandInfo { - name: "subagents", - aliases: &["agents", "zhinengti"], - usage: "/subagents", - description_id: MessageId::CmdSubagentsDescription, -}; -static AGENT_INFO: CommandInfo = CommandInfo { - name: "agent", - aliases: &["daili"], - usage: "/agent [N] ", - description_id: MessageId::CmdAgentDescription, -}; -static SWARM_INFO: CommandInfo = CommandInfo { - name: "swarm", - aliases: &["fanout", "qun"], - usage: "/swarm [N] ", - description_id: MessageId::CmdSwarmDescription, -}; -static LINKS_INFO: CommandInfo = CommandInfo { - name: "links", - aliases: &["dashboard", "api", "lianjie"], - usage: "/links", - description_id: MessageId::CmdLinksDescription, -}; -static FEEDBACK_INFO: CommandInfo = CommandInfo { - name: "feedback", - aliases: &[], - usage: "/feedback [bug|feature|security]", - description_id: MessageId::CmdFeedbackDescription, -}; -static HF_INFO: CommandInfo = CommandInfo { - name: "hf", - aliases: &["huggingface"], - usage: "/hf [mcp |concepts]", - description_id: MessageId::CmdHfDescription, -}; -static HOME_INFO: CommandInfo = CommandInfo { - name: "home", - aliases: &["stats", "overview", "zhuye", "shouye"], - usage: "/home", - description_id: MessageId::CmdHomeDescription, -}; -static WORKSPACE_INFO: CommandInfo = CommandInfo { - name: "workspace", - aliases: &["cwd"], - usage: "/workspace [path]", - description_id: MessageId::CmdWorkspaceDescription, -}; -static PROFILE_INFO: CommandInfo = CommandInfo { - name: "profile", - aliases: &["dangan"], - usage: "/profile ", - description_id: MessageId::CmdHelpDescription, -}; -static RLM_INFO: CommandInfo = CommandInfo { - 
name: "rlm", - aliases: &["recursive", "digui"], - usage: "/rlm [N] ", - description_id: MessageId::CmdRlmDescription, -}; -static TRANSLATE_INFO: CommandInfo = CommandInfo { - name: "translate", - aliases: &["translation", "transale"], - usage: "/translate", - description_id: MessageId::CmdTranslateDescription, -}; -static VOICE_INFO: CommandInfo = CommandInfo { - name: "voice", - aliases: &["yuyin", "语音"], - usage: "/voice", - description_id: MessageId::CmdVoiceDescription, -}; -static VOICE_SEND_INFO: CommandInfo = CommandInfo { - name: "voicesend", - aliases: &["voice-send", "yuyinsend", "语音发送"], - usage: "/voicesend", - description_id: MessageId::CmdVoiceSendDescription, -}; -static VOICE_CONTROL_INFO: CommandInfo = CommandInfo { - name: "voicecontrol", - aliases: &["voice-control", "yuyincontrol", "语音控制"], - usage: "/voicecontrol", - description_id: MessageId::CmdVoiceControlDescription, -}; - -fn run_registered(app: &mut App, name: &str, arg: Option<&str>) -> CommandResult { - dispatch(app, name, arg).expect("registered core command should dispatch") -} - -fn run_anchor(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "anchor", arg) -} -fn run_help(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "help", arg) -} -fn run_clear(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "clear", arg) -} -fn run_exit(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "exit", arg) -} -fn run_model(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "model", arg) -} -fn run_models(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "models", arg) -} -fn run_provider(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "provider", arg) -} -fn run_queue(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "queue", arg) -} -fn run_stash(app: &mut App, arg: Option<&str>) -> CommandResult { 
- run_registered(app, "stash", arg) -} -fn run_hooks(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "hooks", arg) -} -fn run_subagents(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "subagents", arg) -} -fn run_agent(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "agent", arg) -} -fn run_swarm(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "swarm", arg) -} -fn run_links(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "links", arg) -} -fn run_feedback(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "feedback", arg) -} -fn run_hf(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "hf", arg) -} -fn run_home(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "home", arg) -} -fn run_workspace(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "workspace", arg) -} -fn run_profile(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "profile", arg) -} -fn run_rlm(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "rlm", arg) -} -fn run_translate(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "translate", arg) -} -fn run_voice(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "voice", arg) -} -fn run_voice_send(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "voicesend", arg) -} -fn run_voice_control(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "voicecontrol", arg) -} - -pub(in crate::commands) fn dispatch( - app: &mut App, - command: &str, - arg: Option<&str>, -) -> Option { - let result = match command { - "anchor" | "maodian" => anchor::anchor(app, arg), - "help" | "?" 
| "bangzhu" | "帮助" => core::help(app, arg), - "clear" | "qingping" => core::clear(app), - "exit" | "quit" | "q" | "tuichu" => core::exit(), - "model" | "moxing" => core::model(app, arg), - "models" | "moxingliebiao" => core::models(app), - "provider" => provider::provider(app, arg), - "queue" | "queued" => queue::queue(app, arg), - "stash" | "park" => stash::stash(app, arg), - "hooks" | "hook" | "gouzi" => hooks::hooks(app, arg), - "subagents" | "agents" | "zhinengti" => core::subagents(app), - "agent" | "daili" => agent(app, arg), - "swarm" | "fanout" | "qun" => swarm(app, arg), - "links" | "dashboard" | "api" | "lianjie" => core::deepseek_links(app), - "feedback" => feedback::feedback(app, arg), - "hf" | "huggingface" => hf::hf(app, arg), - "home" | "stats" | "overview" | "zhuye" | "shouye" => core::home_dashboard(app), - "workspace" | "cwd" => core::workspace_switch(app, arg), - "profile" | "dangan" => core::profile_switch(app, arg), - "rlm" | "recursive" | "digui" => rlm(app, arg), - "translate" | "translation" | "transale" => core::translate(app), - "voice" | "yuyin" | "语音" => voice::voice(app), - "voicesend" | "voice-send" | "yuyinsend" | "语音发送" => voice::voice_send(app), - "voicecontrol" | "voice-control" | "yuyincontrol" | "语音控制" => { - voice::voice_control(app) - } - _ => return None, - }; - Some(result) -} - -/// Execute a Recursive Language Model (RLM) turn — Algorithm 1 from -/// Zhang et al. (arXiv:2512.24601). -/// -/// The user's prompt text is passed as the argument. It will be stored -/// in the REPL as the `PROMPT` variable. The root LLM will only see -/// metadata about the REPL state, never the prompt text directly. 
-pub fn rlm(app: &mut App, arg: Option<&str>) -> CommandResult { - let (max_depth, target) = match parse_depth_prefixed_arg(arg, 1) { - Ok(parsed) => parsed, - Err(message) => return CommandResult::error(message), - }; - let target = match target { - Some(p) if !p.trim().is_empty() => p.trim().to_string(), - _ => { - return CommandResult::error( - "Usage: /rlm [N] \n\n\ - Opens a persistent RLM context with sub_rlm depth N (0-3, default 1)." - .to_string(), - ); - } - }; - - let source_arg = if resolves_to_existing_file(app, &target) { - format!(r#"file_path: "{target}""#) - } else { - format!("content: {target:?}") - }; - let message = format!( - "Open and use a persistent RLM session for this request. Call `rlm_open` with name `slash_rlm` and {source_arg}. Then call `rlm_configure` with `sub_rlm_max_depth: {max_depth}`. Use `rlm_eval` to inspect the context through `peek`, `search`, and `chunk`, and call `finalize(...)` from the REPL when ready. If a `var_handle` is returned, use `handle_read` for bounded slices or projections before answering." - ); - - CommandResult::with_message_and_action( - format!("Opening persistent RLM context at depth {max_depth}..."), - AppAction::SendMessage(message), - ) -} - -/// Open a persistent sub-agent session from a slash command. -pub fn agent(_app: &mut App, arg: Option<&str>) -> CommandResult { - let (max_depth, task) = match parse_depth_prefixed_arg(arg, 1) { - Ok(parsed) => parsed, - Err(message) => return CommandResult::error(message), - }; - let task = match task { - Some(task) if !task.trim().is_empty() => task.trim().to_string(), - _ => { - return CommandResult::error( - "Usage: /agent [N] \n\n\ - Opens a persistent sub-agent session with recursive agent depth N (0-3, default 1).", - ); - } - }; - let message = format!( - "Launch one sub-agent for this task by calling `agent` with name `slash_agent`, `prompt: {task:?}`, and `max_depth: {max_depth}`. 
Use `handle_read` on the returned transcript_handle if you need more detail. Verify any claimed side effects before reporting success." - ); - CommandResult::with_message_and_action( - format!("Opening persistent sub-agent at depth {max_depth}..."), - AppAction::SendMessage(message), - ) -} - -/// Gate the old prompt-only swarm fanout until it can route through durable -/// WhaleFlow/Fleet workers (#3218). -pub fn swarm(_app: &mut App, arg: Option<&str>) -> CommandResult { - let (_max_depth, task) = match parse_depth_prefixed_arg(arg, 1) { - Ok(parsed) => parsed, - Err(message) => return CommandResult::error(message), - }; - if !matches!(task.map(str::trim), Some(task) if !task.is_empty()) { - return CommandResult::error( - "Usage: /swarm [N] \n\n\ - /swarm is currently gated. Use /goal for a persistent objective \ - or /agent for a single sub-agent while durable Fleet-backed \ - swarm workers are still landing.", - ); - } - CommandResult::error( - "/swarm is gated in v0.8.61: prompt-only agent fanout is disabled until the durable Train-3 worker/goal re-dispatch substrate lands. 
Use /goal for the persistent objective or /agent [N] for one bounded sub-agent.", - ) -} - -fn parse_depth_prefixed_arg( - arg: Option<&str>, - default_depth: u32, -) -> Result<(u32, Option<&str>), String> { - let Some(raw) = arg.map(str::trim).filter(|raw| !raw.is_empty()) else { - return Ok((default_depth, None)); - }; - let mut parts = raw.splitn(2, char::is_whitespace); - let first = parts.next().unwrap_or_default(); - if first.chars().all(|ch| ch.is_ascii_digit()) { - let depth: u32 = first - .parse() - .map_err(|_| "Depth must be an integer from 0 to 3".to_string())?; - if depth > 3 { - return Err("Depth must be between 0 and 3".to_string()); - } - Ok((depth, parts.next().map(str::trim))) - } else { - Ok((default_depth, Some(raw))) - } -} - -fn resolves_to_existing_file(app: &App, input: &str) -> bool { - let path = std::path::Path::new(input); - let candidate = if path.is_absolute() { - path.to_path_buf() - } else { - app.workspace.join(path) - }; - candidate.is_file() -} - -#[cfg(test)] -mod tests { - use super::*; - - fn create_test_app() -> App { - let options = crate::tui::app::TuiOptions { - model: "deepseek-v4-pro".to_string(), - workspace: std::path::PathBuf::from("/tmp/test-workspace"), - config_path: None, - config_profile: None, - allow_shell: false, - use_alt_screen: true, - use_mouse_capture: false, - use_bracketed_paste: true, - max_subagents: 1, - skills_dir: std::path::PathBuf::from("/tmp/test-skills"), - memory_path: std::path::PathBuf::from("memory.md"), - notes_path: std::path::PathBuf::from("notes.txt"), - mcp_config_path: std::path::PathBuf::from("mcp.json"), - use_memory: false, - start_in_agent_mode: false, - skip_onboarding: true, - initial_input: None, - resume_session_id: None, - yolo: false, - }; - App::new(options, &crate::config::Config::default()) - } - - #[test] - fn swarm_is_gated_until_durable_worker_substrate_lands() { - let mut app = create_test_app(); - let result = swarm(&mut app, Some("inspect five files")); - - 
assert!(result.is_error); - assert!(result.action.is_none()); - assert!( - result - .message - .as_deref() - .unwrap_or_default() - .contains("gated") - ); - assert!( - result - .message - .as_deref() - .unwrap_or_default() - .contains("Train-3") - ); - } -} diff --git a/crates/tui/src/commands/groups/core/model.rs b/crates/tui/src/commands/groups/core/model.rs new file mode 100644 index 0000000000..09893ea68d --- /dev/null +++ b/crates/tui/src/commands/groups/core/model.rs @@ -0,0 +1,26 @@ +//! `/model` command. + +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "model", + aliases: &["moxing"], + usage: "/model [name]", + description_id: MessageId::CmdModelDescription, +}; + +pub(in crate::commands) struct ModelCmd; + +impl RegisterCommand for ModelCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + super::core::model(app, arg) + } +} diff --git a/crates/tui/src/commands/groups/core/models.rs b/crates/tui/src/commands/groups/core/models.rs new file mode 100644 index 0000000000..0203e7f9f3 --- /dev/null +++ b/crates/tui/src/commands/groups/core/models.rs @@ -0,0 +1,26 @@ +//! `/models` command. 
+ +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "models", + aliases: &["moxingliebiao"], + usage: "/models", + description_id: MessageId::CmdModelsDescription, +}; + +pub(in crate::commands) struct ModelsCmd; + +impl RegisterCommand for ModelsCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, _arg: Option<&str>) -> CommandResult { + super::core::models(app) + } +} diff --git a/crates/tui/src/commands/groups/core/profile.rs b/crates/tui/src/commands/groups/core/profile.rs new file mode 100644 index 0000000000..deef2f723f --- /dev/null +++ b/crates/tui/src/commands/groups/core/profile.rs @@ -0,0 +1,26 @@ +//! `/profile` command. + +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "profile", + aliases: &["dangan"], + usage: "/profile ", + description_id: MessageId::CmdProfileDescription, +}; + +pub(in crate::commands) struct ProfileCmd; + +impl RegisterCommand for ProfileCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + super::core::profile_switch(app, arg) + } +} diff --git a/crates/tui/src/commands/groups/core/provider.rs b/crates/tui/src/commands/groups/core/provider.rs index 2bd96a2abb..a89aca8368 100644 --- a/crates/tui/src/commands/groups/core/provider.rs +++ b/crates/tui/src/commands/groups/core/provider.rs @@ -4,14 +4,35 @@ //! `/provider` with no args opens the picker modal (#52). `/provider ` //! keeps the v0.6.6 CLI form for muscle-memory + scripted use. 
+use crate::commands::traits::{CommandInfo, RegisterCommand}; use crate::config::{ ApiProvider, normalize_model_name, normalize_model_name_for_provider, provider_passes_model_through, }; +use crate::localization::MessageId; use crate::tui::app::{App, AppAction}; use super::CommandResult; +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "provider", + aliases: &[], + usage: "/provider [name] [model]", + description_id: MessageId::CmdProviderDescription, +}; + +pub(in crate::commands) struct ProviderCmd; + +impl RegisterCommand for ProviderCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + provider(app, arg) + } +} + /// Switch or view the current LLM backend. /// /// With no args, opens the picker modal. With ` [model]`, performs diff --git a/crates/tui/src/commands/groups/core/queue.rs b/crates/tui/src/commands/groups/core/queue.rs index 51bf2b7dbe..5c255acc8f 100644 --- a/crates/tui/src/commands/groups/core/queue.rs +++ b/crates/tui/src/commands/groups/core/queue.rs @@ -1,5 +1,6 @@ //! 
Queue commands: queue list/edit/drop/clear +use crate::commands::traits::{CommandInfo, RegisterCommand}; use crate::localization::{Locale, MessageId, tr}; use crate::tui::app::App; @@ -7,6 +8,25 @@ use super::CommandResult; const PREVIEW_LIMIT: usize = 120; +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "queue", + aliases: &["queued"], + usage: "/queue [list|send |edit |drop |clear]", + description_id: MessageId::CmdQueueDescription, +}; + +pub(in crate::commands) struct QueueCmd; + +impl RegisterCommand for QueueCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + queue(app, arg) + } +} + pub fn queue(app: &mut App, args: Option<&str>) -> CommandResult { let locale = app.ui_locale; let arg = args.unwrap_or("").trim(); diff --git a/crates/tui/src/commands/groups/core/rlm.rs b/crates/tui/src/commands/groups/core/rlm.rs new file mode 100644 index 0000000000..a3926b19fe --- /dev/null +++ b/crates/tui/src/commands/groups/core/rlm.rs @@ -0,0 +1,67 @@ +//! `/rlm` command. 
+ +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::{App, AppAction}; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "rlm", + aliases: &["recursive", "digui"], + usage: "/rlm [N] ", + description_id: MessageId::CmdRlmDescription, +}; + +pub(in crate::commands) struct RlmCmd; + +impl RegisterCommand for RlmCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + rlm(app, arg) + } +} + +pub fn rlm(app: &mut App, arg: Option<&str>) -> CommandResult { + let (max_depth, target) = match super::util::parse_depth_prefixed_arg(arg, 1) { + Ok(parsed) => parsed, + Err(message) => return CommandResult::error(message), + }; + let target = match target { + Some(p) if !p.trim().is_empty() => p.trim().to_string(), + _ => { + return CommandResult::error( + "Usage: /rlm [N] \n\n\ + Opens a persistent RLM context with sub_rlm depth N (0-3, default 1)." + .to_string(), + ); + } + }; + + let source_arg = if resolves_to_existing_file(app, &target) { + format!(r#"file_path: "{target}""#) + } else { + format!("content: {target:?}") + }; + let message = format!( + "Open and use a persistent RLM session for this request. Call `rlm_open` with name `slash_rlm` and {source_arg}. Then call `rlm_configure` with `sub_rlm_max_depth: {max_depth}`. Use `rlm_eval` to inspect the context through `peek`, `search`, and `chunk`, and call `finalize(...)` from the REPL when ready. If a `var_handle` is returned, use `handle_read` for bounded slices or projections before answering." 
+ ); + + CommandResult::with_message_and_action( + format!("Opening persistent RLM context at depth {max_depth}..."), + AppAction::SendMessage(message), + ) +} + +fn resolves_to_existing_file(app: &App, input: &str) -> bool { + let path = std::path::Path::new(input); + let candidate = if path.is_absolute() { + path.to_path_buf() + } else { + app.workspace.join(path) + }; + candidate.is_file() +} diff --git a/crates/tui/src/commands/groups/core/stash.rs b/crates/tui/src/commands/groups/core/stash.rs index 1723e44037..e80d569f2d 100644 --- a/crates/tui/src/commands/groups/core/stash.rs +++ b/crates/tui/src/commands/groups/core/stash.rs @@ -5,11 +5,32 @@ //! surface; Ctrl+S in the composer is the corresponding push entry //! point. +use crate::commands::traits::{CommandInfo, RegisterCommand}; use crate::composer_stash; +use crate::localization::MessageId; use crate::tui::app::App; use super::CommandResult; +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "stash", + aliases: &["park"], + usage: "/stash [list|pop|clear]", + description_id: MessageId::CmdStashDescription, +}; + +pub(in crate::commands) struct StashCmd; + +impl RegisterCommand for StashCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + stash(app, arg) + } +} + /// Top-level dispatch for `/stash`. Subcommands: /// /// * `/stash` — same as `/stash list`. diff --git a/crates/tui/src/commands/groups/core/subagents.rs b/crates/tui/src/commands/groups/core/subagents.rs new file mode 100644 index 0000000000..e51c282c36 --- /dev/null +++ b/crates/tui/src/commands/groups/core/subagents.rs @@ -0,0 +1,26 @@ +//! `/subagents` command. 
+ +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "subagents", + aliases: &["agents", "zhinengti"], + usage: "/subagents", + description_id: MessageId::CmdSubagentsDescription, +}; + +pub(in crate::commands) struct SubagentsCmd; + +impl RegisterCommand for SubagentsCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, _arg: Option<&str>) -> CommandResult { + super::core::subagents(app) + } +} diff --git a/crates/tui/src/commands/groups/core/swarm.rs b/crates/tui/src/commands/groups/core/swarm.rs new file mode 100644 index 0000000000..52b9cf70e3 --- /dev/null +++ b/crates/tui/src/commands/groups/core/swarm.rs @@ -0,0 +1,99 @@ +//! `/swarm` command - gated until durable Fleet-backed workers are available. + +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "swarm", + aliases: &["fanout", "qun"], + usage: "/swarm [N] ", + description_id: MessageId::CmdSwarmDescription, +}; + +pub(in crate::commands) struct SwarmCmd; + +impl RegisterCommand for SwarmCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + swarm(app, arg) + } +} + +/// Gate the old prompt-only swarm fanout until it can route through durable +/// WhaleFlow/Fleet workers (#3218). 
+pub fn swarm(_app: &mut App, arg: Option<&str>) -> CommandResult { + let (_max_depth, task) = match super::util::parse_depth_prefixed_arg(arg, 1) { + Ok(parsed) => parsed, + Err(message) => return CommandResult::error(message), + }; + if !matches!(task.map(str::trim), Some(task) if !task.is_empty()) { + return CommandResult::error( + "Usage: /swarm [N] \n\n\ + /swarm is currently gated. Use /goal for a persistent objective \ + or /agent for a single sub-agent while durable Fleet-backed \ + swarm workers are still landing.", + ); + } + CommandResult::error( + "/swarm is gated in v0.8.61: prompt-only agent fanout is disabled until the durable Train-3 worker/goal re-dispatch substrate lands. Use /goal for the persistent objective or /agent [N] for one bounded sub-agent.", + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn create_test_app() -> App { + let options = crate::tui::app::TuiOptions { + model: "deepseek-v4-pro".to_string(), + workspace: std::path::PathBuf::from("/tmp/test-workspace"), + config_path: None, + config_profile: None, + allow_shell: false, + use_alt_screen: true, + use_mouse_capture: false, + use_bracketed_paste: true, + max_subagents: 1, + skills_dir: std::path::PathBuf::from("/tmp/test-skills"), + memory_path: std::path::PathBuf::from("memory.md"), + notes_path: std::path::PathBuf::from("notes.txt"), + mcp_config_path: std::path::PathBuf::from("mcp.json"), + use_memory: false, + start_in_agent_mode: false, + skip_onboarding: true, + initial_input: None, + resume_session_id: None, + yolo: false, + }; + App::new(options, &crate::config::Config::default()) + } + + #[test] + fn swarm_is_gated_until_durable_worker_substrate_lands() { + let mut app = create_test_app(); + let result = swarm(&mut app, Some("inspect five files")); + + assert!(result.is_error); + assert!(result.action.is_none()); + assert!( + result + .message + .as_deref() + .unwrap_or_default() + .contains("gated") + ); + assert!( + result + .message + .as_deref() + 
.unwrap_or_default() + .contains("Train-3") + ); + } +} diff --git a/crates/tui/src/commands/groups/core/translate.rs b/crates/tui/src/commands/groups/core/translate.rs new file mode 100644 index 0000000000..4a626ed920 --- /dev/null +++ b/crates/tui/src/commands/groups/core/translate.rs @@ -0,0 +1,26 @@ +//! `/translate` command. + +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "translate", + aliases: &["translation", "transale"], + usage: "/translate", + description_id: MessageId::CmdTranslateDescription, +}; + +pub(in crate::commands) struct TranslateCmd; + +impl RegisterCommand for TranslateCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, _arg: Option<&str>) -> CommandResult { + super::core::translate(app) + } +} diff --git a/crates/tui/src/commands/groups/core/util.rs b/crates/tui/src/commands/groups/core/util.rs new file mode 100644 index 0000000000..8654806211 --- /dev/null +++ b/crates/tui/src/commands/groups/core/util.rs @@ -0,0 +1,23 @@ +//! Shared helpers for core slash commands. 
+ +pub(super) fn parse_depth_prefixed_arg( + arg: Option<&str>, + default_depth: u32, +) -> Result<(u32, Option<&str>), String> { + let Some(raw) = arg.map(str::trim).filter(|raw| !raw.is_empty()) else { + return Ok((default_depth, None)); + }; + let mut parts = raw.splitn(2, char::is_whitespace); + let first = parts.next().unwrap_or_default(); + if first.chars().all(|ch| ch.is_ascii_digit()) { + let depth: u32 = first + .parse() + .map_err(|_| "Depth must be an integer from 0 to 3".to_string())?; + if depth > 3 { + return Err("Depth must be between 0 and 3".to_string()); + } + Ok((depth, parts.next().map(str::trim))) + } else { + Ok((default_depth, Some(raw))) + } +} diff --git a/crates/tui/src/commands/groups/core/voice.rs b/crates/tui/src/commands/groups/core/voice.rs index 5d6e947214..8c0c78d523 100644 --- a/crates/tui/src/commands/groups/core/voice.rs +++ b/crates/tui/src/commands/groups/core/voice.rs @@ -29,6 +29,7 @@ use std::time::Duration; use regex::Regex; use crate::commands::CommandResult; +use crate::commands::traits::{CommandInfo, RegisterCommand}; use crate::config::Config; use crate::localization::{MessageId, tr}; use crate::tui::app::{App, AppAction}; @@ -38,6 +39,61 @@ const ASR_MODEL: &str = "mimo-v2.5-asr"; /// Model used for the AI-assisted voice-control pipeline. 
const VOICE_CONTROL_MODEL: &str = "mimo-v2.5"; +pub(in crate::commands) const VOICE_INFO: CommandInfo = CommandInfo { + name: "voice", + aliases: &["yuyin", "语音"], + usage: "/voice", + description_id: MessageId::CmdVoiceDescription, +}; + +pub(in crate::commands) const VOICE_SEND_INFO: CommandInfo = CommandInfo { + name: "voicesend", + aliases: &["voice-send", "yuyinsend", "语音发送"], + usage: "/voicesend", + description_id: MessageId::CmdVoiceSendDescription, +}; + +pub(in crate::commands) const VOICE_CONTROL_INFO: CommandInfo = CommandInfo { + name: "voicecontrol", + aliases: &["voice-control", "yuyincontrol", "语音控制"], + usage: "/voicecontrol", + description_id: MessageId::CmdVoiceControlDescription, +}; + +pub(in crate::commands) struct VoiceCmd; +pub(in crate::commands) struct VoiceSendCmd; +pub(in crate::commands) struct VoiceControlCmd; + +impl RegisterCommand for VoiceCmd { + fn info() -> &'static CommandInfo { + &VOICE_INFO + } + + fn execute(app: &mut App, _arg: Option<&str>) -> CommandResult { + voice(app) + } +} + +impl RegisterCommand for VoiceSendCmd { + fn info() -> &'static CommandInfo { + &VOICE_SEND_INFO + } + + fn execute(app: &mut App, _arg: Option<&str>) -> CommandResult { + voice_send(app) + } +} + +impl RegisterCommand for VoiceControlCmd { + fn info() -> &'static CommandInfo { + &VOICE_CONTROL_INFO + } + + fn execute(app: &mut App, _arg: Option<&str>) -> CommandResult { + voice_control(app) + } +} + // --- Recorder detection ---------------------------------------------------- /// Platform-specific recorder definitions. diff --git a/crates/tui/src/commands/groups/core/workspace.rs b/crates/tui/src/commands/groups/core/workspace.rs new file mode 100644 index 0000000000..169336653b --- /dev/null +++ b/crates/tui/src/commands/groups/core/workspace.rs @@ -0,0 +1,26 @@ +//! `/workspace` command. 
+ +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "workspace", + aliases: &["cwd"], + usage: "/workspace [path]", + description_id: MessageId::CmdWorkspaceDescription, +}; + +pub(in crate::commands) struct WorkspaceCmd; + +impl RegisterCommand for WorkspaceCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + super::core::workspace_switch(app, arg) + } +} diff --git a/crates/tui/src/commands/groups/project/goal.rs b/crates/tui/src/commands/groups/project/goal.rs index 055dd737ee..67f6d220c0 100644 --- a/crates/tui/src/commands/groups/project/goal.rs +++ b/crates/tui/src/commands/groups/project/goal.rs @@ -230,10 +230,8 @@ fn write_trophy_card(app: &App, verdict: HuntVerdict) -> Result, + app: Option>, + save_path: Option, + export_path: Option, + home_path: Option, + original_session_id: Option, + fork_session_id: Option, + new_session_id: Option, + fresh_session_id: Option, + stale_session_id: Option, + last_message: Option, + last_result_is_error: Option, + last_action: Option, +} + +impl std::fmt::Debug for SessionCommandWorld { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("SessionCommandWorld") + .field("has_tmpdir", &self.tmpdir.is_some()) + .field("has_app", &self.app.is_some()) + .field("save_path", &self.save_path) + .field("export_path", &self.export_path) + .field("home_path", &self.home_path) + .field("original_session_id", &self.original_session_id) + .field("fork_session_id", &self.fork_session_id) + .field("new_session_id", &self.new_session_id) + .field("fresh_session_id", &self.fresh_session_id) + .field("stale_session_id", &self.stale_session_id) + .field("last_message", &self.last_message) + .field("last_result_is_error", 
&self.last_result_is_error) + .finish() + } +} + +#[given("a CodeWhale session workspace with one user message")] +fn workspace_with_one_user_message(world: &mut SessionCommandWorld) { + let tmpdir = TempDir::new().expect("session workflow TempDir"); + let mut app = create_test_app_with_tmpdir(&tmpdir); + app.api_messages.push(Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: "Remember the whale migration".to_string(), + cache_control: None, + }], + }); + app.add_message(HistoryCell::User { + content: "Remember the whale migration".to_string(), + }); + app.session.total_tokens = 321; + app.session.total_conversation_tokens = 321; + + world.save_path = Some(tmpdir.path().join("saved-session.json")); + world.export_path = Some(tmpdir.path().join("transcript.md")); + world.home_path = Some(tmpdir.path().join("home")); + world.app = Some(Box::new(app)); + world.tmpdir = Some(tmpdir); +} + +#[given("a CodeWhale persisted session workspace with one user message")] +fn persisted_workspace_with_one_user_message(world: &mut SessionCommandWorld) { + workspace_with_one_user_message(world); + let original_id = "original-session".to_string(); + let app = world.app.as_deref_mut().expect("app should exist"); + app.current_session_id = Some(original_id.clone()); + world.original_session_id = Some(original_id); + persist_active_session(world); +} + +#[given("a CodeWhale session workspace with stale and fresh saved sessions")] +fn workspace_with_stale_and_fresh_saved_sessions(world: &mut SessionCommandWorld) { + workspace_with_one_user_message(world); + persist_session_with_age(world, "fresh-session", "Fresh session", 1); + persist_session_with_age(world, "stale-session", "Stale session", 30); + world.fresh_session_id = Some("fresh-session".to_string()); + world.stale_session_id = Some("stale-session".to_string()); +} + +#[when("the user saves the active session")] +fn user_saves_active_session(world: &mut SessionCommandWorld) { + let save_path = 
world + .save_path + .as_ref() + .expect("save path should exist") + .to_string_lossy() + .to_string(); + let result = execute_isolated(world, &format!("/save {save_path}")); + remember_result(world, &result); + + assert!(!result.is_error, "save failed: {:?}", result.message); + assert!( + world.save_path.as_ref().expect("save path").exists(), + "save command should write the session file" + ); +} + +#[when("the user exports the active transcript")] +fn user_exports_active_transcript(world: &mut SessionCommandWorld) { + let export_path = world + .export_path + .as_ref() + .expect("export path should exist") + .to_string_lossy() + .to_string(); + let result = execute_isolated(world, &format!("/export {export_path}")); + remember_result(world, &result); + + assert!(!result.is_error, "export failed: {:?}", result.message); + assert!( + world.export_path.as_ref().expect("export path").exists(), + "export command should write the transcript" + ); +} + +#[when("the user clears the active conversation")] +fn user_clears_active_conversation(world: &mut SessionCommandWorld) { + let result = execute_isolated(world, "/clear"); + remember_result(world, &result); + + assert!(!result.is_error, "clear failed: {:?}", result.message); + let app = world.app.as_deref().expect("app should exist"); + assert!( + app.api_messages.is_empty(), + "clear command should remove active API messages" + ); + assert_eq!(app.session.total_tokens, 0); +} + +#[when("the user loads the saved session")] +fn user_loads_saved_session(world: &mut SessionCommandWorld) { + let save_path = world + .save_path + .as_ref() + .expect("save path should exist") + .to_string_lossy() + .to_string(); + let result = execute_isolated(world, &format!("/load {save_path}")); + remember_result(world, &result); + + assert!(!result.is_error, "load failed: {:?}", result.message); + world.last_message = result.message; +} + +#[when("the user forks the active session")] +fn user_forks_active_session(world: &mut 
SessionCommandWorld) { + let result = execute_isolated(world, "/fork"); + remember_result(world, &result); + + assert!(!result.is_error, "fork failed: {:?}", result.message); + let fork_id = world + .app + .as_deref() + .and_then(|app| app.current_session_id.clone()) + .expect("fork command should switch to a child session"); + let forked = load_saved_session(world, &fork_id); + if world.original_session_id.is_none() { + world.original_session_id = forked.metadata.parent_session_id.clone(); + } + world.fork_session_id = Some(fork_id); +} + +#[when("the user tries to fork the active session")] +fn user_tries_to_fork_active_session(world: &mut SessionCommandWorld) { + let result = execute_isolated(world, "/fork"); + remember_result(world, &result); +} + +#[when("the user starts a new session")] +fn user_starts_new_session(world: &mut SessionCommandWorld) { + let result = execute_isolated(world, "/new"); + remember_result(world, &result); + + assert!(!result.is_error, "new session failed: {:?}", result.message); + let new_id = world + .app + .as_deref() + .and_then(|app| app.current_session_id.clone()) + .expect("new command should set an active session id"); + world.new_session_id = Some(new_id); +} + +#[when(regex = r#"^the user renames the active session to "([^"]+)"$"#)] +fn user_renames_active_session(world: &mut SessionCommandWorld, title: String) { + let result = execute_isolated(world, &format!("/rename {title}")); + remember_result(world, &result); + + assert!(!result.is_error, "rename failed: {:?}", result.message); +} + +#[when("the user lists saved sessions")] +fn user_lists_saved_sessions(world: &mut SessionCommandWorld) { + let result = execute_isolated(world, "/sessions list"); + remember_result(world, &result); + + assert!( + !result.is_error, + "sessions list failed: {:?}", + result.message + ); +} + +#[when(regex = r#"^the user prunes sessions older than (\d+) days$"#)] +fn user_prunes_sessions_older_than(world: &mut SessionCommandWorld, days: 
String) { + let result = execute_isolated(world, &format!("/sessions prune {days}")); + remember_result(world, &result); + + assert!( + !result.is_error, + "sessions prune failed: {:?}", + result.message + ); +} + +#[when("the user compacts context")] +fn user_compacts_context(world: &mut SessionCommandWorld) { + let result = execute_isolated(world, "/compact"); + remember_result(world, &result); + + assert!(!result.is_error, "compact failed: {:?}", result.message); +} + +#[when("the user purges context")] +fn user_purges_context(world: &mut SessionCommandWorld) { + let result = execute_isolated(world, "/purge"); + remember_result(world, &result); + + assert!(!result.is_error, "purge failed: {:?}", result.message); +} + +#[when(regex = r#"^the user prepares a session relay focused on "([^"]+)"$"#)] +fn user_prepares_session_relay_focused_on(world: &mut SessionCommandWorld, focus: String) { + let result = execute_isolated(world, &format!("/relay {focus}")); + remember_result(world, &result); + + assert!(!result.is_error, "relay failed: {:?}", result.message); +} + +#[when("the user runs the singular session command")] +fn user_runs_singular_session_command(world: &mut SessionCommandWorld) { + let result = execute_isolated(world, "/session"); + remember_result(world, &result); +} + +#[then("the active session should contain the saved message")] +fn active_session_contains_saved_message(world: &mut SessionCommandWorld) { + let app = world.app.as_deref().expect("app should exist"); + let message = app + .api_messages + .first() + .expect("loaded session should have one message"); + let content = message + .content + .iter() + .find_map(|block| match block { + ContentBlock::Text { text, .. 
} => Some(text.as_str()), + _ => None, + }) + .expect("loaded message should have text content"); + + assert_eq!(message.role, "user"); + assert_eq!(content, "Remember the whale migration"); +} + +#[then("the saved session file should contain the saved message")] +fn saved_session_file_contains_saved_message(world: &mut SessionCommandWorld) { + let session = read_saved_session_file(world); + + assert_saved_session_contains_message(&session, "Remember the whale migration"); +} + +#[then("the active session id should match the saved session file")] +fn active_session_id_matches_saved_session_file(world: &mut SessionCommandWorld) { + let session = read_saved_session_file(world); + let app = world.app.as_deref().expect("app should exist"); + + assert_eq!( + app.current_session_id.as_deref(), + Some(session.metadata.id.as_str()) + ); +} + +#[then("the exported markdown should contain the active transcript")] +fn exported_markdown_contains_active_transcript(world: &mut SessionCommandWorld) { + let export_path = world + .export_path + .as_ref() + .expect("export path should exist"); + let content = std::fs::read_to_string(export_path) + .unwrap_or_else(|err| panic!("read exported transcript {export_path:?}: {err}")); + + assert!(content.contains("# Chat Export")); + assert!(content.contains("**You:**")); + assert!(content.contains("Remember the whale migration")); +} + +#[then("the restored token count should match the saved session")] +fn restored_token_count_matches_saved_session(world: &mut SessionCommandWorld) { + let app = world.app.as_deref().expect("app should exist"); + + assert_eq!(app.session.total_tokens, 321); + assert_eq!(app.session.total_conversation_tokens, 321); +} + +#[then("CodeWhale should report that the session was loaded")] +fn codewhale_reports_session_loaded(world: &mut SessionCommandWorld) { + let message = world + .last_message + .as_deref() + .expect("load command should produce a message"); + + assert!( + message.contains("Session loaded 
from"), + "unexpected load message: {message}" + ); +} + +#[then("the forked session should reference the original session")] +fn forked_session_references_original_session(world: &mut SessionCommandWorld) { + let original_id = world + .original_session_id + .as_deref() + .expect("original session id should exist"); + let fork_id = world + .fork_session_id + .as_deref() + .expect("fork session id should exist"); + let forked = load_saved_session(world, fork_id); + + assert_eq!( + forked.metadata.parent_session_id.as_deref(), + Some(original_id) + ); + assert_eq!(forked.metadata.forked_from_message_count, Some(1)); +} + +#[then("the original session should still be loadable")] +fn original_session_still_loadable(world: &mut SessionCommandWorld) { + let original_id = world + .original_session_id + .as_deref() + .expect("original session id should exist"); + let original = load_saved_session(world, original_id); + + assert_saved_session_contains_message(&original, "Remember the whale migration"); +} + +#[then("the active session should be the forked session")] +fn active_session_is_forked_session(world: &mut SessionCommandWorld) { + let fork_id = world + .fork_session_id + .as_deref() + .expect("fork session id should exist"); + let app = world.app.as_deref().expect("app should exist"); + + assert_eq!(app.current_session_id.as_deref(), Some(fork_id)); + assert_app_contains_message(app, "Remember the whale migration"); +} + +#[then("CodeWhale should reject the fork because there are no messages")] +fn codewhale_rejects_empty_fork(world: &mut SessionCommandWorld) { + assert_eq!( + world.last_result_is_error, + Some(true), + "last command should have failed" + ); + let message = world + .last_message + .as_deref() + .expect("fork rejection should include a message"); + + assert!( + message.contains("Nothing to fork"), + "unexpected fork rejection message: {message}" + ); +} + +#[then("the active session should be empty")] +fn active_session_empty(world: &mut 
SessionCommandWorld) { + let app = world.app.as_deref().expect("app should exist"); + + assert!(app.api_messages.is_empty()); + assert_eq!(app.session.total_tokens, 0); + assert_eq!(app.session.total_conversation_tokens, 0); +} + +#[then("the original and forked sessions should remain loadable")] +fn original_and_forked_sessions_remain_loadable(world: &mut SessionCommandWorld) { + let original_id = world + .original_session_id + .as_deref() + .expect("original session id should exist"); + let fork_id = world + .fork_session_id + .as_deref() + .expect("fork session id should exist"); + let original = load_saved_session(world, original_id); + let forked = load_saved_session(world, fork_id); + + assert_saved_session_contains_message(&original, "Remember the whale migration"); + assert_saved_session_contains_message(&forked, "Remember the whale migration"); + assert_eq!( + forked.metadata.parent_session_id.as_deref(), + Some(original_id) + ); +} + +#[then("the active session should be a new empty session")] +fn active_session_is_new_empty_session(world: &mut SessionCommandWorld) { + let original_id = world + .original_session_id + .as_deref() + .expect("original session id should exist"); + let fork_id = world + .fork_session_id + .as_deref() + .expect("fork session id should exist"); + let new_id = world + .new_session_id + .as_deref() + .expect("new session id should exist"); + let app = world.app.as_deref().expect("app should exist"); + + assert_eq!(app.current_session_id.as_deref(), Some(new_id)); + assert_ne!(new_id, original_id); + assert_ne!(new_id, fork_id); + assert!(app.api_messages.is_empty()); + assert_eq!(app.session.total_tokens, 0); +} + +#[then("the active session should be cleared without an active session id")] +fn active_session_cleared_without_active_session_id(world: &mut SessionCommandWorld) { + let app = world.app.as_deref().expect("app should exist"); + + assert!(app.current_session_id.is_none()); + assert!(app.api_messages.is_empty()); + 
assert_eq!(app.session.total_tokens, 0); +} + +#[then(regex = r#"^the active saved session title should be "([^"]+)"$"#)] +fn active_saved_session_title_should_be(world: &mut SessionCommandWorld, expected: String) { + let app = world.app.as_deref().expect("app should exist"); + let session_id = app + .current_session_id + .as_deref() + .expect("active session id should exist"); + let saved = load_saved_session(world, session_id); + + assert_eq!(saved.metadata.title, expected); +} + +#[then("the active session should be the original session")] +fn active_session_is_original_session(world: &mut SessionCommandWorld) { + let original_id = world + .original_session_id + .as_deref() + .expect("original session id should exist"); + let app = world.app.as_deref().expect("app should exist"); + + assert_eq!(app.current_session_id.as_deref(), Some(original_id)); + assert_app_contains_message(app, "Remember the whale migration"); +} + +#[then("the session picker should be open")] +fn session_picker_should_be_open(world: &mut SessionCommandWorld) { + let app = world.app.as_deref().expect("app should exist"); + + assert_eq!(app.view_stack.top_kind(), Some(ModalKind::SessionPicker)); +} + +#[then("CodeWhale should report that one session was pruned")] +fn codewhale_reports_one_session_pruned(world: &mut SessionCommandWorld) { + let message = world + .last_message + .as_deref() + .expect("prune command should produce a message"); + + assert!( + message.contains("pruned 1 session"), + "unexpected prune message: {message}" + ); +} + +#[then("the fresh session should still be loadable")] +fn fresh_session_still_loadable(world: &mut SessionCommandWorld) { + let fresh_id = world + .fresh_session_id + .as_deref() + .expect("fresh session id should exist"); + let fresh = load_saved_session(world, fresh_id); + + assert_eq!(fresh.metadata.title, "Fresh session"); +} + +#[then("the stale session should no longer be loadable")] +fn stale_session_no_longer_loadable(world: &mut 
SessionCommandWorld) { + let stale_id = world + .stale_session_id + .as_deref() + .expect("stale session id should exist"); + + assert!( + try_load_saved_session(world, stale_id).is_err(), + "stale session should have been pruned" + ); +} + +#[then("CodeWhale should trigger context compaction")] +fn codewhale_triggers_context_compaction(world: &mut SessionCommandWorld) { + assert_eq!( + world.last_result_is_error, + Some(false), + "compact command should succeed" + ); + assert!(matches!( + world.last_action.as_ref(), + Some(AppAction::CompactContext) + )); + assert_eq!( + world.last_message.as_deref(), + Some("Context compaction triggered...") + ); +} + +#[then("CodeWhale should trigger context purge")] +fn codewhale_triggers_context_purge(world: &mut SessionCommandWorld) { + assert_eq!( + world.last_result_is_error, + Some(false), + "purge command should succeed" + ); + assert!(matches!( + world.last_action.as_ref(), + Some(AppAction::PurgeContext) + )); + assert_eq!( + world.last_message.as_deref(), + Some("Agent context purge triggered...") + ); +} + +#[then(regex = r#"^CodeWhale should send a session relay instruction focused on "([^"]+)"$"#)] +fn codewhale_sends_session_relay_instruction_focused_on( + world: &mut SessionCommandWorld, + focus: String, +) { + assert_eq!( + world.last_result_is_error, + Some(false), + "relay command should succeed" + ); + let message = match world.last_action.as_ref() { + Some(AppAction::SendMessage(message)) => message, + other => panic!("expected relay SendMessage action, got {other:?}"), + }; + + assert!(message.contains("Write or update `.deepseek/handoff.md`.")); + assert!(message.contains("# Session relay")); + assert!( + message.contains(&format!("- Requested relay focus: {focus}")), + "relay instruction should include requested focus: {message}" + ); + assert_eq!( + world.last_message.as_deref(), + Some("Preparing session relay at .deepseek/handoff.md...") + ); +} + +#[then("CodeWhale should reject the unknown session 
command")] +fn codewhale_rejects_unknown_session_command(world: &mut SessionCommandWorld) { + assert_eq!( + world.last_result_is_error, + Some(true), + "singular /session should be rejected" + ); + let message = world + .last_message + .as_deref() + .expect("unknown command should include a message"); + + assert!( + message.contains("Unknown command: /session"), + "unexpected unknown command message: {message}" + ); + assert!( + message.contains("/sessions") || message.contains("/save"), + "unknown command should include a session-related suggestion: {message}" + ); +} + +#[tokio::test(flavor = "current_thread")] +async fn save_export_and_load_session_workflow() { + run_scenario(SAVE_LOAD_SCENARIO, 11).await; +} + +#[tokio::test(flavor = "current_thread")] +async fn fork_keeps_original_session_resumable() { + run_scenario(FORK_RESUMABLE_SCENARIO, 5).await; +} + +#[tokio::test(flavor = "current_thread")] +async fn new_session_cannot_be_forked_before_messages_exist() { + run_scenario(NEW_THEN_FORK_SCENARIO, 5).await; +} + +#[tokio::test(flavor = "current_thread")] +async fn cleared_session_cannot_be_forked_before_messages_exist() { + run_scenario(CLEAR_THEN_FORK_SCENARIO, 5).await; +} + +#[tokio::test(flavor = "current_thread")] +async fn fork_followed_by_new_keeps_both_saved_sessions() { + run_scenario(FORK_THEN_NEW_SCENARIO, 5).await; +} + +#[tokio::test(flavor = "current_thread")] +async fn fork_followed_by_clear_keeps_both_saved_sessions() { + run_scenario(FORK_THEN_CLEAR_SCENARIO, 5).await; +} + +#[tokio::test(flavor = "current_thread")] +async fn rename_updates_active_saved_session_title() { + run_scenario(RENAME_SCENARIO, 4).await; +} + +#[tokio::test(flavor = "current_thread")] +async fn sessions_list_opens_saved_session_picker() { + run_scenario(SESSIONS_LIST_SCENARIO, 4).await; +} + +#[tokio::test(flavor = "current_thread")] +async fn sessions_prune_removes_only_stale_sessions() { + run_scenario(SESSIONS_PRUNE_SCENARIO, 5).await; +} + +#[tokio::test(flavor 
= "current_thread")] +async fn context_management_commands_emit_actions_without_clearing_active_session() { + run_scenario(CONTEXT_MANAGEMENT_SCENARIO, 10).await; +} + +#[tokio::test(flavor = "current_thread")] +async fn singular_session_command_is_not_registered() { + run_scenario(SINGULAR_SESSION_SCENARIO, 4).await; +} + +async fn run_scenario(name: &'static str, expected_steps: usize) { + let writer = SessionCommandWorld::cucumber() + .fail_on_skipped() + .with_default_cli() + .filter_run(FEATURE_PATH, move |feature, _, scenario| { + feature.name == FEATURE_NAME && scenario.name == name + }) + .await; + assert_eq!(writer.failed_steps(), 0, "scenario failed: {name}"); + assert_eq!(writer.skipped_steps(), 0, "scenario skipped steps: {name}"); + assert_eq!( + writer.passed_steps(), + expected_steps, + "scenario did not run: {name}" + ); +} + +fn create_test_app_with_tmpdir(tmpdir: &TempDir) -> App { + let options = TuiOptions { + model: "deepseek-v4-pro".to_string(), + workspace: tmpdir.path().to_path_buf(), + config_path: None, + config_profile: None, + allow_shell: false, + use_alt_screen: true, + use_mouse_capture: false, + use_bracketed_paste: true, + max_subagents: 1, + skills_dir: tmpdir.path().join("skills"), + memory_path: tmpdir.path().join("memory.md"), + notes_path: tmpdir.path().join("notes.txt"), + mcp_config_path: tmpdir.path().join("mcp.json"), + use_memory: false, + start_in_agent_mode: false, + skip_onboarding: true, + yolo: false, + resume_session_id: None, + initial_input: None, + }; + App::new(options, &Config::default()) +} + +fn execute_isolated(world: &mut SessionCommandWorld, command: &str) -> CommandResult { + let home = world + .home_path + .as_ref() + .expect("test home should exist") + .clone(); + std::fs::create_dir_all(&home).expect("create isolated test home"); + + let _lock = lock_test_env(); + let _home = EnvVarGuard::set("HOME", &home); + let _codewhale_home = EnvVarGuard::set("CODEWHALE_HOME", home.join(".codewhale")); + + let app 
= world.app.as_deref_mut().expect("app should exist"); + commands::user_registry::reload(Some(&app.workspace)); + commands::execute(command, app) +} + +fn remember_result(world: &mut SessionCommandWorld, result: &CommandResult) { + world.last_result_is_error = Some(result.is_error); + world.last_message = result.message.clone(); + world.last_action = result.action.clone(); +} + +fn persist_active_session(world: &SessionCommandWorld) { + let app = world.app.as_deref().expect("app should exist"); + let session_id = app + .current_session_id + .as_ref() + .expect("active session id should exist") + .clone(); + let session = create_saved_session_with_id_and_mode( + session_id, + &app.api_messages, + &app.model, + &app.workspace, + u64::from(app.session.total_tokens), + app.system_prompt.as_ref(), + Some(app.mode.label()), + ); + let home = world + .home_path + .as_ref() + .expect("test home should exist") + .clone(); + std::fs::create_dir_all(&home).expect("create isolated test home"); + + let _lock = lock_test_env(); + let _home = EnvVarGuard::set("HOME", &home); + let _codewhale_home = EnvVarGuard::set("CODEWHALE_HOME", home.join(".codewhale")); + let manager = SessionManager::default_location().expect("open isolated session manager"); + + manager + .save_session(&session) + .expect("persist active session"); +} + +fn persist_session_with_age(world: &SessionCommandWorld, session_id: &str, title: &str, days: i64) { + let app = world.app.as_deref().expect("app should exist"); + let mut session = create_saved_session_with_id_and_mode( + session_id.to_string(), + &app.api_messages, + &app.model, + &app.workspace, + u64::from(app.session.total_tokens), + app.system_prompt.as_ref(), + Some(app.mode.label()), + ); + let timestamp = Utc::now() - ChronoDuration::days(days); + session.metadata.title = title.to_string(); + session.metadata.created_at = timestamp; + session.metadata.updated_at = timestamp; + + let home = world + .home_path + .as_ref() + .expect("test home should 
exist") + .clone(); + std::fs::create_dir_all(&home).expect("create isolated test home"); + + let _lock = lock_test_env(); + let _home = EnvVarGuard::set("HOME", &home); + let _codewhale_home = EnvVarGuard::set("CODEWHALE_HOME", home.join(".codewhale")); + let manager = SessionManager::default_location().expect("open isolated session manager"); + + manager.save_session(&session).expect("persist session"); +} + +fn load_saved_session(world: &SessionCommandWorld, session_id: &str) -> SavedSession { + try_load_saved_session(world, session_id) + .unwrap_or_else(|_| panic!("load saved session failed")) +} + +fn try_load_saved_session( + world: &SessionCommandWorld, + session_id: &str, +) -> std::io::Result { + let home = world + .home_path + .as_ref() + .expect("test home should exist") + .clone(); + std::fs::create_dir_all(&home).expect("create isolated test home"); + + let _lock = lock_test_env(); + let _home = EnvVarGuard::set("HOME", &home); + let _codewhale_home = EnvVarGuard::set("CODEWHALE_HOME", home.join(".codewhale")); + let manager = SessionManager::default_location().expect("open isolated session manager"); + + manager.load_session(session_id) +} + +fn read_saved_session_file(world: &SessionCommandWorld) -> SavedSession { + let save_path = world.save_path.as_ref().expect("save path should exist"); + let content = std::fs::read_to_string(save_path) + .unwrap_or_else(|err| panic!("read saved session file {save_path:?}: {err}")); + + serde_json::from_str(&content) + .unwrap_or_else(|err| panic!("parse saved session file {save_path:?}: {err}")) +} + +fn assert_app_contains_message(app: &App, expected: &str) { + let message = app + .api_messages + .first() + .expect("active session should contain one message"); + let content = message + .content + .iter() + .find_map(text_content) + .expect("active message should contain text"); + + assert_eq!(message.role, "user"); + assert_eq!(content, expected); +} + +fn assert_saved_session_contains_message(session: 
&SavedSession, expected: &str) { + let message = session + .messages + .first() + .expect("saved session should contain one message"); + let content = message + .content + .iter() + .find_map(text_content) + .expect("saved message should contain text"); + + assert_eq!(message.role, "user"); + assert_eq!(content, expected); +} + +fn text_content(block: &ContentBlock) -> Option<&str> { + match block { + ContentBlock::Text { text, .. } => Some(text.as_str()), + _ => None, + } +} diff --git a/crates/tui/src/commands/groups/session/compact.rs b/crates/tui/src/commands/groups/session/compact.rs new file mode 100644 index 0000000000..f988e86682 --- /dev/null +++ b/crates/tui/src/commands/groups/session/compact.rs @@ -0,0 +1,26 @@ +//! `/compact` command. + +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "compact", + aliases: &["yasuo"], + usage: "/compact", + description_id: MessageId::CmdCompactDescription, +}; + +pub(in crate::commands) struct CompactCmd; + +impl RegisterCommand for CompactCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, _arg: Option<&str>) -> CommandResult { + super::session::compact(app) + } +} diff --git a/crates/tui/src/commands/groups/session/export.rs b/crates/tui/src/commands/groups/session/export.rs new file mode 100644 index 0000000000..7bf1a53043 --- /dev/null +++ b/crates/tui/src/commands/groups/session/export.rs @@ -0,0 +1,26 @@ +//! `/export` command. 
+ +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "export", + aliases: &["daochu"], + usage: "/export [path]", + description_id: MessageId::CmdExportDescription, +}; + +pub(in crate::commands) struct ExportCmd; + +impl RegisterCommand for ExportCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + super::session::export(app, arg) + } +} diff --git a/crates/tui/src/commands/groups/session/fork.rs b/crates/tui/src/commands/groups/session/fork.rs new file mode 100644 index 0000000000..11975ae25f --- /dev/null +++ b/crates/tui/src/commands/groups/session/fork.rs @@ -0,0 +1,26 @@ +//! `/fork` command. + +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "fork", + aliases: &["branch"], + usage: "/fork", + description_id: MessageId::CmdForkDescription, +}; + +pub(in crate::commands) struct ForkCmd; + +impl RegisterCommand for ForkCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, _arg: Option<&str>) -> CommandResult { + super::session::fork(app) + } +} diff --git a/crates/tui/src/commands/groups/session/load.rs b/crates/tui/src/commands/groups/session/load.rs new file mode 100644 index 0000000000..03a6cadbeb --- /dev/null +++ b/crates/tui/src/commands/groups/session/load.rs @@ -0,0 +1,26 @@ +//! `/load` command. 
+ +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "load", + aliases: &["jiazai"], + usage: "/load [path]", + description_id: MessageId::CmdLoadDescription, +}; + +pub(in crate::commands) struct LoadCmd; + +impl RegisterCommand for LoadCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + super::session::load(app, arg) + } +} diff --git a/crates/tui/src/commands/groups/session/mod.rs b/crates/tui/src/commands/groups/session/mod.rs index c1bd1ea237..4f6a1ae5e3 100644 --- a/crates/tui/src/commands/groups/session/mod.rs +++ b/crates/tui/src/commands/groups/session/mod.rs @@ -1,316 +1,72 @@ //! Session command area: saving, forking, resuming, exporting, and the //! `/relay` session-handoff artifact. +#[cfg(all(test, feature = "long-running-tests"))] +mod acceptance; +mod compact; +mod export; +mod fork; +mod load; +mod new; +mod purge; +mod relay; mod rename; +mod save; +mod sessions; // This group dir intentionally has a `session.rs` child module with the same // name. The module_inception allow is a permanent structure rationale, not // migration scaffolding; see docs/architecture/command-dispatch.md. 
#[allow(clippy::module_inception)] mod session; -use std::fmt::Write as _; - use crate::commands::CommandResult; -use crate::commands::traits::{Command, CommandGroup, CommandInfo, FunctionCommand}; -use crate::localization::MessageId; -use crate::tui::app::{App, AppAction}; +use crate::commands::traits::{Command, CommandGroup, FunctionCommand, RegisterCommand}; pub struct SessionCommands; impl CommandGroup for SessionCommands { fn commands(&self) -> Vec> { vec![ - Box::new(FunctionCommand::new(&RENAME_INFO, run_rename)), - Box::new(FunctionCommand::new(&SAVE_INFO, run_save)), - Box::new(FunctionCommand::new(&FORK_INFO, run_fork)), - Box::new(FunctionCommand::new(&NEW_INFO, run_new)), - Box::new(FunctionCommand::new(&SESSIONS_INFO, run_sessions)), - Box::new(FunctionCommand::new(&LOAD_INFO, run_load)), - Box::new(FunctionCommand::new(&COMPACT_INFO, run_compact)), - Box::new(FunctionCommand::new(&PURGE_INFO, run_purge)), - Box::new(FunctionCommand::new(&RELAY_INFO, run_relay)), - Box::new(FunctionCommand::new(&EXPORT_INFO, run_export)), + Box::new(FunctionCommand::new( + rename::RenameCmd::info(), + rename::RenameCmd::execute, + )), + Box::new(FunctionCommand::new( + save::SaveCmd::info(), + save::SaveCmd::execute, + )), + Box::new(FunctionCommand::new( + fork::ForkCmd::info(), + fork::ForkCmd::execute, + )), + Box::new(FunctionCommand::new( + new::NewCmd::info(), + new::NewCmd::execute, + )), + Box::new(FunctionCommand::new( + sessions::SessionsCmd::info(), + sessions::SessionsCmd::execute, + )), + Box::new(FunctionCommand::new( + load::LoadCmd::info(), + load::LoadCmd::execute, + )), + Box::new(FunctionCommand::new( + compact::CompactCmd::info(), + compact::CompactCmd::execute, + )), + Box::new(FunctionCommand::new( + purge::PurgeCmd::info(), + purge::PurgeCmd::execute, + )), + Box::new(FunctionCommand::new( + relay::RelayCmd::info(), + relay::RelayCmd::execute, + )), + Box::new(FunctionCommand::new( + export::ExportCmd::info(), + export::ExportCmd::execute, + )), 
] } } - -static RENAME_INFO: CommandInfo = CommandInfo { - name: "rename", - aliases: &["gaiming", "chongmingming"], - usage: "/rename ", - description_id: MessageId::CmdRenameDescription, -}; -static SAVE_INFO: CommandInfo = CommandInfo { - name: "save", - aliases: &[], - usage: "/save [path]", - description_id: MessageId::CmdSaveDescription, -}; -static FORK_INFO: CommandInfo = CommandInfo { - name: "fork", - aliases: &["branch"], - usage: "/fork", - description_id: MessageId::CmdForkDescription, -}; -static NEW_INFO: CommandInfo = CommandInfo { - name: "new", - aliases: &[], - usage: "/new [--force]", - description_id: MessageId::CmdNewDescription, -}; -static SESSIONS_INFO: CommandInfo = CommandInfo { - name: "sessions", - aliases: &["resume"], - usage: "/sessions [show|prune ]", - description_id: MessageId::CmdSessionsDescription, -}; -static LOAD_INFO: CommandInfo = CommandInfo { - name: "load", - aliases: &["jiazai"], - usage: "/load [path]", - description_id: MessageId::CmdLoadDescription, -}; -static COMPACT_INFO: CommandInfo = CommandInfo { - name: "compact", - aliases: &["yasuo"], - usage: "/compact", - description_id: MessageId::CmdCompactDescription, -}; -static PURGE_INFO: CommandInfo = CommandInfo { - name: "purge", - aliases: &["qingchu"], - usage: "/purge", - description_id: MessageId::CmdPurgeDescription, -}; -static RELAY_INFO: CommandInfo = CommandInfo { - name: "relay", - aliases: &["batonpass", "接力"], - usage: "/relay [focus]", - description_id: MessageId::CmdRelayDescription, -}; -static EXPORT_INFO: CommandInfo = CommandInfo { - name: "export", - aliases: &["daochu"], - usage: "/export [path]", - description_id: MessageId::CmdExportDescription, -}; - -fn run_registered(app: &mut App, name: &str, arg: Option<&str>) -> CommandResult { - dispatch(app, name, arg).expect("registered session command should dispatch") -} - -fn run_rename(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "rename", arg) -} -fn run_save(app: 
&mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "save", arg) -} -fn run_fork(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "fork", arg) -} -fn run_new(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "new", arg) -} -fn run_sessions(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "sessions", arg) -} -fn run_load(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "load", arg) -} -fn run_compact(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "compact", arg) -} -fn run_purge(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "purge", arg) -} -fn run_relay(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "relay", arg) -} -fn run_export(app: &mut App, arg: Option<&str>) -> CommandResult { - run_registered(app, "export", arg) -} - -pub(in crate::commands) fn dispatch( - app: &mut App, - command: &str, - arg: Option<&str>, -) -> Option { - let result = match command { - "rename" | "gaiming" | "chongmingming" => rename::rename(app, arg), - "save" => session::save(app, arg), - "fork" | "branch" => session::fork(app), - "new" => session::new_session(app, arg), - "sessions" | "resume" => session::sessions(app, arg), - "relay" | "batonpass" | "接力" => relay(app, arg), - "load" | "jiazai" => session::load(app, arg), - "compact" | "yasuo" => session::compact(app), - "purge" | "qingchu" => session::purge(app), - "export" | "daochu" => session::export(app, arg), - _ => return None, - }; - Some(result) -} - -/// Ask the active model to write a compact relay artifact for the next thread. -/// -/// The visible command is `/relay` (with `/接力` for Chinese users), but the -/// durable file path remains `.deepseek/handoff.md` for compatibility with -/// existing sessions and startup prompt loading. 
-pub fn relay(app: &mut App, arg: Option<&str>) -> CommandResult { - let focus = arg.map(str::trim).filter(|value| !value.is_empty()); - let message = build_relay_instruction(app, focus); - CommandResult::with_message_and_action( - "Preparing session relay at .deepseek/handoff.md...", - AppAction::SendMessage(message), - ) -} - -fn build_relay_instruction(app: &App, focus: Option<&str>) -> String { - let mut out = String::new(); - let _ = writeln!( - out, - "Create a compact session relay (接力) for a future CodeWhale thread." - ); - let _ = writeln!(out); - let _ = writeln!(out, "Write or update `.deepseek/handoff.md`."); - let _ = writeln!( - out, - "Keep the existing file path for compatibility, but title the artifact `# Session relay`." - ); - let _ = writeln!(out); - let _ = writeln!(out, "Current session snapshot:"); - let _ = writeln!(out, "- Workspace: {}", app.workspace.display()); - let _ = writeln!(out, "- Mode: {}", app.mode.label()); - let _ = writeln!(out, "- Model: {}", app.model_display_label()); - if let Some(focus) = focus { - let _ = writeln!(out, "- Requested relay focus: {focus}"); - } - if let Some(quarry) = app.hunt.quarry.as_deref() { - let _ = writeln!(out, "- Goal objective: {quarry}"); - } - if let Some(budget) = app.hunt.token_budget { - let _ = writeln!(out, "- Goal token budget: {budget}"); - } - if let Ok(todos) = app.todos.try_lock() { - let snapshot = todos.snapshot(); - if !snapshot.items.is_empty() { - let _ = writeln!( - out, - "\nWork checklist (primary progress surface, {}% complete):", - snapshot.completion_pct - ); - for item in snapshot.items { - let _ = writeln!( - out, - "- #{} [{}] {}", - item.id, - item.status.as_str(), - item.content - ); - } - } - } else { - let _ = writeln!( - out, - "\nWork checklist: unavailable because the checklist is busy." 
- ); - } - - if let Ok(plan) = app.plan_state.try_lock() { - let snapshot = plan.snapshot(); - if !snapshot.is_empty() { - let _ = writeln!(out, "\nOptional strategy metadata from update_plan:"); - write_plan_field(&mut out, "Title", snapshot.title.as_deref()); - write_plan_field(&mut out, "Objective", snapshot.objective.as_deref()); - write_plan_field(&mut out, "Context", snapshot.context_summary.as_deref()); - write_plan_field(&mut out, "Explanation", snapshot.explanation.as_deref()); - write_plan_list(&mut out, "Source", &snapshot.sources_used); - write_plan_list(&mut out, "Critical file", &snapshot.critical_files); - write_plan_list(&mut out, "Constraint", &snapshot.constraints); - write_plan_field( - &mut out, - "Recommended approach", - snapshot.recommended_approach.as_deref(), - ); - write_plan_field( - &mut out, - "Verification plan", - snapshot.verification_plan.as_deref(), - ); - write_plan_field( - &mut out, - "Risks and unknowns", - snapshot.risks_and_unknowns.as_deref(), - ); - write_plan_field( - &mut out, - "Handoff packet", - snapshot.handoff_packet.as_deref(), - ); - for item in snapshot.items { - let _ = writeln!(out, "- [{}] {}", plan_status_label(&item.status), item.step); - } - } - } else { - let _ = writeln!( - out, - "\nStrategy metadata: unavailable because plan state is busy." - ); - } - - let _ = writeln!( - out, - "\nBefore writing, inspect the current transcript context and any live tool evidence you need. Do not invent test results, file changes, blockers, or decisions." 
- ); - let _ = writeln!( - out, - "\nUse this compact structure:\n\ - # Session relay\n\ - \n\ - ## Goal\n\ - [the user's objective and any explicit constraints]\n\ - \n\ - ## Current work\n\ - [the active Work checklist item, progress, and what is mid-flight]\n\ - \n\ - ## Files and state\n\ - [changed files, important paths, sub-agents/RLM sessions, commands run]\n\ - \n\ - ## Decisions\n\ - [why key choices were made]\n\ - \n\ - ## Verification\n\ - [what passed, what failed, what was not run]\n\ - \n\ - ## Next action\n\ - [one concrete action for the next thread]" - ); - let _ = writeln!( - out, - "\nKeep it under about 900 words unless the session genuinely needs more. After writing, report the path and the single next action." - ); - out -} - -fn write_plan_field(out: &mut String, label: &str, value: Option<&str>) { - if let Some(value) = value.map(str::trim).filter(|value| !value.is_empty()) { - let _ = writeln!(out, "- {label}: {value}"); - } -} - -fn write_plan_list(out: &mut String, label: &str, values: &[String]) { - for value in values { - let value = value.trim(); - if !value.is_empty() { - let _ = writeln!(out, "- {label}: {value}"); - } - } -} - -fn plan_status_label(status: &crate::tools::plan::StepStatus) -> &'static str { - match status { - crate::tools::plan::StepStatus::Pending => "pending", - crate::tools::plan::StepStatus::InProgress => "in_progress", - crate::tools::plan::StepStatus::Completed => "completed", - } -} diff --git a/crates/tui/src/commands/groups/session/new.rs b/crates/tui/src/commands/groups/session/new.rs new file mode 100644 index 0000000000..c6f56a90d7 --- /dev/null +++ b/crates/tui/src/commands/groups/session/new.rs @@ -0,0 +1,26 @@ +//! `/new` command. 
+ +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "new", + aliases: &[], + usage: "/new [--force]", + description_id: MessageId::CmdNewDescription, +}; + +pub(in crate::commands) struct NewCmd; + +impl RegisterCommand for NewCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + super::session::new_session(app, arg) + } +} diff --git a/crates/tui/src/commands/groups/session/purge.rs b/crates/tui/src/commands/groups/session/purge.rs new file mode 100644 index 0000000000..e13fc42056 --- /dev/null +++ b/crates/tui/src/commands/groups/session/purge.rs @@ -0,0 +1,26 @@ +//! `/purge` command. + +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "purge", + aliases: &["qingchu"], + usage: "/purge", + description_id: MessageId::CmdPurgeDescription, +}; + +pub(in crate::commands) struct PurgeCmd; + +impl RegisterCommand for PurgeCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, _arg: Option<&str>) -> CommandResult { + super::session::purge(app) + } +} diff --git a/crates/tui/src/commands/groups/session/relay.rs b/crates/tui/src/commands/groups/session/relay.rs new file mode 100644 index 0000000000..d735de3e04 --- /dev/null +++ b/crates/tui/src/commands/groups/session/relay.rs @@ -0,0 +1,192 @@ +//! `/relay` command. 
+ +use std::fmt::Write as _; + +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::{App, AppAction}; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "relay", + aliases: &["batonpass", "接力"], + usage: "/relay [focus]", + description_id: MessageId::CmdRelayDescription, +}; + +pub(in crate::commands) struct RelayCmd; + +impl RegisterCommand for RelayCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + relay(app, arg) + } +} + +/// Ask the active model to write a compact relay artifact for the next thread. +/// +/// The visible command is `/relay` (with `/接力` for Chinese users), but the +/// durable file path remains `.deepseek/handoff.md` for compatibility with +/// existing sessions and startup prompt loading. +pub fn relay(app: &mut App, arg: Option<&str>) -> CommandResult { + let focus = arg.map(str::trim).filter(|value| !value.is_empty()); + let message = build_relay_instruction(app, focus); + CommandResult::with_message_and_action( + "Preparing session relay at .deepseek/handoff.md...", + AppAction::SendMessage(message), + ) +} + +fn build_relay_instruction(app: &App, focus: Option<&str>) -> String { + let mut out = String::new(); + let _ = writeln!( + out, + "Create a compact session relay (接力) for a future CodeWhale thread." + ); + let _ = writeln!(out); + let _ = writeln!(out, "Write or update `.deepseek/handoff.md`."); + let _ = writeln!( + out, + "Keep the existing file path for compatibility, but title the artifact `# Session relay`." 
+ ); + let _ = writeln!(out); + let _ = writeln!(out, "Current session snapshot:"); + let _ = writeln!(out, "- Workspace: {}", app.workspace.display()); + let _ = writeln!(out, "- Mode: {}", app.mode.label()); + let _ = writeln!(out, "- Model: {}", app.model_display_label()); + if let Some(focus) = focus { + let _ = writeln!(out, "- Requested relay focus: {focus}"); + } + if let Some(quarry) = app.hunt.quarry.as_deref() { + let _ = writeln!(out, "- Goal objective: {quarry}"); + } + if let Some(budget) = app.hunt.token_budget { + let _ = writeln!(out, "- Goal token budget: {budget}"); + } + if let Ok(todos) = app.todos.try_lock() { + let snapshot = todos.snapshot(); + if !snapshot.items.is_empty() { + let _ = writeln!( + out, + "\nWork checklist (primary progress surface, {}% complete):", + snapshot.completion_pct + ); + for item in snapshot.items { + let _ = writeln!( + out, + "- #{} [{}] {}", + item.id, + item.status.as_str(), + item.content + ); + } + } + } else { + let _ = writeln!( + out, + "\nWork checklist: unavailable because the checklist is busy." 
+ ); + } + + if let Ok(plan) = app.plan_state.try_lock() { + let snapshot = plan.snapshot(); + if !snapshot.is_empty() { + let _ = writeln!(out, "\nOptional strategy metadata from update_plan:"); + write_plan_field(&mut out, "Title", snapshot.title.as_deref()); + write_plan_field(&mut out, "Objective", snapshot.objective.as_deref()); + write_plan_field(&mut out, "Context", snapshot.context_summary.as_deref()); + write_plan_field(&mut out, "Explanation", snapshot.explanation.as_deref()); + write_plan_list(&mut out, "Source", &snapshot.sources_used); + write_plan_list(&mut out, "Critical file", &snapshot.critical_files); + write_plan_list(&mut out, "Constraint", &snapshot.constraints); + write_plan_field( + &mut out, + "Recommended approach", + snapshot.recommended_approach.as_deref(), + ); + write_plan_field( + &mut out, + "Verification plan", + snapshot.verification_plan.as_deref(), + ); + write_plan_field( + &mut out, + "Risks and unknowns", + snapshot.risks_and_unknowns.as_deref(), + ); + write_plan_field( + &mut out, + "Handoff packet", + snapshot.handoff_packet.as_deref(), + ); + for item in snapshot.items { + let _ = writeln!(out, "- [{}] {}", plan_status_label(&item.status), item.step); + } + } + } else { + let _ = writeln!( + out, + "\nStrategy metadata: unavailable because plan state is busy." + ); + } + + let _ = writeln!( + out, + "\nBefore writing, inspect the current transcript context and any live tool evidence you need. Do not invent test results, file changes, blockers, or decisions." 
+ ); + let _ = writeln!( + out, + "\nUse this compact structure:\n\ + # Session relay\n\ + \n\ + ## Goal\n\ + [the user's objective and any explicit constraints]\n\ + \n\ + ## Current work\n\ + [the active Work checklist item, progress, and what is mid-flight]\n\ + \n\ + ## Files and state\n\ + [changed files, important paths, sub-agents/RLM sessions, commands run]\n\ + \n\ + ## Decisions\n\ + [why key choices were made]\n\ + \n\ + ## Verification\n\ + [what passed, what failed, what was not run]\n\ + \n\ + ## Next action\n\ + [one concrete action for the next thread]" + ); + let _ = writeln!( + out, + "\nKeep it under about 900 words unless the session genuinely needs more. After writing, report the path and the single next action." + ); + out +} + +fn write_plan_field(out: &mut String, label: &str, value: Option<&str>) { + if let Some(value) = value.map(str::trim).filter(|value| !value.is_empty()) { + let _ = writeln!(out, "- {label}: {value}"); + } +} + +fn write_plan_list(out: &mut String, label: &str, values: &[String]) { + for value in values { + let value = value.trim(); + if !value.is_empty() { + let _ = writeln!(out, "- {label}: {value}"); + } + } +} + +fn plan_status_label(status: &crate::tools::plan::StepStatus) -> &'static str { + match status { + crate::tools::plan::StepStatus::Pending => "pending", + crate::tools::plan::StepStatus::InProgress => "in_progress", + crate::tools::plan::StepStatus::Completed => "completed", + } +} diff --git a/crates/tui/src/commands/groups/session/rename.rs b/crates/tui/src/commands/groups/session/rename.rs index e551cf61b5..0bd54d83a0 100644 --- a/crates/tui/src/commands/groups/session/rename.rs +++ b/crates/tui/src/commands/groups/session/rename.rs @@ -1,5 +1,7 @@ //! `/rename` command — set a custom title for the current session. 
+use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; use crate::session_manager::{SessionManager, update_session}; use crate::tui::app::App; @@ -7,6 +9,25 @@ use super::CommandResult; const MAX_TITLE_LEN: usize = 100; +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "rename", + aliases: &["gaiming", "chongmingming"], + usage: "/rename ", + description_id: MessageId::CmdRenameDescription, +}; + +pub(in crate::commands) struct RenameCmd; + +impl RegisterCommand for RenameCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + rename(app, arg) + } +} + /// Rename the current session to the given title. /// /// Usage: `/rename ` diff --git a/crates/tui/src/commands/groups/session/save.rs b/crates/tui/src/commands/groups/session/save.rs new file mode 100644 index 0000000000..fbf589f57e --- /dev/null +++ b/crates/tui/src/commands/groups/session/save.rs @@ -0,0 +1,26 @@ +//! `/save` command. + +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "save", + aliases: &[], + usage: "/save [path]", + description_id: MessageId::CmdSaveDescription, +}; + +pub(in crate::commands) struct SaveCmd; + +impl RegisterCommand for SaveCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + super::session::save(app, arg) + } +} diff --git a/crates/tui/src/commands/groups/session/sessions.rs b/crates/tui/src/commands/groups/session/sessions.rs new file mode 100644 index 0000000000..d5f37b9347 --- /dev/null +++ b/crates/tui/src/commands/groups/session/sessions.rs @@ -0,0 +1,26 @@ +//! `/sessions` command. 
+ +use crate::commands::traits::{CommandInfo, RegisterCommand}; +use crate::localization::MessageId; +use crate::tui::app::App; + +use super::CommandResult; + +pub(in crate::commands) const COMMAND_INFO: CommandInfo = CommandInfo { + name: "sessions", + aliases: &["resume"], + usage: "/sessions [show|prune ]", + description_id: MessageId::CmdSessionsDescription, +}; + +pub(in crate::commands) struct SessionsCmd; + +impl RegisterCommand for SessionsCmd { + fn info() -> &'static CommandInfo { + &COMMAND_INFO + } + + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult { + super::session::sessions(app, arg) + } +} diff --git a/crates/tui/src/commands/mod.rs b/crates/tui/src/commands/mod.rs index 7e3812fb85..cd81350f52 100644 --- a/crates/tui/src/commands/mod.rs +++ b/crates/tui/src/commands/mod.rs @@ -801,7 +801,8 @@ mod tests { #[test] fn execute_sidebar_toggles_visibility() { let mut app = create_test_app(); - app.set_sidebar_focus(SidebarFocus::Auto); + app.set_sidebar_focus(SidebarFocus::Pinned); + app.last_sidebar_host_width = Some(120); let result = execute("/sidebar", &mut app); assert!(!result.is_error); @@ -811,7 +812,7 @@ mod tests { let result = execute("/sidebar", &mut app); assert!(!result.is_error); - assert_eq!(app.sidebar_focus, SidebarFocus::Auto); + assert_eq!(app.sidebar_focus, SidebarFocus::Pinned); assert!(app.status_message.is_none()); assert_eq!(result.message.as_deref(), Some("Sidebar is visible")); } @@ -819,6 +820,7 @@ mod tests { #[test] fn execute_sidebar_accepts_explicit_focus_targets() { let mut app = create_test_app(); + app.last_sidebar_host_width = Some(120); let result = execute("/sidebar tasks", &mut app); assert!(!result.is_error); @@ -842,7 +844,7 @@ mod tests { let result = execute("/sidebar on", &mut app); assert!(!result.is_error); - assert_eq!(app.sidebar_focus, SidebarFocus::Auto); + assert_eq!(app.sidebar_focus, SidebarFocus::Pinned); assert!(app.status_message.is_none()); } diff --git 
a/crates/tui/src/commands/traits.rs b/crates/tui/src/commands/traits.rs index 5893e6ee2b..d466909cac 100644 --- a/crates/tui/src/commands/traits.rs +++ b/crates/tui/src/commands/traits.rs @@ -66,6 +66,15 @@ pub trait CommandGroup: Send + Sync { pub(crate) type CommandHandler = fn(&mut App, Option<&str>) -> CommandResult; +/// Trait implemented by focused built-in command modules. +/// +/// A command module owns its metadata and exposes a static execution function +/// that the group registry can wire into [`FunctionCommand`]. +pub trait RegisterCommand { + fn info() -> &'static CommandInfo; + fn execute(app: &mut App, arg: Option<&str>) -> CommandResult; +} + pub(crate) struct FunctionCommand { info: &'static CommandInfo, handler: CommandHandler, diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index 179f8a8e1e..f7cc147189 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -16,11 +16,15 @@ use serde_json::json; use std::os::unix::fs::{OpenOptionsExt, PermissionsExt}; use crate::audit::log_sensitive_event; -use crate::features::{Features, FeaturesToml, is_known_feature_key}; +use crate::features::{Feature, Features, FeaturesToml, is_known_feature_key}; use crate::hooks::HooksConfig; pub const DEFAULT_MAX_SUBAGENTS: usize = 20; pub const MAX_SUBAGENTS: usize = 20; +/// Upper bound for queued + running sub-agent admissions. This is deliberately +/// higher than the instantaneous concurrency cap so Workflow-style fanout can +/// opt into large bounded populations without unbounded queue growth. +pub const MAX_SUBAGENT_ADMISSION: usize = 200; /// Default per-step DeepSeek API timeout for sub-agent requests, in seconds. /// Matches the legacy hardcoded value so existing configs keep their old /// behavior when `[subagents] api_timeout_secs` is unset (#1806, #1808). @@ -47,6 +51,32 @@ pub const MIN_STREAM_CHUNK_TIMEOUT_SECS: u64 = 1; /// Maximum accepted stream chunk timeout. 
pub const MAX_STREAM_CHUNK_TIMEOUT_SECS: u64 = 3600; pub(crate) const STREAM_CHUNK_TIMEOUT_ENV: &str = "DEEPSEEK_STREAM_IDLE_TIMEOUT_SECS"; + +fn resolve_subagent_api_timeout_secs(raw: Option<u64>) -> u64 { + let raw = raw.unwrap_or(DEFAULT_SUBAGENT_API_TIMEOUT_SECS); + if raw == 0 { + return DEFAULT_SUBAGENT_API_TIMEOUT_SECS; + } + raw.clamp(MIN_SUBAGENT_API_TIMEOUT_SECS, MAX_SUBAGENT_API_TIMEOUT_SECS) +} + +fn resolve_subagent_heartbeat_timeout_secs(raw: Option<u64>, api_timeout_secs: u64) -> u64 { + let raw = raw.unwrap_or(DEFAULT_SUBAGENT_HEARTBEAT_TIMEOUT_SECS); + let configured = if raw == 0 { + DEFAULT_SUBAGENT_HEARTBEAT_TIMEOUT_SECS + } else { + raw.clamp( + MIN_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, + MAX_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, + ) + }; + let min_for_api = api_timeout_secs.saturating_add(30).clamp( + MIN_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, + MAX_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, + ); + configured.max(min_for_api) +} + pub const DEFAULT_TEXT_MODEL: &str = "deepseek-v4-pro"; pub const DEFAULT_DEEPSEEK_BASE_URL: &str = "https://api.deepseek.com/beta"; pub const DEFAULT_NVIDIA_NIM_MODEL: &str = "deepseek-ai/deepseek-v4-pro"; @@ -400,6 +430,56 @@ impl ApiProvider { } } +fn normalize_subagent_provider_key(value: &str) -> String { + value + .trim() + .to_ascii_lowercase() + .chars() + .map(|ch| match ch { + '-' | '_' | '.' 
| ' ' => '_', + _ => ch, + }) + .collect() +} + +fn subagent_provider_key_matches(key: &str, provider: ApiProvider) -> bool { + if ApiProvider::parse(key).is_some_and(|candidate| candidate == provider) { + return true; + } + + let normalized = normalize_subagent_provider_key(key); + if normalized == normalize_subagent_provider_key(provider.as_str()) { + return true; + } + + match provider { + ApiProvider::Deepseek => matches!( + normalized.as_str(), + "deepseek" | "deepseek_api" | "deepseek_official" + ), + ApiProvider::DeepseekCN => matches!( + normalized.as_str(), + "deepseek_cn" | "deepseek_china" | "deepseekcn" + ), + ApiProvider::Openrouter => matches!(normalized.as_str(), "openrouter" | "open_router"), + ApiProvider::OpenaiCodex => matches!( + normalized.as_str(), + "openai_codex" | "codex" | "chatgpt" | "openai_chatgpt" + ), + ApiProvider::Anthropic => { + matches!( + normalized.as_str(), + "anthropic" | "claude" | "anthropic_api" + ) + } + ApiProvider::Zai => matches!( + normalized.as_str(), + "zai" | "z_ai" | "glm" | "zai_glm" | "z_glm" + ), + _ => false, + } +} + // ============================================================================ // Provider Capability Matrix // ============================================================================ @@ -1761,6 +1841,11 @@ pub struct ContextConfig { /// `review`, `custom`). Per-call explicit model choices still win. #[derive(Debug, Clone, Deserialize, Default)] pub struct SubagentsConfig { + /// Top-level switch for the model-facing `agent` tool. `None` preserves + /// the feature-flag default; `false` hides/refuses sub-agent spawning + /// without changing the numeric queue/depth knobs. + #[serde(default)] + pub enabled: Option, #[serde(default)] pub default_model: Option, #[serde(default)] @@ -1780,12 +1865,13 @@ pub struct SubagentsConfig { #[serde(default)] pub max_concurrent: Option, /// How many levels of nested sub-agents the interactive `agent` tool may - /// spawn. 
`0` disables sub-agents entirely — the `agent` tool refuses to - /// spawn, a full opt-out; `1` allows one level, `2` two, and so on. When - /// unset, defaults to [`codewhale_config::DEFAULT_SPAWN_DEPTH`]; any value - /// is clamped to [`codewhale_config::MAX_SPAWN_DEPTH_CEILING`]. Fleet - /// workers are governed separately by `[fleet.exec] max_spawn_depth`; both - /// share the same default and ceiling so the limit cannot drift. + /// spawn. `0` blocks the model-facing `agent` tool at this runtime depth; + /// use `[subagents] enabled = false` for the clearer durable off switch. + /// `1` allows one level, `2` two, and so on. When unset, defaults to + /// [`codewhale_config::DEFAULT_SPAWN_DEPTH`]; any value is clamped to + /// [`codewhale_config::MAX_SPAWN_DEPTH_CEILING`]. Fleet workers are + /// governed separately by `[fleet.exec] max_spawn_depth`; both share the + /// same default and ceiling so the limit cannot drift. #[serde(default)] pub max_depth: Option, /// Number of direct (depth-1) sub-agents that may execute concurrently @@ -1794,6 +1880,16 @@ pub struct SubagentsConfig { /// throttle); explicit values are clamped to [1, max_subagents]. #[serde(default)] pub launch_concurrency: Option, + /// Maximum queued + running sub-agents admitted for one session. Defaults + /// to a large bounded queue while `launch_concurrency` keeps instantaneous + /// execution bounded. + #[serde(default, alias = "max_total", alias = "admission_limit")] + pub max_admitted: Option, + /// Optional aggregate token budget shared by a root `agent` run and its + /// descendants. When unset or 0, sub-agents keep legacy unlimited spend + /// behavior unless an individual `agent` call supplies a per-run override. + #[serde(default)] + pub token_budget: Option, /// Deprecated pre-v0.8.61 alias for `launch_concurrency`. Honored only /// when `launch_concurrency` is unset, so the new key always wins. 
#[serde(default, rename = "interactive_max_launch")] @@ -1812,6 +1908,34 @@ pub struct SubagentsConfig { /// cancelled before their request timeout can fire (#2614). #[serde(default)] pub heartbeat_timeout_secs: Option, + /// Per-provider overrides for sub-agent fanout and budget knobs. Keys are + /// provider names such as `deepseek`, `zai`, `openrouter`, or `anthropic`. + #[serde(default)] + pub providers: Option>, +} + +/// Provider-specific sub-agent limit overrides. +/// +/// Every field inherits from `[subagents]` when unset, so a provider profile +/// can tighten only the knobs that matter for that API's rate limits. +#[derive(Debug, Clone, Deserialize, Default)] +pub struct SubagentProviderConfig { + #[serde(default)] + pub enabled: Option, + #[serde(default)] + pub max_concurrent: Option, + #[serde(default)] + pub max_depth: Option, + #[serde(default)] + pub launch_concurrency: Option, + #[serde(default, alias = "max_total", alias = "admission_limit")] + pub max_admitted: Option, + #[serde(default)] + pub token_budget: Option, + #[serde(default)] + pub api_timeout_secs: Option, + #[serde(default)] + pub heartbeat_timeout_secs: Option, } /// `[auto]` table — knobs for the `--model auto` / `/model auto` router. @@ -2615,6 +2739,16 @@ impl Config { }) } + pub(crate) fn subagent_provider_config( + &self, + provider: ApiProvider, + ) -> Option<&SubagentProviderConfig> { + let providers = self.subagents.as_ref()?.providers.as_ref()?; + providers.iter().find_map(|(key, config)| { + subagent_provider_key_matches(key, provider).then_some(config) + }) + } + pub(crate) fn provider_config_for_mut(&mut self, provider: ApiProvider) -> &mut ProviderConfig { let providers = self.providers.get_or_insert_with(ProvidersConfig::default); match provider { @@ -3172,12 +3306,65 @@ impl Config { .clamp(1, MAX_SUBAGENTS) } + /// Return the provider-specific maximum number of concurrent sub-agents. + /// `[subagents.providers.] 
max_concurrent` inherits from the + /// global `[subagents]` value when unset. + #[must_use] + pub fn max_subagents_for_provider(&self, provider: ApiProvider) -> usize { + self.subagent_provider_config(provider) + .and_then(|cfg| cfg.max_concurrent) + .map(|max| max.clamp(1, MAX_SUBAGENTS)) + .unwrap_or_else(|| self.max_subagents()) + } + + /// Whether the model-facing `agent` tool is available after applying the + /// feature flag, explicit `[subagents] enabled` switch, and legacy + /// zero-valued opt-outs. + #[must_use] + pub fn subagents_enabled(&self) -> bool { + self.subagents_disabled_reason().is_none() + } + + /// Whether the model-facing `agent` tool is available for this provider + /// after applying global and provider-specific sub-agent controls. + #[must_use] + pub fn subagents_enabled_for_provider(&self, provider: ApiProvider) -> bool { + if !self.subagents_enabled() { + return false; + } + let Some(provider_cfg) = self.subagent_provider_config(provider) else { + return true; + }; + provider_cfg.enabled != Some(false) + && provider_cfg.max_concurrent != Some(0) + && provider_cfg.max_depth != Some(0) + } + + /// Machine-readable reason sub-agents are disabled, in precedence order. + #[must_use] + pub fn subagents_disabled_reason(&self) -> Option<&'static str> { + if !self.features().enabled(Feature::Subagents) { + return Some("features.subagents=false"); + } + let subagents_cfg = self.subagents.as_ref()?; + if subagents_cfg.enabled == Some(false) { + return Some("subagents.enabled=false"); + } + if subagents_cfg.max_concurrent == Some(0) { + return Some("subagents.max_concurrent=0"); + } + if subagents_cfg.max_depth == Some(0) { + return Some("subagents.max_depth=0"); + } + None + } + /// How many levels of nested sub-agents the interactive `agent` tool may /// spawn. Reads `[subagents] max_depth`; when unset it defaults to /// [`codewhale_config::DEFAULT_SPAWN_DEPTH`]. 
`0` is a valid value that - /// disables sub-agent spawning entirely (full opt-out). Any value is - /// clamped to [`codewhale_config::MAX_SPAWN_DEPTH_CEILING`] so the - /// operator's choice can never exceed the hard recursion ceiling. + /// blocks the `agent` tool at this runtime depth. Any value is clamped to + /// [`codewhale_config::MAX_SPAWN_DEPTH_CEILING`] so the operator's choice + /// can never exceed the hard recursion ceiling. #[must_use] pub fn subagent_max_spawn_depth(&self) -> u32 { self.subagents @@ -3187,6 +3374,15 @@ impl Config { .min(codewhale_config::MAX_SPAWN_DEPTH_CEILING) } + /// Return the provider-specific maximum sub-agent recursion depth. + #[must_use] + pub fn subagent_max_spawn_depth_for_provider(&self, provider: ApiProvider) -> u32 { + self.subagent_provider_config(provider) + .and_then(|cfg| cfg.max_depth) + .unwrap_or_else(|| self.subagent_max_spawn_depth()) + .min(codewhale_config::MAX_SPAWN_DEPTH_CEILING) + } + /// Number of direct (depth-1) sub-agents that may execute concurrently /// before further launches queue for a launch slot (#3095). Reads /// `[subagents] launch_concurrency` (or the deprecated @@ -3203,6 +3399,71 @@ impl Config { .clamp(1, max) } + /// Return the provider-specific direct launch throttle. Children above + /// this limit queue for a launch slot instead of starting immediately. + #[must_use] + pub fn launch_concurrency_for_provider(&self, provider: ApiProvider) -> usize { + let max = self.max_subagents_for_provider(provider); + self.subagent_provider_config(provider) + .and_then(|cfg| cfg.launch_concurrency) + .or_else(|| { + self.subagents + .as_ref() + .and_then(|cfg| cfg.launch_concurrency.or(cfg.interactive_max_launch_legacy)) + }) + .unwrap_or(max) + .clamp(1, max) + } + + /// Maximum queued + running sub-agents admitted for the session. 
+ /// + /// Defaults to [`MAX_SUBAGENT_ADMISSION`] so distinct `agent` calls can + /// queue and drain through `launch_concurrency` instead of being rejected + /// at the instantaneous concurrency cap. Explicit values are clamped to + /// `[max_subagents, MAX_SUBAGENT_ADMISSION]`. + #[must_use] + pub fn max_admitted_subagents(&self) -> usize { + let max_concurrent = self.max_subagents(); + self.subagents + .as_ref() + .and_then(|cfg| cfg.max_admitted) + .unwrap_or(MAX_SUBAGENT_ADMISSION) + .clamp(max_concurrent, MAX_SUBAGENT_ADMISSION) + } + + /// Return the provider-specific queued + running admission cap. + #[must_use] + pub fn max_admitted_subagents_for_provider(&self, provider: ApiProvider) -> usize { + let max_concurrent = self.max_subagents_for_provider(provider); + self.subagent_provider_config(provider) + .and_then(|cfg| cfg.max_admitted) + .or_else(|| self.subagents.as_ref().and_then(|cfg| cfg.max_admitted)) + .unwrap_or(MAX_SUBAGENT_ADMISSION) + .clamp(max_concurrent, MAX_SUBAGENT_ADMISSION) + } + + /// Optional aggregate token budget for each root `agent` run. + /// + /// Reads `[subagents] token_budget`. `None` and `0` both mean unlimited, + /// preserving legacy behavior until a budget is explicitly configured. + #[must_use] + pub fn subagent_token_budget(&self) -> Option { + self.subagents + .as_ref() + .and_then(|cfg| cfg.token_budget) + .filter(|budget| *budget > 0) + } + + /// Return the provider-specific aggregate token budget for each root + /// `agent` run. + #[must_use] + pub fn subagent_token_budget_for_provider(&self, provider: ApiProvider) -> Option { + self.subagent_provider_config(provider) + .and_then(|cfg| cfg.token_budget) + .or_else(|| self.subagents.as_ref().and_then(|cfg| cfg.token_budget)) + .filter(|budget| *budget > 0) + } + /// Resolved per-step DeepSeek API timeout for sub-agents, in seconds. 
/// /// Reads `[subagents] api_timeout_secs` and clamps to @@ -3213,15 +3474,19 @@ impl Config { /// fail-fast tests, not production (#1806, #1808). #[must_use] pub fn subagent_api_timeout_secs(&self) -> u64 { - let raw = self - .subagents - .as_ref() - .and_then(|cfg| cfg.api_timeout_secs) - .unwrap_or(DEFAULT_SUBAGENT_API_TIMEOUT_SECS); - if raw == 0 { - return DEFAULT_SUBAGENT_API_TIMEOUT_SECS; - } - raw.clamp(MIN_SUBAGENT_API_TIMEOUT_SECS, MAX_SUBAGENT_API_TIMEOUT_SECS) + resolve_subagent_api_timeout_secs( + self.subagents.as_ref().and_then(|cfg| cfg.api_timeout_secs), + ) + } + + /// Return the provider-specific per-step API timeout for sub-agents. + #[must_use] + pub fn subagent_api_timeout_secs_for_provider(&self, provider: ApiProvider) -> u64 { + resolve_subagent_api_timeout_secs( + self.subagent_provider_config(provider) + .and_then(|cfg| cfg.api_timeout_secs) + .or_else(|| self.subagents.as_ref().and_then(|cfg| cfg.api_timeout_secs)), + ) } /// Resolved no-progress heartbeat timeout for running sub-agents. @@ -3233,24 +3498,28 @@ impl Config { /// configured long model request is not pre-empted by heartbeat cleanup. #[must_use] pub fn subagent_heartbeat_timeout_secs(&self) -> u64 { - let raw = self - .subagents - .as_ref() - .and_then(|cfg| cfg.heartbeat_timeout_secs) - .unwrap_or(DEFAULT_SUBAGENT_HEARTBEAT_TIMEOUT_SECS); - let configured = if raw == 0 { - DEFAULT_SUBAGENT_HEARTBEAT_TIMEOUT_SECS - } else { - raw.clamp( - MIN_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, - MAX_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, - ) - }; - let min_for_api = self.subagent_api_timeout_secs().saturating_add(30).clamp( - MIN_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, - MAX_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, - ); - configured.max(min_for_api) + resolve_subagent_heartbeat_timeout_secs( + self.subagents + .as_ref() + .and_then(|cfg| cfg.heartbeat_timeout_secs), + self.subagent_api_timeout_secs(), + ) + } + + /// Return the provider-specific no-progress heartbeat timeout. 
+ #[must_use] + pub fn subagent_heartbeat_timeout_secs_for_provider(&self, provider: ApiProvider) -> u64 { + let api_timeout = self.subagent_api_timeout_secs_for_provider(provider); + resolve_subagent_heartbeat_timeout_secs( + self.subagent_provider_config(provider) + .and_then(|cfg| cfg.heartbeat_timeout_secs) + .or_else(|| { + self.subagents + .as_ref() + .and_then(|cfg| cfg.heartbeat_timeout_secs) + }), + api_timeout, + ) } /// Resolved per-SSE-chunk idle timeout in seconds. @@ -5526,7 +5795,9 @@ pub fn active_provider_has_config_api_key(config: &Config) -> bool { return crate::oauth::auth_file_path().exists(); } if matches!(provider, ApiProvider::Huggingface) - && std::env::var("HF_TOKEN").is_ok_and(|k| !k.trim().is_empty()) + && ["HUGGINGFACE_API_KEY", "HF_TOKEN"] + .iter() + .any(|var| std::env::var(var).is_ok_and(|k| !k.trim().is_empty())) { return true; } @@ -5736,6 +6007,17 @@ fn provider_config_table_name(provider: ApiProvider) -> Result { } fn provider_env_api_key(provider: ApiProvider) -> Option { + if provider == ApiProvider::Huggingface { + return std::env::var("HUGGINGFACE_API_KEY") + .ok() + .filter(|value| !value.trim().is_empty()) + .or_else(|| { + std::env::var("HF_TOKEN") + .ok() + .filter(|value| !value.trim().is_empty()) + }); + } + provider.env_vars().iter().find_map(|var| { std::env::var(var) .ok() @@ -7337,6 +7619,169 @@ action = "session.compact" assert_eq!(config.launch_concurrency(), 3); } + #[test] + fn subagent_token_budget_is_optional_and_zero_disables() { + assert_eq!(Config::default().subagent_token_budget(), None); + + let disabled = Config { + subagents: Some(SubagentsConfig { + token_budget: Some(0), + ..SubagentsConfig::default() + }), + ..Config::default() + }; + assert_eq!(disabled.subagent_token_budget(), None); + + let configured = Config { + subagents: Some(SubagentsConfig { + token_budget: Some(50_000), + ..SubagentsConfig::default() + }), + ..Config::default() + }; + assert_eq!(configured.subagent_token_budget(), 
Some(50_000)); + } + + #[test] + fn subagent_admission_limit_defaults_and_clamps() { + assert_eq!( + Config::default().max_admitted_subagents(), + MAX_SUBAGENT_ADMISSION + ); + + let configured = Config { + subagents: Some(SubagentsConfig { + max_concurrent: Some(4), + max_admitted: Some(80), + ..SubagentsConfig::default() + }), + ..Config::default() + }; + assert_eq!(configured.max_subagents(), 4); + assert_eq!(configured.max_admitted_subagents(), 80); + + let low = Config { + subagents: Some(SubagentsConfig { + max_concurrent: Some(4), + max_admitted: Some(1), + ..SubagentsConfig::default() + }), + ..Config::default() + }; + assert_eq!(low.max_admitted_subagents(), 4); + + let high = Config { + subagents: Some(SubagentsConfig { + max_admitted: Some(MAX_SUBAGENT_ADMISSION + 1), + ..SubagentsConfig::default() + }), + ..Config::default() + }; + assert_eq!(high.max_admitted_subagents(), MAX_SUBAGENT_ADMISSION); + + let alias_cfg: SubagentsConfig = + toml::from_str("admission_limit = 80").expect("parse admission alias"); + assert_eq!(alias_cfg.max_admitted, Some(80)); + } + + #[test] + fn provider_subagent_profiles_override_global_limits_with_aliases() { + let config: Config = toml::from_str( + r#" +provider = "zai" + +[subagents] +max_concurrent = 20 +launch_concurrency = 20 +max_admitted = 200 +max_depth = 6 +token_budget = 100000 +api_timeout_secs = 900 +heartbeat_timeout_secs = 1200 + +[subagents.providers.glm] +max_concurrent = 4 +launch_concurrency = 3 +max_admitted = 12 +max_depth = 2 +token_budget = 25000 +api_timeout_secs = 180 +heartbeat_timeout_secs = 240 +"#, + ) + .expect("parse provider subagent profile"); + + assert_eq!(config.api_provider(), ApiProvider::Zai); + assert_eq!(config.max_subagents(), 20); + assert_eq!(config.max_subagents_for_provider(ApiProvider::Zai), 4); + assert_eq!(config.launch_concurrency_for_provider(ApiProvider::Zai), 3); + assert_eq!( + config.max_admitted_subagents_for_provider(ApiProvider::Zai), + 12 + ); + assert_eq!( + 
config.subagent_max_spawn_depth_for_provider(ApiProvider::Zai), + 2 + ); + assert_eq!( + config.subagent_token_budget_for_provider(ApiProvider::Zai), + Some(25_000) + ); + assert_eq!( + config.subagent_api_timeout_secs_for_provider(ApiProvider::Zai), + 180 + ); + assert_eq!( + config.subagent_heartbeat_timeout_secs_for_provider(ApiProvider::Zai), + 240 + ); + } + + #[test] + fn provider_subagent_profiles_inherit_and_clamp_against_provider_max() { + let config: Config = toml::from_str( + r#" +[subagents] +max_concurrent = 12 +launch_concurrency = 8 +max_depth = 5 +api_timeout_secs = 300 + +[subagents.providers.deepseek_api] +max_concurrent = 30 +launch_concurrency = 30 +max_admitted = 1 + +[subagents.providers.anthropic] +enabled = false +"#, + ) + .expect("parse inherited provider subagent profile"); + + assert_eq!( + config.max_subagents_for_provider(ApiProvider::Deepseek), + MAX_SUBAGENTS + ); + assert_eq!( + config.launch_concurrency_for_provider(ApiProvider::Deepseek), + MAX_SUBAGENTS + ); + assert_eq!( + config.max_admitted_subagents_for_provider(ApiProvider::Deepseek), + MAX_SUBAGENTS + ); + assert_eq!( + config.subagent_max_spawn_depth_for_provider(ApiProvider::Deepseek), + 5 + ); + assert_eq!( + config.subagent_api_timeout_secs_for_provider(ApiProvider::Deepseek), + 300 + ); + assert!(config.subagents_enabled_for_provider(ApiProvider::Deepseek)); + assert!(!config.subagents_enabled_for_provider(ApiProvider::Anthropic)); + } + #[test] fn subagents_max_concurrent_overrides_top_level_cap() { let config = Config { @@ -7372,6 +7817,64 @@ action = "session.compact" assert_eq!(high.max_subagents(), MAX_SUBAGENTS); } + #[test] + fn subagents_enabled_reports_disable_precedence() { + assert!(Config::default().subagents_enabled()); + + let mut feature_disabled = Config::default(); + feature_disabled + .set_feature("subagents", false) + .expect("known feature"); + assert!(!feature_disabled.subagents_enabled()); + assert_eq!( + 
feature_disabled.subagents_disabled_reason(), + Some("features.subagents=false") + ); + + let explicit_disabled = Config { + subagents: Some(SubagentsConfig { + enabled: Some(false), + max_concurrent: Some(0), + max_depth: Some(0), + ..SubagentsConfig::default() + }), + ..Config::default() + }; + assert!(!explicit_disabled.subagents_enabled()); + assert_eq!( + explicit_disabled.subagents_disabled_reason(), + Some("subagents.enabled=false") + ); + + let zero_concurrency = Config { + subagents: Some(SubagentsConfig { + enabled: Some(true), + max_concurrent: Some(0), + max_depth: Some(1), + ..SubagentsConfig::default() + }), + ..Config::default() + }; + assert_eq!( + zero_concurrency.subagents_disabled_reason(), + Some("subagents.max_concurrent=0") + ); + + let zero_depth = Config { + subagents: Some(SubagentsConfig { + enabled: Some(true), + max_concurrent: Some(1), + max_depth: Some(0), + ..SubagentsConfig::default() + }), + ..Config::default() + }; + assert_eq!( + zero_depth.subagents_disabled_reason(), + Some("subagents.max_depth=0") + ); + } + #[test] fn subagent_max_spawn_depth_defaults_allows_zero_and_clamps() { assert_eq!( diff --git a/crates/tui/src/config_persistence.rs b/crates/tui/src/config_persistence.rs index 022b958f28..75cdd26cdb 100644 --- a/crates/tui/src/config_persistence.rs +++ b/crates/tui/src/config_persistence.rs @@ -170,6 +170,69 @@ pub(crate) fn persist_tui_integer_key( Ok(path) } +pub(crate) fn persist_subagents_bool_key( + config_path: Option<&Path>, + key: &str, + value: bool, +) -> anyhow::Result { + persist_subagents_value_key(config_path, key, toml::Value::Boolean(value)) +} + +pub(crate) fn persist_subagents_integer_key( + config_path: Option<&Path>, + key: &str, + value: u64, +) -> anyhow::Result { + use anyhow::Context; + + let value = i64::try_from(value).context("integer value is too large for TOML")?; + persist_subagents_value_key(config_path, key, toml::Value::Integer(value)) +} + +fn persist_subagents_value_key( + config_path: 
Option<&Path>, + key: &str, + value: toml::Value, +) -> anyhow::Result { + use anyhow::Context; + use std::fs; + + let path = config_toml_path(config_path)?; + if let Some(parent) = path.parent() { + fs::create_dir_all(parent) + .with_context(|| format!("failed to create config directory {}", parent.display()))?; + } + + let (mut doc, original_raw) = if path.exists() { + let raw = fs::read_to_string(&path) + .with_context(|| format!("failed to read config at {}", path.display()))?; + let doc: toml::Value = toml::from_str(&raw) + .with_context(|| format!("failed to parse config at {}", path.display()))?; + (doc, Some(raw)) + } else { + (toml::Value::Table(toml::value::Table::new()), None) + }; + let table = doc + .as_table_mut() + .context("config.toml root must be a table")?; + let subagents_entry = table + .entry("subagents".to_string()) + .or_insert_with(|| toml::Value::Table(toml::value::Table::new())); + let subagents_table = subagents_entry + .as_table_mut() + .context("`subagents` section in config.toml must be a table")?; + subagents_table.insert(key.to_string(), value); + + if let Some(raw) = original_raw { + save_toml_preserving_comments(&path, &doc, &raw)?; + } else { + let body = toml::to_string_pretty(&doc).context("failed to serialize config.toml")?; + fs::write(&path, body) + .with_context(|| format!("failed to write config at {}", path.display()))?; + } + Ok(path) +} + pub(crate) fn persist_provider_base_url_key( config_path: Option<&Path>, provider: ApiProvider, diff --git a/crates/tui/src/config_ui.rs b/crates/tui/src/config_ui.rs index ab0966c4b5..2e1d1c80d3 100644 --- a/crates/tui/src/config_ui.rs +++ b/crates/tui/src/config_ui.rs @@ -244,7 +244,7 @@ pub enum CostCurrencyValue { #[serde(rename_all = "snake_case")] pub enum SidebarFocusValue { Auto, - Work, + Pinned, Tasks, Agents, Context, @@ -867,7 +867,7 @@ impl SidebarFocusValue { fn as_setting(self) -> &'static str { match self { Self::Auto => "auto", - Self::Work => "work", + Self::Pinned => 
"pinned", Self::Tasks => "tasks", Self::Agents => "agents", Self::Context => "context", @@ -1005,7 +1005,7 @@ impl From<&str> for SidebarFocusValue { fn from(value: &str) -> Self { match SidebarFocus::from_setting(value) { SidebarFocus::Auto => Self::Auto, - SidebarFocus::Work => Self::Work, + SidebarFocus::Pinned => Self::Pinned, SidebarFocus::Tasks => Self::Tasks, SidebarFocus::Agents => Self::Agents, SidebarFocus::Context => Self::Context, diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 2893bebd08..d55b9211a2 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -60,7 +60,7 @@ use crate::worker_profile::ModelRoute; use crate::working_set::WorkingSet; use super::events::{Event, TurnOutcomeStatus}; -use super::ops::{Op, SessionSnapshot, USER_SHELL_TOOL_ID_PREFIX}; +use super::ops::{Op, SessionSnapshot, USER_SHELL_TOOL_ID_PREFIX, UserInputProvenance}; use super::session::Session; use super::tool_parser; use super::turn::{TurnContext, post_turn_snapshot, pre_turn_snapshot}; @@ -276,10 +276,15 @@ pub struct EngineConfig { pub max_steps: u32, /// Maximum number of concurrently active subagents. pub max_subagents: usize, + /// Maximum queued + running sub-agents admitted for this engine session. + pub max_admitted_subagents: usize, /// Number of direct (depth-1) sub-agents that may execute concurrently /// before further launches queue for a launch slot (#3095). /// Resolved from `[subagents] launch_concurrency`. pub launch_concurrency: usize, + /// Whether the model-facing `agent` tool is available after applying + /// feature flags and `[subagents]` opt-out controls. + pub subagents_enabled: bool, /// Feature flags controlling tool availability. pub features: Features, /// Auto-compaction settings for long conversations. @@ -292,8 +297,12 @@ pub struct EngineConfig { pub goal_state: SharedGoalState, /// Maximum sub-agent recursion depth (default 3). See /// `SubAgentRuntime::max_spawn_depth`. 
Override via - /// `[runtime] max_spawn_depth = N` in `~/.deepseek/config.toml`. + /// `[subagents] max_depth = N` in `~/.codewhale/config.toml`. pub max_spawn_depth: u32, + /// Optional aggregate token budget for each root sub-agent run. + /// Descendant agents inherit the root pool unless a child starts a new + /// budget scope with an explicit per-call override. + pub subagent_token_budget: Option, /// Per-domain network policy decider (#135). Shared across the session so /// session-scoped approvals (`/network allow `) persist for the /// remainder of the run. @@ -402,13 +411,16 @@ impl Default for EngineConfig { show_thinking: true, max_steps: 100, max_subagents: DEFAULT_MAX_SUBAGENTS, + max_admitted_subagents: DEFAULT_MAX_SUBAGENTS, launch_concurrency: DEFAULT_MAX_SUBAGENTS, + subagents_enabled: true, features: Features::with_defaults(), compaction: CompactionConfig::default(), todos: new_shared_todo_list(), plan_state: new_shared_plan_state(), goal_state: new_shared_goal_state(), max_spawn_depth: crate::tools::subagent::DEFAULT_MAX_SPAWN_DEPTH, + subagent_token_budget: None, network_policy: None, snapshots_enabled: true, snapshots_max_workspace_bytes: @@ -541,6 +553,11 @@ pub struct Engine { /// turn-loop's empty-tool_uses branch to surface `` /// sentinels into the parent's transcript before deciding to end the turn. pub(super) rx_subagent_completion: mpsc::UnboundedReceiver, + /// Sub-agent completions already injected into the parent transcript. + /// Channel delivery and watchdog reconciliation both mark this set so a + /// dropped event can be synthesized once without duplicating a later + /// delivery. 
+ delivered_subagent_completion_ids: HashSet, cancel_token: CancellationToken, shared_cancel_token: Arc>, /// Latched reason for the current cancellation, mirrored to @@ -823,8 +840,10 @@ impl Engine { let subagent_manager = new_shared_subagent_manager_with_timeout( config.workspace.clone(), config.max_subagents, + config.max_admitted_subagents, config.subagent_heartbeat_timeout, config.launch_concurrency, + config.subagent_token_budget, ); let shell_manager = config .runtime_services @@ -911,6 +930,7 @@ impl Engine { tx_event, tx_subagent_completion, rx_subagent_completion, + delivered_subagent_completion_ids: HashSet::new(), cancel_token: cancel_token.clone(), shared_cancel_token: shared_cancel_token.clone(), cancel_reason: cancel_reason.clone(), @@ -1244,6 +1264,7 @@ impl Engine { dynamic_tools, hook_executor, verbosity, + provenance, } => { self.handle_send_message( content, @@ -1266,6 +1287,7 @@ impl Engine { dynamic_tools, hook_executor, verbosity, + provenance, ) .await; } @@ -1446,6 +1468,50 @@ impl Engine { ))) .await; } + Op::SetSubagentRuntimeConfig { + enabled, + max_subagents, + launch_concurrency, + max_spawn_depth, + api_timeout_secs, + heartbeat_timeout_secs, + } => { + self.config.subagents_enabled = enabled; + self.config.max_subagents = + max_subagents.clamp(1, crate::config::MAX_SUBAGENTS); + self.config.launch_concurrency = + launch_concurrency.clamp(1, self.config.max_subagents); + self.config.max_spawn_depth = + max_spawn_depth.min(codewhale_config::MAX_SPAWN_DEPTH_CEILING); + self.config.subagent_api_timeout = Duration::from_secs(api_timeout_secs); + self.config.subagent_heartbeat_timeout = + Duration::from_secs(heartbeat_timeout_secs); + let launch_gate_applied = { + let mut manager = self.subagent_manager.write().await; + manager.update_runtime_limits( + self.config.max_subagents, + self.config.max_admitted_subagents, + self.config.subagent_heartbeat_timeout, + self.config.launch_concurrency, + self.config.subagent_token_budget, + ) + }; + 
let launch_note = if launch_gate_applied { + "" + } else { + "; launch_concurrency takes full effect after active sub-agents finish or the session restarts" + }; + let _ = self + .tx_event + .send(Event::status(format!( + "Sub-agent runtime updated: enabled={enabled}, max_subagents={}, launch_concurrency={}, max_depth={}{}", + self.config.max_subagents, + self.config.launch_concurrency, + self.config.max_spawn_depth, + launch_note + ))) + .await; + } Op::SyncSession { session_id, messages, @@ -1550,6 +1616,7 @@ impl Engine { Vec::new(), self.config.hook_executor.clone(), self.config.verbosity.clone(), + UserInputProvenance::ExternalUser, ) .await; } @@ -1634,6 +1701,7 @@ impl Engine { auto_model: bool, reasoning_effort: Option<&str>, reasoning_effort_auto: bool, + provenance: UserInputProvenance, ) -> ContentBlock { let today = chrono::Local::now().format("%Y-%m-%d").to_string(); let working_set_summary = self @@ -1650,6 +1718,15 @@ impl Engine { // `render_environment_block` for the prefix-cache rationale). 
format!("Current workspace: {}", self.config.workspace.display()), format!("Current model: {routed_model}"), + format!("Input provenance: {}", provenance.as_str()), + format!( + "Input authority: {}", + if provenance.can_authorize_work() { + "external_current_turn" + } else { + "non_authoritative" + } + ), ]; if auto_model { lines.push(format!("Auto model route: {routed_model}")); @@ -1685,6 +1762,40 @@ impl Engine { auto_model: bool, reasoning_effort: Option<&str>, reasoning_effort_auto: bool, + ) -> Message { + self.user_text_message_with_turn_metadata_for_route_and_provenance( + text, + routed_model, + auto_model, + reasoning_effort, + reasoning_effort_auto, + UserInputProvenance::ExternalUser, + ) + } + + fn runtime_text_message_with_turn_metadata( + &self, + text: String, + provenance: UserInputProvenance, + ) -> Message { + self.user_text_message_with_turn_metadata_for_route_and_provenance( + text, + &self.session.model, + self.session.auto_model, + self.session.reasoning_effort.as_deref(), + self.session.reasoning_effort_auto, + provenance, + ) + } + + fn user_text_message_with_turn_metadata_for_route_and_provenance( + &self, + text: String, + routed_model: &str, + auto_model: bool, + reasoning_effort: Option<&str>, + reasoning_effort_auto: bool, + provenance: UserInputProvenance, ) -> Message { // Place the user text first and turn_meta last so that the leading // bytes of each user message stay stable across date / model-route / @@ -1706,6 +1817,7 @@ impl Engine { auto_model, reasoning_effort, reasoning_effort_auto, + provenance, ), ], } @@ -1752,6 +1864,7 @@ impl Engine { Vec::new(), self.config.hook_executor.clone(), self.config.verbosity.clone(), + UserInputProvenance::SubAgentHandoff, ) .await; } @@ -1849,6 +1962,7 @@ impl Engine { self.emit_goal_updated().await; } + #[allow(clippy::too_many_arguments)] async fn handle_send_message( &mut self, content: String, @@ -1871,12 +1985,25 @@ impl Engine { dynamic_tools: Vec, hook_executor: Option>, verbosity: 
Option, + provenance: UserInputProvenance, ) { + let input_policy = effective_input_policy( + provenance, + mode, + &content, + allow_shell, + trust_mode, + auto_approve, + approval_mode, + ); + if let Some(status) = input_policy.status.clone() { + let _ = self.tx_event.send(Event::status(status)).await; + } // Reset cancel token for fresh turn (in case previous was cancelled) self.reset_cancel_token(); // Track current mode so mid-turn messages include the right mode in turn metadata. - self.current_mode = mode; + self.current_mode = input_policy.mode; // Drain stale steer messages from previous turns. while self.rx_steer.try_recv().is_ok() {} @@ -1972,23 +2099,25 @@ impl Engine { self.session .working_set .observe_user_message(&content, &self.session.workspace); - let force_update_plan_first = should_force_update_plan_first(mode, &content); + let force_update_plan_first = should_force_update_plan_first(input_policy.mode, &content); - let agent_approval_mode = agent_approval_mode_for_turn(auto_approve, approval_mode); - self.session.auto_approve = auto_approve; + let agent_approval_mode = + agent_approval_mode_for_turn(input_policy.auto_approve, input_policy.approval_mode); + self.session.auto_approve = input_policy.auto_approve; // Only track the Agent-mode approval — Yolo/Plan have fixed // approval policies that are derived from the mode itself. 
- if mode == AppMode::Agent { + if input_policy.mode == AppMode::Agent { self.session.approval_mode = agent_approval_mode; } // Add user message to session - let user_msg = self.user_text_message_with_turn_metadata_for_route( + let user_msg = self.user_text_message_with_turn_metadata_for_route_and_provenance( content, &model, auto_model, reasoning_effort.as_deref(), reasoning_effort_auto, + provenance, ); self.session.add_message(user_msg); @@ -2018,10 +2147,10 @@ impl Engine { self.session.reasoning_effort = reasoning_effort; self.session.reasoning_effort_auto = reasoning_effort_auto; self.session.auto_model = auto_model; - self.session.allow_shell = allow_shell; - self.config.allow_shell = allow_shell; - self.session.trust_mode = trust_mode; - self.config.trust_mode = trust_mode; + self.session.allow_shell = input_policy.allow_shell; + self.config.allow_shell = input_policy.allow_shell; + self.session.trust_mode = input_policy.trust_mode; + self.config.trust_mode = input_policy.trust_mode; self.config.translation_enabled = translation_enabled; self.config.show_thinking = show_thinking; self.config.verbosity = verbosity; @@ -2034,14 +2163,17 @@ impl Engine { let todo_list = self.config.todos.clone(); let plan_state = self.config.plan_state.clone(); - let tool_context = self.build_tool_context(mode, auto_approve); + let tool_context = self.build_tool_context(input_policy.mode, input_policy.auto_approve); let builder = self - .build_turn_tool_registry_builder(mode, todo_list, plan_state) + .build_turn_tool_registry_builder(input_policy.mode, todo_list, plan_state) .with_dynamic_tools(&dynamic_tools); - let fork_context_for_runtime = if self.config.features.enabled(Feature::Subagents) { + let subagents_available = + self.config.subagents_enabled && self.config.features.enabled(Feature::Subagents); + + let fork_context_for_runtime = if subagents_available { let state = StructuredState::capture( - mode.label(), + input_policy.mode.label(), 
self.config.workspace.clone(), std::env::current_dir().ok(), &self.session.working_set, @@ -2064,7 +2196,7 @@ impl Engine { // envelopes into `Event::SubAgentMailbox` so the UI can route them // to the matching in-transcript card. The drainer exits naturally // when every cloned sender is dropped at turn-end. - let mailbox_for_runtime = if self.config.features.enabled(Feature::Subagents) { + let mailbox_for_runtime = if subagents_available { let cancel_token = self.cancel_token.child_token(); let (mailbox, mut receiver) = Mailbox::new(cancel_token.clone()); let tx_event_clone = self.tx_event.clone(); @@ -2103,9 +2235,9 @@ impl Engine { None }; - let mut tool_registry = match mode { + let mut tool_registry = match input_policy.mode { AppMode::Agent | AppMode::Yolo => { - if self.config.features.enabled(Feature::Subagents) { + if subagents_available { let runtime = if let Some(client) = self.deepseek_client.clone() { let mut rt = SubAgentRuntime::new( client, @@ -2179,7 +2311,7 @@ impl Engine { let mut catalog = build_model_tool_catalog( registry.to_api_tools_with_cache(true), mcp_tools, - mode, + input_policy.mode, &self.config.tools_always_load, ); for tool in &mut catalog { @@ -2209,7 +2341,7 @@ impl Engine { &mut turn, tool_registry.as_ref(), tools, - mode, + input_policy.mode, force_update_plan_first, )) .catch_unwind() @@ -2305,6 +2437,7 @@ impl Engine { dynamic_tools: dynamic_tools.clone(), hook_executor: self.config.hook_executor.clone(), verbosity: self.config.verbosity.clone(), + provenance: UserInputProvenance::Runtime, }) .await; } @@ -3064,6 +3197,112 @@ fn goal_objective_for_prompt( // byte-stable, and strict chat-template providers never see a system message // outside messages[0]. 
+#[derive(Debug, Clone)] +struct EffectiveInputPolicy { + mode: AppMode, + allow_shell: bool, + trust_mode: bool, + auto_approve: bool, + approval_mode: crate::tui::approval::ApprovalMode, + status: Option, +} + +fn effective_input_policy( + provenance: UserInputProvenance, + requested_mode: AppMode, + content: &str, + allow_shell: bool, + trust_mode: bool, + auto_approve: bool, + approval_mode: crate::tui::approval::ApprovalMode, +) -> EffectiveInputPolicy { + let mut mode = requested_mode; + let mut trust_mode = trust_mode; + let mut auto_approve = auto_approve; + let mut approval_mode = approval_mode; + let mut status = None; + + if !provenance.can_authorize_work() { + let had_auto_authority = matches!(mode, AppMode::Yolo) + || trust_mode + || auto_approve + || matches!(approval_mode, crate::tui::approval::ApprovalMode::Auto); + if matches!(mode, AppMode::Yolo) { + mode = AppMode::Agent; + } + trust_mode = false; + auto_approve = false; + if matches!(approval_mode, crate::tui::approval::ApprovalMode::Auto) { + approval_mode = crate::tui::approval::ApprovalMode::Suggest; + } + if had_auto_authority { + status = Some(format!( + "Input provenance '{}' is not external user input; continuing with approvals required.", + provenance.as_str() + )); + } + } else if is_review_only_user_intent(content) { + // Advisory only: never silently override an explicitly chosen mode + // (Yolo/Agent) or strip its tools. Surface the signal so the user can + // opt into read-only Plan mode themselves with `/mode plan`. + status = Some( + "This looks like a review or inspection request. 
Keeping your current mode and tools — run `/mode plan` for strict read-only tools.".to_string(), + ); + } + + EffectiveInputPolicy { + mode, + allow_shell, + trust_mode, + auto_approve, + approval_mode, + status, + } +} + +fn is_review_only_user_intent(content: &str) -> bool { + let lower = content.to_ascii_lowercase(); + let asks_to_inspect = [ + "look", + "check", + "review", + "inspect", + "scan", + "audit", + "看看", + "看一下", + "检查", + "审查", + ] + .iter() + .any(|needle| lower.contains(needle)); + if !asks_to_inspect { + return false; + } + + let explicit_write = [ + "fix", + "change", + "update", + "implement", + "apply", + "patch", + "modify", + "edit", + "write", + "commit", + "修", + "改", + "补", + "提交", + "写", + ] + .iter() + .any(|needle| lower.contains(needle)); + + !explicit_write +} + fn agent_approval_mode_for_turn( auto_approve: bool, approval_mode: crate::tui::approval::ApprovalMode, @@ -3275,7 +3514,7 @@ use self::tool_catalog::{ REQUEST_USER_INPUT_NAME, active_tools_for_step, build_model_tool_catalog, ensure_advanced_tooling, execute_code_execution_tool, execute_tool_search, initial_active_tools, is_tool_search_tool, maybe_hydrate_requested_deferred_tool, - missing_tool_error_message, + missing_tool_error_message, tool_catalog_consistency_issues, }; #[cfg(test)] use self::tool_catalog::{ diff --git a/crates/tui/src/core/engine/context.rs b/crates/tui/src/core/engine/context.rs index 1ebe5840dd..4c0f9e5691 100644 --- a/crates/tui/src/core/engine/context.rs +++ b/crates/tui/src/core/engine/context.rs @@ -68,11 +68,11 @@ const TOOL_RESULT_CONTEXT_SOFT_LIMIT_CHARS: usize = 2_000; /// Snippet length kept when compacting tool output for model context. const TOOL_RESULT_CONTEXT_SNIPPET_CHARS: usize = 900; /// Hard cap for tool output inserted into a large-context model. 
-const LARGE_CONTEXT_TOOL_RESULT_HARD_LIMIT_CHARS: usize = 180_000; +const LARGE_CONTEXT_TOOL_RESULT_HARD_LIMIT_CHARS: usize = 48_000; /// Soft cap for known noisy tools inserted into a large-context model. -const LARGE_CONTEXT_TOOL_RESULT_SOFT_LIMIT_CHARS: usize = 60_000; -/// Snippet length kept when compacting large-context tool output. -const LARGE_CONTEXT_TOOL_RESULT_SNIPPET_CHARS: usize = 40_000; +const LARGE_CONTEXT_TOOL_RESULT_SOFT_LIMIT_CHARS: usize = 8_000; +/// Snippet length kept when compacting large-context noisy output. +const LARGE_CONTEXT_TOOL_RESULT_SNIPPET_CHARS: usize = 4_000; /// Context window size at which tool output limits can be relaxed. const LARGE_CONTEXT_WINDOW_TOKENS: u32 = 500_000; /// Max chars to keep from metadata-provided output summaries. diff --git a/crates/tui/src/core/engine/loop_guard.rs b/crates/tui/src/core/engine/loop_guard.rs index 1f13cb47d1..0257c4ad2d 100644 --- a/crates/tui/src/core/engine/loop_guard.rs +++ b/crates/tui/src/core/engine/loop_guard.rs @@ -9,7 +9,6 @@ use serde_json::Value; const IDENTICAL_CALL_BLOCK_THRESHOLD: u32 = 3; const IDENTICAL_READ_ONLY_CALL_BLOCK_THRESHOLD: u32 = 2; -const DELEGATED_TOOL_LOOP_BLOCK_THRESHOLD: u32 = 4; const BROAD_READ_ONLY_TOOL_LOOP_BLOCK_THRESHOLD: u32 = 6; const FAILURE_WARN_THRESHOLD: u32 = 3; const FAILURE_HALT_THRESHOLD: u32 = 8; @@ -118,10 +117,6 @@ fn is_delegated_tool(tool: &str) -> bool { } fn no_progress_attempt_threshold(tool: &str, _read_only: bool) -> Option { - if is_delegated_tool(tool) { - return Some(DELEGATED_TOOL_LOOP_BLOCK_THRESHOLD); - } - let tool_name = tool.to_ascii_lowercase(); let search_like = matches!( tool, @@ -292,23 +287,33 @@ mod tests { } #[test] - fn repeated_agent_delegation_is_capped_separately() { + fn distinct_agent_delegation_is_not_turn_capped() { let mut guard = LoopGuard::default(); - for idx in 0..(DELEGATED_TOOL_LOOP_BLOCK_THRESHOLD - 1) { + for idx in 0..12 { assert_eq!( guard.record_attempt("agent", &json!({"prompt": format!("task 
{idx}")}), false), AttemptDecision::Proceed ); } + } - let AttemptDecision::Block { kind, message } = - guard.record_attempt("agent", &json!({"prompt": "task final"}), false) + #[test] + fn identical_agent_delegation_is_still_blocked() { + let mut guard = LoopGuard::default(); + let args = json!({"prompt": "repeat the same work"}); + + assert_eq!( + guard.record_attempt("agent", &args, false), + AttemptDecision::Proceed + ); + + let AttemptDecision::Block { kind, message } = guard.record_attempt("agent", &args, false) else { - panic!("repeated delegation should force synthesis"); + panic!("identical delegation should still be blocked"); }; - assert_eq!(kind, AttemptBlockKind::NoProgressToolLoop); - assert!(message.contains("without new user input")); + assert_eq!(kind, AttemptBlockKind::IdenticalToolCall); + assert!(message.contains("already ran this turn")); } #[test] diff --git a/crates/tui/src/core/engine/lsp_hooks.rs b/crates/tui/src/core/engine/lsp_hooks.rs index 544bb9039c..593c72f6bd 100644 --- a/crates/tui/src/core/engine/lsp_hooks.rs +++ b/crates/tui/src/core/engine/lsp_hooks.rs @@ -83,7 +83,10 @@ impl Engine { if rendered.is_empty() { return; } - self.add_session_message(self.user_text_message_with_turn_metadata(rendered)) - .await; + self.add_session_message(self.runtime_text_message_with_turn_metadata( + rendered, + crate::core::ops::UserInputProvenance::Runtime, + )) + .await; } } diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index 1de2fb23a6..9a39f30e6e 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -987,6 +987,121 @@ fn agent_catalog_keeps_edit_file_loaded_when_fuzz_is_omitted() { assert!(hydrated_this_batch.is_empty()); } +#[test] +fn agent_catalog_advertises_and_searches_core_action_tools() { + let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default()); + let registry = engine + .build_turn_tool_registry_builder( + AppMode::Agent, + 
engine.config.todos.clone(), + engine.config.plan_state.clone(), + ) + .build(engine.build_tool_context(AppMode::Agent, false)); + let always_load = HashSet::new(); + let mut catalog = build_model_tool_catalog( + registry.to_api_tools_with_cache(true), + vec![], + AppMode::Agent, + &always_load, + ); + ensure_advanced_tooling(&mut catalog, AppMode::Agent, &always_load); + + let issues = tool_catalog_consistency_issues(&catalog, ®istry); + assert!( + issues.is_empty(), + "Agent catalog should match the runtime registry: {issues:?}" + ); + + let names = catalog + .iter() + .map(|tool| tool.name.as_str()) + .collect::>(); + for tool_name in ["exec_shell", "write_file", "edit_file", "apply_patch"] { + assert!( + names.contains(tool_name), + "{tool_name} must be advertised in Agent mode" + ); + + let mut active = initial_active_tools(&catalog); + let result = execute_tool_search( + TOOL_SEARCH_BM25_NAME, + &json!({ "query": tool_name }), + &catalog, + &mut active, + ) + .expect("tool search succeeds"); + let references = result.metadata.as_ref().unwrap()["tool_references"] + .as_array() + .expect("tool references are an array"); + assert!( + references + .iter() + .any(|reference| reference.as_str() == Some(tool_name)), + "{tool_name} should be discoverable by tool_search" + ); + assert!( + active.contains(tool_name), + "{tool_name} should be activated by tool_search" + ); + } +} + +#[test] +fn catalog_consistency_self_check_flags_registered_core_tool_missing_from_catalog() { + let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default()); + let registry = engine + .build_turn_tool_registry_builder( + AppMode::Agent, + engine.config.todos.clone(), + engine.config.plan_state.clone(), + ) + .build(engine.build_tool_context(AppMode::Agent, false)); + let always_load = HashSet::new(); + let mut catalog = build_model_tool_catalog( + registry.to_api_tools_with_cache(true), + vec![], + AppMode::Agent, + &always_load, + ); + catalog.retain(|tool| tool.name != 
"exec_shell"); + + let issues = tool_catalog_consistency_issues(&catalog, ®istry); + assert!( + issues + .iter() + .any(|issue| issue.contains("registered core tool 'exec_shell'")), + "missing registered exec_shell should be reported: {issues:?}" + ); +} + +#[test] +fn tool_search_reports_known_core_action_tool_when_current_catalog_omits_it() { + let catalog = vec![api_tool("read_file")]; + let mut active = initial_active_tools(&catalog); + + let result = execute_tool_search( + TOOL_SEARCH_BM25_NAME, + &json!({ "query": "exec_shell" }), + &catalog, + &mut active, + ) + .expect("tool search succeeds"); + + assert!(!active.contains("exec_shell")); + let unavailable = result.metadata.as_ref().unwrap()["unavailable_tool_references"] + .as_array() + .expect("unavailable references are an array"); + assert!( + unavailable.iter().any(|reference| { + reference["tool_name"].as_str() == Some("exec_shell") + && reference["reason"] + .as_str() + .is_some_and(|reason| reason.contains("allow_shell = true")) + }), + "known-but-omitted core action tool should surface with a reason: {unavailable:?}" + ); +} + #[test] fn tools_always_load_overrides_default_native_deferral() { let always_load = HashSet::from(["git_blame".to_string()]); @@ -2308,15 +2423,20 @@ fn internal_context_budget_tiers_reserved_output_by_window() { } #[test] -fn v4_tool_outputs_keep_large_file_reads_in_context() { +fn v4_keeps_large_file_reads_but_compacts_noisy_shell_output() { let content = "0123456789abcdef\n".repeat(2_000); let output = ToolResult::success(content.clone()); - let v4_context = compact_tool_result_for_context("deepseek-v4-pro", "exec_shell", &output); + let v4_context = compact_tool_result_for_context("deepseek-v4-pro", "read_file", &output); assert_eq!(v4_context, content.trim()); + let v4_shell_context = + compact_tool_result_for_context("deepseek-v4-pro", "exec_shell", &output); + assert!(v4_shell_context.contains("exec_shell output compacted to protect context")); + 
assert!(v4_shell_context.len() < v4_context.len()); + let legacy_context = - compact_tool_result_for_context("deepseek-v3.2-128k", "exec_shell", &output); + compact_tool_result_for_context("deepseek-v3.2-128k", "read_file", &output); assert!(legacy_context.contains("output compacted to protect context")); assert!(legacy_context.len() < v4_context.len()); } @@ -2577,6 +2697,29 @@ fn turn_metadata_includes_current_local_date_without_working_set() { assert!(text.starts_with("\n")); assert!(text.contains(&format!("Current local date: {today}"))); assert!(text.contains("Current model: deepseek-v4-flash")); + assert!(text.contains("Input provenance: external_user")); + assert!(text.contains("Input authority: external_current_turn")); +} + +#[test] +fn runtime_turn_metadata_marks_non_authoritative_input() { + let tmp = tempdir().expect("tempdir"); + let config = EngineConfig { + workspace: tmp.path().to_path_buf(), + ..Default::default() + }; + let (engine, _handle) = Engine::new(config, &Config::default()); + let msg = engine.runtime_text_message_with_turn_metadata( + "改吧".to_string(), + UserInputProvenance::AssistantGenerated, + ); + let last_block = msg.content.last().expect("turn metadata block"); + let ContentBlock::Text { text, .. 
} = last_block else { + panic!("expected text metadata block"); + }; + + assert!(text.contains("Input provenance: assistant_generated")); + assert!(text.contains("Input authority: non_authoritative")); } #[test] @@ -2606,6 +2749,83 @@ fn turn_metadata_includes_auto_model_route() { assert!(!text.contains("debug this regression")); } +#[test] +fn non_external_provenance_cannot_inherit_yolo_auto_approval() { + let policy = effective_input_policy( + UserInputProvenance::SubAgentHandoff, + AppMode::Yolo, + "改吧", + true, + true, + true, + crate::tui::approval::ApprovalMode::Auto, + ); + + assert_eq!(policy.mode, AppMode::Agent); + assert!(policy.allow_shell); + assert!(!policy.trust_mode); + assert!(!policy.auto_approve); + assert_eq!( + policy.approval_mode, + crate::tui::approval::ApprovalMode::Suggest + ); + assert!( + policy + .status + .as_deref() + .is_some_and(|status| status.contains("not external user input")) + ); +} + +#[test] +fn review_only_external_input_keeps_explicit_mode_with_advisory_hint() { + // Review-only wording must never silently override an explicitly chosen + // mode (Yolo/Agent) or strip its tools. The heuristic should only surface + // an advisory hint suggesting `/mode plan` for strict read-only tools. 
+ + let agent = effective_input_policy( + UserInputProvenance::ExternalUser, + AppMode::Agent, + "你在帮我看看 外卖部分还哪里没有使用多语言", + true, + true, + true, + crate::tui::approval::ApprovalMode::Auto, + ); + assert_eq!(agent.mode, AppMode::Agent); + assert!(agent.allow_shell); + assert!(agent.trust_mode); + assert!(agent.auto_approve); + assert!(matches!( + agent.approval_mode, + crate::tui::approval::ApprovalMode::Auto + )); + assert!(agent.status.as_deref().is_some_and(|status| { + status.contains("Keeping your current mode") && status.contains("/mode plan") + })); + + let yolo = effective_input_policy( + UserInputProvenance::ExternalUser, + AppMode::Yolo, + "check the failing tests and review the logs", + true, + true, + true, + crate::tui::approval::ApprovalMode::Auto, + ); + assert_eq!(yolo.mode, AppMode::Yolo); + assert!(yolo.allow_shell); + assert!(yolo.trust_mode); + assert!(yolo.auto_approve); + assert!(matches!( + yolo.approval_mode, + crate::tui::approval::ApprovalMode::Auto + )); + assert!(yolo.status.as_deref().is_some_and(|status| { + status.contains("Keeping your current mode") && status.contains("/mode plan") + })); +} + #[test] fn turn_metadata_omits_mode_policy() { let tmp = tempdir().expect("tempdir"); diff --git a/crates/tui/src/core/engine/tool_catalog.rs b/crates/tui/src/core/engine/tool_catalog.rs index 1063d44979..b3e7d37142 100644 --- a/crates/tui/src/core/engine/tool_catalog.rs +++ b/crates/tui/src/core/engine/tool_catalog.rs @@ -11,6 +11,7 @@ use std::time::Duration; use serde_json::{Value, json}; +use crate::mcp::McpPool; use crate::models::Tool; use crate::tools::spec::{ToolError, ToolResult, optional_u64, required_str}; use crate::tui::app::AppMode; @@ -64,6 +65,36 @@ pub(super) const DEFAULT_ACTIVE_NATIVE_TOOLS: &[&str] = &[ "write_file", ]; +const CORE_ACTION_TOOL_FALLBACKS: &[CoreActionToolFallback] = &[ + CoreActionToolFallback { + name: "exec_shell", + description: "Run shell commands in the workspace.", + unavailable_reason: "Not present in 
the current model-visible catalog. Shell requires Agent or Yolo mode with allow_shell = true and no command tool allow/deny gate blocking it.", + }, + CoreActionToolFallback { + name: "write_file", + description: "Create or overwrite files in the workspace.", + unavailable_reason: "Not present in the current model-visible catalog. File writes require Agent or Yolo mode and no command tool allow/deny gate blocking write_file.", + }, + CoreActionToolFallback { + name: "edit_file", + description: "Edit existing files by replacing text.", + unavailable_reason: "Not present in the current model-visible catalog. File edits require Agent or Yolo mode and no command tool allow/deny gate blocking edit_file.", + }, + CoreActionToolFallback { + name: "apply_patch", + description: "Apply a patch to one or more workspace files.", + unavailable_reason: "Not present in the current model-visible catalog. Patches require Agent or Yolo mode, the apply_patch feature, and no command tool allow/deny gate blocking apply_patch.", + }, +]; + +#[derive(Debug, Clone, Copy)] +struct CoreActionToolFallback { + name: &'static str, + description: &'static str, + unavailable_reason: &'static str, +} + pub(super) fn should_default_defer_tool(name: &str, always_load: &HashSet) -> bool { if always_load.contains(name) { return false; @@ -307,6 +338,83 @@ fn tool_search_haystack(tool: &Tool) -> String { ) } +fn tool_search_fallback_haystack(fallback: CoreActionToolFallback) -> String { + format!( + "{}\n{}\n{}", + fallback.name.to_lowercase(), + fallback.description.to_lowercase(), + fallback.unavailable_reason.to_lowercase() + ) +} + +fn catalog_contains_tool(catalog: &[Tool], name: &str) -> bool { + catalog.iter().any(|tool| tool.name == name) +} + +fn unavailable_core_action_tools_with_regex( + catalog: &[Tool], + query: &str, + max_results: usize, +) -> Result, ToolError> { + if max_results == 0 { + return Ok(Vec::new()); + } + let regex = regex::Regex::new(query) + .map_err(|err| 
ToolError::invalid_input(format!("Invalid regex query: {err}")))?; + Ok(CORE_ACTION_TOOL_FALLBACKS + .iter() + .copied() + .filter(|fallback| !catalog_contains_tool(catalog, fallback.name)) + .filter(|fallback| regex.is_match(&tool_search_fallback_haystack(*fallback))) + .take(max_results) + .collect()) +} + +fn unavailable_core_action_tools_with_bm25_like( + catalog: &[Tool], + query: &str, + max_results: usize, +) -> Vec { + if max_results == 0 { + return Vec::new(); + } + let terms: Vec = query + .split_whitespace() + .map(|term| term.trim().to_lowercase()) + .filter(|term| !term.is_empty()) + .collect(); + if terms.is_empty() { + return Vec::new(); + } + + let mut scored: Vec<(i64, CoreActionToolFallback)> = Vec::new(); + for fallback in CORE_ACTION_TOOL_FALLBACKS { + if catalog_contains_tool(catalog, fallback.name) { + continue; + } + let hay = tool_search_fallback_haystack(*fallback); + let name = fallback.name.to_lowercase(); + let mut score = 0i64; + for term in &terms { + if hay.contains(term) { + score += 1; + } + if name.contains(term) { + score += 2; + } + } + if score > 0 { + scored.push((score, *fallback)); + } + } + scored.sort_by(|a, b| b.0.cmp(&a.0).then_with(|| a.1.name.cmp(b.1.name))); + scored + .into_iter() + .take(max_results) + .map(|(_, fallback)| fallback) + .collect() +} + fn discover_tools_with_regex( catalog: &[Tool], query: &str, @@ -439,6 +547,51 @@ fn suggest_tool_names(catalog: &[Tool], requested: &str, limit: usize) -> Vec bool { + is_tool_search_tool(name) + || matches!(name, CODE_EXECUTION_TOOL_NAME | JS_EXECUTION_TOOL_NAME) + || McpPool::is_mcp_tool(name) +} + +pub(super) fn tool_catalog_consistency_issues( + catalog: &[Tool], + registry: &crate::tools::ToolRegistry, +) -> Vec { + let catalog_names = catalog + .iter() + .map(|tool| tool.name.as_str()) + .collect::>(); + let registry_api_tools = registry.to_api_tools(); + let registry_model_visible_names = registry_api_tools + .iter() + .map(|tool| tool.name.as_str()) + 
.collect::>(); + let mut issues = Vec::new(); + + for tool in catalog { + if is_synthetic_catalog_tool(&tool.name) { + continue; + } + if !registry.contains(&tool.name) { + issues.push(format!( + "catalog advertises '{}' but no registered handler exists", + tool.name + )); + } + } + + for name in DEFAULT_ACTIVE_NATIVE_TOOLS { + if registry_model_visible_names.contains(name) && !catalog_names.contains(name) { + issues.push(format!( + "registered core tool '{name}' is missing from the model/search catalog" + )); + } + } + + issues.sort(); + issues +} + pub(super) fn missing_tool_error_message(tool_name: &str, catalog: &[Tool]) -> String { let suggestions = suggest_tool_names(catalog, tool_name, 3); let shell_hint = if is_shell_tool_name(tool_name) { @@ -752,6 +905,12 @@ pub(super) fn execute_tool_search( } else { discover_tools_with_bm25_like(catalog, query, max_results) }; + let remaining_results = max_results.saturating_sub(discovered.len()); + let unavailable = if tool_name == TOOL_SEARCH_REGEX_NAME { + unavailable_core_action_tools_with_regex(catalog, query, remaining_results)? 
+ } else { + unavailable_core_action_tools_with_bm25_like(catalog, query, remaining_results) + }; for name in &discovered { active_tools.insert(name.clone()); @@ -761,10 +920,21 @@ pub(super) fn execute_tool_search( .iter() .map(|name| json!({"type": "tool_reference", "tool_name": name})) .collect::>(); + let unavailable_references = unavailable + .iter() + .map(|fallback| { + json!({ + "type": "unavailable_tool_reference", + "tool_name": fallback.name, + "reason": fallback.unavailable_reason, + }) + }) + .collect::>(); let payload = json!({ "type": "tool_search_tool_search_result", "tool_references": references, + "unavailable_tool_references": unavailable_references.clone(), }); Ok(ToolResult { @@ -772,6 +942,7 @@ pub(super) fn execute_tool_search( success: true, metadata: Some(json!({ "tool_references": discovered, + "unavailable_tool_references": unavailable_references, })), }) } diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs index f2b66b4633..9c6355e954 100644 --- a/crates/tui/src/core/engine/turn_loop.rs +++ b/crates/tui/src/core/engine/turn_loop.rs @@ -6,12 +6,39 @@ //! checkpoints, and loop termination. 
use super::*; +use crate::core::ops::UserInputProvenance; use crate::prompt_zones::PinnedPrefix; -fn loop_guard_block_tool_result(message: String, kind: AttemptBlockKind) -> ToolResult { +fn loop_guard_block_tool_result( + tool_name: &str, + message: String, + kind: AttemptBlockKind, +) -> ToolResult { + if loop_guard_block_is_guidance(tool_name) { + return ToolResult::success(message).with_metadata(json!({ + "loop_guard": kind.as_str(), + "loop_guard_guidance": true, + "executed": false, + })); + } + ToolResult::error(message).with_metadata(json!({"loop_guard": kind.as_str()})) } +fn loop_guard_block_is_guidance(tool_name: &str) -> bool { + let normalized = tool_name.to_ascii_lowercase(); + matches!( + normalized.as_str(), + "grep_files" + | "file_search" + | "list_dir" + | "web_search" + | "fetch_url" + | "tool_search_tool_regex" + | "tool_search_tool_bm25" + ) || normalized.contains("search") +} + const MAX_APPROVAL_INTENT_SUMMARY_CHARS: usize = 2_000; fn approval_intent_summary(text: &str) -> Option { @@ -42,7 +69,27 @@ impl Engine { async fn drain_subagent_completion_events(&mut self, status_label: &str) -> usize { let mut completions: Vec = Vec::new(); while let Ok(completion) = self.rx_subagent_completion.try_recv() { - completions.push(completion); + if self + .delivered_subagent_completion_ids + .insert(completion.agent_id.clone()) + { + completions.push(completion); + } + } + + let synthesized = { + let manager = self.subagent_manager.read().await; + manager.terminal_results_excluding(&self.delivered_subagent_completion_ids) + }; + for result in synthesized { + if self + .delivered_subagent_completion_ids + .insert(result.agent_id.clone()) + { + completions.push(crate::tools::subagent::subagent_completion_from_result( + &result, + )); + } } let count = completions.len(); @@ -93,6 +140,16 @@ impl Engine { if !tool_catalog.is_empty() { ensure_advanced_tooling(&mut tool_catalog, mode, &self.config.tools_always_load); } + if let Some(registry) = tool_registry 
{ + let issues = tool_catalog_consistency_issues(&tool_catalog, registry); + if !issues.is_empty() { + tracing::warn!( + target: "engine.tool_catalog", + ?issues, + "model/search tool catalog is inconsistent with the runtime registry" + ); + } + } let mut active_tool_names = initial_active_tools(&tool_catalog); let mut loop_guard = LoopGuard::default(); let mut goal_continuations_this_turn = 0u32; @@ -1185,7 +1242,10 @@ impl Engine { format!("[REPL round {round_num} output]\n{}", round.stdout) }; self.add_session_message( - self.user_text_message_with_turn_metadata(feedback), + self.runtime_text_message_with_turn_metadata( + feedback, + UserInputProvenance::Runtime, + ), ) .await; } @@ -1197,9 +1257,10 @@ impl Engine { ))) .await; self.add_session_message( - self.user_text_message_with_turn_metadata(format!( - "[REPL round {round_num} execution failed]\n{e}" - )), + self.runtime_text_message_with_turn_metadata( + format!("[REPL round {round_num} execution failed]\n{e}"), + UserInputProvenance::Runtime, + ), ) .await; } @@ -1259,9 +1320,10 @@ impl Engine { ) .await { - self.add_session_message( - self.user_text_message_with_turn_metadata(continuation), - ) + self.add_session_message(self.runtime_text_message_with_turn_metadata( + continuation, + UserInputProvenance::Runtime, + )) .await; turn.next_step(); continue; @@ -1573,7 +1635,7 @@ impl Engine { loop_guard.record_attempt(&tool_name, &tool_input, read_only) { crate::logging::warn(message.clone()); - guard_result = Some(loop_guard_block_tool_result(message, kind)); + guard_result = Some(loop_guard_block_tool_result(&tool_name, message, kind)); } plans.push(ToolExecutionPlan { @@ -2444,10 +2506,23 @@ fn subagent_completion_runtime_message(payload: &str) -> Message { // role carries no semantic weight here — only template-compatibility cost. 
Message { role: "user".to_string(), - content: vec![ContentBlock::Text { - text: subagent_completion_runtime_text(payload), - cache_control: None, - }], + content: vec![ + ContentBlock::Text { + text: subagent_completion_runtime_text(payload), + cache_control: None, + }, + runtime_event_turn_metadata_block(UserInputProvenance::SubAgentHandoff), + ], + } +} + +fn runtime_event_turn_metadata_block(provenance: UserInputProvenance) -> ContentBlock { + ContentBlock::Text { + text: format!( + "\nInput provenance: {}\nInput authority: non_authoritative\n", + provenance.as_str() + ), + cache_control: None, } } @@ -2481,6 +2556,14 @@ fn shell_completion_status_text( { let command = truncate_runtime_status_field(&event.command, 80); status.push_str(&format!(": {command}")); + if let Some(owner) = event + .owner_agent_name + .as_deref() + .or(event.owner_agent_id.as_deref()) + .filter(|owner| !owner.trim().is_empty()) + { + status.push_str(&format!(" (by {owner})")); + } } Some(status) @@ -2857,6 +2940,8 @@ mod tests { stdout_tail: "running tests".to_string(), stderr_tail: "test failed".to_string(), linked_task_id: Some("task_1".to_string()), + owner_agent_id: Some("agent_verifier".to_string()), + owner_agent_name: Some("verifier".to_string()), }], "", ) @@ -2864,6 +2949,7 @@ mod tests { assert!(status.contains("1 background shell job finished (1 failed)")); assert!(status.contains("cargo test -p codewhale-tui")); + assert!(status.contains("by verifier")); assert!(!status.contains("runtime_event")); assert!(!status.contains("manual exec_shell_wait polling")); assert!(!status.contains("stderr_tail")); @@ -3022,6 +3108,7 @@ mod tests { #[test] fn loop_guard_block_tool_result_counts_as_failure() { let result = loop_guard_block_tool_result( + "edit_file", "Blocked: repeated call".to_string(), AttemptBlockKind::IdenticalToolCall, ); @@ -3040,6 +3127,35 @@ mod tests { ); } + #[test] + fn loop_guard_search_block_tool_result_is_guidance() { + let result = 
loop_guard_block_tool_result( + "grep_files", + "Stop calling `grep_files`; use current evidence.".to_string(), + AttemptBlockKind::NoProgressToolLoop, + ); + + assert!( + result.success, + "read-only search loop blocks should guide the model without feeding the failure loop" + ); + let metadata = result.metadata.as_ref().expect("metadata"); + assert_eq!( + metadata.get("loop_guard").and_then(|v| v.as_str()), + Some("no_progress_tool_loop") + ); + assert_eq!( + metadata + .get("loop_guard_guidance") + .and_then(|v| v.as_bool()), + Some(true) + ); + assert_eq!( + metadata.get("executed").and_then(|v| v.as_bool()), + Some(false) + ); + } + #[test] fn resolve_auto_effort_ignores_stored_turn_metadata() { let messages = vec![Message { diff --git a/crates/tui/src/core/ops.rs b/crates/tui/src/core/ops.rs index 3ca444b55e..a8ea61cf4f 100644 --- a/crates/tui/src/core/ops.rs +++ b/crates/tui/src/core/ops.rs @@ -27,6 +27,44 @@ pub struct SessionSnapshot { pub mode: String, } +/// Origin of text being introduced as a user-role turn. +/// +/// Chat providers force several runtime/control-plane signals through +/// `role = "user"` for compatibility, so role alone is not authority. +#[allow(dead_code)] // Some origins are reserved for ingestion sites landing after the first gate. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum UserInputProvenance { + /// Text typed or submitted through the active UI/API input boundary. + ExternalUser, + /// Runtime-generated continuation, diagnostic, or tool feedback. + Runtime, + /// Completion/event text from a child worker or sub-agent handoff. + SubAgentHandoff, + /// Text restored from a saved/imported transcript. + ImportedTranscript, + /// Text recalled from memory or another persisted source. + MemoryRecall, + /// Assistant-authored text that is shaped like a user response. 
+ AssistantGenerated, +} + +impl UserInputProvenance { + pub fn as_str(self) -> &'static str { + match self { + Self::ExternalUser => "external_user", + Self::Runtime => "runtime", + Self::SubAgentHandoff => "subagent_handoff", + Self::ImportedTranscript => "imported_transcript", + Self::MemoryRecall => "memory_recall", + Self::AssistantGenerated => "assistant_generated", + } + } + + pub fn can_authorize_work(self) -> bool { + matches!(self, Self::ExternalUser) + } +} + /// Operations that can be submitted to the engine. #[derive(Debug, Clone)] pub enum Op { @@ -65,6 +103,9 @@ pub enum Op { /// `ToolCallBefore` hooks may deny a tool call with exit code 2. hook_executor: Option>, verbosity: Option, + /// Structural input origin. This gates whether the turn may inherit + /// YOLO/auto-approval authority; user-shaped text is not enough. + provenance: UserInputProvenance, }, /// Execute a user-submitted composer shell command (`! `) without @@ -121,6 +162,16 @@ pub enum Op { /// Update the SSE idle timeout used for subsequent streamed turns. SetStreamChunkTimeout { timeout_secs: u64 }, + /// Update sub-agent runtime controls for subsequent turns. + SetSubagentRuntimeConfig { + enabled: bool, + max_subagents: usize, + launch_concurrency: usize, + max_spawn_depth: u32, + api_timeout_secs: u64, + heartbeat_timeout_secs: u64, + }, + /// Sync engine session state (used for resume/load) SyncSession { session_id: Option, diff --git a/crates/tui/src/fleet/executor.rs b/crates/tui/src/fleet/executor.rs index fe4e95979c..4e8b0d5a46 100644 --- a/crates/tui/src/fleet/executor.rs +++ b/crates/tui/src/fleet/executor.rs @@ -31,8 +31,8 @@ use super::worker_runtime::fleet_task_prompt; /// `--auto` is always passed: a headless worker has no human to approve tool /// calls, so it runs with full (policy-gated) tool access. `--output-format /// stream-json` makes the worker emit the NDJSON event stream this module -/// parses. 
Recursion depth is inherited from the worker's own config -/// (`[runtime] max_spawn_depth`, default [`codewhale_config::DEFAULT_SPAWN_DEPTH`]). +/// parses. Fleet recursion depth is inherited from the worker's own config +/// (`[fleet.exec] max_spawn_depth`, default [`codewhale_config::DEFAULT_SPAWN_DEPTH`]). /// /// Secrets are NEVER placed on the argv: provider credentials are resolved by /// the worker process from its own config/keyring exactly like an interactive diff --git a/crates/tui/src/fleet/worker_runtime.rs b/crates/tui/src/fleet/worker_runtime.rs index e862c4d0a3..8b23fde008 100644 --- a/crates/tui/src/fleet/worker_runtime.rs +++ b/crates/tui/src/fleet/worker_runtime.rs @@ -552,7 +552,10 @@ mod tests { ..Default::default() }; let hardened = apply_exec_hardening(spec.clone(), &exec); - assert_eq!(hardened.max_spawn_depth, 3); + assert_eq!( + hardened.max_spawn_depth, + codewhale_config::MAX_SPAWN_DEPTH_CEILING + ); let exec = codewhale_config::FleetExecConfig { max_spawn_depth: 0, diff --git a/crates/tui/src/localization.rs b/crates/tui/src/localization.rs index ad358b9e1f..b93d086f8a 100644 --- a/crates/tui/src/localization.rs +++ b/crates/tui/src/localization.rs @@ -302,6 +302,7 @@ pub enum MessageId { CmdFeedbackDescription, CmdHfDescription, CmdHelpDescription, + CmdProfileDescription, CmdHomeDescription, CmdHooksDescription, CmdAgentDescription, @@ -425,6 +426,7 @@ pub enum MessageId { KbShellControls, KbExitEmpty, KbCommandPalette, + KbCancelBackgroundShellJobs, KbFuzzyFilePicker, KbCompactInspector, KbLastMessagePager, @@ -437,7 +439,6 @@ pub enum MessageId { KbJumpPlanAgentYolo, KbAltJumpPlanAgentYolo, KbFocusSidebar, - KbTogglePlanAgent, KbSessionPicker, KbPasteAttach, KbCopySelection, @@ -742,6 +743,7 @@ pub const ALL_MESSAGE_IDS: &[MessageId] = &[ MessageId::CmdFeedbackDescription, MessageId::CmdHfDescription, MessageId::CmdHelpDescription, + MessageId::CmdProfileDescription, MessageId::CmdHomeDescription, MessageId::CmdHooksDescription, 
MessageId::CmdAgentDescription, @@ -867,6 +869,7 @@ pub const ALL_MESSAGE_IDS: &[MessageId] = &[ MessageId::KbShellControls, MessageId::KbExitEmpty, MessageId::KbCommandPalette, + MessageId::KbCancelBackgroundShellJobs, MessageId::KbFuzzyFilePicker, MessageId::KbCompactInspector, MessageId::KbLastMessagePager, @@ -879,7 +882,6 @@ pub const ALL_MESSAGE_IDS: &[MessageId] = &[ MessageId::KbJumpPlanAgentYolo, MessageId::KbAltJumpPlanAgentYolo, MessageId::KbFocusSidebar, - MessageId::KbTogglePlanAgent, MessageId::KbSessionPicker, MessageId::KbPasteAttach, MessageId::KbCopySelection, @@ -1380,6 +1382,7 @@ fn english(id: MessageId) -> &'static str { MessageId::CmdFeedbackDescription => "Generate a GitHub feedback URL", MessageId::CmdHfDescription => "Inspect Hugging Face MCP setup and concepts", MessageId::CmdHelpDescription => "Show help information", + MessageId::CmdProfileDescription => "Switch to a named config profile", MessageId::CmdHomeDescription => "Show home dashboard with stats and quick actions", MessageId::CmdHooksDescription => "List configured lifecycle hooks (read-only)", MessageId::CmdAgentDescription => { @@ -1569,6 +1572,9 @@ fn english(id: MessageId) -> &'static str { MessageId::KbShellControls => "Background the running foreground shell command", MessageId::KbExitEmpty => "Exit when input is empty", MessageId::KbCommandPalette => "Open the command palette", + MessageId::KbCancelBackgroundShellJobs => { + "Cancel all running background shell jobs (Tasks sidebar)" + } MessageId::KbFuzzyFilePicker => "Open the fuzzy file picker (insert @path on Enter)", MessageId::KbCompactInspector => "Open compact session context inspector", MessageId::KbLastMessagePager => "Open pager for the last message (when input is empty)", @@ -1587,9 +1593,8 @@ fn english(id: MessageId) -> &'static str { MessageId::KbJumpPlanAgentYolo => "Trigger hotbar slots", MessageId::KbAltJumpPlanAgentYolo => "Alternative jump to Plan / Agent / YOLO mode", MessageId::KbFocusSidebar => { - 
"Focus Work / Tasks / Agents / Context / Auto sidebar; Ctrl+Alt+0 hides it" + "Focus Pinned / Tasks / Agents / Context / Auto sidebar; Ctrl+Alt+0 toggles pinned sidebar" } - MessageId::KbTogglePlanAgent => "Toggle between Plan and Agent modes", MessageId::KbSessionPicker => "Open the session picker", MessageId::KbPasteAttach => "Paste text or attach a clipboard image", MessageId::KbCopySelection => "Copy the current selection (Cmd+C on macOS)", @@ -1635,7 +1640,7 @@ fn english(id: MessageId) -> &'static str { MessageId::HomeQuickHelp => "/help - Show help", MessageId::HomeModeTips => "Mode Tips", MessageId::HomeAgentModeTip => "Agent mode - Use tools for autonomous tasks", - MessageId::HomeAgentModeReviewTip => " Use Ctrl+X to review in Plan mode before executing", + MessageId::HomeAgentModeReviewTip => " Type /mode plan to review before executing", MessageId::HomeAgentModeYoloTip => " Type /mode yolo to enable full tool access", MessageId::HomeYoloModeTip => "YOLO mode - Full tool access, no approvals", MessageId::HomeYoloModeCaution => " Be careful with destructive operations!", @@ -1987,6 +1992,7 @@ fn vietnamese(id: MessageId) -> Option<&'static str> { MessageId::CmdFeedbackDescription => "Tạo một URL để gửi phản hồi trên GitHub", MessageId::CmdHfDescription => "Kiểm tra thiết lập và khái niệm Hugging Face MCP", MessageId::CmdHelpDescription => "Hiển thị thông tin trợ giúp", + MessageId::CmdProfileDescription => "Chuyển sang profile cấu hình đã đặt tên", MessageId::CmdHomeDescription => { "Hiển thị bảng điều khiển trang chủ với số liệu thống kê và hành động nhanh" } @@ -2193,6 +2199,9 @@ fn vietnamese(id: MessageId) -> Option<&'static str> { MessageId::KbShellControls => "Chuyển lệnh shell đang chạy ở tiền cảnh xuống nền", MessageId::KbExitEmpty => "Thoát khi khung nhập trống", MessageId::KbCommandPalette => "Mở bảng lệnh (command palette)", + MessageId::KbCancelBackgroundShellJobs => { + "Hủy mọi tác vụ shell nền đang chạy (thanh bên Tasks)" + } 
MessageId::KbFuzzyFilePicker => { "Mở trình tìm file nhanh (fuzzy) (chèn @path khi nhấn Enter)" } @@ -2217,9 +2226,8 @@ fn vietnamese(id: MessageId) -> Option<&'static str> { "Phím tắt thay thế để nhảy sang chế độ Plan / Agent / YOLO" } MessageId::KbFocusSidebar => { - "Focus vào thanh bên Work / Tasks / Agents / Context / Auto; Ctrl+Alt+0 để ẩn" + "Focus vào thanh bên Pinned / Tasks / Agents / Context / Auto; Ctrl+Alt+0 để ẩn" } - MessageId::KbTogglePlanAgent => "Chuyển đổi giữa chế độ Plan và Agent", MessageId::KbSessionPicker => "Mở bảng chọn phiên làm việc", MessageId::KbPasteAttach => "Dán văn bản hoặc đính kèm hình ảnh từ bộ nhớ tạm", MessageId::KbCopySelection => "Sao chép vùng chọn hiện tại (Cmd+C trên macOS)", @@ -2265,9 +2273,7 @@ fn vietnamese(id: MessageId) -> Option<&'static str> { MessageId::HomeQuickHelp => "/help - Hiển thị trợ giúp", MessageId::HomeModeTips => "Mẹo về Chế độ", MessageId::HomeAgentModeTip => "Chế độ Agent - Sử dụng công cụ cho các nhiệm vụ tự chủ", - MessageId::HomeAgentModeReviewTip => { - " Sử dụng Ctrl+X để xem xét ở chế độ Plan trước khi thực thi" - } + MessageId::HomeAgentModeReviewTip => " Nhập /mode plan để xem xét trước khi thực thi", MessageId::HomeAgentModeYoloTip => " Nhập /mode yolo để bật toàn quyền truy cập công cụ", MessageId::HomeYoloModeTip => { "Chế độ YOLO - Toàn quyền truy cập công cụ, không cần phê duyệt" @@ -2796,6 +2802,7 @@ fn japanese(id: MessageId) -> Option<&'static str> { MessageId::CmdFeedbackDescription => "GitHub フィードバック URL を生成", MessageId::CmdHfDescription => "Hugging Face MCP の設定と概念を確認", MessageId::CmdHelpDescription => "ヘルプを表示", + MessageId::CmdProfileDescription => "名前付き設定プロファイルに切り替え", MessageId::CmdHomeDescription => "統計とクイックアクション付きのホームダッシュボードを表示", MessageId::CmdHooksDescription => { "設定済みのライフサイクルフックを一覧表示(読み取り専用)" @@ -2986,6 +2993,9 @@ fn japanese(id: MessageId) -> Option<&'static str> { MessageId::KbShellControls => "実行中のフォアグラウンドコマンドをバックグラウンドへ移す", MessageId::KbExitEmpty => "入力が空の時に終了", 
MessageId::KbCommandPalette => "コマンドパレットを開く", + MessageId::KbCancelBackgroundShellJobs => { + "実行中のバックグラウンド shell ジョブをすべてキャンセル(Tasks サイドバー)" + } MessageId::KbFuzzyFilePicker => "ファジーファイルピッカーを開く(Enter で @path を挿入)", MessageId::KbCompactInspector => "コンパクトなセッションコンテキスト検査ツールを開く", MessageId::KbLastMessagePager => "最後のメッセージのページャーを開く(入力が空の時)", @@ -3004,9 +3014,8 @@ fn japanese(id: MessageId) -> Option<&'static str> { MessageId::KbJumpPlanAgentYolo => "ホットバースロットを起動", MessageId::KbAltJumpPlanAgentYolo => "Plan / Agent / YOLO モードへの代替ジャンプ", MessageId::KbFocusSidebar => { - "Work / Tasks / Agents / Context / Auto / Hidden サイドバーにフォーカス" + "Pinned / Tasks / Agents / Context / Auto / Hidden サイドバーにフォーカス" } - MessageId::KbTogglePlanAgent => "Plan モードと Agent モードを切り替え", MessageId::KbSessionPicker => "セッションピッカーを開く", MessageId::KbPasteAttach => "テキストを貼り付けまたはクリップボード画像を添付", MessageId::KbCopySelection => "現在の選択をコピー(macOS は Cmd+C)", @@ -3054,7 +3063,7 @@ fn japanese(id: MessageId) -> Option<&'static str> { MessageId::HomeQuickHelp => "/help - ヘルプを表示", MessageId::HomeModeTips => "モードヒント", MessageId::HomeAgentModeTip => "Agent モード - ツールを使って自律的なタスクを実行", - MessageId::HomeAgentModeReviewTip => " 実行前に Ctrl+X で Plan モードでレビュー", + MessageId::HomeAgentModeReviewTip => " 実行前のレビューには /mode plan を入力", MessageId::HomeAgentModeYoloTip => " /mode yolo と入力して完全なツールアクセスを有効化", MessageId::HomeYoloModeTip => "YOLO モード - 完全なツールアクセス、承認なし", MessageId::HomeYoloModeCaution => " 破壊的な操作には注意してください!", @@ -3382,6 +3391,7 @@ fn chinese_simplified(id: MessageId) -> Option<&'static str> { MessageId::CmdFeedbackDescription => "生成 GitHub 反馈链接", MessageId::CmdHfDescription => "检查 Hugging Face MCP 设置和概念", MessageId::CmdHelpDescription => "显示帮助信息", + MessageId::CmdProfileDescription => "切换到命名配置配置文件", MessageId::CmdHomeDescription => "显示主页面板,含统计与快捷操作", MessageId::CmdHooksDescription => "列出已配置的生命周期钩子(只读)", MessageId::CmdAgentDescription => "打开持久子代理会话:/agent [0-3] ", @@ -3548,6 +3558,9 @@ fn chinese_simplified(id: MessageId) -> 
Option<&'static str> { MessageId::KbShellControls => "将正在运行的前台命令转入后台", MessageId::KbExitEmpty => "输入框为空时退出", MessageId::KbCommandPalette => "打开命令面板", + MessageId::KbCancelBackgroundShellJobs => { + "取消所有正在运行的后台 shell 作业(Tasks 侧边栏)" + } MessageId::KbFuzzyFilePicker => "打开模糊文件选择器(按 Enter 插入 @path)", MessageId::KbCompactInspector => "打开紧凑会话上下文检查器", MessageId::KbLastMessagePager => "打开最后一条消息的分页器(输入框为空时)", @@ -3561,8 +3574,7 @@ fn chinese_simplified(id: MessageId) -> Option<&'static str> { } MessageId::KbJumpPlanAgentYolo => "触发快捷栏槽位", MessageId::KbAltJumpPlanAgentYolo => "替代快捷键跳转到 Plan / Agent / YOLO 模式", - MessageId::KbFocusSidebar => "聚焦 Work / 任务 / 代理 / Context / 自动 / 隐藏侧边栏", - MessageId::KbTogglePlanAgent => "在 Plan 和 Agent 模式之间切换", + MessageId::KbFocusSidebar => "聚焦 Pinned / 任务 / 代理 / Context / 自动 / 隐藏侧边栏", MessageId::KbSessionPicker => "打开会话选择器", MessageId::KbPasteAttach => "粘贴文本或附加剪贴板图片", MessageId::KbCopySelection => "复制当前选中内容(macOS 为 Cmd+C)", @@ -3606,7 +3618,7 @@ fn chinese_simplified(id: MessageId) -> Option<&'static str> { MessageId::HomeQuickHelp => "/help - 显示帮助", MessageId::HomeModeTips => "模式提示", MessageId::HomeAgentModeTip => "Agent 模式 - 使用工具执行自主任务", - MessageId::HomeAgentModeReviewTip => " 按 Ctrl+X 可在 Plan 模式下审查后再执行", + MessageId::HomeAgentModeReviewTip => " 输入 /mode plan 可在执行前审查", MessageId::HomeAgentModeYoloTip => " 输入 /mode yolo 启用完整工具访问", MessageId::HomeYoloModeTip => "YOLO 模式 - 完整工具访问,无需审批", MessageId::HomeYoloModeCaution => " 请小心破坏性操作!", @@ -3918,6 +3930,7 @@ fn portuguese_brazil(id: MessageId) -> Option<&'static str> { MessageId::CmdFeedbackDescription => "Gerar uma URL de feedback no GitHub", MessageId::CmdHfDescription => "Inspecionar configuracao e conceitos do Hugging Face MCP", MessageId::CmdHelpDescription => "Exibir informações de ajuda", + MessageId::CmdProfileDescription => "Alternar para um perfil de configuracao nomeado", MessageId::CmdHomeDescription => "Exibir o painel inicial com estatísticas e ações rápidas", 
MessageId::CmdHooksDescription => { "Listar hooks de ciclo de vida configurados (somente leitura)" @@ -4122,6 +4135,9 @@ fn portuguese_brazil(id: MessageId) -> Option<&'static str> { MessageId::KbShellControls => "Enviar o comando em primeiro plano para segundo plano", MessageId::KbExitEmpty => "Sair quando entrada vazia", MessageId::KbCommandPalette => "Abrir paleta de comandos", + MessageId::KbCancelBackgroundShellJobs => { + "Cancelar todos os trabalhos shell em segundo plano em execução (barra lateral Tasks)" + } MessageId::KbFuzzyFilePicker => { "Abrir seletor de arquivo fuzzy (insere @path ao pressionar Enter)" } @@ -4144,9 +4160,8 @@ fn portuguese_brazil(id: MessageId) -> Option<&'static str> { MessageId::KbJumpPlanAgentYolo => "Acionar slots da hotbar", MessageId::KbAltJumpPlanAgentYolo => "Salto alternativo para modo Plan / Agent / YOLO", MessageId::KbFocusSidebar => { - "Focar barra lateral Work / Tasks / Agents / Context / Auto / Ocultar" + "Focar barra lateral Pinned / Tasks / Agents / Context / Auto / Ocultar" } - MessageId::KbTogglePlanAgent => "Alternar entre modos Plan e Agent", MessageId::KbSessionPicker => "Abrir seletor de sessões", MessageId::KbPasteAttach => "Colar texto ou anexar imagem da área de transferência", MessageId::KbCopySelection => "Copiar seleção atual (Cmd+C no macOS)", @@ -4192,9 +4207,7 @@ fn portuguese_brazil(id: MessageId) -> Option<&'static str> { MessageId::HomeQuickHelp => "/help - Exibir ajuda", MessageId::HomeModeTips => "Dicas de Modo", MessageId::HomeAgentModeTip => "Modo Agent - Use ferramentas para tarefas autônomas", - MessageId::HomeAgentModeReviewTip => { - " Use Ctrl+X para revisar no modo Plan antes de executar" - } + MessageId::HomeAgentModeReviewTip => " Digite /mode plan para revisar antes de executar", MessageId::HomeAgentModeYoloTip => { " Digite /mode yolo para habilitar acesso total às ferramentas" } @@ -4540,6 +4553,7 @@ fn spanish_latin_america(id: MessageId) -> Option<&'static str> { 
MessageId::CmdFeedbackDescription => "Generar una URL de feedback en GitHub", MessageId::CmdHfDescription => "Inspeccionar configuracion y conceptos de Hugging Face MCP", MessageId::CmdHelpDescription => "Mostrar información de ayuda", + MessageId::CmdProfileDescription => "Cambiar a un perfil de configuración con nombre", MessageId::CmdHomeDescription => { "Mostrar el panel inicial con estadísticas y acciones rápidas" } @@ -4754,6 +4768,9 @@ fn spanish_latin_america(id: MessageId) -> Option<&'static str> { MessageId::KbShellControls => "Enviar el comando en primer plano a segundo plano", MessageId::KbExitEmpty => "Salir cuando la entrada está vacía", MessageId::KbCommandPalette => "Abrir paleta de comandos", + MessageId::KbCancelBackgroundShellJobs => { + "Cancelar todos los trabajos shell en segundo plano en ejecución (barra lateral Tasks)" + } MessageId::KbFuzzyFilePicker => { "Abrir selector de archivo fuzzy (inserta @ruta al presionar Enter)" } @@ -4776,9 +4793,8 @@ fn spanish_latin_america(id: MessageId) -> Option<&'static str> { MessageId::KbJumpPlanAgentYolo => "Activar ranuras de la hotbar", MessageId::KbAltJumpPlanAgentYolo => "Salto alternativo a modo Plan / Agent / YOLO", MessageId::KbFocusSidebar => { - "Enfocar barra lateral Work / Tasks / Agents / Context / Auto / Ocultar" + "Enfocar barra lateral Pinned / Tasks / Agents / Context / Auto / Ocultar" } - MessageId::KbTogglePlanAgent => "Alternar entre modos Plan y Agent", MessageId::KbSessionPicker => "Abrir selector de sesiones", MessageId::KbPasteAttach => "Pegar texto o adjuntar imagen del portapapeles", MessageId::KbCopySelection => "Copiar selección actual (Cmd+C en macOS)", @@ -4826,9 +4842,7 @@ fn spanish_latin_america(id: MessageId) -> Option<&'static str> { MessageId::HomeQuickHelp => "/help - Mostrar ayuda", MessageId::HomeModeTips => "Tips de Modo", MessageId::HomeAgentModeTip => "Modo Agent - Usar herramientas para tareas autónomas", - MessageId::HomeAgentModeReviewTip => { - " Usa Ctrl+X 
para revisar en modo Plan antes de ejecutar" - } + MessageId::HomeAgentModeReviewTip => " Escribe /mode plan para revisar antes de ejecutar", MessageId::HomeAgentModeYoloTip => { " Escribe /mode yolo para habilitar acceso total a las herramientas" } diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index b5103cd702..6805a11f29 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -1,5 +1,7 @@ //! CLI entry point for CodeWhale. +#![allow(clippy::uninlined_format_args)] + use std::io::{self, IsTerminal, Read, Write}; use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; @@ -1156,8 +1158,9 @@ async fn main() -> Result<()> { || args.disallowed_tools.is_some() || args.append_system_prompt.is_some(); if needs_engine { + let provider = config.api_provider(); let max_subagents = cli.max_subagents.map_or_else( - || config.max_subagents(), + || config.max_subagents_for_provider(provider), |value| value.clamp(1, MAX_SUBAGENTS), ); let auto_mode = args.auto || yolo; @@ -1204,8 +1207,9 @@ async fn main() -> Result<()> { let workspace = cli.workspace.clone().unwrap_or_else(|| { std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")) }); + let provider = config.api_provider(); let max_subagents = cli.max_subagents.map_or_else( - || config.max_subagents(), + || config.max_subagents_for_provider(provider), |value| value.clamp(1, MAX_SUBAGENTS), ); run_swebench_command(&config, &model, workspace, max_subagents, args).await @@ -5884,8 +5888,9 @@ async fn run_interactive( } let model = config.default_model(); + let provider = config.api_provider(); let max_subagents = cli.max_subagents.map_or_else( - || config.max_subagents(), + || config.max_subagents_for_provider(provider), |value| value.clamp(1, MAX_SUBAGENTS), ); let use_alt_screen = should_use_alt_screen(cli, config); @@ -6282,6 +6287,14 @@ async fn run_exec_agent( let auto_model = route.auto_model; let effective_provider = route.provider; let effective_model = route.model; + let 
max_subagents = if max_subagents == config.max_subagents_for_provider(config.api_provider()) + { + execution_config + .max_subagents_for_provider(effective_provider) + .clamp(1, MAX_SUBAGENTS) + } else { + max_subagents + }; let effective_reasoning_effort = route .reasoning_effort .and_then(|effort| cli_reasoning_effort_value(&execution_config, effort)); @@ -6338,13 +6351,19 @@ async fn run_exec_agent( show_thinking: settings.show_thinking, max_steps: max_turns, max_subagents, - launch_concurrency: execution_config.launch_concurrency(), + max_admitted_subagents: execution_config + .max_admitted_subagents_for_provider(effective_provider) + .max(max_subagents), + launch_concurrency: execution_config.launch_concurrency_for_provider(effective_provider), + subagents_enabled: execution_config.subagents_enabled_for_provider(effective_provider), features: execution_config.features(), compaction, todos: new_shared_todo_list(), plan_state: new_shared_plan_state(), goal_state: crate::tools::goal::new_shared_goal_state(), - max_spawn_depth: crate::tools::subagent::DEFAULT_MAX_SPAWN_DEPTH, + max_spawn_depth: execution_config.subagent_max_spawn_depth_for_provider(effective_provider), + subagent_token_budget: execution_config + .subagent_token_budget_for_provider(effective_provider), network_policy, snapshots_enabled: execution_config.snapshots_config().enabled, snapshots_max_workspace_bytes: execution_config @@ -6355,13 +6374,13 @@ async fn run_exec_agent( runtime_services: crate::tools::spec::RuntimeToolServices::default(), subagent_model_overrides: execution_config.subagent_model_overrides(), subagent_api_timeout: std::time::Duration::from_secs( - execution_config.subagent_api_timeout_secs(), + execution_config.subagent_api_timeout_secs_for_provider(effective_provider), ), stream_chunk_timeout: std::time::Duration::from_secs( execution_config.stream_chunk_timeout_secs(), ), subagent_heartbeat_timeout: std::time::Duration::from_secs( - 
execution_config.subagent_heartbeat_timeout_secs(), + execution_config.subagent_heartbeat_timeout_secs_for_provider(effective_provider), ), prefer_bwrap: execution_config.prefer_bwrap.unwrap_or(false), memory_enabled: execution_config.memory_enabled(), @@ -6464,6 +6483,7 @@ async fn run_exec_agent( .unwrap_or_default() }, verbosity: execution_config.verbosity.clone(), + provenance: crate::core::ops::UserInputProvenance::ExternalUser, }) .await?; diff --git a/crates/tui/src/prompts/constitution.md b/crates/tui/src/prompts/constitution.md index ecdf20ca54..94de6c95c6 100644 --- a/crates/tui/src/prompts/constitution.md +++ b/crates/tui/src/prompts/constitution.md @@ -364,12 +364,15 @@ Reach for them when the work is genuinely independent: yourself, then decide whether to open a sub-agent based on what A found. Do not pre-open dependent work. - **Concurrency, honestly**: Up to 20 sub-agents run at once by default - (`[subagents].max_concurrent`, default 20 / ceiling 20). Open one `agent` - call per genuinely independent target in the same turn — the dispatcher - runs them in parallel — then coordinate as completion events report back. - Need more than the cap? Wait for some to finish, or ask the user. To fan - out more gently you can lower `[subagents].launch_concurrency` (how many - start at once); the default is the full cap. + (`[subagents].max_concurrent`, default 20 / ceiling 20), and additional + accepted workers queue up to the configured admission cap while launch + slots drain. Open one `agent` call per genuinely independent target in the + same turn — the dispatcher runs them in parallel or queues them — then + coordinate as completion events report back. Let runtime capacity errors, + provider rate-limit pauses, and user-visible cost/risk decide whether to + launch more; do not invent a smaller per-turn limit. To fan out more gently + you can lower `[subagents].launch_concurrency` (how many start at once); + the default is the full running cap. 
## Thinking Delegation diff --git a/crates/tui/src/retry_status.rs b/crates/tui/src/retry_status.rs index f4d3d869f0..e46d8e1e65 100644 --- a/crates/tui/src/retry_status.rs +++ b/crates/tui/src/retry_status.rs @@ -99,7 +99,7 @@ pub fn snapshot() -> RetryState { pub fn note_rate_limit(delay: Duration) { let deadline = Instant::now() + delay; if let Ok(mut current) = rate_limit_cell().lock() - && current.map_or(true, |existing| existing < deadline) + && current.is_none_or(|existing| existing < deadline) { *current = Some(deadline); } diff --git a/crates/tui/src/runtime_api.rs b/crates/tui/src/runtime_api.rs index ec02d8eeea..132d9f5c26 100644 --- a/crates/tui/src/runtime_api.rs +++ b/crates/tui/src/runtime_api.rs @@ -940,6 +940,16 @@ async fn resume_session_thread( .await .map_err(|e| ApiError::internal(format!("Failed to seed thread history: {e}")))?; + // Link the session to the new thread so that `ensure_engine_loaded` + // can restore the full message history from the session file. + if let Err(e) = state + .runtime_threads + .set_thread_session_id(&thread.id, &id) + .await + { + tracing::warn!("Failed to link session {id} to thread {}: {e}", thread.id); + } + let summary = format!( "Resumed session '{}' ({} messages) into thread {}", session.metadata.title, msg_count, thread.id @@ -1014,6 +1024,19 @@ async fn create_session_from_thread( .save_session(&session) .map_err(|e| ApiError::internal(format!("Failed to save session: {e}")))?; + // Link the session to the thread so that `ensure_engine_loaded` can + // restore the full message history from the session file. 
+ if let Err(e) = state + .runtime_threads + .set_thread_session_id(&detail.thread.id, &session_id) + .await + { + tracing::warn!( + "Failed to link session {session_id} to thread {}: {e}", + detail.thread.id + ); + } + Ok(( StatusCode::CREATED, Json(CreateSessionResponse { @@ -1048,29 +1071,117 @@ fn messages_from_thread_detail(detail: &ThreadDetail) -> Vec { let mut messages = Vec::new(); for turn in &detail.turns { + let mut assistant_blocks: Vec = Vec::new(); + let mut user_blocks: Vec = Vec::new(); + let flush_assistant = |blocks: &mut Vec, msgs: &mut Vec| { + if !blocks.is_empty() { + msgs.push(Message { + role: "assistant".to_string(), + content: std::mem::take(blocks), + }); + } + }; + let flush_user = |blocks: &mut Vec, msgs: &mut Vec| { + if !blocks.is_empty() { + msgs.push(Message { + role: "user".to_string(), + content: std::mem::take(blocks), + }); + } + }; + for item_id in &turn.item_ids { let Some(item) = items_by_id.get(item_id.as_str()) else { continue; }; - let role = match item.kind { - TurnItemKind::UserMessage => "user", - TurnItemKind::AgentMessage => "assistant", - _ => continue, - }; - let Some(text) = item.detail.as_deref().map(str::trim) else { - continue; - }; - if text.is_empty() { - continue; + match item.kind { + TurnItemKind::UserMessage => { + flush_assistant(&mut assistant_blocks, &mut messages); + + let text = item.detail.as_deref().map(str::trim).unwrap_or(""); + if !text.is_empty() { + user_blocks.push(ContentBlock::Text { + text: text.to_string(), + cache_control: None, + }); + } + } + TurnItemKind::AgentMessage => { + flush_user(&mut user_blocks, &mut messages); + let text = item.detail.as_deref().map(str::trim).unwrap_or(""); + if !text.is_empty() { + assistant_blocks.push(ContentBlock::Text { + text: text.to_string(), + cache_control: None, + }); + } + } + TurnItemKind::AgentReasoning => { + flush_user(&mut user_blocks, &mut messages); + let thinking = item.detail.as_deref().map(str::trim).unwrap_or(""); + if 
!thinking.is_empty() { + assistant_blocks.push(ContentBlock::Thinking { + thinking: thinking.to_string(), + signature: None, + }); + } + } + TurnItemKind::ToolCall => { + // Check metadata to distinguish tool_use from tool_result. + let meta = item.metadata.as_ref(); + let is_tool_result = meta.and_then(|m| m.get("tool_result_for")).is_some(); + if is_tool_result { + flush_assistant(&mut assistant_blocks, &mut messages); + + let tool_use_id = meta + .and_then(|m| m.get("tool_result_for")) + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let content = item.detail.as_deref().unwrap_or("").to_string(); + let is_error = meta + .and_then(|m| m.get("is_error")) + .and_then(|v| v.as_bool()) + .unwrap_or(false); + let content_blocks = meta + .and_then(|m| m.get("content_blocks")) + .and_then(|v| v.as_array()) + .cloned(); + user_blocks.push(ContentBlock::ToolResult { + tool_use_id, + content, + is_error: if is_error { Some(true) } else { None }, + content_blocks, + }); + } else { + flush_user(&mut user_blocks, &mut messages); + let tool_use_id = meta + .and_then(|m| m.get("tool_use_id")) + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let tool_name = meta + .and_then(|m| m.get("tool_name")) + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let input_str = item.detail.as_deref().unwrap_or("{}"); + let input: serde_json::Value = + serde_json::from_str(input_str).unwrap_or(serde_json::Value::Null); + assistant_blocks.push(ContentBlock::ToolUse { + id: tool_use_id, + name: tool_name, + input, + caller: None, + }); + } + } + // Skip other item kinds (file_change, command_execution, etc.) 
+ _ => {} } - messages.push(Message { - role: role.to_string(), - content: vec![ContentBlock::Text { - text: text.to_string(), - cache_control: None, - }], - }); } + flush_assistant(&mut assistant_blocks, &mut messages); + flush_user(&mut user_blocks, &mut messages); } messages @@ -1193,8 +1304,20 @@ async fn save_current_session( .save_session(&session) .map_err(|e| ApiError::internal(format!("Failed to save session: {e}")))?; + // Link the session to the thread so that `ensure_engine_loaded` can + // restore the full message history (including thinking/tool blocks) + // from the session file instead of reconstructing from turns. + let session_id = session.metadata.id.clone(); + if let Err(e) = state + .runtime_threads + .set_thread_session_id(&thread_id, &session_id) + .await + { + tracing::warn!("Failed to link session {session_id} to thread {thread_id}: {e}"); + } + Ok(Json(SaveSessionResponse { - session_id: session.metadata.id.clone(), + session_id, session: session_to_detail(session), })) } @@ -3454,6 +3577,175 @@ mod tests { assert_eq!(block["is_error"].as_bool(), Some(false)); } + #[test] + fn messages_from_thread_detail_batches_tool_results() { + let now = Utc::now(); + let turn_id = "turn_detail".to_string(); + let thread = ThreadRecord { + schema_version: 2, + id: "thr_detail".to_string(), + created_at: now, + updated_at: now, + model: DEFAULT_TEXT_MODEL.to_string(), + workspace: PathBuf::from("."), + mode: "agent".to_string(), + allow_shell: false, + trust_mode: false, + auto_approve: false, + latest_turn_id: Some(turn_id.clone()), + latest_response_bookmark: None, + archived: false, + system_prompt: None, + task_id: None, + title: None, + session_id: None, + }; + let turn = TurnRecord { + schema_version: 2, + id: turn_id.clone(), + thread_id: thread.id.clone(), + status: RuntimeTurnStatus::Completed, + input_summary: "check".to_string(), + created_at: now, + started_at: Some(now), + ended_at: Some(now), + duration_ms: Some(0), + usage: None, + error: 
None, + item_ids: vec![ + "item_user".to_string(), + "item_reasoning".to_string(), + "item_tool_use".to_string(), + "item_result_one".to_string(), + "item_result_two".to_string(), + "item_answer".to_string(), + ], + steer_count: 0, + }; + let item = |id: &str, + kind: TurnItemKind, + summary: &str, + detail: Option<&str>, + metadata: Option| { + crate::runtime_threads::TurnItemRecord { + schema_version: 2, + id: id.to_string(), + turn_id: turn_id.clone(), + kind, + status: TurnItemLifecycleStatus::Completed, + summary: summary.to_string(), + detail: detail.map(str::to_string), + metadata, + artifact_refs: Vec::new(), + started_at: Some(now), + ended_at: Some(now), + } + }; + let detail = ThreadDetail { + thread, + turns: vec![turn], + items: vec![ + item( + "item_user", + TurnItemKind::UserMessage, + "check", + Some("check"), + None, + ), + item( + "item_reasoning", + TurnItemKind::AgentReasoning, + "thinking", + Some("thinking"), + None, + ), + item( + "item_tool_use", + TurnItemKind::ToolCall, + "shell", + Some(r#"{"cmd":"pwd"}"#), + Some(json!({ + "tool_use_id": "tool-1", + "tool_name": "shell" + })), + ), + item( + "item_result_one", + TurnItemKind::ToolCall, + "one", + Some("one"), + Some(json!({ + "tool_result_for": "tool-1", + "is_error": false, + "content_blocks": [{ + "type": "text", + "text": "structured one" + }] + })), + ), + item( + "item_result_two", + TurnItemKind::ToolCall, + "two", + Some("two"), + Some(json!({ + "tool_result_for": "tool-2", + "is_error": true + })), + ), + item( + "item_answer", + TurnItemKind::AgentMessage, + "done", + Some("done"), + None, + ), + ], + latest_seq: 0, + }; + + let messages = messages_from_thread_detail(&detail); + let roles = messages + .iter() + .map(|message| message.role.as_str()) + .collect::>(); + assert_eq!(roles, vec!["user", "assistant", "user", "assistant"]); + assert_eq!(messages[2].content.len(), 2); + match &messages[2].content[0] { + ContentBlock::ToolResult { + tool_use_id, + content, + is_error, + 
content_blocks, + } => { + assert_eq!(tool_use_id, "tool-1"); + assert_eq!(content, "one"); + assert_eq!(*is_error, None); + assert_eq!( + content_blocks + .as_ref() + .and_then(|blocks| blocks[0].get("text")), + Some(&json!("structured one")) + ); + } + other => panic!("expected first tool result, got {other:?}"), + } + match &messages[2].content[1] { + ContentBlock::ToolResult { + tool_use_id, + content, + is_error, + content_blocks, + } => { + assert_eq!(tool_use_id, "tool-2"); + assert_eq!(content, "two"); + assert_eq!(*is_error, Some(true)); + assert!(content_blocks.is_none()); + } + other => panic!("expected second tool result, got {other:?}"), + } + } + #[test] fn runtime_auth_generates_token_by_default() { let auth = resolve_runtime_auth(None, None, false); @@ -4301,7 +4593,13 @@ mod tests { }], usage: AgentRunUsage { status: "unknown".to_string(), + input_tokens: None, + output_tokens: None, total_tokens: None, + token_budget: None, + budget_spent_tokens: None, + budget_remaining_tokens: None, + budget_scope: None, note: "not reported".to_string(), }, verification: AgentRunVerificationSummary { diff --git a/crates/tui/src/runtime_threads.rs b/crates/tui/src/runtime_threads.rs index 0555e9be8a..c36abf4102 100644 --- a/crates/tui/src/runtime_threads.rs +++ b/crates/tui/src/runtime_threads.rs @@ -151,6 +151,11 @@ pub struct ThreadRecord { /// additive metadata — older readers ignore it without misinterpretation. #[serde(default, skip_serializing_if = "Option::is_none")] pub title: Option, + /// The session ID associated with this thread. When set, `ensure_engine_loaded` + /// loads the full message history (including thinking/tool blocks) from the + /// session file instead of reconstructing from turns (which loses process info). 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub session_id: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -794,6 +799,32 @@ pub struct RuntimeThreadManager { pending_dynamic_tools: Arc>>>, } +/// Helper types for `seed_thread_from_messages` — intermediate representation +/// of a turn being built from session messages before persisting as items. +/// +/// A single content block extracted from an assistant message. +enum SeedItem { + Text(String), + Thinking(String), + ToolUse { + id: String, + name: String, + input: serde_json::Value, + }, + ToolResult { + tool_use_id: String, + content: String, + is_error: bool, + content_blocks: Option>, + }, +} + +/// A turn being assembled from session messages. +struct TurnSeed { + user_text: String, + items: Vec, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum RuntimeApprovalDecision { ApproveTool, @@ -1078,6 +1109,7 @@ impl RuntimeThreadManager { system_prompt: req.system_prompt, task_id: req.task_id, title: None, + session_id: None, }; self.store.save_thread(&thread)?; self.emit_event( @@ -1324,6 +1356,28 @@ impl RuntimeThreadManager { Ok(thread) } + /// Link a session to a thread so that `ensure_engine_loaded` can restore + /// the full message history (including thinking/tool blocks) from the + /// session file instead of reconstructing from turns. 
+ pub async fn set_thread_session_id(&self, thread_id: &str, session_id: &str) -> Result<()> { + let mut thread = self.get_thread(thread_id).await?; + if thread.session_id.as_deref() == Some(session_id) { + return Ok(()); + } + thread.session_id = Some(session_id.to_string()); + thread.updated_at = Utc::now(); + self.store.save_thread(&thread)?; + self.emit_event( + thread_id, + None, + None, + "thread.updated", + json!({ "thread": thread, "changes": { "session_id": session_id } }), + ) + .await?; + Ok(()) + } + async fn ensure_thread_has_no_active_turn(&self, thread_id: &str) -> Result<()> { let active = self.active.lock().await; if active @@ -1559,6 +1613,11 @@ impl RuntimeThreadManager { /// Seed a thread with messages from a saved session so subsequent turns /// continue with the prior conversation context. + /// + /// Unlike the old text-only implementation, this preserves all content + /// block types (thinking, tool_use, tool_result, etc.) as separate turn + /// items so that `loadHistory` in the GUI can reconstruct the full + /// conversation including process information. pub async fn seed_thread_from_messages( &self, thread_id: &str, @@ -1567,44 +1626,128 @@ impl RuntimeThreadManager { let mut thread = self.get_thread(thread_id).await?; let now = Utc::now(); - let mut user_buf: Vec = Vec::new(); - let mut pending_pairs: Vec<(String, Option)> = Vec::new(); + // Group messages into turns. A turn starts with a user message and + // includes all subsequent assistant messages (which may contain + // thinking, tool_use, tool_result blocks) until the next user message. + let mut turns: Vec = Vec::new(); + let mut current_turn: Option = None; for msg in messages { - let text = msg - .content - .iter() - .filter_map(|block| match block { - ContentBlock::Text { text, .. 
} => Some(text.as_str()), - _ => None, - }) - .collect::>() - .join("\n"); - if text.trim().is_empty() { - continue; - } - if msg.role == "user" { - user_buf.push(text); - } else if msg.role == "assistant" { - let user_text = if user_buf.is_empty() { - String::new() - } else { - std::mem::take(&mut user_buf).join("\n") - }; - pending_pairs.push((user_text, Some(text))); + match msg.role.as_str() { + "user" => { + let mut user_text = String::new(); + let mut tool_results = Vec::new(); + + for block in &msg.content { + match block { + ContentBlock::Text { text, .. } if !text.trim().is_empty() => { + if !user_text.is_empty() { + user_text.push('\n'); + } + user_text.push_str(text); + } + ContentBlock::ToolResult { + tool_use_id, + content, + is_error, + content_blocks, + } => { + tool_results.push(SeedItem::ToolResult { + tool_use_id: tool_use_id.clone(), + content: content.clone(), + is_error: is_error.unwrap_or(false), + content_blocks: content_blocks.clone(), + }); + } + // Other block types in user messages are rare; + // skip them gracefully. + _ => {} + } + } + + if !user_text.is_empty() { + // A real user prompt begins a new turn. Tool results + // without text belong to the preceding assistant turn. + if let Some(t) = current_turn.take() { + turns.push(t); + } + current_turn = Some(TurnSeed { + user_text, + items: tool_results, + }); + } else if !tool_results.is_empty() { + let turn = current_turn.get_or_insert_with(|| TurnSeed { + user_text: String::new(), + items: Vec::new(), + }); + turn.items.extend(tool_results); + } else { + if let Some(t) = current_turn.take() { + turns.push(t); + } + current_turn = Some(TurnSeed { + user_text: String::new(), + items: Vec::new(), + }); + } + } + "assistant" => { + // If no current turn exists (e.g. session starts with + // an assistant message), create a placeholder turn. 
+ let turn = current_turn.get_or_insert_with(|| TurnSeed { + user_text: String::new(), + items: Vec::new(), + }); + for block in &msg.content { + match block { + ContentBlock::Text { text, .. } if !text.trim().is_empty() => { + turn.items.push(SeedItem::Text(text.clone())); + } + ContentBlock::Thinking { thinking, .. } + if !thinking.trim().is_empty() => + { + turn.items.push(SeedItem::Thinking(thinking.clone())); + } + ContentBlock::ToolUse { + id, name, input, .. + } => { + turn.items.push(SeedItem::ToolUse { + id: id.clone(), + name: name.clone(), + input: input.clone(), + }); + } + ContentBlock::ServerToolUse { + id, name, input, .. + } => { + turn.items.push(SeedItem::ToolUse { + id: id.clone(), + name: name.clone(), + input: input.clone(), + }); + } + // Skip other block types (image_url, etc.) + _ => {} + } + } + } + // System messages and other roles are ignored for turn seeding. + _ => {} } } - if !user_buf.is_empty() { - let user_text = std::mem::take(&mut user_buf).join("\n"); - pending_pairs.push((user_text, None)); + // Flush the last turn. + if let Some(t) = current_turn.take() { + turns.push(t); } - for (user_text, assistant_text) in pending_pairs { + for turn_seed in turns { let turn_id = format!("turn_{}", &Uuid::new_v4().to_string()[..8]); - let summary = crate::utils::truncate_with_ellipsis(&user_text, SUMMARY_LIMIT, "..."); + let summary = + crate::utils::truncate_with_ellipsis(&turn_seed.user_text, SUMMARY_LIMIT, "..."); let mut item_ids = Vec::new(); - if !user_text.is_empty() { + // Save user message item. 
+ if !turn_seed.user_text.is_empty() { let item_id = format!("item_{}", &Uuid::new_v4().to_string()[..8]); self.store.save_item(&TurnItemRecord { schema_version: CURRENT_RUNTIME_SCHEMA_VERSION, @@ -1613,7 +1756,7 @@ impl RuntimeThreadManager { kind: TurnItemKind::UserMessage, status: TurnItemLifecycleStatus::Completed, summary: summary.clone(), - detail: Some(user_text), + detail: Some(turn_seed.user_text.clone()), metadata: None, artifact_refs: Vec::new(), started_at: Some(now), @@ -1622,47 +1765,148 @@ impl RuntimeThreadManager { item_ids.push(item_id); } - if let Some(assistant_text) = assistant_text { - let asst_summary = if assistant_text.len() > SUMMARY_LIMIT { - crate::utils::truncate_with_ellipsis(&assistant_text, SUMMARY_LIMIT, "...") - } else { - assistant_text.clone() - }; + // Save assistant content items in order. + for seed_item in &turn_seed.items { let item_id = format!("item_{}", &Uuid::new_v4().to_string()[..8]); - self.store.save_item(&TurnItemRecord { + match seed_item { + SeedItem::Text(text) => { + let asst_summary = if text.len() > SUMMARY_LIMIT { + crate::utils::truncate_with_ellipsis(text, SUMMARY_LIMIT, "...") + } else { + text.clone() + }; + self.store.save_item(&TurnItemRecord { + schema_version: CURRENT_RUNTIME_SCHEMA_VERSION, + id: item_id.clone(), + turn_id: turn_id.clone(), + kind: TurnItemKind::AgentMessage, + status: TurnItemLifecycleStatus::Completed, + summary: asst_summary, + detail: Some(text.clone()), + metadata: None, + artifact_refs: Vec::new(), + started_at: Some(now), + ended_at: Some(now), + })?; + } + SeedItem::Thinking(thinking) => { + let thinking_summary = if thinking.len() > SUMMARY_LIMIT { + crate::utils::truncate_with_ellipsis(thinking, SUMMARY_LIMIT, "...") + } else { + thinking.clone() + }; + self.store.save_item(&TurnItemRecord { + schema_version: CURRENT_RUNTIME_SCHEMA_VERSION, + id: item_id.clone(), + turn_id: turn_id.clone(), + kind: TurnItemKind::AgentReasoning, + status: TurnItemLifecycleStatus::Completed, 
+ summary: thinking_summary, + detail: Some(thinking.clone()), + metadata: None, + artifact_refs: Vec::new(), + started_at: Some(now), + ended_at: Some(now), + })?; + } + SeedItem::ToolUse { + id: tool_id, + name, + input, + } => { + let input_str = + serde_json::to_string(input).unwrap_or_else(|_| input.to_string()); + let tool_summary = format!("{name}({})", { + let s = &input_str; + if s.len() > 80 { + crate::utils::truncate_with_ellipsis(s, 80, "...") + } else { + s.clone() + } + }); + self.store.save_item(&TurnItemRecord { + schema_version: CURRENT_RUNTIME_SCHEMA_VERSION, + id: item_id.clone(), + turn_id: turn_id.clone(), + kind: TurnItemKind::ToolCall, + status: TurnItemLifecycleStatus::Completed, + summary: tool_summary, + detail: Some(input_str), + metadata: Some(serde_json::Value::Object( + serde_json::json!({ + "tool_use_id": tool_id, + "tool_name": name, + }) + .as_object() + .unwrap() + .clone(), + )), + artifact_refs: Vec::new(), + started_at: Some(now), + ended_at: Some(now), + })?; + } + SeedItem::ToolResult { + tool_use_id, + content, + is_error, + content_blocks, + } => { + let result_summary = if content.len() > SUMMARY_LIMIT { + crate::utils::truncate_with_ellipsis(content, SUMMARY_LIMIT, "...") + } else { + content.clone() + }; + let mut metadata = serde_json::Map::new(); + metadata.insert("tool_result_for".to_string(), json!(tool_use_id)); + metadata.insert("is_error".to_string(), json!(is_error)); + if let Some(blocks) = content_blocks { + metadata + .insert("content_blocks".to_string(), Value::Array(blocks.clone())); + } + self.store.save_item(&TurnItemRecord { + schema_version: CURRENT_RUNTIME_SCHEMA_VERSION, + id: item_id.clone(), + turn_id: turn_id.clone(), + kind: TurnItemKind::ToolCall, + status: if *is_error { + TurnItemLifecycleStatus::Failed + } else { + TurnItemLifecycleStatus::Completed + }, + summary: result_summary, + detail: Some(content.clone()), + metadata: Some(Value::Object(metadata)), + artifact_refs: Vec::new(), + 
started_at: Some(now), + ended_at: Some(now), + })?; + } + } + item_ids.push(item_id); + } + + // Only create a turn if there's content. + if !item_ids.is_empty() { + self.store.save_turn(&TurnRecord { schema_version: CURRENT_RUNTIME_SCHEMA_VERSION, - id: item_id.clone(), - turn_id: turn_id.clone(), - kind: TurnItemKind::AgentMessage, - status: TurnItemLifecycleStatus::Completed, - summary: asst_summary, - detail: Some(assistant_text), - metadata: None, - artifact_refs: Vec::new(), + id: turn_id.clone(), + thread_id: thread_id.to_string(), + status: RuntimeTurnStatus::Completed, + input_summary: summary, + created_at: now, started_at: Some(now), ended_at: Some(now), + duration_ms: Some(0), + usage: None, + error: None, + item_ids, + steer_count: 0, })?; - item_ids.push(item_id); - } - self.store.save_turn(&TurnRecord { - schema_version: CURRENT_RUNTIME_SCHEMA_VERSION, - id: turn_id.clone(), - thread_id: thread_id.to_string(), - status: RuntimeTurnStatus::Completed, - input_summary: summary, - created_at: now, - started_at: Some(now), - ended_at: Some(now), - duration_ms: Some(0), - usage: None, - error: None, - item_ids, - steer_count: 0, - })?; - - thread.latest_turn_id = Some(turn_id); - thread.updated_at = now; + thread.latest_turn_id = Some(turn_id); + thread.updated_at = now; + } } self.store.save_thread(&thread)?; @@ -1832,6 +2076,7 @@ impl RuntimeThreadManager { crate::tui::approval::ApprovalMode::Suggest }, verbosity: self.config.verbosity.clone(), + provenance: crate::core::ops::UserInputProvenance::ExternalUser, }) .await .map_err(|e| anyhow!("Failed to start turn: {e}"))?; @@ -2141,6 +2386,11 @@ impl RuntimeThreadManager { .lsp .clone() .map(crate::config::LspConfigToml::into_runtime); + let provider = self.config.api_provider(); + let max_subagents = self + .config + .max_subagents_for_provider(provider) + .clamp(1, MAX_SUBAGENTS); let engine_cfg = EngineConfig { model: thread.model.clone(), workspace: thread.workspace.clone(), @@ -2160,14 +2410,20 @@ 
impl RuntimeThreadManager { translation_enabled: false, show_thinking: settings.show_thinking, max_steps: 100, - max_subagents: self.config.max_subagents().clamp(1, MAX_SUBAGENTS), - launch_concurrency: self.config.launch_concurrency(), + max_subagents, + max_admitted_subagents: self + .config + .max_admitted_subagents_for_provider(provider) + .max(max_subagents), + launch_concurrency: self.config.launch_concurrency_for_provider(provider), + subagents_enabled: self.config.subagents_enabled_for_provider(provider), features: self.config.features(), compaction, todos: new_shared_todo_list(), plan_state: new_shared_plan_state(), goal_state: crate::tools::goal::new_shared_goal_state(), - max_spawn_depth: self.config.subagent_max_spawn_depth(), + max_spawn_depth: self.config.subagent_max_spawn_depth_for_provider(provider), + subagent_token_budget: self.config.subagent_token_budget_for_provider(provider), network_policy, snapshots_enabled: self.config.snapshots_config().enabled, snapshots_max_workspace_bytes: self @@ -2190,13 +2446,14 @@ impl RuntimeThreadManager { }, subagent_model_overrides: self.config.subagent_model_overrides(), subagent_api_timeout: std::time::Duration::from_secs( - self.config.subagent_api_timeout_secs(), + self.config.subagent_api_timeout_secs_for_provider(provider), ), stream_chunk_timeout: std::time::Duration::from_secs( self.config.stream_chunk_timeout_secs(), ), subagent_heartbeat_timeout: std::time::Duration::from_secs( - self.config.subagent_heartbeat_timeout_secs(), + self.config + .subagent_heartbeat_timeout_secs_for_provider(provider), ), prefer_bwrap: self.config.prefer_bwrap.unwrap_or(false), memory_enabled: self.config.memory_enabled(), @@ -2226,8 +2483,46 @@ impl RuntimeThreadManager { let engine = spawn_engine(engine_cfg, &self.config); - let turns = self.store.list_turns_for_thread(&thread.id)?; - let session_messages = self.reconstruct_messages_from_turns(&turns)?; + // When the thread has an associated session, load the full 
message history + // (including thinking/tool blocks) from the session file. This preserves + // process information that `reconstruct_messages_from_turns` would lose. + let session_messages = if let Some(ref sid) = thread.session_id { + match crate::session_manager::default_sessions_dir() { + Ok(sessions_dir) => { + match crate::session_manager::SessionManager::new(sessions_dir) { + Ok(manager) => match manager.load_session(sid) { + Ok(session) => session.messages, + Err(e) => { + tracing::warn!( + "Failed to load session {} for thread {}: {e}; falling back to turn reconstruction", + sid, + thread.id + ); + let turns = self.store.list_turns_for_thread(&thread.id)?; + self.reconstruct_messages_from_turns(&turns)? + } + }, + Err(e) => { + tracing::warn!( + "Failed to open sessions dir: {e}; falling back to turn reconstruction" + ); + let turns = self.store.list_turns_for_thread(&thread.id)?; + self.reconstruct_messages_from_turns(&turns)? + } + } + } + Err(e) => { + tracing::warn!( + "Failed to resolve sessions dir: {e}; falling back to turn reconstruction" + ); + let turns = self.store.list_turns_for_thread(&thread.id)?; + self.reconstruct_messages_from_turns(&turns)? + } + } + } else { + let turns = self.store.list_turns_for_thread(&thread.id)?; + self.reconstruct_messages_from_turns(&turns)? 
+ }; let sys_prompt = thread .system_prompt .as_ref() @@ -2235,7 +2530,7 @@ impl RuntimeThreadManager { if !session_messages.is_empty() || sys_prompt.is_some() { engine .send(Op::SyncSession { - session_id: None, + session_id: thread.session_id.clone(), messages: session_messages, system_prompt: sys_prompt, system_prompt_override: thread.system_prompt.is_some(), @@ -2273,32 +2568,132 @@ impl RuntimeThreadManager { fn reconstruct_messages_from_turns(&self, turns: &[TurnRecord]) -> Result> { let mut messages = Vec::new(); for turn in turns { - let items = self.store.list_items_for_turn(&turn.id)?; + let stored_items = self.store.list_items_for_turn(&turn.id)?; + let items = if turn.item_ids.is_empty() { + stored_items + } else { + let mut by_id: HashMap = stored_items + .iter() + .cloned() + .map(|item| (item.id.clone(), item)) + .collect(); + let mut ordered = Vec::new(); + for item_id in &turn.item_ids { + if let Some(item) = by_id.remove(item_id) { + ordered.push(item); + } + } + for item in stored_items { + if by_id.contains_key(&item.id) { + ordered.push(item); + } + } + ordered + }; + + let mut assistant_blocks: Vec = Vec::new(); + let mut user_blocks: Vec = Vec::new(); + let flush_assistant = |blocks: &mut Vec, msgs: &mut Vec| { + if !blocks.is_empty() { + msgs.push(Message { + role: "assistant".to_string(), + content: std::mem::take(blocks), + }); + } + }; + let flush_user = |blocks: &mut Vec, msgs: &mut Vec| { + if !blocks.is_empty() { + msgs.push(Message { + role: "user".to_string(), + content: std::mem::take(blocks), + }); + } + }; for item in items { match item.kind { TurnItemKind::UserMessage => { + flush_assistant(&mut assistant_blocks, &mut messages); let text = item.detail.unwrap_or(item.summary); - messages.push(Message { - role: "user".to_string(), - content: vec![ContentBlock::Text { + if !text.trim().is_empty() { + user_blocks.push(ContentBlock::Text { text, cache_control: None, - }], - }); + }); + } } TurnItemKind::AgentMessage => { + 
flush_user(&mut user_blocks, &mut messages); let text = item.detail.unwrap_or(item.summary); - messages.push(Message { - role: "assistant".to_string(), - content: vec![ContentBlock::Text { + if !text.trim().is_empty() { + assistant_blocks.push(ContentBlock::Text { text, cache_control: None, - }], - }); + }); + } + } + TurnItemKind::AgentReasoning => { + flush_user(&mut user_blocks, &mut messages); + let thinking = item.detail.unwrap_or(item.summary); + if !thinking.trim().is_empty() { + assistant_blocks.push(ContentBlock::Thinking { + thinking, + signature: None, + }); + } + } + TurnItemKind::ToolCall => { + let meta = item.metadata.as_ref(); + let is_tool_result = meta.and_then(|m| m.get("tool_result_for")).is_some(); + if is_tool_result { + flush_assistant(&mut assistant_blocks, &mut messages); + let tool_use_id = meta + .and_then(|m| m.get("tool_result_for")) + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let content = item.detail.unwrap_or_default(); + let is_error = meta + .and_then(|m| m.get("is_error")) + .and_then(|v| v.as_bool()) + .unwrap_or(false); + let content_blocks = meta + .and_then(|m| m.get("content_blocks")) + .and_then(|v| v.as_array()) + .cloned(); + user_blocks.push(ContentBlock::ToolResult { + tool_use_id, + content, + is_error: if is_error { Some(true) } else { None }, + content_blocks, + }); + } else { + flush_user(&mut user_blocks, &mut messages); + let tool_use_id = meta + .and_then(|m| m.get("tool_use_id")) + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let tool_name = meta + .and_then(|m| m.get("tool_name")) + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let input_str = item.detail.unwrap_or_default(); + let input: serde_json::Value = + serde_json::from_str(&input_str).unwrap_or(serde_json::Value::Null); + assistant_blocks.push(ContentBlock::ToolUse { + id: tool_use_id, + name: tool_name, + input, + caller: None, + }); + } } _ => {} } } + flush_assistant(&mut assistant_blocks, &mut 
messages); + flush_user(&mut user_blocks, &mut messages); } Ok(messages) } @@ -3503,6 +3898,7 @@ mod tests { system_prompt: None, task_id: None, title: None, + session_id: None, } } @@ -3612,6 +4008,149 @@ mod tests { let _ = std::fs::remove_dir_all(dir); } + #[test] + fn store_load_thread_defaults_missing_session_id() { + let dir = test_runtime_dir(); + let store = RuntimeThreadStore::open(dir.clone()).expect("open store"); + let thread = sample_thread("thr_legacy_session"); + let path = store.threads_dir.join(format!("{}.json", thread.id)); + std::fs::create_dir_all(path.parent().unwrap()).expect("mkdirs"); + let mut payload = serde_json::to_value(&thread).expect("serialize thread"); + payload + .as_object_mut() + .expect("thread object") + .remove("session_id"); + std::fs::write( + &path, + serde_json::to_string(&payload).expect("encode thread"), + ) + .expect("write thread"); + + let loaded = store + .load_thread(&thread.id) + .expect("legacy thread should load"); + assert_eq!(loaded.session_id, None); + + let _ = std::fs::remove_dir_all(dir); + } + + #[tokio::test] + async fn seed_thread_keeps_tool_results_on_preceding_turn() -> Result<()> { + let dir = test_runtime_dir(); + let manager = test_manager(dir.clone())?; + let thread = sample_thread("thr_seed_blocks"); + manager.store.save_thread(&thread)?; + let messages = vec![ + Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: "check the files".to_string(), + cache_control: None, + }], + }, + Message { + role: "assistant".to_string(), + content: vec![ + ContentBlock::Thinking { + thinking: "need a tool".to_string(), + signature: Some("sig-1".to_string()), + }, + ContentBlock::ToolUse { + id: "tool-1".to_string(), + name: "shell".to_string(), + input: json!({ "cmd": "one" }), + caller: None, + }, + ContentBlock::ToolUse { + id: "tool-2".to_string(), + name: "shell".to_string(), + input: json!({ "cmd": "two" }), + caller: None, + }, + ], + }, + Message { + role: 
"user".to_string(), + content: vec![ContentBlock::ToolResult { + tool_use_id: "tool-1".to_string(), + content: "one".to_string(), + is_error: None, + content_blocks: Some(vec![json!({ + "type": "text", + "text": "structured one" + })]), + }], + }, + Message { + role: "user".to_string(), + content: vec![ContentBlock::ToolResult { + tool_use_id: "tool-2".to_string(), + content: "two".to_string(), + is_error: Some(true), + content_blocks: None, + }], + }, + Message { + role: "assistant".to_string(), + content: vec![ContentBlock::Text { + text: "done".to_string(), + cache_control: None, + }], + }, + ]; + + manager + .seed_thread_from_messages(&thread.id, &messages) + .await?; + let turns = manager.store.list_turns_for_thread(&thread.id)?; + assert_eq!(turns.len(), 1); + + let restored = manager.reconstruct_messages_from_turns(&turns)?; + let roles = restored + .iter() + .map(|message| message.role.as_str()) + .collect::>(); + assert_eq!(roles, vec!["user", "assistant", "user", "assistant"]); + assert_eq!(restored[2].content.len(), 2); + + match &restored[2].content[0] { + ContentBlock::ToolResult { + tool_use_id, + content, + is_error, + content_blocks, + } => { + assert_eq!(tool_use_id, "tool-1"); + assert_eq!(content, "one"); + assert_eq!(*is_error, None); + assert_eq!( + content_blocks + .as_ref() + .and_then(|blocks| blocks[0].get("text")), + Some(&json!("structured one")) + ); + } + other => panic!("expected first tool result, got {other:?}"), + } + match &restored[2].content[1] { + ContentBlock::ToolResult { + tool_use_id, + content, + is_error, + content_blocks, + } => { + assert_eq!(tool_use_id, "tool-2"); + assert_eq!(content, "two"); + assert_eq!(*is_error, Some(true)); + assert!(content_blocks.is_none()); + } + other => panic!("expected second tool result, got {other:?}"), + } + + let _ = std::fs::remove_dir_all(dir); + Ok(()) + } + #[test] fn current_runtime_schema_version_is_two_on_v066() { // Locks the bump in (issue #124). 
Bump deliberately when persisted @@ -5559,6 +6098,7 @@ mod tests { system_prompt: None, task_id: None, title: None, + session_id: None, }; manager.store.save_thread(&thread)?; diff --git a/crates/tui/src/session_manager.rs b/crates/tui/src/session_manager.rs index 1220a948db..ac6353c0ff 100644 --- a/crates/tui/src/session_manager.rs +++ b/crates/tui/src/session_manager.rs @@ -618,9 +618,11 @@ fn is_git_metadata_entry(path: &Path) -> bool { /// Resolve the default session directory path. /// /// v0.8.44: prefers `~/.codewhale/sessions`, falls back to -/// `~/.deepseek/sessions` for existing installs. +/// `~/.deepseek/sessions` for existing installs. Uses the write-path resolver +/// so the first access relocates any legacy `~/.deepseek/sessions` into +/// `~/.codewhale/sessions` (#3240); reads still surface migrated data. pub fn default_sessions_dir() -> std::io::Result { - codewhale_config::resolve_state_dir("sessions") + codewhale_config::ensure_state_dir("sessions") .map_err(|e| std::io::Error::new(std::io::ErrorKind::NotFound, e.to_string())) } diff --git a/crates/tui/src/settings.rs b/crates/tui/src/settings.rs index d34563030b..7c7a47b331 100644 --- a/crates/tui/src/settings.rs +++ b/crates/tui/src/settings.rs @@ -278,8 +278,11 @@ pub struct Settings { pub default_mode: String, /// Sidebar width as percentage of terminal width pub sidebar_width_percent: u16, - /// Sidebar focus mode: auto, work, tasks, agents, context, hidden + /// Sidebar focus mode: pinned, auto, tasks, agents, context, hidden pub sidebar_focus: String, + /// Migration marker for users who explicitly opt into idle auto-collapse. + #[serde(default, skip_serializing_if = "is_false")] + pub sidebar_auto_collapse_opt_in: bool, /// Enable the session-context panel (#504). Shows working set, tokens, /// cost, MCP/LSP status, cycle count, and memory info. 
pub context_panel: bool, @@ -379,7 +382,8 @@ impl Default for Settings { transcript_spacing: "comfortable".to_string(), default_mode: "agent".to_string(), sidebar_width_percent: 28, - sidebar_focus: "auto".to_string(), + sidebar_focus: "pinned".to_string(), + sidebar_auto_collapse_opt_in: false, context_panel: false, cost_currency: "usd".to_string(), max_input_history: 100, @@ -442,6 +446,14 @@ impl Settings { s.transcript_spacing = normalize_transcript_spacing(&s.transcript_spacing).to_string(); s.tool_collapse_mode = normalize_tool_collapse_mode(&s.tool_collapse_mode).to_string(); s.sidebar_focus = normalize_sidebar_focus(&s.sidebar_focus).to_string(); + if s.sidebar_focus == "auto" && !s.sidebar_auto_collapse_opt_in { + // v0.8.62 wrote the surprising auto-collapse default into many + // full settings files. Treat unmarked saved "auto" as that + // legacy default so upgraded users get the sidebar back, while + // `/sidebar auto --save` and `/set sidebar_focus auto` below + // preserve an explicit opt-in from this release onward (#3328). + s.sidebar_focus = "pinned".to_string(); + } s.status_indicator = normalize_status_indicator(&s.status_indicator).to_string(); s.synchronized_output = normalize_synchronized_output(&s.synchronized_output).to_string(); @@ -764,18 +776,19 @@ impl Settings { "sidebar_focus" | "focus" => { let normalized = match value.trim().to_ascii_lowercase().as_str() { "auto" => "auto", - "work" | "plan" | "todos" => "work", + "pinned" | "visible" | "show" | "on" | "work" | "plan" | "todos" => "pinned", "tasks" => "tasks", "agents" | "subagents" | "sub-agents" => "agents", "context" | "session" => "context", "hidden" | "hide" | "closed" | "off" | "none" => "hidden", _ => { anyhow::bail!( - "Failed to update setting: invalid sidebar focus '{value}'. Expected: auto, work, tasks, agents, context, hidden." + "Failed to update setting: invalid sidebar focus '{value}'. Expected: pinned, auto, tasks, agents, context, hidden." 
) } }; self.sidebar_focus = normalized.to_string(); + self.sidebar_auto_collapse_opt_in = normalized == "auto"; } "context_panel" | "context" | "session_panel" => { self.context_panel = parse_bool(value)?; @@ -1400,7 +1413,7 @@ fn normalize_background_color_setting(value: &str) -> Result> { fn normalize_sidebar_focus(value: &str) -> &str { match value.trim().to_ascii_lowercase().as_str() { - "work" | "plan" | "todos" => "work", + "pinned" | "visible" | "show" | "on" | "work" | "plan" | "todos" => "pinned", "tasks" => "tasks", "agents" | "subagents" | "sub-agents" => "agents", "context" | "session" => "context", @@ -1409,6 +1422,10 @@ fn normalize_sidebar_focus(value: &str) -> &str { } } +fn is_false(value: &bool) -> bool { + !*value +} + /// Resolve an environment variable as a boolean. Recognises the /// common truthy spellings (`1`, `true`, `yes`, `on`) case- /// insensitively. Used by [`Settings::apply_env_overrides`] for @@ -1466,6 +1483,28 @@ mod tests { assert!(settings.fancy_animations); } + #[test] + fn default_settings_keep_sidebar_pinned() { + let settings = Settings::default(); + assert_eq!(settings.sidebar_focus, "pinned"); + assert!(!settings.sidebar_auto_collapse_opt_in); + } + + #[test] + fn sidebar_auto_opt_in_marker_is_serialized_only_when_enabled() { + let default_body = toml::to_string_pretty(&Settings::default()).expect("serialize"); + assert!(!default_body.contains("sidebar_auto_collapse_opt_in")); + + let mut settings = Settings::default(); + settings + .set("sidebar_focus", "auto") + .expect("enable auto collapse"); + + let auto_body = toml::to_string_pretty(&settings).expect("serialize"); + assert!(auto_body.contains("sidebar_focus = \"auto\"")); + assert!(auto_body.contains("sidebar_auto_collapse_opt_in = true")); + } + #[test] fn reasoning_effort_setting_normalizes_and_clears() { let mut settings = Settings::default(); @@ -1620,17 +1659,20 @@ mod tests { } #[test] - fn sidebar_focus_accepts_work_values_and_legacy_aliases() { + fn 
sidebar_focus_accepts_pinned_values_and_legacy_aliases() { let mut settings = Settings::default(); + settings.set("sidebar_focus", "pinned").expect("set pinned"); + assert_eq!(settings.sidebar_focus, "pinned"); + settings.set("sidebar_focus", "work").expect("set work"); - assert_eq!(settings.sidebar_focus, "work"); + assert_eq!(settings.sidebar_focus, "pinned"); settings.set("focus", "plan").expect("legacy plan alias"); - assert_eq!(settings.sidebar_focus, "work"); + assert_eq!(settings.sidebar_focus, "pinned"); settings.set("focus", "todos").expect("legacy todos alias"); - assert_eq!(settings.sidebar_focus, "work"); + assert_eq!(settings.sidebar_focus, "pinned"); settings.set("focus", "context").expect("context focus"); assert_eq!(settings.sidebar_focus, "context"); @@ -1640,6 +1682,17 @@ mod tests { settings.set("focus", "off").expect("off alias"); assert_eq!(settings.sidebar_focus, "hidden"); + assert!(!settings.sidebar_auto_collapse_opt_in); + + settings.set("focus", "auto").expect("auto focus"); + assert_eq!(settings.sidebar_focus, "auto"); + assert!(settings.sidebar_auto_collapse_opt_in); + + settings + .set("focus", "visible") + .expect("pinned alias clears auto marker"); + assert_eq!(settings.sidebar_focus, "pinned"); + assert!(!settings.sidebar_auto_collapse_opt_in); let err = settings .set("sidebar_focus", "classic") @@ -2646,6 +2699,40 @@ mod tests { ); } + #[test] + fn settings_load_migrates_legacy_saved_auto_sidebar_focus_to_pinned() { + let _g = config_path_test_guard(); + let tmp = tempfile::tempdir().expect("tempdir"); + let settings_path = tmp.path().join("settings.toml"); + std::fs::write(&settings_path, "sidebar_focus = \"auto\"\n").expect("settings"); + let _config_override = + EnvVarRestore::set("DEEPSEEK_CONFIG_PATH", tmp.path().join("config.toml")); + + let loaded = Settings::load().expect("load settings"); + + assert_eq!(loaded.sidebar_focus, "pinned"); + assert!(!loaded.sidebar_auto_collapse_opt_in); + } + + #[test] + fn 
settings_load_preserves_explicit_auto_sidebar_opt_in() { + let _g = config_path_test_guard(); + let tmp = tempfile::tempdir().expect("tempdir"); + let settings_path = tmp.path().join("settings.toml"); + std::fs::write( + &settings_path, + "sidebar_focus = \"auto\"\nsidebar_auto_collapse_opt_in = true\n", + ) + .expect("settings"); + let _config_override = + EnvVarRestore::set("DEEPSEEK_CONFIG_PATH", tmp.path().join("config.toml")); + + let loaded = Settings::load().expect("load settings"); + + assert_eq!(loaded.sidebar_focus, "auto"); + assert!(loaded.sidebar_auto_collapse_opt_in); + } + #[test] fn tui_prefs_path_defaults_to_codewhale_home_for_new_writes() { let _g = config_path_test_guard(); diff --git a/crates/tui/src/slop_ledger.rs b/crates/tui/src/slop_ledger.rs index 9c97bff4e1..df94c1267e 100644 --- a/crates/tui/src/slop_ledger.rs +++ b/crates/tui/src/slop_ledger.rs @@ -259,9 +259,11 @@ pub struct SlopLedger { } impl SlopLedger { - /// Resolve the default ledger path. + /// Resolve the default ledger path under the primary `~/.codewhale` root + /// (with one-time legacy migration) so loads and saves never perpetuate + /// `~/.deepseek` (#3240). pub fn default_path() -> io::Result { - codewhale_config::resolve_state_dir("slop_ledger") + codewhale_config::ensure_state_dir("slop_ledger") .map(|p| p.join("slop_ledger.json")) .map_err(io::Error::other) } @@ -294,6 +296,9 @@ impl SlopLedger { /// Persist the ledger to disk. pub fn save(&self) -> io::Result<()> { + // `ledger_path` is resolved by `default_path()` against the primary + // ~/.codewhale root (with one-time legacy migration), so persisting + // here never perpetuates ~/.deepseek (#3240). 
if let Some(parent) = self.ledger_path.parent() { fs::create_dir_all(parent)?; } diff --git a/crates/tui/src/task_manager.rs b/crates/tui/src/task_manager.rs index c2feca2458..a9ee32bc79 100644 --- a/crates/tui/src/task_manager.rs +++ b/crates/tui/src/task_manager.rs @@ -334,7 +334,9 @@ impl TaskManagerConfig { default_mode: "agent".to_string(), allow_shell: config.allow_shell(), trust_mode: false, - max_subagents: config.max_subagents().clamp(1, MAX_SUBAGENTS), + max_subagents: config + .max_subagents_for_provider(config.api_provider()) + .clamp(1, MAX_SUBAGENTS), } } } diff --git a/crates/tui/src/tools/js_execution.rs b/crates/tui/src/tools/js_execution.rs index b2436c971d..bab9cf7add 100644 --- a/crates/tui/src/tools/js_execution.rs +++ b/crates/tui/src/tools/js_execution.rs @@ -14,6 +14,7 @@ //! `core::engine::tool_catalog::ensure_advanced_tooling` for the //! catalog-side dispatch. +use std::ffi::OsString; use std::path::Path; use std::time::Duration; @@ -30,6 +31,60 @@ pub const JS_EXECUTION_TOOL_NAME: &str = "js_execution"; /// Anthropic message API expects so the wire shape stays stable /// across the two interpreters. 
const JS_EXECUTION_TOOL_TYPE: &str = "code_execution_20250825"; +const NODE_USE_ENV_PROXY: &str = "NODE_USE_ENV_PROXY"; +const NODE_PROXY_PAIRS: &[(&str, &str)] = + &[("HTTP_PROXY", "http_proxy"), ("HTTPS_PROXY", "https_proxy")]; + +fn first_non_empty_env_from( + keys: &[&str], + env: &impl Fn(&str) -> Option, +) -> Option { + keys.iter() + .filter_map(|key| env(key)) + .find(|value| !value.is_empty()) +} + +fn node_proxy_env_overrides_from( + env: impl Fn(&str) -> Option, +) -> Vec<(&'static str, OsString)> { + let all_proxy = first_non_empty_env_from(&["ALL_PROXY", "all_proxy"], &env); + let proxy_configured = all_proxy.is_some() + || NODE_PROXY_PAIRS + .iter() + .any(|(upper, lower)| first_non_empty_env_from(&[upper, lower], &env).is_some()); + + let mut overrides = Vec::new(); + if proxy_configured && first_non_empty_env_from(&[NODE_USE_ENV_PROXY], &env).is_none() { + overrides.push((NODE_USE_ENV_PROXY, OsString::from("1"))); + } + + for (upper, lower) in NODE_PROXY_PAIRS { + if first_non_empty_env_from(&[upper], &env).is_none() + && let Some(value) = + first_non_empty_env_from(&[lower], &env).or_else(|| all_proxy.clone()) + { + overrides.push((*upper, value)); + } + } + + if first_non_empty_env_from(&["NO_PROXY"], &env).is_none() + && let Some(value) = first_non_empty_env_from(&["no_proxy"], &env) + { + overrides.push(("NO_PROXY", value)); + } + + overrides +} + +fn node_proxy_env_overrides() -> Vec<(&'static str, OsString)> { + node_proxy_env_overrides_from(|key| std::env::var_os(key)) +} + +fn apply_node_proxy_env(cmd: &mut tokio::process::Command) { + for (key, value) in node_proxy_env_overrides() { + cmd.env(key, value); + } +} /// Build the `Tool` definition the catalog should advertise when /// Node.js is present on the host. 
Kept as a constructor (rather @@ -87,6 +142,9 @@ pub async fn execute_js_execution_tool( let mut cmd = crate::dependencies::Node::tokio_command().ok_or_else(|| { ToolError::execution_failed("js_execution: Node.js runtime became unavailable".to_string()) })?; + // Recent Node releases use this startup env to make fetch/http(s) honor + // standard proxy variables; older runtimes ignore it and keep prior behavior. + apply_node_proxy_env(&mut cmd); cmd.arg(&script_path).current_dir(workspace); let output = tokio::time::timeout(Duration::from_secs(120), cmd.output()) @@ -116,6 +174,7 @@ pub async fn execute_js_execution_tool( #[cfg(test)] mod tests { use super::*; + use std::ffi::OsString; use tempfile::tempdir; /// Skip helper — `js_execution` is a no-op on hosts without Node. @@ -125,6 +184,14 @@ mod tests { crate::dependencies::resolve_node().is_some() } + fn proxy_env<'a>(pairs: &'a [(&'a str, &'a str)]) -> impl Fn(&str) -> Option + 'a { + move |key| { + pairs + .iter() + .find_map(|(name, value)| (*name == key).then(|| OsString::from(value))) + } + } + #[test] fn tool_definition_advertises_js_execution_name_and_required_code_field() { let tool = js_execution_tool_definition(); @@ -141,6 +208,35 @@ mod tests { ); } + #[test] + fn node_proxy_overrides_enable_env_proxy_when_proxy_env_is_present() { + let overrides = + node_proxy_env_overrides_from(proxy_env(&[("HTTPS_PROXY", "http://127.0.0.1:20499")])); + + assert_eq!( + overrides, + vec![(NODE_USE_ENV_PROXY, OsString::from("1"))], + "uppercase proxy vars are inherited by the child; only Node's env-proxy flag is needed" + ); + } + + #[test] + fn node_proxy_overrides_mirror_lowercase_proxy_vars() { + let overrides = node_proxy_env_overrides_from(proxy_env(&[ + ("https_proxy", "http://127.0.0.1:20499"), + ("no_proxy", "localhost"), + ])); + + assert_eq!( + overrides, + vec![ + (NODE_USE_ENV_PROXY, OsString::from("1")), + ("HTTPS_PROXY", OsString::from("http://127.0.0.1:20499")), + ("NO_PROXY", 
OsString::from("localhost")), + ] + ); + } + #[tokio::test] async fn execute_js_runs_node_and_returns_stdout_payload() { if !node_present() { diff --git a/crates/tui/src/tools/registry.rs b/crates/tui/src/tools/registry.rs index 8158d9c340..ef253a3490 100644 --- a/crates/tui/src/tools/registry.rs +++ b/crates/tui/src/tools/registry.rs @@ -103,6 +103,7 @@ impl ToolRegistry { } /// Execute a tool by name with the given input. + #[allow(dead_code)] pub async fn execute(&self, name: &str, input: Value) -> Result { let tool = self .get(name) diff --git a/crates/tui/src/tools/shell.rs b/crates/tui/src/tools/shell.rs index 9f4ca043f2..a85beb8728 100644 --- a/crates/tui/src/tools/shell.rs +++ b/crates/tui/src/tools/shell.rs @@ -115,6 +115,10 @@ pub struct ShellJobSnapshot { #[serde(default, skip_serializing_if = "Option::is_none")] pub elapsed_since_output_ms: Option, pub linked_task_id: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub owner_agent_id: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub owner_agent_name: Option, } /// Once-only completion event for a tracked background shell job. @@ -128,6 +132,17 @@ pub struct ShellCompletionEvent { pub stdout_tail: String, pub stderr_tail: String, pub linked_task_id: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub owner_agent_id: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub owner_agent_name: Option, +} + +/// Optional owner attribution for background shell work. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct ShellJobOwner { + pub agent_id: String, + pub agent_name: String, } /// Full output view used by `/jobs show `. 
@@ -501,6 +516,7 @@ pub struct BackgroundShell { last_observed_output_len: usize, pub sandbox_type: SandboxType, pub linked_task_id: Option, + pub owner_agent: Option, stdout_buffer: Arc>>, stderr_buffer: Option>>>, stdout_cursor: usize, @@ -772,6 +788,14 @@ impl BackgroundShell { stale, elapsed_since_output_ms, linked_task_id: self.linked_task_id.clone(), + owner_agent_id: self + .owner_agent + .as_ref() + .map(|owner| owner.agent_id.clone()), + owner_agent_name: self + .owner_agent + .as_ref() + .map(|owner| owner.agent_name.clone()), } } @@ -786,6 +810,8 @@ impl BackgroundShell { stdout_tail: snapshot.stdout_tail, stderr_tail: snapshot.stderr_tail, linked_task_id: snapshot.linked_task_id, + owner_agent_id: snapshot.owner_agent_id, + owner_agent_name: snapshot.owner_agent_name, } } @@ -992,6 +1018,34 @@ impl ShellManager { tty: bool, policy_override: Option, extra_env: HashMap, + ) -> Result { + self.execute_with_options_env_for_owner( + command, + working_dir, + timeout_ms, + background, + stdin_data, + tty, + policy_override, + extra_env, + None, + ) + } + + /// Same as `execute_with_options_env`, with optional background-job owner + /// attribution for sub-agent launched jobs. + #[allow(clippy::too_many_arguments)] + pub fn execute_with_options_env_for_owner( + &mut self, + command: &str, + working_dir: Option<&str>, + timeout_ms: u64, + background: bool, + stdin_data: Option<&str>, + tty: bool, + policy_override: Option, + extra_env: HashMap, + owner_agent: Option, ) -> Result { // Log execution via ShellDispatcher when SHELL_DISPATCHER_LOG is set. 
crate::shell_dispatcher::ShellDispatcher::log_exec(command); @@ -1011,7 +1065,14 @@ impl ShellManager { let exec_env = self.sandbox_manager.prepare(&spec); if background { - self.spawn_background_sandboxed(command, &work_dir, &exec_env, stdin_data, tty) + self.spawn_background_sandboxed( + command, + &work_dir, + &exec_env, + stdin_data, + tty, + owner_agent, + ) } else { if tty { return Err(anyhow!( @@ -1358,6 +1419,7 @@ impl ShellManager { exec_env: &ExecEnv, stdin_data: Option<&str>, tty: bool, + owner_agent: Option, ) -> Result { let task_id = format!("shell_{}", &Uuid::new_v4().to_string()[..8]); let started = Instant::now(); @@ -1484,6 +1546,7 @@ impl ShellManager { last_observed_output_len: 0, sandbox_type, linked_task_id: None, + owner_agent, stdout_buffer, stderr_buffer, stdout_cursor: 0, @@ -1768,6 +1831,8 @@ impl ShellManager { stale: true, elapsed_since_output_ms: None, linked_task_id, + owner_agent_id: None, + owner_agent_name: None, }, ); } @@ -1879,6 +1944,10 @@ shell sandbox). Workarounds: (1) run the Docker build from a regular terminal ou TUI, or (2) disable BuildKit with DOCKER_BUILDKIT=0 (only works if your Dockerfiles do not \ use RUN --mount directives)."; +const PYTHON_BUILD_DEPENDENCY_HINT: &str = "Python build dependency missing: setuptools is not \ +available in the active environment. 
Install the declared build requirements first, for example \ +`python -m pip install -U pip setuptools wheel build`, then rerun the build command."; + fn attach_cargo_failure_summary( metadata: &mut serde_json::Value, command: &str, @@ -1891,6 +1960,19 @@ fn attach_cargo_failure_summary( } } +fn attach_python_build_dependency_hint( + metadata: &mut serde_json::Value, + hint: Option<&'static str>, +) { + if let Some(hint) = hint { + metadata["python_build_dependency_hint"] = json!({ + "kind": "missing_setuptools", + "hint": hint, + "recommended_first_step": "python -m pip install -U pip setuptools wheel build", + }); + } +} + pub(crate) fn looks_like_macos_provenance_failure(result: &ShellResult) -> bool { if matches!(result.status, ShellStatus::Completed) && result.exit_code == Some(0) { return false; @@ -1909,6 +1991,58 @@ fn macos_provenance_hint(result: &ShellResult) -> Option<&'static str> { } } +fn python_build_dependency_hint(command: &str, result: &ShellResult) -> Option<&'static str> { + if matches!(result.status, ShellStatus::Completed) && result.exit_code == Some(0) { + return None; + } + + let command = command.to_ascii_lowercase(); + let combined = format!("{}\n{}", result.stdout, result.stderr).to_ascii_lowercase(); + let mentions_missing_setuptools = [ + "no module named 'setuptools'", + "no module named \"setuptools\"", + "setuptools is not available", + "cannot import 'setuptools", + "cannot import \"setuptools", + "missing dependencies", + ] + .iter() + .any(|needle| combined.contains(needle)) + && combined.contains("setuptools"); + if !mentions_missing_setuptools { + return None; + } + + let pythonish_command = [ + "python", + "pip", + "pytest", + "tox", + "nox", + "cython", + "setup.py", + "build_ext", + ] + .iter() + .any(|needle| command.contains(needle)); + let pythonish_output = [ + "setup.py", + "pyproject.toml", + "build_meta", + "build_ext", + "pep 517", + "cython", + ] + .iter() + .any(|needle| combined.contains(needle)); + + if 
pythonish_command || pythonish_output { + Some(PYTHON_BUILD_DEPENDENCY_HINT) + } else { + None + } +} + fn command_likely_needs_network(command: &str) -> bool { let normalized = command.to_ascii_lowercase(); let Some(primary) = extract_primary_command(&normalized) else { @@ -2002,6 +2136,32 @@ fn shell_network_restricted_hint<'a>( } } +fn shell_job_owner_from_context(context: &ToolContext) -> Option { + let agent_id = context + .owner_agent_id + .as_deref() + .map(str::trim) + .filter(|value| !value.is_empty())?; + let agent_name = context + .owner_agent_name + .as_deref() + .map(str::trim) + .filter(|value| !value.is_empty()) + .unwrap_or(agent_id); + Some(ShellJobOwner { + agent_id: agent_id.to_string(), + agent_name: agent_name.to_string(), + }) +} + +fn attach_shell_owner_metadata(metadata: &mut serde_json::Value, context: &ToolContext) { + let Some(owner) = shell_job_owner_from_context(context) else { + return; + }; + metadata["owner_agent_id"] = json!(owner.agent_id); + metadata["owner_agent_name"] = json!(owner.agent_name); +} + fn exec_shell_input_is_parallel_readonly(input: &serde_json::Value) -> bool { let Some(command) = input.get("command").and_then(serde_json::Value::as_str) else { return false; @@ -2391,13 +2551,17 @@ impl ToolSpec for ExecShellTool { } else { stdout_summary.clone() }; - let output = if result.stdout.is_empty() && result.stderr.is_empty() { + let python_dependency_hint = python_build_dependency_hint(command, &result); + let mut output = if result.stdout.is_empty() && result.stderr.is_empty() { "(no output)".to_string() } else if result.stderr.is_empty() { result.stdout.clone() } else { format!("{}\n\nSTDERR:\n{}", result.stdout, result.stderr) }; + if let Some(hint) = python_dependency_hint { + output = format!("{hint}\n\n{output}"); + } let mut metadata = json!({ "exit_code": result.exit_code, @@ -2421,7 +2585,9 @@ impl ToolSpec for ExecShellTool { "canceled": false, "sandbox_backend": "opensandbox", }); + 
attach_shell_owner_metadata(&mut metadata, context); attach_cargo_failure_summary(&mut metadata, command, &result); + attach_python_build_dependency_hint(&mut metadata, python_dependency_hint); return Ok(ToolResult { content: output, @@ -2447,7 +2613,7 @@ impl ToolSpec for ExecShellTool { .shell_manager .lock() .map_err(|_| ToolError::execution_failed("shell manager lock poisoned"))?; - manager.execute_with_options_env( + manager.execute_with_options_env_for_owner( command, working_dir.as_deref(), timeout_ms, @@ -2456,6 +2622,7 @@ impl ToolSpec for ExecShellTool { tty, policy_override, extra_env, + shell_job_owner_from_context(context), ) } else { execute_foreground_via_background( @@ -2499,6 +2666,7 @@ impl ToolSpec for ExecShellTool { let network_restricted_hint = shell_network_restricted_hint(context, command, &result).map(str::to_string); let provenance_hint = macos_provenance_hint(&result); + let python_dependency_hint = python_build_dependency_hint(command, &result); let mut output = if interactive { format!( "Interactive command completed (exit code: {:?})", @@ -2544,6 +2712,9 @@ impl ToolSpec for ExecShellTool { if let Some(hint) = provenance_hint { output = format!("{hint}\n\n{output}"); } + if let Some(hint) = python_dependency_hint { + output = format!("{hint}\n\n{output}"); + } let mut metadata = json!({ "exit_code": result.exit_code, @@ -2607,7 +2778,9 @@ impl ToolSpec for ExecShellTool { if provenance_hint.is_some() { metadata["macos_provenance_restricted"] = json!(true); } + attach_shell_owner_metadata(&mut metadata, context); attach_cargo_failure_summary(&mut metadata, command, &result); + attach_python_build_dependency_hint(&mut metadata, python_dependency_hint); Ok(ToolResult { content: output, @@ -2654,6 +2827,7 @@ fn build_shell_delta_tool_result(delta: ShellDeltaResult, context: &ToolContext) let network_restricted_hint = shell_network_restricted_hint(context, &delta.command, &result).map(str::to_string); let provenance_hint = 
macos_provenance_hint(&result); + let python_dependency_hint = python_build_dependency_hint(&delta.command, &result); let stdout_summary = summarize_output(&result.stdout); let stderr_summary = summarize_output(&result.stderr); let summary = if !stderr_summary.is_empty() { @@ -2681,6 +2855,9 @@ fn build_shell_delta_tool_result(delta: ShellDeltaResult, context: &ToolContext) if let Some(hint) = provenance_hint { output = format!("{hint}\n\n{output}"); } + if let Some(hint) = python_dependency_hint { + output = format!("{hint}\n\n{output}"); + } let mut metadata = json!({ "exit_code": result.exit_code, @@ -2704,7 +2881,9 @@ fn build_shell_delta_tool_result(delta: ShellDeltaResult, context: &ToolContext) "command": delta.command, "stream_delta": true, }); + attach_shell_owner_metadata(&mut metadata, context); attach_cargo_failure_summary(&mut metadata, &delta.command, &result); + attach_python_build_dependency_hint(&mut metadata, python_dependency_hint); let mut tool_result = ToolResult { content: output, diff --git a/crates/tui/src/tools/shell/tests.rs b/crates/tui/src/tools/shell/tests.rs index 3a7e2e7d87..818fdde42f 100644 --- a/crates/tui/src/tools/shell/tests.rs +++ b/crates/tui/src/tools/shell/tests.rs @@ -331,6 +331,50 @@ async fn background_start_advertises_task_status_completion() { ); } +#[tokio::test] +async fn background_shell_job_carries_subagent_owner() { + let tmp = tempdir().expect("tempdir"); + let ctx = ToolContext::new(tmp.path()).with_owner_agent("agent_owner", "verifier"); + let result = ExecShellTool + .execute( + json!({"command": sleep_command(2), "background": true}), + &ctx, + ) + .await + .expect("start owned background shell"); + + let metadata = result.metadata.as_ref().expect("metadata"); + assert_eq!( + metadata.get("owner_agent_id").and_then(Value::as_str), + Some("agent_owner") + ); + assert_eq!( + metadata.get("owner_agent_name").and_then(Value::as_str), + Some("verifier") + ); + let task_id = metadata + .get("task_id") + 
.and_then(Value::as_str) + .expect("task id") + .to_string(); + + { + let mut manager = ctx.shell_manager.lock().expect("shell manager"); + let snapshot = manager + .list_jobs() + .into_iter() + .find(|job| job.id == task_id) + .expect("owned shell job snapshot"); + assert_eq!(snapshot.owner_agent_id.as_deref(), Some("agent_owner")); + assert_eq!(snapshot.owner_agent_name.as_deref(), Some("verifier")); + } + + ShellCancelTool + .execute(json!({"task_id": task_id}), &ctx) + .await + .expect("cancel owned background shell"); +} + #[tokio::test] async fn drain_finished_jobs_reports_once() { let tmp = tempdir().expect("tempdir"); @@ -815,6 +859,58 @@ fn shell_delta_result_keeps_existing_summary_for_generic_cargo_failure() { ); } +#[test] +fn shell_delta_result_surfaces_python_build_dependency_hint() { + let tmp = tempdir().expect("tempdir"); + let ctx = ToolContext::new(tmp.path()); + let result = ShellResult { + task_id: None, + status: ShellStatus::Failed, + exit_code: Some(1), + stdout: String::new(), + stderr: "running build_ext\nModuleNotFoundError: No module named 'setuptools'\n" + .to_string(), + duration_ms: 12, + stdout_len: 0, + stderr_len: 72, + stdout_omitted: 0, + stderr_omitted: 0, + stdout_truncated: false, + stderr_truncated: false, + sandboxed: false, + sandbox_type: None, + sandbox_denied: false, + }; + + let tool_result = build_shell_delta_tool_result( + ShellDeltaResult { + command: "python setup.py build_ext --inplace".to_string(), + result, + stdout_total_len: 0, + stderr_total_len: 72, + }, + &ctx, + ); + + assert!(!tool_result.success); + assert!( + tool_result + .content + .starts_with("Python build dependency missing") + ); + let metadata = tool_result.metadata.expect("metadata"); + assert_eq!( + metadata["python_build_dependency_hint"]["kind"], + json!("missing_setuptools") + ); + assert!( + metadata["python_build_dependency_hint"]["hint"] + .as_str() + .unwrap() + .contains("setuptools") + ); +} + #[test] fn 
test_summarize_output_strips_truncation_note() { let long_output = "x".repeat(60_000); diff --git a/crates/tui/src/tools/spec.rs b/crates/tui/src/tools/spec.rs index 803c88507b..d95dbd26fa 100644 --- a/crates/tui/src/tools/spec.rs +++ b/crates/tui/src/tools/spec.rs @@ -117,6 +117,11 @@ pub struct ToolContext { pub workspace: PathBuf, /// Shared shell manager for background tasks and streaming IO. pub shell_manager: SharedShellManager, + /// Sub-agent that owns tool work started through this context. Root user + /// turns leave this unset; child contexts stamp it so long-running shell + /// jobs can be attributed in UI surfaces. + pub owner_agent_id: Option, + pub owner_agent_name: Option, /// Whether to allow paths outside workspace pub trust_mode: bool, /// Current sandbox policy @@ -222,6 +227,8 @@ impl ToolContext { Self { workspace, shell_manager, + owner_agent_id: None, + owner_agent_name: None, trust_mode: false, sandbox_policy: SandboxPolicy::None, notes_path, @@ -264,6 +271,8 @@ impl ToolContext { Self { workspace, shell_manager, + owner_agent_id: None, + owner_agent_name: None, trust_mode, sandbox_policy: SandboxPolicy::None, notes_path: notes_path.into(), @@ -306,6 +315,8 @@ impl ToolContext { Self { workspace, shell_manager, + owner_agent_id: None, + owner_agent_name: None, trust_mode, sandbox_policy: SandboxPolicy::None, notes_path: notes_path.into(), @@ -349,6 +360,20 @@ impl ToolContext { self } + /// Stamp tool work with the sub-agent that owns it. + #[must_use] + pub fn with_owner_agent( + mut self, + agent_id: impl Into, + agent_name: impl Into, + ) -> Self { + let agent_id = agent_id.into(); + let agent_name = agent_name.into(); + self.owner_agent_id = (!agent_id.trim().is_empty()).then_some(agent_id); + self.owner_agent_name = (!agent_name.trim().is_empty()).then_some(agent_name); + self + } + /// Attach skill discovery settings for tools that need to resolve /// model-visible skills by name. 
#[must_use] diff --git a/crates/tui/src/tools/subagent/mod.rs b/crates/tui/src/tools/subagent/mod.rs index 4f71650185..a56410d9f6 100644 --- a/crates/tui/src/tools/subagent/mod.rs +++ b/crates/tui/src/tools/subagent/mod.rs @@ -29,7 +29,9 @@ use crate::config::MAX_SUBAGENTS; use crate::core::events::Event; use crate::dependencies::{ExternalTool, Git}; use crate::llm_client::LlmClient; -use crate::models::{ContentBlock, Message, MessageRequest, MessageResponse, SystemPrompt, Tool}; +use crate::models::{ + ContentBlock, Message, MessageRequest, MessageResponse, SystemPrompt, Tool, Usage, +}; use crate::request_tuning::RequestTuning; use crate::tools::handle::VarHandle; use crate::tools::plan::{PlanState, SharedPlanState}; @@ -74,6 +76,7 @@ fn release_resident_leases_for(agent_id: &str) { /// the `SubAgentManager`. const DEFAULT_MAX_STEPS: u32 = u32::MAX; const TOOL_TIMEOUT: Duration = Duration::from_secs(30); +const MIN_SUBAGENT_SPAWN_TOKEN_RESERVE: u64 = 1; /// Format a step counter for sub-agent progress messages. /// @@ -91,6 +94,8 @@ fn format_step_counter(steps: u32, max_steps: u32) -> String { // the requested ceiling. const SUBAGENT_RESPONSE_MAX_TOKENS: u32 = 16_384; const MAX_CONSECUTIVE_TRUNCATED_SUBAGENT_RESPONSES: u32 = 5; +const SUBAGENT_TRANSIENT_PROVIDER_MAX_RETRIES: u32 = 2; +const SUBAGENT_TRANSIENT_PROVIDER_INITIAL_BACKOFF: Duration = Duration::from_millis(250); /// Per-step LLM API call timeout. Each `create_message` request must complete /// within this window or the step is treated as timed out. Prevents a single /// stuck API call from blocking the sub-agent indefinitely. 
@@ -108,6 +113,7 @@ const MAX_AGENT_WORKER_RECORDS: usize = 256; const MAX_AGENT_WORKER_EVENTS_PER_RECORD: usize = 128; const SUBAGENT_STATE_SCHEMA_VERSION: u32 = 1; const SUBAGENT_STATE_FILE: &str = "subagents.v1.json"; +const SUBAGENT_WORKTREE_ROOT_DIR: &str = ".codewhale-worktrees"; const SUBAGENT_RESTART_REASON: &str = "Interrupted by process restart"; const SUBAGENT_QUEUED_LAUNCH_REASON: &str = "queued: waiting for a sub-agent launch slot"; const SUBAGENT_MODEL_WAIT_REASON: &str = "waiting for model response"; @@ -592,6 +598,11 @@ pub enum SubAgentStatus { Interrupted(String), Failed(String), Cancelled, + /// Worker stopped because it exceeded its own per-worker token budget. + /// Distinct from the scope-level admission gate (#3319): this caps a + /// single runaway worker mid-run, while the scope gate bounds total + /// fan-out across a root run and its descendants. + BudgetExhausted, } /// Structured reason a non-running sub-agent needs parent action. @@ -754,7 +765,19 @@ pub struct AgentRunArtifactRef { pub struct AgentRunUsage { pub status: String, #[serde(default, skip_serializing_if = "Option::is_none")] + pub input_tokens: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub output_tokens: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] pub total_tokens: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub token_budget: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub budget_spent_tokens: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub budget_remaining_tokens: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub budget_scope: Option, pub note: String, } @@ -894,11 +917,54 @@ fn default_agent_run_takeover() -> AgentRunTakeoverTarget { fn default_agent_run_usage() -> AgentRunUsage { AgentRunUsage { status: "unknown".to_string(), + input_tokens: None, + output_tokens: None, total_tokens: None, + token_budget: None, + 
budget_spent_tokens: None, + budget_remaining_tokens: None, + budget_scope: None, note: "Token usage is not yet reported by the sub-agent worker ledger.".to_string(), } } +fn positive_token_budget(budget: Option) -> Option { + budget.filter(|value| *value > 0) +} + +fn usage_total_tokens(usage: &Usage) -> u64 { + u64::from(usage.input_tokens).saturating_add(u64::from(usage.output_tokens)) +} + +fn refresh_usage_note(usage: &mut AgentRunUsage) { + let worker_total = usage.total_tokens.unwrap_or(0); + if let Some(limit) = usage.token_budget { + let spent = usage.budget_spent_tokens.unwrap_or(worker_total); + let remaining = usage + .budget_remaining_tokens + .unwrap_or_else(|| limit.saturating_sub(spent)); + usage.status = if remaining == 0 { + "budget_exhausted".to_string() + } else if worker_total > 0 { + "reported".to_string() + } else { + "tracking".to_string() + }; + usage.note = if worker_total > 0 { + format!( + "Token budget: {spent}/{limit} spent, {remaining} remaining. This worker reported {worker_total} tokens." 
+ ) + } else { + format!("Token budget: {spent}/{limit} spent, {remaining} remaining.") + }; + } else if worker_total > 0 { + usage.status = "reported".to_string(); + usage.note = format!("Provider reported {worker_total} tokens for this worker."); + } else if usage.status.is_empty() { + *usage = default_agent_run_usage(); + } +} + fn default_agent_run_verification() -> AgentRunVerificationSummary { AgentRunVerificationSummary { status: "self_report_only".to_string(), @@ -1124,6 +1190,8 @@ fn normalize_worker_record(mut record: AgentWorkerRecord) -> AgentWorkerRecord { } if record.usage.status.is_empty() { record.usage = default_agent_run_usage(); + } else { + refresh_usage_note(&mut record.usage); } if record.verification.status.is_empty() { record.verification = default_agent_run_verification(); @@ -1165,6 +1233,7 @@ pub(crate) struct SubAgentSpawnOptions { pub model_route: Option, pub nickname: Option, pub fork_context: bool, + pub token_budget: Option, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -1236,11 +1305,13 @@ struct SpawnRequest { model: Option, model_strength: SubAgentModelStrength, thinking: SubAgentThinking, - /// Optional working directory for the child. Must canonicalize to a - /// path inside the parent's workspace. Used to dispatch parallel work - /// into separate git worktrees: parent runs `git worktree add` first, - /// then spawns children with the worktree path as `cwd`. + /// Optional working directory for the child. Must canonicalize to a path + /// inside the parent's workspace. For first-class git worktree isolation, + /// use `worktree` instead of pre-creating a cwd by hand. cwd: Option, + /// Optional first-class git worktree isolation. When set, CodeWhale + /// creates a sibling worktree/branch and runs the child from that checkout. + worktree: Option, /// Optional file path for cache-aware resident mode (#529). When set, /// the child's prompt is prefixed with the file contents for prefix-cache /// locality. 
A global ownership table prevents two agents from holding @@ -1252,6 +1323,25 @@ struct SpawnRequest { /// Legacy recursion budget for descendants. The model-facing child tool /// surface is leaf-only; this remains for persisted/internal records. max_depth: Option, + /// Optional aggregate token budget for this child and its descendants. + /// When unset, the child inherits the parent's budget pool or the + /// configured root default. + token_budget: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct SubAgentWorktreeRequest { + branch: Option, + path: Option, + base_ref: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct AgentUsageBudgetScope { + scope_id: String, + limit: u64, + spent: u64, + remaining: u64, } /// Durable recovery point for an interrupted sub-agent session. @@ -1324,7 +1414,7 @@ impl Default for PersistedSubAgentState { } /// Default cap on sub-agent recursion depth. Override via -/// `[runtime] max_spawn_depth = N` in config. +/// `[subagents] max_depth = N` in config. /// /// Sourced from [`codewhale_config::DEFAULT_SPAWN_DEPTH`] so standalone /// sub-agents and fleet workers share ONE recursion axis (no "two moving @@ -1550,7 +1640,7 @@ impl SubAgentRuntime { } /// Override the maximum spawn depth (default `DEFAULT_MAX_SPAWN_DEPTH`). - /// Used by config wiring (`[runtime] max_spawn_depth = N`) and tests. + /// Used by config wiring (`[subagents] max_depth = N`) and tests. #[must_use] #[allow(dead_code)] pub fn with_max_spawn_depth(mut self, max: u32) -> Self { @@ -1759,6 +1849,8 @@ pub struct SubAgentManager { state_path: Option, max_steps: u32, max_agents: usize, + max_admitted_agents: usize, + default_token_budget: Option, running_heartbeat_timeout: Duration, /// Stable id assigned at manager construction (#405). 
Stamped on /// every agent the manager spawns; agents loaded from the @@ -1795,6 +1887,8 @@ impl SubAgentManager { state_path: None, max_steps: DEFAULT_MAX_STEPS, max_agents, + max_admitted_agents: max_agents, + default_token_budget: None, running_heartbeat_timeout: Duration::from_secs( crate::config::DEFAULT_SUBAGENT_HEARTBEAT_TIMEOUT_SECS, ), @@ -1817,6 +1911,23 @@ impl SubAgentManager { self } + /// Set the total queued + running admission ceiling for this manager. + /// The value is always at least the instantaneous concurrency cap. + #[must_use] + pub fn with_admission_limit(mut self, max_admitted: usize) -> Self { + self.max_admitted_agents = + max_admitted.clamp(self.max_agents, crate::config::MAX_SUBAGENT_ADMISSION); + self + } + + /// Set the default aggregate token budget for root sub-agent runs. + /// `None` and `Some(0)` both preserve unlimited legacy behavior. + #[must_use] + pub fn with_default_token_budget(mut self, budget: Option) -> Self { + self.default_token_budget = positive_token_budget(budget); + self + } + /// Return the boot id this manager stamps on agents it spawns. /// Exposed for tests; internal callers use the field directly. #[cfg(test)] @@ -1848,6 +1959,35 @@ impl SubAgentManager { self } + /// Apply live runtime limits. The launch semaphore is replaced only when + /// no sub-agent is currently running, because active tasks may still hold + /// permits from the previous semaphore. 
+ pub fn update_runtime_limits( + &mut self, + max_agents: usize, + max_admitted_agents: usize, + running_heartbeat_timeout: Duration, + launch_concurrency: usize, + default_token_budget: Option, + ) -> bool { + self.max_agents = max_agents.clamp(1, crate::config::MAX_SUBAGENTS); + self.max_admitted_agents = + max_admitted_agents.clamp(self.max_agents, crate::config::MAX_SUBAGENT_ADMISSION); + self.default_token_budget = positive_token_budget(default_token_budget); + self.running_heartbeat_timeout = if running_heartbeat_timeout.is_zero() { + Duration::from_secs(crate::config::DEFAULT_SUBAGENT_HEARTBEAT_TIMEOUT_SECS) + } else { + running_heartbeat_timeout + }; + if self.running_count() == 0 { + self.launch_gate = + Arc::new(Semaphore::new(launch_concurrency.clamp(1, self.max_agents))); + true + } else { + false + } + } + fn persist_state(&self) -> Result<()> { let Some(path) = self.state_path.as_ref() else { return Ok(()); @@ -2024,6 +2164,7 @@ impl SubAgentManager { self.worker_records .insert(worker.spec.worker_id.clone(), worker); } + self.refresh_all_budget_scopes(); self.prune_worker_records(); Ok(()) @@ -2077,6 +2218,139 @@ impl SubAgentManager { self.worker_records.get(worker_id).cloned() } + fn aggregate_budget_spent(&self, scope_id: &str) -> u64 { + self.worker_records + .values() + .filter(|record| record.usage.budget_scope.as_deref() == Some(scope_id)) + .fold(0_u64, |total, record| { + total.saturating_add(record.usage.total_tokens.unwrap_or(0)) + }) + } + + fn inherited_budget_scope(&self, parent_run_id: Option<&str>) -> Option<(String, u64)> { + let parent = self.worker_records.get(parent_run_id?)?; + let limit = parent.usage.token_budget?; + let scope_id = parent + .usage + .budget_scope + .clone() + .unwrap_or_else(|| parent.spec.worker_id.clone()); + Some((scope_id, limit)) + } + + fn resolve_spawn_budget_scope( + &self, + worker_id: &str, + parent_run_id: Option<&str>, + requested_budget: Option, + ) -> Result> { + let scope = if let Some(limit) = 
positive_token_budget(requested_budget) { + Some((worker_id.to_string(), limit)) + } else if let Some(parent_scope) = self.inherited_budget_scope(parent_run_id) { + Some(parent_scope) + } else { + self.default_token_budget + .map(|limit| (worker_id.to_string(), limit)) + }; + + let Some((scope_id, limit)) = scope else { + return Ok(None); + }; + let spent = self.aggregate_budget_spent(&scope_id); + let remaining = limit.saturating_sub(spent); + if remaining < MIN_SUBAGENT_SPAWN_TOKEN_RESERVE { + return Err(anyhow!( + "Sub-agent token budget exhausted for scope {scope_id}: {spent}/{limit} tokens spent, {remaining} remaining. Wait for the parent/Workflow to summarize results or start a new agent run with an explicit token_budget override." + )); + } + Ok(Some(AgentUsageBudgetScope { + scope_id, + limit, + spent, + remaining, + })) + } + + fn attach_budget_scope(&mut self, worker_id: &str, scope: AgentUsageBudgetScope) { + let Some(record) = self.worker_records.get_mut(worker_id) else { + return; + }; + record.usage.token_budget = Some(scope.limit); + record.usage.budget_scope = Some(scope.scope_id.clone()); + record.usage.budget_spent_tokens = Some(scope.spent); + record.usage.budget_remaining_tokens = Some(scope.remaining); + refresh_usage_note(&mut record.usage); + self.refresh_budget_scope(&scope.scope_id); + } + + fn refresh_budget_scope(&mut self, scope_id: &str) { + let Some(limit) = self + .worker_records + .values() + .find(|record| record.usage.budget_scope.as_deref() == Some(scope_id)) + .and_then(|record| record.usage.token_budget) + else { + return; + }; + let spent = self.aggregate_budget_spent(scope_id); + let remaining = limit.saturating_sub(spent); + for record in self.worker_records.values_mut() { + if record.usage.budget_scope.as_deref() == Some(scope_id) { + record.usage.token_budget = Some(limit); + record.usage.budget_spent_tokens = Some(spent); + record.usage.budget_remaining_tokens = Some(remaining); + refresh_usage_note(&mut record.usage); + } 
+ } + } + + fn refresh_all_budget_scopes(&mut self) { + let scope_ids = self + .worker_records + .values() + .filter_map(|record| record.usage.budget_scope.clone()) + .collect::>(); + for scope_id in scope_ids { + self.refresh_budget_scope(&scope_id); + } + } + + fn record_worker_usage(&mut self, worker_id: &str, usage: &Usage) { + let now_ms = epoch_millis_now(); + let total_delta = usage_total_tokens(usage); + let Some(record) = self.worker_records.get_mut(worker_id) else { + return; + }; + record.updated_at_ms = now_ms; + record.usage.input_tokens = Some( + record + .usage + .input_tokens + .unwrap_or(0) + .saturating_add(u64::from(usage.input_tokens)), + ); + record.usage.output_tokens = Some( + record + .usage + .output_tokens + .unwrap_or(0) + .saturating_add(u64::from(usage.output_tokens)), + ); + record.usage.total_tokens = Some( + record + .usage + .total_tokens + .unwrap_or(0) + .saturating_add(total_delta), + ); + let scope_id = record.usage.budget_scope.clone(); + refresh_usage_note(&mut record.usage); + if let Some(scope_id) = scope_id { + self.refresh_budget_scope(&scope_id); + } + self.persist_state_debounced(); + } + fn push_worker_event( &mut self, record: &mut AgentWorkerRecord, @@ -2152,6 +2426,7 @@ impl SubAgentManager { SubAgentStatus::Failed(err) => Some(err.clone()), SubAgentStatus::Interrupted(reason) => Some(reason.clone()), SubAgentStatus::Cancelled => Some("cancelled".to_string()), + SubAgentStatus::BudgetExhausted => Some("token budget exhausted".to_string()), SubAgentStatus::Running => Some("running".to_string()), }; self.record_worker_event(worker_id, status, message, Some(result.steps_taken), None); @@ -2204,8 +2479,44 @@ impl SubAgentManager { } } + pub fn cancel_agent(&mut self, agent_ref: &str) -> Result { + let agent_id = self.resolve_agent_ref(agent_ref)?; + let snapshot = { + let agent = self + .agents + .get_mut(&agent_id) + .ok_or_else(|| anyhow!("Agent {agent_id} not found"))?; + if agent.status != SubAgentStatus::Running { + 
return Ok(agent.snapshot()); + } + agent.status = SubAgentStatus::Cancelled; + agent.result = Some("Cancelled by parent request.".to_string()); + release_resident_leases_for(&agent.id); + if let Some(handle) = agent.task_handle.take() { + handle.abort(); + } + agent.input_tx = None; + agent.snapshot() + }; + self.record_worker_event( + &agent_id, + AgentWorkerStatus::Cancelled, + snapshot.result.clone(), + Some(snapshot.steps_taken), + None, + ); + self.persist_state_best_effort(); + Ok(snapshot) + } + /// Count running agents. pub fn running_count(&self) -> usize { + self.admitted_count() + } + + /// Count live sub-agents that have been admitted, including queued + /// workers waiting on the launch gate. + pub fn admitted_count(&self) -> usize { self.agents .values() .filter(|agent| { @@ -2225,6 +2536,41 @@ impl SubAgentManager { .count() } + /// Count admitted workers that are currently waiting for the launch gate. + pub fn queued_count(&self) -> usize { + self.agents + .values() + .filter(|agent| { + agent.status == SubAgentStatus::Running + && agent.task_handle.is_some() + && !self.running_heartbeat_timed_out(agent) + && self + .worker_records + .get(&agent.id) + .is_some_and(|record| record.status == AgentWorkerStatus::Queued) + }) + .count() + } + + /// Count admitted workers not currently in the queued launch state. + pub fn active_count(&self) -> usize { + self.admitted_count().saturating_sub(self.queued_count()) + } + + fn check_admission_capacity(&self) -> Result<()> { + let admitted = self.admitted_count(); + if admitted >= self.max_admitted_agents { + return Err(anyhow!( + "Sub-agent admission limit reached (max_admitted {}, admitted {}, running {}, queued {}). 
Wait for queued/running agents to finish, cancel unneeded agents, or raise [subagents] max_admitted for this Workflow.", + self.max_admitted_agents, + admitted, + self.active_count(), + self.queued_count() + )); + } + Ok(()) + } + fn running_heartbeat_timed_out(&self, agent: &SubAgent) -> bool { agent.status == SubAgentStatus::Running && agent.task_handle.is_some() @@ -2297,19 +2643,18 @@ impl SubAgentManager { ) -> Result { self.cleanup(COMPLETED_AGENT_RETENTION); - if self.running_count() >= self.max_agents { - return Err(anyhow!( - "Sub-agent limit reached (max {}, running {}). Cancel, close, or wait for an existing agent to finish. Consider issuing multiple tool calls in one turn (the dispatcher runs them in parallel) for parallel one-shot work.", - self.max_agents, - self.running_count() - )); - } + self.check_admission_capacity()?; if let Some(model) = options.model.as_deref() { runtime.model = model.to_string(); } let effective_model = runtime.model.clone(); let agent_id = format!("agent_{}", &Uuid::new_v4().to_string()[..8]); + let budget_scope = self.resolve_spawn_budget_scope( + &agent_id, + runtime.parent_agent_id.as_deref(), + options.token_budget, + )?; let active_names: std::collections::HashSet = self .agents .values() @@ -2404,6 +2749,9 @@ impl SubAgentManager { max_spawn_depth: runtime.max_spawn_depth, }; self.register_worker(worker_spec); + if let Some(scope) = budget_scope { + self.attach_budget_scope(&agent_id, scope); + } if let Some(event_tx) = runtime.event_tx.clone() { let _ = event_tx.try_send(Event::AgentSpawned { @@ -2426,6 +2774,7 @@ impl SubAgentManager { fork_context: options.fork_context, started_at, max_steps, + token_budget: options.token_budget, input_rx, launch_gate, }; @@ -2454,6 +2803,32 @@ impl SubAgentManager { Ok(agent.snapshot()) } + pub fn get_result_by_ref(&self, agent_ref: &str) -> Result { + let agent_id = self.resolve_agent_ref(agent_ref)?; + self.get_result(&agent_id) + } + + pub fn terminal_results_excluding( + &self, 
+ delivered_ids: &std::collections::HashSet, + ) -> Vec { + let mut results = self + .agents + .values() + .filter(|agent| agent.status != SubAgentStatus::Running) + .filter(|agent| agent.session_boot_id == self.current_session_boot_id) + .filter(|agent| { + self.worker_records + .get(&agent.id) + .is_none_or(|record| record.spec.parent_run_id.is_none()) + }) + .filter(|agent| !delivered_ids.contains(&agent.id)) + .map(SubAgent::snapshot) + .collect::>(); + results.sort_by(|a, b| a.agent_id.cmp(&b.agent_id)); + results + } + /// Resolve either a durable agent id or a model-facing session name. fn resolve_agent_ref(&self, agent_ref: &str) -> Result { let agent_ref = agent_ref.trim(); @@ -2972,8 +3347,10 @@ pub fn new_shared_subagent_manager(workspace: PathBuf, max_agents: usize) -> Sha new_shared_subagent_manager_with_timeout( workspace, max_agents, + max_agents, Duration::from_secs(crate::config::DEFAULT_SUBAGENT_HEARTBEAT_TIMEOUT_SECS), max_agents, + None, ) } @@ -2983,14 +3360,18 @@ pub fn new_shared_subagent_manager(workspace: PathBuf, max_agents: usize) -> Sha pub fn new_shared_subagent_manager_with_timeout( workspace: PathBuf, max_agents: usize, + max_admitted_agents: usize, running_heartbeat_timeout: Duration, launch_concurrency: usize, + default_token_budget: Option, ) -> SharedSubAgentManager { let max_agents = max_agents.clamp(1, MAX_SUBAGENTS); let state_path = default_state_path(&workspace); let mut manager = SubAgentManager::new(workspace, max_agents) + .with_admission_limit(max_admitted_agents) .with_running_heartbeat_timeout(running_heartbeat_timeout) .with_launch_concurrency(launch_concurrency) + .with_default_token_budget(default_token_budget) .with_state_path(state_path); if let Err(err) = manager.load_state() { // Routed through tracing instead of stderr — see comment in @@ -3015,6 +3396,36 @@ impl AgentTool { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum AgentToolAction { + Start, + Status, + Peek, + Cancel, +} + +fn 
parse_agent_tool_action(input: &Value) -> Result { + let Some(action) = optional_input_str(input, &["action", "op"]) else { + return Ok(AgentToolAction::Start); + }; + match action.trim().to_ascii_lowercase().as_str() { + "" | "start" | "spawn" | "run" => Ok(AgentToolAction::Start), + "status" | "list" | "inspect" => Ok(AgentToolAction::Status), + "peek" | "progress" => Ok(AgentToolAction::Peek), + "cancel" | "stop" | "abort" => Ok(AgentToolAction::Cancel), + other => Err(ToolError::invalid_input(format!( + "Invalid agent action '{other}'. Use start, status, peek, or cancel." + ))), + } +} + +fn parse_agent_ref(input: &Value) -> Option { + optional_input_str(input, &["agent_id", "id", "session_name", "name"]) + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(str::to_string) +} + #[async_trait] impl ToolSpec for AgentTool { fn name(&self) -> &'static str { @@ -3023,9 +3434,10 @@ impl ToolSpec for AgentTool { fn description(&self) -> &'static str { concat!( - "Start one focused child agent task. Use this only for independent work that benefits from a clean context. ", + "Start, inspect, peek at, or cancel focused child agent tasks through one surface. Use start only for independent work that benefits from a clean context. ", + "For several independent targets, call agent separately for each target; CodeWhale runs or queues them under runtime capacity and provider rate-limit backpressure. ", "The child runs in the background and reports back automatically when finished; keep tiny reads/searches local. ", - "Returns a session projection with the generated agent_id and transcript_handle for UI/debug inspection." + "Use action=status or action=peek with agent_id to inspect progress, and action=cancel with agent_id to stop a running child. Returns session projections with transcript_handle for UI/debug inspection." 
) } @@ -3033,9 +3445,22 @@ impl ToolSpec for AgentTool { json!({ "type": "object", "properties": { + "action": { + "type": "string", + "enum": ["start", "status", "peek", "cancel"], + "description": "start (default) launches a child. status lists current children or inspects agent_id. peek is status for one child. cancel stops a running child by agent_id." + }, + "agent_id": { + "type": "string", + "description": "Agent id or session name for action=status, action=peek, or action=cancel." + }, + "include_archived": { + "type": "boolean", + "description": "For action=status without agent_id, include prior-session completed agents." + }, "name": { "type": "string", - "description": "Optional stable session name. Defaults to the generated agent_id." + "description": "For action=start, optional stable session name. For status/peek/cancel, accepted as an alias for agent_id." }, "prompt": { "type": "string", @@ -3061,7 +3486,23 @@ impl ToolSpec for AgentTool { }, "cwd": { "type": "string", - "description": "Optional working directory for the child; must be inside the parent workspace" + "description": "Optional pre-existing working directory for the child; must be inside the parent workspace. Prefer worktree=true for isolated parallel edit tasks." + }, + "worktree": { + "type": "boolean", + "description": "When true, create a fresh git worktree and branch for this child before it starts. Use for parallel edit tasks that must not collide with the parent checkout." + }, + "worktree_branch": { + "type": "string", + "description": "Optional branch name for worktree=true. Defaults to codex/agent--." + }, + "worktree_base": { + "type": "string", + "description": "Optional git ref to branch the worktree from. Defaults to HEAD in the parent checkout." + }, + "worktree_path": { + "type": "string", + "description": "Optional worktree checkout path. Relative paths are created under the default sibling .codewhale-worktrees directory, not inside the parent checkout." 
}, "fork_context": { "type": "boolean", @@ -3072,9 +3513,14 @@ impl ToolSpec for AgentTool { "minimum": 0, "maximum": 3, "description": "Optional remaining nested-agent depth budget for this child. Defaults to the configured runtime budget." + }, + "token_budget": { + "type": "integer", + "minimum": 1, + "description": "Optional aggregate token budget for this child and descendants. When unset, the child inherits the parent budget pool or the configured root default." } }, - "required": ["prompt"] + "required": [] }) } @@ -3090,6 +3536,22 @@ impl ToolSpec for AgentTool { } async fn execute(&self, input: Value, context: &ToolContext) -> Result { + let action = parse_agent_tool_action(&input)?; + match action { + AgentToolAction::Start => {} + AgentToolAction::Status | AgentToolAction::Peek => { + return inspect_agent_from_input( + &input, + self.manager.clone(), + context, + matches!(action, AgentToolAction::Peek), + ) + .await; + } + AgentToolAction::Cancel => { + return cancel_agent_from_input(&input, self.manager.clone(), context).await; + } + } let snapshot = spawn_subagent_from_input(input, self.manager.clone(), self.runtime.clone()).await?; let worker_record = { @@ -3109,6 +3571,95 @@ impl ToolSpec for AgentTool { } } +async fn inspect_agent_from_input( + input: &Value, + manager: SharedSubAgentManager, + context: &ToolContext, + peek: bool, +) -> Result { + let include_archived = + parse_optional_bool(input, &["include_archived", "includeArchived"]).unwrap_or(false); + + if let Some(agent_ref) = parse_agent_ref(input) { + let (snapshot, worker_record) = { + let manager = manager.read().await; + let snapshot = manager + .get_result_by_ref(&agent_ref) + .map_err(|err| ToolError::invalid_input(err.to_string()))?; + let worker_record = manager.get_worker_record(&snapshot.agent_id); + (snapshot, worker_record) + }; + let projection = + subagent_session_projection(snapshot, include_archived, context, worker_record).await; + let mut tool_result = 
ToolResult::json(&projection) + .map_err(|err| ToolError::execution_failed(err.to_string()))?; + tool_result.metadata = Some(json!({ + "action": if peek { "peek" } else { "status" }, + "status": projection.status, + "terminal": projection.terminal, + "agent_id": projection.agent_id, + })); + return Ok(tool_result); + } + + let snapshots = { + let manager = manager.read().await; + manager + .list_filtered(include_archived) + .into_iter() + .map(|snapshot| { + let worker_record = manager.get_worker_record(&snapshot.agent_id); + (snapshot, worker_record) + }) + .collect::>() + }; + + let mut projections = Vec::with_capacity(snapshots.len()); + for (snapshot, worker_record) in snapshots { + projections.push( + subagent_session_projection(snapshot, include_archived, context, worker_record).await, + ); + } + let payload = json!({ + "action": if peek { "peek" } else { "status" }, + "count": projections.len(), + "agents": projections, + }); + let mut tool_result = + ToolResult::json(&payload).map_err(|err| ToolError::execution_failed(err.to_string()))?; + tool_result.metadata = Some(json!({ + "action": if peek { "peek" } else { "status" }, + "count": payload["count"], + })); + Ok(tool_result) +} + +async fn cancel_agent_from_input( + input: &Value, + manager: SharedSubAgentManager, + context: &ToolContext, +) -> Result { + let agent_ref = parse_agent_ref(input).ok_or_else(|| ToolError::missing_field("agent_id"))?; + let (snapshot, worker_record) = { + let mut manager = manager.write().await; + let snapshot = manager + .cancel_agent(&agent_ref) + .map_err(|err| ToolError::invalid_input(err.to_string()))?; + let worker_record = manager.get_worker_record(&snapshot.agent_id); + (snapshot, worker_record) + }; + let projection = subagent_session_projection(snapshot, false, context, worker_record).await; + let mut tool_result = ToolResult::json(&projection) + .map_err(|err| ToolError::execution_failed(err.to_string()))?; + tool_result.metadata = Some(json!({ + "action": "cancel", 
+ "status": projection.status, + "terminal": projection.terminal, + "agent_id": projection.agent_id, + })); + Ok(tool_result) +} + async fn spawn_subagent_from_input( input: Value, manager: SharedSubAgentManager, @@ -3119,7 +3670,7 @@ async fn spawn_subagent_from_input( if runtime.would_exceed_depth() { return Err(ToolError::execution_failed(format!( "Sub-agent depth limit reached (current depth {}, max {}). \ - Increase via [runtime] max_spawn_depth in config.toml.", + Increase via [subagents] max_depth in config.toml.", runtime.spawn_depth, runtime.max_spawn_depth ))); } @@ -3132,40 +3683,20 @@ async fn spawn_subagent_from_input( ))); } - let validated_cwd = if let Some(requested_cwd) = spawn_request.cwd.as_ref() { - let parent_workspace = &runtime.context.workspace; - let resolved = if requested_cwd.is_absolute() { - requested_cwd.clone() - } else { - parent_workspace.join(requested_cwd) - }; - let canonical = resolved.canonicalize().map_err(|e| { - ToolError::invalid_input(format!( - "Invalid cwd '{}': {e} (path may not exist yet — create the worktree first)", - requested_cwd.display() - )) - })?; - let workspace_canonical = parent_workspace - .canonicalize() - .unwrap_or_else(|_| parent_workspace.clone()); - if !canonical.starts_with(&workspace_canonical) { - return Err(ToolError::invalid_input(format!( - "cwd must be inside the parent workspace: {} is not under {}", - canonical.display(), - workspace_canonical.display() - ))); - } - Some(canonical) - } else { - None - }; + if spawn_request.worktree.is_some() { + let manager_guard = manager.read().await; + manager_guard + .check_admission_capacity() + .map_err(|err| ToolError::execution_failed(err.to_string()))?; + } + let child_workspace = prepare_child_workspace(&runtime.context.workspace, &spawn_request)?; let mut child_runtime = runtime.background_runtime(); if let Some(max_depth) = spawn_request.max_depth { child_runtime.max_spawn_depth = child_runtime.spawn_depth.saturating_add(max_depth); } - if let 
Some(cwd) = validated_cwd { - child_runtime.context.workspace = cwd; + if let Some(workspace) = child_workspace { + child_runtime.context.workspace = workspace; } let configured_model = match spawn_request.model.clone() { Some(model) => Some(normalize_requested_subagent_model( @@ -3240,6 +3771,7 @@ async fn spawn_subagent_from_input( model_route: Some(model_route), nickname: None, fork_context: spawn_request.fork_context, + token_budget: spawn_request.token_budget, }, ) .map_err(|e| ToolError::execution_failed(format!("Failed to spawn sub-agent: {e}")))?; @@ -3360,6 +3892,12 @@ struct SubAgentTask { fork_context: bool, started_at: Instant, max_steps: u32, + /// Per-worker token cap sourced from the spawn request's `token_budget` + /// (the explicit `max_tokens`/`tokenBudget` override). `None` means no + /// per-worker limit; the worker still obeys the scope admission gate. + /// When set, the worker stops with `BudgetExhausted` once its accumulated + /// model tokens exceed this value. Independent of the scope budget (#3319). + token_budget: Option, input_rx: mpsc::UnboundedReceiver, /// Interactive launch gate (#3095). 
`Some` only for direct (depth-1) /// children: the task acquires a permit before its first model step and @@ -3401,6 +3939,7 @@ async fn run_subagent_task(task: SubAgentTask) { task.fork_context, task.started_at, task.max_steps, + task.token_budget, task.input_rx, ) .await; @@ -3545,6 +4084,19 @@ pub(crate) fn emit_parent_completion( true } +pub(crate) fn subagent_completion_from_result(result: &SubAgentResult) -> SubAgentCompletion { + let raw = summarize_subagent_result(result); + let (summary, truncated) = stamp_subagent_summary(&raw); + let sentinel = match &result.status { + SubAgentStatus::Failed(error) => subagent_failed_sentinel(&result.agent_id, error), + _ => subagent_done_sentinel(&result.agent_id, result, truncated), + }; + SubAgentCompletion { + agent_id: result.agent_id.clone(), + payload: format!("{summary}\n{sentinel}"), + } +} + /// Build a `` JSON sentinel for a successful child. /// Intended to surface in the parent's transcript so the model recognizes /// child completion. 
@@ -3712,6 +4264,103 @@ fn needs_input_for_interrupted_checkpoint( } } +#[derive(Debug)] +enum SubAgentApiRequestFailure { + Fatal(anyhow::Error), + Interrupted { + reason: String, + checkpoint_reason: &'static str, + }, +} + +fn subagent_transient_provider_retry_delay(retry_number: u32) -> Duration { + let multiplier = 1u32 + .checked_shl(retry_number.saturating_sub(1)) + .unwrap_or(4); + SUBAGENT_TRANSIENT_PROVIDER_INITIAL_BACKOFF.saturating_mul(multiplier.min(4)) +} + +fn is_transient_subagent_provider_error(error: &anyhow::Error) -> bool { + let message = format!("{error:#}").to_ascii_lowercase(); + [ + "did not receive response headers", + "response headers", + "stream request", + "request timed out", + "operation timed out", + "deadline has elapsed", + "connection reset", + "connection closed", + "connection aborted", + "temporarily unavailable", + "bad gateway", + "gateway timeout", + "service unavailable", + "502", + "503", + "504", + ] + .iter() + .any(|needle| message.contains(needle)) +} + +async fn request_subagent_model_response_with_retries( + runtime: &SubAgentRuntime, + agent_id: &str, + steps: u32, + max_steps: u32, + request: MessageRequest, +) -> std::result::Result { + let mut transient_failures = 0u32; + + loop { + match tokio::time::timeout( + runtime.step_api_timeout, + runtime.client.create_message(request.clone()), + ) + .await + { + Ok(Ok(response)) => return Ok(response), + Ok(Err(err)) if is_transient_subagent_provider_error(&err) => { + if transient_failures >= SUBAGENT_TRANSIENT_PROVIDER_MAX_RETRIES { + let attempts = transient_failures.saturating_add(1); + return Err(SubAgentApiRequestFailure::Interrupted { + reason: format!( + "Transient provider failure after {attempts} API attempt(s): {err}; checkpoint preserved for continuation" + ), + checkpoint_reason: "api_transient_provider_failure", + }); + } + + transient_failures = transient_failures.saturating_add(1); + let delay = 
subagent_transient_provider_retry_delay(transient_failures); + record_agent_progress( + runtime, + agent_id, + format!( + "{}: transient provider failure; retrying API request {}/{} in {}ms ({err})", + format_step_counter(steps, max_steps), + transient_failures, + SUBAGENT_TRANSIENT_PROVIDER_MAX_RETRIES, + delay.as_millis(), + ), + ); + tokio::time::sleep(delay).await; + } + Ok(Err(err)) => return Err(SubAgentApiRequestFailure::Fatal(err)), + Err(_) => { + return Err(SubAgentApiRequestFailure::Interrupted { + reason: format!( + "API call timed out after {}ms; checkpoint preserved for continuation", + runtime.step_api_timeout.as_millis() + ), + checkpoint_reason: "api_timeout", + }); + } + } + } +} + fn record_agent_progress(runtime: &SubAgentRuntime, agent_id: &str, message: impl Into) { let message = message.into(); if let Ok(mut manager) = runtime.manager.try_write() { @@ -3794,6 +4443,7 @@ async fn run_subagent( fork_context: bool, started_at: Instant, max_steps: u32, + token_budget: Option, mut input_rx: mpsc::UnboundedReceiver, ) -> Result { let system_prompt = build_subagent_system_prompt(&agent_type, &assignment); @@ -3813,9 +4463,16 @@ async fn run_subagent( structured_state_block: None, }, ); - let tool_registry = SubAgentToolRegistry::new( + let tool_registry = SubAgentToolRegistry::new_with_owner( runtime_for_tools, agent_type.clone(), + agent_id.clone(), + assignment + .role + .as_deref() + .filter(|role| !role.trim().is_empty()) + .unwrap_or(agent_type.as_str()) + .to_string(), allowed_tools.clone(), // Share the parent's todo list so child checklist updates are visible // in the Work sidebar live. 
Previously each child got a fresh isolated @@ -3845,6 +4502,7 @@ async fn run_subagent( let mut pending_inputs: VecDeque = VecDeque::new(); let mut consecutive_truncated_responses = 0; let mut latest_checkpoint: Option = None; + let mut tokens_used: u64 = 0; for _step in 0..max_steps { // Cooperative cancellation: bail if this session's token was cancelled @@ -4030,18 +4688,21 @@ async fn run_subagent( from_prior_session: false, }); } - api = tokio::time::timeout(runtime.step_api_timeout, runtime.client.create_message(request)) => { + api = request_subagent_model_response_with_retries( + runtime, + &agent_id, + steps, + max_steps, + request, + ) => { match api { - Ok(response) => response?, - Err(_) => { - let reason = format!( - "API call timed out after {}ms; checkpoint preserved for continuation", - runtime.step_api_timeout.as_millis() - ); + Ok(response) => response, + Err(SubAgentApiRequestFailure::Fatal(err)) => return Err(err), + Err(SubAgentApiRequestFailure::Interrupted { reason, checkpoint_reason }) => { let checkpoint = checkpoint_subagent_progress( runtime, &agent_id, - "api_timeout", + checkpoint_reason, &messages, steps, true, @@ -4111,6 +4772,91 @@ async fn run_subagent( response.usage.clone(), )); } + { + let mut manager = runtime.manager.write().await; + manager.record_worker_usage(&agent_id, &response.usage); + } + + // Per-worker token-budget enforcement (#3321): stop a single runaway + // worker once its accumulated model tokens exceed its own cap. This + // complements — and does not double-count — the scope-level admission + // gate (#3319), which bounds aggregate fan-out across siblings. The + // local accumulator mirrors the manager's `record.usage.total_tokens` + // (both derive from `response.usage`), so the scope accounting stays + // consistent and is never inflated by this check. 
+ tokens_used = tokens_used.saturating_add(usage_total_tokens(&response.usage)); + if let Some(budget) = token_budget { + if tokens_used > budget { + record_agent_progress( + runtime, + &agent_id, + format!( + "{}: token budget exhausted ({tokens_used}/{budget})", + format_step_counter(steps, max_steps) + ), + ); + if let Some(mb) = runtime.mailbox.as_ref() { + let _ = mb.send(MailboxMessage::Cancelled { + agent_id: agent_id.clone(), + }); + } + let status = SubAgentStatus::BudgetExhausted; + let duration_ms = + u64::try_from(started_at.elapsed().as_millis()).unwrap_or(u64::MAX); + latest_checkpoint = Some( + checkpoint_subagent_progress( + runtime, + &agent_id, + "token_budget_exhausted", + &messages, + steps, + true, + ) + .await, + ); + insert_subagent_full_transcript_handle( + runtime, + &agent_id, + &agent_type, + &assignment, + &status, + final_result.as_ref(), + latest_checkpoint.as_ref(), + &messages, + steps, + duration_ms, + fork_context_enabled, + ) + .await; + return Ok(SubAgentResult { + name: agent_id.clone(), + agent_id: agent_id.clone(), + context_mode: if fork_context_enabled { + "forked" + } else { + "fresh" + } + .to_string(), + fork_context: fork_context_enabled, + workspace: Some(runtime.context.workspace.clone()), + git_branch: current_git_branch(&runtime.context.workspace), + agent_type: agent_type.clone(), + assignment: assignment.clone(), + model: runtime.model.clone(), + nickname: None, + status, + worker_status: None, + parent_run_id: runtime.parent_agent_id.clone(), + spawn_depth: runtime.spawn_depth, + result: final_result.clone(), + steps_taken: steps, + checkpoint: latest_checkpoint.clone(), + needs_input: None, + duration_ms, + from_prior_session: false, + }); + } + } for block in &response.content { match block { @@ -4553,6 +5299,12 @@ fn parse_spawn_request(input: &Value) -> Result { }); let cwd = parse_optional_cwd(input)?; + let worktree = parse_optional_worktree_request(input)?; + if cwd.is_some() && worktree.is_some() { + 
return Err(ToolError::invalid_input( + "Use either cwd or worktree isolation, not both".to_string(), + )); + } let model = parse_optional_subagent_model(input, "model")?; let model_strength = optional_input_str(input, &["model_strength", "modelStrength"]) .map(SubAgentModelStrength::parse) @@ -4605,6 +5357,8 @@ fn parse_spawn_request(input: &Value) -> Result { }) }) .transpose()?; + let token_budget = + parse_optional_positive_u64(input, &["token_budget", "tokenBudget", "max_tokens"])?; Ok(SpawnRequest { session_name, @@ -4616,9 +5370,11 @@ fn parse_spawn_request(input: &Value) -> Result { model_strength, thinking, cwd, + worktree, resident_file, fork_context, max_depth, + token_budget, }) } @@ -4650,6 +5406,26 @@ fn parse_optional_bool(input: &Value, names: &[&str]) -> Option { .and_then(Value::as_bool) } +fn parse_optional_positive_u64(input: &Value, names: &[&str]) -> Result, ToolError> { + for name in names { + let Some(value) = input.get(*name) else { + continue; + }; + let Some(parsed) = value.as_u64() else { + return Err(ToolError::invalid_input(format!( + "{name} must be a positive integer token count" + ))); + }; + if parsed == 0 { + return Err(ToolError::invalid_input(format!( + "{name} must be greater than zero; omit it to inherit or disable the budget" + ))); + } + return Ok(Some(parsed)); + } + Ok(None) +} + fn with_default_fork_context(mut input: Value, default: bool) -> Value { let Some(object) = input.as_object_mut() else { return input; @@ -4922,6 +5698,329 @@ fn parse_optional_cwd(input: &Value) -> Result, ToolError> { } } +fn parse_optional_worktree_request( + input: &Value, +) -> Result, ToolError> { + let worktree_flag = + parse_optional_bool_strict(input, &["worktree", "isolate_worktree", "isolateWorktree"])?; + let isolation = optional_input_str(input, &["isolation"]) + .map(|value| value.trim().to_ascii_lowercase().replace(['_', '-'], "")); + let isolation_wants_worktree = match isolation.as_deref() { + None | Some("") | Some("none") | 
Some("shared") => false, + Some("worktree") | Some("gitworktree") => true, + Some(other) => { + return Err(ToolError::invalid_input(format!( + "isolation must be 'worktree' or 'none' (got '{other}')" + ))); + } + }; + + let branch = optional_input_str( + input, + &[ + "worktree_branch", + "worktreeBranch", + "branch_name", + "branchName", + "branch", + ], + ) + .map(str::to_string); + let path = optional_input_str( + input, + &[ + "worktree_path", + "worktreePath", + "worktree_dir", + "worktreeDir", + ], + ) + .map(PathBuf::from); + let base_ref = optional_input_str( + input, + &["worktree_base", "worktreeBase", "base_ref", "baseRef"], + ) + .map(str::to_string); + + let has_worktree_details = branch.is_some() || path.is_some() || base_ref.is_some(); + if worktree_flag == Some(false) && (isolation_wants_worktree || has_worktree_details) { + return Err(ToolError::invalid_input( + "worktree=false conflicts with worktree isolation options".to_string(), + )); + } + if worktree_flag.unwrap_or(false) || isolation_wants_worktree || has_worktree_details { + Ok(Some(SubAgentWorktreeRequest { + branch, + path, + base_ref, + })) + } else { + Ok(None) + } +} + +fn parse_optional_bool_strict(input: &Value, names: &[&str]) -> Result, ToolError> { + for name in names { + let Some(value) = input.get(*name) else { + continue; + }; + return value.as_bool().map(Some).ok_or_else(|| { + ToolError::invalid_input(format!("{name} must be a boolean when provided")) + }); + } + Ok(None) +} + +fn prepare_child_workspace( + parent_workspace: &Path, + request: &SpawnRequest, +) -> Result, ToolError> { + if let Some(requested_cwd) = request.cwd.as_ref() { + return validate_existing_child_cwd(parent_workspace, requested_cwd).map(Some); + } + if let Some(worktree) = request.worktree.as_ref() { + return create_isolated_worktree( + parent_workspace, + worktree, + request.session_name.as_deref(), + &request.agent_type, + ) + .map(Some); + } + Ok(None) +} + +fn validate_existing_child_cwd( + 
parent_workspace: &Path, + requested_cwd: &Path, +) -> Result { + let resolved = if requested_cwd.is_absolute() { + requested_cwd.to_path_buf() + } else { + parent_workspace.join(requested_cwd) + }; + let canonical = resolved.canonicalize().map_err(|e| { + ToolError::invalid_input(format!( + "Invalid cwd '{}': {e} (path may not exist yet — use worktree=true to let CodeWhale create an isolated checkout)", + requested_cwd.display() + )) + })?; + let workspace_canonical = parent_workspace + .canonicalize() + .unwrap_or_else(|_| parent_workspace.to_path_buf()); + if !canonical.starts_with(&workspace_canonical) { + return Err(ToolError::invalid_input(format!( + "cwd must be inside the parent workspace: {} is not under {}", + canonical.display(), + workspace_canonical.display() + ))); + } + Ok(canonical) +} + +fn create_isolated_worktree( + parent_workspace: &Path, + request: &SubAgentWorktreeRequest, + session_name: Option<&str>, + agent_type: &SubAgentType, +) -> Result { + let repo_root = git_repo_root(parent_workspace)?; + let branch = request + .branch + .clone() + .unwrap_or_else(|| default_worktree_branch(session_name, agent_type)); + validate_git_branch_name(&repo_root, &branch)?; + + let base_ref = request + .base_ref + .as_deref() + .map(str::trim) + .filter(|value| !value.is_empty()) + .unwrap_or("HEAD") + .to_string(); + let worktree_path = resolve_worktree_path(&repo_root, &branch, request.path.as_ref())?; + if let Some(parent) = worktree_path.parent() { + fs::create_dir_all(parent).map_err(|err| { + ToolError::execution_failed(format!( + "Failed to create worktree parent '{}': {err}", + parent.display() + )) + })?; + } + + let path_arg = worktree_path.to_string_lossy().to_string(); + let args = vec![ + "worktree".to_string(), + "add".to_string(), + "-b".to_string(), + branch, + path_arg, + base_ref, + ]; + run_git_checked(&repo_root, &args, "create sub-agent worktree")?; + worktree_path.canonicalize().map_err(|err| { + ToolError::execution_failed(format!( + 
"Created worktree path '{}' could not be resolved: {err}", + worktree_path.display() + )) + }) +} + +fn git_repo_root(workspace: &Path) -> Result { + let output = run_git_checked( + workspace, + &["rev-parse".to_string(), "--show-toplevel".to_string()], + "resolve git repository root", + )?; + let root = output.trim(); + if root.is_empty() { + return Err(ToolError::invalid_input( + "worktree=true requires a git repository workspace".to_string(), + )); + } + Ok(PathBuf::from(root)) +} + +fn validate_git_branch_name(repo_root: &Path, branch: &str) -> Result<(), ToolError> { + let branch = branch.trim(); + if branch.is_empty() { + return Err(ToolError::invalid_input( + "worktree_branch cannot be blank".to_string(), + )); + } + run_git_checked( + repo_root, + &[ + "check-ref-format".to_string(), + "--branch".to_string(), + branch.to_string(), + ], + "validate sub-agent worktree branch", + ) + .map(|_| ()) + .map_err(|err| ToolError::invalid_input(format!("Invalid worktree_branch '{branch}': {err}"))) +} + +fn default_worktree_branch(session_name: Option<&str>, agent_type: &SubAgentType) -> String { + let seed = session_name + .map(str::trim) + .filter(|name| !name.is_empty()) + .unwrap_or_else(|| agent_type.as_str()); + format!( + "codex/agent-{}-{}", + sanitize_worktree_slug(seed), + &Uuid::new_v4().to_string()[..8] + ) +} + +fn resolve_worktree_path( + repo_root: &Path, + branch: &str, + requested_path: Option<&PathBuf>, +) -> Result { + let default_root = default_worktree_root(repo_root); + let path = match requested_path { + Some(path) if path.is_absolute() => path.to_path_buf(), + Some(path) => { + let resolved = normalize_path_lexically(&default_root.join(path)); + if !resolved.starts_with(&default_root) { + return Err(ToolError::invalid_input(format!( + "relative worktree_path '{}' must stay under {}", + path.display(), + default_root.display() + ))); + } + resolved + } + None => default_root.join(sanitize_worktree_slug(branch)), + }; + let normalized = 
normalize_path_lexically(&path); + let repo_canonical = repo_root + .canonicalize() + .unwrap_or_else(|_| repo_root.to_path_buf()); + if normalized.starts_with(&repo_canonical) { + return Err(ToolError::invalid_input(format!( + "worktree_path must not be inside the parent checkout: {} is under {}", + normalized.display(), + repo_canonical.display() + ))); + } + Ok(normalized) +} + +fn default_worktree_root(repo_root: &Path) -> PathBuf { + let repo_name = repo_root + .file_name() + .and_then(|name| name.to_str()) + .map(sanitize_worktree_slug) + .filter(|name| !name.is_empty()) + .unwrap_or_else(|| "repo".to_string()); + let parent = repo_root.parent().unwrap_or(repo_root); + normalize_path_lexically(&parent.join(SUBAGENT_WORKTREE_ROOT_DIR).join(repo_name)) +} + +fn sanitize_worktree_slug(input: &str) -> String { + let mut slug = String::new(); + for ch in input.chars() { + let normalized = if ch.is_ascii_alphanumeric() { + ch.to_ascii_lowercase() + } else if matches!(ch, '-' | '_' | '.') { + ch + } else { + '-' + }; + if normalized == '-' && slug.ends_with('-') { + continue; + } + slug.push(normalized); + if slug.len() >= 48 { + break; + } + } + let slug = slug.trim_matches(['-', '.', '_']).to_string(); + if slug.is_empty() { + "task".to_string() + } else { + slug + } +} + +fn normalize_path_lexically(path: &Path) -> PathBuf { + let mut normalized = PathBuf::new(); + for component in path.components() { + match component { + std::path::Component::CurDir => {} + std::path::Component::ParentDir => { + normalized.pop(); + } + other => normalized.push(other.as_os_str()), + } + } + normalized +} + +fn run_git_checked(workspace: &Path, args: &[String], action: &str) -> Result { + let arg_refs = args.iter().map(String::as_str).collect::>(); + let output = Git::output(&arg_refs, workspace).map_err(|err| { + ToolError::execution_failed(format!("Failed to {action}: could not run git: {err}")) + })?; + if output.status.success() { + return 
Ok(String::from_utf8_lossy(&output.stdout).to_string()); + } + let stderr = String::from_utf8_lossy(&output.stderr).trim().to_string(); + let stdout = String::from_utf8_lossy(&output.stdout).trim().to_string(); + let detail = if !stderr.is_empty() { + stderr + } else if !stdout.is_empty() { + stdout + } else { + format!("git exited with status {}", output.status) + }; + Err(ToolError::execution_failed(format!( + "Failed to {action}: {detail}" + ))) +} + /// Resolve a user-supplied role/agent_role value to a canonical role string. /// /// This must accept the full set that [`SubAgentType::from_str`] accepts, plus @@ -4963,6 +6062,7 @@ fn worker_status_from_subagent_status(status: &SubAgentStatus) -> AgentWorkerSta SubAgentStatus::Completed => AgentWorkerStatus::Completed, SubAgentStatus::Failed(_) => AgentWorkerStatus::Failed, SubAgentStatus::Cancelled => AgentWorkerStatus::Cancelled, + SubAgentStatus::BudgetExhausted => AgentWorkerStatus::Failed, SubAgentStatus::Interrupted(_) => AgentWorkerStatus::Interrupted, } } @@ -5121,6 +6221,8 @@ struct SubAgentToolRegistry { /// the child without the parent runtime being auto-approved (#1828, #1833). agent_type: SubAgentType, can_spawn_child: bool, + owner_agent_id: String, + owner_agent_name: String, registry: ToolRegistry, } @@ -5131,6 +6233,26 @@ impl SubAgentToolRegistry { explicit_allowed_tools: Option>, todo_list: SharedTodoList, plan_state: SharedPlanState, + ) -> Self { + Self::new_with_owner( + runtime, + agent_type, + "agent_unknown".to_string(), + "sub-agent".to_string(), + explicit_allowed_tools, + todo_list, + plan_state, + ) + } + + fn new_with_owner( + runtime: SubAgentRuntime, + agent_type: SubAgentType, + owner_agent_id: String, + owner_agent_name: String, + explicit_allowed_tools: Option>, + todo_list: SharedTodoList, + plan_state: SharedPlanState, ) -> Self { // Build the full agent surface — same as the parent's Agent mode. 
// Children inherit shell, file, patch, search, web, git, diagnostics, @@ -5159,6 +6281,8 @@ impl SubAgentToolRegistry { auto_approve: runtime.context.auto_approve, agent_type, can_spawn_child, + owner_agent_id, + owner_agent_name, registry, } } @@ -5279,9 +6403,15 @@ impl SubAgentToolRegistry { } } reject_subagent_terminal_takeover(name, &input)?; + let context = self + .registry + .context() + .clone() + .with_owner_agent(self.owner_agent_id.clone(), self.owner_agent_name.clone()); self.registry - .execute(name, input) + .execute_full_with_context(name, input, Some(&context)) .await + .map(|result| result.content) .map_err(|e| anyhow!(e)) } } @@ -5445,6 +6575,7 @@ fn summarize_subagent_result(result: &SubAgentResult) -> String { (SubAgentStatus::Completed, None) => "Completed (no output)".to_string(), (SubAgentStatus::Interrupted(error), _) => format!("Interrupted: {error}"), (SubAgentStatus::Cancelled, _) => "Cancelled".to_string(), + (SubAgentStatus::BudgetExhausted, _) => "Token budget exhausted".to_string(), (SubAgentStatus::Failed(error), _) => format!("Failed: {error}"), (SubAgentStatus::Running, _) => "Running".to_string(), } @@ -5457,6 +6588,7 @@ fn subagent_status_name(status: &SubAgentStatus) -> &'static str { SubAgentStatus::Interrupted(_) => "interrupted", SubAgentStatus::Failed(_) => "failed", SubAgentStatus::Cancelled => "cancelled", + SubAgentStatus::BudgetExhausted => "budget_exhausted", } } diff --git a/crates/tui/src/tools/subagent/tests.rs b/crates/tui/src/tools/subagent/tests.rs index 125789c226..2e876df019 100644 --- a/crates/tui/src/tools/subagent/tests.rs +++ b/crates/tui/src/tools/subagent/tests.rs @@ -1,6 +1,7 @@ use super::*; use crate::worker_profile::ShellPolicy; -use axum::{Json, Router, routing::post}; +use axum::{Json, Router, http::StatusCode, response::IntoResponse, routing::post}; +use std::collections::HashSet; use std::process::Command; use std::sync::atomic::{AtomicUsize, Ordering}; use tempfile::tempdir; @@ -153,6 +154,113 @@ 
fn headless_worker_record_tracks_lifecycle_without_tui_projection() { ); } +#[test] +fn worker_record_usage_accumulates_provider_tokens() { + let tmp = tempdir().expect("tempdir"); + let mut manager = SubAgentManager::new(tmp.path().to_path_buf(), 4); + manager.register_worker(make_worker_spec("agent_usage", tmp.path().to_path_buf())); + + manager.record_worker_usage( + "agent_usage", + &Usage { + input_tokens: 100, + output_tokens: 25, + prompt_cache_hit_tokens: Some(70), + prompt_cache_miss_tokens: Some(30), + ..Usage::default() + }, + ); + manager.record_worker_usage( + "agent_usage", + &Usage { + input_tokens: 40, + output_tokens: 10, + ..Usage::default() + }, + ); + + let record = manager + .get_worker_record("agent_usage") + .expect("worker record"); + assert_eq!(record.usage.status, "reported"); + assert_eq!(record.usage.input_tokens, Some(140)); + assert_eq!(record.usage.output_tokens, Some(35)); + assert_eq!(record.usage.total_tokens, Some(175)); + assert_eq!(record.usage.token_budget, None); + assert!( + record.usage.note.contains("175 tokens"), + "usage note includes reported total: {}", + record.usage.note + ); +} + +#[test] +fn token_budget_scope_is_shared_across_nested_workers_and_blocks_when_spent() { + let tmp = tempdir().expect("tempdir"); + let workspace = tmp.path().to_path_buf(); + let mut manager = + SubAgentManager::new(workspace.clone(), 4).with_default_token_budget(Some(100)); + + manager.register_worker(make_worker_spec("agent_root", workspace.clone())); + let root_scope = manager + .resolve_spawn_budget_scope("agent_root", None, None) + .expect("root budget resolves") + .expect("root budget present"); + manager.attach_budget_scope("agent_root", root_scope); + manager.record_worker_usage( + "agent_root", + &Usage { + input_tokens: 40, + output_tokens: 10, + ..Usage::default() + }, + ); + + let mut child_spec = make_worker_spec("agent_child", workspace); + child_spec.parent_run_id = Some("agent_root".to_string()); + let child_scope = manager 
+ .resolve_spawn_budget_scope("agent_child", Some("agent_root"), None) + .expect("child inherits budget") + .expect("child budget present"); + assert_eq!(child_scope.scope_id, "agent_root"); + assert_eq!(child_scope.limit, 100); + assert_eq!(child_scope.spent, 50); + manager.register_worker(child_spec); + manager.attach_budget_scope("agent_child", child_scope); + manager.record_worker_usage( + "agent_child", + &Usage { + input_tokens: 30, + output_tokens: 20, + ..Usage::default() + }, + ); + + let root = manager.get_worker_record("agent_root").expect("root"); + let child = manager.get_worker_record("agent_child").expect("child"); + assert_eq!(root.usage.budget_spent_tokens, Some(100)); + assert_eq!(child.usage.budget_spent_tokens, Some(100)); + assert_eq!(root.usage.budget_remaining_tokens, Some(0)); + assert_eq!(child.usage.budget_remaining_tokens, Some(0)); + assert_eq!(root.usage.status, "budget_exhausted"); + + let err = manager + .resolve_spawn_budget_scope("agent_grandchild", Some("agent_child"), None) + .expect_err("spent shared budget blocks further child spawn"); + assert!( + err.to_string().contains("token budget exhausted"), + "actionable exhaustion error: {err}" + ); + + let override_scope = manager + .resolve_spawn_budget_scope("agent_override", Some("agent_child"), Some(20)) + .expect("explicit override starts new scope") + .expect("override budget present"); + assert_eq!(override_scope.scope_id, "agent_override"); + assert_eq!(override_scope.limit, 20); + assert_eq!(override_scope.spent, 0); +} + #[test] fn agent_worker_profile_derives_from_parent_without_escalation() { let mut runtime = stub_runtime(); @@ -389,6 +497,71 @@ async fn delayed_chat_client( (client, calls, bodies) } +async fn transient_header_timeout_then_success_chat_client( + response_text: &str, +) -> (DeepSeekClient, Arc) { + let calls = Arc::new(AtomicUsize::new(0)); + let response_text = response_text.to_string(); + let app = Router::new().route( + "/{*path}", + post({ + let calls 
= Arc::clone(&calls); + move |Json(_body): Json| { + let calls = Arc::clone(&calls); + let response_text = response_text.clone(); + async move { + let attempt = calls.fetch_add(1, Ordering::SeqCst) + 1; + if attempt == 1 { + return ( + StatusCode::BAD_REQUEST, + Json(json!({ + "error": { + "message": "SSE stream request did not receive response headers after 45s" + } + })), + ) + .into_response(); + } + Json(json!({ + "id": format!("chatcmpl-test-{attempt}"), + "model": "deepseek-v4-flash", + "choices": [{ + "index": 0, + "message": { + "role": "assistant", + "content": response_text + }, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": 1, + "completion_tokens": 1, + "total_tokens": 2 + } + })) + .into_response() + } + } + }), + ); + + let listener = tokio::net::TcpListener::bind("127.0.0.1:0") + .await + .expect("bind fake transient chat server"); + let addr = listener.local_addr().expect("fake chat server addr"); + tokio::spawn(async move { + let _ = axum::serve(listener, app).await; + }); + + let config = crate::config::Config { + api_key: Some("test-key".to_string()), + base_url: Some(format!("http://{addr}/v1")), + ..crate::config::Config::default() + }; + let client = DeepSeekClient::new(&config).expect("fake transient chat client"); + (client, calls) +} + fn estimate_tool_description_tokens_conservative(text: &str) -> usize { text.chars().count().div_ceil(3) } @@ -580,7 +753,9 @@ fn agent_description_explains_background_child_and_transcript_handle() { let tool = AgentTool::new(manager, stub_runtime()); let description = tool.description(); - assert!(description.contains("Start one focused child agent task")); + assert!(description.contains("Start, inspect, peek at, or cancel focused child agent tasks")); + assert!(description.contains("runs or queues")); + assert!(description.contains("provider rate-limit")); assert!(description.contains("background")); assert!(description.contains("transcript_handle")); assert!( @@ -830,15 +1005,16 @@ fn 
test_parse_spawn_request_rejects_invalid_session_name() { #[test] fn test_parse_spawn_request_rejects_out_of_range_max_depth() { + let ceiling = codewhale_config::MAX_SPAWN_DEPTH_CEILING; let input = json!({ "name": "review.parser", "prompt": "inspect parser", - "max_depth": 4 + "max_depth": ceiling + 1 }); let err = parse_spawn_request(&input).expect_err("max_depth should be capped at schema range"); assert!( err.to_string() - .contains("max_depth must be between 0 and 3") + .contains(&format!("max_depth must be between 0 and {ceiling}")) ); } @@ -969,6 +1145,33 @@ fn test_delegate_defaults_to_fork_context() { assert!(!parsed.fork_context); } +#[test] +fn spawn_request_parses_token_budget_override() { + let parsed = parse_spawn_request(&json!({ + "prompt": "fan out safely", + "token_budget": 12_345 + })) + .expect("token budget parses"); + assert_eq!(parsed.token_budget, Some(12_345)); + + let parsed = parse_spawn_request(&json!({ + "prompt": "fleet-shaped alias", + "max_tokens": 4_000 + })) + .expect("max_tokens alias parses"); + assert_eq!(parsed.token_budget, Some(4_000)); + + let err = parse_spawn_request(&json!({ + "prompt": "bad budget", + "token_budget": 0 + })) + .expect_err("zero budget is invalid in tool input"); + assert!( + err.to_string().contains("must be greater than zero"), + "clear token budget error: {err}" + ); +} + #[test] fn forked_subagent_messages_preserve_parent_prefix_then_append_task() { let parent_system = SystemPrompt::Text("parent system".to_string()); @@ -1154,6 +1357,13 @@ fn subagent_tool_schemas_advertise_real_type_and_role_vocabulary() { "thinking description should teach child thinking control: {thinking}" ); assert!(agent_schema["properties"].get("model").is_some()); + let worktree = schema_property_description(&agent_schema, "worktree"); + assert!( + worktree.contains("git worktree") && worktree.contains("parallel edit"), + "worktree description should teach isolated parallel edits: {worktree}" + ); + 
assert!(agent_schema["properties"].get("worktree_branch").is_some()); + assert!(agent_schema["properties"].get("worktree_path").is_some()); } #[test] @@ -1168,6 +1378,107 @@ fn agent_tool_prompt_schema_prefers_structured_briefs() { assert!(prompt.contains("ALREADY_KNOWN")); } +#[test] +fn agent_tool_schema_advertises_status_peek_cancel_actions() { + let tmp = tempdir().expect("tempdir"); + let manager = new_shared_subagent_manager(tmp.path().to_path_buf(), 1); + let agent_schema = AgentTool::new(manager, stub_runtime()).input_schema(); + + let action = schema_property_description(&agent_schema, "action"); + assert!(action.contains("status")); + assert!(action.contains("peek")); + assert!(action.contains("cancel")); + assert!(agent_schema["properties"].get("agent_id").is_some()); +} + +#[tokio::test] +async fn agent_tool_status_returns_running_child_projection() { + let tmp = tempdir().expect("tempdir"); + let manager = Arc::new(RwLock::new(SubAgentManager::new( + tmp.path().to_path_buf(), + 2, + ))); + let agent_id = "agent_status_probe".to_string(); + let (input_tx, _input_rx) = mpsc::unbounded_channel(); + let mut agent = SubAgent::new( + agent_id.clone(), + SubAgentType::General, + "probe".to_string(), + make_assignment(), + "deepseek-v4-flash".to_string(), + None, + None, + input_tx, + tmp.path().to_path_buf(), + manager.read().await.current_session_boot_id.clone(), + ); + agent.status = SubAgentStatus::Running; + { + let mut manager_guard = manager.write().await; + manager_guard.agents.insert(agent_id.clone(), agent); + manager_guard.register_worker(make_worker_spec(&agent_id, tmp.path().to_path_buf())); + manager_guard + .record_worker_progress(&agent_id, "step 1: requesting model response".to_string()); + } + + let tool = AgentTool::new(Arc::clone(&manager), stub_runtime()); + let context = ToolContext::new(tmp.path()); + let result = tool + .execute(json!({"action": "status", "agent_id": agent_id}), &context) + .await + .expect("status action succeeds"); + 
+ assert_eq!(result.metadata.as_ref().unwrap()["action"], json!("status")); + assert!(result.content.contains("agent_status_probe")); + assert!(result.content.contains("running")); + assert!(result.content.contains("transcript_handle")); +} + +#[tokio::test] +async fn agent_tool_cancel_stops_running_child() { + let tmp = tempdir().expect("tempdir"); + let manager = Arc::new(RwLock::new(SubAgentManager::new( + tmp.path().to_path_buf(), + 2, + ))); + let agent_id = "agent_cancel_probe".to_string(); + let (input_tx, _input_rx) = mpsc::unbounded_channel(); + let mut agent = SubAgent::new( + agent_id.clone(), + SubAgentType::General, + "cancel".to_string(), + make_assignment(), + "deepseek-v4-flash".to_string(), + None, + None, + input_tx, + tmp.path().to_path_buf(), + manager.read().await.current_session_boot_id.clone(), + ); + agent.status = SubAgentStatus::Running; + { + let mut manager_guard = manager.write().await; + manager_guard.agents.insert(agent_id.clone(), agent); + manager_guard.register_worker(make_worker_spec(&agent_id, tmp.path().to_path_buf())); + } + + let tool = AgentTool::new(Arc::clone(&manager), stub_runtime()); + let context = ToolContext::new(tmp.path()); + let result = tool + .execute(json!({"action": "cancel", "agent_id": agent_id}), &context) + .await + .expect("cancel action succeeds"); + + assert_eq!(result.metadata.as_ref().unwrap()["action"], json!("cancel")); + assert!(result.content.contains("cancelled")); + let snapshot = manager + .read() + .await + .get_result("agent_cancel_probe") + .expect("agent remains listed"); + assert_eq!(snapshot.status, SubAgentStatus::Cancelled); +} + #[test] fn test_parse_spawn_request_rejects_conflicting_type_and_role() { let input = json!({ @@ -1612,6 +1923,7 @@ async fn api_timeout_preserves_checkpoint_and_returns_needs_input_without_parkin fork_context: false, started_at: Instant::now(), max_steps: 3, + token_budget: None, input_rx: task_input_rx, launch_gate: None, }; @@ -1728,6 +2040,86 @@ async fn 
api_timeout_preserves_checkpoint_and_returns_needs_input_without_parkin ); } +#[test] +fn transient_provider_classifier_matches_sse_header_timeout() { + let err = anyhow::anyhow!("SSE stream request did not receive response headers after 45s"); + + assert!(is_transient_subagent_provider_error(&err)); +} + +#[tokio::test] +async fn subagent_retries_transient_provider_header_timeout_before_succeeding() { + let tmp = tempdir().expect("tempdir"); + let manager = Arc::new(RwLock::new(SubAgentManager::new( + tmp.path().to_path_buf(), + 2, + ))); + let agent_id = "agent_transient_provider_retry".to_string(); + let (task_input_tx, task_input_rx) = mpsc::unbounded_channel(); + let agent = SubAgent::new( + agent_id.clone(), + SubAgentType::General, + "Inspect transient provider recovery".to_string(), + make_assignment(), + "deepseek-v4-flash".to_string(), + Some("Blue".to_string()), + Some(vec![]), + task_input_tx, + tmp.path().to_path_buf(), + "boot_test".to_string(), + ); + { + let mut manager = manager.write().await; + manager.agents.insert(agent_id.clone(), agent); + manager.register_worker(make_worker_spec(&agent_id, tmp.path().to_path_buf())); + } + + let (client, calls) = + transient_header_timeout_then_success_chat_client("recovered answer").await; + let mut runtime = stub_runtime().with_step_api_timeout(Duration::from_secs(5)); + runtime.client = client; + runtime.manager = Arc::clone(&manager); + runtime.context = ToolContext::new(tmp.path()); + + let task = SubAgentTask { + manager_handle: Arc::clone(&manager), + runtime, + agent_id: agent_id.clone(), + agent_type: SubAgentType::General, + prompt: "Inspect transient provider recovery".to_string(), + assignment: make_assignment(), + allowed_tools: Some(vec![]), + fork_context: false, + started_at: Instant::now(), + max_steps: 3, + token_budget: None, + input_rx: task_input_rx, + launch_gate: None, + }; + + tokio::time::timeout( + Duration::from_secs(10), + tokio::spawn(run_subagent_task(task)), + ) + .await + 
.expect("sub-agent task should finish") + .expect("sub-agent join should succeed"); + + assert_eq!( + calls.load(Ordering::SeqCst), + 2, + "one transient provider failure should be retried exactly once" + ); + let snapshot = { + let manager = manager.read().await; + manager + .get_result(&agent_id) + .expect("agent should stay registered") + }; + assert_eq!(snapshot.status, SubAgentStatus::Completed); + assert_eq!(snapshot.result.as_deref(), Some("recovered answer")); +} + #[tokio::test] async fn spawn_duplicate_session_name_error_names_conflicting_agent() { // #2656: the duplicate-name error must identify the conflicting agent so a @@ -1870,6 +2262,76 @@ async fn test_running_count_counts_running_agents_until_status_reconciles() { assert_eq!(manager.running_count(), 1); } +#[tokio::test] +async fn admission_limit_counts_queued_and_running_workers_separately() { + let mut manager = SubAgentManager::new(PathBuf::from("."), 2).with_admission_limit(4); + let mut handles = Vec::new(); + + for (agent_id, queued) in [ + ("agent_admit_a", false), + ("agent_admit_b", false), + ("agent_admit_c", true), + ("agent_admit_d", true), + ] { + let (input_tx, _input_rx) = mpsc::unbounded_channel(); + let mut agent = SubAgent::new( + agent_id.to_string(), + SubAgentType::Explore, + "prompt".to_string(), + make_assignment(), + "deepseek-v4-flash".to_string(), + Some("Blue".to_string()), + Some(vec!["read_file".to_string()]), + input_tx, + PathBuf::from("."), + "boot_test".to_string(), + ); + agent.status = SubAgentStatus::Running; + agent.task_handle = Some(tokio::spawn(async { + tokio::time::sleep(Duration::from_secs(60)).await; + })); + handles.push(agent_id.to_string()); + manager.agents.insert(agent_id.to_string(), agent); + manager.register_worker(make_worker_spec(agent_id, PathBuf::from("."))); + if queued { + manager.record_worker_event( + agent_id, + AgentWorkerStatus::Queued, + Some(SUBAGENT_QUEUED_LAUNCH_REASON.to_string()), + None, + None, + ); + } + + if 
manager.admitted_count() < 4 { + manager + .check_admission_capacity() + .expect("admission remains below total ceiling"); + } + } + + assert_eq!(manager.admitted_count(), 4); + assert_eq!(manager.active_count(), 2); + assert_eq!(manager.queued_count(), 2); + let err = manager + .check_admission_capacity() + .expect_err("admission ceiling rejects fifth worker"); + let msg = err.to_string(); + assert!( + msg.contains("max_admitted 4") && msg.contains("running 2") && msg.contains("queued 2"), + "error distinguishes running vs queued counts: {msg}" + ); + + for agent_id in handles { + manager + .agents + .get_mut(&agent_id) + .and_then(|agent| agent.task_handle.take()) + .expect("live task handle") + .abort(); + } +} + #[tokio::test] async fn cleanup_auto_cancels_stale_running_agent_and_releases_slot() { let mut manager = SubAgentManager::new(PathBuf::from("."), 1) @@ -2160,6 +2622,42 @@ fn parse_spawn_request_extracts_cwd_when_present() { ); } +#[test] +fn parse_spawn_request_accepts_worktree_isolation() { + let input = json!({ + "prompt": "build feature A", + "worktree": true, + "worktree_branch": "codex/agent-feature-a", + "worktree_path": "feature-a", + "worktree_base": "HEAD" + }); + let parsed = parse_spawn_request(&input).expect("spawn request should parse"); + let worktree = parsed.worktree.expect("worktree request"); + assert_eq!(worktree.branch.as_deref(), Some("codex/agent-feature-a")); + assert_eq!(worktree.base_ref.as_deref(), Some("HEAD")); + assert_eq!( + worktree + .path + .as_ref() + .map(|p| p.to_string_lossy().to_string()), + Some("feature-a".to_string()) + ); +} + +#[test] +fn parse_spawn_request_rejects_cwd_with_worktree_isolation() { + let input = json!({ + "prompt": "build feature A", + "cwd": ".worktrees/manual", + "worktree": true + }); + let err = parse_spawn_request(&input).expect_err("cwd and worktree should conflict"); + assert!( + err.to_string().contains("either cwd or worktree"), + "unexpected error: {err}" + ); +} + #[test] fn 
parse_spawn_request_cwd_absent_yields_none() { let input = json!({ "prompt": "no cwd" }); @@ -2174,6 +2672,59 @@ fn parse_spawn_request_cwd_empty_string_yields_none() { assert!(parsed.cwd.is_none(), "whitespace-only cwd should be None"); } +#[test] +fn create_isolated_worktree_creates_branch_checkout_outside_parent_repo() { + let repo = init_subagent_git_repo(); + let worktree_home = tempdir().expect("worktree home"); + let request = SubAgentWorktreeRequest { + branch: Some("codex/agent-isolated-test".to_string()), + path: Some(worktree_home.path().join("isolated")), + base_ref: None, + }; + + let path = create_isolated_worktree( + repo.path(), + &request, + Some("isolated-test"), + &SubAgentType::Implementer, + ) + .expect("worktree should be created"); + + assert!(path.exists(), "worktree path should exist"); + assert!( + !path.starts_with(repo.path()), + "generated worktree must be outside the parent checkout" + ); + assert_eq!( + current_git_branch(&path).as_deref(), + Some("codex/agent-isolated-test") + ); +} + +#[test] +fn create_isolated_worktree_rejects_invalid_branch_as_input() { + let repo = init_subagent_git_repo(); + let worktree_home = tempdir().expect("worktree home"); + let request = SubAgentWorktreeRequest { + branch: Some("bad branch name".to_string()), + path: Some(worktree_home.path().join("isolated")), + base_ref: None, + }; + + let err = create_isolated_worktree( + repo.path(), + &request, + Some("isolated-test"), + &SubAgentType::Implementer, + ) + .expect_err("invalid branch should fail"); + + assert!( + err.to_string().contains("Invalid worktree_branch"), + "unexpected error: {err}" + ); +} + #[test] fn build_subagent_system_prompt_appends_role_when_set() { let assignment = SubAgentAssignment::new("p".to_string(), Some("worker".to_string())); @@ -3370,6 +3921,81 @@ fn emit_parent_completion_dropped_receiver_does_not_panic() { ); } +#[test] +fn terminal_results_excluding_returns_only_current_root_undelivered_agents() { + let tmp = 
tempdir().expect("tempdir"); + let mut manager = SubAgentManager::new(tmp.path().to_path_buf(), 4); + let current_boot = manager.current_session_boot_id.clone(); + let (input_tx, _input_rx) = mpsc::unbounded_channel(); + + let mut root = SubAgent::new( + "agent_root_done".to_string(), + SubAgentType::General, + "root".to_string(), + make_assignment(), + "deepseek-v4-flash".to_string(), + None, + None, + input_tx.clone(), + tmp.path().to_path_buf(), + current_boot.clone(), + ); + root.status = SubAgentStatus::Completed; + root.result = Some("root result".to_string()); + + let mut nested = SubAgent::new( + "agent_nested_done".to_string(), + SubAgentType::General, + "nested".to_string(), + make_assignment(), + "deepseek-v4-flash".to_string(), + None, + None, + input_tx.clone(), + tmp.path().to_path_buf(), + current_boot, + ); + nested.status = SubAgentStatus::Completed; + + let mut prior = SubAgent::new( + "agent_prior_done".to_string(), + SubAgentType::General, + "prior".to_string(), + make_assignment(), + "deepseek-v4-flash".to_string(), + None, + None, + input_tx, + tmp.path().to_path_buf(), + "prior_boot".to_string(), + ); + prior.status = SubAgentStatus::Completed; + + manager.agents.insert(root.id.clone(), root); + manager.agents.insert(nested.id.clone(), nested); + manager.agents.insert(prior.id.clone(), prior); + + manager.register_worker(make_worker_spec( + "agent_root_done", + tmp.path().to_path_buf(), + )); + let mut nested_spec = make_worker_spec("agent_nested_done", tmp.path().to_path_buf()); + nested_spec.parent_run_id = Some("agent_root_parent".to_string()); + manager.register_worker(nested_spec); + manager.register_worker(make_worker_spec( + "agent_prior_done", + tmp.path().to_path_buf(), + )); + + let delivered = HashSet::from(["agent_already_delivered".to_string()]); + let results = manager.terminal_results_excluding(&delivered); + assert_eq!(results.len(), 1); + assert_eq!(results[0].agent_id, "agent_root_done"); + + let delivered = 
HashSet::from(["agent_root_done".to_string()]); + assert!(manager.terminal_results_excluding(&delivered).is_empty()); +} + #[tokio::test] async fn run_subagent_task_emits_parent_completion_before_terminal_update() { let manager = Arc::new(RwLock::new(SubAgentManager::new(PathBuf::from("."), 2))); @@ -3405,6 +4031,7 @@ async fn run_subagent_task_emits_parent_completion_before_terminal_update() { fork_context: false, started_at: Instant::now(), max_steps: 0, + token_budget: None, input_rx: task_input_rx, launch_gate: None, }; @@ -3862,6 +4489,7 @@ async fn launch_gate_queues_extra_direct_children() { fork_context: false, started_at: Instant::now(), max_steps: 1, + token_budget: None, input_rx, launch_gate: gate, }; @@ -3927,3 +4555,224 @@ async fn launch_gate_queues_extra_direct_children() { "queued child must not start until a permit frees: {messages:?}" ); } + +/// Stub chat server that always replies with a final assistant text whose +/// `usage` reports the given token counts. Returns the client plus a call +/// counter so tests can assert how many model turns ran before a budget cap +/// fired. Mirrors `delayed_chat_client` but with configurable usage and no +/// artificial latency. 
+async fn token_heavy_chat_client( + prompt_tokens: u64, + completion_tokens: u64, + response_text: &str, +) -> (DeepSeekClient, Arc) { + let calls = Arc::new(AtomicUsize::new(0)); + let response_text = response_text.to_string(); + let app = Router::new().route( + "/{*path}", + post({ + let calls = Arc::clone(&calls); + let response_text = response_text.clone(); + move |Json(_body): Json| { + let calls = Arc::clone(&calls); + let response_text = response_text.clone(); + async move { + let attempt = calls.fetch_add(1, Ordering::SeqCst) + 1; + Json(json!({ + "id": format!("chatcmpl-budget-{attempt}"), + "model": "deepseek-v4-flash", + "choices": [{ + "index": 0, + "message": { + "role": "assistant", + "content": response_text + }, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": prompt_tokens, + "completion_tokens": completion_tokens, + "total_tokens": prompt_tokens + completion_tokens + } + })) + } + } + }), + ); + + let listener = tokio::net::TcpListener::bind("127.0.0.1:0") + .await + .expect("bind fake chat server"); + let addr = listener.local_addr().expect("fake chat server addr"); + tokio::spawn(async move { + let _ = axum::serve(listener, app).await; + }); + + let config = crate::config::Config { + api_key: Some("test-key".to_string()), + base_url: Some(format!("http://{addr}/v1")), + ..crate::config::Config::default() + }; + let client = DeepSeekClient::new(&config).expect("fake chat client"); + (client, calls) +} + +/// Shared scaffolding for the per-worker token-budget runtime tests: spins up +/// a general worker against `token_heavy_chat_client` with the given cap and +/// returns the manager, agent id, call counter, and spawned task handle. 
+async fn spawn_budget_capped_worker( + workspace: &Path, + prompt_tokens: u64, + completion_tokens: u64, + token_budget: Option, + max_steps: u32, +) -> ( + Arc>, + String, + Arc, + tokio::task::JoinHandle<()>, +) { + let manager = Arc::new(RwLock::new(SubAgentManager::new( + workspace.to_path_buf(), + 2, + ))); + let agent_id = "agent_budget_worker".to_string(); + let (task_input_tx, task_input_rx) = mpsc::unbounded_channel(); + let agent = SubAgent::new( + agent_id.clone(), + SubAgentType::General, + "Work within budget".to_string(), + make_assignment(), + "deepseek-v4-flash".to_string(), + Some("Budget".to_string()), + Some(vec![]), + task_input_tx, + workspace.to_path_buf(), + "boot_budget".to_string(), + ); + { + let mut manager = manager.write().await; + manager.agents.insert(agent_id.clone(), agent); + manager.register_worker(make_worker_spec(&agent_id, workspace.to_path_buf())); + } + + let (client, calls) = + token_heavy_chat_client(prompt_tokens, completion_tokens, "partial answer").await; + let mut runtime = stub_runtime(); + runtime.client = client; + runtime.manager = Arc::clone(&manager); + runtime.context = ToolContext::new(workspace.to_path_buf()); + + let task = SubAgentTask { + manager_handle: Arc::clone(&manager), + runtime: runtime.clone(), + agent_id: agent_id.clone(), + agent_type: SubAgentType::General, + prompt: "Work within budget".to_string(), + assignment: make_assignment(), + allowed_tools: Some(vec![]), + fork_context: false, + started_at: Instant::now(), + max_steps, + token_budget, + input_rx: task_input_rx, + launch_gate: None, + }; + let task_handle = tokio::spawn(run_subagent_task(task)); + (manager, agent_id, calls, task_handle) +} + +#[tokio::test] +async fn worker_stops_when_per_worker_token_budget_exceeded() { + let tmp = tempdir().expect("tempdir"); + // 100 tokens/turn (60 in + 40 out) vs a 50-token cap: the worker must + // stop with `BudgetExhausted` after its very first model turn instead of + // running on to 
`max_steps`. + let (manager, agent_id, calls, task_handle) = + spawn_budget_capped_worker(tmp.path(), 60, 40, Some(50), 4).await; + + tokio::time::timeout(Duration::from_secs(5), task_handle) + .await + .expect("budget-capped worker must terminate") + .expect("task should finish"); + + assert_eq!( + calls.load(Ordering::SeqCst), + 1, + "worker must stop after the first over-budget turn, not run to max_steps" + ); + + let result = { + let manager = manager.read().await; + manager.get_result(&agent_id).expect("agent registered") + }; + assert!( + matches!(result.status, SubAgentStatus::BudgetExhausted), + "expected BudgetExhausted, got {:?}", + result.status + ); +} + +#[tokio::test] +async fn worker_without_per_worker_token_budget_runs_to_completion() { + let tmp = tempdir().expect("tempdir"); + // No per-worker cap: a final-text response completes the worker normally + // even though each turn reports 100 tokens. + let (manager, agent_id, calls, task_handle) = + spawn_budget_capped_worker(tmp.path(), 60, 40, None, 4).await; + + tokio::time::timeout(Duration::from_secs(5), task_handle) + .await + .expect("uncapped worker must terminate") + .expect("task should finish"); + + assert_eq!(calls.load(Ordering::SeqCst), 1); + + let result = { + let manager = manager.read().await; + manager.get_result(&agent_id).expect("agent registered") + }; + assert!( + matches!(result.status, SubAgentStatus::Completed), + "uncapped worker should complete normally, got {:?}", + result.status + ); +} + +#[tokio::test] +async fn per_worker_token_budget_does_not_double_count_scope_accounting() { + let tmp = tempdir().expect("tempdir"); + // The per-worker runtime cap stops the worker, but the scope-level + // accounting (#3319 `aggregate_budget_spent` sums worker_records' + // `total_tokens`) must reflect the tokens actually consumed exactly once + // — never inflated by the runtime accumulator that triggered the stop. 
+ let (manager, agent_id, calls, task_handle) = + spawn_budget_capped_worker(tmp.path(), 60, 40, Some(50), 4).await; + + tokio::time::timeout(Duration::from_secs(5), task_handle) + .await + .expect("budget-capped worker must terminate") + .expect("task should finish"); + + assert_eq!(calls.load(Ordering::SeqCst), 1); + + let (result, worker_record) = { + let manager = manager.read().await; + ( + manager.get_result(&agent_id).expect("agent registered"), + manager.get_worker_record(&agent_id).expect("worker record"), + ) + }; + assert!( + matches!(result.status, SubAgentStatus::BudgetExhausted), + "expected BudgetExhausted, got {:?}", + result.status + ); + // One turn of 60 in + 40 out = 100 tokens, counted exactly once. + assert_eq!( + worker_record.usage.total_tokens, + Some(100), + "scope accounting must equal the single turn's tokens, not double-count: {:?}", + worker_record.usage + ); +} diff --git a/crates/tui/src/tools/verifier.rs b/crates/tui/src/tools/verifier.rs index 3e452f5945..db2293a85c 100644 --- a/crates/tui/src/tools/verifier.rs +++ b/crates/tui/src/tools/verifier.rs @@ -1122,8 +1122,28 @@ fn char_boundary_index(text: &str, max_chars: usize) -> usize { mod tests { use super::*; use crate::tools::shell::ShellStatus; + use std::time::Duration; use tempfile::tempdir; + const BACKGROUND_COMPLETION_WAIT_MS: u64 = 30_000; + + fn wait_for_completed_shell( + manager: &mut crate::tools::shell::ShellManager, + task_id: &str, + ) -> crate::tools::shell::ShellResult { + let deadline = Instant::now() + Duration::from_millis(BACKGROUND_COMPLETION_WAIT_MS); + + loop { + let result = manager + .get_output(task_id, true, 1_000) + .expect("background output"); + if result.status != ShellStatus::Running || Instant::now() >= deadline { + return result; + } + std::thread::sleep(Duration::from_millis(50)); + } + } + #[test] fn run_verifiers_requires_user_approval() { let tool = RunVerifiersTool; @@ -1316,12 +1336,10 @@ mod tests { Some("nonblocking") ); - let output = 
ctx - .shell_manager - .lock() - .expect("shell manager") - .get_output(task_id, true, 10_000) - .expect("background output"); + let output = wait_for_completed_shell( + &mut ctx.shell_manager.lock().expect("shell manager"), + task_id, + ); assert_eq!(output.status, ShellStatus::Completed); assert!( output.stdout.contains("rustc"), diff --git a/crates/tui/src/tui/active_cell.rs b/crates/tui/src/tui/active_cell.rs index f99efb2bf2..72264f9015 100644 --- a/crates/tui/src/tui/active_cell.rs +++ b/crates/tui/src/tui/active_cell.rs @@ -333,6 +333,8 @@ mod tests { output: None, live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: Some(Instant::now()), duration_ms: None, source: ExecSource::Assistant, diff --git a/crates/tui/src/tui/app.rs b/crates/tui/src/tui/app.rs index 207e468f16..cb57ed819f 100644 --- a/crates/tui/src/tui/app.rs +++ b/crates/tui/src/tui/app.rs @@ -343,7 +343,7 @@ impl ReasoningEffort { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum SidebarFocus { Auto, - Work, + Pinned, Tasks, Agents, Context, @@ -396,7 +396,7 @@ impl SidebarFocus { #[must_use] pub fn from_setting(value: &str) -> Self { match value.trim().to_ascii_lowercase().as_str() { - "work" | "plan" | "todos" => Self::Work, + "pinned" | "visible" | "show" | "on" | "work" | "plan" | "todos" => Self::Pinned, "tasks" => Self::Tasks, "agents" | "subagents" | "sub-agents" => Self::Agents, "context" | "session" => Self::Context, @@ -410,7 +410,7 @@ impl SidebarFocus { pub fn as_setting(self) -> &'static str { match self { Self::Auto => "auto", - Self::Work => "work", + Self::Pinned => "pinned", Self::Tasks => "tasks", Self::Agents => "agents", Self::Context => "context", @@ -1570,6 +1570,8 @@ pub struct App { pub sidebar_resize_anchor_width: u16, /// Last sidebar area rendered (for mouse hit-testing the resize handle). pub last_sidebar_area: Option, + /// Last total chat/sidebar width considered for sidebar rendering. 
+ pub last_sidebar_host_width: Option, /// Handle rect painted on the left edge of the sidebar (1 col). pub last_sidebar_handle_area: Option, /// Total horizontal space (chat + sidebar) used to compute the percentage @@ -1972,6 +1974,8 @@ pub struct TaskPanelEntry { pub kind: TaskPanelEntryKind, pub stale: bool, pub elapsed_since_output_ms: Option, + pub owner_agent_id: Option, + pub owner_agent_name: Option, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -2410,6 +2414,7 @@ impl App { sidebar_resize_anchor_x: 0, sidebar_resize_anchor_width: 0, last_sidebar_area: None, + last_sidebar_host_width: None, last_sidebar_handle_area: None, sidebar_resize_total_width: 0, sidebar_width_dirty: false, @@ -5570,6 +5575,14 @@ pub enum AppAction { }, UpdateCompaction(CompactionConfig), UpdateStreamChunkTimeout(u64), + UpdateSubagentRuntimeConfig { + enabled: bool, + max_subagents: usize, + launch_concurrency: usize, + max_spawn_depth: u32, + api_timeout_secs: u64, + heartbeat_timeout_secs: u64, + }, OpenContextInspector, CompactContext, PurgeContext, @@ -6106,17 +6119,18 @@ mod tests { } #[test] - fn sidebar_focus_accepts_work_and_maps_legacy_trackers_to_work() { + fn sidebar_focus_accepts_pinned_and_maps_legacy_trackers_to_pinned() { assert_eq!(SidebarFocus::from_setting("auto"), SidebarFocus::Auto); - assert_eq!(SidebarFocus::from_setting("work"), SidebarFocus::Work); - assert_eq!(SidebarFocus::from_setting("plan"), SidebarFocus::Work); - assert_eq!(SidebarFocus::from_setting("todos"), SidebarFocus::Work); + assert_eq!(SidebarFocus::from_setting("pinned"), SidebarFocus::Pinned); + assert_eq!(SidebarFocus::from_setting("work"), SidebarFocus::Pinned); + assert_eq!(SidebarFocus::from_setting("plan"), SidebarFocus::Pinned); + assert_eq!(SidebarFocus::from_setting("todos"), SidebarFocus::Pinned); assert_eq!(SidebarFocus::from_setting("tasks"), SidebarFocus::Tasks); assert_eq!(SidebarFocus::from_setting("agents"), SidebarFocus::Agents); 
assert_eq!(SidebarFocus::from_setting("context"), SidebarFocus::Context); assert_eq!(SidebarFocus::from_setting("hidden"), SidebarFocus::Hidden); assert_eq!(SidebarFocus::from_setting("off"), SidebarFocus::Hidden); - assert_eq!(SidebarFocus::Work.as_setting(), "work"); + assert_eq!(SidebarFocus::Pinned.as_setting(), "pinned"); assert_eq!(SidebarFocus::Hidden.as_setting(), "hidden"); } diff --git a/crates/tui/src/tui/approval.rs b/crates/tui/src/tui/approval.rs index c5167028d0..f227e13695 100644 --- a/crates/tui/src/tui/approval.rs +++ b/crates/tui/src/tui/approval.rs @@ -30,6 +30,7 @@ use crate::localization::Locale; use crate::sandbox::SandboxPolicy; use crate::tui::views::{ModalKind, ModalView, ViewAction, ViewEvent}; use crate::tui::widgets::{ApprovalWidget, ElevationWidget, Renderable}; +use codewhale_config::ToolAskRule; use crossterm::event::{KeyCode, KeyEvent}; use serde_json::Value; use std::path::{Path, PathBuf}; @@ -138,6 +139,8 @@ pub struct ApprovalRequest { /// Displayed in the approval view so users understand *why* the change /// is being made before reviewing *what* will change. pub intent_summary: Option, + /// Ask-only persistent rules that can be saved with the approval. + pub persistent_ask_rules: Vec, } /// Key approval details rendered prominently in the approval card. @@ -193,6 +196,7 @@ impl ApprovalRequest { Some(summary.to_string()) } }), + persistent_ask_rules: build_persistent_ask_rules(tool_name, params), } } @@ -218,6 +222,22 @@ impl ApprovalRequest { } } + #[must_use] + pub fn can_save_ask_rule(&self) -> bool { + !self.persistent_ask_rules.is_empty() + } + + #[must_use] + pub fn ask_rule_preview(&self) -> Option { + if self.persistent_ask_rules.is_empty() { + return None; + } + let permissions = codewhale_config::PermissionsToml { + rules: self.persistent_ask_rules.clone(), + }; + toml::to_string_pretty(&permissions).ok() + } + /// Extract the most important params for the approval card. 
#[must_use] pub fn prominent_detail_items(&self, locale: Locale) -> Vec { @@ -231,6 +251,22 @@ impl ApprovalRequest { } } +#[must_use] +fn build_persistent_ask_rules(tool_name: &str, params: &Value) -> Vec { + if tool_name != "exec_shell" { + return Vec::new(); + } + let Some(command) = params + .get("command") + .and_then(Value::as_str) + .map(str::trim) + .filter(|command| !command.is_empty()) + else { + return Vec::new(); + }; + vec![ToolAskRule::exec_shell(command)] +} + /// Get the category for a tool by name pub fn get_tool_category(name: &str) -> ToolCategory { if matches!(name, "write_file" | "edit_file" | "apply_patch") { @@ -888,6 +924,15 @@ impl ApprovalView { } fn emit_decision(&self, decision: ReviewDecision, timed_out: bool) -> ViewAction { + self.emit_decision_with_rules(decision, timed_out, Vec::new()) + } + + fn emit_decision_with_rules( + &self, + decision: ReviewDecision, + timed_out: bool, + persistent_ask_rules: Vec, + ) -> ViewAction { ViewAction::EmitAndClose(ViewEvent::ApprovalDecision { tool_id: self.request.id.clone(), tool_name: self.request.tool_name.clone(), @@ -895,6 +940,7 @@ impl ApprovalView { timed_out, approval_key: self.request.approval_key.clone(), approval_grouping_key: self.request.approval_grouping_key.clone(), + persistent_ask_rules, }) } @@ -947,6 +993,12 @@ impl ModalView for ApprovalView { KeyCode::Char('a') | KeyCode::Char('A') | KeyCode::Char('2') => { self.commit_option(ApprovalOption::ApproveAlways) } + KeyCode::Char('s') | KeyCode::Char('S') if self.request.can_save_ask_rule() => self + .emit_decision_with_rules( + ReviewDecision::Approved, + false, + self.request.persistent_ask_rules.clone(), + ), KeyCode::Char('n') | KeyCode::Char('N') | KeyCode::Char('d') @@ -1261,6 +1313,16 @@ mod tests { ) } + fn shell_request() -> ApprovalRequest { + ApprovalRequest::new( + "test-id", + "exec_shell", + "Run a shell command", + &json!({"command": "cargo test --workspace"}), + "tool:exec_shell", + ) + } + // 
======================================================================== // Tool Category Tests // ======================================================================== @@ -1549,6 +1611,28 @@ mod tests { assert_eq!(view.risk(), RiskLevel::Benign); } + #[test] + fn exec_shell_request_builds_ask_rule_preview() { + let request = shell_request(); + + assert_eq!( + request.persistent_ask_rules, + vec![ToolAskRule::exec_shell("cargo test --workspace")] + ); + let preview = request.ask_rule_preview().expect("preview"); + assert!(preview.contains("[[rules]]")); + assert!(preview.contains("tool = \"exec_shell\"")); + assert!(preview.contains("command = \"cargo test --workspace\"")); + } + + #[test] + fn non_shell_request_has_no_persistent_ask_rules() { + let request = destructive_request(); + + assert!(request.persistent_ask_rules.is_empty()); + assert_eq!(request.ask_rule_preview(), None); + } + #[test] fn tab_toggles_collapsed_card_so_transcript_stays_visible() { // Regression for PR #1455 / @tiger-dog: the approval modal @@ -1609,6 +1693,36 @@ mod tests { } } + #[test] + fn save_ask_rule_shortcut_approves_once_with_rule() { + let mut view = ApprovalView::new(shell_request()); + + let action = view.handle_key(create_key_event(KeyCode::Char('s'))); + let ViewAction::EmitAndClose(ViewEvent::ApprovalDecision { + decision, + persistent_ask_rules, + .. 
+ }) = action + else { + panic!("expected approval decision"); + }; + + assert_eq!(decision, ReviewDecision::Approved); + assert_eq!( + persistent_ask_rules, + vec![ToolAskRule::exec_shell("cargo test --workspace")] + ); + } + + #[test] + fn save_ask_rule_shortcut_is_ignored_without_rule() { + let mut view = ApprovalView::new(benign_request()); + + let action = view.handle_key(create_key_event(KeyCode::Char('s'))); + + assert!(matches!(action, ViewAction::None)); + } + #[test] fn benign_one_key_approves_via_numeric_pad() { let mut view = ApprovalView::new(benign_request()); diff --git a/crates/tui/src/tui/composer_ui.rs b/crates/tui/src/tui/composer_ui.rs index a73cbd80bf..ef4a835c1a 100644 --- a/crates/tui/src/tui/composer_ui.rs +++ b/crates/tui/src/tui/composer_ui.rs @@ -173,6 +173,20 @@ pub(crate) fn is_composer_newline_key(key: KeyEvent) -> bool { } } +pub(crate) fn is_forced_submit_key(key: KeyEvent) -> bool { + match key.code { + KeyCode::Enter => key.modifiers.contains(KeyModifiers::CONTROL), + // Several terminals encode Ctrl+Enter / Cmd+Enter as Ctrl+J. Keep + // Ctrl+J available as a newline while idle, but let the event loop use + // this helper to force a live steer when a turn is already running. 
+ KeyCode::Char('j') | KeyCode::Char('J') => { + key.modifiers.contains(KeyModifiers::CONTROL) + && !key.modifiers.contains(KeyModifiers::ALT) + } + _ => false, + } +} + pub(crate) fn handle_history_search_key(app: &mut App, key: KeyEvent) { match key.code { KeyCode::Enter => { diff --git a/crates/tui/src/tui/history.rs b/crates/tui/src/tui/history.rs index 89e4c9740d..558286c7cd 100644 --- a/crates/tui/src/tui/history.rs +++ b/crates/tui/src/tui/history.rs @@ -1125,6 +1125,8 @@ pub struct ExecCell { pub output: Option, pub live_output: Option, pub shell_task_id: Option, + pub owner_agent_id: Option, + pub owner_agent_name: Option, pub started_at: Option, pub duration_ms: Option, pub source: ExecSource, @@ -1189,6 +1191,19 @@ impl ExecCell { )); } + if let Some(owner) = self + .owner_agent_name + .as_deref() + .or(self.owner_agent_id.as_deref()) + { + lines.extend(render_compact_kv( + "owner", + owner, + Style::default().fg(palette::TEXT_MUTED), + width, + )); + } + if let Some(interaction) = self.interaction.as_ref() { lines.extend(wrap_plain_line( &format!(" {interaction}"), @@ -4674,6 +4689,8 @@ mod tests { output: None, live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at, duration_ms: None, source: ExecSource::Assistant, @@ -5015,6 +5032,8 @@ mod tests { output: Some("a\nb\n".to_string()), live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: None, duration_ms: Some(10), source: ExecSource::Assistant, @@ -5047,6 +5066,8 @@ mod tests { output: None, live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: None, duration_ms: None, source: ExecSource::Assistant, @@ -5365,6 +5386,8 @@ mod tests { output: Some("boom".to_string()), live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: None, duration_ms: Some(42), source: ExecSource::Assistant, @@ -5428,6 +5451,8 @@ mod tests { 
output: None, live_output: Some("running line 1\nrunning line 2".to_string()), shell_task_id: Some("shell_live".to_string()), + owner_agent_id: None, + owner_agent_name: None, started_at: None, duration_ms: None, source: ExecSource::Assistant, @@ -5450,6 +5475,8 @@ mod tests { output: Some("final output".to_string()), live_output: Some("stale live tail".to_string()), shell_task_id: Some("shell_live".to_string()), + owner_agent_id: None, + owner_agent_name: None, started_at: None, duration_ms: None, source: ExecSource::Assistant, @@ -5581,6 +5608,8 @@ mod tests { output: Some(output), live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: None, duration_ms: Some(120), source: ExecSource::Assistant, @@ -5642,6 +5671,8 @@ mod tests { output: Some(output), live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: None, duration_ms: Some(120), source: ExecSource::Assistant, @@ -6112,6 +6143,8 @@ mod tests { output: Some("ok".to_string()), live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: None, duration_ms: None, source: ExecSource::Assistant, diff --git a/crates/tui/src/tui/hotbar/actions.rs b/crates/tui/src/tui/hotbar/actions.rs index 949e321cff..91c3c22972 100644 --- a/crates/tui/src/tui/hotbar/actions.rs +++ b/crates/tui/src/tui/hotbar/actions.rs @@ -250,8 +250,8 @@ impl HotbarAction for AppHotbarAction { } AppHotbarKind::SidebarToggle => { if app.sidebar_focus == SidebarFocus::Hidden { - app.set_sidebar_focus(SidebarFocus::Auto); - app.status_message = Some("Sidebar focus: auto".to_string()); + app.set_sidebar_focus(SidebarFocus::Pinned); + app.status_message = Some("Sidebar focus: pinned".to_string()); } else { app.set_sidebar_focus(SidebarFocus::Hidden); app.status_message = Some("Sidebar hidden".to_string()); @@ -610,6 +610,7 @@ mod tests { let registry = HotbarActionRegistry::with_builtins(); let sidebar = 
registry.get("sidebar.toggle").expect("sidebar action"); let mut app = test_app(); + app.sidebar_focus = SidebarFocus::Pinned; assert!(sidebar.is_active(&app)); assert_eq!( @@ -620,7 +621,7 @@ mod tests { assert!(!sidebar.is_active(&app)); sidebar.dispatch(&mut app).expect("dispatch sidebar show"); - assert_eq!(app.sidebar_focus, SidebarFocus::Auto); + assert_eq!(app.sidebar_focus, SidebarFocus::Pinned); assert!(sidebar.is_active(&app)); } diff --git a/crates/tui/src/tui/keybindings.rs b/crates/tui/src/tui/keybindings.rs index 96b12fa277..3458434f5d 100644 --- a/crates/tui/src/tui/keybindings.rs +++ b/crates/tui/src/tui/keybindings.rs @@ -190,6 +190,11 @@ pub const KEYBINDINGS: &[KeybindingEntry] = &[ description_id: crate::localization::MessageId::KbCommandPalette, section: KeybindingSection::Submission, }, + KeybindingEntry { + chord: "Ctrl+X (Tasks sidebar)", + description_id: crate::localization::MessageId::KbCancelBackgroundShellJobs, + section: KeybindingSection::Submission, + }, KeybindingEntry { chord: "Ctrl+P", description_id: crate::localization::MessageId::KbFuzzyFilePicker, @@ -251,11 +256,6 @@ pub const KEYBINDINGS: &[KeybindingEntry] = &[ description_id: crate::localization::MessageId::KbFocusSidebar, section: KeybindingSection::Modes, }, - KeybindingEntry { - chord: "Ctrl+X", - description_id: crate::localization::MessageId::KbTogglePlanAgent, - section: KeybindingSection::Modes, - }, // --- Sessions --- KeybindingEntry { chord: "Ctrl+R", @@ -356,6 +356,19 @@ mod tests { ); } + #[test] + fn ctrl_x_tasks_sidebar_cancel_all_is_documented() { + let ctrl_x_tasks = KEYBINDINGS + .iter() + .find(|entry| entry.chord == "Ctrl+X (Tasks sidebar)") + .expect("Ctrl+X Tasks sidebar keybinding should be documented"); + + assert_eq!( + ctrl_x_tasks.description_id, + crate::localization::MessageId::KbCancelBackgroundShellJobs + ); + } + #[test] fn section_rank_is_a_total_order() { let sections = [ diff --git a/crates/tui/src/tui/onboarding/mod.rs 
b/crates/tui/src/tui/onboarding/mod.rs index 71e675685f..2068325b3a 100644 --- a/crates/tui/src/tui/onboarding/mod.rs +++ b/crates/tui/src/tui/onboarding/mod.rs @@ -18,6 +18,8 @@ use ratatui::{ use crate::palette; use crate::tui::app::{App, OnboardingState}; +const ONBOARDED_MARKER_FILE: &str = ".onboarded"; + pub fn render(f: &mut Frame, area: Rect, app: &App) { let block = Block::default().style(Style::default().bg(palette::DEEPSEEK_INK)); f.render_widget(block, area); @@ -128,13 +130,19 @@ pub fn tips_lines(app: &App) -> Vec> { } pub fn default_marker_path() -> Option { - dirs::home_dir().map(|home| { - let primary = home.join(".codewhale").join(".onboarded"); - if primary.exists() { - return primary; - } - home.join(".deepseek").join(".onboarded") - }) + crate::config::effective_home_dir().map(|home| marker_path_with_home(&home)) +} + +fn marker_path_with_home(home: &Path) -> PathBuf { + let primary = home.join(".codewhale").join(ONBOARDED_MARKER_FILE); + if primary.exists() { + return primary; + } + let legacy = home.join(".deepseek").join(ONBOARDED_MARKER_FILE); + if legacy.exists() { + return legacy; + } + primary } pub fn is_onboarded() -> bool { @@ -142,9 +150,14 @@ pub fn is_onboarded() -> bool { } pub fn mark_onboarded() -> std::io::Result { - let path = default_marker_path().ok_or_else(|| { + let home = crate::config::effective_home_dir().ok_or_else(|| { std::io::Error::new(std::io::ErrorKind::NotFound, "Home directory not found") })?; + mark_onboarded_at_home(&home) +} + +fn mark_onboarded_at_home(home: &Path) -> std::io::Result { + let path = marker_path_with_home(home); if let Some(parent) = path.parent() { std::fs::create_dir_all(parent)?; } @@ -257,6 +270,49 @@ pub fn sync_api_key_validation_status(app: &mut App, show_empty_error: bool) { mod tests { use super::*; + #[test] + fn fresh_install_marker_path_uses_codewhale_not_legacy() { + let tmp = tempfile::tempdir().expect("tempdir"); + + let expected = 
tmp.path().join(".codewhale").join(ONBOARDED_MARKER_FILE); + assert_eq!(marker_path_with_home(tmp.path()), expected); + + let written = mark_onboarded_at_home(tmp.path()).expect("mark onboarded"); + assert_eq!(written, expected); + assert!(expected.exists()); + assert!( + !tmp.path().join(".deepseek").exists(), + "fresh onboarding must not recreate the legacy .deepseek dir" + ); + } + + #[test] + fn existing_legacy_marker_is_preserved() { + let tmp = tempfile::tempdir().expect("tempdir"); + let legacy = tmp.path().join(".deepseek").join(ONBOARDED_MARKER_FILE); + std::fs::create_dir_all(legacy.parent().expect("legacy parent")).expect("mkdir legacy"); + std::fs::write(&legacy, "").expect("seed legacy marker"); + + assert_eq!(marker_path_with_home(tmp.path()), legacy); + assert_eq!( + mark_onboarded_at_home(tmp.path()).expect("mark onboarded"), + legacy + ); + } + + #[test] + fn codewhale_marker_wins_over_legacy_marker() { + let tmp = tempfile::tempdir().expect("tempdir"); + let primary = tmp.path().join(".codewhale").join(ONBOARDED_MARKER_FILE); + let legacy = tmp.path().join(".deepseek").join(ONBOARDED_MARKER_FILE); + for marker in [&primary, &legacy] { + std::fs::create_dir_all(marker.parent().expect("marker parent")).expect("mkdir"); + std::fs::write(marker, "").expect("seed marker"); + } + + assert_eq!(marker_path_with_home(tmp.path()), primary); + } + #[test] fn validate_rejects_empty_or_whitespace() { assert!(matches!( diff --git a/crates/tui/src/tui/shell_job_routing.rs b/crates/tui/src/tui/shell_job_routing.rs index bf915df895..0e2b68bc9e 100644 --- a/crates/tui/src/tui/shell_job_routing.rs +++ b/crates/tui/src/tui/shell_job_routing.rs @@ -173,6 +173,8 @@ mod tests { stale: true, elapsed_since_output_ms: None, linked_task_id: Some("task_1".to_string()), + owner_agent_id: None, + owner_agent_name: None, }]; let formatted = format_shell_job_list(&jobs); assert!(formatted.contains("Bash jobs (1)")); diff --git a/crates/tui/src/tui/sidebar.rs 
b/crates/tui/src/tui/sidebar.rs index ab007faa33..53422a8c84 100644 --- a/crates/tui/src/tui/sidebar.rs +++ b/crates/tui/src/tui/sidebar.rs @@ -1,4 +1,4 @@ -//! Sidebar rendering — Work / Tasks / Agents / Context panels. +//! Sidebar rendering — Pinned / Tasks / Agents / Context panels. //! //! Extracted from `tui/ui.rs` (P1.2). The sidebar appears to the right of //! the chat transcript when the available width allows it. Each section @@ -57,7 +57,7 @@ pub fn render_sidebar(f: &mut Frame, area: Rect, app: &mut App) { match app.sidebar_focus { SidebarFocus::Auto => render_sidebar_auto(f, area, app), - SidebarFocus::Work => render_sidebar_work(f, area, app), + SidebarFocus::Pinned => render_sidebar_pinned(f, area, app), SidebarFocus::Tasks => render_sidebar_tasks(f, area, app), SidebarFocus::Agents => render_sidebar_subagents(f, area, app), SidebarFocus::Context => render_context_panel(f, area, app), @@ -72,7 +72,22 @@ pub fn render_sidebar(f: &mut Frame, area: Rect, app: &mut App) { /// useful content, or as the one quiet empty state when nothing else is active. fn render_sidebar_auto(f: &mut Frame, area: Rect, app: &mut App) { let visible = auto_sidebar_panels(auto_sidebar_state(app)); + render_sidebar_panel_stack(f, area, app, &visible); +} + +/// Build the pinned panel stack. This uses the same content-sensitive panels +/// as Auto, but it never participates in idle auto-collapse. 
+fn render_sidebar_pinned(f: &mut Frame, area: Rect, app: &mut App) { + let visible = auto_sidebar_panels(auto_sidebar_state(app)); + render_sidebar_panel_stack(f, area, app, &visible); +} +fn render_sidebar_panel_stack( + f: &mut Frame, + area: Rect, + app: &mut App, + visible: &[AutoSidebarPanel], +) { let constraints: Vec = match visible.len() { 1 => vec![Constraint::Min(0)], 2 => vec![Constraint::Percentage(50), Constraint::Min(0)], @@ -1110,12 +1125,12 @@ fn task_panel_rows( .any(|task| task.id.starts_with("shell_") && task.status == "running"); let hint_action = if stale_running_shells.len() == 1 { Some(( - "Ctrl+K -> cancel stale job".to_string(), + "Ctrl+X -> cancel stale job".to_string(), format!("/jobs cancel {}", stale_running_shells[0].id), )) } else if any_running_shell { Some(( - "Ctrl+K -> /jobs cancel-all".to_string(), + "Ctrl+X -> /jobs cancel-all".to_string(), "/jobs cancel-all".to_string(), )) } else { @@ -1245,9 +1260,9 @@ fn task_panel_hover_texts(app: &App, max_rows: usize) -> Vec { .iter() .any(|task| task.id.starts_with("shell_") && task.status == "running"); if stale_running_shells == 1 { - texts.push("Ctrl+K -> cancel stale job".to_string()); + texts.push("Ctrl+X -> cancel stale job".to_string()); } else if any_running_shell { - texts.push("Ctrl+K -> /jobs cancel-all".to_string()); + texts.push("Ctrl+X -> /jobs cancel-all".to_string()); } } } @@ -1370,6 +1385,13 @@ fn push_reasoning_row_hover_texts( fn background_task_labels(task: &TaskPanelEntry, duration: &str) -> (String, String) { let stale_label = stale_no_output_label(task); + let owner_label = task + .owner_agent_name + .as_deref() + .or(task.owner_agent_id.as_deref()) + .filter(|owner| !owner.trim().is_empty()) + .map(|owner| format!("by {owner}")) + .unwrap_or_default(); let status = stale_label .as_ref() .map(|label| format!("{} ({label})", task.status)) @@ -1381,6 +1403,7 @@ fn background_task_labels(task: &TaskPanelEntry, duration: &str) -> (String, Str format!("Bash {status} 
{command} {duration}"), compact_join([ format!("{} \u{00B7} Bash", task.id), + owner_label, stale_label.unwrap_or_default(), ]), ); @@ -1393,7 +1416,11 @@ fn background_task_labels(task: &TaskPanelEntry, duration: &str) -> (String, Str status, duration ), - compact_join([task.prompt_summary.clone(), stale_label.unwrap_or_default()]), + compact_join([ + task.prompt_summary.clone(), + owner_label, + stale_label.unwrap_or_default(), + ]), ) } @@ -2382,6 +2409,7 @@ fn subagent_status_text(status: &SubAgentStatus) -> &'static str { SubAgentStatus::Interrupted(_) => "interrupted", SubAgentStatus::Failed(_) => "failed", SubAgentStatus::Cancelled => "canceled", + SubAgentStatus::BudgetExhausted => "budget", } } @@ -3001,11 +3029,11 @@ mod tests { SidebarHoverSection, SidebarHoverState, SidebarSubagentSummary, SidebarToolRow, SidebarWorkChecklistItem, SidebarWorkStrategyStep, SidebarWorkSummary, ToolRowOrder, agent_row_hover_text, auto_sidebar_panels, background_task_spinner_prefix, - context_panel_cost_line, editorial_tool_rows, normalize_activity_text, sidebar_agent_rows, - sidebar_hover_rows, sidebar_work_summary, sort_sidebar_agent_rows_as_tree, - subagent_panel_hover_texts, subagent_panel_lines, subagent_panel_rows, - task_panel_hover_texts, task_panel_lines, task_panel_rows, work_panel_empty_hint, - work_panel_hover_texts, work_panel_lines, + context_panel_cost_line, editorial_tool_rows, normalize_activity_text, render_sidebar, + sidebar_agent_rows, sidebar_hover_rows, sidebar_work_summary, + sort_sidebar_agent_rows_as_tree, subagent_panel_hover_texts, subagent_panel_lines, + subagent_panel_rows, task_panel_hover_texts, task_panel_lines, task_panel_rows, + work_panel_empty_hint, work_panel_hover_texts, work_panel_lines, }; use crate::config::Config; use crate::palette; @@ -3019,7 +3047,7 @@ mod tests { use crate::tui::history::{ ExecCell, ExecSource, GenericToolCell, HistoryCell, ToolCell, ToolStatus, }; - use ratatui::text::Line; + use ratatui::{Terminal, 
backend::TestBackend, text::Line}; use std::path::PathBuf; use std::time::{Duration, Instant}; @@ -3183,6 +3211,44 @@ mod tests { assert_eq!(panels, vec![AutoSidebarPanel::Work]); } + #[test] + fn pinned_sidebar_renders_agents_section_when_subagents_are_active() { + let mut app = create_test_app(); + app.sidebar_focus = SidebarFocus::Pinned; + app.subagent_cache + .push(cached_agent("agent-active-1", Some("critic"))); + app.agent_progress.insert( + "agent-active-1".to_string(), + "checking sidebar visibility".to_string(), + ); + + let backend = TestBackend::new(72, 18); + let mut terminal = Terminal::new(backend).expect("terminal"); + terminal + .draw(|frame| render_sidebar(frame, frame.area(), &mut app)) + .expect("draw sidebar"); + let rendered = terminal + .backend() + .buffer() + .content() + .iter() + .map(|cell| cell.symbol()) + .collect::(); + + assert!( + rendered.contains("Agents"), + "pinned sidebar must surface active sub-agents: {rendered:?}" + ); + assert!( + rendered.contains("critic") || rendered.contains("Agent 1"), + "pinned sidebar should render the child agent label: {rendered:?}" + ); + assert!( + rendered.contains("checking sidebar visibility"), + "pinned sidebar should render child progress: {rendered:?}" + ); + } + #[test] fn work_panel_empty_hint_stays_quiet_and_truncates() { let hint = work_panel_empty_hint(10); @@ -3784,6 +3850,8 @@ mod tests { output: None, live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: None, duration_ms: Some(ACTIVE_TOOL_STALE_RUNNING_ROW_TTL.as_millis() as u64 + 1), source: ExecSource::Assistant, @@ -3819,6 +3887,8 @@ mod tests { output: None, live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: Some(std::time::Instant::now()), duration_ms: None, source: ExecSource::Assistant, @@ -3835,6 +3905,8 @@ mod tests { kind: TaskPanelEntryKind::Background, stale: false, elapsed_since_output_ms: None, + owner_agent_id: None, + 
owner_agent_name: None, }); let text = lines_to_text(&task_panel_lines(&app, 80, 10)); @@ -3869,6 +3941,8 @@ mod tests { kind: TaskPanelEntryKind::Background, stale: false, elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, }); let text = lines_to_text(&task_panel_lines(&app, 96, 8)); @@ -3888,6 +3962,33 @@ mod tests { ); } + #[test] + fn tasks_panel_attributes_subagent_owned_shell_jobs() { + let mut app = create_test_app(); + app.task_panel.push(TaskPanelEntry { + id: "shell_owned".to_string(), + status: "running".to_string(), + prompt_summary: "shell: cargo test -p codewhale-tui".to_string(), + duration_ms: Some(2_000), + kind: TaskPanelEntryKind::Background, + stale: false, + elapsed_since_output_ms: None, + owner_agent_id: Some("agent_verifier".to_string()), + owner_agent_name: Some("verifier".to_string()), + }); + + let text = lines_to_text(&task_panel_lines(&app, 96, 8)); + + assert!( + text.iter().any(|line| line.contains("by verifier")), + "owned shell job should show sub-agent attribution: {text:?}" + ); + assert!( + text.iter().any(|line| line.contains("shell_owned")), + "shell id should remain visible with attribution: {text:?}" + ); + } + #[test] fn background_task_spinner_advances_at_readable_cadence() { let mut task = TaskPanelEntry { @@ -3898,6 +3999,8 @@ mod tests { kind: TaskPanelEntryKind::Background, stale: false, elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, }; assert_eq!(background_task_spinner_prefix(&task), Some("⠋")); @@ -3921,6 +4024,8 @@ mod tests { kind: TaskPanelEntryKind::ModelReasoning, stale: false, elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, }); let text = lines_to_text(&task_panel_lines(&app, 80, 8)); @@ -3980,6 +4085,8 @@ mod tests { kind: TaskPanelEntryKind::ModelReasoning, stale: false, elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, }); app.task_panel.push(TaskPanelEntry { id: 
"shell_live".to_string(), @@ -3989,6 +4096,8 @@ mod tests { kind: TaskPanelEntryKind::Background, stale: false, elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, }); let text = lines_to_text(&task_panel_lines(&app, 96, 12)); @@ -4029,6 +4138,8 @@ mod tests { kind: TaskPanelEntryKind::Background, stale: false, elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, }); let (lines, actions) = task_panel_rows(&app, 80, 12); @@ -4067,6 +4178,8 @@ mod tests { kind: TaskPanelEntryKind::Background, stale: true, elapsed_since_output_ms: Some(61_000), + owner_agent_id: None, + owner_agent_name: None, }); let (lines, actions) = task_panel_rows(&app, 80, 12); @@ -4108,6 +4221,8 @@ mod tests { kind: TaskPanelEntryKind::Background, stale: false, elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, }); app.task_panel.push(TaskPanelEntry { id: "task_bbb".to_string(), @@ -4117,6 +4232,8 @@ mod tests { kind: TaskPanelEntryKind::Background, stale: false, elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, }); let (lines, actions) = task_panel_rows(&app, 96, 16); @@ -4165,7 +4282,7 @@ mod tests { let hint_idx = text .iter() - .position(|line| line.contains("Ctrl+K")) + .position(|line| line.contains("Ctrl+X")) .expect("cancel-all hint row"); assert_eq!(actions[hint_idx].as_deref(), Some("/jobs cancel-all")); } @@ -4182,6 +4299,8 @@ mod tests { kind: TaskPanelEntryKind::Background, stale: false, elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, }); let (lines, actions) = task_panel_rows(&app, 80, 12); @@ -4213,6 +4332,8 @@ mod tests { output: None, live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: Some(Instant::now()), duration_ms: None, source: ExecSource::Assistant, @@ -4229,6 +4350,8 @@ mod tests { kind: TaskPanelEntryKind::Background, stale: false, elapsed_since_output_ms: None, + 
owner_agent_id: None, + owner_agent_name: None, }); let (lines, actions) = task_panel_rows(&app, 96, 16); @@ -4539,6 +4662,8 @@ mod tests { output: Some("Lint pending\nTest pending".to_string()), live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: None, duration_ms: Some(15_000), source: ExecSource::Assistant, @@ -4582,6 +4707,8 @@ mod tests { output: Some("test failed".to_string()), live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: None, duration_ms: Some(1_250), source: ExecSource::Assistant, @@ -4614,6 +4741,8 @@ mod tests { output: Some("Finished".to_string()), live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: None, duration_ms: Some(1_250), source: ExecSource::Assistant, diff --git a/crates/tui/src/tui/subagent_routing.rs b/crates/tui/src/tui/subagent_routing.rs index 98ac479308..b89c4e7ea0 100644 --- a/crates/tui/src/tui/subagent_routing.rs +++ b/crates/tui/src/tui/subagent_routing.rs @@ -168,6 +168,7 @@ fn reconcile_cards_with_snapshots(app: &mut App) { SubAgentStatus::Completed => AgentLifecycle::Completed, SubAgentStatus::Failed(_) => AgentLifecycle::Failed, SubAgentStatus::Cancelled => AgentLifecycle::Cancelled, + SubAgentStatus::BudgetExhausted => AgentLifecycle::Failed, }; Some((agent.agent_id.clone(), lifecycle)) }) @@ -217,6 +218,7 @@ fn subagent_status_rank(status: &SubAgentStatus) -> u8 { SubAgentStatus::Failed(_) => 2, SubAgentStatus::Completed => 3, SubAgentStatus::Cancelled => 4, + SubAgentStatus::BudgetExhausted => 2, } } @@ -354,6 +356,8 @@ pub(super) fn task_summary_to_panel_entry(summary: TaskSummary) -> TaskPanelEntr kind: TaskPanelEntryKind::Background, stale: false, elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, } } diff --git a/crates/tui/src/tui/tool_routing.rs b/crates/tui/src/tui/tool_routing.rs index e7f8ee8143..febaf649ca 100644 --- 
a/crates/tui/src/tui/tool_routing.rs +++ b/crates/tui/src/tui/tool_routing.rs @@ -103,6 +103,8 @@ pub(super) fn handle_tool_call_started( output: None, live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: Some(Instant::now()), duration_ms: None, source, @@ -137,6 +139,8 @@ pub(super) fn handle_tool_call_started( output: None, live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: Some(Instant::now()), duration_ms: None, source, @@ -533,6 +537,20 @@ pub(super) fn handle_tool_call_complete( if shell_task_id.is_some() { exec.shell_task_id = shell_task_id; } + exec.owner_agent_id = tool_result + .metadata + .as_ref() + .and_then(|m| m.get("owner_agent_id")) + .and_then(serde_json::Value::as_str) + .filter(|agent_id| !agent_id.trim().is_empty()) + .map(str::to_string); + exec.owner_agent_name = tool_result + .metadata + .as_ref() + .and_then(|m| m.get("owner_agent_name")) + .and_then(serde_json::Value::as_str) + .filter(|agent_name| !agent_name.trim().is_empty()) + .map(str::to_string); if let Some(meta_command) = tool_result .metadata .as_ref() @@ -1322,6 +1340,8 @@ mod tests { output: None, live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: None, duration_ms: Some(120), source: ExecSource::Assistant, diff --git a/crates/tui/src/tui/transcript.rs b/crates/tui/src/tui/transcript.rs index f101401d33..68fa259ec2 100644 --- a/crates/tui/src/tui/transcript.rs +++ b/crates/tui/src/tui/transcript.rs @@ -618,6 +618,8 @@ mod tests { output: None, live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: None, duration_ms: None, source: ExecSource::Assistant, diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 51a2e87380..8e24d8e8b0 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -178,7 +178,7 @@ const TOOL_HANG_WATCHDOG_TIMEOUT: Duration = 
Duration::from_secs(900); // the per-tool spinner pulse — keep this fast enough that the spout reads as // motion (~12 fps) instead of teleport-frames. const UI_STATUS_ANIMATION_MS: u64 = 80; -const SIDEBAR_VISIBLE_MIN_WIDTH: u16 = 100; +pub(crate) const SIDEBAR_VISIBLE_MIN_WIDTH: u16 = 100; const DEFAULT_TERMINAL_PROBE_TIMEOUT_MS: u64 = 500; const PERIODIC_FULL_REPAINT_EVERY_N: u64 = 50; const TURN_META_PREFIX: &str = ""; @@ -225,6 +225,51 @@ fn should_auto_approve_approval_request( || app.approval_mode == ApprovalMode::Auto) } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum SidebarRenderState { + Hidden, + SuppressedByWidth { + available_width: u16, + min_width: u16, + }, + AutoCollapsed, + Visible, +} + +pub(crate) fn sidebar_render_state(app: &mut App) -> SidebarRenderState { + if app.sidebar_focus == SidebarFocus::Hidden { + return SidebarRenderState::Hidden; + } + + if let Some(available_width) = sidebar_host_width_hint(app) + && available_width < SIDEBAR_VISIBLE_MIN_WIDTH + { + return SidebarRenderState::SuppressedByWidth { + available_width, + min_width: SIDEBAR_VISIBLE_MIN_WIDTH, + }; + } + + if crate::tui::sidebar::sidebar_auto_idle(app) { + return SidebarRenderState::AutoCollapsed; + } + + SidebarRenderState::Visible +} + +fn sidebar_host_width_hint(app: &App) -> Option { + app.last_sidebar_host_width.or_else(|| { + let transcript_width = app.viewport.last_transcript_area.map(|area| area.width)?; + let sidebar_width = app + .viewport + .last_sidebar_area + .or(app.last_sidebar_area) + .map(|area| area.width) + .unwrap_or(0); + Some(transcript_width.saturating_add(sidebar_width)) + }) +} + fn sidebar_width_for_chat_area(app: &App, chat_width: u16) -> Option { if app.sidebar_focus == SidebarFocus::Hidden || chat_width < SIDEBAR_VISIBLE_MIN_WIDTH { return None; @@ -265,6 +310,12 @@ enum TranslationEvent { // TurnComplete / focus-gain / resize. 
The alt-screen buffer's double-buffering // plus ratatui's `terminal.clear()` are sufficient to repaint cleanly. const TERMINAL_ORIGIN_RESET: &[u8] = b"\x1b[r\x1b[?6l\x1b[H"; +// Xterm alternate-scroll mode keeps wheel events inside the alternate-screen +// viewport. Crossterm's mouse-capture command does not enable this DEC private +// mode, so terminals can still scroll the host scrollback if mouse capture is +// disabled, dropped during focus changes, or unavailable in the host. +const ENABLE_ALT_SCROLL_MODE: &[u8] = b"\x1b[?1007h"; +const DISABLE_ALT_SCROLL_MODE: &[u8] = b"\x1b[?1007l"; /// Begin synchronized update (DEC 2026): tell the terminal to defer /// rendering until END_SYNC_UPDATE is received. Best-effort — /// terminals that don't support this silently ignore the sequence. @@ -808,6 +859,7 @@ pub async fn run_tui(config: &Config, options: TuiOptions) -> Result<()> { cleanup_guard.defused = true; pop_keyboard_enhancement_flags(terminal.backend_mut()); + disable_alternate_scroll_mode(terminal.backend_mut()); execute!(terminal.backend_mut(), DisableFocusChange)?; disable_raw_mode()?; if use_alt_screen { @@ -998,6 +1050,7 @@ impl Drop for TerminalCleanupGuard { let mut stdout = io::stdout(); pop_keyboard_enhancement_flags(&mut stdout); + disable_alternate_scroll_mode(&mut stdout); let _ = execute!(stdout, DisableFocusChange); let _ = disable_raw_mode(); if self.use_alt_screen { @@ -1059,6 +1112,8 @@ fn handle_memory_quick_add(app: &mut App, input: &str, config: &Config) { } fn build_engine_config(app: &App, config: &Config) -> EngineConfig { + let provider = app.api_provider; + let max_subagents = app.max_subagents.clamp(1, crate::config::MAX_SUBAGENTS); EngineConfig { model: app.model.clone(), workspace: app.workspace.clone(), @@ -1086,8 +1141,12 @@ fn build_engine_config(app: &App, config: &Config) -> EngineConfig { // model stops emitting tool calls. A real runaway is rare and // human-noticeable; we trust the operator over a hard step cap. 
max_steps: u32::MAX, - max_subagents: app.max_subagents, - launch_concurrency: config.launch_concurrency(), + max_subagents, + max_admitted_subagents: config + .max_admitted_subagents_for_provider(provider) + .max(max_subagents), + launch_concurrency: config.launch_concurrency_for_provider(provider), + subagents_enabled: config.subagents_enabled_for_provider(provider), features: config.features(), compaction: app.compaction_config(), todos: app.todos.clone(), @@ -1097,7 +1156,8 @@ fn build_engine_config(app: &App, config: &Config) -> EngineConfig { app.hunt.token_budget, app.hunt.verdict.goal_status(), ), - max_spawn_depth: crate::tools::subagent::DEFAULT_MAX_SPAWN_DEPTH, + max_spawn_depth: config.subagent_max_spawn_depth_for_provider(provider), + subagent_token_budget: config.subagent_token_budget_for_provider(provider), allowed_tools: app.active_allowed_tools.clone(), disallowed_tools: None, hook_executor: app.runtime_services.hook_executor.clone(), @@ -1115,9 +1175,13 @@ fn build_engine_config(app: &App, config: &Config) -> EngineConfig { .map(crate::config::LspConfigToml::into_runtime), runtime_services: app.runtime_services.clone(), subagent_model_overrides: config.subagent_model_overrides(), - subagent_api_timeout: Duration::from_secs(config.subagent_api_timeout_secs()), + subagent_api_timeout: Duration::from_secs( + config.subagent_api_timeout_secs_for_provider(provider), + ), stream_chunk_timeout: Duration::from_secs(app.stream_chunk_timeout_secs), - subagent_heartbeat_timeout: Duration::from_secs(config.subagent_heartbeat_timeout_secs()), + subagent_heartbeat_timeout: Duration::from_secs( + config.subagent_heartbeat_timeout_secs_for_provider(provider), + ), prefer_bwrap: config.prefer_bwrap.unwrap_or(false), memory_enabled: config.memory_enabled(), memory_path: config.memory_path(), @@ -1221,6 +1285,8 @@ async fn refresh_active_task_panel(app: &mut App, task_manager: &SharedTaskManag kind: TaskPanelEntryKind::Background, stale: job.stale, 
elapsed_since_output_ms: job.elapsed_since_output_ms, + owner_agent_id: job.owner_agent_id, + owner_agent_name: job.owner_agent_name, }); } } @@ -1334,6 +1400,8 @@ fn active_reasoning_task_entries(app: &App) -> Vec { kind: TaskPanelEntryKind::ModelReasoning, stale: false, elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, }), _ => None, }) @@ -1375,6 +1443,8 @@ fn active_rlm_task_entries(app: &App) -> Vec { kind: TaskPanelEntryKind::Background, stale: false, elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, }) }) .collect() @@ -3588,20 +3658,14 @@ async fn run_event_loop( continue; } + if key.code == KeyCode::Char('x') + && key.modifiers.contains(KeyModifiers::CONTROL) + && prefill_jobs_cancel_all_if_tasks_sidebar(app) + { + continue; + } + if key.code == KeyCode::Char('k') && key.modifiers.contains(KeyModifiers::CONTROL) { - if app.view_stack.is_empty() - && app.sidebar_focus == SidebarFocus::Tasks - && app - .task_panel - .iter() - .any(|task| task.id.starts_with("shell_") && task.status == "running") - { - app.input = "/jobs cancel-all".to_string(); - app.cursor_position = app.input.len(); - app.status_message = - Some("Press Enter to cancel all running commands".to_string()); - continue; - } // When the composer is the active input target (no modal/pager // intercepting keys), Ctrl+K performs an emacs-style kill to // end-of-line. 
If the kill is a no-op (cursor at end of empty @@ -3926,8 +3990,8 @@ async fn run_event_loop( if key.modifiers.contains(KeyModifiers::ALT) && key_shortcuts::has_control_like_modifier(key.modifiers) => { - app.set_sidebar_focus(SidebarFocus::Work); - app.status_message = Some("Sidebar focus: work".to_string()); + app.set_sidebar_focus(SidebarFocus::Pinned); + app.status_message = Some("Sidebar focus: pinned".to_string()); continue; } KeyCode::Char('2') @@ -3963,8 +4027,8 @@ async fn run_event_loop( if key.modifiers.contains(KeyModifiers::ALT) && !key.modifiers.contains(KeyModifiers::CONTROL) => { - app.set_sidebar_focus(SidebarFocus::Work); - app.status_message = Some("Sidebar focus: work".to_string()); + app.set_sidebar_focus(SidebarFocus::Pinned); + app.status_message = Some("Sidebar focus: pinned".to_string()); continue; } KeyCode::Char('@') @@ -4413,7 +4477,9 @@ async fn run_event_loop( } } // Input handling - _ if is_composer_newline_key(key) => { + _ if is_composer_newline_key(key) + && !(app.is_loading && is_forced_submit_key(key)) => + { app.insert_char('\n'); } KeyCode::Enter @@ -4426,7 +4492,12 @@ async fn run_event_loop( continue; } // #382: Ctrl+Enter forces a steer into the current turn. - KeyCode::Enter if key.modifiers.contains(KeyModifiers::CONTROL) => { + // Some terminals report Ctrl/Cmd+Enter as Ctrl+J; while a + // turn is running, accept that encoding here instead of + // inserting a newline. + _ if is_forced_submit_key(key) + && (matches!(key.code, KeyCode::Enter) || app.is_loading) => + { if let Some(input) = app.submit_input() { if handle_bang_shell_input(app, &engine_handle, &input).await? 
{ continue; @@ -4773,13 +4844,6 @@ async fn run_event_loop( } else { app.push_status_toast("Cut failed", StatusToastLevel::Error, None); } - } else { - let new_mode = match app.mode { - AppMode::Plan => AppMode::Agent, - AppMode::Agent => AppMode::Yolo, - AppMode::Yolo => AppMode::Plan, - }; - apply_mode_update(app, &engine_handle, new_mode).await; } } _ if key_shortcuts::is_paste_shortcut(&key) => { @@ -4933,8 +4997,8 @@ fn persist_sidebar_settings_if_dirty(app: &mut App) { fn apply_alt_0_shortcut(app: &mut App, modifiers: KeyModifiers) { if modifiers.contains(KeyModifiers::CONTROL) { if app.sidebar_focus == SidebarFocus::Hidden { - app.set_sidebar_focus(SidebarFocus::Auto); - app.status_message = Some("Sidebar focus: auto".to_string()); + app.set_sidebar_focus(SidebarFocus::Pinned); + app.status_message = Some("Sidebar focus: pinned".to_string()); } else { app.set_sidebar_focus(SidebarFocus::Hidden); app.status_message = Some("Sidebar hidden".to_string()); @@ -6199,6 +6263,7 @@ async fn dispatch_user_message( dynamic_tools: Vec::new(), hook_executor: app.runtime_services.hook_executor.clone(), verbosity: app.verbosity.clone(), + provenance: crate::core::ops::UserInputProvenance::ExternalUser, }) .await { @@ -6670,6 +6735,9 @@ async fn switch_provider( let new_endpoint = display_base_url_host(&new_base_url); let cache_scope_changed = previous_provider != target || previous_model != new_model; app.api_provider = target; + app.max_subagents = config + .max_subagents_for_provider(target) + .clamp(1, crate::config::MAX_SUBAGENTS); app.provider_chain = target .kind() .map(|kind| codewhale_config::ProviderChain::new(kind, &config.fallback_providers)) @@ -7127,6 +7195,25 @@ async fn apply_command_result( .send(Op::SetStreamChunkTimeout { timeout_secs }) .await; } + AppAction::UpdateSubagentRuntimeConfig { + enabled, + max_subagents, + launch_concurrency, + max_spawn_depth, + api_timeout_secs, + heartbeat_timeout_secs, + } => { + let _ = engine_handle + 
.send(Op::SetSubagentRuntimeConfig { + enabled, + max_subagents, + launch_concurrency, + max_spawn_depth, + api_timeout_secs, + heartbeat_timeout_secs, + }) + .await; + } AppAction::OpenConfigEditor(mode) => match mode { ConfigUiMode::Native => { if app.view_stack.top_kind() != Some(ModalKind::Config) { @@ -8276,6 +8363,7 @@ fn render(f: &mut Frame, app: &mut App) { // Auto-reveal: in Auto focus mode, collapse the sidebar to a // full-width transcript when nothing is active; bring it back the // moment there is a To-do, a live fleet, or background jobs. + app.last_sidebar_host_width = Some(chat_area.width); let sidebar_auto_collapsed = crate::tui::sidebar::sidebar_auto_idle(app); if !sidebar_auto_collapsed && let Some(sidebar_width) = sidebar_width_for_chat_area(app, chat_area.width) @@ -8664,10 +8752,12 @@ async fn handle_view_events( timed_out, approval_key, approval_grouping_key, + persistent_ask_rules, } => { apply_approval_decision( app, engine_handle, + config, ApprovalDecisionEvent { tool_id, tool_name, @@ -8675,6 +8765,7 @@ async fn handle_view_events( timed_out, approval_key, approval_grouping_key, + persistent_ask_rules, }, ) .await; @@ -8834,6 +8925,25 @@ async fn handle_view_events( .send(Op::SetStreamChunkTimeout { timeout_secs }) .await; } + AppAction::UpdateSubagentRuntimeConfig { + enabled, + max_subagents, + launch_concurrency, + max_spawn_depth, + api_timeout_secs, + heartbeat_timeout_secs, + } => { + let _ = engine_handle + .send(Op::SetSubagentRuntimeConfig { + enabled, + max_subagents, + launch_concurrency, + max_spawn_depth, + api_timeout_secs, + heartbeat_timeout_secs, + }) + .await; + } AppAction::OpenConfigView => {} _ => {} } @@ -9014,11 +9124,13 @@ struct ApprovalDecisionEvent { timed_out: bool, approval_key: String, approval_grouping_key: String, + persistent_ask_rules: Vec, } async fn apply_approval_decision( app: &mut App, engine_handle: &mut EngineHandle, + config: &mut Config, event: ApprovalDecisionEvent, ) { if event.decision == 
ReviewDecision::ApprovedForSession { @@ -9031,6 +9143,15 @@ async fn apply_approval_decision( .insert(event.approval_grouping_key.clone()); } + if matches!( + event.decision, + ReviewDecision::Approved | ReviewDecision::ApprovedForSession + ) && !event.persistent_ask_rules.is_empty() + && !event.timed_out + { + persist_ask_rules_from_approval(app, config, &event.persistent_ask_rules); + } + match event.decision { ReviewDecision::Approved | ReviewDecision::ApprovedForSession => { let _ = engine_handle.approve_tool_call(event.tool_id).await; @@ -9053,6 +9174,35 @@ async fn apply_approval_decision( } } +fn persist_ask_rules_from_approval( + app: &mut App, + config: &mut Config, + rules: &[codewhale_config::ToolAskRule], +) { + match codewhale_config::ConfigStore::load(app.config_path.clone()).and_then(|mut store| { + let added = store.append_ask_rules(rules)?; + let permissions_path = store.permissions_path(); + config.exec_policy_engine = store.exec_policy_engine(); + Ok((added, permissions_path)) + }) { + Ok((added, path)) if added > 0 => { + app.status_message = Some(format!( + "Saved {added} ask permission rule(s) to {}", + path.display() + )); + } + Ok((_added, path)) => { + app.status_message = Some(format!( + "Ask permission rule already saved in {}", + path.display() + )); + } + Err(err) => { + app.status_message = Some(format!("Failed to save ask permission rule: {err:#}")); + } + } +} + fn mark_active_turn_cancelled_locally(app: &mut App) { // #2739: every local cancel surface (Esc, Ctrl+C, approval abort, paused // command abort) must snapshot before it clears turn state. Otherwise @@ -9540,6 +9690,7 @@ fn pause_terminal( // mode. Best-effort — terminals that didn't accept the flags // silently ignore the pop. Matches the shutdown and panic paths. 
pop_keyboard_enhancement_flags(terminal.backend_mut()); + disable_alternate_scroll_mode(terminal.backend_mut()); execute!(terminal.backend_mut(), DisableFocusChange)?; disable_raw_mode()?; if use_alt_screen { @@ -9678,6 +9829,29 @@ pub(crate) fn pop_keyboard_enhancement_flags(writer: &mut W) { let _ = execute!(writer, PopKeyboardEnhancementFlags); } +fn set_alternate_scroll_mode(writer: &mut W, enabled: bool) { + let sequence = if enabled { + ENABLE_ALT_SCROLL_MODE + } else { + DISABLE_ALT_SCROLL_MODE + }; + if let Err(err) = writer.write_all(sequence).and_then(|()| writer.flush()) { + tracing::debug!( + ?err, + enabled, + "alternate-scroll terminal mode change ignored" + ); + } +} + +fn enable_alternate_scroll_mode(writer: &mut W) { + set_alternate_scroll_mode(writer, true); +} + +fn disable_alternate_scroll_mode(writer: &mut W) { + set_alternate_scroll_mode(writer, false); +} + /// Best-effort terminal restoration for emergency exit paths /// (panic hook, signal handlers). Mirrors the normal teardown in /// `run_event_loop` but tolerates any subset of modes not actually being @@ -9688,6 +9862,7 @@ pub(crate) fn pop_keyboard_enhancement_flags(writer: &mut W) { pub fn emergency_restore_terminal() { let mut stdout = std::io::stdout(); pop_keyboard_enhancement_flags(&mut stdout); + disable_alternate_scroll_mode(&mut stdout); let _ = execute!(stdout, DisableFocusChange); let _ = execute!(stdout, DisableBracketedPaste); let _ = execute!(stdout, DisableMouseCapture); @@ -9748,6 +9923,7 @@ fn recover_terminal_modes( pop_keyboard_enhancement_flags(writer); push_keyboard_enhancement_flags(writer); + enable_alternate_scroll_mode(writer); if use_mouse_capture && let Err(err) = execute!(writer, EnableMouseCapture) { tracing::debug!(?err, "EnableMouseCapture ignored"); } @@ -9872,6 +10048,23 @@ pub(crate) fn request_foreground_shell_background(app: &mut App) { } } +pub(crate) fn prefill_jobs_cancel_all_if_tasks_sidebar(app: &mut App) -> bool { + if !app.view_stack.is_empty() + 
|| app.sidebar_focus != SidebarFocus::Tasks + || !app + .task_panel + .iter() + .any(|task| task.id.starts_with("shell_") && task.status == "running") + { + return false; + } + + app.input = "/jobs cancel-all".to_string(); + app.cursor_position = app.input.len(); + app.status_message = Some("Press Enter to cancel all running commands".to_string()); + true +} + pub(crate) fn active_foreground_shell_running(app: &App) -> bool { app.active_cell.as_ref().is_some_and(|active| { active.entries().iter().any(|cell| { diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index e6e90a5876..b4b0f27bde 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -78,6 +78,11 @@ struct SettingsHomeGuard { _tmp: TempDir, previous_home: Option, previous_userprofile: Option, + previous_codewhale_home: Option, + previous_deepseek_config_path: Option, + previous_xdg_config_home: Option, + previous_appdata: Option, + previous_localappdata: Option, _lock: MutexGuard<'static, ()>, } @@ -87,15 +92,31 @@ impl SettingsHomeGuard { let tmp = TempDir::new().expect("settings tempdir"); let previous_home = std::env::var_os("HOME"); let previous_userprofile = std::env::var_os("USERPROFILE"); + let previous_codewhale_home = std::env::var_os("CODEWHALE_HOME"); + let previous_deepseek_config_path = std::env::var_os("DEEPSEEK_CONFIG_PATH"); + let previous_xdg_config_home = std::env::var_os("XDG_CONFIG_HOME"); + let previous_appdata = std::env::var_os("APPDATA"); + let previous_localappdata = std::env::var_os("LOCALAPPDATA"); + let codewhale_home = tmp.path().join(".codewhale"); // Safety: test-only environment mutation guarded by a global mutex. 
unsafe { std::env::set_var("HOME", tmp.path()); std::env::set_var("USERPROFILE", tmp.path()); + std::env::set_var("CODEWHALE_HOME", &codewhale_home); + std::env::set_var("DEEPSEEK_CONFIG_PATH", codewhale_home.join("config.toml")); + std::env::set_var("XDG_CONFIG_HOME", tmp.path().join("xdg-config")); + std::env::set_var("APPDATA", tmp.path().join("appdata")); + std::env::set_var("LOCALAPPDATA", tmp.path().join("localappdata")); } Self { _tmp: tmp, previous_home, previous_userprofile, + previous_codewhale_home, + previous_deepseek_config_path, + previous_xdg_config_home, + previous_appdata, + previous_localappdata, _lock: lock, } } @@ -103,17 +124,26 @@ impl SettingsHomeGuard { impl Drop for SettingsHomeGuard { fn drop(&mut self) { - // Safety: test-only environment mutation guarded by a global mutex. - unsafe { - match self.previous_home.take() { - Some(previous) => std::env::set_var("HOME", previous), - None => std::env::remove_var("HOME"), - } - match self.previous_userprofile.take() { - Some(previous) => std::env::set_var("USERPROFILE", previous), - None => std::env::remove_var("USERPROFILE"), + fn restore(key: &str, previous: Option) { + // Safety: test-only environment mutation guarded by a global mutex. 
+ unsafe { + match previous { + Some(previous) => std::env::set_var(key, previous), + None => std::env::remove_var(key), + } } } + + restore("HOME", self.previous_home.take()); + restore("USERPROFILE", self.previous_userprofile.take()); + restore("CODEWHALE_HOME", self.previous_codewhale_home.take()); + restore( + "DEEPSEEK_CONFIG_PATH", + self.previous_deepseek_config_path.take(), + ); + restore("XDG_CONFIG_HOME", self.previous_xdg_config_home.take()); + restore("APPDATA", self.previous_appdata.take()); + restore("LOCALAPPDATA", self.previous_localappdata.take()); } } @@ -174,6 +204,10 @@ fn recover_terminal_modes_emits_expected_csi_sequences_with_gating() { on.contains("\x1b[>1u") && off.contains("\x1b[>1u"), "Kitty keyboard disambiguation flag must be re-pushed regardless of gating" ); + assert!( + on.contains("\x1b[?1007h") && off.contains("\x1b[?1007h"), + "alternate-scroll mode must be re-armed regardless of mouse-capture gating" + ); assert!( on.contains("\x1b[?1000h"), @@ -202,6 +236,17 @@ fn recover_terminal_modes_runs_without_panic_on_windows() { recover_terminal_modes(&mut buf, false, false); } +#[test] +fn alternate_scroll_mode_disable_emits_xterm_reset() { + let mut buf: Vec = Vec::new(); + disable_alternate_scroll_mode(&mut buf); + let seq = String::from_utf8_lossy(&buf); + assert!( + seq.contains("\x1b[?1007l"), + "disable_alternate_scroll_mode must emit the xterm alternate-scroll reset" + ); +} + // On Windows crossterm's PushKeyboardEnhancementFlags never writes bytes // (is_ansi_code_supported() == false), so the fix writes the escape // directly. 
Verify the direct path emits the expected Kitty keyboard @@ -288,6 +333,34 @@ fn composer_newline_shortcuts_do_not_steal_ctrl_enter() { ))); } +#[test] +fn forced_submit_accepts_ctrl_enter_and_ctrl_j_encodings() { + assert!(is_forced_submit_key(KeyEvent::new( + KeyCode::Enter, + KeyModifiers::CONTROL, + ))); + assert!(is_forced_submit_key(KeyEvent::new( + KeyCode::Enter, + KeyModifiers::CONTROL | KeyModifiers::SHIFT, + ))); + assert!(is_forced_submit_key(KeyEvent::new( + KeyCode::Char('j'), + KeyModifiers::CONTROL, + ))); + assert!(is_forced_submit_key(KeyEvent::new( + KeyCode::Char('J'), + KeyModifiers::CONTROL | KeyModifiers::SHIFT, + ))); + assert!(!is_forced_submit_key(KeyEvent::new( + KeyCode::Char('j'), + KeyModifiers::ALT | KeyModifiers::CONTROL, + ))); + assert!(!is_forced_submit_key(KeyEvent::new( + KeyCode::Enter, + KeyModifiers::ALT, + ))); +} + #[cfg(target_os = "macos")] #[test] fn cmd_enter_normalizes_to_control_enter_not_newline() { @@ -2094,6 +2167,8 @@ fn active_tool_status_label_summarizes_live_tool_group() { output: None, live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: app.turn_started_at, duration_ms: None, source: ExecSource::Assistant, @@ -2134,6 +2209,8 @@ fn shell_live_output_update_matches_exact_task_id_only() { output: None, live_output: None, shell_task_id: Some("shell_a".to_string()), + owner_agent_id: None, + owner_agent_name: None, started_at: None, duration_ms: None, source: ExecSource::Assistant, @@ -2146,6 +2223,8 @@ fn shell_live_output_update_matches_exact_task_id_only() { output: None, live_output: Some("previous".to_string()), shell_task_id: Some("shell_b".to_string()), + owner_agent_id: None, + owner_agent_name: None, started_at: None, duration_ms: None, source: ExecSource::Assistant, @@ -2172,6 +2251,8 @@ fn shell_live_output_update_matches_exact_task_id_only() { stale: false, elapsed_since_output_ms: None, linked_task_id: None, + owner_agent_id: None, + owner_agent_name: 
None, }, ); @@ -2196,6 +2277,8 @@ fn shell_live_output_update_skips_finalized_exec_cell() { output: Some("final output".to_string()), live_output: Some("old live output".to_string()), shell_task_id: Some("shell_a".to_string()), + owner_agent_id: None, + owner_agent_name: None, started_at: None, duration_ms: Some(10), source: ExecSource::Assistant, @@ -2221,6 +2304,8 @@ fn shell_live_output_update_skips_finalized_exec_cell() { stale: false, elapsed_since_output_ms: None, linked_task_id: None, + owner_agent_id: None, + owner_agent_name: None, }, ); @@ -2241,6 +2326,8 @@ fn active_tool_status_label_strips_shell_wrappers_from_ci_polling() { output: None, live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: app.turn_started_at, duration_ms: None, source: ExecSource::Assistant, @@ -2457,54 +2544,10 @@ fn provider_picker_reselecting_active_provider_preserves_current_model() { #[tokio::test] async fn provider_switch_clears_turn_cache_history() { // `switch_provider` persists the new provider to `Settings`, which - // writes through `dirs::data_dir()` (`~/Library/Application - // Support/deepseek/settings.toml` on macOS). Without redirecting - // HOME / USERPROFILE we would clobber the developer's real - // preferences and leave `default_provider = "ollama"` behind — - // which then leaks into any subsequent test that constructs an - // `App`. Hold the process-wide env lock for the duration so we - // serialize with other tests that mutate the same env vars. - // Wrap the lock inside a guard struct so clippy's - // `await_holding_lock` doesn't fire on the `.await` below; the - // pattern matches other tests that guard HOME / USERPROFILE mutations. - struct HomeGuard { - _tmp: tempfile::TempDir, - prev_home: Option, - prev_userprofile: Option, - _lock: std::sync::MutexGuard<'static, ()>, - } - impl Drop for HomeGuard { - fn drop(&mut self) { - // SAFETY: still holding the process-wide env lock. 
- unsafe { - match self.prev_home.take() { - Some(v) => std::env::set_var("HOME", v), - None => std::env::remove_var("HOME"), - } - match self.prev_userprofile.take() { - Some(v) => std::env::set_var("USERPROFILE", v), - None => std::env::remove_var("USERPROFILE"), - } - } - } - } - let _home = { - let lock = crate::test_support::lock_test_env(); - let tmp = tempfile::TempDir::new().expect("tempdir"); - let prev_home = std::env::var_os("HOME"); - let prev_userprofile = std::env::var_os("USERPROFILE"); - // SAFETY: serialized by the process-wide test env lock. - unsafe { - std::env::set_var("HOME", tmp.path()); - std::env::set_var("USERPROFILE", tmp.path()); - } - HomeGuard { - _tmp: tmp, - prev_home, - prev_userprofile, - _lock: lock, - } - }; + // writes through settings path resolution. Without redirecting the + // CodeWhale/legacy config homes we would clobber the developer's real + // preferences and leave `default_provider = "ollama"` behind. + let _home = SettingsHomeGuard::new(); let mut app = create_test_app(); app.push_turn_cache_record(crate::tui::app::TurnCacheRecord { @@ -3715,14 +3758,14 @@ fn ctrl_alt_0_hides_sidebar() { } #[test] -fn ctrl_alt_0_restores_auto_sidebar_when_already_hidden() { +fn ctrl_alt_0_restores_pinned_sidebar_when_already_hidden() { let mut app = create_test_app(); app.sidebar_focus = SidebarFocus::Hidden; apply_alt_0_shortcut(&mut app, KeyModifiers::ALT | KeyModifiers::CONTROL); - assert_eq!(app.sidebar_focus, SidebarFocus::Auto); - assert_eq!(app.status_message.as_deref(), Some("Sidebar focus: auto")); + assert_eq!(app.sidebar_focus, SidebarFocus::Pinned); + assert_eq!(app.status_message.as_deref(), Some("Sidebar focus: pinned")); } #[test] @@ -3744,13 +3787,49 @@ fn hidden_sidebar_focus_suppresses_sidebar_split_even_when_wide() { let mut app = create_test_app(); app.sidebar_width_percent = 28; - app.sidebar_focus = SidebarFocus::Auto; + app.sidebar_focus = SidebarFocus::Pinned; assert_eq!(sidebar_width_for_chat_area(&app, 120), 
Some(33)); app.sidebar_focus = SidebarFocus::Hidden; assert_eq!(sidebar_width_for_chat_area(&app, 120), None); } +#[test] +fn sidebar_width_gate_suppresses_visible_focus_when_narrow() { + let mut app = create_test_app(); + app.sidebar_focus = SidebarFocus::Pinned; + app.last_sidebar_host_width = Some(80); + + assert_eq!( + sidebar_render_state(&mut app), + SidebarRenderState::SuppressedByWidth { + available_width: 80, + min_width: SIDEBAR_VISIBLE_MIN_WIDTH, + } + ); +} + +#[test] +fn pinned_sidebar_is_visible_when_idle_and_wide() { + let mut app = create_test_app(); + app.sidebar_focus = SidebarFocus::Pinned; + app.last_sidebar_host_width = Some(120); + + assert_eq!(sidebar_render_state(&mut app), SidebarRenderState::Visible); +} + +#[test] +fn auto_sidebar_status_reports_idle_collapse_when_wide() { + let mut app = create_test_app(); + app.sidebar_focus = SidebarFocus::Auto; + app.last_sidebar_host_width = Some(120); + + assert_eq!( + sidebar_render_state(&mut app), + SidebarRenderState::AutoCollapsed + ); +} + #[test] fn sidebar_auto_idle_collapses_when_nothing_active() { let mut app = create_test_app(); @@ -3791,6 +3870,8 @@ fn jobs_panel_ignores_model_reasoning_but_shows_for_real_jobs() { kind: crate::tui::app::TaskPanelEntryKind::ModelReasoning, stale: false, elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, }]; assert!( crate::tui::sidebar::sidebar_auto_idle(&mut app), @@ -3806,6 +3887,8 @@ fn jobs_panel_ignores_model_reasoning_but_shows_for_real_jobs() { kind: crate::tui::app::TaskPanelEntryKind::Background, stale: false, elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, }); assert!( crate::tui::sidebar::sidebar_auto_idle(&mut app), @@ -3821,6 +3904,8 @@ fn jobs_panel_ignores_model_reasoning_but_shows_for_real_jobs() { kind: crate::tui::app::TaskPanelEntryKind::Background, stale: false, elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, }); assert!( 
!crate::tui::sidebar::sidebar_auto_idle(&mut app), @@ -3828,6 +3913,76 @@ fn jobs_panel_ignores_model_reasoning_but_shows_for_real_jobs() { ); } +#[test] +fn ctrl_x_jobs_prefill_only_catches_running_shell_jobs_in_tasks_sidebar() { + let mut app = create_test_app(); + app.sidebar_focus = SidebarFocus::Tasks; + app.input = "draft".to_string(); + app.cursor_position = app.input.len(); + app.task_panel.push(TaskPanelEntry { + id: "shell_active".to_string(), + status: "running".to_string(), + prompt_summary: "shell: cargo test".to_string(), + duration_ms: Some(10), + kind: TaskPanelEntryKind::Background, + stale: false, + elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, + }); + + assert!(prefill_jobs_cancel_all_if_tasks_sidebar(&mut app)); + assert_eq!(app.input, "/jobs cancel-all"); + assert_eq!(app.cursor_position, app.input.len()); + assert_eq!( + app.status_message.as_deref(), + Some("Press Enter to cancel all running commands") + ); +} + +#[test] +fn ctrl_x_jobs_prefill_falls_through_outside_tasks_sidebar_shell_jobs() { + let mut non_shell = create_test_app(); + non_shell.sidebar_focus = SidebarFocus::Tasks; + non_shell.input = "draft".to_string(); + non_shell.cursor_position = non_shell.input.len(); + non_shell.task_panel.push(TaskPanelEntry { + id: "task_active".to_string(), + status: "running".to_string(), + prompt_summary: "summarize the release notes".to_string(), + duration_ms: Some(10), + kind: TaskPanelEntryKind::Background, + stale: false, + elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, + }); + + assert!(!prefill_jobs_cancel_all_if_tasks_sidebar(&mut non_shell)); + assert_eq!(non_shell.input, "draft"); + + let mut other_sidebar = create_test_app(); + other_sidebar.sidebar_focus = SidebarFocus::Agents; + other_sidebar.input = "draft".to_string(); + other_sidebar.cursor_position = other_sidebar.input.len(); + other_sidebar.task_panel.push(TaskPanelEntry { + id: "shell_active".to_string(), + 
status: "running".to_string(), + prompt_summary: "shell: cargo test".to_string(), + duration_ms: Some(10), + kind: TaskPanelEntryKind::Background, + stale: false, + elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, + }); + + assert!(!prefill_jobs_cancel_all_if_tasks_sidebar( + &mut other_sidebar + )); + assert_eq!(other_sidebar.input, "draft"); +} + // ── Sidebar resize-handle mouse tests ────────────────────────────── fn setup_resize_handle(app: &mut App, handle_x: u16, sidebar_width: u16, total_width: u16) { @@ -6662,6 +6817,8 @@ fn terminal_pause_has_live_owner_only_for_running_exec_cells() { output: None, live_output: None, shell_task_id: None, + owner_agent_id: None, + owner_agent_name: None, started_at: Some(Instant::now()), duration_ms: None, source: ExecSource::Assistant, @@ -8546,6 +8703,60 @@ fn approval_prompt_uses_event_input_after_message_complete_drain() { assert_ne!(content.trim(), "{}"); } +#[tokio::test] +async fn approval_decision_persists_ask_rules_to_permissions_file() { + let tmp = TempDir::new().expect("tempdir"); + let config_path = tmp.path().join("config.toml"); + let mut app = create_test_app(); + app.config_path = Some(config_path.clone()); + let mut config = Config::default(); + let mut engine = mock_engine_handle(); + let rule = codewhale_config::ToolAskRule::exec_shell("cargo test"); + + apply_approval_decision( + &mut app, + &mut engine.handle, + &mut config, + ApprovalDecisionEvent { + tool_id: "tool-1".to_string(), + tool_name: "exec_shell".to_string(), + decision: ReviewDecision::Approved, + timed_out: false, + approval_key: "approval-key".to_string(), + approval_grouping_key: "approval-group".to_string(), + persistent_ask_rules: vec![rule.clone()], + }, + ) + .await; + + assert_eq!( + engine.recv_approval_event().await, + Some(crate::core::engine::MockApprovalEvent::Approved { + id: "tool-1".to_string() + }) + ); + let store = codewhale_config::ConfigStore::load(Some(config_path)).expect("load config 
store"); + assert_eq!(store.permissions().rules, vec![rule]); + assert!( + app.status_message + .as_deref() + .is_some_and(|message| message.contains("Saved 1 ask permission rule")) + ); + + let decision = config + .exec_policy_engine + .check(codewhale_execpolicy::ExecPolicyContext { + command: "cargo test --workspace", + cwd: tmp.path().to_string_lossy().as_ref(), + tool: Some("exec_shell"), + path: None, + ask_for_approval: codewhale_execpolicy::AskForApproval::OnFailure, + sandbox_mode: None, + }) + .expect("check persisted runtime policy"); + assert!(decision.requires_approval); +} + #[test] fn second_thinking_block_appends_new_entry_in_same_active_cell() { // Real V4 turns can emit Thinking → Tool → Thinking → Tool before any @@ -9274,6 +9485,8 @@ fn render_footer_from_surfaces_background_shell_even_without_tasks_panel() { kind: crate::tui::app::TaskPanelEntryKind::Background, stale: false, elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, }]; let props = render_footer_from(&app, &[], None); @@ -10503,6 +10716,8 @@ mod work_sidebar_projection_tests { kind: crate::tui::app::TaskPanelEntryKind::Background, stale: false, elapsed_since_output_ms: None, + owner_agent_id: None, + owner_agent_name: None, }; assert_eq!(entry.status, "completed"); assert_ne!(entry.status, "running"); @@ -10609,8 +10824,9 @@ fn agent_progress_redraw_coalesces_once_per_agent_per_drain() { #[test] fn six_worker_progress_storm_keeps_input_render_and_cancel_live() { + let max_engine_events_per_drain = MAX_ENGINE_EVENTS_PER_DRAIN; assert!( - MAX_ENGINE_EVENTS_PER_DRAIN <= 128, + max_engine_events_per_drain <= 128, "engine event drains must stay bounded so high sub-agent fanout cannot monopolize the UI tick" ); diff --git a/crates/tui/src/tui/views/mod.rs b/crates/tui/src/tui/views/mod.rs index 35c293a2ff..4adfa971f3 100644 --- a/crates/tui/src/tui/views/mod.rs +++ b/crates/tui/src/tui/views/mod.rs @@ -106,6 +106,8 @@ pub enum ViewEvent { approval_key: String, 
/// Lossy / arity-aware fingerprint, used to scope *approvals*. approval_grouping_key: String, + /// Ask-only permission rules to append when the decision approves. + persistent_ask_rules: Vec, }, ElevationDecision { tool_id: String, @@ -1257,7 +1259,7 @@ fn config_hint_for_key(key: &str) -> &'static str { } "mcp_config_path" => "path to mcp.json", "fleet.exec.max_spawn_depth" => { - "0 blocks child agents; 3 default (same axis as sub-agents); capped at 3" + "0 blocks child agents; 3 default (same axis as sub-agents); capped at 8" } _ => "", } @@ -1883,6 +1885,7 @@ impl ModalView for SubAgentsView { SubAgentStatus::Interrupted(_) => interrupted.push(agent), SubAgentStatus::Failed(_) => failed.push(agent), SubAgentStatus::Cancelled => cancelled.push(agent), + SubAgentStatus::BudgetExhausted => failed.push(agent), } } @@ -2158,6 +2161,11 @@ fn format_agent_status( Some(reason.as_str()), ), SubAgentStatus::Cancelled => ("cancelled", Style::default().fg(palette::TEXT_MUTED), None), + SubAgentStatus::BudgetExhausted => ( + "budget_exhausted", + Style::default().fg(palette::STATUS_WARNING), + None, + ), SubAgentStatus::Failed(reason) => ( "failed", Style::default().fg(palette::DEEPSEEK_RED), diff --git a/crates/tui/src/tui/widgets/mod.rs b/crates/tui/src/tui/widgets/mod.rs index f388af4d82..60286b76c7 100644 --- a/crates/tui/src/tui/widgets/mod.rs +++ b/crates/tui/src/tui/widgets/mod.rs @@ -1354,6 +1354,30 @@ impl Renderable for ApprovalWidget<'_> { } } + if let Some(preview) = self.request.ask_rule_preview() { + lines.push(Line::from("")); + lines.push(Line::from(vec![ + Span::raw(" "), + Span::styled( + label_ask_rule_preview(locale), + Style::default().fg(palette::TEXT_HINT), + ), + ])); + let max_width = card_area.width.saturating_sub(6) as usize; + for line in preview + .lines() + .filter(|line| !line.trim().is_empty()) + .take(4) + { + let truncated = + crate::utils::truncate_with_ellipsis(line.trim(), max_width.max(20), "..."); + lines.push(Line::from(vec![ + 
Span::raw(" "), + Span::styled(truncated, Style::default().fg(palette::TEXT_SECONDARY)), + ])); + } + } + lines.push(Line::from("")); let options = approval_options_for(risk, locale); @@ -1399,6 +1423,14 @@ impl Renderable for ApprovalWidget<'_> { footer_controls(locale), Style::default().fg(palette::TEXT_HINT), ), + if self.request.can_save_ask_rule() { + Span::styled( + save_ask_rule_hint(locale), + Style::default().fg(palette_colors.shortcut), + ) + } else { + Span::raw("") + }, ])); let title = format!( @@ -1602,6 +1634,20 @@ fn footer_controls(locale: Locale) -> &'static str { tr(locale, MessageId::ApprovalControlsHint) } +fn save_ask_rule_hint(locale: Locale) -> &'static str { + match locale { + Locale::ZhHans => " s 批准并保存询问规则", + _ => " s approve + save ask rule", + } +} + +fn label_ask_rule_preview(locale: Locale) -> &'static str { + match locale { + Locale::ZhHans => "询问规则预览:", + _ => "Ask rule preview:", + } +} + fn selection_hint_prefix(locale: Locale) -> &'static str { tr(locale, MessageId::ApprovalChooseHint) } diff --git a/crates/tui/src/tui/widgets/pending_input_preview.rs b/crates/tui/src/tui/widgets/pending_input_preview.rs index 0341cbfb72..10a86845d6 100644 --- a/crates/tui/src/tui/widgets/pending_input_preview.rs +++ b/crates/tui/src/tui/widgets/pending_input_preview.rs @@ -171,7 +171,10 @@ impl PendingInputPreview { } if !self.queued_messages.is_empty() { lines.push(Line::from(vec![Span::styled( - format!(" {} edit last queued message", self.edit_binding.label), + format!( + " Ctrl+S send now · {} edit last queued", + self.edit_binding.label + ), dim, )])); } @@ -395,7 +398,8 @@ mod tests { assert!(rows[2].contains("/queue send 1")); assert!(rows[2].contains("drop 1")); assert!(rows[2].contains("clear")); - assert!(rows[3].contains("edit last queued message")); + assert!(rows[3].contains("Ctrl+S send now")); + assert!(rows[3].contains("edit last queued")); } #[test] @@ -417,9 +421,7 @@ mod tests { "missing restore hint: {rows:?}" ); assert!( - 
!rows - .iter() - .any(|row| row.contains("edit last queued message")), + !rows.iter().any(|row| row.contains("edit last queued")), "editing mode should not also advertise opening a queued edit: {rows:?}" ); } @@ -484,7 +486,7 @@ mod tests { "unexpected Esc hint: {rows:?}" ); assert!( - !rows.iter().any(|r| r.contains("edit last queued message")), + !rows.iter().any(|r| r.contains("edit last queued")), "unexpected edit hint in pending-steer-only view: {rows:?}" ); } @@ -505,6 +507,7 @@ mod tests { assert!(rows.iter().any(|r| r.contains("rejected"))); assert!(rows.iter().any(|r| r.contains("queued"))); assert!(rows.iter().any(|r| r.contains("↑"))); + assert!(rows.iter().any(|r| r.contains("Ctrl+S"))); } #[test] @@ -574,7 +577,8 @@ mod tests { assert!(rows[3].contains("line3")); assert!(rows[4].contains("…")); assert!(rows[5].contains("/queue send 1")); - assert!(rows[6].contains("edit last queued message")); + assert!(rows[6].contains("Ctrl+S send now")); + assert!(rows[6].contains("edit last queued")); } #[test] diff --git a/crates/tui/tests/core_session_command_extraction.rs b/crates/tui/tests/core_session_command_extraction.rs new file mode 100644 index 0000000000..a2d8bf9bf6 --- /dev/null +++ b/crates/tui/tests/core_session_command_extraction.rs @@ -0,0 +1,163 @@ +//! Gherkin binary health and eval harness smoke test for command extraction. +//! +//! This runs the binary through `codewhale-tui eval` and verifies that the +//! executable still loads and reports a successful JSON evaluation after the +//! core/session command modules are extracted. 
+ +use std::path::PathBuf; +use std::process::Command; + +use cucumber::{World as _, given, then, when, writer::Stats as _}; +use serde_json::Value; +use tempfile::TempDir; + +const FEATURE_NAME: &str = "Core and session command extraction"; +const FEATURE_PATH: &str = concat!( + env!("CARGO_MANIFEST_DIR"), + "/tests/features/core_session_command_extraction.feature" +); +const CORE_SCENARIO: &str = "The binary loads and runs the evaluation harness after extraction"; + +#[derive(Debug, Default, cucumber::World)] +struct CoreSessionExtractionWorld { + record_dir: Option, + report: Option, +} + +#[given("a clean CodeWhale evaluation workspace")] +fn clean_codewhale_evaluation_workspace(world: &mut CoreSessionExtractionWorld) { + world.record_dir = Some(TempDir::new().expect("evaluation TempDir")); +} + +#[when("the evaluation harness runs a shell command")] +fn eval_harness_runs_shell_command(world: &mut CoreSessionExtractionWorld) { + let record_dir = world + .record_dir + .as_ref() + .expect("evaluation workspace should exist"); + + let output = Command::new(codewhale_tui_binary()) + .args([ + "eval", + "--json", + "--shell-command", + "echo eval-harness", + "--record", + ]) + .arg(record_dir.path()) + .output() + .expect("codewhale-tui eval should start"); + + assert!( + output.status.success(), + "codewhale-tui eval failed\nstderr:\n{}", + String::from_utf8_lossy(&output.stderr) + ); + + let report: Value = serde_json::from_slice(&output.stdout).unwrap_or_else(|err| { + panic!( + "eval --json should emit valid JSON: {err}\nstdout:\n{}", + String::from_utf8_lossy(&output.stdout) + ) + }); + + world.report = Some(report); +} + +#[then("the harness completes successfully")] +fn harness_completes_successfully(world: &mut CoreSessionExtractionWorld) { + let report = world.report.as_ref().expect("eval report should exist"); + + let success = report + .get("metrics") + .and_then(|metrics| metrics.get("success")) + .and_then(|value| value.as_bool()) + .unwrap_or(false); + 
assert!( + success, + "eval report 'metrics.success' should be true, got: {report:?}" + ); +} + +#[then("the JSON report contains a step with the expected kind")] +fn json_report_contains_step_with_expected_kind(world: &mut CoreSessionExtractionWorld) { + let report = world.report.as_ref().expect("eval report should exist"); + + let steps = report + .get("steps") + .and_then(|value| value.as_array()) + .expect("eval report should have a 'steps' array"); + + assert!( + !steps.is_empty(), + "eval report should have at least one step" + ); + + let first_step = &steps[0]; + let kind = first_step + .get("kind") + .and_then(|value| value.as_str()) + .expect("step should have a 'kind' field"); + + assert_eq!( + kind, "List", + "first step kind should be 'List', got: {kind}" + ); + + let step_success = first_step + .get("success") + .and_then(|value| value.as_bool()) + .unwrap_or(false); + assert!( + step_success, + "first step 'success' should be true, got: {first_step:?}" + ); + + let output = first_step + .get("output") + .and_then(|value| value.as_str()) + .unwrap_or(""); + assert!( + !output.is_empty(), + "step output should not be empty: {first_step:?}" + ); +} + +#[tokio::test(flavor = "current_thread")] +async fn codewhale_eval_runs_after_extraction() { + let writer = CoreSessionExtractionWorld::cucumber() + .fail_on_skipped() + .with_default_cli() + .filter_run(FEATURE_PATH, move |feature, _, scenario| { + feature.name == FEATURE_NAME && scenario.name == CORE_SCENARIO + }) + .await; + assert_eq!(writer.failed_steps(), 0, "scenario failed: {CORE_SCENARIO}"); + assert_eq!( + writer.skipped_steps(), + 0, + "scenario skipped steps: {CORE_SCENARIO}" + ); + assert_eq!( + writer.passed_steps(), + 4, + "scenario did not run: {CORE_SCENARIO}" + ); +} + +fn codewhale_tui_binary() -> PathBuf { + if let Some(path) = option_env!("CARGO_BIN_EXE_codewhale-tui") { + return PathBuf::from(path); + } + if let Ok(path) = std::env::var("CARGO_BIN_EXE_codewhale-tui") { + return 
PathBuf::from(path); + } + + let mut path = std::env::current_exe().expect("current test executable path"); + path.pop(); + if path.ends_with("deps") { + path.pop(); + } + path.push(format!("codewhale-tui{}", std::env::consts::EXE_SUFFIX)); + path +} diff --git a/crates/tui/tests/epic_acceptance_harness.rs b/crates/tui/tests/epic_acceptance_harness.rs new file mode 100644 index 0000000000..74e6e307a3 --- /dev/null +++ b/crates/tui/tests/epic_acceptance_harness.rs @@ -0,0 +1,51 @@ +//! EPIC acceptance harness smoke test. +//! +//! Proves that the Gherkin/Cucumber infrastructure is available and functional +//! on the target branch. + +use cucumber::{World as _, given, then, when, writer::Stats as _}; + +const FEATURE_NAME: &str = "EPIC acceptance harness"; +const FEATURE_PATH: &str = concat!( + env!("CARGO_MANIFEST_DIR"), + "/tests/features/epic_acceptance_harness.feature" +); +const SMOKE_SCENARIO: &str = "Gherkin acceptance tests can run on the target branch"; + +#[derive(Debug, Default, cucumber::World)] +struct EpicAcceptanceWorld; + +#[given("the acceptance harness is available")] +fn acceptance_harness_available(_world: &mut EpicAcceptanceWorld) {} + +#[when("the runner discovers EPIC scenarios")] +fn runner_discovers_epic_scenarios(_world: &mut EpicAcceptanceWorld) {} + +#[then("the runner exits successfully")] +fn runner_exits_successfully(_world: &mut EpicAcceptanceWorld) {} + +#[tokio::test(flavor = "current_thread")] +async fn acceptance_harness_smoke_test() { + let writer = EpicAcceptanceWorld::cucumber() + .fail_on_skipped() + .with_default_cli() + .filter_run(FEATURE_PATH, move |feature, _, scenario| { + feature.name == FEATURE_NAME && scenario.name == SMOKE_SCENARIO + }) + .await; + assert_eq!( + writer.failed_steps(), + 0, + "scenario failed: {SMOKE_SCENARIO}" + ); + assert_eq!( + writer.skipped_steps(), + 0, + "scenario skipped steps: {SMOKE_SCENARIO}" + ); + assert_eq!( + writer.passed_steps(), + 3, + "scenario did not run: {SMOKE_SCENARIO}" + ); 
+} diff --git a/crates/tui/tests/features/core_command_surfaces.feature b/crates/tui/tests/features/core_command_surfaces.feature new file mode 100644 index 0000000000..69c52aaa51 --- /dev/null +++ b/crates/tui/tests/features/core_command_surfaces.feature @@ -0,0 +1,42 @@ +@long-running +# [LONG RUNNING] Opt-in core command acceptance workflows. Run with: +# cargo test -p codewhale-tui --bin codewhale-tui --features long-running-tests commands::groups::core::acceptance -- --test-threads=1 +Feature: Core command visible surfaces + + Scenario: Core informational commands write visible transcript messages + Given a CodeWhale core command workspace + When the user runs the core command "/help links" + Then the message window should include "Usage: /links" + And the message window should include "Aliases: dashboard, api" + When the user runs the core command "/links" + Then the message window should include "https://platform.deepseek.com" + When the user runs the core command "/workspace" + Then the message window should include "Current workspace:" + When the user runs the core command "/home" + Then the message window should include "codewhale Home Dashboard" + And the message window should include "/links" + + Scenario: Core state commands report visible changes + Given a CodeWhale core command workspace + When the user runs the core command "/model auto" + Then the message window should include "Model changed:" + And the message window should include "auto" + When the user runs the core command "/translate" + Then the message window should include "Output translation enabled" + When the user runs the core command "/translate" + Then the message window should include "Output translation disabled" + + Scenario: Clear replaces prior transcript with visible confirmation + Given a CodeWhale core command workspace with one visible user message + When the user runs the core command "/clear" + Then the message window should include "Conversation cleared" + And the message 
window should not include "Remember the whale migration" + + Scenario: Persistent work commands report visible dispatch requests + Given a CodeWhale core command workspace + When the user runs the core command "/agent 2 summarize logs" + Then the message window should include "Opening persistent sub-agent at depth 2" + When the user runs the core command "/rlm 1 inspect command extraction" + Then the message window should include "Opening persistent RLM context at depth 1" + When the user runs the core command "/swarm 2 audit commands" + Then the message window should include "/swarm is gated" diff --git a/crates/tui/tests/features/core_session_command_extraction.feature b/crates/tui/tests/features/core_session_command_extraction.feature new file mode 100644 index 0000000000..a4cfb20a9e --- /dev/null +++ b/crates/tui/tests/features/core_session_command_extraction.feature @@ -0,0 +1,7 @@ +Feature: Core and session command extraction + + Scenario: The binary loads and runs the evaluation harness after extraction + Given a clean CodeWhale evaluation workspace + When the evaluation harness runs a shell command + Then the harness completes successfully + And the JSON report contains a step with the expected kind diff --git a/crates/tui/tests/features/epic_acceptance_harness.feature b/crates/tui/tests/features/epic_acceptance_harness.feature new file mode 100644 index 0000000000..af694f79ed --- /dev/null +++ b/crates/tui/tests/features/epic_acceptance_harness.feature @@ -0,0 +1,6 @@ +Feature: EPIC acceptance harness + + Scenario: Gherkin acceptance tests can run on the target branch + Given the acceptance harness is available + When the runner discovers EPIC scenarios + Then the runner exits successfully diff --git a/crates/tui/tests/features/session_command_workflows.feature b/crates/tui/tests/features/session_command_workflows.feature new file mode 100644 index 0000000000..fccd44f360 --- /dev/null +++ b/crates/tui/tests/features/session_command_workflows.feature @@ 
-0,0 +1,89 @@ +@long-running +# [LONG RUNNING] Opt-in acceptance workflows. Run with: +# cargo test -p codewhale-tui --bin codewhale-tui --features long-running-tests commands::groups::session::acceptance -- --test-threads=1 +Feature: Session command workflows + + Scenario: Save, export, and load preserve the active session + Given a CodeWhale session workspace with one user message + When the user saves the active session + And the user exports the active transcript + And the user clears the active conversation + And the user loads the saved session + Then the saved session file should contain the saved message + And the active session id should match the saved session file + And the exported markdown should contain the active transcript + And the active session should contain the saved message + And the restored token count should match the saved session + And CodeWhale should report that the session was loaded + + Scenario: Fork keeps the original session resumable + Given a CodeWhale persisted session workspace with one user message + When the user forks the active session + Then the forked session should reference the original session + And the original session should still be loadable + And the active session should be the forked session + + Scenario: New session cannot be forked before messages exist + Given a CodeWhale session workspace with one user message + When the user starts a new session + And the user tries to fork the active session + Then CodeWhale should reject the fork because there are no messages + And the active session should be empty + + Scenario: Cleared session cannot be forked before messages exist + Given a CodeWhale session workspace with one user message + When the user clears the active conversation + And the user tries to fork the active session + Then CodeWhale should reject the fork because there are no messages + And the active session should be empty + + Scenario: Fork followed by new keeps both saved sessions + Given a 
CodeWhale persisted session workspace with one user message + When the user forks the active session + And the user starts a new session + Then the original and forked sessions should remain loadable + And the active session should be a new empty session + + Scenario: Fork followed by clear keeps both saved sessions + Given a CodeWhale persisted session workspace with one user message + When the user forks the active session + And the user clears the active conversation + Then the original and forked sessions should remain loadable + And the active session should be cleared without an active session id + + Scenario: Rename updates the active saved session title + Given a CodeWhale persisted session workspace with one user message + When the user renames the active session to "Renamed whale path" + Then the active saved session title should be "Renamed whale path" + And the active session should be the original session + + Scenario: Sessions list opens the saved session picker + Given a CodeWhale persisted session workspace with one user message + When the user lists saved sessions + Then the session picker should be open + And the original session should still be loadable + + Scenario: Sessions prune removes only stale sessions + Given a CodeWhale session workspace with stale and fresh saved sessions + When the user prunes sessions older than 7 days + Then CodeWhale should report that one session was pruned + And the fresh session should still be loadable + And the stale session should no longer be loadable + + Scenario: Context management commands emit actions without clearing the active session + Given a CodeWhale session workspace with one user message + When the user compacts context + Then CodeWhale should trigger context compaction + And the active session should contain the saved message + When the user purges context + Then CodeWhale should trigger context purge + And the active session should contain the saved message + When the user prepares a session 
relay focused on "handoff details" + Then CodeWhale should send a session relay instruction focused on "handoff details" + And the active session should contain the saved message + + Scenario: Singular session command is not registered + Given a CodeWhale session workspace with one user message + When the user runs the singular session command + Then CodeWhale should reject the unknown session command + And the active session should contain the saved message diff --git a/crates/whaleflow/src/lib.rs b/crates/whaleflow/src/lib.rs index 6664b95911..8165db65f1 100644 --- a/crates/whaleflow/src/lib.rs +++ b/crates/whaleflow/src/lib.rs @@ -183,6 +183,8 @@ pub struct BudgetSpec { pub timeout_secs: Option, #[serde(default)] pub max_parallel: Option, + #[serde(default)] + pub max_tokens: Option, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)] @@ -669,6 +671,8 @@ pub struct MockWorkflowExecutor { cancelled: bool, max_leaf_steps: Option, leaf_steps_executed: u32, + max_leaf_tokens: Option, + leaf_tokens_used: u64, } impl MockWorkflowExecutor { @@ -713,6 +717,11 @@ impl MockWorkflowExecutor { self } + pub fn with_max_leaf_tokens(mut self, max_leaf_tokens: u64) -> Self { + self.max_leaf_tokens = Some(max_leaf_tokens); + self + } + pub fn run( &mut self, spec: &WorkflowSpec, @@ -946,14 +955,44 @@ impl MockWorkflowExecutor { status: WorkflowRunStatus::BudgetExceeded, usage: WorkflowUsage::default(), memo_usage: WorkflowMemoUsage::default(), - output: Some("mock workflow leaf budget exhausted".to_string()), + output: Some("mock workflow leaf step budget exhausted".to_string()), + artifacts: Vec::new(), + }; + } + if self + .max_leaf_tokens + .is_some_and(|max| self.leaf_tokens_used >= max) + || spec.budget.max_tokens == Some(0) + { + return MockLeafOutcome { + status: WorkflowRunStatus::BudgetExceeded, + usage: WorkflowUsage::default(), + memo_usage: WorkflowMemoUsage::default(), + output: Some("mock workflow leaf token budget exhausted".to_string()), 
artifacts: Vec::new(), }; } self.leaf_steps_executed = self.leaf_steps_executed.saturating_add(1); - self.leaf_outcomes + let outcome = self + .leaf_outcomes .remove(&spec.id) - .unwrap_or_else(|| MockLeafOutcome::succeeded(format!("mock leaf {}", spec.id))) + .unwrap_or_else(|| MockLeafOutcome::succeeded(format!("mock leaf {}", spec.id))); + let tokens = outcome.usage.total_tokens(); + if let Some(per_leaf_token_cap) = spec.budget.max_tokens { + if tokens > per_leaf_token_cap { + return MockLeafOutcome { + status: WorkflowRunStatus::BudgetExceeded, + usage: outcome.usage, + memo_usage: outcome.memo_usage, + output: Some(format!( + "mock workflow leaf token budget exhausted ({tokens} > {per_leaf_token_cap})" + )), + artifacts: outcome.artifacts, + }; + } + } + self.leaf_tokens_used = self.leaf_tokens_used.saturating_add(tokens); + outcome } fn next_predicate_result(&mut self, node_id: &str) -> bool { @@ -2148,6 +2187,7 @@ mod tests { max_steps: Some(8), timeout_secs: Some(300), max_parallel: None, + max_tokens: None, }, permissions: PermissionSpec::default(), model_policy: ModelPolicy { @@ -2164,6 +2204,7 @@ mod tests { max_steps: Some(30), timeout_secs: Some(1_800), max_parallel: Some(2), + max_tokens: None, }, permissions: PermissionSpec { allow_write: false, @@ -2191,6 +2232,7 @@ mod tests { max_steps: Some(12), timeout_secs: Some(600), max_parallel: Some(2), + max_tokens: None, }, permissions: PermissionSpec::default(), model_policy: ModelPolicy::default(), @@ -2593,6 +2635,7 @@ mod tests { max_steps: Some(0), timeout_secs: None, max_parallel: None, + max_tokens: None, }, ), leaf_node("summarize"), @@ -2617,6 +2660,180 @@ mod tests { ); } + #[test] + fn mock_executor_stops_when_global_token_budget_is_exhausted() { + let workflow = workflow_spec(vec![WorkflowNode::BranchSet(BranchSpec { + id: "discover".to_string(), + description: None, + parallel: true, + budget: BudgetSpec::default(), + permissions: PermissionSpec::default(), + model_policy: 
ModelPolicy::default(), + children: vec![ + leaf_node("scan-readme"), + leaf_node("scan-config"), + leaf_node("scan-tests"), + ], + })]); + + // First leaf uses 600 tokens (300 in + 300 out); after the second leaf + // (500 tokens) the running total is 1100, exceeding the 1000-token + // global cap, so the third leaf hits the exhausted budget and halts the + // run. + let mut executor = MockWorkflowExecutor::new() + .with_max_leaf_tokens(1000) + .with_leaf_outcome( + "scan-readme", + MockLeafOutcome::succeeded("readme done").with_usage(WorkflowUsage { + input_tokens: 300, + output_tokens: 300, + cost_microusd: 0, + }), + ) + .with_leaf_outcome( + "scan-config", + MockLeafOutcome::succeeded("config done").with_usage(WorkflowUsage { + input_tokens: 250, + output_tokens: 250, + cost_microusd: 0, + }), + ); + let execution = executor.run(&workflow).expect("mock workflow should run"); + + assert_eq!(execution.status, WorkflowRunStatus::BudgetExceeded); + // Leaves 1+2 consume 1100 tokens, exhausting the 1000-token global cap. + // The third leaf is attempted, sees the budget already exceeded, and is + // recorded as BudgetExceeded — the same boundary-leaf behaviour used by + // step budgets (max_leaf_steps). The budget outcome carries no tokens, + // so total usage stays at 1100. 
+ assert_eq!(execution.leaf_results.len(), 3); + assert_eq!( + execution.leaf_results[0].status, + WorkflowRunStatus::Succeeded + ); + assert_eq!( + execution.leaf_results[1].status, + WorkflowRunStatus::Succeeded + ); + assert_eq!( + execution.leaf_results[2].status, + WorkflowRunStatus::BudgetExceeded + ); + assert_eq!(execution.usage.total_tokens(), 1100); + } + + #[test] + fn mock_executor_honors_zero_token_leaf_budget() { + let workflow = workflow_spec(vec![WorkflowNode::BranchSet(BranchSpec { + id: "verify".to_string(), + description: None, + parallel: false, + budget: BudgetSpec::default(), + permissions: PermissionSpec::default(), + model_policy: ModelPolicy::default(), + children: vec![ + leaf_node_with_budget( + "run-tests", + BudgetSpec { + max_steps: None, + timeout_secs: None, + max_parallel: None, + max_tokens: Some(0), + }, + ), + leaf_node("summarize"), + ], + })]); + + let mut executor = MockWorkflowExecutor::new(); + let execution = executor.run(&workflow).expect("mock workflow should run"); + + assert_eq!(execution.status, WorkflowRunStatus::BudgetExceeded); + assert_eq!(execution.leaf_results.len(), 1); + assert_eq!( + execution.leaf_results[0].status, + WorkflowRunStatus::BudgetExceeded + ); + assert!( + execution.leaf_results[0] + .output + .as_deref() + .unwrap_or_default() + .contains("token budget exhausted") + ); + } + + #[test] + fn mock_executor_honors_per_leaf_token_cap() { + let workflow = workflow_spec(vec![WorkflowNode::BranchSet(BranchSpec { + id: "review".to_string(), + description: None, + parallel: false, + budget: BudgetSpec::default(), + permissions: PermissionSpec::default(), + model_policy: ModelPolicy::default(), + children: vec![ + leaf_node_with_budget( + "expensive-scan", + BudgetSpec { + max_steps: None, + timeout_secs: None, + max_parallel: None, + max_tokens: Some(500), + }, + ), + leaf_node("summarize"), + ], + })]); + + // The leaf outcome uses 800 tokens which exceeds the per-leaf cap of 500. 
+ let mut executor = MockWorkflowExecutor::new().with_leaf_outcome( + "expensive-scan", + MockLeafOutcome::succeeded("scan done").with_usage(WorkflowUsage { + input_tokens: 500, + output_tokens: 300, + cost_microusd: 0, + }), + ); + let execution = executor.run(&workflow).expect("mock workflow should run"); + + assert_eq!(execution.status, WorkflowRunStatus::BudgetExceeded); + assert_eq!(execution.leaf_results.len(), 1); + assert_eq!( + execution.leaf_results[0].status, + WorkflowRunStatus::BudgetExceeded + ); + assert!( + execution.leaf_results[0] + .output + .as_deref() + .unwrap_or_default() + .contains("token budget exhausted") + ); + } + + #[test] + fn budget_spec_serializes_max_tokens() { + let budget = BudgetSpec { + max_steps: Some(10), + timeout_secs: Some(600), + max_parallel: Some(4), + max_tokens: Some(50_000), + }; + let json = serde_json::to_string(&budget).expect("serialize budget"); + let parsed: BudgetSpec = serde_json::from_str(&json).expect("parse budget"); + assert_eq!(parsed, budget); + assert!(json.contains("\"max_tokens\":50000")); + + // Default (all None) round-trips without the field present. + let default_json = + serde_json::to_string(&BudgetSpec::default()).expect("serialize default"); + let parsed_default: BudgetSpec = + serde_json::from_str(&default_json).expect("parse default budget"); + assert_eq!(parsed_default, BudgetSpec::default()); + assert!(parsed_default.max_tokens.is_none()); + } + #[test] fn loop_until_stops_on_pass() { let workflow = workflow_spec(vec![WorkflowNode::LoopUntil(LoopUntilSpec { diff --git a/docs/AGENT_RUNTIME.md b/docs/AGENT_RUNTIME.md index 03baeb0659..f935cf3b69 100644 --- a/docs/AGENT_RUNTIME.md +++ b/docs/AGENT_RUNTIME.md @@ -58,6 +58,12 @@ retry while an equivalent fleet worker would retry and preserve ledger evidence, then the cutover is incomplete. Treat that as a CodeWhale runtime gap, not as normal "sub-agent behavior". 
+The compatibility `agent` runtime now retries transient provider header, +stream, and timeout failures with backoff before marking a worker interrupted; +when retries are exhausted it preserves a checkpoint and returns a continuation +handle. The remaining convergence work is to keep that lifecycle durable across +process restarts, remote execution, and full fleet-ledger scheduling. + The target rule is: - durable or long-running work goes through the fleet worker lifecycle; @@ -107,7 +113,7 @@ delegation levels. Sub-agents and fleet workers share **one** axis, sourced from - `DEFAULT_SPAWN_DEPTH = 3` — the default budget for both standalone sub-agents and fleet workers (so they cannot drift into "two moving targets"); -- `MAX_SPAWN_DEPTH_CEILING = 3` — the hard cap that every configured value +- `MAX_SPAWN_DEPTH_CEILING = 8` — the opt-in cap that every configured value (fleet `max_spawn_depth`, `agent`'s `max_depth`) clamps to. The root worker always runs even at budget 0; the budget gates *child* diff --git a/docs/BENCHMARKS.md b/docs/BENCHMARKS.md index 41c1ccb9c1..8e12f53b99 100644 --- a/docs/BENCHMARKS.md +++ b/docs/BENCHMARKS.md @@ -120,6 +120,42 @@ The harness writes raw Harbor logs plus `summary.json`, `summary.md`, and reported as JSON `null`, and generated run directories are intentionally ignored by git; keep only curated summaries in docs or release notes. +### Compare local release artifacts against baselines + +Use the local-artifact runner when npm still points at the previous public +release and you need Terminal-Bench rows for a candidate branch. It uploads +explicit Linux `codewhale` and `codewhale-tui` binaries into each Harbor task +container, so the benchmark evidence is tied to the intended build instead of +whatever npm currently serves. 
+ +```bash +export CODEWHALE_LINUX_BIN=/path/to/codewhale-linux-x64-0.8.63 +export CODEWHALE_TUI_LINUX_BIN=/path/to/codewhale-tui-linux-x64-0.8.63 + +python scripts/benchmarks/run-codewhale-terminal-bench.py \ + --task build-cython-ext \ + --model deepseek/deepseek-v4-flash \ + --reasoning-effort off +``` + +Run the thin direct DeepSeek baseline and stock mini-swe-agent baseline with +matching task/model settings when you need comparison rows: + +```bash +python scripts/benchmarks/run-deepseek-direct-terminal-bench.py \ + --task build-cython-ext \ + --model deepseek/deepseek-v4-flash \ + --reasoning-effort off + +python scripts/benchmarks/run-mini-swe-terminal-bench.py \ + --task build-cython-ext \ + --model deepseek/deepseek-v4-flash +``` + +All three runners support `--dry-run` to print the Harbor command and write +metadata scaffolding without launching task containers. Generated run +directories stay under `benchmark_results/` and remain ignored by git. + ## PinchBench PinchBench measures agent performance on real-world tasks — scheduling, email diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 16dce5cda9..a1d4579d37 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -121,7 +121,7 @@ Supported keys in the project overlay (top-level fields only): | `sandbox_mode` | `"read-only"` / `"workspace-write"` / `"danger-full-access"` | | `mcp_config_path` | per-repo MCP server set | | `notes_path` | keep notes in-repo | -| `max_subagents` | clamp concurrency for a constrained repo (clamped to 1..=20) | +| `max_subagents` | clamp sub-agent concurrency for a constrained repo (clamped to 1..=20) | | `allow_shell` | gate shell tool access on `false` | The overlay is intentionally narrow — it covers the fields a repo @@ -827,7 +827,9 @@ Press **Ctrl+S** in the composer to park the current draft to drafts with one-line previews and timestamps; `/stash pop` restores the most recently parked draft (LIFO); `/stash clear` wipes the file. 
Capped at 200 entries; multiline drafts -round-trip intact. +round-trip intact. When a turn is already running and queued follow-ups exist, +the pending-input preview advertises **Ctrl+S send now**; in that state Ctrl+S +sends the next queued follow-up into the active turn instead of stashing. ## Settings File (Persistent UI Preferences) @@ -886,12 +888,16 @@ Common settings keys: context panel, `/cost`, `/tokens`, and long-turn notification summaries. The aliases `rmb` and `yuan` normalize to `cny`. - `default_mode` (agent, plan, yolo; legacy `normal` is accepted and normalized to `agent`) -- `sidebar_focus` (`auto`, `work`, `tasks`, `agents`, `context`, `hidden`; default - `auto`): selects the right sidebar focus. `auto` prioritizes Work, Tasks, - Agents, then optional Context, and uses Work as the single quiet empty state. +- `sidebar_focus` (`pinned`, `auto`, `tasks`, `agents`, `context`, `hidden`; default + `pinned`): selects the right sidebar focus. `pinned` keeps the right sidebar + visible when the terminal is wide enough and composes Work, Tasks, Agents, + and optional Context as they have live content. `auto` uses the same composed + panels but collapses while idle. Saving + `/sidebar auto --save` records an explicit auto-collapse opt-in so upgraded + settings files that only captured the old default can migrate back to `pinned`. `hidden` disables the right sidebar entirely so raw terminal selection cannot cross from the transcript into sidebar borders. Legacy `plan` and `todos` - values are accepted and normalized to `work`. + values, plus the old `work` name, are accepted and normalized to `pinned`. - `max_history` (number of submitted input history entries; cleared drafts are also kept locally for composer history search) - `default_model` (model name override) @@ -985,22 +991,64 @@ If you are upgrading from older releases: Explicit tool `model` values win, then role/type overrides, then the parent runtime model. 
Supported convenience keys are `default_model`, `worker_model`, `explorer_model`, `awaiter_model`, - `review_model`, `custom_model`, `max_concurrent`, `launch_concurrency`, - `api_timeout_secs`, and `heartbeat_timeout_secs`. The `[subagents] - max_concurrent` value overrides top-level `max_subagents` and is also clamped - to `1..=20`. `[subagents] launch_concurrency` sets how many direct children - start at once before the rest queue for a launch slot; it defaults to the - resolved `max_subagents` cap and is clamped to `1..=max_subagents` (the - deprecated `interactive_max_launch` key is accepted as an alias, with the new - key winning when both are set). `[subagents] - api_timeout_secs` controls the per-step API timeout for sub-agent model calls - and is clamped to `1..=1800`, with `0` or unset preserving the legacy 120 - second default. `[subagents] heartbeat_timeout_secs` controls stale running - agent cleanup, defaults to `300`, and is clamped to `30..=3600` while staying - above the resolved API timeout. + `review_model`, `custom_model`, `max_concurrent`, `max_admitted`, + `launch_concurrency`, `token_budget`, `api_timeout_secs`, and + `heartbeat_timeout_secs`. The `[subagents] max_concurrent` value overrides + top-level `max_subagents` and is also clamped to `1..=20`. `[subagents] + max_admitted` (aliases: `max_total`, `admission_limit`) is the bounded total + of queued plus running sub-agents; it defaults to `200` so high-fanout turns + can queue and drain while runtime launch pressure remains bounded, and is + clamped to `max_concurrent..=200`. `[subagents] + launch_concurrency` sets how many direct children start at once before the + rest queue for a launch slot; it defaults to the resolved `max_subagents` cap + and is clamped to `1..=max_subagents` (the deprecated + `interactive_max_launch` key is accepted as an alias, with the new key + winning when both are set). 
`[subagents] token_budget` is an optional + aggregate token ceiling for each root `agent` run and its descendants; unset + or `0` preserves unlimited legacy behavior. `[subagents] api_timeout_secs` + controls the per-step API timeout for sub-agent model calls and is clamped to + `1..=1800`, with `0` or unset preserving the legacy 120 second default. + `[subagents] heartbeat_timeout_secs` controls stale running agent cleanup, + defaults to `300`, and is clamped to `30..=3600` while staying above the + resolved API timeout. `[subagents.providers.<provider>]` accepts the same + fanout, depth, budget, and timeout knobs (`enabled`, `max_concurrent`, + `max_admitted`, `launch_concurrency`, `max_depth`, `token_budget`, + `api_timeout_secs`, `heartbeat_timeout_secs`) and inherits the global + `[subagents]` value for any key you omit. Provider keys accept canonical + names such as `deepseek`, `zai`, `openrouter`, `anthropic`, plus convenience + aliases such as `glm` for Z.ai and `deepseek_api` for direct DeepSeek: + + ```toml + [subagents] + max_concurrent = 20 + launch_concurrency = 20 + max_admitted = 200 + max_depth = 6 + + [subagents.providers.deepseek] + max_concurrent = 20 + launch_concurrency = 20 + max_admitted = 200 + + [subagents.providers.glm] + max_concurrent = 4 + launch_concurrency = 3 + max_admitted = 12 + max_depth = 2 + + [subagents.providers.openrouter] + max_concurrent = 5 + launch_concurrency = 3 + max_admitted = 20 + ``` + + `/config subagents status` prints both global values and the active + provider's resolved profile so rate-limit tuning is visible in the TUI. `[subagents.models]` accepts lower-case role or type keys such as `worker`, - `explorer`, `general`, `explore`, `plan`, and `review`. Values must normalize - to a supported DeepSeek model id before an agent is spawned. + `explorer`, `general`, `explore`, `plan`, and `review`. 
Values are validated + against the active provider at spawn time; direct DeepSeek requires DeepSeek + IDs, while OpenAI-compatible/custom provider routes pass explicit model IDs + through to that provider. - `skills_dir` (string, optional): defaults to `~/.codewhale/skills` (each skill is a directory containing `SKILL.md`). Workspace-local `.agents/skills` or `./skills` are preferred when present; the runtime also discovers global diff --git a/docs/CONTRIBUTORS.md b/docs/CONTRIBUTORS.md index 5a4a81f650..cfc3787540 100644 --- a/docs/CONTRIBUTORS.md +++ b/docs/CONTRIBUTORS.md @@ -27,6 +27,92 @@ notes, and relevant issue/PR comments. ## Contributors by time +
+v0.8.63 — sub-agent budgets, command extraction & reliability + + +The v0.8.63 release hardened sub-agent fanout with token-budget governance and +queue-and-drain admission, split the TUI command surface into focused modules, +and landed reliability fixes for app-server teardown, JavaScript-execution +proxying, and DeepSeek thinking tool calls — alongside community contributions. + +- **[donglovejava](https://github.com/donglovejava)** — per-worker sub-agent + token-budget enforcement, so a `token_budget`/`max_tokens` on an individual + `agent` call bounds that worker mid-run with a clean `budget_exhausted` stop + (#3321, harvested) +- **[cyq1017](https://github.com/cyq1017)** — `js_execution` proxy-environment + handling (#3331), Hugging Face API-key env in the auth probe (#3329), and Codex + Responses request retry (#3344) — harvested into the train +- **[aboimpinto](https://github.com/aboimpinto)** — FEAT-005 command extraction: + core/session command groups split into focused modules via `RegisterCommand`, + `/swarm` migration, and Gherkin acceptance coverage (#3330, merged literally + with authorship preserved) +- **[wuisabel-gif](https://github.com/wuisabel-gif)** — tear down the delegated + serve/app-server child process when the dispatcher exits (#3259 / #3317) +- **[nightt5879](https://github.com/nightt5879)** — keep the onboarding marker in + the codewhale home view (#3302) and branch-hygiene check hardening (#3348) +- **[gaord](https://github.com/gaord)** — preserve thinking/tool blocks when + seeding a thread from a saved session, plus Hugging Face provider env (#3329) +- **[greyfreedom](https://github.com/greyfreedom)** — persist ask-permission rules + from approvals and stabilize the CI verifier/provider-registry checks +- Reports that shaped fixes: **[lordwedggie](https://github.com/lordwedggie)** + (#3331 proxy env), **[Final527](https://github.com/Final527)** (#3240 legacy + state migration), **[dxfq](https://github.com/dxfq)** (#3228 sidebar 
default) + +
+ +
+v0.8.62 — provider/model routing, TOML comment preservation & community closeout + + +The v0.8.62 release retuned provider/model routing (GLM-5.2 as the default direct +Z.AI model, `type: "explore"` sub-agents defaulting to the cheaper same-family +sibling), added TOML comment preservation and the CodeWhale-only skill discovery +gate, and shipped the static Linux x64 musl binary — alongside a broad community +closeout and a retroactive credit reconciliation pass. + +- **[zlh124](https://github.com/zlh124)** — preserve user comments and formatting + when rewriting `config.toml`/`settings.toml`/`tui.toml` (with a malformed-file + fallback) and Linux build deps in the cargo install guides (#3270) +- **[idling11](https://github.com/idling11)** — Kimi `type:object` schema root for + all parameter shapes (#3281), `approval_mode` restore on Plan→Agent with a + wait-for-user guard (#3279), and workroom metadata draft types +- **[LeoLin990405](https://github.com/LeoLin990405)** — Poppler `pdftotext -v` + detection (#1667), session persistence before stall/cancel recovery (#2739), + and debounced thinking-stream re-renders (#1620) +- **[nightt5879](https://github.com/nightt5879)** — CodeWhale-only skill discovery + gate (`[skills].scan_codewhale_only`) ignoring cross-tool directories (#3296) and + app-server no-auth loopback docs +- **[reidliu41](https://github.com/reidliu41)** — slash commands exposed as hotbar + actions (#3269) +- **[wavezhang](https://github.com/wavezhang)** — static Linux x64 (musl) release + binaries +- **[wuisabel-gif](https://github.com/wuisabel-gif)** — per-tool snapshot gate + respecting `[snapshots].enabled` (#3292) and composer history written under + `.codewhale` +- **[gaord](https://github.com/gaord)** — `workspace_follow_symlinks` setting for + symlink-aware tool operations with hardened path handling +- **[greyfreedom](https://github.com/greyfreedom)** — ask-permission rules honored + at runtime (#3295) +- 
**[aboimpinto](https://github.com/aboimpinto)** — EPIC-001 command-boundary + replay and user-registry review feedback +- **[h3c-hexin](https://github.com/h3c-hexin)** — volatile workspace path moved + out of the static system prefix (prefix-cache hygiene) +- **[hongchen1993](https://github.com/hongchen1993)** — heuristic-only auto routing + when the flash router is unavailable +- **[lucaszhu-hue](https://github.com/lucaszhu-hue)** — Atlas Cloud provider setup + docs +- Retroactive reconciliation (shipped earlier, credited now): + **[manaskarra](https://github.com/manaskarra)** / **[xfy6238](https://github.com/xfy6238)** (#1157), + **[djairjr](https://github.com/djairjr)** (#1309 alongside reidliu41), + **[Geallier](https://github.com/Geallier)** (#1470), + **[quentin-lian](https://github.com/quentin-lian)** / **[k0tran](https://github.com/k0tran)** (#1531 / #1992), + **[F1LT3R](https://github.com/F1LT3R)** (#1656), + **[cmyyy](https://github.com/cmyyy)** (#1842), + **[Final527](https://github.com/Final527)** (#3058) + +
+
v0.8.61 — runtime control plane & community closeout diff --git a/docs/INSTALL.md b/docs/INSTALL.md index 537c8d656a..c85cd10e08 100644 --- a/docs/INSTALL.md +++ b/docs/INSTALL.md @@ -24,43 +24,49 @@ v0.8.8 onward; Linux RISC-V starts with the first release after v0.8.47. | macOS | x64 | ✅ | ✅ | `codewhale-macos-x64`, `codewhale-tui-macos-x64` | | macOS | arm64 (M-series) | ✅ | ✅ | `codewhale-macos-arm64`, `codewhale-tui-macos-arm64` | | Windows | x64 | ✅ | ✅ | `codewhale-windows-x64.exe`, `codewhale-tui-windows-x64.exe` | -| Other Linux (musl, other architectures) | — | ❌¹ | ✅² | build from source | +| Linux x64 on musl (Alpine) | ✅ (static) | ✅ | ✅ | static `codewhale-tui-linux-x64` (musl) asset | +| Other Linux (musl non-x64, other arches) | — | ❌¹ | ✅² | build from source | | FreeBSD / OpenBSD | — | ❌ | ✅² | build from source | ¹ The npm package will exit with a clear error and point you here. ² Provided your toolchain can compile a recent Rust workspace; see [Build from source](#7-build-from-source) below. -The Linux release assets are glibc builds, not musl builds. They dynamically -link normal Linux runtime libraries such as `libdbus-1` and `libc`; SQLite is -currently bundled into the binary through `rusqlite` so users do not need a -separate `libsqlite3` runtime package for official release assets. Musl-based -systems such as Alpine should use [Build from source](#7-build-from-source). +The Linux **x64** release assets are **static (musl) builds** as of v0.8.63. +They have no glibc dependency and run on any x86_64 Linux, including Ubuntu +22.04, Debian stable, RHEL/CentOS, and Alpine/musl. SQLite is bundled into the +binary through `rusqlite`, so no separate `libsqlite3` runtime package is needed. -### Linux glibc floor +The Linux **arm64** and **riscv64** release assets are still GNU libc (glibc) +builds. They dynamically link normal Linux runtime libraries such as +`libdbus-1` and `libc`, and are built on Ubuntu 24.04, so they can require +`GLIBC_2.39`. 
-The official Linux GNU release assets require the glibc version used by the -release builder. In the current v0.8.62 release lane, native Linux GNU assets -are built on Ubuntu 24.04 and can require `GLIBC_2.39`. Ubuntu 22.04 ships -glibc 2.35, so those binaries fail with errors such as: +### Linux glibc floor (arm64 / riscv64) + +This floor applies only to the **GNU libc** assets (arm64, riscv64). The static +x64 (musl) asset has no `GLIBC_*` symbols, so it passes the install preflight +and runs on older systems without error. In the current v0.8.63 release lane, +the GNU assets are built on Ubuntu 24.04 and can require `GLIBC_2.39`. Ubuntu +22.04 ships glibc 2.35, so those arm64/riscv64 binaries fail with errors such as: ```text version `GLIBC_2.39' not found ``` The npm wrapper, `codewhale update`, and the Unix archive installer preflight -Linux binaries before installing them and point older systems to Cargo/source -builds. If you are on Ubuntu 22.04, Debian stable, RHEL/CentOS, Alpine/musl, or -another older Linux base, use: +Linux GNU binaries before installing them and point older systems to Cargo/source +builds. If you are on Ubuntu 22.04 arm64, Debian stable, RHEL/CentOS, or another +older GNU base for a non-x64 asset, use: ```bash cargo install codewhale-cli --locked cargo install codewhale-tui --locked ``` -Release engineering follow-up: build Linux GNU assets against an older glibc -baseline, or add a musl/static Linux asset. This install guide documents the -floor and preflight behavior; it does not change CI runner selection. +Future release engineering may add static (musl) arm64/riscv64 assets so the +glibc floor goes away entirely; until then, x64 is static and arm64/riscv64 +build from source on older distros. > **Linux ARM64 note (v0.8.7 and earlier).** v0.8.7 and earlier do **not** > publish a Linux ARM64 prebuilt; users on HarmonyOS thin-and-light, Asahi @@ -111,11 +117,11 @@ a download sourced from an impersonating repository or mirror. ## 3. 
Install via npm npm is the recommended install path. The `codewhale` wrapper is published at -v0.8.62 (Node 18+; wrapper available for v0.8.56 and later). +v0.8.63 (Node 18+; wrapper available for v0.8.56 and later). ```bash npm install -g codewhale -codewhale --version # 0.8.62 +codewhale --version # 0.8.63 ``` `postinstall` downloads the right pair of binaries from the matching GitHub diff --git a/docs/KEYBINDINGS.md b/docs/KEYBINDINGS.md index eb1f63a3d7..2a8ce661a4 100644 --- a/docs/KEYBINDINGS.md +++ b/docs/KEYBINDINGS.md @@ -20,8 +20,8 @@ Bindings are not (yet) user-configurable — tracked for a future release (#436, | `Ctrl-O` | Open Activity Detail for selected/live/recent tool work, or the full reasoning timeline for thinking blocks when the composer is empty | | `Ctrl-Shift-E` / `Cmd-Shift-E` | Toggle the file-tree sidebar | | `Alt-G` | Scroll transcript to top when the composer is empty | -| `Alt-!` / `Alt-@` / `Alt-#` / `Alt-$` / `Alt-0` | Focus Work / Tasks / Agents / Context / Auto sidebar | -| `Ctrl-Alt-0` | Hide the right sidebar | +| `Alt-!` / `Alt-@` / `Alt-#` / `Alt-$` / `Alt-0` | Focus Pinned / Tasks / Agents / Context / Auto sidebar | +| `Ctrl-Alt-0` | Hide/show the pinned sidebar | | `Esc` | Close topmost modal · cancel slash menu · dismiss toast | ## Composer @@ -31,7 +31,8 @@ Editing the message you're about to send. | Chord | Action | |-----------------------------|---------------------------------------------------------| | `Enter` | Send the message (or run the slash command) | -| `Alt-Enter` / `Ctrl-J` | Insert a newline without sending | +| `Alt-Enter` / `Ctrl-J` | Insert a newline without sending (`Ctrl-J` force-steers while a turn is running) | +| `Ctrl-Enter` / `Cmd-Enter` | Force a live steer into the current turn when supported by the terminal | | `Ctrl-U` | Delete to start of line | | `Ctrl-W` | Delete previous word | | `Ctrl-A` / `Home` | Move to start of line | @@ -42,7 +43,7 @@ Editing the message you're about to send. 
| `Ctrl-Y` | Yank (paste) from kill buffer | | `↑` / `↓` | Cycle composer history (also selects popup/attachment items) | | `Ctrl-P` / `Ctrl-N` | Cycle composer history (alternative) | -| `Ctrl-S` | Stash current draft (`/stash list`, `/stash pop` to recover) | +| `Ctrl-S` | Stash current draft; with queued follow-ups during a running turn, send the next queued item now | | `Alt-R` | Search prompt history (Alt-R to exit) | | `Tab` | Slash-command / `@`-mention completion (popup-aware) | | `Ctrl-O` | Open external editor for the composer draft when it has focus | @@ -76,6 +77,7 @@ When `[memory] enabled = true`, typing `# foo` and pressing `Enter` appends `foo | `↑` / `↓` / `j` / `k`| Move selection | | `Enter` | Activate the selected item (open / focus / cancel) | | `Tab` | Cycle to next sidebar panel (Work → Tasks → Agents → Context) | +| `Ctrl-X` | Cancel all running background shell jobs when the Tasks panel is focused | | `Esc` | Return focus to composer | ## Slash-command palette (after `Ctrl-K` or typing `/`) diff --git a/docs/RUNTIME_API.md b/docs/RUNTIME_API.md index c14d10f479..4ff805a168 100644 --- a/docs/RUNTIME_API.md +++ b/docs/RUNTIME_API.md @@ -58,6 +58,9 @@ every endpoint documented below is identical across both entrypoints. The runtime API token is read from `--auth-token`, then `CODEWHALE_RUNTIME_TOKEN`, then `DEEPSEEK_RUNTIME_TOKEN`; use `--insecure-no-auth` only with a loopback bind. The `serve` compatibility aliases keep their `--insecure` flag. +The legacy in-process `codewhale app-server` also requires an explicit +`--auth-token` or `CODEWHALE_APP_SERVER_TOKEN` before binding a non-loopback +host; its generated one-time `cwapp_*` token is loopback-only. The `--stdio` control transport is newline-delimited JSON-RPC 2.0. 
Probe it without spending model tokens: diff --git a/docs/SUBAGENTS.md b/docs/SUBAGENTS.md index bbd371ccab..30e960e5de 100644 --- a/docs/SUBAGENTS.md +++ b/docs/SUBAGENTS.md @@ -13,12 +13,13 @@ model-facing launcher is the single `agent` tool and detached work should converge on the same lifecycle as Agent Fleet. The current `agent` implementation delegates to the durable sub-agent runtime -while that -cutover completes. It can still be useful for short in-session delegation, but -if a child fails once on a transient provider timeout while an equivalent fleet -worker would retry from the ledger, that is a runtime unification gap. For work -that must survive provider hiccups, process restarts, sleep, or remote -execution, prefer Fleet or a WhaleFlow-backed fleet run. +while that cutover completes. It can still be useful for short in-session +delegation. Transient provider header/stream/time-out failures are retried with +backoff inside the child runtime before the worker is marked interrupted; if the +retry budget is exhausted, CodeWhale preserves a checkpoint and returns a +continuation handle instead of leaving the parent to infer what happened. For +work that must survive process restarts, sleep, or remote execution, prefer +Fleet or a WhaleFlow-backed fleet run. Sub-agents inherit the parent's tool registry by default, but child agents are leaf workers: they do not receive `agent` or nested lifecycle tools. `agent` @@ -79,6 +80,25 @@ Use fresh sessions for independent exploration. Use forked sessions when the task depends on decisions, files, todos, or plan state already in the parent transcript. +## Worktree isolation + +For parallel edit lanes, launch the child with `worktree: true`. CodeWhale +creates a fresh git worktree and branch for that child, runs the child from the +isolated checkout, and reports the resulting workspace/branch in the returned +session projection and worker record. 
By default the branch is +`codex/agent--` and the checkout lives beside the parent repo under +`.codewhale-worktrees/`, so the parent checkout stays clean. + +Optional fields: + +- `worktree_branch`: exact branch to create. +- `worktree_base`: git ref to branch from; defaults to `HEAD`. +- `worktree_path`: exact checkout path. Relative paths stay under the default + sibling `.codewhale-worktrees/` root. + +Do not combine `cwd` with `worktree`; `cwd` remains the manual escape hatch for +an already-created directory inside the parent workspace. + ## Delegation briefs The parent should pass a compact brief instead of a loose paragraph. The current @@ -181,11 +201,12 @@ the next turn. ## Concurrency cap -Up to **20** sub-agents run concurrently by default (configurable via +Up to **20** sub-agents can run concurrently by default (configurable via `[subagents].max_concurrent` in `~/.codewhale/config.toml`; the default equals -the hard ceiling of 20). When the parent hits the cap, `agent` returns an error -with the cap value; the parent should wait for background completion events -before opening more agents, or ask the user. +the hard instantaneous-concurrency ceiling of 20). The session admits a bounded +queue of up to **200** running plus queued sub-agents by default, so a turn can +request broad fan-out and let the manager drain it without creating an +unbounded population. By default every admitted child may start immediately — there is no artificial throttle. If you want gentler fan-out, lower `[subagents].launch_concurrency` @@ -194,10 +215,71 @@ for a launch slot rather than bursting. `launch_concurrency` defaults to the resolved `max_subagents` cap. (The pre-v0.8.61 `interactive_max_launch` key is still accepted as a deprecated alias; the new key wins when both are set.) -The cap counts only **running** agents — completed / failed / -cancelled records persist for inspection but don't occupy a slot. -Agents that lost their `task_handle` (e.g. 
across a process -restart) also don't count against the cap. +High-fanout Workflows can tune that bounded population with `[subagents] +max_admitted` (aliases: `max_total`, `admission_limit`). That total ceiling +counts both **running** and **queued** agents, while `launch_concurrency` keeps +instantaneous execution bounded. Completed / failed / cancelled records persist +for inspection but don't occupy an admission slot. Agents that lost their +`task_handle` (e.g. across a process restart) also don't count against the cap. + +Provider profiles let one config stay aggressive for direct API routes while +keeping subscription or aggregator routes gentle. Every key under +`[subagents.providers.<provider>]` inherits from `[subagents]` when omitted. +Provider keys accept canonical names such as `deepseek`, `zai`, `openrouter`, +and aliases such as `glm` for Z.ai: + +```toml +[subagents] +# Global fallback for providers without a profile. +max_concurrent = 20 +launch_concurrency = 20 +max_admitted = 200 +max_depth = 6 +token_budget = 100000 + +[subagents.providers.deepseek] +# Direct API key with room to fan out. +max_concurrent = 20 +launch_concurrency = 20 +max_admitted = 200 + +[subagents.providers.glm] +# Z.ai / GLM subscription-style route: keep pressure tight. +max_concurrent = 4 +launch_concurrency = 3 +max_admitted = 12 +max_depth = 2 +api_timeout_secs = 180 +heartbeat_timeout_secs = 240 + +[subagents.providers.openrouter] +max_concurrent = 5 +launch_concurrency = 3 +max_admitted = 20 + +[subagents.providers.anthropic] +max_concurrent = 3 +launch_concurrency = 2 +max_admitted = 12 +``` + +Use `/config subagents status` to see both the global values and the active +provider's resolved fanout, depth, and timeout profile. + +## Token budget governor + +Set `[subagents].token_budget` to give each root `agent` run an aggregate +token ceiling shared by that child and all of its descendants. 
A child can also +start a new scoped budget with the model-facing `agent` tool's +`token_budget` field (the `max_tokens` alias is accepted for Workflow-shaped +callers). When no budget is configured or supplied, behavior is unchanged. + +Provider-reported input and output tokens are folded into the worker record as +each child model call completes. The persisted `usage` object shows the +worker's own totals plus aggregate `budget_spent_tokens` and +`budget_remaining_tokens` for the shared scope. Once the shared scope is +exhausted, further descendant spawns are rejected with an actionable message +instead of opening more agents into a spent pool. ## Per-role models (#3018) @@ -325,8 +407,9 @@ child's assignment no longer fits. Artifacts are symbolic refs. Use `handle_read` on the returned `transcript_handle` for transcript details, and treat `result_summary` as a child self-report unless `verification.status` points to a separate gate or -receipt. `usage.status` is `unknown` until sub-agent token accounting is wired -into the worker ledger. +receipt. `usage.status` is `unknown` until provider usage is reported; then it +switches to `reported`, or `budget_exhausted` when a configured shared token +budget has no remaining tokens. 
## Output contract diff --git a/docs/TOOL_SURFACE.md b/docs/TOOL_SURFACE.md index 7bf0cbbed5..ddcc85fd67 100644 --- a/docs/TOOL_SURFACE.md +++ b/docs/TOOL_SURFACE.md @@ -269,7 +269,7 @@ reflect very different cost classes: | Tool | What each child does | Wall-clock | Token cost | Cap | |---|---|---|---|---| -| `agent` | Full sub-agent loop (planning, tool calls, multi-turn streaming) | minutes | thousands of tokens | 10 in flight by default (`[subagents].max_concurrent`, hard ceiling 20) | +| `agent` | Full sub-agent loop (planning, tool calls, multi-turn streaming) | minutes | thousands of tokens | 20 running by default (`[subagents].max_concurrent`, hard ceiling 20), with up to 200 running + queued admitted by default | | `rlm_eval` helper `sub_query_batch` | One-shot non-streaming Chat Completions calls pinned to `deepseek-v4-flash` inside a live RLM session | seconds | ~hundreds of tokens | 16 per call | The caps appear in each tool's description and error messages so the model diff --git a/npm/codewhale/package.json b/npm/codewhale/package.json index ee43c63f7f..74ddcb2237 100644 --- a/npm/codewhale/package.json +++ b/npm/codewhale/package.json @@ -1,7 +1,7 @@ { "name": "codewhale", - "version": "0.8.62", - "codewhaleBinaryVersion": "0.8.62", + "version": "0.8.63", + "codewhaleBinaryVersion": "0.8.63", "description": "Install and run CodeWhale, the agentic terminal for open-source and open-weight coding models, from GitHub release artifacts.", "author": "Hmbown", "license": "MIT", diff --git a/npm/codewhale/scripts/preflight-glibc.js b/npm/codewhale/scripts/preflight-glibc.js index d0fb957d7b..f09622c9bd 100644 --- a/npm/codewhale/scripts/preflight-glibc.js +++ b/npm/codewhale/scripts/preflight-glibc.js @@ -98,14 +98,12 @@ function glibcCompatibilityMessage(required, host) { return [ `Prebuilt CodeWhale Linux binaries require GLIBC_${formatVersion(required)}, but ${hostLine}`, "", - "Official Linux release binaries are GNU libc builds. 
Ubuntu 22.04 ships glibc 2.35,", - "so it cannot run a binary built against Ubuntu 24.04/glibc 2.39.", + "The Linux x64 release asset is a static (musl) build that runs on any glibc,", + "but the Linux arm64 and riscv64 assets are GNU libc builds linked against", + "Ubuntu 24.04/glibc 2.39, which Ubuntu 22.04 (glibc 2.35) cannot run.", "", buildFromSourceHint(), "", - "Release engineering follow-up: build Linux GNU assets against an older glibc", - "baseline, or add a musl/static Linux asset.", - "", "Set CODEWHALE_SKIP_GLIBC_CHECK=1 to bypass this check at your own risk.", ].join("\n"); } diff --git a/npm/codewhale/test/install.test.js b/npm/codewhale/test/install.test.js index 0c7ed3ffb3..2c0f363b69 100644 --- a/npm/codewhale/test/install.test.js +++ b/npm/codewhale/test/install.test.js @@ -118,7 +118,8 @@ test("glibc preflight message is CodeWhale-branded and actionable", () => { assert.match(message, /Prebuilt CodeWhale Linux binaries require GLIBC_2\.39/); assert.match(message, /this system has glibc 2\.35/); assert.match(message, /cargo install codewhale-cli --locked/); - assert.match(message, /build Linux GNU assets against an older glibc/); + assert.match(message, /Linux x64 release asset is a static \(musl\) build/); + assert.match(message, /arm64 and riscv64 assets are GNU libc builds/); assert.match(message, /CODEWHALE_SKIP_GLIBC_CHECK=1/); }); diff --git a/scripts/benchmarks/README.md b/scripts/benchmarks/README.md index df0bd92e43..e9c2699c02 100644 --- a/scripts/benchmarks/README.md +++ b/scripts/benchmarks/README.md @@ -22,6 +22,14 @@ python scripts/benchmarks/cli-compare.py \ --task prove-plus-comm \ --model deepseek/deepseek-chat +# Local release artifact vs direct baselines on Terminal-Bench sample +export CODEWHALE_LINUX_BIN=/path/to/codewhale-linux-x64-0.8.63 +export CODEWHALE_TUI_LINUX_BIN=/path/to/codewhale-tui-linux-x64-0.8.63 +python scripts/benchmarks/run-codewhale-terminal-bench.py \ + --dry-run \ + --task build-cython-ext \ + --model 
deepseek/deepseek-v4-flash + # PinchBench (auto-install + run) ./scripts/benchmarks/run-pinchbench.sh \ --install \ @@ -32,13 +40,52 @@ python scripts/benchmarks/cli-compare.py \ - `run-swebench.sh` — SWE-bench batch driver and evaluator - `run-terminal-bench.sh` — Terminal-Bench runner via Harbor +- `run-codewhale-terminal-bench.py` — Terminal-Bench runner for explicit + local Linux CodeWhale release artifacts +- `run-deepseek-direct-terminal-bench.py` — thin direct DeepSeek API baseline +- `run-mini-swe-terminal-bench.py` — stock mini-swe-agent Terminal-Bench + baseline - `run-pinchbench.sh` — PinchBench runner with auto-install - `cli-compare.py` — CodeWhale/Codex Terminal-Bench comparison harness - `harbor/__init__.py` — Harbor adapter for CodeWhale (Python) - `harbor/codewhale_agent.py` — Adapter entry point +- `harbor/codewhale_local_agent.py` — Adapter that uploads explicit local + Linux CodeWhale artifacts into Harbor task containers +- `harbor/deepseek_direct_agent.py` — Direct DeepSeek chat-completions + baseline with minimal shell/file tools - `harbor/codex_agent.py` — Codex adapter for paired CLI comparisons ## Documentation See [docs/BENCHMARKS.md](../../docs/BENCHMARKS.md) for full setup instructions, reproducibility checklists, and references. + +## Terminal-Bench Harness Diagnostics + +The local CodeWhale Terminal-Bench adapter runs an artifact preflight inside +each task container before the agent starts: + +```bash +codewhale --version +ldd "$(command -v codewhale)" +/lib/x86_64-linux-gnu/libc.so.6 || true +``` + +Rows with loader, glibc, OpenSSL, or related library failures are classified as +`artifact_incompatible` instead of model failures. The adapter also injects a +compact harness note listing detected verifier surfaces, task-specific +readiness probes when known, background service helpers, and timeout classes. 
+ +Summary rows include one primary `failure_class`: + +```text +solved +model_wrong_answer +tool_policy_loop +artifact_incompatible +setup_timeout +background_not_ready +verifier_environment_failure +context_exhaustion +harness_exception +``` diff --git a/scripts/benchmarks/harbor/codewhale_local_agent.py b/scripts/benchmarks/harbor/codewhale_local_agent.py new file mode 100644 index 0000000000..b0aaae3238 --- /dev/null +++ b/scripts/benchmarks/harbor/codewhale_local_agent.py @@ -0,0 +1,491 @@ +"""Harbor adapter that runs a local CodeWhale Linux binary artifact. + +The stock CodeWhale Harbor adapter installs from npm, but npm may lag the local +release branch. This adapter uploads explicit Linux binaries into each +Terminal-Bench task container so benchmark rows identify the intended local +build. +""" + +from __future__ import annotations + +import os +import shlex +from pathlib import Path, PurePosixPath + +from harbor.agents.installed.base import BaseInstalledAgent, CliFlag, with_prompt_template +from harbor.environments.base import BaseEnvironment +from harbor.models.agent.context import AgentContext +from harbor.models.trial.paths import EnvironmentPaths + +CODEWHALE_LINUX_BIN_ENV = "CODEWHALE_LINUX_BIN" +CODEWHALE_TUI_LINUX_BIN_ENV = "CODEWHALE_TUI_LINUX_BIN" +HARNESS_LIBRARY = "/usr/local/lib/codewhale-bench-harness.sh" +HARNESS_TIMEOUTS = { + "default_command_s": 30, + "build_command_s": 300, + "background_start_s": 600, + "readiness_probe_s": 120, + "verifier_s": 900, +} +TASK_READINESS_PROBES = { + "configure-git-webserver": ( + "curl -fsS http://127.0.0.1:8080/ >/dev/null && " + "rm -rf /tmp/codewhale-readiness-git-probe && " + "git clone http://127.0.0.1:8080/repo.git /tmp/codewhale-readiness-git-probe" + ), + "qemu-alpine-ssh": ( + "timeout 20 bash -lc 'printf \"\\n\" | nc -w 5 127.0.0.1 6665 | " + "grep -Ei \"login:|localhost login\"'" + ), + "qemu-startup": ( + "timeout 20 bash -lc 'printf \"\\n\" | nc -w 5 127.0.0.1 6665 | " + "grep -Ei 
\"login:|localhost login\"'" + ), +} + + +HARNESS_LIBRARY_BODY = r"""#!/usr/bin/env bash +# Shell helpers exposed to benchmark agents. They keep background service +# lifecycle and readiness probes consistent across Terminal-Bench tasks. + +codewhale_background_root() { + local root="${CODEWHALE_BACKGROUND_ROOT:-/tmp/codewhale-background}" + mkdir -p "$root" + printf '%s\n' "$root" +} + +start_background() { + local command="$1" + local name="$2" + local ready_probe="${3:-}" + local timeout_s="${4:-600}" + local root log pid_file pid + root="$(codewhale_background_root)" + log="$root/$name.log" + pid_file="$root/$name.pid" + if [[ -s "$pid_file" ]] && kill -0 "$(cat "$pid_file")" 2>/dev/null; then + printf 'background_already_running name=%s pid=%s log=%s\n' "$name" "$(cat "$pid_file")" "$log" + else + rm -f "$log" + setsid bash -lc "$command" >"$log" 2>&1 < /dev/null & + pid="$!" + printf '%s\n' "$pid" >"$pid_file" + printf 'background_started name=%s pid=%s log=%s\n' "$name" "$pid" "$log" + fi + if [[ -n "$ready_probe" ]]; then + assert_ready "$name" "$ready_probe" "$timeout_s" + fi +} + +read_background_log() { + local name="$1" + local since="${2:-200}" + local root log + root="$(codewhale_background_root)" + log="$root/$name.log" + if [[ ! -f "$log" ]]; then + printf 'background_log_missing name=%s log=%s\n' "$name" "$log" >&2 + return 1 + fi + tail -n "$since" "$log" +} + +stop_background() { + local name="$1" + local root pid_file pid + root="$(codewhale_background_root)" + pid_file="$root/$name.pid" + if [[ ! 
-s "$pid_file" ]]; then + printf 'background_not_running name=%s\n' "$name" + return 0 + fi + pid="$(cat "$pid_file")" + if kill -0 "$pid" 2>/dev/null; then + kill "-$pid" 2>/dev/null || kill "$pid" 2>/dev/null || true + sleep 1 + kill -9 "-$pid" 2>/dev/null || kill -9 "$pid" 2>/dev/null || true + fi + rm -f "$pid_file" + printf 'background_stopped name=%s pid=%s\n' "$name" "$pid" +} + +assert_ready() { + local name="$1" + local ready_probe="$2" + local timeout_s="${3:-120}" + local deadline=$((SECONDS + timeout_s)) + until bash -lc "$ready_probe"; do + if (( SECONDS >= deadline )); then + printf 'background_not_ready name=%s timeout_s=%s probe=%s\n' "$name" "$timeout_s" "$ready_probe" >&2 + read_background_log "$name" 120 >&2 || true + return 124 + fi + sleep 2 + done + printf 'background_ready name=%s probe=%s\n' "$name" "$ready_probe" +} +""" + + +class CodeWhaleLocalAgent(BaseInstalledAgent): + """Run CodeWhale from host-built Linux binaries inside a Harbor task.""" + + _OUTPUT_FILENAME = "codewhale.txt" + _REMOTE_BIN = "/usr/local/bin/codewhale" + _REMOTE_TUI_BIN = "/usr/local/bin/codewhale-tui" + + CLI_FLAGS = [ + CliFlag("max_subagents", cli="--max-subagents", type="int", default=None), + ] + + def __init__( + self, + *args, + local_binary_path: str | None = None, + local_tui_binary_path: str | None = None, + provider: str | None = None, + reasoning_effort: str | None = None, + **kwargs, + ): + super().__init__(*args, **kwargs) + self._local_binary_path = self._resolve_local_path( + local_binary_path, + CODEWHALE_LINUX_BIN_ENV, + ) + self._local_tui_binary_path = self._resolve_local_path( + local_tui_binary_path, + CODEWHALE_TUI_LINUX_BIN_ENV, + ) + self._provider_override = provider + self._reasoning_effort = self._normalize_reasoning_effort(reasoning_effort) + + @staticmethod + def _resolve_local_path(explicit: str | None, env_key: str) -> Path | None: + value = explicit or os.environ.get(env_key) + if value and value.strip(): + return 
Path(value.strip()).expanduser() + return None + + @staticmethod + def name() -> str: + return "codewhale-local" + + def get_version_command(self) -> str | None: + return f"{self._REMOTE_BIN} --version" + + def parse_version(self, stdout: str) -> str: + text = stdout.strip() + for line in text.splitlines(): + line = line.strip() + if line: + for prefix in ("codewhale-tui ", "codewhale-cli ", "codewhale "): + if line.lower().startswith(prefix): + return line[len(prefix) :] + return line + return text + + async def install(self, environment: BaseEnvironment) -> None: + if self._local_binary_path is None: + raise FileNotFoundError( + "CodeWhale Linux binary path is required; pass " + "local_binary_path=... or set CODEWHALE_LINUX_BIN." + ) + if self._local_tui_binary_path is None: + raise FileNotFoundError( + "CodeWhale TUI Linux binary path is required; pass " + "local_tui_binary_path=... or set CODEWHALE_TUI_LINUX_BIN." + ) + if not self._local_binary_path.is_file(): + raise FileNotFoundError(f"CodeWhale Linux binary not found: {self._local_binary_path}") + if not self._local_tui_binary_path.is_file(): + raise FileNotFoundError( + f"CodeWhale TUI Linux binary not found: {self._local_tui_binary_path}" + ) + + await self.exec_as_root( + environment, + command=( + "if command -v apt-get >/dev/null 2>&1; then " + "apt-get update && " + "ssl_pkg=''; " + "if apt-cache show libssl3 >/dev/null 2>&1; then ssl_pkg=libssl3; " + "elif apt-cache show libssl1.1 >/dev/null 2>&1; then ssl_pkg=libssl1.1; fi; " + "DEBIAN_FRONTEND=noninteractive apt-get install -y " + "--no-install-recommends bash ca-certificates git ripgrep libdbus-1-3 $ssl_pkg; " + "elif command -v apk >/dev/null 2>&1; then " + "apk add --no-cache bash ca-certificates git ripgrep openssl dbus-libs; " + "fi" + ), + ) + await environment.upload_file(self._local_binary_path, self._REMOTE_BIN) + await environment.upload_file(self._local_tui_binary_path, self._REMOTE_TUI_BIN) + await 
self._install_harness_library(environment) + await self.exec_as_root( + environment, + command=( + f"chmod 755 {self._REMOTE_BIN} {self._REMOTE_TUI_BIN} && " + f"ln -sf {self._REMOTE_BIN} /usr/local/bin/codew && " + f"{self._REMOTE_BIN} --version && {self._REMOTE_TUI_BIN} --version" + ), + ) + await self._run_artifact_preflight(environment) + + async def _install_harness_library(self, environment: BaseEnvironment) -> None: + quoted_body = shlex.quote(HARNESS_LIBRARY_BODY) + await self.exec_as_root( + environment, + command=( + "mkdir -p /usr/local/lib && " + f"printf %s {quoted_body} > {shlex.quote(HARNESS_LIBRARY)} && " + f"chmod 644 {shlex.quote(HARNESS_LIBRARY)}" + ), + ) + + async def _run_artifact_preflight(self, environment: BaseEnvironment) -> None: + agent_dir = shlex.quote(EnvironmentPaths.agent_dir.as_posix()) + preflight_path = shlex.quote( + PurePosixPath(EnvironmentPaths.agent_dir / "codewhale-artifact-preflight.txt").as_posix() + ) + await self.exec_as_root( + environment, + command=( + f"mkdir -p {agent_dir}; " + "set +e; " + "{ " + "echo '$ codewhale --version'; " + f"{self._REMOTE_BIN} --version; version_status=$?; " + "echo '$ ldd \"$(command -v codewhale)\"'; " + "ldd \"$(command -v codewhale)\" || true; " + "echo '$ /lib/x86_64-linux-gnu/libc.so.6 || true'; " + "/lib/x86_64-linux-gnu/libc.so.6 || true; " + "exit $version_status; " + f"}} > {preflight_path} 2>&1; " + "status=$?; " + f"cat {preflight_path}; " + "if [ $status -ne 0 ] || " + f"grep -Eiq 'error while loading shared libraries|GLIBC_[0-9]|version .* not found|libssl[^[:space:]]*.*not found|libcrypto[^[:space:]]*.*not found|libdbus[^[:space:]]*.*not found|OpenSSL.*(not found|incompatible)' {preflight_path}; " + "then " + "echo 'artifact_incompatible: CodeWhale Linux artifact failed container preflight' >&2; " + "exit 86; " + "fi" + ), + ) + + def _provider_and_model(self) -> tuple[str, str]: + raw = self.model_name or "deepseek/deepseek-v4-flash" + if "/" in raw: + provider, model = 
raw.split("/", 1) + else: + provider, model = "deepseek", raw + if self._provider_override: + provider = self._provider_override + if provider == "openai-compatible": + provider = "openai" + return provider, model + + @staticmethod + def _normalize_reasoning_effort(reasoning_effort: str | None) -> str | None: + if reasoning_effort is None: + return None + normalized = reasoning_effort.strip().lower() + aliases = { + "none": "off", + "disabled": "off", + "false": "off", + "medium": "high", + "mid": "high", + "maximum": "max", + "xhigh": "max", + "ultracode": "max", + } + normalized = aliases.get(normalized, normalized) + if normalized not in {"off", "high", "max"}: + raise ValueError( + "reasoning_effort must be one of off, high, or max " + f"(got {reasoning_effort!r})" + ) + return normalized + + @staticmethod + def _context_task_name(context: AgentContext) -> str | None: + for attr in ("task_name", "name", "id"): + value = getattr(context, attr, None) + if isinstance(value, str) and value.strip(): + return value.strip() + task = getattr(context, "task", None) + if task is not None: + for attr in ("name", "task_name", "id"): + value = getattr(task, attr, None) + if isinstance(value, str) and value.strip(): + return value.strip() + return None + + @staticmethod + def _readiness_probe_for_task(task_name: str | None) -> str | None: + if not task_name: + return None + normalized = task_name.strip().lower() + for key, probe in TASK_READINESS_PROBES.items(): + if key in normalized: + return probe + return None + + async def _detect_verifier_surfaces( + self, + environment: BaseEnvironment, + env: dict[str, str], + workspace: str, + ) -> list[str]: + result = await self.exec_as_agent( + environment, + command=( + "set +e; " + "for path in /tests ./tests ./tests/verify.sh task.yaml pytest.ini pyproject.toml setup.cfg tox.ini README.md README.rst README.txt; do " + "[ -e \"$path\" ] && printf '%s\\n' \"$path\"; " + "done; " + "find . 
-maxdepth 2 -type f \\( -name 'test_*.py' -o -name '*_test.py' -o -name 'Makefile' \\) -print 2>/dev/null | head -n 12" + ), + env=env, + cwd=workspace, + ) + seen: set[str] = set() + surfaces: list[str] = [] + for line in (result.stdout or "").splitlines(): + item = line.strip() + if item and item not in seen: + surfaces.append(item) + seen.add(item) + return surfaces[:16] + + @staticmethod + def _harness_note( + verifier_surfaces: list[str], + task_name: str | None, + readiness_probe: str | None, + ) -> str: + lines = [ + "Benchmark harness note:", + f"- Background service helpers are available with: source {HARNESS_LIBRARY}", + "- Helpers: start_background COMMAND NAME READY_PROBE TIMEOUT_S; read_background_log NAME [LINES]; stop_background NAME; assert_ready NAME READY_PROBE TIMEOUT_S.", + "- Timeout classes: default commands 30s, build commands 300s, background starts 600s, readiness probes 120s, verifiers 900s.", + ] + if task_name: + lines.append(f"- Task name: {task_name}") + if readiness_probe: + lines.append(f"- Task readiness probe: {readiness_probe}") + if verifier_surfaces: + lines.append("- Detected verifier/test surfaces:") + lines.extend(f" - {surface}" for surface in verifier_surfaces) + else: + lines.append("- Detected verifier/test surfaces: none from the standard quick scan.") + return "\n".join(lines) + + @staticmethod + def _key_env_for_provider(provider: str) -> str: + return { + "deepseek": "DEEPSEEK_API_KEY", + "openrouter": "OPENROUTER_API_KEY", + "openai": "OPENAI_API_KEY", + "zai": "ZAI_API_KEY", + "z-ai": "ZAI_API_KEY", + }.get(provider, f"{provider.replace('-', '_').upper()}_API_KEY") + + @with_prompt_template + async def run( + self, + instruction: str, + environment: BaseEnvironment, + context: AgentContext, + ) -> None: + provider, model = self._provider_and_model() + key_env = self._key_env_for_provider(provider) + api_key = self._get_env(key_env) + if not api_key: + raise ValueError(f"{key_env} is required for CodeWhale {provider} 
runs") + + pwd = await self.exec_as_agent(environment, "pwd") + workspace = (pwd.stdout or "/workspace").strip() or "/workspace" + task_name = self._context_task_name(context) + readiness_probe = self._readiness_probe_for_task(task_name) + output_path = PurePosixPath(EnvironmentPaths.agent_dir / self._OUTPUT_FILENAME) + harness_note_path = PurePosixPath(EnvironmentPaths.agent_dir / "codewhale-harness-note.txt") + cli_flags = self.build_cli_flags() + extra_flags = f"{cli_flags} " if cli_flags else "" + config_path = PurePosixPath("/tmp/codewhale-home/config.toml") + config_arg = ( + f"--config {shlex.quote(config_path.as_posix())} " + if self._reasoning_effort + else "" + ) + + env: dict[str, str] = { + key_env: api_key, + "AWS_LC_SYS_NO_ASM": "1", + "CODEWHALE_HOME": "/tmp/codewhale-home", + "CODEWHALE_PROVIDER": provider, + "CODEWHALE_MODEL": model, + } + for name in ("DEEPSEEK_BASE_URL", "CODEWHALE_BASE_URL", "OPENROUTER_BASE_URL"): + value = self._get_env(name) + if value: + env[name] = value + + verifier_surfaces = await self._detect_verifier_surfaces(environment, env, workspace) + harness_note = self._harness_note(verifier_surfaces, task_name, readiness_probe) + + escaped_instruction = shlex.quote(f"{harness_note}\n\n{instruction}") + config_lines = [ + f'provider = "{provider}"', + f'default_text_model = "{model}"', + ] + if self._reasoning_effort: + config_lines.append(f'reasoning_effort = "{self._reasoning_effort}"') + write_config = "printf '%s\\n' " + " ".join( + shlex.quote(line) for line in config_lines + ) + f" > {shlex.quote(config_path.as_posix())}" + await self.exec_as_agent( + environment, + command=( + f"mkdir -p {shlex.quote(EnvironmentPaths.agent_dir.as_posix())} " + '"/tmp/codewhale-home" && ' + f"{write_config} && " + f"printf '%s\\n' {shlex.quote(harness_note)} > {shlex.quote(harness_note_path.as_posix())}" + ), + env=env, + cwd=workspace, + ) + await self.exec_as_agent( + environment, + command=( + "set +e; " + f"{self._REMOTE_BIN} " + 
f"{config_arg}" + f"--provider {shlex.quote(provider)} " + f"--model {shlex.quote(model)} " + f"--workspace {shlex.quote(workspace)} " + "--yolo " + "exec --auto --output-format stream-json " + f"{extra_flags}" + f"-- {escaped_instruction} " + f"2>&1 None: + task_name = self._context_task_name(context) + metadata = { + "task_name": task_name, + "readiness_probe": self._readiness_probe_for_task(task_name), + "harness_timeouts": HARNESS_TIMEOUTS, + "harness_note_path": str(self.logs_dir / "codewhale-harness-note.txt"), + } + output_path = self.logs_dir / self._OUTPUT_FILENAME + if output_path.exists(): + metadata["codewhale_log"] = str(output_path) + metadata["reasoning_effort"] = self._reasoning_effort + context.metadata = metadata diff --git a/scripts/benchmarks/harbor/deepseek_direct_agent.py b/scripts/benchmarks/harbor/deepseek_direct_agent.py new file mode 100644 index 0000000000..eab924e285 --- /dev/null +++ b/scripts/benchmarks/harbor/deepseek_direct_agent.py @@ -0,0 +1,335 @@ +"""Thin Harbor agent that calls DeepSeek directly with shell/file tools. + +This is a deliberately small baseline for CodeWhale-vs-API comparisons. It +does not install an agent in the task container; the Harbor adapter calls +DeepSeek's OpenAI-compatible chat-completions endpoint from the host and uses +Harbor environment operations for the only two exposed tools. 
+""" + +from __future__ import annotations + +import asyncio +import base64 +import json +import os +import shlex +import urllib.error +import urllib.request +from pathlib import PurePosixPath +from typing import Any + +from harbor.agents.base import BaseAgent +from harbor.environments.base import BaseEnvironment +from harbor.models.agent.context import AgentContext + + +class DeepSeekDirectAgent(BaseAgent): + """Direct DeepSeek API baseline with a minimal tool loop.""" + + _OUTPUT_FILENAME = "direct-deepseek.jsonl" + + def __init__( + self, + *args: Any, + reasoning_effort: str | None = None, + max_steps: int = 24, + max_tokens: int = 4096, + base_url: str | None = None, + **kwargs: Any, + ) -> None: + super().__init__(*args, **kwargs) + self._reasoning_effort = self._normalize_reasoning_effort(reasoning_effort) + self._max_steps = int(max_steps) + self._max_tokens = int(max_tokens) + self._base_url = ( + base_url + or os.environ.get("DEEPSEEK_BASE_URL") + or os.environ.get("CODEWHALE_BASE_URL") + or "https://api.deepseek.com/beta" + ).rstrip("/") + self._input_tokens = 0 + self._output_tokens = 0 + self._cache_tokens = 0 + self._reasoning_tokens = 0 + + @staticmethod + def name() -> str: + return "deepseek-direct" + + def version(self) -> str | None: + return "direct-chat-completions" + + async def setup(self, environment: BaseEnvironment) -> None: + return None + + @staticmethod + def _normalize_reasoning_effort(reasoning_effort: str | None) -> str | None: + if reasoning_effort is None: + return None + normalized = reasoning_effort.strip().lower() + aliases = { + "none": "off", + "disabled": "off", + "false": "off", + "medium": "high", + "mid": "high", + "maximum": "max", + "xhigh": "max", + "ultracode": "max", + } + normalized = aliases.get(normalized, normalized) + if normalized not in {"off", "high", "max"}: + raise ValueError( + "reasoning_effort must be one of off, high, or max " + f"(got {reasoning_effort!r})" + ) + return normalized + + def 
_provider_and_model(self) -> tuple[str, str]: + raw = self.model_name or "deepseek/deepseek-v4-flash" + if "/" in raw: + provider, model = raw.split("/", 1) + else: + provider, model = "deepseek", raw + return provider, model + + @staticmethod + def _tools() -> list[dict[str, Any]]: + return [ + { + "type": "function", + "function": { + "name": "exec_shell", + "description": "Run a shell command in the task workspace.", + "parameters": { + "type": "object", + "properties": { + "command": {"type": "string"}, + "timeout_sec": { + "type": "integer", + "minimum": 1, + "maximum": 600, + }, + }, + "required": ["command"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "write_file", + "description": "Write UTF-8 text to a file in the task container.", + "parameters": { + "type": "object", + "properties": { + "path": {"type": "string"}, + "content": {"type": "string"}, + }, + "required": ["path", "content"], + }, + }, + }, + ] + + def _payload(self, messages: list[dict[str, Any]], require_tool: bool = False) -> dict[str, Any]: + _, model = self._provider_and_model() + payload: dict[str, Any] = { + "model": model, + "messages": messages, + "tools": self._tools(), + "temperature": 0, + "max_tokens": self._max_tokens, + "stream": False, + } + if self._reasoning_effort == "off": + payload["tool_choice"] = "required" if require_tool else "auto" + payload["thinking"] = {"type": "disabled"} + elif self._reasoning_effort: + # DeepSeek thinking mode rejects explicit tool_choice, including + # "required"; omit it and let the model choose from the tool list. 
+ payload["reasoning_effort"] = self._reasoning_effort + payload["thinking"] = {"type": "enabled"} + else: + payload["tool_choice"] = "required" if require_tool else "auto" + return payload + + def _api_key(self) -> str: + key = os.environ.get("DEEPSEEK_API_KEY") + if not key: + raise ValueError("DEEPSEEK_API_KEY is required") + return key + + async def _call_deepseek( + self, messages: list[dict[str, Any]], require_tool: bool = False + ) -> dict[str, Any]: + payload = self._payload(messages, require_tool=require_tool) + + def post() -> dict[str, Any]: + request = urllib.request.Request( + f"{self._base_url}/chat/completions", + data=json.dumps(payload).encode("utf-8"), + headers={ + "Authorization": f"Bearer {self._api_key()}", + "Content-Type": "application/json", + }, + method="POST", + ) + try: + with urllib.request.urlopen(request, timeout=300) as response: + return json.loads(response.read().decode("utf-8")) + except urllib.error.HTTPError as exc: + body = exc.read().decode("utf-8", errors="replace") + raise RuntimeError(f"DeepSeek HTTP {exc.code}: {body}") from exc + + return await asyncio.to_thread(post) + + def _record_usage(self, response: dict[str, Any]) -> None: + usage = response.get("usage") + if not isinstance(usage, dict): + return + self._input_tokens += int(usage.get("prompt_tokens") or usage.get("input_tokens") or 0) + self._output_tokens += int( + usage.get("completion_tokens") or usage.get("output_tokens") or 0 + ) + prompt_details = usage.get("prompt_tokens_details") + if isinstance(prompt_details, dict): + self._cache_tokens += int(prompt_details.get("cached_tokens") or 0) + completion_details = usage.get("completion_tokens_details") + if isinstance(completion_details, dict): + self._reasoning_tokens += int(completion_details.get("reasoning_tokens") or 0) + + def _log(self, obj: dict[str, Any]) -> None: + self.logs_dir.mkdir(parents=True, exist_ok=True) + with (self.logs_dir / self._OUTPUT_FILENAME).open("a", encoding="utf-8") as handle: + 
handle.write(json.dumps(obj, ensure_ascii=False, sort_keys=True) + "\n") + + @staticmethod + def _compact_exec_result(stdout: str | None, stderr: str | None, code: int) -> str: + out = stdout or "" + err = stderr or "" + text = f"exit_code={code}\nstdout:\n{out}\nstderr:\n{err}" + if len(text) > 12000: + return text[:12000] + "\n...[truncated]" + return text + + async def _run_tool( + self, + tool_name: str, + arguments: dict[str, Any], + environment: BaseEnvironment, + workspace: str, + ) -> str: + if tool_name == "exec_shell": + command = str(arguments.get("command") or "") + timeout_sec = int(arguments.get("timeout_sec") or 120) + timeout_sec = max(1, min(timeout_sec, 600)) + result = await environment.exec( + command, + cwd=workspace, + timeout_sec=timeout_sec, + ) + return self._compact_exec_result(result.stdout, result.stderr, result.return_code) + + if tool_name == "write_file": + path = str(arguments.get("path") or "") + content = str(arguments.get("content") or "") + if not path: + return "error: missing path" + encoded = base64.b64encode(content.encode("utf-8")).decode("ascii") + parent = PurePosixPath(path).parent.as_posix() + command = ( + f"mkdir -p {shlex.quote(parent)} && " + f"printf %s {shlex.quote(encoded)} | base64 -d > {shlex.quote(path)}" + ) + result = await environment.exec(command, cwd=workspace, timeout_sec=60) + return self._compact_exec_result(result.stdout, result.stderr, result.return_code) + + return f"error: unknown tool {tool_name}" + + async def run( + self, + instruction: str, + environment: BaseEnvironment, + context: AgentContext, + ) -> None: + pwd = await environment.exec("pwd", timeout_sec=10) + workspace = (pwd.stdout or "/app").strip() or "/app" + system = ( + "You are a terminal coding agent inside a benchmark container. " + "Use the provided tools to inspect files, run commands, and write the required artifacts. " + "The benchmark only grades files and container state, not prose. 
" + "Do not answer with an explanation when a file must be saved. " + "If the task asks to save a file, call write_file with the exact requested path. " + "Complete the task directly; when the required file or state is done, reply with DONE." + ) + messages: list[dict[str, Any]] = [ + {"role": "system", "content": system}, + {"role": "user", "content": instruction}, + ] + + for step in range(self._max_steps): + require_tool = step == 0 or ( + messages[-1].get("role") == "user" + and "did not call a tool" in str(messages[-1].get("content", "")) + ) + response = await self._call_deepseek(messages, require_tool=require_tool) + self._record_usage(response) + self._log({"type": "response", "step": step, "response": response}) + choice = (response.get("choices") or [{}])[0] + message = choice.get("message") or {} + tool_calls = message.get("tool_calls") or [] + messages.append(message) + if not tool_calls: + if "DONE" in str(message.get("content") or "").upper(): + break + if step < self._max_steps - 1: + messages.append( + { + "role": "user", + "content": ( + "You did not call a tool. This benchmark will fail unless " + "you create the required artifact in the container. Use " + "write_file or exec_shell now; do not continue in prose." 
+ ), + } + ) + continue + break + for tool_call in tool_calls: + function = tool_call.get("function") or {} + tool_name = function.get("name") or "" + raw_args = function.get("arguments") or "{}" + try: + arguments = json.loads(raw_args) if isinstance(raw_args, str) else raw_args + except json.JSONDecodeError: + arguments = {"command": str(raw_args)} + if not isinstance(arguments, dict): + arguments = {} + output = await self._run_tool(tool_name, arguments, environment, workspace) + self._log( + { + "type": "tool_result", + "step": step, + "tool_call_id": tool_call.get("id"), + "tool_name": tool_name, + "arguments": arguments, + "output": output, + } + ) + messages.append( + { + "role": "tool", + "tool_call_id": tool_call.get("id"), + "content": output, + } + ) + + context.n_input_tokens = self._input_tokens + context.n_output_tokens = self._output_tokens + context.n_cache_tokens = self._cache_tokens + context.metadata = { + "direct_deepseek_log": str(self.logs_dir / self._OUTPUT_FILENAME), + "reasoning_effort": self._reasoning_effort, + "reasoning_tokens": self._reasoning_tokens, + } diff --git a/scripts/benchmarks/run-codewhale-terminal-bench.py b/scripts/benchmarks/run-codewhale-terminal-bench.py new file mode 100644 index 0000000000..165f82a9a3 --- /dev/null +++ b/scripts/benchmarks/run-codewhale-terminal-bench.py @@ -0,0 +1,800 @@ +#!/usr/bin/env python3 +"""Run CodeWhale local artifacts on Terminal-Bench through Harbor. + +This harness is intentionally local and evidence-oriented: + +- it benchmarks explicit Linux CodeWhale binaries, not the npm package; +- it loads provider credentials into the Harbor subprocess environment only; +- it writes compact summaries from Harbor result JSON and CodeWhale stream logs. 
+""" + +from __future__ import annotations + +import argparse +import json +import os +import re +import subprocess +import sys +import time +import tomllib +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +SCRIPT = Path(__file__).resolve() +REPO_ROOT = SCRIPT.parents[2] + +DEFAULT_DATASET = "terminal-bench-sample@2.0" +DEFAULT_AGENT = "scripts.benchmarks.harbor.codewhale_local_agent:CodeWhaleLocalAgent" +DEFAULT_RESULTS_ROOT = REPO_ROOT / "benchmark_results" / "tbench-codewhale" +CODEWHALE_LINUX_BIN_ENV = "CODEWHALE_LINUX_BIN" +CODEWHALE_TUI_LINUX_BIN_ENV = "CODEWHALE_TUI_LINUX_BIN" +DEFAULT_MODELS = ["deepseek/deepseek-v4-flash", "deepseek/deepseek-v4-pro"] +DEFAULT_TASKS = [ + "build-cython-ext", + "chess-best-move", + "configure-git-webserver", + "fix-code-vulnerability", + "log-summary-date-ranges", + "polyglot-c-py", + "qemu-alpine-ssh", + "qemu-startup", + "regex-log", + "sqlite-with-gcov", +] +DEFAULT_DEEPSEEK_BASE_URL = "https://api.deepseek.com/beta" +EXPLICIT_REASONING_EFFORTS = ("off", "high", "max") +FAILURE_CLASSES = ( + "solved", + "model_wrong_answer", + "tool_policy_loop", + "artifact_incompatible", + "setup_timeout", + "background_not_ready", + "verifier_environment_failure", + "context_exhaustion", + "harness_exception", +) +HARNESS_TIMEOUTS = { + "default_command_s": 30, + "build_command_s": 300, + "background_start_s": 600, + "readiness_probe_s": 120, + "verifier_s": 900, +} +ARTIFACT_PREFLIGHT_COMMANDS = [ + "codewhale --version", + 'ldd "$(command -v codewhale)"', + "/lib/x86_64-linux-gnu/libc.so.6 || true", +] +TASK_READINESS_PROBES = { + "configure-git-webserver": ( + "curl -fsS http://127.0.0.1:8080/ >/dev/null && " + "rm -rf /tmp/codewhale-readiness-git-probe && " + "git clone http://127.0.0.1:8080/repo.git /tmp/codewhale-readiness-git-probe" + ), + "qemu-alpine-ssh": ( + "timeout 20 bash -lc 'printf \"\\n\" | nc -w 5 127.0.0.1 6665 | " + "grep -Ei \"login:|localhost login\"'" + ), + 
"qemu-startup": ( + "timeout 20 bash -lc 'printf \"\\n\" | nc -w 5 127.0.0.1 6665 | " + "grep -Ei \"login:|localhost login\"'" + ), +} +KNOWN_MODEL_TOOLS = ( + "grep_files", + "read_file", + "write_file", + "edit_file", + "exec_shell", + "apply_patch", + "list_dir", + "find_files", +) +TOOL_POLICY_LOOP_THRESHOLD = 3 +DENIAL_TERMS = ( + "denied", + "not allowed", + "not available", + "blocked", + "forbidden", + "tool policy", + "use a different tool", + "stop using", +) +ARTIFACT_INCOMPATIBLE_RE = re.compile( + r"artifact_incompatible|error while loading shared libraries|" + r"glibc_[0-9]|version `?glibc|version .* not found|" + r"libssl[^\\n]*not found|libcrypto[^\\n]*not found|libdbus[^\\n]*not found|" + r"openssl[^\\n]*(?:not found|incompatible)", + re.IGNORECASE, +) +BACKGROUND_NOT_READY_RE = re.compile( + r"background_not_ready|readiness probe failed|timed out waiting for .*ready|" + r"connection refused|service .*not ready", + re.IGNORECASE, +) +VERIFIER_ENVIRONMENT_RE = re.compile( + r"verifier_environment_failure|verifier .*environment|grader .*environment|" + r"tests?/verify\\.sh: .*not found|pytest: command not found", + re.IGNORECASE, +) +CONTEXT_EXHAUSTION_RE = re.compile( + r"context_exhaustion|context window|maximum context|token limit|context length", + re.IGNORECASE, +) + + +def stable_path(path: Path) -> str: + try: + return str(path.relative_to(REPO_ROOT)) + except ValueError: + return str(path) + + +def provider_from_model(model: str) -> str: + return model.split("/", 1)[0] if "/" in model else "deepseek" + + +def label_for_model(model: str, reasoning_effort: str | None) -> str: + return f"{model}@{reasoning_effort or 'default'}" + + +def readiness_probe_for_task(task: str | None) -> str | None: + if not task: + return None + normalized = task.strip().lower() + for task_key, probe in TASK_READINESS_PROBES.items(): + if task_key in normalized: + return probe + return None + + +def task_harness_metadata(tasks: list[str]) -> dict[str, dict[str, 
Any]]: + return { + task: { + "readiness_probe": readiness_probe_for_task(task), + "timeout_policy": HARNESS_TIMEOUTS, + } + for task in tasks + } + + +def env_key_for_provider(provider: str) -> str: + return { + "deepseek": "DEEPSEEK_API_KEY", + "openrouter": "OPENROUTER_API_KEY", + "openai": "OPENAI_API_KEY", + "zai": "ZAI_API_KEY", + "z-ai": "ZAI_API_KEY", + }.get(provider, f"{provider.replace('-', '_').upper()}_API_KEY") + + +def resolve_artifact_path(cli_path: Path | None, env_key: str) -> Path | None: + if cli_path is not None: + return cli_path.expanduser() + value = os.environ.get(env_key) + if value and value.strip(): + return Path(value.strip()).expanduser() + return None + + +def load_codewhale_config() -> dict[str, Any]: + path = Path.home() / ".codewhale" / "config.toml" + if not path.exists(): + return {} + return tomllib.loads(path.read_text()) + + +def config_provider_table(config: dict[str, Any]) -> dict[str, Any]: + providers = config.get("providers") + return providers if isinstance(providers, dict) else {} + + +def config_api_key(config: dict[str, Any], provider: str) -> str | None: + providers = config_provider_table(config) + provider_cfg = providers.get(provider, {}) + if isinstance(provider_cfg, dict): + key = provider_cfg.get("api_key") + if isinstance(key, str) and key.strip(): + return key.strip() + key = config.get("api_key") + if provider == "deepseek" and isinstance(key, str) and key.strip(): + return key.strip() + return None + + +def config_base_url(config: dict[str, Any], provider: str) -> str | None: + providers = config_provider_table(config) + provider_cfg = providers.get(provider, {}) + if isinstance(provider_cfg, dict): + base_url = provider_cfg.get("base_url") + if isinstance(base_url, str) and base_url.strip(): + return base_url.strip() + base_url = config.get("base_url") + if provider == "deepseek" and isinstance(base_url, str) and base_url.strip(): + return base_url.strip() + if provider == "deepseek": + return 
DEFAULT_DEEPSEEK_BASE_URL + return None + + +def build_env( + models: list[str], + linux_bin: Path | None, + tui_linux_bin: Path | None, +) -> dict[str, str]: + config = load_codewhale_config() + env = os.environ.copy() + if linux_bin is not None: + env[CODEWHALE_LINUX_BIN_ENV] = str(linux_bin) + if tui_linux_bin is not None: + env[CODEWHALE_TUI_LINUX_BIN_ENV] = str(tui_linux_bin) + python_path = env.get("PYTHONPATH") + env["PYTHONPATH"] = ( + str(REPO_ROOT) if not python_path else f"{REPO_ROOT}{os.pathsep}{python_path}" + ) + + providers = sorted({provider_from_model(model) for model in models}) + for provider in providers: + key_env = env_key_for_provider(provider) + if not env.get(key_env): + key = config_api_key(config, provider) + if key: + env[key_env] = key + base_url = config_base_url(config, provider) + if base_url: + base_env = f"{provider.replace('-', '_').upper()}_BASE_URL" + env.setdefault(base_env, base_url) + if provider == "deepseek": + env.setdefault("CODEWHALE_BASE_URL", base_url) + return env + + +def validate_prereqs(args: argparse.Namespace, env: dict[str, str]) -> None: + missing: list[str] = [] + artifacts = [ + ("CodeWhale Linux binary", args.linux_bin, "--linux-bin", CODEWHALE_LINUX_BIN_ENV), + ( + "CodeWhale TUI Linux binary", + args.tui_linux_bin, + "--tui-linux-bin", + CODEWHALE_TUI_LINUX_BIN_ENV, + ), + ] + for label, path, flag, env_key in artifacts: + if path is None: + missing.append(f"{label} ({flag} or {env_key})") + elif not path.is_file(): + missing.append(f"{label} not found: {path}") + for provider in sorted({provider_from_model(model) for model in args.models}): + key_env = env_key_for_provider(provider) + if not env.get(key_env): + missing.append(key_env) + if missing: + for item in missing: + print(f"missing prerequisite: {item}", file=sys.stderr) + raise SystemExit(2) + if subprocess.run(["docker", "info"], capture_output=True).returncode != 0: + raise SystemExit("Docker is not running") + if subprocess.run(["harbor", 
"--version"], capture_output=True).returncode != 0: + raise SystemExit("harbor is not installed") + + +def run_command(cmd: list[str], env: dict[str, str], timeout: int | None) -> int: + print("$ " + " ".join(cmd)) + start = time.time() + try: + proc = subprocess.run(cmd, cwd=REPO_ROOT, env=env, timeout=timeout) + elapsed = time.time() - start + print(f"exit={proc.returncode} elapsed_s={elapsed:.1f}") + return proc.returncode + except subprocess.TimeoutExpired: + elapsed = time.time() - start + print(f"timeout elapsed_s={elapsed:.1f}", file=sys.stderr) + return 124 + + +def json_load(path: Path) -> dict[str, Any] | None: + try: + data = json.loads(path.read_text()) + except (OSError, json.JSONDecodeError): + return None + return data if isinstance(data, dict) else None + + +def seconds_between(started_at: str | None, finished_at: str | None) -> float | None: + if not started_at or not finished_at: + return None + try: + start = datetime.fromisoformat(started_at.replace("Z", "+00:00")) + finish = datetime.fromisoformat(finished_at.replace("Z", "+00:00")) + except ValueError: + return None + return round((finish - start).total_seconds(), 3) + + +def first_number(mapping: dict[str, Any], keys: tuple[str, ...]) -> int | float | None: + for key in keys: + value = mapping.get(key) + if isinstance(value, (int, float)): + return value + return None + + +def merge_usage(target: dict[str, Any], usage: dict[str, Any]) -> None: + mapping = { + "input_tokens": ("input_tokens", "prompt_tokens", "n_input_tokens"), + "cached_tokens": ("cached_input_tokens", "cache_read_input_tokens", "cached_tokens", "n_cache_tokens"), + "output_tokens": ("output_tokens", "completion_tokens", "n_output_tokens"), + "reasoning_tokens": ("reasoning_tokens", "thinking_tokens", "reasoning_completion_tokens"), + "cost_usd": ("cost_usd", "cost"), + } + for out_key, keys in mapping.items(): + if target.get(out_key) is None: + value = first_number(usage, keys) + if value is not None: + target[out_key] = 
value + + +def walk_usage(obj: Any, row: dict[str, Any]) -> None: + if isinstance(obj, dict): + if any(key in obj for key in ("input_tokens", "prompt_tokens", "n_input_tokens", "cost_usd")): + merge_usage(row, obj) + for key in ("usage", "token_usage", "metrics", "agent_result"): + child = obj.get(key) + if isinstance(child, dict): + walk_usage(child, row) + for value in obj.values(): + if isinstance(value, (dict, list)): + walk_usage(value, row) + elif isinstance(obj, list): + for item in obj: + walk_usage(item, row) + + +def denied_tool_counts(text: str) -> dict[str, int]: + counts = {tool: 0 for tool in KNOWN_MODEL_TOOLS} + for line in text.splitlines(): + lowered = line.lower() + if not any(term in lowered for term in DENIAL_TERMS): + continue + for tool in KNOWN_MODEL_TOOLS: + if tool in lowered: + counts[tool] += 1 + return {tool: count for tool, count in counts.items() if count > 0} + + +def merge_denied_tool_counts(row: dict[str, Any], counts: dict[str, int]) -> None: + if not counts: + return + existing = row.get("denied_tool_counts") + if not isinstance(existing, dict): + existing = {} + row["denied_tool_counts"] = existing + for tool, count in counts.items(): + existing[tool] = int(existing.get(tool, 0)) + count + + +def read_text_if_exists(path: Path) -> str: + try: + return path.read_text(errors="replace") + except OSError: + return "" + + +def parse_agent_log(path: Path, row: dict[str, Any]) -> None: + try: + text = path.read_text(errors="replace") + except OSError: + return + row["transcript_path"] = stable_path(path) + row["transcript_bytes"] = len(text.encode("utf-8", errors="replace")) + merge_denied_tool_counts(row, denied_tool_counts(text)) + for line in text.splitlines(): + stripped = line.strip() + json_start = stripped.find("{") + if json_start < 0: + continue + stripped = stripped[json_start:] + try: + obj = json.loads(stripped) + except json.JSONDecodeError: + continue + walk_usage(obj, row) + + +def parse_exception(exception_info: Any) -> 
str | None: + if not exception_info: + return None + if isinstance(exception_info, dict): + typ = exception_info.get("type") or exception_info.get("exception_type") + message = exception_info.get("message") or exception_info.get("exception_message") + if typ and message: + return f"{typ}: {message}" + if typ: + return str(typ) + if message: + return str(message) + return str(exception_info) + + +def classify_failure(row: dict[str, Any]) -> str: + reward = row.get("reward") + if isinstance(reward, (int, float)) and reward >= 1.0: + return "solved" + + evidence = "\n".join( + str(row.get(key) or "") + for key in ( + "exception", + "verifier_exception", + "artifact_preflight_excerpt", + "background_error", + "transcript_excerpt", + ) + ) + if ARTIFACT_INCOMPATIBLE_RE.search(evidence): + return "artifact_incompatible" + + denied_counts = row.get("denied_tool_counts") + if isinstance(denied_counts, dict): + repeated = [ + (tool, int(count)) + for tool, count in denied_counts.items() + if isinstance(count, int) and count >= TOOL_POLICY_LOOP_THRESHOLD + ] + if repeated: + tool, count = sorted(repeated, key=lambda item: (-item[1], item[0]))[0] + row["denied_tool"] = tool + row["denied_tool_repeat_count"] = count + return "tool_policy_loop" + + if BACKGROUND_NOT_READY_RE.search(evidence): + return "background_not_ready" + if VERIFIER_ENVIRONMENT_RE.search(evidence): + return "verifier_environment_failure" + if CONTEXT_EXHAUSTION_RE.search(evidence): + return "context_exhaustion" + if "timeout" in evidence.lower() or "timed out" in evidence.lower(): + return "setup_timeout" + if row.get("exception") or row.get("verifier_exception"): + return "harness_exception" + return "model_wrong_answer" + + +def short_excerpt(text: str, max_chars: int = 1200) -> str | None: + clean = text.strip() + if not clean: + return None + if len(clean) <= max_chars: + return clean + return clean[: max_chars - 3] + "..." 
+ + +def parse_trial(trial_dir: Path, model: str, reasoning_effort: str | None = None) -> dict[str, Any] | None: + data = json_load(trial_dir / "result.json") + if data is None or "task_name" not in data: + return None + agent_result = data.get("agent_result") if isinstance(data.get("agent_result"), dict) else {} + verifier = data.get("verifier_result") if isinstance(data.get("verifier_result"), dict) else {} + rewards = verifier.get("rewards") if isinstance(verifier.get("rewards"), dict) else {} + row: dict[str, Any] = { + "model": model, + "reasoning_effort": reasoning_effort, + "task": data.get("task_name"), + "trial_dir": stable_path(trial_dir), + "reward": rewards.get("reward"), + "exception": parse_exception(data.get("exception_info")), + "verifier_exception": parse_exception(verifier.get("exception_info")), + "failure_class": None, + "readiness_probe": readiness_probe_for_task(str(data.get("task_name") or "")), + "denied_tool": None, + "denied_tool_repeat_count": 0, + "denied_tool_counts": {}, + "runtime_s": seconds_between(data.get("started_at"), data.get("finished_at")), + "input_tokens": agent_result.get("n_input_tokens"), + "cached_tokens": agent_result.get("n_cache_tokens"), + "output_tokens": agent_result.get("n_output_tokens"), + "reasoning_tokens": None, + "cost_usd": agent_result.get("cost_usd"), + "transcript_path": None, + "transcript_bytes": None, + "artifact_preflight_path": None, + "artifact_preflight_excerpt": None, + "harness_note_path": None, + } + for log_name in ( + "codewhale.txt", + "direct-deepseek.jsonl", + "mini-swe-agent.txt", + "codex.txt", + "oracle.txt", + ): + log_path = trial_dir / "agent" / log_name + if log_path.exists(): + parse_agent_log(log_path, row) + break + preflight_path = trial_dir / "agent" / "codewhale-artifact-preflight.txt" + preflight_text = read_text_if_exists(preflight_path) + if preflight_text: + row["artifact_preflight_path"] = stable_path(preflight_path) + row["artifact_preflight_excerpt"] = 
short_excerpt(preflight_text) + harness_note_path = trial_dir / "agent" / "codewhale-harness-note.txt" + if harness_note_path.exists(): + row["harness_note_path"] = stable_path(harness_note_path) + metadata = agent_result.get("metadata") + if isinstance(metadata, dict) and row.get("reasoning_tokens") is None: + reasoning_tokens = metadata.get("reasoning_tokens") + if isinstance(reasoning_tokens, (int, float)): + row["reasoning_tokens"] = reasoning_tokens + if row.get("readiness_probe") is None and isinstance(metadata.get("readiness_probe"), str): + row["readiness_probe"] = metadata.get("readiness_probe") + row["failure_class"] = classify_failure(row) + return row + + +def parse_job(job_dir: Path, model: str, reasoning_effort: str | None = None) -> list[dict[str, Any]]: + rows: list[dict[str, Any]] = [] + for result_path in sorted(job_dir.glob("*__*/result.json")): + trial = parse_trial(result_path.parent, model, reasoning_effort) + if trial: + rows.append(trial) + return rows + + +def parse_run_dir(run_dir: Path) -> list[dict[str, Any]]: + rows: list[dict[str, Any]] = [] + metadata = json_load(run_dir / "metadata.json") or {} + model_by_job = metadata.get("model_by_job", {}) + if not isinstance(model_by_job, dict): + model_by_job = {} + effort_by_job = metadata.get("reasoning_effort_by_job", {}) + if not isinstance(effort_by_job, dict): + effort_by_job = {} + for job_dir in sorted(run_dir.iterdir()): + if not job_dir.is_dir(): + continue + model = model_by_job.get(job_dir.name) + if not model: + config = json_load(job_dir / "config.json") or {} + models = config.get("models") or config.get("model") + if isinstance(models, list) and models: + model = str(models[0]) + elif isinstance(models, str): + model = models + else: + model = job_dir.name + effort = effort_by_job.get(job_dir.name) + rows.extend(parse_job(job_dir, str(model), str(effort) if effort else None)) + return rows + + +def aggregate(rows: list[dict[str, Any]]) -> list[dict[str, Any]]: + groups: 
dict[str, list[dict[str, Any]]] = {} + for row in rows: + groups.setdefault(str(row.get("model")), []).append(row) + out: list[dict[str, Any]] = [] + for model, model_rows in sorted(groups.items()): + rewards = [float(r["reward"]) for r in model_rows if isinstance(r.get("reward"), (int, float))] + runtimes = [float(r["runtime_s"]) for r in model_rows if isinstance(r.get("runtime_s"), (int, float))] + failure_classes: dict[str, int] = {} + for row in model_rows: + failure_class = str(row.get("failure_class") or "harness_exception") + failure_classes[failure_class] = failure_classes.get(failure_class, 0) + 1 + out.append( + { + "model": model, + "trials": len(model_rows), + "solved": sum(1 for reward in rewards if reward >= 1.0), + "mean_reward": round(sum(rewards) / len(rewards), 4) if rewards else None, + "exceptions": sum(1 for row in model_rows if row.get("exception")), + "failure_classes": failure_classes, + "mean_runtime_s": round(sum(runtimes) / len(runtimes), 2) if runtimes else None, + "input_tokens": sum(int(r.get("input_tokens") or 0) for r in model_rows) or None, + "cached_tokens": sum(int(r.get("cached_tokens") or 0) for r in model_rows) or None, + "output_tokens": sum(int(r.get("output_tokens") or 0) for r in model_rows) or None, + "reasoning_tokens": sum(int(r.get("reasoning_tokens") or 0) for r in model_rows) or None, + "cost_usd": round(sum(float(r.get("cost_usd") or 0.0) for r in model_rows), 6) or None, + } + ) + return out + + +def markdown(rows: list[dict[str, Any]], aggregates: list[dict[str, Any]]) -> str: + lines = ["# CodeWhale Terminal-Bench Summary", ""] + lines.append("## Aggregate") + lines.append("") + lines.append("| model | trials | solved | mean reward | exceptions | failure classes | mean runtime s | input tokens | output tokens | reasoning tokens | cost usd |") + lines.append("| --- | ---: | ---: | ---: | ---: | --- | ---: | ---: | ---: | ---: | ---: |") + for row in aggregates: + rendered = {k: ("null" if v is None else v) for k, v 
in row.items()} + rendered["failure_classes"] = json.dumps( + row.get("failure_classes") or {}, + sort_keys=True, + separators=(",", ":"), + ) + lines.append( + "| {model} | {trials} | {solved} | {mean_reward} | {exceptions} | {failure_classes} | {mean_runtime_s} | {input_tokens} | {output_tokens} | {reasoning_tokens} | {cost_usd} |".format( + **rendered + ) + ) + lines.extend(["", "## Per Task", ""]) + lines.append("| model | effort | task | reward | failure class | denied tool | exception | runtime s | input tokens | output tokens | transcript |") + lines.append("| --- | --- | --- | ---: | --- | --- | --- | ---: | ---: | ---: | --- |") + for row in sorted(rows, key=lambda r: (str(r.get("model")), str(r.get("task")))): + exception = str(row.get("exception") or "") + if len(exception) > 90: + exception = exception[:87] + "..." + denied_tool = row.get("denied_tool") or "" + repeat_count = row.get("denied_tool_repeat_count") or 0 + if denied_tool and repeat_count: + denied_tool = f"{denied_tool} x{repeat_count}" + lines.append( + "| {model} | {reasoning_effort} | {task} | {reward} | {failure_class} | {denied_tool} | {exception} | {runtime_s} | {input_tokens} | {output_tokens} | {transcript_path} |".format( + model=row.get("model"), + reasoning_effort=row.get("reasoning_effort") or "default", + task=row.get("task"), + reward="null" if row.get("reward") is None else row.get("reward"), + failure_class=row.get("failure_class") or "", + denied_tool=str(denied_tool).replace("|", "\\|"), + exception=exception.replace("|", "\\|"), + runtime_s="null" if row.get("runtime_s") is None else row.get("runtime_s"), + input_tokens="null" if row.get("input_tokens") is None else row.get("input_tokens"), + output_tokens="null" if row.get("output_tokens") is None else row.get("output_tokens"), + transcript_path=row.get("transcript_path") or "", + ) + ) + lines.append("") + return "\n".join(lines) + + +def write_summaries(run_dir: Path) -> None: + rows = parse_run_dir(run_dir) + 
aggregates = aggregate(rows) + (run_dir / "summary.json").write_text( + json.dumps({"aggregate": aggregates, "rows": rows}, indent=2, sort_keys=True) + ) + (run_dir / "summary.md").write_text(markdown(rows, aggregates)) + print(markdown(rows, aggregates)) + + +def run_matrix(args: argparse.Namespace, env: dict[str, str]) -> Path: + timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") + run_dir = args.results_root / timestamp + run_dir.mkdir(parents=True, exist_ok=False) + model_by_job: dict[str, str] = {} + effort_by_job: dict[str, str | None] = {} + metadata = { + "created_at_utc": datetime.now(timezone.utc).isoformat(), + "dataset": args.dataset, + "tasks": args.tasks, + "models": args.models, + "reasoning_efforts": args.reasoning_efforts or ["default"], + "agent_import_path": args.agent_import_path, + "linux_bin": str(args.linux_bin) if args.linux_bin else None, + "tui_linux_bin": str(args.tui_linux_bin) if args.tui_linux_bin else None, + "artifact_preflight_commands": ARTIFACT_PREFLIGHT_COMMANDS, + "failure_classes": list(FAILURE_CLASSES), + "harness_timeouts": HARNESS_TIMEOUTS, + "task_harness": task_harness_metadata(args.tasks), + "credential_env_present": { + env_key_for_provider(provider_from_model(model)): bool(env.get(env_key_for_provider(provider_from_model(model)))) + for model in args.models + }, + "model_by_job": model_by_job, + "reasoning_effort_by_job": effort_by_job, + } + + for model in args.models: + for reasoning_effort in (args.reasoning_efforts or [None]): + safe_model = model.replace("/", "_").replace(":", "_") + safe_effort = reasoning_effort or "default" + job_name = f"codewhale-{safe_model}-thinking-{safe_effort}-{timestamp}" + model_by_job[job_name] = label_for_model(model, reasoning_effort) + effort_by_job[job_name] = reasoning_effort + (run_dir / "metadata.json").write_text(json.dumps(metadata, indent=2, sort_keys=True)) + cmd = [ + "harbor", + "run", + "-d", + args.dataset, + "--agent-import-path", + args.agent_import_path, + "-m", + 
model, + "-n", + str(args.concurrency), + "--job-name", + job_name, + "-o", + str(run_dir), + "--agent-include-logs", + "codewhale.txt", + "--agent-include-logs", + "codewhale-artifact-preflight.txt", + "--agent-include-logs", + "codewhale-harness-note.txt", + "--yes", + ] + if reasoning_effort: + cmd.extend(["--agent-kwarg", f"reasoning_effort={reasoning_effort}"]) + for task in args.tasks: + cmd.extend(["--include-task-name", task]) + if args.max_retries: + cmd.extend(["--max-retries", str(args.max_retries)]) + if args.timeout_multiplier != 1.0: + cmd.extend(["--timeout-multiplier", str(args.timeout_multiplier)]) + if args.dry_run: + print("$ " + " ".join(cmd)) + continue + exit_code = run_command(cmd, env=env, timeout=args.wall_timeout) + write_summaries(run_dir) + if exit_code != 0: + raise SystemExit(exit_code) + + (run_dir / "metadata.json").write_text(json.dumps(metadata, indent=2, sort_keys=True)) + return run_dir + + +def main() -> None: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--dataset", default=DEFAULT_DATASET) + parser.add_argument("--task", dest="tasks", action="append", default=[]) + parser.add_argument("--model", dest="models", action="append", default=[]) + parser.add_argument( + "--reasoning-effort", + dest="reasoning_efforts", + action="append", + choices=EXPLICIT_REASONING_EFFORTS, + default=[], + help="Explicit CodeWhale reasoning tier to benchmark; repeat for a matrix.", + ) + parser.add_argument("--agent-import-path", default=DEFAULT_AGENT) + parser.add_argument("--results-root", type=Path, default=DEFAULT_RESULTS_ROOT) + parser.add_argument( + "--linux-bin", + type=Path, + default=None, + help=f"Host path to the Linux codewhale binary; defaults to {CODEWHALE_LINUX_BIN_ENV}.", + ) + parser.add_argument( + "--tui-linux-bin", + type=Path, + default=None, + help=( + "Host path to the Linux codewhale-tui binary; defaults to " + f"{CODEWHALE_TUI_LINUX_BIN_ENV}." 
+ ), + ) + parser.add_argument("--concurrency", type=int, default=1) + parser.add_argument("--max-retries", type=int, default=0) + parser.add_argument("--timeout-multiplier", type=float, default=1.0) + parser.add_argument("--wall-timeout", type=int, default=None) + parser.add_argument("--dry-run", action="store_true") + parser.add_argument("--regenerate", type=Path) + args = parser.parse_args() + + args.tasks = args.tasks or DEFAULT_TASKS + args.models = args.models or DEFAULT_MODELS + args.linux_bin = resolve_artifact_path(args.linux_bin, CODEWHALE_LINUX_BIN_ENV) + args.tui_linux_bin = resolve_artifact_path( + args.tui_linux_bin, + CODEWHALE_TUI_LINUX_BIN_ENV, + ) + + if args.regenerate: + write_summaries(args.regenerate) + return + + env = build_env(args.models, args.linux_bin, args.tui_linux_bin) + validate_prereqs(args, env) + run_dir = run_matrix(args, env) + write_summaries(run_dir) + print(f"results_dir={run_dir}") + + +if __name__ == "__main__": + main() diff --git a/scripts/benchmarks/run-deepseek-direct-terminal-bench.py b/scripts/benchmarks/run-deepseek-direct-terminal-bench.py new file mode 100644 index 0000000000..431f708e1f --- /dev/null +++ b/scripts/benchmarks/run-deepseek-direct-terminal-bench.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python3 +"""Run the thin direct DeepSeek API baseline on Terminal-Bench through Harbor.""" + +from __future__ import annotations + +import argparse +import importlib.util +import json +import subprocess +import sys +import time +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +SCRIPT = Path(__file__).resolve() +REPO_ROOT = SCRIPT.parents[2] +CODEWHALE_RUNNER = REPO_ROOT / "scripts" / "benchmarks" / "run-codewhale-terminal-bench.py" +DEFAULT_DATASET = "terminal-bench-sample@2.0" +DEFAULT_AGENT = "scripts.benchmarks.harbor.deepseek_direct_agent:DeepSeekDirectAgent" +DEFAULT_RESULTS_ROOT = REPO_ROOT / "benchmark_results" / "tbench-direct-api-thin" +DEFAULT_MODEL = 
"deepseek/deepseek-v4-flash" +DEFAULT_TASKS = [ + "build-cython-ext", + "configure-git-webserver", + "fix-code-vulnerability", + "log-summary-date-ranges", + "polyglot-c-py", + "regex-log", + "sqlite-with-gcov", +] +EXPLICIT_REASONING_EFFORTS = ("off", "high", "max") + + +def load_codewhale_runner() -> Any: + spec = importlib.util.spec_from_file_location("codewhale_tbench_runner", CODEWHALE_RUNNER) + if spec is None or spec.loader is None: + raise RuntimeError(f"unable to load {CODEWHALE_RUNNER}") + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def run_command(cmd: list[str], env: dict[str, str], timeout: int | None) -> int: + print("$ " + " ".join(cmd)) + start = time.time() + try: + proc = subprocess.run(cmd, cwd=REPO_ROOT, env=env, timeout=timeout) + elapsed = time.time() - start + print(f"exit={proc.returncode} elapsed_s={elapsed:.1f}") + return proc.returncode + except subprocess.TimeoutExpired: + elapsed = time.time() - start + print(f"timeout elapsed_s={elapsed:.1f}", file=sys.stderr) + return 124 + + +def validate_prereqs(env: dict[str, str]) -> None: + missing: list[str] = [] + if not env.get("DEEPSEEK_API_KEY"): + missing.append("DEEPSEEK_API_KEY") + if missing: + for item in missing: + print(f"missing prerequisite: {item}", file=sys.stderr) + raise SystemExit(2) + if subprocess.run(["docker", "info"], capture_output=True).returncode != 0: + raise SystemExit("Docker is not running") + if subprocess.run(["harbor", "--version"], capture_output=True).returncode != 0: + raise SystemExit("harbor is not installed") + + +def main() -> None: + common = load_codewhale_runner() + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--dataset", default=DEFAULT_DATASET) + parser.add_argument("--task", dest="tasks", action="append", default=[]) + parser.add_argument("--model", default=DEFAULT_MODEL) + parser.add_argument( + "--reasoning-effort", + dest="reasoning_effort", + 
choices=EXPLICIT_REASONING_EFFORTS, + default="off", + ) + parser.add_argument("--agent-import-path", default=DEFAULT_AGENT) + parser.add_argument("--results-root", type=Path, default=DEFAULT_RESULTS_ROOT) + parser.add_argument("--concurrency", type=int, default=1) + parser.add_argument("--max-retries", type=int, default=0) + parser.add_argument("--timeout-multiplier", type=float, default=1.0) + parser.add_argument("--wall-timeout", type=int, default=None) + parser.add_argument("--max-steps", type=int, default=24) + parser.add_argument("--max-tokens", type=int, default=4096) + parser.add_argument("--dry-run", action="store_true") + parser.add_argument("--regenerate", type=Path) + args = parser.parse_args() + + if args.regenerate: + common.write_summaries(args.regenerate) + return + + args.tasks = args.tasks or DEFAULT_TASKS + env = common.build_env([args.model], None, None) + validate_prereqs(env) + + timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") + safe_model = args.model.replace("/", "_").replace(":", "_") + job_name = f"direct-{safe_model}-thinking-{args.reasoning_effort}-{timestamp}" + run_dir = args.results_root / job_name + run_dir.mkdir(parents=True, exist_ok=False) + metadata = { + "created_at_utc": datetime.now(timezone.utc).isoformat(), + "dataset": args.dataset, + "tasks": args.tasks, + "models": [args.model], + "reasoning_effort": args.reasoning_effort, + "agent_import_path": args.agent_import_path, + "model_by_job": {job_name: common.label_for_model(args.model, args.reasoning_effort)}, + "reasoning_effort_by_job": {job_name: args.reasoning_effort}, + "credential_env_present": {"DEEPSEEK_API_KEY": bool(env.get("DEEPSEEK_API_KEY"))}, + } + (run_dir / "metadata.json").write_text(json.dumps(metadata, indent=2, sort_keys=True)) + + cmd = [ + "harbor", + "run", + "-d", + args.dataset, + "--agent-import-path", + args.agent_import_path, + "-m", + args.model, + "-n", + str(args.concurrency), + "--job-name", + job_name, + "-o", + str(run_dir), + 
"--agent-include-logs", + "direct-deepseek.jsonl", + "--agent-kwarg", + f"reasoning_effort={args.reasoning_effort}", + "--agent-kwarg", + f"max_steps={args.max_steps}", + "--agent-kwarg", + f"max_tokens={args.max_tokens}", + "--yes", + ] + for task in args.tasks: + cmd.extend(["--include-task-name", task]) + if args.max_retries: + cmd.extend(["--max-retries", str(args.max_retries)]) + if args.timeout_multiplier != 1.0: + cmd.extend(["--timeout-multiplier", str(args.timeout_multiplier)]) + + if args.dry_run: + print("$ " + " ".join(cmd)) + return + + exit_code = run_command(cmd, env=env, timeout=args.wall_timeout) + common.write_summaries(run_dir) + print(f"results_dir={run_dir}") + if exit_code != 0: + raise SystemExit(exit_code) + + +if __name__ == "__main__": + main() diff --git a/scripts/benchmarks/run-mini-swe-terminal-bench.py b/scripts/benchmarks/run-mini-swe-terminal-bench.py new file mode 100644 index 0000000000..7a0cc08d34 --- /dev/null +++ b/scripts/benchmarks/run-mini-swe-terminal-bench.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python3 +"""Run Harbor's stock mini-swe-agent baseline on Terminal-Bench.""" + +from __future__ import annotations + +import argparse +import importlib.util +import json +import subprocess +import sys +import time +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +SCRIPT = Path(__file__).resolve() +REPO_ROOT = SCRIPT.parents[2] +CODEWHALE_RUNNER = REPO_ROOT / "scripts" / "benchmarks" / "run-codewhale-terminal-bench.py" + +DEFAULT_DATASET = "terminal-bench-sample@2.0" +DEFAULT_AGENT = "mini-swe-agent" +DEFAULT_RESULTS_ROOT = REPO_ROOT / "benchmark_results" / "tbench-mini-swe-default" +DEFAULT_MODEL = "deepseek/deepseek-v4-flash" +EXPLICIT_REASONING_EFFORTS = ("off", "high", "max") + + +def load_codewhale_runner() -> Any: + spec = importlib.util.spec_from_file_location("codewhale_tbench_runner", CODEWHALE_RUNNER) + if spec is None or spec.loader is None: + raise RuntimeError(f"unable to load 
{CODEWHALE_RUNNER}") + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def run_command(cmd: list[str], env: dict[str, str], timeout: int | None) -> int: + printable = ["" if part.startswith("DEEPSEEK_API_BASE=") else part for part in cmd] + print("$ " + " ".join(printable)) + start = time.time() + try: + proc = subprocess.run(cmd, cwd=REPO_ROOT, env=env, timeout=timeout) + elapsed = time.time() - start + print(f"exit={proc.returncode} elapsed_s={elapsed:.1f}") + return proc.returncode + except subprocess.TimeoutExpired: + elapsed = time.time() - start + print(f"timeout elapsed_s={elapsed:.1f}", file=sys.stderr) + return 124 + + +def validate_prereqs(env: dict[str, str]) -> None: + missing: list[str] = [] + if not env.get("DEEPSEEK_API_KEY"): + missing.append("DEEPSEEK_API_KEY") + if missing: + for item in missing: + print(f"missing prerequisite: {item}", file=sys.stderr) + raise SystemExit(2) + if subprocess.run(["docker", "info"], capture_output=True).returncode != 0: + raise SystemExit("Docker is not running") + if subprocess.run(["harbor", "--version"], capture_output=True).returncode != 0: + raise SystemExit("harbor is not installed") + + +def main() -> None: + common = load_codewhale_runner() + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--dataset", default=DEFAULT_DATASET) + parser.add_argument("--task", dest="tasks", action="append", default=[]) + parser.add_argument("--model", default=DEFAULT_MODEL) + parser.add_argument( + "--reasoning-effort", + dest="reasoning_effort", + choices=EXPLICIT_REASONING_EFFORTS, + default=None, + help="Optional mini-swe-agent reasoning effort override. 
Omit for stock defaults.", + ) + parser.add_argument("--agent", default=DEFAULT_AGENT) + parser.add_argument("--results-root", type=Path, default=DEFAULT_RESULTS_ROOT) + parser.add_argument("--concurrency", type=int, default=1) + parser.add_argument("--max-retries", type=int, default=0) + parser.add_argument("--timeout-multiplier", type=float, default=1.0) + parser.add_argument("--wall-timeout", type=int, default=None) + parser.add_argument("--cost-limit", default="0") + parser.add_argument("--dry-run", action="store_true") + parser.add_argument("--regenerate", type=Path) + args = parser.parse_args() + + if args.regenerate: + common.write_summaries(args.regenerate) + return + + args.tasks = args.tasks or common.DEFAULT_TASKS + env = common.build_env([args.model], None, None) + deepseek_base = env.get("DEEPSEEK_API_BASE") or env.get("DEEPSEEK_BASE_URL") or env.get("CODEWHALE_BASE_URL") + if deepseek_base: + env.setdefault("DEEPSEEK_API_BASE", deepseek_base) + validate_prereqs(env) + + timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") + safe_model = args.model.replace("/", "_").replace(":", "_") + effort_label = args.reasoning_effort or "stock" + job_name = f"mini-swe-{safe_model}-thinking-{effort_label}-{timestamp}" + run_dir = args.results_root / job_name + run_dir.mkdir(parents=True, exist_ok=False) + + metadata = { + "created_at_utc": datetime.now(timezone.utc).isoformat(), + "dataset": args.dataset, + "tasks": args.tasks, + "models": [args.model], + "reasoning_effort": args.reasoning_effort, + "agent": args.agent, + "model_by_job": {job_name: common.label_for_model(args.model, args.reasoning_effort)}, + "reasoning_effort_by_job": {job_name: args.reasoning_effort}, + "credential_env_present": {"DEEPSEEK_API_KEY": bool(env.get("DEEPSEEK_API_KEY"))}, + } + (run_dir / "metadata.json").write_text(json.dumps(metadata, indent=2, sort_keys=True)) + + cmd = [ + "harbor", + "run", + "-d", + args.dataset, + "--agent", + args.agent, + "-m", + args.model, + "-n", + 
str(args.concurrency), + "--job-name", + job_name, + "-o", + str(run_dir), + "--agent-include-logs", + "mini-swe-agent.txt", + "--agent-include-logs", + "mini-swe-agent.trajectory.json", + "--agent-kwarg", + f"cost_limit={args.cost_limit}", + "--yes", + ] + if deepseek_base: + cmd.extend(["--agent-env", f"DEEPSEEK_API_BASE={deepseek_base}"]) + if args.reasoning_effort: + cmd.extend(["--agent-kwarg", f"reasoning_effort={args.reasoning_effort}"]) + for task in args.tasks: + cmd.extend(["--include-task-name", task]) + if args.max_retries: + cmd.extend(["--max-retries", str(args.max_retries)]) + if args.timeout_multiplier != 1.0: + cmd.extend(["--timeout-multiplier", str(args.timeout_multiplier)]) + + if args.dry_run: + print("$ " + " ".join(cmd)) + return + + exit_code = run_command(cmd, env=env, timeout=args.wall_timeout) + common.write_summaries(run_dir) + print(f"results_dir={run_dir}") + if exit_code != 0: + raise SystemExit(exit_code) + + +if __name__ == "__main__": + main() diff --git a/scripts/benchmarks/test_run_codewhale_terminal_bench.py b/scripts/benchmarks/test_run_codewhale_terminal_bench.py new file mode 100644 index 0000000000..6f2ed62240 --- /dev/null +++ b/scripts/benchmarks/test_run_codewhale_terminal_bench.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +"""Focused tests for the CodeWhale Terminal-Bench summary layer.""" + +from __future__ import annotations + +import importlib.util +import json +import tempfile +import unittest +from pathlib import Path + + +SCRIPT = Path(__file__).resolve() +RUNNER = SCRIPT.with_name("run-codewhale-terminal-bench.py") + + +def load_runner(): + spec = importlib.util.spec_from_file_location("codewhale_tbench_runner", RUNNER) + if spec is None or spec.loader is None: + raise RuntimeError(f"unable to load {RUNNER}") + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +runner = load_runner() + + +class CodeWhaleTerminalBenchSummaryTests(unittest.TestCase): + def 
test_readiness_probe_uses_task_specific_predicate(self) -> None: + probe = runner.readiness_probe_for_task("terminal-bench/qemu-alpine-ssh") + self.assertIsNotNone(probe) + self.assertIn("login:", probe) + self.assertIn("nc -w 5 127.0.0.1 6665", probe) + + def test_repeated_denied_tool_calls_classify_as_tool_policy_loop(self) -> None: + row = { + "reward": 0, + "exception": None, + "verifier_exception": None, + "denied_tool_counts": {"grep_files": 3}, + } + + self.assertEqual(runner.classify_failure(row), "tool_policy_loop") + self.assertEqual(row["denied_tool"], "grep_files") + self.assertEqual(row["denied_tool_repeat_count"], 3) + + def test_artifact_preflight_errors_classify_as_artifact_incompatible(self) -> None: + row = { + "reward": None, + "exception": "RuntimeError: error while loading shared libraries: libssl.so.3: cannot open shared object file", + "verifier_exception": None, + "denied_tool_counts": {}, + } + + self.assertEqual(runner.classify_failure(row), "artifact_incompatible") + + def test_parse_trial_preserves_failure_class_metadata(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + trial = Path(tmp) / "codewhale__qemu-alpine-ssh" + agent_dir = trial / "agent" + agent_dir.mkdir(parents=True) + (trial / "result.json").write_text( + json.dumps( + { + "task_name": "qemu-alpine-ssh", + "started_at": "2026-06-21T00:00:00Z", + "finished_at": "2026-06-21T00:01:00Z", + "agent_result": {"n_input_tokens": 10, "n_output_tokens": 2}, + "verifier_result": {"rewards": {"reward": 0}}, + } + ) + ) + (agent_dir / "codewhale.txt").write_text( + "\n".join( + [ + "tool denied: grep_files is not available", + "tool denied: grep_files is not available", + "tool denied: grep_files is not available", + ] + ) + ) + (agent_dir / "codewhale-artifact-preflight.txt").write_text( + "codewhale 0.8.63\n" + ) + (agent_dir / "codewhale-harness-note.txt").write_text("Benchmark harness note\n") + + row = runner.parse_trial(trial, "deepseek/deepseek-v4-flash") + + 
self.assertIsNotNone(row) + assert row is not None + self.assertEqual(row["failure_class"], "tool_policy_loop") + self.assertEqual(row["denied_tool"], "grep_files") + self.assertIn("login:", row["readiness_probe"]) + self.assertIsNotNone(row["artifact_preflight_path"]) + self.assertIsNotNone(row["harness_note_path"]) + + def test_markdown_includes_failure_class_columns(self) -> None: + rows = [ + { + "model": "m", + "reasoning_effort": None, + "task": "t", + "reward": 0, + "failure_class": "background_not_ready", + "denied_tool": None, + "denied_tool_repeat_count": 0, + "exception": None, + "runtime_s": 1, + "input_tokens": 1, + "output_tokens": 1, + "transcript_path": "log.txt", + } + ] + text = runner.markdown(rows, runner.aggregate(rows)) + + self.assertIn("failure classes", text) + self.assertIn("failure class", text) + self.assertIn("background_not_ready", text) + + +if __name__ == "__main__": + unittest.main() diff --git a/scripts/check-coauthor-trailers.py b/scripts/check-coauthor-trailers.py index 1ecd605cbc..693f3b0982 100644 --- a/scripts/check-coauthor-trailers.py +++ b/scripts/check-coauthor-trailers.py @@ -54,11 +54,15 @@ def author(self) -> str: @dataclass(frozen=True) class Commit: sha: str + parents: str author_name: str author_email: str subject: str body: str + def is_merge_commit(self) -> bool: + return len(self.parents.split()) > 1 + def norm_key(value: str) -> str: return value.strip().lower() @@ -110,7 +114,7 @@ def git_log(commit_range: str) -> list[Commit]: [ "git", "log", - "--format=%H%x00%an%x00%ae%x00%s%x00%B%x1e", + "--format=%H%x00%P%x00%an%x00%ae%x00%s%x00%B%x1e", commit_range, ], cwd=ROOT, @@ -123,8 +127,8 @@ def git_log(commit_range: str) -> list[Commit]: for record in raw.split("\x1e"): if not record.strip(): continue - parts = record.split("\x00", 4) - if len(parts) != 5: + parts = record.split("\x00", 5) + if len(parts) != 6: raise RuntimeError("failed to parse git log output") commits.append(Commit(*parts)) return commits @@ 
-179,7 +183,7 @@ def validate(commits: list[Commit], aliases: dict[str, Identity], check_authors: if CANONICAL_NOREPLY_RE.match(coauthor.email): continue if is_bot_identity(coauthor.name, coauthor.email): - if is_harvested_commit: + if is_harvested_commit and not commit.is_merge_commit(): errors.append( f"{prefix}: remove bot/tool co-author trailer " f"{coauthor.name} <{coauthor.email}>; contributor trailers are for humans." diff --git a/scripts/mobile-smoke.sh b/scripts/mobile-smoke.sh index d333ff5eeb..2a0c45c388 100755 --- a/scripts/mobile-smoke.sh +++ b/scripts/mobile-smoke.sh @@ -43,7 +43,7 @@ start_server() { SERVER_PID=$! # Wait for the server to become ready. for _ in $(seq 1 30); do - if curl -sf "http://127.0.0.1:${port}/health" >/dev/null 2>&1; then + if curl -sf --max-time 2 "http://127.0.0.1:${port}/health" >/dev/null 2>&1; then return 0 fi sleep 0.3 @@ -73,7 +73,7 @@ assert_status() { fi local url="http://127.0.0.1:${PORT}${path}" - local curl_args=(-sf -o /dev/null -w '%{http_code}' -X "$method") + local curl_args=(-sf --max-time 10 -o /dev/null -w '%{http_code}' -X "$method") if [[ -n "$header" ]]; then curl_args+=(-H "$header") fi @@ -95,7 +95,7 @@ assert_status() { assert_body_contains() { local method="$1" path="$2" header="$3" substring="$4" local url="http://127.0.0.1:${PORT}${path}" - local curl_args=(-sf -X "$method") + local curl_args=(-sf --max-time 10 -X "$method") if [[ -n "$header" ]]; then curl_args+=(-H "$header") fi @@ -157,7 +157,7 @@ STDOUT_FILE=$(mktemp) SERVER_PID=$! 
SERVER_READY=0 for _ in $(seq 1 30); do - if curl -sf "http://127.0.0.1:${PORT}/health" > /dev/null 2>&1; then + if curl -sf --max-time 2 "http://127.0.0.1:${PORT}/health" > /dev/null 2>&1; then SERVER_READY=1 break fi @@ -193,4 +193,4 @@ log "Results: $PASS passed, $FAIL failed" if [[ "$FAIL" -gt 0 ]]; then exit 1 -fi +fi \ No newline at end of file diff --git a/scripts/remote-smoke/setup-vm.sh b/scripts/remote-smoke/setup-vm.sh index 3331216648..278e967746 100755 --- a/scripts/remote-smoke/setup-vm.sh +++ b/scripts/remote-smoke/setup-vm.sh @@ -146,7 +146,7 @@ for _ in $(seq 1 20); do curl -fsS --max-time 2 http://127.0.0.1:7878/health >/dev/null 2>&1 && break sleep 1 done -curl -fsS http://127.0.0.1:7878/health; echo +curl -fsS --max-time 3 http://127.0.0.1:7878/health; echo systemctl start codewhale-telegram-bridge sleep 3 CODEWHALE_BRIDGE=telegram bash /tmp/codewhale/scripts/tencent-lighthouse/doctor.sh diff --git a/web/app/[locale]/faq/page.tsx b/web/app/[locale]/faq/page.tsx index 2fe3e46889..7efb4e0b7d 100644 --- a/web/app/[locale]/faq/page.tsx +++ b/web/app/[locale]/faq/page.tsx @@ -515,10 +515,10 @@ default_text_model = "openrouter/deepseek/deepseek-v4-pro"`} q: "什么是 Goal 模式?现在可用吗?", a: ( <> - Goal 模式是未来的工作流/标签页方向,用于长时间运行的多步目标——不是当前的 /goal 命令。 - 当前的 /goal 是当前 TUI 会话的目标设置器;app-server 客户端也可以通过 thread/goal/* 方法持久化线程目标。 - 完整的 Goal 工作区(自主多回合任务执行,带更完整的检查点/恢复 UI)仍在规划中。 - 关注 #891 的进展。 + /goal 为当前 TUI 会话设置目标,支持 pauseresumecompleteblockedclear 控制。 + App-server 客户端也可以通过 thread/goal/* 方法持久化线程范围的目标,支持 setgetclear。 + 它不会新增一个应用模式;模式切换器仍然是 Plan、Agent 和 YOLO。 + 跟踪进展:#891。 ), sources: ["#891"], diff --git a/web/app/[locale]/page.tsx b/web/app/[locale]/page.tsx index e28bfa7566..65e9e6f183 100644 --- a/web/app/[locale]/page.tsx +++ b/web/app/[locale]/page.tsx @@ -26,6 +26,7 @@ const RELEASE_CONTRIBUTORS = [ "@dzyuan", "@mvanhorn", "@malsony", + "@manaskarra", "@gaord", "@yuanchenglu", "@idling11", @@ -55,6 +56,7 @@ const RELEASE_CONTRIBUTORS = [ "@mo-vic", "@hufanexplore", 
"@hoclaptrinh33", + "@quentin-lian", "@BryonGo", ]; @@ -69,7 +71,15 @@ const RELEASE_HELPERS = [ "@jretz", "@Neo-millunnium", "@caeserchen", + "@cmyyy", + "@djairjr", + "@F1LT3R", + "@Final527", + "@Geallier", + "@k0tran", + "@lordwedggie", "@T-Phuong-Nguyen", + "@xfy6238", "@zhyuzhyu", "@0gl20shk0sbt36", "@hatakes", @@ -132,7 +142,6 @@ export default async function HomePage({ params }: { params: Promise<{ locale: s } const highlights = isZh && dispatch.highlightsZh ? dispatch.highlightsZh : dispatch.highlights; - const releaseVersion = facts.version ?? "0.8.62"; return ( <>