diff --git a/.gitignore b/.gitignore index 71e8ddf2d..ec6143c40 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,4 @@ screenshot_*.png *.screenshot.png *.bmp *.tiff +app.db diff --git a/.squad/decisions.md b/.squad/decisions.md deleted file mode 100644 index cd8b21a28..000000000 --- a/.squad/decisions.md +++ /dev/null @@ -1,27 +0,0 @@ -# PR Review Squad — Shared Decisions - -These rules apply to every worker on every PR fix task. Deviating from them causes commits landing on the wrong remote or history rewrites that break collaborators. - -## Push Safety - -1. **NEVER force push** — no `--force`, `--force-with-lease`, or any force variant. Force pushing after a rebase is what caused wrong-remote pushes in the first place. - -2. **ALWAYS use `gh pr checkout `** to check out a PR branch — never `git fetch origin pull//head:pr-`. The `gh` tool sets the branch tracking to the correct remote (fork or origin) automatically. A manually fetched branch has no tracking and `git push` will default to `origin`, silently pushing to the wrong repo. - -3. **ALWAYS integrate with `git merge origin/main`** — never `git rebase origin/main`. Merge adds a merge commit (no history rewrite, no force push needed). - -4. **ALWAYS verify the push target before pushing**: - ```bash - gh pr view --json headRepositoryOwner,headRefName \ - --jq '"Expected: " + .headRepositoryOwner.login + "/" + .headRefName' - git config branch.$(git branch --show-current).remote - ``` - These must agree. If they don't, something is wrong — stop and investigate. - -## Review Workflow - -5. When reviewing only (no fix), use `gh pr diff ` — never check out the branch. - -6. Consensus filter: include a finding in the final report only if flagged by 2+ of the 5 sub-agent models. - -7. Do not comment on style, naming, or formatting. Flag only: bugs, data loss, race conditions, security issues, logic errors. 
diff --git a/.squad/push-to-pr.sh b/.squad/push-to-pr.sh deleted file mode 100644 index 3c44b02eb..000000000 --- a/.squad/push-to-pr.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env bash -# push-to-pr.sh — Safe push helper for PR review workers -# -# Usage: .squad/push-to-pr.sh -# -# This script: -# 1. Reads PR metadata to find the correct remote and branch -# 2. Verifies the current branch matches the PR branch -# 3. Pushes to the correct remote (handles forks transparently) -# 4. Verifies the push landed by comparing local and remote HEADs - -set -euo pipefail - -PR_NUMBER="${1:?Usage: push-to-pr.sh }" - -echo "==> Fetching PR #${PR_NUMBER} metadata..." -PR_JSON=$(gh pr view "$PR_NUMBER" --json headRefName,headRepositoryOwner,headRepository) -BRANCH=$(echo "$PR_JSON" | jq -r '.headRefName') -OWNER=$(echo "$PR_JSON" | jq -r '.headRepositoryOwner.login') -REPO=$(echo "$PR_JSON" | jq -r '.headRepository.name') - -echo " PR branch: ${BRANCH}" -echo " PR owner: ${OWNER}" -echo " PR repo: ${REPO}" - -# Verify current branch matches the PR branch -CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD) -if [ "$CURRENT_BRANCH" != "$BRANCH" ]; then - echo "ERROR: Current branch '${CURRENT_BRANCH}' does not match PR branch '${BRANCH}'" - echo " Run: gh pr checkout ${PR_NUMBER}" - exit 1 -fi - -# Find the remote that points to owner/repo -# gh pr checkout registers the fork owner's login as the remote name -REMOTE=$(git remote -v | grep "${OWNER}/${REPO}" | head -1 | awk '{print $1}' || true) -if [ -z "$REMOTE" ]; then - echo "ERROR: No remote found matching ${OWNER}/${REPO}" - echo "Available remotes:" - git remote -v - exit 1 -fi - -echo "==> Pushing to remote '${REMOTE}' (${OWNER}/${REPO}), branch '${BRANCH}'..." 
-git push "$REMOTE" HEAD:"$BRANCH" - -# Verify push succeeded by comparing SHAs -LOCAL_SHA=$(git rev-parse HEAD) -REMOTE_SHA=$(git ls-remote "$REMOTE" "refs/heads/${BRANCH}" | awk '{print $1}') - -if [ "$LOCAL_SHA" = "$REMOTE_SHA" ]; then - echo "✅ Push verified: ${LOCAL_SHA}" -else - echo "❌ Push verification failed!" - echo " Local: ${LOCAL_SHA}" - echo " Remote: ${REMOTE_SHA}" - exit 1 -fi diff --git a/.squad/routing.md b/.squad/routing.md deleted file mode 100644 index 72941ff72..000000000 --- a/.squad/routing.md +++ /dev/null @@ -1,98 +0,0 @@ -# PR Review Squad — Work Routing - -## Fix Process (when told to fix a PR) - -> **Critical:** Follow this process exactly. Deviating — especially using rebase or force push — causes commits to land on the wrong remote. - -### 1. Check out the PR branch -```bash -gh pr checkout -``` -This sets the branch tracking to the correct remote automatically (fork or origin). -**Never** use `git fetch origin pull//head:...` — that creates a branch with no tracking. - -> **Worktree conflict?** If `gh pr checkout` fails with "already checked out at...", run: -> ```bash -> git worktree list # find which worktree has the branch -> git worktree remove # remove stale worktree if safe, OR -> gh pr checkout -b pr--fix # use a unique local branch name -> ``` - -### 2. Integrate with main (MERGE, not rebase) -```bash -git fetch origin main -git merge origin/main -``` -**Never** use `git rebase origin/main`. Merge adds a merge commit; no force push needed. -If there are conflicts, resolve them, then `git add && git merge --continue`. - -### 3. Make the fix -- Use the `edit` tool for file changes, never `sed` -- Make minimal, surgical changes - -### 4. Run tests -Discover and run the repo's test suite. Look for test projects, Makefiles, CI scripts, or package.json test scripts. Run them and verify only pre-existing failures remain. - -### 5. 
Commit -```bash -git add # Never git add -A blindly -git commit -m "fix: - -Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>" -``` - -### 6. Push to the correct remote - -**Always verify the push target before pushing:** -```bash -# Get expected owner/branch from the PR -gh pr view --json headRepositoryOwner,headRefName \ - --jq '"Expected: " + .headRepositoryOwner.login + "/" + .headRefName' - -# Confirm branch tracking matches -git config branch.$(git branch --show-current).remote -``` -These must agree. If they don't, stop and investigate before pushing. - -Once verified: -```bash -git push -``` -`gh pr checkout` sets branch tracking correctly, so bare `git push` lands on the right remote. - -**If `git push` fails** (e.g., tracking not set up correctly), push explicitly using the owner's remote. -`gh pr checkout` registers the fork owner's GitHub login as a named remote — use it directly: -```bash -# Discover the owner's remote name -OWNER=$(gh pr view --json headRepositoryOwner --jq '.headRepositoryOwner.login') -BRANCH=$(gh pr view --json headRefName --jq '.headRefName') -git remote -v | grep "$OWNER" # confirm remote exists - -git push "$OWNER" HEAD:"$BRANCH" -``` -Alternatively, use `.squad/push-to-pr.sh ` which automates the above. - -### 7. Verify the push landed -```bash -gh pr view --json commits --jq '.commits[-1].messageHeadline' -``` -The last commit headline should match your fix commit message. - -### 8. Re-review -Dispatch 5 parallel sub-agent reviews with the updated diff (include previous findings for status tracking). - ---- - -## Review Process (no fix) - -Use `gh pr diff ` — **never** check out the branch for review-only tasks. - -**IMPORTANT: Assign each PR to exactly ONE reviewer worker.** Do NOT spread a single PR review across multiple workers. One worker reviews one PR — that worker handles multi-model consensus internally. 
- -If multiple PRs need reviewing, assign one PR per worker (up to the number of available workers). - ---- - -## Why `gh pr checkout` + merge beats manual fetch + rebase - -`gh pr checkout` reads PR metadata and configures the branch to track the correct remote (fork or origin). Bare `git fetch pull//head:...` creates a local branch with no upstream — `git push` then defaults to `origin`, silently pushing to the base repository instead of the author's fork. diff --git a/.squad/team.md b/.squad/team.md deleted file mode 100644 index 9dc5ceaa2..000000000 --- a/.squad/team.md +++ /dev/null @@ -1,7 +0,0 @@ -# PR Review Squad - -Workers that review and fix pull requests using multi-model consensus review. - -| Member | Role | Expertise | -|--------|------|-----------| -| reviewer | PR Reviewer | Multi-model dispatch, consensus synthesis, bug detection | diff --git a/PolyPilot.Tests/SessionOrganizationTests.cs b/PolyPilot.Tests/SessionOrganizationTests.cs index adc2b2242..7759bca1e 100644 --- a/PolyPilot.Tests/SessionOrganizationTests.cs +++ b/PolyPilot.Tests/SessionOrganizationTests.cs @@ -2090,6 +2090,29 @@ public void BuiltInPresets_IncludePRReviewSquad() Assert.NotNull(prSquad.RoutingContext); Assert.NotNull(prSquad.WorkerSystemPrompts); Assert.Equal(prSquad.WorkerModels.Length, prSquad.WorkerSystemPrompts!.Length); + + // All 5 workers must be Opus (they dispatch sub-agents internally) + foreach (var model in prSquad.WorkerModels) + Assert.Equal("claude-opus-4.6", model); + + // Worker prompts must instruct multi-model sub-agent dispatch and adversarial consensus + foreach (var prompt in prSquad.WorkerSystemPrompts) + { + Assert.Contains("claude-opus-4.6", prompt); + Assert.Contains("claude-sonnet-4.6", prompt); + Assert.Contains("gpt-5.3-codex", prompt); + Assert.Contains("task", prompt); // dispatch via task tool + Assert.Contains("Adversarial Consensus", prompt); + Assert.Contains("NEVER post more than one comment", prompt); + } + + // Fix process must use 
merge, not rebase + Assert.Contains("git merge", prSquad.SharedContext!); + Assert.DoesNotContain("git rebase", prSquad.SharedContext); + Assert.DoesNotContain("force-with-lease", prSquad.SharedContext); + + // Routing must enforce 1 worker per PR + Assert.Contains("ONE worker per PR", prSquad.RoutingContext!); } [Fact] @@ -2531,7 +2554,7 @@ public void Scenario_CreateGroupFromPreset() // Step 4: System creates the group - verify the preset structure // (CopilotService.CreateGroupFromPresetAsync does the actual creation at runtime) - Assert.Equal("claude-sonnet-4.6", prReview.WorkerModels[0]); + Assert.Equal("claude-opus-4.6", prReview.WorkerModels[0]); // Step 5-6: Each member has appropriate capabilities var orchCaps = ModelCapabilities.GetCapabilities(prReview.OrchestratorModel); diff --git a/PolyPilot/Components/Pages/Dashboard.razor b/PolyPilot/Components/Pages/Dashboard.razor index 2a954bcef..f3d7b2dd3 100644 --- a/PolyPilot/Components/Pages/Dashboard.razor +++ b/PolyPilot/Components/Pages/Dashboard.razor @@ -2219,11 +2219,11 @@ return; } - var started = await CopilotService.StartFleetAsync(sessionName, arg); + var (started, error) = await CopilotService.StartFleetAsync(sessionName, arg); if (started) session.History.Add(ChatMessage.SystemMessage($"🚀 Fleet started for: *{arg}*")); else - session.History.Add(ChatMessage.ErrorMessage("Failed to start fleet mode. Ensure the session is connected and idle.")); + session.History.Add(ChatMessage.ErrorMessage($"Failed to start fleet mode: {error ?? "Unknown error"}")); } private async Task HandlePromptCommand(string sessionName, AgentSessionInfo session, string arg) diff --git a/PolyPilot/Models/ModelCapabilities.cs b/PolyPilot/Models/ModelCapabilities.cs index a27434ed3..3351f7bb5 100644 --- a/PolyPilot/Models/ModelCapabilities.cs +++ b/PolyPilot/Models/ModelCapabilities.cs @@ -192,7 +192,7 @@ public record GroupPreset(string Name, string Description, string Emoji, MultiAg public string?[]? 
WorkerDisplayNames { get; init; } private const string WorkerReviewPrompt = """ - You are a PR reviewer. When assigned a PR, follow this process: + You are a PR reviewer. When assigned a PR, do a thorough multi-model code review. ## 1. Gather Context - Run `gh pr view ` to read the description, labels, milestone, and linked issues @@ -200,55 +200,69 @@ public record GroupPreset(string Name, string Description, string Emoji, MultiAg - Run `gh pr checks ` to check CI status — if builds failed, determine whether failures are PR-specific or pre-existing infra issues (same failures on the base branch = not PR-specific) - Run `gh pr view --json reviews,comments` to check existing review comments — don't duplicate feedback already given - ## 2. Verify Claims Against Code - - Don't trust the PR description blindly — trace through the actual source code - - If the PR references a prior fix or revert, check `git log --oneline --all -- ` to understand the history - - If the change is scoped narrowly (e.g., "only affects streams, not files"), verify that claim by reading the surrounding code paths - - Check for variable name typos, missing underscores, wrong method overloads — compilers catch some, but cross-platform #if blocks can hide build errors + ## 2. Multi-Model Review + Dispatch 3 parallel sub-agent reviews via the `task` tool, each with a different model: + - One with model `claude-opus-4.6` — deep reasoning, architecture, subtle logic bugs + - One with model `claude-sonnet-4.6` — fast pattern matching, common bug classes, security + - One with model `gpt-5.3-codex` — alternative perspective, edge cases - ## 3. Dispatch Multi-Model Reviews - Dispatch 5 parallel reviews via the task tool using claude-opus-4.6, claude-opus-4.6, claude-sonnet-4.6, gemini-3-pro-preview, and gpt-5.3-codex. Include the full diff and any CI/review context in each prompt. 
Each sub-agent returns findings as: - ``` - [SEVERITY] file:line — description - ``` - Where SEVERITY is: 🔴 CRITICAL, 🟡 MODERATE, 🟢 MINOR + Each sub-agent should receive the full diff and review for: regressions, security issues, bugs, data loss, race conditions, and code quality. Do NOT ask about style or formatting. + + If a model is unavailable, proceed with the remaining models. If only 1 model ran, include all its findings with a ⚠️ LOW CONFIDENCE disclaimer. + + ## 3. Adversarial Consensus + After collecting all sub-agent reviews: + - If all 3 models agree on a finding, include it immediately + - If only 1 model flagged a finding, share that finding with the other 2 models (dispatch follow-up sub-agents) and ask: "Model X found this issue — do you agree or disagree? Explain why." + - If after the adversarial round, 2+ models agree, include the finding. If still only 1 model, discard it (note in informational section) + - For findings where models disagree on severity, use the median severity ## 4. Synthesize Final Report - - Only include issues flagged by 2+ models (consensus filter) - - Rank by severity - - Include file path and line numbers - - Note CI status: ✅ passing, ❌ failing (PR-specific), ⚠️ failing (pre-existing) + Produce ONE comprehensive report with: + - Findings ranked by severity: 🔴 CRITICAL, 🟡 MODERATE, 🟢 MINOR + - For each finding: file path, line numbers, which models flagged it, what's wrong, why it matters + - CI status: ✅ passing, ❌ failing (PR-specific), ⚠️ failing (pre-existing) - Note if prior review comments were addressed or still outstanding - - Assess test coverage: Are there new code paths that lack tests? Suggest specific test cases or scenarios that should be added. + - Assess test coverage: Are there new code paths that lack tests? - End with recommended action: ✅ Approve, ⚠️ Request changes (with specific ask), or 🔴 Do not merge - ## 5. Fix Process (when told to fix a PR) - 1. 
`gh pr checkout ` then `git fetch origin main && git rebase origin/main` + ## 5. Posting the Review + Post exactly ONE comment per review using `gh pr comment --body ""`. + - If you previously posted a comment on this PR, EDIT it instead: find your comment ID with `gh api repos/{owner}/{repo}/issues/{number}/comments` and update via `gh api repos/{owner}/{repo}/issues/comments/{id} -X PATCH -f body=""` + - NEVER post multiple comments — always update/replace the existing one + - The comment should be self-contained: include all findings, consensus results, and recommendation in a single comment + + ## 6. Fix Process (when told to fix a PR) + 1. `gh pr checkout ` then `git fetch origin main && git merge origin/main` (resolve any conflicts) 2. View the file, find the issue, use the edit tool to make minimal changes 3. Discover and run the repo's test suite (look for test projects, Makefiles, CI scripts, package.json scripts, etc.) - 4. Commit with Co-authored-by trailer, push with `--force-with-lease` - 5. After pushing, do a full re-review (repeat the 5-model dispatch above) + 4. `git add ` (NEVER `git add -A`), verify with `git status`, commit with Co-authored-by trailer, push + 5. Verify push landed: `git fetch origin && git log --oneline origin/ -3` — confirm your commit appears + 6. If push didn't land, investigate and retry before reporting success + + ## 7. Re-Review Process (when re-reviewing after fixes) + Re-run the 3-model review on the updated diff. For each finding from the previous review, report status: + - ✅ FIXED — the issue is resolved + - ❌ STILL PRESENT — the issue remains + - ⚠️ PARTIALLY FIXED — partially addressed, explain what remains + - ➖ N/A — no longer applicable (code removed, etc.) - ## 6. 
Re-Review Process (when previous findings exist) - Include previous findings in each sub-agent prompt and ask them to report: - ``` - ## Previous Findings Status - - Finding 1: FIXED / STILL PRESENT / N/A - ``` + Update (EDIT, not add) your existing PR comment with the re-review results appended. ## Rules - If workers share a worktree, NEVER checkout a branch during review-only tasks — use `gh pr diff` instead - If each worker has its own isolated worktree, you may freely checkout branches for both review and fix tasks - Always include the FULL diff — never truncate - Use the edit tool for file changes, not sed + - NEVER post more than one comment on a PR — always edit/replace """; public static readonly GroupPreset[] BuiltIn = new[] { new GroupPreset( - "PR Review Squad", "5 reviewers with multi-model consensus (2+ models must agree)", + "PR Review Squad", "5 reviewers — each does multi-model consensus (Opus + Sonnet + Codex)", "📋", MultiAgentMode.Orchestrator, - "claude-opus-4.6", new[] { "claude-sonnet-4.6", "claude-sonnet-4.6", "claude-sonnet-4.6", "claude-sonnet-4.6", "claude-sonnet-4.6" }) + "claude-opus-4.6", new[] { "claude-opus-4.6", "claude-opus-4.6", "claude-opus-4.6", "claude-opus-4.6", "claude-opus-4.6" }) { WorkerSystemPrompts = new[] { @@ -257,27 +271,27 @@ public record GroupPreset(string Name, string Description, string Emoji, MultiAg SharedContext = """ ## Review Standards - - Only flag real issues: bugs, security holes, logic errors, data loss risks, race conditions + - Only flag real issues: bugs, security holes, logic errors, data loss risks, race conditions, regressions - NEVER comment on style, formatting, naming conventions, or documentation - Every finding must include: file path, line number (or range), what's wrong, and why it matters - If a PR looks clean, say so — don't invent problems to justify your existence - - An issue must be flagged by at least 2 of the 5 sub-agent models to be included in the final report (consensus filter) + - 
An issue must survive adversarial consensus: if only 1 model flags it, the other models get a chance to agree/disagree before inclusion + - Post exactly ONE comment per PR — always edit/replace, never add multiple comments ## Fix Standards - - When fixing a PR: checkout, git rebase origin/main, apply minimal fixes, run tests, commit with Co-authored-by trailer, push - - After pushing fixes, always do a full re-review (5-model dispatch again) + - When fixing a PR: checkout, git merge origin/main, apply minimal fixes, run tests, commit with Co-authored-by trailer, push + - After pushing fixes, always do a full re-review - Include previous findings in re-review prompts so sub-agents can verify fix status - - Use --force-with-lease (never --force) when pushing rebased branches + - Verify push landed: git fetch origin && git log to confirm your commit appears - Never git add -A blindly — use git add and check git status first ## Operational Lessons - - Workers reliably complete review-only tasks (fetch diff + dispatch sub-agents) + - Workers reliably complete review-only tasks (fetch diff + review) - Workers sometimes fail multi-step fix tasks silently — always verify push landed with git fetch - If a worker's fix task didn't produce a commit after 5+ minutes, re-dispatch with more explicit instructions - - Opus workers are more reliable for complex fix+review tasks than Sonnet workers - - Always include the FULL diff in sub-agent prompts (truncated diffs cause incorrect findings) + - Always include the FULL diff in review prompts (truncated diffs cause incorrect findings) """, RoutingContext = """ ## Core Rule @@ -286,23 +300,21 @@ public record GroupPreset(string Name, string Description, string Emoji, MultiAg ## Task Assignment - When given PRs to review, assign ONE PR to EACH worker. Distribute round-robin. If more PRs than workers, assign multiple per worker. + Assign ONE worker per PR. 
Each worker handles its own multi-model review internally (dispatching sub-agents to Opus, Sonnet, and Codex). Do NOT assign multiple workers to the same PR. - For review-only tasks: - - If workers share a worktree: "Review PR #. Do NOT checkout the branch — use gh pr diff only." - - If workers have isolated worktrees: "Review PR #." (they can checkout freely) - For fix tasks, tell the worker: "Fix PR #. Checkout, rebase on origin/main, apply fixes, test, push, then re-review." + When given multiple PRs, distribute round-robin across workers. If more PRs than workers, assign multiple PRs per worker. - Workers handle the multi-model dispatch internally. However, for fix tasks, you MUST give explicit step-by-step instructions. + For review-only tasks, tell each worker: "Please do a full code review of PR #. Check for regressions, security issues, and code quality." + - If workers share a worktree, add: "Do NOT checkout the branch — use gh pr diff only." + For fix tasks, tell the worker: "Fix PR #. Checkout, merge origin/main, apply fixes, test, push, then re-review." ## Orchestrator Responsibilities - 1. Track state: Which PRs each worker reviewed, findings, fix status, merge readiness + 1. Track state: Which PRs each worker is reviewing, findings, fix status, merge readiness 2. Merge: gh pr merge --squash 3. Verify pushes: After a worker claims to have pushed, always run git fetch origin and check git log to confirm 4. Re-dispatch on failure: Workers sometimes fail silently on multi-step tasks. Check for new commits after fix tasks. - 5. Re-review pattern: When re-reviewing, include previous findings in the prompt so sub-agents can verify what's fixed vs still present - 6. Worktree safety: If workers share a worktree, only ONE can checkout/push at a time. If workers have isolated worktrees, they can work in parallel. + 5. Worktree safety: If workers share a worktree, only ONE can checkout/push at a time. If workers have isolated worktrees, they can work in parallel. 
## Summary Table Format
diff --git a/PolyPilot/Services/CopilotService.Events.cs b/PolyPilot/Services/CopilotService.Events.cs
index b202130c5..9e1c20b60 100644
--- a/PolyPilot/Services/CopilotService.Events.cs
+++ b/PolyPilot/Services/CopilotService.Events.cs
@@ -233,7 +233,9 @@ private void HandleSessionEvent(SessionState state, SessionEvent evt)
                 // JSON-RPC connection is alive, so future Case A resets are legitimate.
                 Interlocked.Exchange(ref state.WatchdogCaseAResets, 0);
                 Interlocked.Exchange(ref state.WatchdogCaseBResets, 0);
-                Interlocked.Exchange(ref state.WatchdogCaseBLastFileSize, 0);
+                // Don't reset WatchdogCaseBLastFileSize to 0 — keep the last known file size
+                // so when Case B first triggers after events stop, prevSize > 0 and the stale
+                // detection works on the first iteration instead of wasting a 180s cycle.
                 Interlocked.Exchange(ref state.WatchdogCaseBStaleCount, 0);
                 // Clear the reconnect flag — event stream is alive for this session.
                 state.IsReconnectedSend = false;
diff --git a/PolyPilot/Services/CopilotService.Organization.cs b/PolyPilot/Services/CopilotService.Organization.cs
index 31c410505..09701d446 100644
--- a/PolyPilot/Services/CopilotService.Organization.cs
+++ b/PolyPilot/Services/CopilotService.Organization.cs
@@ -38,7 +38,10 @@ public partial class CopilotService
     private static readonly TimeSpan WorkerExecutionTimeoutRemote = TimeSpan.FromMinutes(10);
     private static readonly Regex WorkerNamePattern = new(@"-[Ww]orker-\d+(-\d+)?$", RegexOptions.Compiled);
 
+    /// <summary>Maximum time the orchestrator waits for all workers to complete. If a worker is stuck,
+    /// the orchestrator proceeds with partial results rather than blocking the group forever.</summary>
+    private static readonly TimeSpan OrchestratorCollectionTimeout = TimeSpan.FromMinutes(15);
     /// How long to poll for premature idle indicators after the initial TCS completes.
     /// Checks both WasPrematurelyIdled flag (set by EVT-REARM) and events.jsonl freshness
     /// (CLI still writing events). The events.jsonl check catches cases where EVT-REARM
     /// takes 30-60s to fire.
@@ -81,6 +84,11 @@ public partial class CopilotService
     // so the model sees them in its conversation context.
     private readonly ConcurrentDictionary> _reflectQueuedPrompts = new();
 
+    // Per-group queued user prompts for non-reflect Orchestrator mode.
+    // When a user sends a message while an orchestrator dispatch is running,
+    // the message is queued here and drained after the current dispatch completes.
+    private readonly ConcurrentDictionary> _orchestratorQueuedPrompts = new();
+
     #region Session Organization (groups, pinning, sorting)
 
     public async Task CreateMultiAgentGroupAsync(string groupName, string orchestratorModel, string workerModel, int workerCount, MultiAgentMode mode, string? systemPrompt = null)
@@ -1030,6 +1038,9 @@ public void DeleteGroup(string groupId)
 
         // Collect all worktree IDs for cleanup before removing metadata
         var worktreeIds = new HashSet();
+
+        // Clean up orchestrator queue state for this group
+        _orchestratorQueuedPrompts.TryRemove(groupId, out _);
         if (group2?.WorktreeId != null) worktreeIds.Add(group2.WorktreeId);
         // CreatedWorktreeIds is the authoritative list (covers cases where session creation failed)
         if (group2?.CreatedWorktreeIds != null)
@@ -1637,9 +1648,31 @@ public async Task SendToMultiAgentGroupAsync(string groupId, string prompt, Canc
         if (members.Count == 0) { Debug($"[DISPATCH] SendToMultiAgentGroupAsync: no members for group '{group.Name}'"); return; }
 
         // Serialize dispatches to the same group (bridge + event queue drain race).
-        // Callers wait their turn rather than being dropped.
+        // For Orchestrator mode: non-blocking check — queue if busy, with user feedback.
+        // For other modes: blocking wait (they complete quickly).
var dispatchLock = _groupDispatchLocks.GetOrAdd(groupId, _ => new SemaphoreSlim(1, 1));
-        await dispatchLock.WaitAsync(cancellationToken);
+
+        if (group.OrchestratorMode == MultiAgentMode.Orchestrator)
+        {
+            if (!dispatchLock.Wait(0))
+            {
+                // Orchestrator is busy — queue the prompt and show feedback. NOTE(review): if the current holder finishes draining before this Enqueue lands, the prompt waits until the next dispatch.
+                var orchestratorName = GetOrchestratorSession(groupId);
+                Debug($"[DISPATCH] Orchestrator busy for group '{group.Name}' — queuing prompt for after current dispatch");
+                var queue = _orchestratorQueuedPrompts.GetOrAdd(groupId, _ => new ConcurrentQueue());
+                queue.Enqueue(prompt);
+                if (orchestratorName != null)
+                {
+                    AddOrchestratorSystemMessage(orchestratorName,
+                        $"📨 New task queued (will be sent to orchestrator when current work completes): {prompt}");
+                }
+                return;
+            }
+        }
+        else
+        {
+            await dispatchLock.WaitAsync(cancellationToken);
+        }
 
         try
         {
@@ -1657,6 +1690,8 @@ public async Task SendToMultiAgentGroupAsync(string groupId, string prompt, Canc
 
                 case MultiAgentMode.Orchestrator:
                     await SendViaOrchestratorAsync(groupId, members, prompt, cancellationToken);
+                    // Drain any prompts queued while this dispatch was running
+                    await DrainOrchestratorQueueAsync(groupId, members, cancellationToken);
                     break;
 
                 case MultiAgentMode.OrchestratorReflect:
@@ -1676,9 +1711,48 @@ public async Task SendToMultiAgentGroupAsync(string groupId, string prompt, Canc
     }
 
+    /// <summary>
+    /// Drain queued user prompts that arrived while a non-reflect orchestrator dispatch was running.
+    /// Each queued prompt is sent to the orchestrator as a new task, which dispatches to available workers.
+    /// Called while still holding the dispatch lock, so no new dispatches can interleave.
+    /// Drains until the queue is empty: while the lock is held, contenders enqueue without blocking, so a per-cycle cap would only strand prompts.
+    /// </summary>
+    private async Task DrainOrchestratorQueueAsync(string groupId, List members, CancellationToken cancellationToken)
+    {
+        if (!_orchestratorQueuedPrompts.TryGetValue(groupId, out var queue))
+            return;
+
+        // Test cancellation BEFORE dequeuing so a cancelled drain never drops a prompt.
+        while (!cancellationToken.IsCancellationRequested && queue.TryDequeue(out var queuedPrompt))
+        {
+            Debug($"[DISPATCH] Draining queued orchestrator prompt for group '{groupId}' (len={queuedPrompt.Length})");
+
+            try
+            {
+                await SendViaOrchestratorAsync(groupId, members, queuedPrompt, cancellationToken);
+            }
+            catch (Exception ex) when (ex is not OperationCanceledException)
+            {
+                // Best-effort: surface the failure and keep draining the remaining prompts.
+                // Cancellation is excluded so it propagates instead of being reported as a failed task.
+                Debug($"[DISPATCH] Queued orchestrator prompt failed: {ex.GetType().Name}: {ex.Message}");
+                var orchestratorName = GetOrchestratorSession(groupId);
+                if (orchestratorName != null)
+                {
+                    AddOrchestratorSystemMessage(orchestratorName,
+                        $"⚠️ Failed to process queued task: {ex.Message}");
+                }
+            }
+        }
+
+        // Only surface cancellation when it actually cut the drain short.
+        if (!queue.IsEmpty)
+            cancellationToken.ThrowIfCancellationRequested();
+    }
+
     ///
     /// Build a multi-agent context prefix for a session in a group.
     /// Includes model info for each member so agents know each other's capabilities.
     ///
     private string BuildMultiAgentPrefix(string sessionName, SessionGroup group, List allMembers)
     {
         var meta = Organization.Sessions.FirstOrDefault(m => m.SessionName == sessionName);
@@ -1848,7 +1922,68 @@ private async Task SendViaOrchestratorAsync(string groupId, List members
             if (workerTasks.Count < assignments.Count)
                 await Task.Delay(1000, cancellationToken);
         }
-        var results = await Task.WhenAll(workerTasks);
+
+        // Bounded wait: if any worker is stuck, proceed with partial results
+        // rather than blocking the entire orchestrator group indefinitely.
+        var allDone = Task.WhenAll(workerTasks);
+        // The timeout delay does not observe the caller's token — if the caller's token
+        // is cancelled, Task.WhenAny returns the cancelled allDone (not timeout),
+        // and OperationCanceledException propagates cleanly without entering the
+        // force-complete branch. It has its own CTS only so the timer can be released
+        // as soon as the wait is over instead of lingering for the full timeout.
+        using var timeoutCts = new CancellationTokenSource();
+        var timeout = Task.Delay(OrchestratorCollectionTimeout, timeoutCts.Token);
+        WorkerResult[] results;
+        var collectionTimedOut = await Task.WhenAny(allDone, timeout) != allDone;
+        timeoutCts.Cancel();
+        if (collectionTimedOut)
+        {
+            Debug($"[DISPATCH] Orchestrator collection timeout ({OrchestratorCollectionTimeout.TotalMinutes}m) — force-completing stuck workers");
+            foreach (var a in assignments)
+            {
+                if (_sessions.TryGetValue(a.WorkerName, out var ws))
+                {
+                    if (ws.Info.IsProcessing)
+                    {
+                        Debug($"[DISPATCH] Force-completing stuck worker '{a.WorkerName}'");
+                        AddOrchestratorSystemMessage(a.WorkerName,
+                            "⚠️ Worker timed out — orchestrator is proceeding with partial results.");
+                        await ForceCompleteProcessingAsync(a.WorkerName, ws, $"orchestrator collection timeout ({OrchestratorCollectionTimeout.TotalMinutes}m)");
+                    }
+                    else if (ws.ResponseCompletion?.Task.IsCompleted == false)
+                    {
+                        // Worker hasn't started processing yet (e.g., stuck in SendAsync).
+                        // Resolve the TCS so ExecuteWorkerAsync unblocks.
+                        Debug($"[DISPATCH] Resolving TCS for non-processing worker '{a.WorkerName}'");
+                        ws.ResponseCompletion?.TrySetResult("(worker timed out — never started processing)");
+                    }
+                }
+            }
+            // Collect results. Force-completed tasks should finish promptly, but a worker that matched
+            // neither branch above may still be running, so every await shares one grace period —
+            // otherwise this "bounded" wait could still block forever.
+            var collectionGrace = Task.Delay(TimeSpan.FromSeconds(30), CancellationToken.None);
+            var partialResults = new List<WorkerResult>(workerTasks.Count);
+            for (var taskIndex = 0; taskIndex < workerTasks.Count; taskIndex++)
+            {
+                var pendingTask = workerTasks[taskIndex];
+                // Presumably workerTasks[i] was started for assignments[i] — TODO confirm against the
+                // dispatch loop; falls back to "unknown" if the counts differ.
+                var fallbackWorkerName = taskIndex < assignments.Count ? assignments[taskIndex].WorkerName : "unknown";
+                if (await Task.WhenAny(pendingTask, collectionGrace) != pendingTask)
+                {
+                    partialResults.Add(new WorkerResult(fallbackWorkerName, null, false, "Error: worker did not complete after force-completion", TimeSpan.Zero));
+                    continue;
+                }
+                try { partialResults.Add(await pendingTask); }
+                catch (Exception ex) { partialResults.Add(new WorkerResult(fallbackWorkerName, null, false, $"Error: {ex.Message}", TimeSpan.Zero)); }
+            }
+            results = partialResults.ToArray();
+        }
+        else
+        {
+            results = await allDone;
+        }
 
         // After early dispatch, the orchestrator may still be doing tool work.
         await WaitForSessionIdleAsync(orchestratorName, cancellationToken);
diff --git a/PolyPilot/Services/CopilotService.cs b/PolyPilot/Services/CopilotService.cs
index 405b29c17..1f64b6bea 100644
--- a/PolyPilot/Services/CopilotService.cs
+++ b/PolyPilot/Services/CopilotService.cs
@@ -1984,25 +1984,30 @@ public async Task DeselectAgentAsync(string sessionName)
 
     ///
     /// Starts fleet mode (parallel subagent execution) for the given session with the provided prompt.
-    /// Returns true if the fleet was started successfully, false otherwise.
+    /// Returns (true, null) on success or (false, reason) on failure.
     ///
-    public async Task StartFleetAsync(string sessionName, string prompt)
+    public async Task<(bool Started, string? Error)> StartFleetAsync(string sessionName, string prompt)
     {
-        if (!_sessions.TryGetValue(sessionName, out var state) || state.Session == null)
-            return false;
+        if (!_sessions.TryGetValue(sessionName, out var state))
+            return (false, "Session not found.");
+
+        if (state.Session == null)
+            return (false, "Session is not connected (Session object is null).");
 
         if (state.Info.IsProcessing)
-            return false;
+            return (false, "Session is currently processing. Wait for it to finish.");
 
         try
         {
             var result = await state.Session.Rpc.Fleet.StartAsync(prompt, CancellationToken.None);
-            return result?.Started ?? false;
+            if (result?.Started == true)
+                return (true, null);
+            return (false, "CLI returned Started=false. Fleet mode may not be supported by this CLI version.");
         }
         catch (Exception ex)
         {
-            System.Diagnostics.Debug.WriteLine($"[Fleet] StartAsync failed for '{sessionName}': {ex.Message}");
-            return false;
+            Debug($"[Fleet] StartAsync failed for '{sessionName}': {ex.GetType().Name}: {ex.Message}");
+            return (false, "RPC error communicating with CLI. Check logs for details.");
         }
     }
 