diff --git a/CHANGELOG.md b/CHANGELOG.md index 040cbca89..72e9d134b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed +- **run/mcp**: Make fat-harness acceptance opt-in via `seed.orchestrator.execution_mode: fat_harness` for fresh CLI/MCP seed execution. Missing/blank execution mode now uses the default runner again until seed authoring and QA guidance consistently emit profile-compatible typed evidence for every AC. This mitigates layered scaffold AC failures reported in #1202. + ### Added - **providers**: GitHub Copilot CLI adapter (`CopilotCliLLMAdapter`) — first-class peer of Codex/Gemini/OpenCode adapters. Switch with `OUROBOROS_LLM_BACKEND=copilot`. Uses local `copilot -p` non-interactive mode with `GH_TOKEN`/`GITHUB_TOKEN` auth, hard tool envelope via `--available-tools`+`--allow-tool`+`--add-dir`, sandbox-class permission mapping, JSONL stream parsing, recursion guard via shared `_OUROBOROS_DEPTH` counter (max depth 5), and auth-error short-circuit on `401`/missing-token detections. Optional install: `pip install ouroboros-ai[copilot]` (the Copilot CLI itself is installed externally). - **opencode**: Subagent bridge plugin (`src/ouroboros/opencode/plugin/ouroboros-bridge.ts`) — routes MCP `ouroboros_*` tool calls with a `_subagent` parameter into OpenCode's native Task subagent panes via `session.promptAsync`. Fire-and-forget dispatch returns from the hook in ~10ms, eliminating the blocking 200s+ latency of the previous `session.prompt` approach. Installed automatically by `ouroboros setup`. See [OpenCode Subagent Bridge](docs/guides/opencode-subagent-bridge.md). 
diff --git a/src/ouroboros/cli/commands/run.py b/src/ouroboros/cli/commands/run.py index b280bbb9a..ad2660621 100644 --- a/src/ouroboros/cli/commands/run.py +++ b/src/ouroboros/cli/commands/run.py @@ -243,27 +243,31 @@ def _load_skip_completed_markers( def _resolve_fat_harness_mode(seed_data: dict[str, Any]) -> bool: - """Typed evidence plus verifier PASS is the only CLI acceptance path. + """Resolve the fresh-run fat-harness selector. - ``seed.orchestrator.execution_mode`` was the temporary #920 PR-4 opt-in - selector. After #978 P5, ``legacy`` is rejected instead of silently - accepting a self-report fallback selector. + Fat-harness acceptance is opt-in until the shipped authoring/QA pipeline can + reliably produce profile-compatible typed evidence for every AC. Seeds that + request ``seed.orchestrator.execution_mode: fat_harness`` keep the stricter + verifier-gated path; missing/blank selectors use the default runner. """ orchestrator_config = seed_data.get("orchestrator") if not isinstance(orchestrator_config, dict): - return True + return False execution_mode = orchestrator_config.get("execution_mode") if execution_mode == "legacy": print_error( "seed.orchestrator.execution_mode='legacy' was removed after #978 P5; " - "typed evidence plus verifier PASS is now required for acceptance." + "omit the selector for the default runner or set execution_mode='fat_harness' " + "to opt in to typed evidence plus verifier PASS acceptance." ) raise typer.Exit(1) - if execution_mode not in (None, "", "fat_harness"): + if execution_mode in (None, ""): + return False + if execution_mode != "fat_harness": print_error( - "seed.orchestrator.execution_mode is no longer configurable after " - f"the fat-harness default flip (got {execution_mode!r})." + "seed.orchestrator.execution_mode must be 'fat_harness' when set " + f"(got {execution_mode!r})." 
) raise typer.Exit(1) @@ -277,18 +281,18 @@ def _resolve_resume_fat_harness_mode( """Resolve resume acceptance mode from persisted contract with safe migration. New sessions persist ``fat_harness_mode`` at prepare time. Historical - sessions may not have that field, so only an explicit historical - ``execution_mode: legacy`` selector resumes ungated; unknown/missing state - falls back to the conservative typed-evidence gate. + sessions may not have that field, so only an explicit ``fat_harness`` + selector resumes with verifier-gated typed-evidence enforcement; + unknown/missing state falls back to the default runner. """ persisted = progress.get("fat_harness_mode") if isinstance(persisted, bool): return persisted orchestrator_config = seed_data.get("orchestrator") - return not ( + return ( isinstance(orchestrator_config, dict) - and orchestrator_config.get("execution_mode") == "legacy" + and orchestrator_config.get("execution_mode") == "fat_harness" ) @@ -431,7 +435,7 @@ async def _run_orchestrator( print_info(f"Max decomposition depth: {resolved_max_decomposition_depth}") print_info(f"Max parallel workers: {resolved_max_parallel_workers}") if resolved_fat_harness_mode: - print_info("Execution mode: fat_harness (default)") + print_info("Execution mode: fat_harness") if externally_satisfied_acs: print_info(f"Externally satisfied ACs: {len(externally_satisfied_acs)}") diff --git a/src/ouroboros/mcp/tools/execution_handlers.py b/src/ouroboros/mcp/tools/execution_handlers.py index 8d66f9aca..c733adbd2 100644 --- a/src/ouroboros/mcp/tools/execution_handlers.py +++ b/src/ouroboros/mcp/tools/execution_handlers.py @@ -102,15 +102,16 @@ def _validate_fresh_execution_mode( return Result.err( MCPToolError( "seed.orchestrator.execution_mode='legacy' was removed after #978 P5; " - "typed evidence plus verifier PASS is now required for acceptance.", + "omit the selector for the default runner or set execution_mode='fat_harness' " + "to opt in to typed evidence plus verifier 
PASS acceptance.", tool_name=tool_name, ) ) if execution_mode not in (None, "", "fat_harness"): return Result.err( MCPToolError( - "seed.orchestrator.execution_mode is no longer configurable after " - f"the fat-harness default flip (got {execution_mode!r}).", + "seed.orchestrator.execution_mode must be 'fat_harness' when set " + f"(got {execution_mode!r}).", tool_name=tool_name, ) ) @@ -498,13 +499,13 @@ async def handle( # Create checkpoint store for execution state persistence checkpoint_store = CheckpointStore() checkpoint_store.initialize() - fat_harness_mode = True + fat_harness_mode = execution_mode == "fat_harness" if is_resume: persisted_fat_harness_mode = tracker.progress.get("fat_harness_mode") if isinstance(persisted_fat_harness_mode, bool): fat_harness_mode = persisted_fat_harness_mode else: - fat_harness_mode = execution_mode != "legacy" + fat_harness_mode = execution_mode == "fat_harness" # Create orchestrator runner runner = OrchestratorRunner( diff --git a/tests/unit/cli/test_run_qa.py b/tests/unit/cli/test_run_qa.py index d6114b886..af7bd791b 100644 --- a/tests/unit/cli/test_run_qa.py +++ b/tests/unit/cli/test_run_qa.py @@ -78,9 +78,9 @@ ) -def test_resolve_fat_harness_mode_defaults_to_enabled() -> None: - """The #920 PR-5 default flip enables fat-harness without seed opt-in.""" - assert _resolve_fat_harness_mode(VALID_SEED_DATA) is True +def test_resolve_fat_harness_mode_defaults_to_disabled() -> None: + """Fresh runs use the default runner unless the seed opts into fat-harness.""" + assert _resolve_fat_harness_mode(VALID_SEED_DATA) is False def test_resolve_fat_harness_mode_accepts_fat_harness_execution_mode() -> None: @@ -113,12 +113,12 @@ def test_resolve_resume_fat_harness_mode_uses_persisted_contract() -> None: assert _resolve_resume_fat_harness_mode(seed_data, {"fat_harness_mode": False}) is False -def test_resolve_resume_fat_harness_mode_migrates_missing_contract_conservatively() -> None: - """Only explicit historical legacy selectors 
resume ungated when contract is absent.""" - legacy_seed = {**VALID_SEED_DATA, "orchestrator": {"execution_mode": "legacy"}} +def test_resolve_resume_fat_harness_mode_migrates_missing_contract_to_default_runner() -> None: + """Only explicit fat-harness selectors resume with verifier-gated acceptance.""" + fat_harness_seed = {**VALID_SEED_DATA, "orchestrator": {"execution_mode": "fat_harness"}} - assert _resolve_resume_fat_harness_mode(legacy_seed, {}) is False - assert _resolve_resume_fat_harness_mode(VALID_SEED_DATA, {}) is True + assert _resolve_resume_fat_harness_mode(fat_harness_seed, {}) is True + assert _resolve_resume_fat_harness_mode(VALID_SEED_DATA, {}) is False def test_resolve_max_decomposition_depth_defaults_to_two(monkeypatch: pytest.MonkeyPatch) -> None: @@ -306,12 +306,12 @@ async def test_run_orchestrator_passes_resolved_execution_caps_to_runner(tmp_pat assert mock_runner_cls.call_args.kwargs["max_decomposition_depth"] == 3 assert mock_runner_cls.call_args.kwargs["max_parallel_workers"] == 7 - assert mock_runner_cls.call_args.kwargs["fat_harness_mode"] is True + assert mock_runner_cls.call_args.kwargs["fat_harness_mode"] is False @pytest.mark.asyncio -async def test_run_orchestrator_passes_default_fat_harness_mode_to_runner(tmp_path: Path) -> None: - """The default #920 PR-5 path selects fat-harness without seed opt-in.""" +async def test_run_orchestrator_passes_default_runner_mode_to_runner(tmp_path: Path) -> None: + """The default path leaves fat-harness disabled unless the seed opts in.""" seed_file = tmp_path / "seed.yaml" seed_file.write_text("goal: ignored\n", encoding="utf-8") @@ -350,7 +350,7 @@ async def test_run_orchestrator_passes_default_fat_harness_mode_to_runner(tmp_pa mock_event_store_cls.return_value.initialize = AsyncMock() await _run_orchestrator(seed_file) - assert mock_runner_cls.call_args.kwargs["fat_harness_mode"] is True + assert mock_runner_cls.call_args.kwargs["fat_harness_mode"] is False @pytest.mark.asyncio diff --git 
a/tests/unit/mcp/tools/test_definitions.py b/tests/unit/mcp/tools/test_definitions.py index f9df3b1e8..7ad265c41 100644 --- a/tests/unit/mcp/tools/test_definitions.py +++ b/tests/unit/mcp/tools/test_definitions.py @@ -240,7 +240,7 @@ async def resume_session(self, *args: object, **kwargs: object) -> Result: assert resumed.is_ok assert legacy_resumed.is_ok assert missing_contract_resumed.is_ok - assert captured_modes == [True, True, False, True] + assert captured_modes == [False, True, False, False] async def test_handle_rejects_removed_legacy_execution_mode(self) -> None: """MCP execute_seed matches the CLI removal of the legacy selector.""" @@ -289,7 +289,7 @@ async def test_handle_rejects_unknown_execution_mode(self) -> None: ) assert result.is_err - assert "execution_mode is no longer configurable" in str(result.error) + assert "execution_mode must be 'fat_harness' when set" in str(result.error) async def test_handle_reports_execution_handler_config_error(self) -> None: """Config failures should surface with execution-handler context."""