Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Fixed
- **run/mcp**: Make fat-harness acceptance opt-in via `seed.orchestrator.execution_mode: fat_harness` for fresh CLI/MCP seed execution. Missing/blank execution mode now uses the default runner again until seed authoring and QA guidance consistently emit profile-compatible typed evidence for every AC. This mitigates layered scaffold AC failures reported in #1202.

### Added
- **providers**: GitHub Copilot CLI adapter (`CopilotCliLLMAdapter`) — first-class peer of Codex/Gemini/OpenCode adapters. Switch with `OUROBOROS_LLM_BACKEND=copilot`. Uses local `copilot -p` non-interactive mode with `GH_TOKEN`/`GITHUB_TOKEN` auth, hard tool envelope via `--available-tools`+`--allow-tool`+`--add-dir`, sandbox-class permission mapping, JSONL stream parsing, recursion guard via shared `_OUROBOROS_DEPTH` counter (max depth 5), and auth-error short-circuit on `401`/missing-token detections. Optional install: `pip install ouroboros-ai[copilot]` (the Copilot CLI itself is installed externally).
- **opencode**: Subagent bridge plugin (`src/ouroboros/opencode/plugin/ouroboros-bridge.ts`) — routes MCP `ouroboros_*` tool calls with a `_subagent` parameter into OpenCode's native Task subagent panes via `session.promptAsync`. Fire-and-forget dispatch returns from the hook in ~10ms, eliminating the blocking 200s+ latency of the previous `session.prompt` approach. Installed automatically by `ouroboros setup`. See [OpenCode Subagent Bridge](docs/guides/opencode-subagent-bridge.md).
Expand Down
34 changes: 19 additions & 15 deletions src/ouroboros/cli/commands/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,27 +308,31 @@ def _load_skip_completed_markers(


def _resolve_fat_harness_mode(seed_data: dict[str, Any]) -> bool:
"""Typed evidence plus verifier PASS is the only CLI acceptance path.
"""Resolve the fresh-run fat-harness selector.

``seed.orchestrator.execution_mode`` was the temporary #920 PR-4 opt-in
selector. After #978 P5, ``legacy`` is rejected instead of silently
accepting a self-report fallback selector.
Fat-harness acceptance is opt-in until the shipped authoring/QA pipeline can
reliably produce profile-compatible typed evidence for every AC. Seeds that
request ``seed.orchestrator.execution_mode: fat_harness`` keep the stricter
verifier-gated path; missing/blank selectors use the legacy runner.
"""
orchestrator_config = seed_data.get("orchestrator")
if not isinstance(orchestrator_config, dict):
return True
return False

execution_mode = orchestrator_config.get("execution_mode")
if execution_mode == "legacy":
print_error(
"seed.orchestrator.execution_mode='legacy' was removed after #978 P5; "
"typed evidence plus verifier PASS is now required for acceptance."
"omit the selector for the default runner or set execution_mode='fat_harness' "
"to opt in to typed evidence plus verifier PASS acceptance."
)
raise typer.Exit(1)
if execution_mode not in (None, "", "fat_harness"):
if execution_mode in (None, ""):
return False
if execution_mode != "fat_harness":
print_error(
"seed.orchestrator.execution_mode is no longer configurable after "
f"the fat-harness default flip (got {execution_mode!r})."
"seed.orchestrator.execution_mode must be 'fat_harness' when set "
f"(got {execution_mode!r})."
)
raise typer.Exit(1)

Expand All @@ -342,18 +346,18 @@ def _resolve_resume_fat_harness_mode(
"""Resolve resume acceptance mode from persisted contract with safe migration.

New sessions persist ``fat_harness_mode`` at prepare time. Historical
sessions may not have that field, so only an explicit historical
``execution_mode: legacy`` selector resumes ungated; unknown/missing state
falls back to the conservative typed-evidence gate.
sessions may not have that field, so only an explicit ``fat_harness``
selector resumes with verifier-gated typed-evidence enforcement;
unknown/missing state falls back to the default runner.
"""
persisted = progress.get("fat_harness_mode")
if isinstance(persisted, bool):
return persisted

orchestrator_config = seed_data.get("orchestrator")
return not (
return (
isinstance(orchestrator_config, dict)
and orchestrator_config.get("execution_mode") == "legacy"
and orchestrator_config.get("execution_mode") == "fat_harness"
)


Expand Down Expand Up @@ -498,7 +502,7 @@ async def _run_orchestrator(
print_info(f"Max decomposition depth: {resolved_max_decomposition_depth}")
print_info(f"Max parallel workers: {resolved_max_parallel_workers}")
if resolved_fat_harness_mode:
print_info("Execution mode: fat_harness (default)")
print_info("Execution mode: fat_harness")
if externally_satisfied_acs:
print_info(f"Externally satisfied ACs: {len(externally_satisfied_acs)}")

Expand Down
130 changes: 114 additions & 16 deletions src/ouroboros/mcp/tools/execution_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,21 +102,90 @@ def _validate_fresh_execution_mode(
return Result.err(
MCPToolError(
"seed.orchestrator.execution_mode='legacy' was removed after #978 P5; "
"typed evidence plus verifier PASS is now required for acceptance.",
"omit the selector for the default runner or set execution_mode='fat_harness' "
"to opt in to typed evidence plus verifier PASS acceptance.",
tool_name=tool_name,
)
)
if execution_mode not in (None, "", "fat_harness"):
return Result.err(
MCPToolError(
"seed.orchestrator.execution_mode is no longer configurable after "
f"the fat-harness default flip (got {execution_mode!r}).",
"seed.orchestrator.execution_mode must be 'fat_harness' when set "
f"(got {execution_mode!r}).",
tool_name=tool_name,
)
)
return Result.ok(None)


def _validate_plugin_execution_mode(
    execution_mode: Any,
    *,
    tool_name: str,
) -> Result[None, MCPToolError]:
    """Refuse a fresh plugin dispatch when the seed asks for fat-harness.

    Args:
        execution_mode: Value of ``seed.orchestrator.execution_mode``; only
            the exact string ``"fat_harness"`` is rejected.
        tool_name: MCP tool name attached to the returned error.

    Returns:
        ``Result.ok(None)`` for every selector other than ``"fat_harness"``,
        otherwise ``Result.err`` wrapping an ``MCPToolError`` explaining that
        plugin dispatch cannot enforce that acceptance mode.
    """
    wants_fat_harness = execution_mode == "fat_harness"
    if not wants_fat_harness:
        return Result.ok(None)

    message = (
        "seed.orchestrator.execution_mode='fat_harness' is not supported in "
        "OpenCode plugin dispatch because the child task cannot enforce typed "
        "evidence plus verifier PASS acceptance. Run without plugin dispatch or "
        "omit the selector for the default runner."
    )
    return Result.err(MCPToolError(message, tool_name=tool_name))


async def _validate_plugin_resume_acceptance_contract(
    *,
    event_store: EventStore | None,
    execution_mode: Any,
    session_id: str | None,
    tool_name: str,
) -> Result[None, MCPToolError]:
    """Reject plugin resumes whose persisted contract requires fat-harness.

    Args:
        event_store: Store to read the session from. When ``None`` a
            temporary ``EventStore`` is created and closed before returning.
        execution_mode: Value of ``seed.orchestrator.execution_mode`` from
            the seed being resumed.
        session_id: Session to reconstruct; a missing/empty id skips the
            check entirely.
        tool_name: MCP tool name attached to any returned error.

    Returns:
        ``Result.ok(None)`` when plugin dispatch may proceed, otherwise
        ``Result.err`` wrapping an ``MCPToolError`` when the session cannot
        be reconstructed, when it persisted ``fat_harness_mode=True``, or when
        the seed requests ``fat_harness`` and no boolean contract was
        persisted.
    """
    if not session_id:
        return Result.ok(None)

    # Select the store and decide ownership with the same identity test, so a
    # caller-supplied store is never swapped for a fresh one that would then
    # be left unclosed.
    owns_store = event_store is None
    store = EventStore() if event_store is None else event_store
    try:
        await store.initialize()
        tracker_result = await SessionRepository(store).reconstruct_session(session_id)
        if tracker_result.is_err:
            return Result.err(
                MCPToolError(
                    f"Session resume failed: {tracker_result.error.message}",
                    tool_name=tool_name,
                )
            )

        persisted_fat_harness_mode = tracker_result.value.progress.get("fat_harness_mode")
        if persisted_fat_harness_mode is True:
            return Result.err(
                MCPToolError(
                    "OpenCode plugin dispatch cannot resume sessions created with "
                    "fat_harness_mode=True because the child task cannot enforce typed "
                    "evidence plus verifier PASS acceptance. Resume without plugin dispatch.",
                    tool_name=tool_name,
                )
            )

        # No persisted boolean contract: fall back to the seed selector, which
        # would resolve to fat-harness on a non-plugin resume.
        if execution_mode == "fat_harness" and not isinstance(persisted_fat_harness_mode, bool):
            return Result.err(
                MCPToolError(
                    "OpenCode plugin dispatch cannot resume sessions whose seed requests "
                    "execution_mode='fat_harness' without a persisted fat_harness_mode "
                    "contract because the child task cannot enforce typed evidence plus "
                    "verifier PASS acceptance. Resume without plugin dispatch.",
                    tool_name=tool_name,
                )
            )
        return Result.ok(None)
    finally:
        # Only close a store this function created itself.
        if owns_store:
            await store.close()


def _pause_metadata_from_progress(progress: dict[str, Any]) -> dict[str, Any]:
"""Extract pause metadata safe to expose in MCP tool results."""
metadata: dict[str, Any] = {}
Expand Down Expand Up @@ -350,18 +419,32 @@ async def handle(
if mode_result.is_err:
return mode_result

# --- Subagent dispatch: gate on runtime + opencode_mode ---
payload = build_execute_subagent(
seed_content=seed_content,
session_id=session_id,
seed_path=arguments.get("seed_path"),
cwd=str(resolved_cwd),
max_iterations=max_iterations,
skip_qa=arguments.get("skip_qa", False),
model_tier=model_tier,
max_parallel_workers=max_parallel_workers,
)
if should_dispatch_via_plugin(self.agent_runtime_backend, self.opencode_mode):
if is_resume:
plugin_mode_result = await _validate_plugin_resume_acceptance_contract(
event_store=self.event_store,
execution_mode=execution_mode,
session_id=session_id,
tool_name="ouroboros_execute_seed",
)
else:
plugin_mode_result = _validate_plugin_execution_mode(
execution_mode,
tool_name="ouroboros_execute_seed",
)
if plugin_mode_result.is_err:
return plugin_mode_result
# --- Subagent dispatch: gate on runtime + opencode_mode ---
payload = build_execute_subagent(
seed_content=seed_content,
session_id=session_id,
seed_path=arguments.get("seed_path"),
cwd=str(resolved_cwd),
max_iterations=max_iterations,
skip_qa=arguments.get("skip_qa", False),
model_tier=model_tier,
max_parallel_workers=max_parallel_workers,
)
await emit_subagent_dispatched_event(
self.event_store,
session_id=session_id,
Expand Down Expand Up @@ -498,13 +581,13 @@ async def handle(
# Create checkpoint store for execution state persistence
checkpoint_store = CheckpointStore()
checkpoint_store.initialize()
fat_harness_mode = True
fat_harness_mode = execution_mode == "fat_harness"
if is_resume:
persisted_fat_harness_mode = tracker.progress.get("fat_harness_mode")
if isinstance(persisted_fat_harness_mode, bool):
fat_harness_mode = persisted_fat_harness_mode
else:
fat_harness_mode = execution_mode != "legacy"
fat_harness_mode = execution_mode == "fat_harness"

# Create orchestrator runner
runner = OrchestratorRunner(
Expand Down Expand Up @@ -1138,6 +1221,21 @@ async def _handle_inner(
# --- Subagent dispatch: gate on runtime + opencode_mode ---
# StartExecuteSeedHandler delegates to ExecuteSeedHandler internally.
if should_dispatch_via_plugin(self.agent_runtime_backend, self.opencode_mode):
if is_resume:
plugin_mode_result = await _validate_plugin_resume_acceptance_contract(
event_store=self.event_store,
execution_mode=execution_mode,
session_id=arguments.get("session_id"),
tool_name="ouroboros_start_execute_seed",
)
else:
plugin_mode_result = _validate_plugin_execution_mode(
execution_mode,
tool_name="ouroboros_start_execute_seed",
)
if plugin_mode_result.is_err:
return plugin_mode_result

# Initialize event store first so the audit event persists.
await self._event_store.initialize()

Expand Down
24 changes: 12 additions & 12 deletions tests/unit/cli/test_run_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,9 @@ def test_resolve_cli_project_dir_uses_parent_when_context_reference_is_file(
)


def test_resolve_fat_harness_mode_defaults_to_enabled() -> None:
"""The #920 PR-5 default flip enables fat-harness without seed opt-in."""
assert _resolve_fat_harness_mode(VALID_SEED_DATA) is True
def test_resolve_fat_harness_mode_defaults_to_disabled() -> None:
"""Fresh runs use the default runner unless the seed opts into fat-harness."""
assert _resolve_fat_harness_mode(VALID_SEED_DATA) is False


def test_resolve_fat_harness_mode_accepts_fat_harness_execution_mode() -> None:
Expand Down Expand Up @@ -251,12 +251,12 @@ def test_resolve_resume_fat_harness_mode_uses_persisted_contract() -> None:
assert _resolve_resume_fat_harness_mode(seed_data, {"fat_harness_mode": False}) is False


def test_resolve_resume_fat_harness_mode_migrates_missing_contract_conservatively() -> None:
"""Only explicit historical legacy selectors resume ungated when contract is absent."""
legacy_seed = {**VALID_SEED_DATA, "orchestrator": {"execution_mode": "legacy"}}
def test_resolve_resume_fat_harness_mode_migrates_missing_contract_to_default_runner() -> None:
"""Only explicit fat-harness selectors resume with verifier-gated acceptance."""
fat_harness_seed = {**VALID_SEED_DATA, "orchestrator": {"execution_mode": "fat_harness"}}

assert _resolve_resume_fat_harness_mode(legacy_seed, {}) is False
assert _resolve_resume_fat_harness_mode(VALID_SEED_DATA, {}) is True
assert _resolve_resume_fat_harness_mode(fat_harness_seed, {}) is True
assert _resolve_resume_fat_harness_mode(VALID_SEED_DATA, {}) is False


def test_resolve_max_decomposition_depth_defaults_to_two(monkeypatch: pytest.MonkeyPatch) -> None:
Expand Down Expand Up @@ -444,12 +444,12 @@ async def test_run_orchestrator_passes_resolved_execution_caps_to_runner(tmp_pat

assert mock_runner_cls.call_args.kwargs["max_decomposition_depth"] == 3
assert mock_runner_cls.call_args.kwargs["max_parallel_workers"] == 7
assert mock_runner_cls.call_args.kwargs["fat_harness_mode"] is True
assert mock_runner_cls.call_args.kwargs["fat_harness_mode"] is False


@pytest.mark.asyncio
async def test_run_orchestrator_passes_default_fat_harness_mode_to_runner(tmp_path: Path) -> None:
"""The default #920 PR-5 path selects fat-harness without seed opt-in."""
async def test_run_orchestrator_passes_default_runner_mode_to_runner(tmp_path: Path) -> None:
"""The default path leaves fat-harness disabled unless the seed opts in."""
seed_file = tmp_path / "seed.yaml"
seed_file.write_text("goal: ignored\n", encoding="utf-8")

Expand Down Expand Up @@ -488,7 +488,7 @@ async def test_run_orchestrator_passes_default_fat_harness_mode_to_runner(tmp_pa
mock_event_store_cls.return_value.initialize = AsyncMock()
await _run_orchestrator(seed_file)

assert mock_runner_cls.call_args.kwargs["fat_harness_mode"] is True
assert mock_runner_cls.call_args.kwargs["fat_harness_mode"] is False


@pytest.mark.asyncio
Expand Down
Loading
Loading