Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions src/ouroboros/auto/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,10 +686,11 @@ async def run(self, state: AutoPipelineState) -> AutoPipelineResult:
self._save(state)
return self._result(state, ledger, blocker=str(exc) or state.last_error)
except Exception as exc:
state.mark_failed(
f"seed generation failed: {exc}",
tool_name="seed_generator",
)
message = f"seed generation failed: {exc}"
if _is_seed_generation_blocker(exc):
state.mark_blocked(message, tool_name="seed_generator")
else:
state.mark_failed(message, tool_name="seed_generator")
record_authoring_backend(state)
self._save(state)
return self._result(state, ledger, blocker=state.last_error)
Expand Down Expand Up @@ -3101,6 +3102,12 @@ def _accepts_keyword(func: Callable[..., Any], name: str) -> bool:
return False


def _is_seed_generation_blocker(exc: Exception) -> bool:
"""Classify recoverable authoring validation as blocked, not failed."""
message = str(exc)
return "Ambiguity score" in message and "exceeds threshold" in message


def _recoverable_phase_for_tool(tool_name: str | None) -> AutoPhase | None:
if tool_name in {
"interview.start",
Expand Down
27 changes: 24 additions & 3 deletions src/ouroboros/codex_permissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,14 @@ def resolve_codex_permission_mode(
return candidate # type: ignore[return-value]


def build_codex_exec_args_for_sandbox(sandbox: SandboxClass) -> list[str]:
def build_codex_exec_args_for_sandbox(
sandbox: SandboxClass,
*,
source: str | None = None,
permission_mode: str | None = None,
default_mode: CodexPermissionMode | None = None,
resolved_mode: CodexPermissionMode | None = None,
) -> list[str]:
"""Translate a sandbox class into Codex CLI exec flags.

This is the canonical entry point for new call sites. Engine code
Expand All @@ -73,14 +80,22 @@ def build_codex_exec_args_for_sandbox(sandbox: SandboxClass) -> list[str]:
msg = f"No Codex CLI mapping registered for sandbox class {sandbox!r}"
raise KeyError(msg)
if sandbox is SandboxClass.UNRESTRICTED:
log.warning("permissions.bypass_activated", sandbox=sandbox.value)
log.warning(
"permissions.bypass_activated",
sandbox=sandbox.value,
source=source,
permission_mode=permission_mode,
default_mode=default_mode,
resolved_mode=resolved_mode,
)
return list(args)


def build_codex_exec_permission_args(
permission_mode: str | None,
*,
default_mode: CodexPermissionMode = "default",
source: str | None = None,
) -> list[str]:
"""Translate a legacy permission-mode string into Codex CLI exec flags.

Expand All @@ -95,7 +110,13 @@ def build_codex_exec_permission_args(
no sandbox
"""
resolved = resolve_codex_permission_mode(permission_mode, default_mode=default_mode)
return build_codex_exec_args_for_sandbox(_PERMISSION_MODE_TO_SANDBOX[resolved])
return build_codex_exec_args_for_sandbox(
_PERMISSION_MODE_TO_SANDBOX[resolved],
source=source,
permission_mode=permission_mode,
default_mode=default_mode,
resolved_mode=resolved,
)


__all__ = [
Expand Down
38 changes: 37 additions & 1 deletion src/ouroboros/mcp/tools/authoring_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1434,6 +1434,8 @@ async def _complete_interview_response(
state: InterviewState,
session_id: str,
score: AmbiguityScore | None = None,
*,
seed_ready_override: bool | None = None,
) -> Result[MCPToolResult, MCPServerError]:
"""Complete the interview and return a Seed-ready MCP response."""
complete_result = await engine.complete_interview(state)
Expand Down Expand Up @@ -1484,7 +1486,13 @@ async def _complete_interview_response(
"completed": True,
"ambiguity_score": score.overall_score if score is not None else None,
"milestone": _milestone_for_score(score),
"seed_ready": score.is_ready_for_seed if score is not None else None,
"seed_ready": (
seed_ready_override
if seed_ready_override is not None
else score.is_ready_for_seed
if score is not None
else None
),
"required_client_gates": REQUIRED_CLIENT_GATES,
**_interview_reasoning_meta(
state=state,
Expand Down Expand Up @@ -2248,6 +2256,34 @@ async def handle(
advance_streak=False,
reset_on_failure=True,
)
# Safe-default synthesis is emitted only after the
# auto driver has filled every remaining required
# ledger gap with audited conservative defaults. Do
# not require the semantic ambiguity scorer to also
# cross the normal human "done" threshold; that score
# can lag behind the ledger and would leave a trailing
# unanswered question in the persisted transcript.
if is_safe_default_synthesis:
if has_pending_round:
state.rounds.pop()
from ouroboros.bigbang.interview import InterviewRound

state.rounds.append(
InterviewRound(
round_number=len(state.rounds) + 1,
question=last_question or "[driver safe-default finalization]",
user_response=answer,
)
)
state.clear_stored_ambiguity()
state.mark_updated()
return await self._complete_interview_response(
engine,
state,
session_id,
None,
seed_ready_override=True,
)
if exit_score is not None and qualifies_for_seed_completion(
exit_score,
is_brownfield=state.is_brownfield,
Expand Down
114 changes: 79 additions & 35 deletions src/ouroboros/orchestrator/codex_cli_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ def _build_permission_args(self) -> list[str]:
return build_codex_exec_permission_args(
self._permission_mode,
default_mode="acceptEdits",
source=f"{self._log_namespace}.agent_runtime",
)

def _get_configured_cli_path(self) -> str | None:
Expand Down Expand Up @@ -539,43 +540,87 @@ def _log_invalid_skill_intercept(self, dispatch_result: InvalidSkill) -> None:
error=self._invalid_skill_log_error(dispatch_result),
)

@staticmethod
def _auto_dispatch_error_category(
error_type: str | None,
error_text: str,
) -> str | None:
"""Classify terminal auto-dispatch failures for operator diagnostics."""
normalized_error = error_text.lower()
transport_markers = (
"transport closed",
"connection closed",
"stdio closed",
"broken pipe",
)
unavailable_markers = (
"unavailable",
"not found",
"not registered",
"unknown tool",
"no such tool",
)

if any(marker in normalized_error for marker in transport_markers):
return "mcp_transport_closed"
if error_type == "MCPResourceNotFoundError":
return "mcp_registration_missing"
if error_type == "LookupError" and "no local handler registered" in normalized_error:
return "local_handler_missing"
if error_type in {"MCPClientError", "MCPToolError"} and any(
marker in normalized_error for marker in unavailable_markers
):
return "mcp_registration_missing"
return None

@staticmethod
def _is_auto_recoverable_dispatch_unavailable(recoverable_error: AgentMessage) -> bool:
"""Return whether a recoverable auto dispatch error means the tool is unavailable."""
error_type = recoverable_error.data.get("error_type")
error_text = recoverable_error.content.lower()
if error_type == "MCPResourceNotFoundError":
return True
if error_type == "LookupError":
return "no local handler registered" in error_text

if error_type == "MCPClientError":
return any(
marker in error_text
for marker in (
"unavailable",
"not found",
"not registered",
"unknown tool",
"no such tool",
)
)
if error_type != "MCPToolError":
error_type = str(recoverable_error.data.get("error_type") or "")
error_text = recoverable_error.content
if error_text.lower().startswith("auto pipeline failed:"):
return False
return CodexCliRuntime._auto_dispatch_error_category(error_type, error_text) is not None

if error_text.startswith("auto pipeline failed:"):
return False
return any(
marker in error_text
for marker in (
"unavailable",
"not found",
"not registered",
"unknown tool",
"no such tool",
def _build_auto_dispatch_unavailable_content(
self,
tool_name: str,
category: str | None,
) -> str:
"""Return the operator-facing auto dispatch failure message."""
if category == "mcp_transport_closed":
return (
"Cannot run ooo auto: MCP transport closed before "
f"`{tool_name}` completed. Run `ouroboros mcp doctor` to verify "
"the server, then reconnect or restart the Codex App MCP session; "
"this is a transport/session failure, not proof that the tool is "
"unregistered."
)
return (
"Cannot run ooo auto: required MCP tool "
f"`{tool_name}` is unavailable. "
"Run `ouroboros mcp doctor` / setup to register the MCP server."
)

@staticmethod
def _is_mcp_transport_closed_error(message: AgentMessage) -> bool:
    """Report whether *message* is an MCP client error caused by a closed transport.

    Only messages whose ``data["error_type"]`` equals ``"MCPClientError"`` are
    considered; their content is then classified by
    ``_auto_dispatch_error_category`` and must come back as
    ``"mcp_transport_closed"``.
    """
    reported_type = message.data.get("error_type")
    if reported_type == "MCPClientError":
        category = CodexCliRuntime._auto_dispatch_error_category(
            reported_type,
            message.content,
        )
        return category == "mcp_transport_closed"
    return False

@staticmethod
def _is_recoverable_mcp_error_type(message: AgentMessage) -> bool:
    """Report whether *message* carries an MCP transport-level error.

    True when ``data["error_type"]`` is ``"MCPConnectionError"`` or
    ``"MCPTimeoutError"``, or when ``_is_mcp_transport_closed_error`` accepts
    the message.
    """
    reported_type = message.data.get("error_type")
    return (
        reported_type in {"MCPConnectionError", "MCPTimeoutError"}
        or CodexCliRuntime._is_mcp_transport_closed_error(message)
    )

def _build_auto_dispatch_unavailable_message(
self,
intercept: Resolved,
Expand All @@ -596,14 +641,13 @@ def _build_auto_dispatch_unavailable_message(
data["dispatch_error_type"] = dispatch_error_type
if dispatch_error:
data["dispatch_error"] = dispatch_error
category = self._auto_dispatch_error_category(dispatch_error_type, dispatch_error or "")
if category:
data["dispatch_error_category"] = category

return AgentMessage(
type="result",
content=(
"Cannot run ooo auto: required MCP tool "
f"`{intercept.mcp_tool}` is unavailable. "
"Run `ouroboros mcp doctor` / setup to register the MCP server."
),
content=self._build_auto_dispatch_unavailable_content(intercept.mcp_tool, category),
data=data,
resume_handle=current_handle,
)
Expand Down Expand Up @@ -719,7 +763,7 @@ def _extract_recoverable_dispatch_error(
if metadata.get("is_retriable") is True or metadata.get("retriable") is True:
return final_message

if final_message.data.get("error_type") in {"MCPConnectionError", "MCPTimeoutError"}:
if self._is_recoverable_mcp_error_type(final_message):
return final_message

return None
Expand Down
1 change: 1 addition & 0 deletions src/ouroboros/providers/codex_cli_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ def _build_permission_args(self) -> list[str]:
return build_codex_exec_permission_args(
self._permission_mode,
default_mode="default",
source=f"{self._log_namespace}.llm_adapter",
)

def _get_configured_cli_path(self) -> str | None:
Expand Down
32 changes: 32 additions & 0 deletions tests/unit/auto/test_interview_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -1383,6 +1383,38 @@ async def generate_seed(session_id: str) -> Seed: # noqa: ARG001
assert "Outputs are stdout-only by inference" not in result.assumptions


@pytest.mark.asyncio
async def test_pipeline_blocks_on_seed_ambiguity_validation(tmp_path) -> None:
    """An ambiguity-threshold error from seed generation blocks the pipeline."""
    ambiguity_error = (
        "ouroboros_generate_seed failed: Validation error: Ambiguity score 0.26 "
        "exceeds threshold 0.2. Cannot generate Seed."
    )

    async def start(goal: str, cwd: str) -> InterviewTurn:  # noqa: ARG001
        # The interview is already complete and seed-ready on the first turn.
        return InterviewTurn("done", "interview_1", seed_ready=True, completed=True)

    async def answer(session_id: str, text: str) -> InterviewTurn:  # noqa: ARG001
        raise AssertionError("completed interview should not need another answer")

    async def generate_seed(session_id: str) -> Seed:  # noqa: ARG001
        raise RuntimeError(ambiguity_error)

    pipeline_state = AutoPipelineState(goal="Build a CLI", cwd=str(tmp_path))
    draft_ledger = SeedDraftLedger.from_goal(pipeline_state.goal)
    _fill_ready(draft_ledger)
    pipeline_state.ledger = draft_ledger.to_dict()

    interview_driver = AutoInterviewDriver(
        FunctionInterviewBackend(start, answer),
        store=AutoStore(tmp_path),
        max_rounds=1,
    )
    pipeline = AutoPipeline(
        interview_driver,
        generate_seed,
        store=AutoStore(tmp_path),
        skip_run=True,
    )

    outcome = await pipeline.run(pipeline_state)

    assert outcome.status == "blocked"
    assert pipeline_state.phase == AutoPhase.BLOCKED
    assert pipeline_state.interview_completed is True
    assert "Ambiguity score 0.26 exceeds threshold 0.2" in (outcome.blocker or "")
    assert pipeline_state.last_tool_name == "seed_generator"


@pytest.mark.asyncio
async def test_pipeline_uses_explicit_goal_facts_before_completed_interview(tmp_path) -> None:
async def start(goal: str, cwd: str) -> InterviewTurn: # noqa: ARG001
Expand Down
5 changes: 4 additions & 1 deletion tests/unit/mcp/tools/test_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2376,7 +2376,10 @@ async def complete_interview(
assert result.value.meta["completed"] is True
assert result.value.meta["seed_ready"] is True
assert state.status is InterviewStatus.COMPLETED
assert state.rounds == []
assert len(state.rounds) == 1
assert state.rounds[0].question == "[driver safe-default finalization]"
assert state.rounds[0].user_response is not None
assert "[safe-default-synthesis]" in state.rounds[0].user_response
mock_engine.complete_interview.assert_awaited_once()


Expand Down
Loading
Loading