Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions src/ouroboros/auto/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -1348,6 +1348,41 @@ def _checkpoint_dispatch(envelope: dict[str, Any]) -> None:
state, ledger, review=review, blocker=state.last_error, run_subagent=run_subagent
)
if terminal_status == "failed" and stop_reason in _RALPH_BLOCKED_STOP_REASONS:
# L5-a / #1157: when Ralph terminates with ``oscillation_detected``
# and the session has a wired lateral thinker in complete-product
# mode, route through UNSTUCK_LATERAL first instead of bailing
# straight to BLOCKED. Mirrors the EVALUATE → UNSTUCK_LATERAL
# path at ``_evaluate_after_qa``. Other Ralph stop_reasons
# (iteration_timeout, wall_clock_exhausted, grade_regressing,
# max_generations reached) continue to BLOCKED unchanged because
# those terminals are budget exhaustions rather than
# spec-reframe candidates.
if (
stop_reason == "oscillation_detected"
and self.lateral_thinker is not None
and state.complete_product
):
state.transition(
AutoPhase.UNSTUCK_LATERAL,
"Ralph oscillation_detected; invoking lateral persona for reframing",
)
self._save(state)
return await self._run_lateral(
state,
ledger,
seed,
qa_score=0.0,
qa_verdict="oscillation_detected",
qa_differences=(
"Ralph oscillated between grade states without converging on A grade.",
),
qa_suggestions=(
"Reframe the Seed acceptance criteria so the grade oscillation pattern cannot recur.",
),
cache_suffix="",
review=review,
run_subagent=run_subagent,
)
state.mark_blocked(stop_reason, tool_name="ralph_starter")
self._save(state)
return self._result(
Expand Down Expand Up @@ -2353,6 +2388,14 @@ async def _poll_ralph_job(
self._save(state)
return self._result(state, ledger, review=review, blocker=state.last_error)
if terminal_status == "failed" and stop_reason in _RALPH_BLOCKED_STOP_REASONS:
# L5-a / #1157: the live ``_handoff_to_ralph`` path routes
# ``oscillation_detected`` through ``UNSTUCK_LATERAL`` when a
# lateral thinker is wired. The resume path intentionally does
# not yet plumb lateral recovery — instead it BLOCKED-s with
# ``tool_name="ralph_starter"``, which ``_recoverable_phase_for_tool``
# maps back to ``RALPH_HANDOFF`` so a re-resume retries Ralph
# from scratch rather than stranding the session. Extending
# lateral recovery into the resume path is reserved for L5-b.
state.mark_blocked(stop_reason, tool_name="ralph_starter")
self._save(state)
return self._result(state, ledger, review=review, blocker=state.last_error)
Expand Down Expand Up @@ -2475,6 +2518,13 @@ async def _reattach_ralph_job(
self._save(state)
return self._result(state, ledger, blocker=state.last_error)
if terminal_status == "failed" and stop_reason in _RALPH_BLOCKED_STOP_REASONS:
# L5-a / #1157: re-attach observes an already-dispatched Ralph
# job's terminal status and does not currently plumb
# ``oscillation_detected`` through ``UNSTUCK_LATERAL``. Falling
# through to BLOCKED with ``tool_name="ralph_starter"`` keeps
# the session resumable (``_recoverable_phase_for_tool`` maps
# ralph_starter back to RALPH_HANDOFF). Lateral recovery on
# the re-attach branch is reserved for L5-b.
state.mark_blocked(stop_reason, tool_name="ralph_starter")
self._save(state)
return self._result(state, ledger, blocker=state.last_error)
Expand Down
6 changes: 6 additions & 0 deletions src/ouroboros/auto/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,12 @@ class AutoResumeCapability(StrEnum):
},
AutoPhase.RALPH_HANDOFF: {
AutoPhase.EVALUATE,
# L5-a / #1157: Ralph terminal with ``oscillation_detected`` routes
# through UNSTUCK_LATERAL first when a lateral_thinker is wired in
# complete-product mode, mirroring the EVALUATE → UNSTUCK_LATERAL
# path. Other Ralph stop_reasons (budget-exhaustion terminals) fall
# through to BLOCKED directly.
AutoPhase.UNSTUCK_LATERAL,
AutoPhase.COMPLETE,
AutoPhase.BLOCKED,
AutoPhase.FAILED,
Expand Down
264 changes: 264 additions & 0 deletions tests/unit/auto/test_pipeline_oscillation_lateral.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,264 @@
"""L5-a regression tests for Ralph ``oscillation_detected`` → UNSTUCK_LATERAL plumbing (#1157).

When Ralph terminates with ``stop_reason == "oscillation_detected"`` in
complete-product mode AND a ``lateral_thinker`` is wired on the
pipeline, the auto pipeline now routes through ``UNSTUCK_LATERAL`` and
invokes ``_run_lateral`` first instead of bailing straight to
``BLOCKED``. Mirrors the EVALUATE→UNSTUCK_LATERAL path already
implemented for QA failures.

Other Ralph blocked stop_reasons (iteration_timeout,
wall_clock_exhausted, grade_regressing, max_generations reached) are
budget-exhaustion terminals rather than spec-reframe candidates, so
they continue to BLOCKED unchanged.
"""

from __future__ import annotations

from typing import Any

import pytest

from ouroboros.auto.adapters import LateralResult
from ouroboros.auto.grading import GradeResult, SeedGrade
from ouroboros.auto.interview_driver import AutoInterviewResult
from ouroboros.auto.pipeline import AutoPipeline
from ouroboros.auto.seed_reviewer import SeedReview, SeedReviewer
from ouroboros.auto.state import AutoPhase, AutoPipelineState
from ouroboros.core.seed import (
EvaluationPrinciple,
ExitCondition,
OntologyField,
OntologySchema,
Seed,
SeedMetadata,
)
from ouroboros.resilience.lateral import ThinkingPersona

# ---------------------------------------------------------------------------
# Test fixtures — duplicated from test_pipeline_ralph_handoff because
# tests/unit/auto/ is not a Python package (no __init__.py) so a relative
# import is not available. Kept minimal and in sync with the source file.
# ---------------------------------------------------------------------------


def _build_seed(seed_id: str = "seed_test_001") -> Seed:
return Seed(
goal="Build a CLI",
constraints=("Use existing project patterns",),
acceptance_criteria=("Command prints stable output",),
ontology_schema=OntologySchema(
name="CliTask",
description="CLI task ontology",
fields=(OntologyField(name="command", field_type="string", description="Command"),),
),
evaluation_principles=(
EvaluationPrinciple(name="testability", description="Observable behavior", weight=1.0),
),
exit_conditions=(
ExitCondition(
name="verified",
description="Checks pass",
evaluation_criteria="All acceptance criteria pass",
),
),
metadata=SeedMetadata(seed_id=seed_id, ambiguity_score=0.12),
)


class _StubInterviewDriver:
def __init__(self) -> None:
self.invocations = 0
self.progress_callback = None

async def run(self, state: AutoPipelineState, ledger: Any) -> AutoInterviewResult:
self.invocations += 1
state.interview_session_id = "interview_stub"
state.interview_completed = True
return AutoInterviewResult(
status="seed_ready",
session_id="interview_stub",
ledger=ledger,
rounds=1,
)


def _state_at_run_phase(tmp_path) -> AutoPipelineState:
state = AutoPipelineState(goal="Build a CLI", cwd=str(tmp_path))
state.arm_deadline()
state.transition(AutoPhase.INTERVIEW, "interview")
state.interview_session_id = "interview_stub"
state.interview_completed = True
state.transition(AutoPhase.SEED_GENERATION, "seed")
seed = _build_seed()
state.seed_id = seed.metadata.seed_id
state.seed_artifact = seed.to_dict()
state.last_grade = "A"
state.transition(AutoPhase.REVIEW, "review")
state.transition(AutoPhase.RUN, "run")
return state


async def _run_starter_ok(_seed: Seed) -> dict[str, Any]:
return {
"job_id": "job_run_001",
"session_id": "exec_session_001",
"execution_id": "execution_001",
}


async def _seed_generator_unused(_session_id: str) -> Seed: # pragma: no cover
raise AssertionError("seed generator should not run when seed_artifact is set")


class _PassReviewer(SeedReviewer):
def __init__(self) -> None: # noqa: D401 - intentionally trivial
pass

def review(self, seed: Seed, *, ledger: Any = None) -> SeedReview: # noqa: ARG002
grade = GradeResult(grade=SeedGrade.A, scores={}, findings=[], blockers=[], may_run=True)
return SeedReview(grade_result=grade, findings=())


def _oscillation_ralph_starter():
"""Return a ralph_starter stub that terminates with oscillation_detected."""

async def ralph_starter(_seed: Any, **_kwargs: Any) -> dict[str, Any]:
return {
"job_id": "job_ralph_oscillate_001",
"lineage_id": "ralph-oscillate",
"dispatch_mode": "job",
"terminal_status": "failed",
"stop_reason": "oscillation_detected",
}

return ralph_starter


@pytest.mark.asyncio
async def test_ralph_oscillation_enters_unstuck_lateral_when_wired(tmp_path) -> None:
"""L5-a: Ralph oscillation + complete_product + lateral_thinker wired →
transitions through UNSTUCK_LATERAL and runs the persona advisor
before BLOCKED.

The lateral thinker is invoked with a synthetic QA-style payload
derived from the Ralph oscillation context; the persona output is
persisted on state and surfaced through the result envelope just
like the EVALUATE→UNSTUCK_LATERAL path does for QA failures.
"""
state = _state_at_run_phase(tmp_path)

captured_calls: list[dict[str, Any]] = []

async def lateral_thinker(**kwargs: Any) -> LateralResult:
captured_calls.append(dict(kwargs))
return LateralResult(
persona="architect",
approach_summary="Architect: Reframes the spec to prevent oscillation cycles",
text="# Lateral Thinking: Architect\n\nThe oscillation suggests the AC pair conflicts...",
)

pipeline = AutoPipeline(
_StubInterviewDriver(),
_seed_generator_unused,
run_starter=_run_starter_ok,
reviewer=_PassReviewer(),
ralph_starter=_oscillation_ralph_starter(),
complete_product=True,
lateral_thinker=lateral_thinker,
)

result = await pipeline.run(state)

# Pipeline lands in BLOCKED after lateral runs to terminal — same shape
# as the EVALUATE→UNSTUCK_LATERAL path's blocker outcome.
assert result.status == "blocked"
assert state.phase is AutoPhase.BLOCKED
assert state.last_tool_name == "lateral_thinker"

# Lateral thinker was actually invoked — pin the integration, not the
# specific persona (persona routing depends on the synthetic QA-shape
# we synthesize from oscillation_detected, which may shift across
# persona-routing tweaks).
assert captured_calls, "lateral_thinker was not invoked"
first_call = captured_calls[0]
assert isinstance(first_call.get("persona"), ThinkingPersona)
# The synthetic QA differences carry the oscillation marker so the
# persona's reframing input reflects what actually went wrong.
differences_text = str(first_call.get("qa_differences"))
assert "oscillat" in differences_text.lower(), (
f"expected oscillation marker in qa_differences; got {differences_text!r}"
)

# Persona output surfaced on the envelope.
assert state.last_lateral_persona == "architect"
assert state.last_lateral_approach_summary is not None
assert "Architect" in state.last_lateral_approach_summary
assert result.last_lateral_persona == "architect"


@pytest.mark.asyncio
async def test_ralph_oscillation_blocks_directly_when_no_lateral_thinker(tmp_path) -> None:
"""Regression: without a wired lateral_thinker, oscillation_detected
keeps the legacy behaviour of going straight to BLOCKED. Otherwise
pipelines that intentionally opt out of lateral recovery would
suddenly hard-fail differently."""
state = _state_at_run_phase(tmp_path)

pipeline = AutoPipeline(
_StubInterviewDriver(),
_seed_generator_unused,
run_starter=_run_starter_ok,
reviewer=_PassReviewer(),
ralph_starter=_oscillation_ralph_starter(),
complete_product=True,
lateral_thinker=None,
)

result = await pipeline.run(state)

assert result.status == "blocked"
assert state.phase is AutoPhase.BLOCKED
assert state.last_error == "oscillation_detected"
assert state.last_tool_name == "ralph_starter"
# No lateral persona output should appear on the envelope when the
# lateral thinker is not wired.
assert state.last_lateral_persona is None


@pytest.mark.asyncio
async def test_ralph_iteration_timeout_does_not_invoke_lateral(tmp_path) -> None:
"""Regression: ``iteration_timeout`` is a budget terminal, not a
spec-reframe candidate. It must keep going to BLOCKED directly even
when a lateral_thinker is wired. Pinned so a future broadening of
the L5-a path through to budget terminals requires an explicit
decision rather than slipping in by accident."""
state = _state_at_run_phase(tmp_path)

async def ralph_starter(_seed: Any, **_kwargs: Any) -> dict[str, Any]:
return {
"job_id": "job_ralph_iter_timeout",
"lineage_id": "ralph-iter",
"dispatch_mode": "job",
"terminal_status": "failed",
"stop_reason": "iteration_timeout",
}

async def lateral_thinker(**_kwargs: Any) -> LateralResult: # pragma: no cover
raise AssertionError("lateral_thinker must not be invoked for iteration_timeout")

pipeline = AutoPipeline(
_StubInterviewDriver(),
_seed_generator_unused,
run_starter=_run_starter_ok,
reviewer=_PassReviewer(),
ralph_starter=ralph_starter,
complete_product=True,
lateral_thinker=lateral_thinker,
)

result = await pipeline.run(state)

assert result.status == "blocked"
assert state.last_error == "iteration_timeout"
assert state.last_tool_name == "ralph_starter"
10 changes: 6 additions & 4 deletions tests/unit/auto/test_pipeline_ralph_handoff.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,12 +198,14 @@ def test_state_machine_allows_run_to_ralph_handoff() -> None:


def test_state_machine_allows_ralph_handoff_terminal_transitions() -> None:
"""RALPH_HANDOFF must reach COMPLETE/BLOCKED/FAILED. EVALUATE is the
intermediate verification gate added by RFC #809 Phase 2.1, but it is
not itself terminal — the assertion checks that every direct successor
of RALPH_HANDOFF is either terminal or the EVALUATE bridge."""
"""RALPH_HANDOFF must reach COMPLETE/BLOCKED/FAILED plus the EVALUATE
bridge added by RFC #809 Phase 2.1 and the UNSTUCK_LATERAL bridge
added by L5-a / #1157 (oscillation_detected routes through lateral
persona advisor first when complete_product + lateral_thinker are
wired)."""
assert _ALLOWED_TRANSITIONS[AutoPhase.RALPH_HANDOFF] == {
AutoPhase.EVALUATE,
AutoPhase.UNSTUCK_LATERAL,
AutoPhase.COMPLETE,
AutoPhase.BLOCKED,
AutoPhase.FAILED,
Expand Down
Loading