Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 111 additions & 4 deletions src/ouroboros/auto/safe_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,29 @@ def _is_valid_default_spec(spec: _DefaultSpec) -> bool:
),
}


# Line-anchored marker for a user-declared non-goal / exclusion section in
# a free-form goal string. Examples that match:
#   ``non_goals: …``   ``non-goals: …``   ``Non Goals: …``
#   ``excludes: …``    ``Out-of-scope: …``   ``- non_goals:``
# An optional leading bullet (``-``, ``*`` or ``•``) followed by whitespace
# is tolerated, and matching is case-insensitive.
# The trailing colon is required so that prose that merely mentions
# ``non-goals`` in a sentence is not mistaken for a section header.
# NOTE: the pattern is compiled without ``re.MULTILINE``, so ``^`` only
# anchors at the start of the string it is applied to; it is meant to be
# run against one line at a time.
_PROMPT_NON_GOAL_HEADER = re.compile(
    r"^\s*(?:[-*•]\s+)?(?:non[ _-]?goals?|excludes?|out[ _-]?of[ _-]?scope)\s*:",
    re.IGNORECASE,
)

# Any other line-anchored ``<label>:`` header, used to detect the *next*
# section that ends a multi-line non-goals body. Matches things like
# ``actors:``, ``inputs:``, ``- constraints:`` while leaving body lines
# such as ``  - production deploy`` unmatched.
# The label must start with an ASCII letter and may continue with up to 40
# letters, digits, underscores, spaces or hyphens; the colon must be
# followed by whitespace or the end of the string, so ``key:value`` with no
# space after the colon is not treated as a header.
_PROMPT_SECTION_HEADER = re.compile(
    r"^\s*(?:[-*•]\s+)?[A-Za-z][A-Za-z0-9_ -]{0,40}\s*:(?:\s|$)",
)

# A list item: optional indentation, then either a bullet (``-``, ``*``,
# ``•``) or a number followed by ``.`` / ``)``, then at least one
# whitespace character before the item text.
_PROMPT_LIST_ITEM = re.compile(r"^\s*(?:[-*•]|\d+[.)])\s+")


_UNSAFE_CONTEXT_PATTERNS: tuple[tuple[str, str], ...] = (
(
"credentials/secrets",
Expand Down Expand Up @@ -319,9 +342,18 @@ def _unsafe_context_reason(
interview questions and the still-open ``pending_question``, because a
clarifying question like "should this deploy to production?" does not
authorize a deploy — only the answer does. It also ignores ``NON_GOAL``
entries because confirmed non-goals are explicit *exclusions*; treating
"non-goals are credentials and production deployment" as active unsafe
scope would invert the user's intent.
ledger entries because confirmed non-goals are explicit *exclusions*;
treating "non-goals are credentials and production deployment" as active
unsafe scope would invert the user's intent.

The same exclusion principle is applied at the *string* level to any
``non_goals: …`` / ``excludes: …`` / ``out-of-scope: …`` section in the
free-form ``goal`` argument — see :func:`_strip_prompt_non_goal_sections`
for the rationale. Without this pre-pass, a caller that already declares
its non-goals in the goal string (e.g. a handoff prompt body) would have
those exclusions silently flipped into "active unsafe scope" because the
interview has not yet had a chance to register them as ``NON_GOAL``
ledger entries.
"""
# NFKC compatibility decomposition collapses fullwidth/half-width Latin,
# ligatures and other compatibility variants onto their canonical ASCII
Expand All @@ -330,12 +362,13 @@ def _unsafe_context_reason(
# block, U+FF21..U+FF5A) or ``finalize`` (the ``fi`` ligature U+FB01).
# Without the normalization step ``\b(deploy|production|...)\b`` would
# not match those forms, defeating the gate's purpose.
sanitized_goal = _strip_prompt_non_goal_sections(goal)
context = unicodedata.normalize(
"NFKC",
"\n".join(
value
for value in (
goal,
sanitized_goal,
*_unsafe_ledger_values(ledger),
*_interview_answers(ledger),
)
Expand Down Expand Up @@ -544,6 +577,80 @@ def _unsafe_context_reason(
)


def _strip_prompt_non_goal_sections(text: str) -> str:
    """Remove user-declared non-goal sections from a goal string before
    unsafe-context matching.

    Non-goals are explicit *exclusions*, so their wording must not be
    treated as active unsafe scope. Callers that pre-declare non-goals
    inside the free-form goal string (scripted invocations, handoff
    prompts) would otherwise trip the matcher on their own exclusion text.

    A non-goal header (``non_goals:``, ``non-goals:``, ``non goals:``,
    ``excludes:`` or ``out-of-scope:``) is recognised at the start of a
    line, allowing leading whitespace or a list bullet. The header line
    itself is always dropped. What follows depends on its shape:

    * **Inline body** (text after the colon): only that line is dropped.
      Every following line is kept, so trailing prose cannot be swallowed.
    * **Empty header**: following lines are dropped while they are
      structurally part of the body, i.e. they are indented *deeper than
      the header line*, or they are list items that are not themselves
      ``<label>:`` headers.

    A multi-line body ends at the first of:

    * a blank line (the blank line itself is not emitted), or
    * a line at or above the header's indentation that starts another
      labelled section (``actors:``, ``- constraints:``, …), which is
      kept, or
    * any other non-list line at or above the header's indentation, which
      is kept because it may describe active scope (fail closed).

    Indentation is measured relative to the header rather than to column
    zero. Otherwise a prompt whose lines are all uniformly indented (for
    example an un-dedented triple-quoted string) would have every sibling
    section after ``non_goals:`` silently dropped, hiding active unsafe
    scope from the matcher.

    Free-form prose that merely mentions ``non-goals`` mid-sentence does
    not match because the header regex is line-anchored and requires a
    trailing colon.

    Args:
        text: The raw goal string; may span multiple lines.

    Returns:
        The goal text with non-goal sections removed, joined with ``\\n``.
        A trailing newline in ``text`` is not preserved.
    """

    def _indent_width(candidate: str) -> int:
        # Width of the leading whitespace, with tabs expanded so tab- and
        # space-indented lines are compared on the same scale.
        expanded = candidate.expandtabs()
        return len(expanded) - len(expanded.lstrip())

    kept: list[str] = []
    skipping = False
    # Indentation of the header whose body is currently being dropped;
    # only meaningful while ``skipping`` is true.
    header_indent = 0
    for line in text.splitlines():
        header_match = _PROMPT_NON_GOAL_HEADER.search(line)
        if header_match:
            # Inline body belongs to this line only; do not let it swallow
            # following active prose.
            skipping = not line[header_match.end():].strip()
            header_indent = _indent_width(line)
            continue
        if not skipping:
            kept.append(line)
            continue
        # We are inside a non-goal block — decide whether to keep skipping.
        if not line.strip():
            skipping = False
            continue
        if _indent_width(line) > header_indent:
            # Nested under the header: still a structurally clear non-goal
            # body. This must run before section-header detection so
            # indented YAML-ish body lines such as ``  deploy: production``
            # stay scoped under non-goals.
            continue
        if _PROMPT_LIST_ITEM.match(line) and not _PROMPT_SECTION_HEADER.match(line):
            # Non-labelled list items at the header's level still belong
            # to the non-goal body.
            continue
        # Either a new labelled section or ambiguous prose at (or above)
        # the header's level: stop skipping and preserve the line so the
        # matcher can still see potentially active scope.
        skipping = False
        kept.append(line)
    return "\n".join(kept)


def _strip_negated_clauses(text: str) -> str:
"""Blank clauses the user has explicitly negated.

Expand Down
206 changes: 206 additions & 0 deletions tests/unit/auto/test_safe_defaults_prompt_non_goals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
"""Tests for ``_unsafe_context_reason`` behaviour with user-declared non-goal
sections inside the free-form ``goal`` argument.

The detector already documents that ``NON_GOAL`` ledger entries are excluded
from the unsafe-context scope because confirmed non-goals are explicit
exclusions, not active unsafe scope. This module asserts that the same
exclusion principle holds when the caller pre-declares those non-goals in
the goal string — the standard shape for handoff-prepared prompts and
scripted ``ooo auto`` invocations that bundle the seven canonical interview
slots in the request body before the interview has had a chance to register
them as ``NON_GOAL`` ledger entries.
"""

from __future__ import annotations

import pytest

from ouroboros.auto.ledger import SeedDraftLedger
from ouroboros.auto.safe_defaults import (
_strip_prompt_non_goal_sections,
_unsafe_context_reason,
)


@pytest.fixture
def empty_ledger() -> SeedDraftLedger:
    """Provide a ledger built from a placeholder goal only.

    Per the original fixture note it carries no active or ``NON_GOAL``
    entries yet; the tests supply their real goal text separately through
    the ``goal`` argument of ``_unsafe_context_reason``.
    """
    ledger = SeedDraftLedger.from_goal("placeholder")
    return ledger


# ---------------------------------------------------------------------------
# Helper-level behaviour
# ---------------------------------------------------------------------------


def test_strip_removes_inline_non_goals_section() -> None:
    """An inline ``non_goals:`` line is dropped; its neighbours are kept."""
    prompt = (
        "Add bounded retry behaviour to a network client.\n"
        "non_goals: implementing a production deploy, mutating remote git state\n"
        "actors: single local CLI operator\n"
    )
    stripped = _strip_prompt_non_goal_sections(prompt)
    lowered = stripped.lower()
    assert "production deploy" not in lowered
    assert "mutating remote git state" not in lowered
    # The goal sentence and the following section must pass through as-is.
    assert "Add bounded retry behaviour to a network client." in stripped
    assert "actors: single local CLI operator" in stripped


@pytest.mark.parametrize(
    "header",
    [
        "non_goals:",
        "non-goals:",
        "non goals:",
        "Non_Goals:",
        "Excludes:",
        "excludes:",
        "Out-of-scope:",
        "out of scope:",
    ],
)
def test_strip_recognises_header_variants(header: str) -> None:
    """Every supported spelling of the header strips its inline body."""
    prompt = f"goal text\n{header} ship a deploy webhook\nactors: ops\n"
    stripped = _strip_prompt_non_goal_sections(prompt)
    # The failing header spelling is reported as the assertion message.
    assert "deploy" not in stripped.lower(), header
    assert "actors: ops" in stripped


def test_strip_handles_bullet_list_body() -> None:
    """A bulleted header with an indented bullet body is removed as a unit."""
    prompt = (
        "Goal: refactor module Y.\n"
        "- non_goals:\n"
        " - implementing a production deploy\n"
        " - mutating remote git state\n"
        "- constraints:\n"
        " - keep changes local\n"
    )
    stripped = _strip_prompt_non_goal_sections(prompt)
    assert "implementing a production deploy" not in stripped
    assert "mutating remote git state" not in stripped
    # The sibling ``constraints`` section ends the non-goal body and is
    # preserved together with its own body.
    assert "constraints" in stripped
    assert "keep changes local" in stripped


def test_strip_handles_indented_labelled_body_lines() -> None:
    """Indented ``key: value`` lines under the header count as its body."""
    prompt = (
        "Goal: refactor module Y.\n"
        "non_goals:\n"
        " deploy: production\n"
        " credentials: customer secrets\n"
        "actors: local CLI operator\n"
    )
    stripped = _strip_prompt_non_goal_sections(prompt)
    # Indented labelled lines must not be mistaken for new sections.
    assert "deploy: production" not in stripped
    assert "credentials: customer secrets" not in stripped
    # The unindented header that follows terminates the body and is kept.
    assert "actors: local CLI operator" in stripped


def test_strip_terminates_on_blank_line() -> None:
    """Prose after a blank line following the non-goal line is preserved."""
    prompt = (
        "Goal line.\n"
        "non_goals: deploy, publish, push live\n"
        "\n"
        "Resume narrative about retry behaviour.\n"
    )
    stripped = _strip_prompt_non_goal_sections(prompt)
    assert "deploy" not in stripped.lower()
    assert "Resume narrative about retry behaviour." in stripped


def test_strip_preserves_unindented_active_scope_after_inline_header() -> None:
    """An inline header strips only its own line, not the prose after it."""
    prompt = (
        "Goal: Build a local CLI.\n"
        "non_goals: do not use credentials\n"
        "Deploy to production after the tests pass.\n"
    )
    stripped = _strip_prompt_non_goal_sections(prompt)
    assert "credentials" not in stripped.lower()
    # The following unindented sentence may be active scope and must stay.
    assert "Deploy to production after the tests pass." in stripped


def test_strip_leaves_inline_prose_mention_alone() -> None:
    """A mid-sentence mention of non-goals is returned unchanged."""
    # There is no line-anchored header and no trailing colon here, so the
    # helper has nothing to remove; stripping anything would mangle
    # ordinary prose.
    prose = "We will discuss non-goals later in the document."
    result = _strip_prompt_non_goal_sections(prose)
    assert result == prose


def test_strip_is_idempotent() -> None:
    """Stripping already-stripped text yields the same text again."""
    prompt = "Goal.\nnon_goals: deploy, publish\nactors: human + agent\n"
    first_pass = _strip_prompt_non_goal_sections(prompt)
    second_pass = _strip_prompt_non_goal_sections(first_pass)
    assert first_pass == second_pass


# ---------------------------------------------------------------------------
# Integration with _unsafe_context_reason
# ---------------------------------------------------------------------------


def test_prompt_non_goals_section_does_not_trip_unsafe_matcher(
    empty_ledger: SeedDraftLedger,
) -> None:
    """Side-effect wording inside ``non_goals:`` yields no unsafe reason."""
    prompt = (
        "Add bounded retry behaviour to a network client.\n"
        "non_goals: implementing a production deploy, mutating remote git state, "
        "calling external services\n"
        "actors: single local CLI operator\n"
        "constraints: filesystem:read and filesystem:write only; no live merge or PR mutation\n"
    )
    reason = _unsafe_context_reason(empty_ledger, goal=prompt, pending_question=None)
    assert reason is None


def test_active_goal_deploy_phrase_still_trips_unsafe_matcher(
    empty_ledger: SeedDraftLedger,
) -> None:
    """A plain deploy goal with no non-goal header is still flagged."""
    # Nothing is stripped here, so the real deploy intent must reach the
    # matcher untouched.
    prompt = "Deploy the retry behaviour to production"
    reason = _unsafe_context_reason(empty_ledger, goal=prompt, pending_question=None)
    assert reason == "ambiguous external side effect"


def test_active_scope_after_inline_non_goals_still_trips_unsafe_matcher(
    empty_ledger: SeedDraftLedger,
) -> None:
    """Deploy prose following an inline non-goal line is still flagged."""
    prompt = (
        "Goal: Build a local CLI.\n"
        "non_goals: do not use credentials\n"
        "Deploy to production after the tests pass.\n"
    )
    reason = _unsafe_context_reason(empty_ledger, goal=prompt, pending_question=None)
    assert reason == "ambiguous external side effect"


def test_constraints_section_with_active_deploy_still_trips_matcher(
    empty_ledger: SeedDraftLedger,
) -> None:
    """Only non-goal sections are exempt; ``constraints:`` is still scanned."""
    # A labelled section other than non-goals that mentions a side-effect
    # phrase must NOT be stripped before matching.
    prompt = "Refactor module Y.\nconstraints: must deploy to production after merging\n"
    reason = _unsafe_context_reason(empty_ledger, goal=prompt, pending_question=None)
    assert reason == "ambiguous external side effect"


def test_multiple_non_goal_sections_are_each_stripped(
    empty_ledger: SeedDraftLedger,
) -> None:
    """Repeated exclusion headers are each honoured independently."""
    # Callers may repeat the header, deliberately or by accident; neither
    # body is allowed to trip the matcher.
    prompt = (
        "Add retry to network client.\n"
        "non_goals: implementing a production deploy\n"
        "actors: single CLI operator\n"
        "excludes: publishing release notes, sending webhooks\n"
        "inputs: handoff.md\n"
    )
    reason = _unsafe_context_reason(empty_ledger, goal=prompt, pending_question=None)
    assert reason is None
Loading