Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions modeladvisor/autoapply.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
STATUS_SKIPPED = "skipped" # a change was computed but the gate withheld it
STATUS_DRYRUN = "dry-run" # a change is planned but --dry-run is in effect
STATUS_ERROR = "error" # could not resolve config / compute (per-agent isolated)
STATUS_ROLLED_BACK = "rolled-back" # write failed post-apply validation; file restored


@dataclass
Expand Down Expand Up @@ -167,6 +168,7 @@ def summary(self) -> dict:
STATUS_NOOP: self.count(STATUS_NOOP),
STATUS_SKIPPED: self.count(STATUS_SKIPPED),
STATUS_BLOCKED: self.count(STATUS_BLOCKED),
STATUS_ROLLED_BACK: self.count(STATUS_ROLLED_BACK),
STATUS_ERROR: self.count(STATUS_ERROR),
}

Expand Down Expand Up @@ -514,19 +516,26 @@ def auto_apply(
# Route the agent to the WHOLE chosen tier — provider + model +
# run_target — not just the model, so a cross-provider (e.g. Codex) tier
# actually runs on its provider. Additive + byte-preserving; ``model``
# is still always written (back-compat).
# is still always written (back-compat). The validated write re-parses
# the file afterwards and restores the pre-write content on a parse
# failure (single-write granularity — earlier same-file applies from
# this sweep survive), so a malformed write can never be left in place
# to break the next config load.
chosen = cfg.tier(decision.chosen_tier)
try:
if target.path not in backed_up:
decision.backup_path = _cli.backup_file(target.path)
backed_up.add(target.path)
_cli.set_tier_fields(
_cli.set_tier_fields_validated(
target,
provider=chosen.provider,
model=chosen.model,
run_target=chosen.run_target,
)
decision.status = STATUS_APPLIED
except _cli.ApplyValidationError as e:
decision.status = STATUS_ROLLED_BACK
decision.reason = f"write rolled back (config restored): {e}"
except Exception as e:
decision.status = STATUS_ERROR
decision.reason = f"write failed: {e}"
Expand Down
113 changes: 98 additions & 15 deletions modeladvisor/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import re
import shutil
import sys
import tomllib
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Mapping, Optional, Sequence
Expand Down Expand Up @@ -475,14 +476,22 @@ def cmd_apply(args: argparse.Namespace, out: io.TextIOBase) -> int:
return 3
if not args.dry_run:
backup = backup_file(target.path)
set_tier_fields(
target,
provider=new_provider,
model=new_model,
run_target=new_run_target,
)
payload["applied"] = True
payload["backup"] = backup
try:
set_tier_fields_validated(
target,
provider=new_provider,
model=new_model,
run_target=new_run_target,
)
except ApplyValidationError as e:
payload["applied"] = False
payload["rolled_back"] = True
payload["error"] = str(e)
json.dump(payload, out, indent=2, default=str)
out.write("\n")
return 4
payload["applied"] = True
json.dump(payload, out, indent=2, default=str)
out.write("\n")
return 0
Expand Down Expand Up @@ -516,12 +525,21 @@ def cmd_apply(args: argparse.Namespace, out: io.TextIOBase) -> int:
return 0

backup = backup_file(target.path)
set_tier_fields(
target,
provider=new_provider,
model=new_model,
run_target=new_run_target,
)
try:
set_tier_fields_validated(
target,
provider=new_provider,
model=new_model,
run_target=new_run_target,
)
except ApplyValidationError as e:
out.write(f" backup: {backup}\n")
out.write(f" FAILED: {e}\n")
out.write(
" the config file was restored to its pre-apply content; "
"nothing changed.\n"
)
return 4

out.write(f" backup: {backup}\n")
out.write(
Expand Down Expand Up @@ -1274,13 +1292,78 @@ def set_tier_fields(
# (each insert goes after `name`, so the last-written sits closest to it;
# writing run_target last keeps model above it / provider at the bottom of
# the inserted run — harmless either way, and replaces are position-stable).
#
# SPAN REFRESH between writes: ``target.span`` is byte offsets into the file
# AS RESOLVED — the first write changes the file's length, so a span-scoped
# (non-flat) target's offsets are stale for the second and third write. A
# stale end can cut the body slice mid-line and splice bytes (e.g. an
# existing ``model = "...-4-5"`` near the block tail becoming
# ``model = "new"-4-5"``). Re-classify the file before each subsequent
# write so every edit runs against fresh offsets.
first = True
for field, val in (
("provider", provider),
("model", model),
("run_target", run_target),
):
if val is not None and val != "":
set_field(target, field, val)
if val is None or val == "":
continue
if not first and target.span is not None:
target = _classify_config_file(target.path, target.agent)
set_field(target, field, val)
first = False


class ApplyValidationError(RuntimeError):
"""A post-apply config validation failed; the write was rolled back."""


def validate_toml_file(path: str) -> Optional[str]:
"""Re-parse ``path`` as TOML; return ``None`` if it parses, else the error.

The read-back half of validate-after-apply: an apply that leaves the config
unparseable would otherwise surface only when gc next loads the file —
silently stopping the agent's scheduling (the bad-pin footgun). Cheap
(stdlib ``tomllib`` on a small file) and side-effect free.
"""
try:
with open(path, "rb") as fh:
tomllib.load(fh)
except tomllib.TOMLDecodeError as e:
return str(e)
except OSError as e: # vanished / unreadable after write
return str(e)
return None


def set_tier_fields_validated(
target: ConfigTarget,
*,
provider: Optional[str] = None,
model: Optional[str] = None,
run_target: Optional[str] = None,
) -> None:
""":func:`set_tier_fields` + validate-after-apply with single-write rollback.

Captures the file content immediately before writing; after the write the
file is re-parsed as TOML. On a parse failure the pre-write content is
restored atomically and :class:`ApplyValidationError` is raised, so a
malformed write can never be left in place to break the next config load.

Rollback granularity is deliberately THIS write — not the once-per-run
``.advisor-bak-*`` backup — so when several agents share one config file
(the city.toml case), an earlier valid apply from the same sweep survives a
later agent's failed write.
"""
pre_text = _read(target.path)
set_tier_fields(target, provider=provider, model=model, run_target=run_target)
err = validate_toml_file(target.path)
if err is not None:
_atomic_write(target.path, pre_text)
raise ApplyValidationError(
f"post-apply validation failed for {target.path} "
f"(write rolled back): {err}"
)


def _replace_or_insert_top(
Expand Down
86 changes: 86 additions & 0 deletions tests/test_autoapply.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,92 @@ def test_real_apply_is_byte_preserving_and_backs_up(monkeypatch, cfg, tmp_path):
assert baks[0].read_text() == flat


def test_corrupt_write_is_rolled_back_not_left_broken(monkeypatch, cfg, tmp_path):
"""validate-after-apply: a write that leaves the file unparseable is
restored to its pre-write content and reported as STATUS_ROLLED_BACK —
auto-apply can never leave behind a config gc would fail to load."""
from modeladvisor import cli as madcli

flat = 'scope = "rig"\n'
agent_toml = tmp_path / "agent.toml"
agent_toml.write_text(flat, encoding="utf-8")
monkeypatch.setenv("ADVISOR_AGENT_TOML", str(agent_toml))

recs = _wins("claude::polecat::implement::sonnet", 60) + _wins(
"claude::polecat::lookup::sonnet", 60
)
st = _build_store(cfg, recs)

def corrupting(target, **kw):
with open(target.path, "w", encoding="utf-8") as fh:
fh.write("[broken\n")

monkeypatch.setattr(madcli, "set_tier_fields", corrupting)

rep = autoapply.auto_apply(
cfg, st, dry_run=False, agents=["polecat"], engine=madengine
)
d = rep.decisions[0]
assert d.status == autoapply.STATUS_ROLLED_BACK
assert "rolled back" in d.reason
# the file is byte-identical to its pre-write content
assert agent_toml.read_text() == flat
# the roll-up summary counts it (and nothing reports as applied)
assert rep.summary()[autoapply.STATUS_ROLLED_BACK] == 1
assert rep.summary()[autoapply.STATUS_APPLIED] == 0


def test_shared_file_earlier_apply_survives_later_rollback(
monkeypatch, cfg, city, tmp_path
):
"""Rollback granularity is the single write, not the per-run backup:
polecat's valid apply into the shared city.toml survives witness's
corrupted write being rolled back."""
from modeladvisor import cli as madcli

monkeypatch.setenv("ADVISOR_AGENT_TOML", str(city))
# witness hand-set to haiku so the safe direction pulls it up (an apply)
city.write_text(
city.read_text().replace(
'[[agent]]\nname = "witness"\nscope = "rig"\n',
'[[agent]]\nname = "witness"\nmodel = "claude-haiku-4-5"\nscope = "rig"\n',
),
encoding="utf-8",
)
recs = _wins("claude::polecat::implement::sonnet", 60) + _wins(
"claude::polecat::lookup::sonnet", 60
)
st = _build_store(cfg, recs)

real = madcli.set_tier_fields
calls = {"n": 0}

def corrupt_second(target, **kw):
calls["n"] += 1
if calls["n"] == 1:
return real(target, **kw)
with open(target.path, "a", encoding="utf-8") as fh:
fh.write("[broken\n")

monkeypatch.setattr(madcli, "set_tier_fields", corrupt_second)

rep = autoapply.auto_apply(
cfg, st, dry_run=False, agents=["polecat", "witness"], engine=madengine
)
by = {d.agent: d for d in rep.decisions}
assert by["polecat"].status == autoapply.STATUS_APPLIED
assert by["witness"].status == autoapply.STATUS_ROLLED_BACK

text = city.read_text()
# polecat's applied change is still present...
assert 'model = "claude-sonnet-4-5"' in text
# ...the corrupted fragment is gone and the file still parses
assert "[broken" not in text
assert madcli.validate_toml_file(str(city)) is None
# witness is back to its pre-write, hand-set model
assert 'model = "claude-haiku-4-5"' in text


def test_single_backup_per_run_for_shared_file(monkeypatch, cfg, city, tmp_path):
"""All agents share one city.toml → it is backed up ONCE per run."""
monkeypatch.setenv("ADVISOR_AGENT_TOML", str(city))
Expand Down
95 changes: 95 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,101 @@ def test_apply_writes_flat_agent_toml_and_backs_up(monkeypatch, tmp_path):
assert baks[0].read_text() == FLAT_AGENT_TOML


# --------------------------------------------------------------------------- #
# apply — triple-write span staleness regression (the bug validate-after-apply
# first caught): replacing an existing field near a block's tail after an
# insert grew the block must not splice bytes.
# --------------------------------------------------------------------------- #

def test_apply_block_replace_after_insert_stays_parseable(monkeypatch, tmp_path):
"""[[agent]] block ending in an existing ``model`` line: the apply inserts
``provider`` (block grows), then must REPLACE ``model`` against fresh —
not resolve-time — offsets. With stale offsets the body slice cut the
model line mid-string and produced ``model = "new"-4-5"``."""
import tomllib as _toml

_install_fake(monkeypatch)
city = _write(
tmp_path,
"city.toml",
CITY_TOML_WITH_AGENT_BLOCK.replace(
'name = "polecat"\nscope = "rig"\nmax_active_sessions = 5\n',
'name = "polecat"\nscope = "rig"\nmodel = "claude-haiku-4-5"\n',
),
)
monkeypatch.setenv("ADVISOR_AGENT_TOML", str(city))

rc, text = _run("apply", "polecat", "--shape", "implement")
assert rc == 0

updated = city.read_text()
# parses cleanly...
_toml.loads(updated)
# ...the model was replaced exactly (no spliced remnant of the old value)
assert updated.count('model = "claude-sonnet-4-5"') == 1
assert "haiku" not in updated
assert '"-4-5"' not in updated
assert updated.count("provider = ") == 2 # [workspace] + the agent block
# (the fake engine supplies no run_target; empty fields are skipped)
assert updated.count("run_target = ") == 0


# --------------------------------------------------------------------------- #
# apply — validate-after-apply: a malformed write is rolled back, exit 4
# --------------------------------------------------------------------------- #

def test_validate_toml_file_unit(tmp_path):
good = _write(tmp_path, "good.toml", 'model = "x"\n')
assert cli.validate_toml_file(str(good)) is None
bad = _write(tmp_path, "bad.toml", "[broken\n")
err = cli.validate_toml_file(str(bad))
assert err is not None and err != ""


def test_apply_validation_failure_rolls_back(monkeypatch, tmp_path):
"""A write that leaves the config unparseable is restored byte-for-byte
and the command exits 4 — never a broken config left for gc to load."""
_install_fake(monkeypatch)
agent_toml = _write(tmp_path, "agent.toml", FLAT_AGENT_TOML)
monkeypatch.setenv("ADVISOR_AGENT_TOML", str(agent_toml))

def corrupting(target, **kw):
with open(target.path, "w", encoding="utf-8") as fh:
fh.write('model = "unterminated\n')

monkeypatch.setattr(cli, "set_tier_fields", corrupting)

rc, text = _run("apply", "polecat", "--shape", "implement")
assert rc == 4
assert "FAILED" in text and "restored" in text
# the file is byte-identical to its pre-apply content
assert agent_toml.read_text() == FLAT_AGENT_TOML
# the once-per-apply backup was still taken (and holds the original)
baks = list(tmp_path.glob("*advisor-bak*"))
assert len(baks) == 1
assert baks[0].read_text() == FLAT_AGENT_TOML


def test_apply_validation_failure_json(monkeypatch, tmp_path):
_install_fake(monkeypatch)
agent_toml = _write(tmp_path, "agent.toml", FLAT_AGENT_TOML)
monkeypatch.setenv("ADVISOR_AGENT_TOML", str(agent_toml))

def corrupting(target, **kw):
with open(target.path, "w", encoding="utf-8") as fh:
fh.write("[broken\n")

monkeypatch.setattr(cli, "set_tier_fields", corrupting)

rc, text = _run("apply", "polecat", "--shape", "implement", "--json")
assert rc == 4
obj = json.loads(text)
assert obj["applied"] is False
assert obj["rolled_back"] is True
assert "error" in obj and "rolled back" in obj["error"]
assert agent_toml.read_text() == FLAT_AGENT_TOML


def test_apply_writes_into_matching_agent_block(monkeypatch, tmp_path):
"""`model` must land inside the [[agent]] name="polecat" block, not mayor's."""
_install_fake(monkeypatch)
Expand Down