From 57ba534d11a0574f8732af73ae31c4315d1eaa8b Mon Sep 17 00:00:00 2001 From: Wldc4rd Date: Tue, 9 Jun 2026 19:00:04 +0000 Subject: [PATCH] feat(apply): validate-after-apply with single-write rollback + fix triple-write span splice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two coupled changes to the apply/auto-apply write path: 1. VALIDATE-AFTER-APPLY (new): every applied write is re-parsed as TOML immediately after writing. On a parse failure the pre-write content is restored atomically and the failure is surfaced — apply exits 4 with a rolled_back JSON field; auto-apply reports a new per-agent "rolled-back" status (counted in the summary). A malformed write can no longer be left in place to break the next config load (the bad-pin-silently-stops-the-scheduler footgun). Rollback granularity is deliberately the SINGLE write, not the once-per-run .advisor-bak-*: when several agents share one config file (the city.toml case), an earlier valid apply from the same sweep survives a later agent's failed write. 2. SPAN-SPLICE FIX (latent bug the validator caught on first contact): set_tier_fields does three sequential set_field writes but target.span is resolve-time byte offsets; the first insert grows a block-scoped target, so the next write sliced the body with a stale end — cutting an existing field line mid-string and splicing its remainder after the replacement: model = "claude-haiku-4-5" became model = "claude-opus-4-8"-4-5" (unparseable). The repo's own shared-file scenario (test_single_backup_per_run_for_shared_file) silently produced a broken file; its assertions checked substrings, not parseability. Fix: re-classify the target (fresh span) before each subsequent write; flat targets (span=None) unaffected. Tests: +6 (validator unit; apply rollback human+JSON exit 4; auto-apply rolled-back status + summary; shared-file earlier-apply-survives-rollback; span-splice regression). Suite: 258 passed. Co-Authored-By: Claude Opus 4.8 (1M context) --- modeladvisor/autoapply.py | 13 ++++- modeladvisor/cli.py | 113 +++++++++++++++++++++++++++++++++----- tests/test_autoapply.py | 86 +++++++++++++++++++++++++++++ tests/test_cli.py | 95 ++++++++++++++++++++++++++++++++ 4 files changed, 290 insertions(+), 17 deletions(-) diff --git a/modeladvisor/autoapply.py b/modeladvisor/autoapply.py index 8b9301f..768cfec 100644 --- a/modeladvisor/autoapply.py +++ b/modeladvisor/autoapply.py @@ -90,6 +90,7 @@ STATUS_SKIPPED = "skipped" # a change was computed but the gate withheld it STATUS_DRYRUN = "dry-run" # a change is planned but --dry-run is in effect STATUS_ERROR = "error" # could not resolve config / compute (per-agent isolated) +STATUS_ROLLED_BACK = "rolled-back" # write failed post-apply validation; file restored @dataclass @@ -167,6 +168,7 @@ def summary(self) -> dict: STATUS_NOOP: self.count(STATUS_NOOP), STATUS_SKIPPED: self.count(STATUS_SKIPPED), STATUS_BLOCKED: self.count(STATUS_BLOCKED), + STATUS_ROLLED_BACK: self.count(STATUS_ROLLED_BACK), STATUS_ERROR: self.count(STATUS_ERROR), } @@ -514,19 +516,26 @@ def auto_apply( # Route the agent to the WHOLE chosen tier — provider + model + # run_target — not just the model, so a cross-provider (e.g. Codex) tier # actually runs on its provider. Additive + byte-preserving; ``model`` - # is still always written (back-compat). + # is still always written (back-compat). The validated write re-parses + # the file afterwards and restores the pre-write content on a parse + # failure (single-write granularity — earlier same-file applies from + # this sweep survive), so a malformed write can never be left in place + # to break the next config load. chosen = cfg.tier(decision.chosen_tier) try: if target.path not in backed_up: decision.backup_path = _cli.backup_file(target.path) backed_up.add(target.path) - _cli.set_tier_fields( + _cli.set_tier_fields_validated( target, provider=chosen.provider, model=chosen.model, run_target=chosen.run_target, ) decision.status = STATUS_APPLIED + except _cli.ApplyValidationError as e: + decision.status = STATUS_ROLLED_BACK + decision.reason = f"write rolled back (config restored): {e}" except Exception as e: decision.status = STATUS_ERROR decision.reason = f"write failed: {e}" diff --git a/modeladvisor/cli.py b/modeladvisor/cli.py index 662b8e9..b8c4ad1 100644 --- a/modeladvisor/cli.py +++ b/modeladvisor/cli.py @@ -48,6 +48,7 @@ import re import shutil import sys +import tomllib from dataclasses import dataclass from datetime import datetime, timezone from typing import Any, Mapping, Optional, Sequence @@ -475,14 +476,22 @@ def cmd_apply(args: argparse.Namespace, out: io.TextIOBase) -> int: return 3 if not args.dry_run: backup = backup_file(target.path) - set_tier_fields( - target, - provider=new_provider, - model=new_model, - run_target=new_run_target, - ) - payload["applied"] = True payload["backup"] = backup + try: + set_tier_fields_validated( + target, + provider=new_provider, + model=new_model, + run_target=new_run_target, + ) + except ApplyValidationError as e: + payload["applied"] = False + payload["rolled_back"] = True + payload["error"] = str(e) + json.dump(payload, out, indent=2, default=str) + out.write("\n") + return 4 + payload["applied"] = True json.dump(payload, out, indent=2, default=str) out.write("\n") return 0 @@ -516,12 +525,21 @@ def cmd_apply(args: argparse.Namespace, out: io.TextIOBase) -> int: return 0 backup = backup_file(target.path) - set_tier_fields( - target, - provider=new_provider, - model=new_model, - run_target=new_run_target, - ) + try: + set_tier_fields_validated( + target, + provider=new_provider, + model=new_model, + run_target=new_run_target, + ) + except ApplyValidationError as e: + out.write(f" backup: {backup}\n") + out.write(f" FAILED: {e}\n") + out.write( + " the config file was restored to its pre-apply content; " + "nothing changed.\n" + ) + return 4 out.write(f" backup: {backup}\n") out.write( @@ -1274,13 +1292,78 @@ def set_tier_fields( # (each insert goes after `name`, so the last-written sits closest to it; # writing run_target last keeps model above it / provider at the bottom of # the inserted run — harmless either way, and replaces are position-stable). + # + # SPAN REFRESH between writes: ``target.span`` is byte offsets into the file + # AS RESOLVED — the first write changes the file's length, so a span-scoped + # (non-flat) target's offsets are stale for the second and third write. A + # stale end can cut the body slice mid-line and splice bytes (e.g. an + # existing ``model = "...-4-5"`` near the block tail becoming + # ``model = "new"-4-5"``). Re-classify the file before each subsequent + # write so every edit runs against fresh offsets. + first = True for field, val in ( ("provider", provider), ("model", model), ("run_target", run_target), ): - if val is not None and val != "": - set_field(target, field, val) + if val is None or val == "": + continue + if not first and target.span is not None: + target = _classify_config_file(target.path, target.agent) + set_field(target, field, val) + first = False + + +class ApplyValidationError(RuntimeError): + """A post-apply config validation failed; the write was rolled back.""" + + +def validate_toml_file(path: str) -> Optional[str]: + """Re-parse ``path`` as TOML; return ``None`` if it parses, else the error. + + The read-back half of validate-after-apply: an apply that leaves the config + unparseable would otherwise surface only when gc next loads the file — + silently stopping the agent's scheduling (the bad-pin footgun). Cheap + (stdlib ``tomllib`` on a small file) and side-effect free. + """ + try: + with open(path, "rb") as fh: + tomllib.load(fh) + except tomllib.TOMLDecodeError as e: + return str(e) + except OSError as e: # vanished / unreadable after write + return str(e) + return None + + +def set_tier_fields_validated( + target: ConfigTarget, + *, + provider: Optional[str] = None, + model: Optional[str] = None, + run_target: Optional[str] = None, +) -> None: + """:func:`set_tier_fields` + validate-after-apply with single-write rollback. + + Captures the file content immediately before writing; after the write the + file is re-parsed as TOML. On a parse failure the pre-write content is + restored atomically and :class:`ApplyValidationError` is raised, so a + malformed write can never be left in place to break the next config load. + + Rollback granularity is deliberately THIS write — not the once-per-run + ``.advisor-bak-*`` backup — so when several agents share one config file + (the city.toml case), an earlier valid apply from the same sweep survives a + later agent's failed write. + """ + pre_text = _read(target.path) + set_tier_fields(target, provider=provider, model=model, run_target=run_target) + err = validate_toml_file(target.path) + if err is not None: + _atomic_write(target.path, pre_text) + raise ApplyValidationError( + f"post-apply validation failed for {target.path} " + f"(write rolled back): {err}" + ) def _replace_or_insert_top( diff --git a/tests/test_autoapply.py b/tests/test_autoapply.py index 073d0ab..b7b5d92 100644 --- a/tests/test_autoapply.py +++ b/tests/test_autoapply.py @@ -387,6 +387,92 @@ def test_real_apply_is_byte_preserving_and_backs_up(monkeypatch, cfg, tmp_path): assert baks[0].read_text() == flat +def test_corrupt_write_is_rolled_back_not_left_broken(monkeypatch, cfg, tmp_path): + """validate-after-apply: a write that leaves the file unparseable is + restored to its pre-write content and reported as STATUS_ROLLED_BACK — + auto-apply can never leave behind a config gc would fail to load.""" + from modeladvisor import cli as madcli + + flat = 'scope = "rig"\n' + agent_toml = tmp_path / "agent.toml" + agent_toml.write_text(flat, encoding="utf-8") + monkeypatch.setenv("ADVISOR_AGENT_TOML", str(agent_toml)) + + recs = _wins("claude::polecat::implement::sonnet", 60) + _wins( + "claude::polecat::lookup::sonnet", 60 + ) + st = _build_store(cfg, recs) + + def corrupting(target, **kw): + with open(target.path, "w", encoding="utf-8") as fh: + fh.write("[broken\n") + + monkeypatch.setattr(madcli, "set_tier_fields", corrupting) + + rep = autoapply.auto_apply( + cfg, st, dry_run=False, agents=["polecat"], engine=madengine + ) + d = rep.decisions[0] + assert d.status == autoapply.STATUS_ROLLED_BACK + assert "rolled back" in d.reason + # the file is byte-identical to its pre-write content + assert agent_toml.read_text() == flat + # the roll-up summary counts it (and nothing reports as applied) + assert rep.summary()[autoapply.STATUS_ROLLED_BACK] == 1 + assert rep.summary()[autoapply.STATUS_APPLIED] == 0 + + +def test_shared_file_earlier_apply_survives_later_rollback( + monkeypatch, cfg, city, tmp_path +): + """Rollback granularity is the single write, not the per-run backup: + polecat's valid apply into the shared city.toml survives witness's + corrupted write being rolled back.""" + from modeladvisor import cli as madcli + + monkeypatch.setenv("ADVISOR_AGENT_TOML", str(city)) + # witness hand-set to haiku so the safe direction pulls it up (an apply) + city.write_text( + city.read_text().replace( + '[[agent]]\nname = "witness"\nscope = "rig"\n', + '[[agent]]\nname = "witness"\nmodel = "claude-haiku-4-5"\nscope = "rig"\n', + ), + encoding="utf-8", + ) + recs = _wins("claude::polecat::implement::sonnet", 60) + _wins( + "claude::polecat::lookup::sonnet", 60 + ) + st = _build_store(cfg, recs) + + real = madcli.set_tier_fields + calls = {"n": 0} + + def corrupt_second(target, **kw): + calls["n"] += 1 + if calls["n"] == 1: + return real(target, **kw) + with open(target.path, "a", encoding="utf-8") as fh: + fh.write("[broken\n") + + monkeypatch.setattr(madcli, "set_tier_fields", corrupt_second) + + rep = autoapply.auto_apply( + cfg, st, dry_run=False, agents=["polecat", "witness"], engine=madengine + ) + by = {d.agent: d for d in rep.decisions} + assert by["polecat"].status == autoapply.STATUS_APPLIED + assert by["witness"].status == autoapply.STATUS_ROLLED_BACK + + text = city.read_text() + # polecat's applied change is still present... + assert 'model = "claude-sonnet-4-5"' in text + # ...the corrupted fragment is gone and the file still parses + assert "[broken" not in text + assert madcli.validate_toml_file(str(city)) is None + # witness is back to its pre-write, hand-set model + assert 'model = "claude-haiku-4-5"' in text + + def test_single_backup_per_run_for_shared_file(monkeypatch, cfg, city, tmp_path): """All agents share one city.toml → it is backed up ONCE per run.""" monkeypatch.setenv("ADVISOR_AGENT_TOML", str(city)) diff --git a/tests/test_cli.py b/tests/test_cli.py index c95be74..900a369 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -267,6 +267,101 @@ def test_apply_writes_flat_agent_toml_and_backs_up(monkeypatch, tmp_path): assert baks[0].read_text() == FLAT_AGENT_TOML +# --------------------------------------------------------------------------- # +# apply — triple-write span staleness regression (the bug validate-after-apply +# first caught): replacing an existing field near a block's tail after an +# insert grew the block must not splice bytes. +# --------------------------------------------------------------------------- # + +def test_apply_block_replace_after_insert_stays_parseable(monkeypatch, tmp_path): + """[[agent]] block ending in an existing ``model`` line: the apply inserts + ``provider`` (block grows), then must REPLACE ``model`` against fresh — + not resolve-time — offsets. With stale offsets the body slice cut the + model line mid-string and produced ``model = "new"-4-5"``.""" + import tomllib as _toml + + _install_fake(monkeypatch) + city = _write( + tmp_path, + "city.toml", + CITY_TOML_WITH_AGENT_BLOCK.replace( + 'name = "polecat"\nscope = "rig"\nmax_active_sessions = 5\n', + 'name = "polecat"\nscope = "rig"\nmodel = "claude-haiku-4-5"\n', + ), + ) + monkeypatch.setenv("ADVISOR_AGENT_TOML", str(city)) + + rc, text = _run("apply", "polecat", "--shape", "implement") + assert rc == 0 + + updated = city.read_text() + # parses cleanly... + _toml.loads(updated) + # ...the model was replaced exactly (no spliced remnant of the old value) + assert updated.count('model = "claude-sonnet-4-5"') == 1 + assert "haiku" not in updated + assert '"-4-5"' not in updated + assert updated.count("provider = ") == 2 # [workspace] + the agent block + # (the fake engine supplies no run_target; empty fields are skipped) + assert updated.count("run_target = ") == 0 + + +# --------------------------------------------------------------------------- # +# apply — validate-after-apply: a malformed write is rolled back, exit 4 +# --------------------------------------------------------------------------- # + +def test_validate_toml_file_unit(tmp_path): + good = _write(tmp_path, "good.toml", 'model = "x"\n') + assert cli.validate_toml_file(str(good)) is None + bad = _write(tmp_path, "bad.toml", "[broken\n") + err = cli.validate_toml_file(str(bad)) + assert err is not None and err != "" + + +def test_apply_validation_failure_rolls_back(monkeypatch, tmp_path): + """A write that leaves the config unparseable is restored byte-for-byte + and the command exits 4 — never a broken config left for gc to load.""" + _install_fake(monkeypatch) + agent_toml = _write(tmp_path, "agent.toml", FLAT_AGENT_TOML) + monkeypatch.setenv("ADVISOR_AGENT_TOML", str(agent_toml)) + + def corrupting(target, **kw): + with open(target.path, "w", encoding="utf-8") as fh: + fh.write('model = "unterminated\n') + + monkeypatch.setattr(cli, "set_tier_fields", corrupting) + + rc, text = _run("apply", "polecat", "--shape", "implement") + assert rc == 4 + assert "FAILED" in text and "restored" in text + # the file is byte-identical to its pre-apply content + assert agent_toml.read_text() == FLAT_AGENT_TOML + # the once-per-apply backup was still taken (and holds the original) + baks = list(tmp_path.glob("*advisor-bak*")) + assert len(baks) == 1 + assert baks[0].read_text() == FLAT_AGENT_TOML + + +def test_apply_validation_failure_json(monkeypatch, tmp_path): + _install_fake(monkeypatch) + agent_toml = _write(tmp_path, "agent.toml", FLAT_AGENT_TOML) + monkeypatch.setenv("ADVISOR_AGENT_TOML", str(agent_toml)) + + def corrupting(target, **kw): + with open(target.path, "w", encoding="utf-8") as fh: + fh.write("[broken\n") + + monkeypatch.setattr(cli, "set_tier_fields", corrupting) + + rc, text = _run("apply", "polecat", "--shape", "implement", "--json") + assert rc == 4 + obj = json.loads(text) + assert obj["applied"] is False + assert obj["rolled_back"] is True + assert "error" in obj and "rolled back" in obj["error"] + assert agent_toml.read_text() == FLAT_AGENT_TOML + + def test_apply_writes_into_matching_agent_block(monkeypatch, tmp_path): """`model` must land inside the [[agent]] name="polecat" block, not mayor's.""" _install_fake(monkeypatch)