From 57ba534d11a0574f8732af73ae31c4315d1eaa8b Mon Sep 17 00:00:00 2001
From: Wldc4rd <charlie@couttspnw.com>
Date: Tue, 9 Jun 2026 19:00:04 +0000
Subject: [PATCH] feat(apply): validate-after-apply with single-write rollback
 + fix triple-write span splice
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two coupled changes to the apply/auto-apply write path:

1. VALIDATE-AFTER-APPLY (new): every applied write is re-parsed as TOML
   immediately after writing. On a parse failure the pre-write content is
   restored atomically and the failure is surfaced — apply exits 4 with a
   rolled_back JSON field; auto-apply reports a new per-agent
   "rolled-back" status (counted in the summary). A malformed write can
   no longer be left in place to break the next config load (the
   bad-pin-silently-stops-the-scheduler footgun).

   Rollback granularity is deliberately the SINGLE write, not the
   once-per-run .advisor-bak-*: when several agents share one config file
   (the city.toml case), an earlier valid apply from the same sweep
   survives a later agent's failed write.

2. SPAN-SPLICE FIX (latent bug the validator caught on first contact):
   set_tier_fields does three sequential set_field writes but target.span
   is resolve-time byte offsets; the first insert grows a block-scoped
   target, so the next write sliced the body with a stale end — cutting an
   existing field line mid-string and splicing its remainder after the
   replacement: model = "claude-haiku-4-5" became
   model = "claude-opus-4-8"-4-5" (unparseable). The repo's own
   shared-file scenario (test_single_backup_per_run_for_shared_file)
   silently produced a broken file; its assertions checked substrings, not
   parseability. Fix: re-classify the target (fresh span) before each
   subsequent write; flat targets (span=None) unaffected.

Tests: +6 (validator unit; apply rollback human+JSON exit 4; auto-apply
rolled-back status + summary; shared-file earlier-apply-survives-rollback;
span-splice regression). Suite: 258 passed.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 modeladvisor/autoapply.py |  13 ++++-
 modeladvisor/cli.py       | 113 +++++++++++++++++++++++++++++++++-----
 tests/test_autoapply.py   |  86 +++++++++++++++++++++++++++++
 tests/test_cli.py         |  95 ++++++++++++++++++++++++++++++++
 4 files changed, 290 insertions(+), 17 deletions(-)

diff --git a/modeladvisor/autoapply.py b/modeladvisor/autoapply.py
index 8b9301f..768cfec 100644
--- a/modeladvisor/autoapply.py
+++ b/modeladvisor/autoapply.py
@@ -90,6 +90,7 @@
 STATUS_SKIPPED = "skipped"  # a change was computed but the gate withheld it
 STATUS_DRYRUN = "dry-run"  # a change is planned but --dry-run is in effect
 STATUS_ERROR = "error"  # could not resolve config / compute (per-agent isolated)
+STATUS_ROLLED_BACK = "rolled-back"  # write failed post-apply validation; file restored
 
 
 @dataclass
@@ -167,6 +168,7 @@ def summary(self) -> dict:
             STATUS_NOOP: self.count(STATUS_NOOP),
             STATUS_SKIPPED: self.count(STATUS_SKIPPED),
             STATUS_BLOCKED: self.count(STATUS_BLOCKED),
+            STATUS_ROLLED_BACK: self.count(STATUS_ROLLED_BACK),
             STATUS_ERROR: self.count(STATUS_ERROR),
         }
 
@@ -514,19 +516,26 @@ def auto_apply(
         # Route the agent to the WHOLE chosen tier — provider + model +
         # run_target — not just the model, so a cross-provider (e.g. Codex) tier
         # actually runs on its provider.  Additive + byte-preserving; ``model``
-        # is still always written (back-compat).
+        # is still always written (back-compat).  The validated write re-parses
+        # the file afterwards and restores the pre-write content on a parse
+        # failure (single-write granularity — earlier same-file applies from
+        # this sweep survive), so a malformed write can never be left in place
+        # to break the next config load.
         chosen = cfg.tier(decision.chosen_tier)
         try:
             if target.path not in backed_up:
                 decision.backup_path = _cli.backup_file(target.path)
                 backed_up.add(target.path)
-            _cli.set_tier_fields(
+            _cli.set_tier_fields_validated(
                 target,
                 provider=chosen.provider,
                 model=chosen.model,
                 run_target=chosen.run_target,
             )
             decision.status = STATUS_APPLIED
+        except _cli.ApplyValidationError as e:
+            decision.status = STATUS_ROLLED_BACK
+            decision.reason = f"write rolled back (config restored): {e}"
         except Exception as e:
             decision.status = STATUS_ERROR
             decision.reason = f"write failed: {e}"
diff --git a/modeladvisor/cli.py b/modeladvisor/cli.py
index 662b8e9..b8c4ad1 100644
--- a/modeladvisor/cli.py
+++ b/modeladvisor/cli.py
@@ -48,6 +48,7 @@
 import re
 import shutil
 import sys
+import tomllib
 from dataclasses import dataclass
 from datetime import datetime, timezone
 from typing import Any, Mapping, Optional, Sequence
@@ -475,14 +476,22 @@ def cmd_apply(args: argparse.Namespace, out: io.TextIOBase) -> int:
             return 3
         if not args.dry_run:
             backup = backup_file(target.path)
-            set_tier_fields(
-                target,
-                provider=new_provider,
-                model=new_model,
-                run_target=new_run_target,
-            )
-            payload["applied"] = True
             payload["backup"] = backup
+            try:
+                set_tier_fields_validated(
+                    target,
+                    provider=new_provider,
+                    model=new_model,
+                    run_target=new_run_target,
+                )
+            except ApplyValidationError as e:
+                payload["applied"] = False
+                payload["rolled_back"] = True
+                payload["error"] = str(e)
+                json.dump(payload, out, indent=2, default=str)
+                out.write("\n")
+                return 4
+            payload["applied"] = True
         json.dump(payload, out, indent=2, default=str)
         out.write("\n")
         return 0
@@ -516,12 +525,21 @@ def cmd_apply(args: argparse.Namespace, out: io.TextIOBase) -> int:
         return 0
 
     backup = backup_file(target.path)
-    set_tier_fields(
-        target,
-        provider=new_provider,
-        model=new_model,
-        run_target=new_run_target,
-    )
+    try:
+        set_tier_fields_validated(
+            target,
+            provider=new_provider,
+            model=new_model,
+            run_target=new_run_target,
+        )
+    except ApplyValidationError as e:
+        out.write(f"  backup: {backup}\n")
+        out.write(f"  FAILED: {e}\n")
+        out.write(
+            "  the config file was restored to its pre-apply content; "
+            "nothing changed.\n"
+        )
+        return 4
 
     out.write(f"  backup: {backup}\n")
     out.write(
@@ -1274,13 +1292,78 @@ def set_tier_fields(
     # (each insert goes after `name`, so the last-written sits closest to it;
     # writing run_target last keeps model above it / provider at the bottom of
     # the inserted run — harmless either way, and replaces are position-stable).
+    #
+    # SPAN REFRESH between writes: ``target.span`` is byte offsets into the file
+    # AS RESOLVED — the first write changes the file's length, so a span-scoped
+    # (non-flat) target's offsets are stale for the second and third write.  A
+    # stale end can cut the body slice mid-line and splice bytes (e.g. an
+    # existing ``model = "...-4-5"`` near the block tail becoming
+    # ``model = "new"-4-5"``).  Re-classify the file before each subsequent
+    # write so every edit runs against fresh offsets.
+    first = True
     for field, val in (
         ("provider", provider),
         ("model", model),
         ("run_target", run_target),
     ):
-        if val is not None and val != "":
-            set_field(target, field, val)
+        if val is None or val == "":
+            continue
+        if not first and target.span is not None:
+            target = _classify_config_file(target.path, target.agent)
+        set_field(target, field, val)
+        first = False
+
+
+class ApplyValidationError(RuntimeError):
+    """A post-apply config validation failed; the write was rolled back."""
+
+
+def validate_toml_file(path: str) -> Optional[str]:
+    """Re-parse ``path`` as TOML; return ``None`` if it parses, else the error.
+
+    The read-back half of validate-after-apply: an apply that leaves the config
+    unparseable would otherwise surface only when gc next loads the file —
+    silently stopping the agent's scheduling (the bad-pin footgun).  Cheap
+    (stdlib ``tomllib`` on a small file) and side-effect free.
+    """
+    try:
+        with open(path, "rb") as fh:
+            tomllib.load(fh)
+    except tomllib.TOMLDecodeError as e:
+        return str(e)
+    except OSError as e:  # vanished / unreadable after write
+        return str(e)
+    return None
+
+
+def set_tier_fields_validated(
+    target: ConfigTarget,
+    *,
+    provider: Optional[str] = None,
+    model: Optional[str] = None,
+    run_target: Optional[str] = None,
+) -> None:
+    """:func:`set_tier_fields` + validate-after-apply with single-write rollback.
+
+    Captures the file content immediately before writing; after the write the
+    file is re-parsed as TOML.  On a parse failure the pre-write content is
+    restored atomically and :class:`ApplyValidationError` is raised, so a
+    malformed write can never be left in place to break the next config load.
+
+    Rollback granularity is deliberately THIS write — not the once-per-run
+    ``.advisor-bak-*`` backup — so when several agents share one config file
+    (the city.toml case), an earlier valid apply from the same sweep survives a
+    later agent's failed write.
+    """
+    pre_text = _read(target.path)
+    set_tier_fields(target, provider=provider, model=model, run_target=run_target)
+    err = validate_toml_file(target.path)
+    if err is not None:
+        _atomic_write(target.path, pre_text)
+        raise ApplyValidationError(
+            f"post-apply validation failed for {target.path} "
+            f"(write rolled back): {err}"
+        )
 
 
 def _replace_or_insert_top(
diff --git a/tests/test_autoapply.py b/tests/test_autoapply.py
index 073d0ab..b7b5d92 100644
--- a/tests/test_autoapply.py
+++ b/tests/test_autoapply.py
@@ -387,6 +387,92 @@ def test_real_apply_is_byte_preserving_and_backs_up(monkeypatch, cfg, tmp_path):
     assert baks[0].read_text() == flat
 
 
+def test_corrupt_write_is_rolled_back_not_left_broken(monkeypatch, cfg, tmp_path):
+    """validate-after-apply: a write that leaves the file unparseable is
+    restored to its pre-write content and reported as STATUS_ROLLED_BACK —
+    auto-apply can never leave behind a config gc would fail to load."""
+    from modeladvisor import cli as madcli
+
+    flat = 'scope = "rig"\n'
+    agent_toml = tmp_path / "agent.toml"
+    agent_toml.write_text(flat, encoding="utf-8")
+    monkeypatch.setenv("ADVISOR_AGENT_TOML", str(agent_toml))
+
+    recs = _wins("claude::polecat::implement::sonnet", 60) + _wins(
+        "claude::polecat::lookup::sonnet", 60
+    )
+    st = _build_store(cfg, recs)
+
+    def corrupting(target, **kw):
+        with open(target.path, "w", encoding="utf-8") as fh:
+            fh.write("[broken\n")
+
+    monkeypatch.setattr(madcli, "set_tier_fields", corrupting)
+
+    rep = autoapply.auto_apply(
+        cfg, st, dry_run=False, agents=["polecat"], engine=madengine
+    )
+    d = rep.decisions[0]
+    assert d.status == autoapply.STATUS_ROLLED_BACK
+    assert "rolled back" in d.reason
+    # the file is byte-identical to its pre-write content
+    assert agent_toml.read_text() == flat
+    # the roll-up summary counts it (and nothing reports as applied)
+    assert rep.summary()[autoapply.STATUS_ROLLED_BACK] == 1
+    assert rep.summary()[autoapply.STATUS_APPLIED] == 0
+
+
+def test_shared_file_earlier_apply_survives_later_rollback(
+    monkeypatch, cfg, city, tmp_path
+):
+    """Rollback granularity is the single write, not the per-run backup:
+    polecat's valid apply into the shared city.toml survives witness's
+    corrupted write being rolled back."""
+    from modeladvisor import cli as madcli
+
+    monkeypatch.setenv("ADVISOR_AGENT_TOML", str(city))
+    # witness hand-set to haiku so the safe direction pulls it up (an apply)
+    city.write_text(
+        city.read_text().replace(
+            '[[agent]]\nname = "witness"\nscope = "rig"\n',
+            '[[agent]]\nname = "witness"\nmodel = "claude-haiku-4-5"\nscope = "rig"\n',
+        ),
+        encoding="utf-8",
+    )
+    recs = _wins("claude::polecat::implement::sonnet", 60) + _wins(
+        "claude::polecat::lookup::sonnet", 60
+    )
+    st = _build_store(cfg, recs)
+
+    real = madcli.set_tier_fields
+    calls = {"n": 0}
+
+    def corrupt_second(target, **kw):
+        calls["n"] += 1
+        if calls["n"] == 1:
+            return real(target, **kw)
+        with open(target.path, "a", encoding="utf-8") as fh:
+            fh.write("[broken\n")
+
+    monkeypatch.setattr(madcli, "set_tier_fields", corrupt_second)
+
+    rep = autoapply.auto_apply(
+        cfg, st, dry_run=False, agents=["polecat", "witness"], engine=madengine
+    )
+    by = {d.agent: d for d in rep.decisions}
+    assert by["polecat"].status == autoapply.STATUS_APPLIED
+    assert by["witness"].status == autoapply.STATUS_ROLLED_BACK
+
+    text = city.read_text()
+    # polecat's applied change is still present...
+    assert 'model = "claude-sonnet-4-5"' in text
+    # ...the corrupted fragment is gone and the file still parses
+    assert "[broken" not in text
+    assert madcli.validate_toml_file(str(city)) is None
+    # witness is back to its pre-write, hand-set model
+    assert 'model = "claude-haiku-4-5"' in text
+
+
 def test_single_backup_per_run_for_shared_file(monkeypatch, cfg, city, tmp_path):
     """All agents share one city.toml → it is backed up ONCE per run."""
     monkeypatch.setenv("ADVISOR_AGENT_TOML", str(city))
diff --git a/tests/test_cli.py b/tests/test_cli.py
index c95be74..900a369 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -267,6 +267,101 @@ def test_apply_writes_flat_agent_toml_and_backs_up(monkeypatch, tmp_path):
     assert baks[0].read_text() == FLAT_AGENT_TOML
 
 
+# --------------------------------------------------------------------------- #
+# apply — triple-write span staleness regression (the bug validate-after-apply
+# first caught): replacing an existing field near a block's tail after an
+# insert grew the block must not splice bytes.
+# --------------------------------------------------------------------------- #
+
+def test_apply_block_replace_after_insert_stays_parseable(monkeypatch, tmp_path):
+    """[[agent]] block ending in an existing ``model`` line: the apply inserts
+    ``provider`` (block grows), then must REPLACE ``model`` against fresh —
+    not resolve-time — offsets.  With stale offsets the body slice cut the
+    model line mid-string and produced ``model = "new"-4-5"``."""
+    import tomllib as _toml
+
+    _install_fake(monkeypatch)
+    city = _write(
+        tmp_path,
+        "city.toml",
+        CITY_TOML_WITH_AGENT_BLOCK.replace(
+            'name = "polecat"\nscope = "rig"\nmax_active_sessions = 5\n',
+            'name = "polecat"\nscope = "rig"\nmodel = "claude-haiku-4-5"\n',
+        ),
+    )
+    monkeypatch.setenv("ADVISOR_AGENT_TOML", str(city))
+
+    rc, text = _run("apply", "polecat", "--shape", "implement")
+    assert rc == 0
+
+    updated = city.read_text()
+    # parses cleanly...
+    _toml.loads(updated)
+    # ...the model was replaced exactly (no spliced remnant of the old value)
+    assert updated.count('model = "claude-sonnet-4-5"') == 1
+    assert "haiku" not in updated
+    assert '"-4-5"' not in updated
+    assert updated.count("provider = ") == 2  # [workspace] + the agent block
+    # (the fake engine supplies no run_target; empty fields are skipped)
+    assert updated.count("run_target = ") == 0
+
+
+# --------------------------------------------------------------------------- #
+# apply — validate-after-apply: a malformed write is rolled back, exit 4
+# --------------------------------------------------------------------------- #
+
+def test_validate_toml_file_unit(tmp_path):
+    good = _write(tmp_path, "good.toml", 'model = "x"\n')
+    assert cli.validate_toml_file(str(good)) is None
+    bad = _write(tmp_path, "bad.toml", "[broken\n")
+    err = cli.validate_toml_file(str(bad))
+    assert err is not None and err != ""
+
+
+def test_apply_validation_failure_rolls_back(monkeypatch, tmp_path):
+    """A write that leaves the config unparseable is restored byte-for-byte
+    and the command exits 4 — never a broken config left for gc to load."""
+    _install_fake(monkeypatch)
+    agent_toml = _write(tmp_path, "agent.toml", FLAT_AGENT_TOML)
+    monkeypatch.setenv("ADVISOR_AGENT_TOML", str(agent_toml))
+
+    def corrupting(target, **kw):
+        with open(target.path, "w", encoding="utf-8") as fh:
+            fh.write('model = "unterminated\n')
+
+    monkeypatch.setattr(cli, "set_tier_fields", corrupting)
+
+    rc, text = _run("apply", "polecat", "--shape", "implement")
+    assert rc == 4
+    assert "FAILED" in text and "restored" in text
+    # the file is byte-identical to its pre-apply content
+    assert agent_toml.read_text() == FLAT_AGENT_TOML
+    # the once-per-apply backup was still taken (and holds the original)
+    baks = list(tmp_path.glob("*advisor-bak*"))
+    assert len(baks) == 1
+    assert baks[0].read_text() == FLAT_AGENT_TOML
+
+
+def test_apply_validation_failure_json(monkeypatch, tmp_path):
+    _install_fake(monkeypatch)
+    agent_toml = _write(tmp_path, "agent.toml", FLAT_AGENT_TOML)
+    monkeypatch.setenv("ADVISOR_AGENT_TOML", str(agent_toml))
+
+    def corrupting(target, **kw):
+        with open(target.path, "w", encoding="utf-8") as fh:
+            fh.write("[broken\n")
+
+    monkeypatch.setattr(cli, "set_tier_fields", corrupting)
+
+    rc, text = _run("apply", "polecat", "--shape", "implement", "--json")
+    assert rc == 4
+    obj = json.loads(text)
+    assert obj["applied"] is False
+    assert obj["rolled_back"] is True
+    assert "error" in obj and "rolled back" in obj["error"]
+    assert agent_toml.read_text() == FLAT_AGENT_TOML
+
+
 def test_apply_writes_into_matching_agent_block(monkeypatch, tmp_path):
     """`model` must land inside the [[agent]] name="polecat" block, not mayor's."""
     _install_fake(monkeypatch)