From b996c376259a88974436329d535cfa6c41879cf9 Mon Sep 17 00:00:00 2001 From: "Aaron K. Clark" Date: Wed, 20 May 2026 17:19:16 -0500 Subject: [PATCH] reliability(patterns): atomic save of usage cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit \`socrates patterns review\` on a CompanyOS with many projects can take seconds to walk every .md file. A SIGINT during the final \`(patterns_dir / .usage-cache.json).write_text(...)\` previously left a truncated/invalid file. The next run's \`_load_usage_cache\` would JSONDecodeError on the partial file and return None — silently discarding ALL cached segments and forcing a full re-scan. (The current load already swallows the JSONDecodeError into None; the bug is the silent cache reset, not a crash.) Fix: same atomic-write pattern as interview.py's save — \`.tmp\` + os.replace. Cache is either fully old or fully new, never half. Tempfile cleaned up on both success and failure paths. Tests added (2): - post-save: no .tmp leftover - mid-save os.replace failure: pre-existing cache survives intact, no .tmp leftover 149/149 tests pass; ruff + mypy clean. --- src/socrates120x/patterns.py | 21 +++++++++++++-- tests/test_patterns.py | 52 ++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 2 deletions(-) diff --git a/src/socrates120x/patterns.py b/src/socrates120x/patterns.py index 7a35e36..c2dde00 100644 --- a/src/socrates120x/patterns.py +++ b/src/socrates120x/patterns.py @@ -18,6 +18,7 @@ import contextlib import datetime as _dt import json +import os import re import sys from dataclasses import dataclass @@ -258,10 +259,26 @@ def _load_usage_cache(patterns_dir: Path) -> dict[str, Any] | None: def _save_usage_cache(patterns_dir: Path, payload: dict[str, Any]) -> None: + """Persist the usage cache atomically. + + `socrates patterns review` on a CompanyOS with many projects can take + seconds. A SIGINT mid-write would leave a truncated/invalid cache + file, which would then crash the next run inside json.loads. Use a + same-directory tempfile + os.replace for atomicity (same pattern as + interview.py's atomic save) so the cache is either fully old or + fully new — never half-written. + """ if not patterns_dir.is_dir(): return - with contextlib.suppress(OSError): - (patterns_dir / USAGE_CACHE_FILENAME).write_text(json.dumps(payload, indent=2)) + target = patterns_dir / USAGE_CACHE_FILENAME + tmp = target.with_name(target.name + ".tmp") + try: + with contextlib.suppress(OSError): + tmp.write_text(json.dumps(payload, indent=2), encoding="utf-8") + os.replace(tmp, target) + finally: + with contextlib.suppress(FileNotFoundError): + tmp.unlink() def format_pattern_report(report: PatternReport, *, use_color: bool | None = None) -> str: diff --git a/tests/test_patterns.py b/tests/test_patterns.py index f0c219c..ff1cdb5 100644 --- a/tests/test_patterns.py +++ b/tests/test_patterns.py @@ -266,3 +266,55 @@ def test_cache_rejects_old_version(company: Path) -> None: refreshed = json.loads(cache_path.read_text()) assert refreshed["version"] == 2 assert "phantom-from-v1" not in str(refreshed) + + +# --------------------------------------------------------------------------- +# Atomic cache save (reliability/patterns-cache-atomic-save) +# --------------------------------------------------------------------------- + + +def test_usage_cache_save_is_atomic_no_tempfile_left(company) -> None: + """After a successful run, no .tmp leftover next to the cache.""" + _make_build(company, "alpha") + today = _dt.date.today().isoformat() + (company / "patterns" / "CANDIDATE-x.md").write_text( + _pattern(today, "alpha", "x"), encoding="utf-8", + ) + review_patterns(company) # writes the cache + cache = company / "patterns" / ".usage-cache.json" + assert cache.is_file() + assert not (company / "patterns" / ".usage-cache.json.tmp").exists() + + +def test_usage_cache_save_does_not_clobber_on_failure( + company, monkeypatch +) -> None: + """If os.replace fails mid-save, the pre-existing cache must not be + truncated/wiped — atomic-write contract.""" + import socrates120x.patterns as patterns_mod + + _make_build(company, "alpha") + today = _dt.date.today().isoformat() + (company / "patterns" / "CANDIDATE-x.md").write_text( + _pattern(today, "alpha", "x"), encoding="utf-8", + ) + # First run writes a real cache. + review_patterns(company) + cache = company / "patterns" / ".usage-cache.json" + original = cache.read_text(encoding="utf-8") + + # Now patch os.replace to fail and re-run — original cache must survive. + def boom(src, dst): + raise OSError("simulated replace failure") + monkeypatch.setattr(patterns_mod.os, "replace", boom) + # Touch a build .md file so the cache would have been rewritten. + state = company / "builds" / "alpha" / "planning" / "STATE.md" + state.write_text(state.read_text(encoding="utf-8") + "\nedit\n", encoding="utf-8") + review_patterns(company) # must not raise; failed save is swallowed + + # Pre-existing cache is untouched (or has been replaced atomically by + # earlier successful writes — but never truncated/empty). + survived = cache.read_text(encoding="utf-8") + assert survived == original + # No stranded tempfile. + assert not (company / "patterns" / ".usage-cache.json.tmp").exists()