CryptoJones · CryptoJones · May 20, 2026
diff --git a/src/socrates120x/interview.py b/src/socrates120x/interview.py
@@ -11,7 +11,10 @@
 
 from __future__ import annotations
 
+import contextlib
 import json
+import os
+import sys
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any
@@ -30,6 +33,31 @@
 __all__ = ["Interview", "Question", "QUESTIONS", "is_interactive"]
 
 
+def _atomic_write_text(path: Path, text: str) -> None:
+    """Write *text* to *path* via a same-directory tempfile + rename.
+
+    A naked path.write_text() opens, truncates, and writes — if the process
+    is killed mid-write (Ctrl-C, OOM, power loss) the file is left
+    truncated or partially written, which then breaks the next
+    `socrates init --resume` with a JSONDecodeError stacktrace.
+    Tempfile + rename is atomic on POSIX (and on Windows since Python 3.3
+    via os.replace) so the file on disk is either the old contents or
+    the new contents — never half-and-half.
+    """
+    tmp = path.with_name(path.name + ".tmp")
+    try:
+        tmp.write_text(text)
+        # os.replace is atomic across filesystems on the same device; if the
+        # tempfile lives in the same dir as the target (which it does here),
+        # we're always on the same device.
+        os.replace(tmp, path)
+    finally:
+        # If anything went wrong, leave a clean directory — don't strand the
+        # tempfile for the next run to wonder about.
+        with contextlib.suppress(FileNotFoundError):
+            tmp.unlink()
+
+
 # The full init interview. Order matters — earlier answers prime the user for
 # later ones (decisions before risks, risks before open questions, etc.).
 QUESTIONS: tuple[Question, ...] = (
@@ -193,11 +221,33 @@ class Interview:
     questions: tuple[Question, ...] = field(default_factory=lambda: QUESTIONS)
 
     def load(self) -> None:
-        if self.answers_path.exists() and self.resume:
+        """Load answers from disk if --resume was passed.
+
+        A previous run that was killed mid-save could have left a corrupted
+        file. Instead of blowing up with a JSONDecodeError stacktrace, warn
+        the operator and start fresh — they'd have to re-answer questions
+        either way, and the alternative is unrecoverable from the CLI.
+        """
+        if not (self.answers_path.exists() and self.resume):
+            return
+        try:
             self.answers = json.loads(self.answers_path.read_text())
+        except (json.JSONDecodeError, OSError) as e:
+            print(
+                f"warning: could not read {self.answers_path}: {e}\n"
+                f"  Starting interview from scratch. The corrupt file will be "
+                f"overwritten on the first answer.",
+                file=sys.stderr,
+            )
+            self.answers = {}
 
     def save(self) -> None:
-        self.answers_path.write_text(json.dumps(self.answers, indent=2) + "\n")
+        # Atomic write — see _atomic_write_text. Without this, Ctrl-C during
+        # save() leaves a truncated file that crashes the next --resume.
+        _atomic_write_text(
+            self.answers_path,
+            json.dumps(self.answers, indent=2) + "\n",
+        )
 
     def run(
         self,

diff --git a/tests/test_interview.py b/tests/test_interview.py
@@ -120,3 +120,94 @@ def fake_editor_run(cmd: list[str], check: bool = True) -> Any:  # noqa: ARG001
 
     # business_goal is a multiline required question — should have come from the editor.
     assert iv.answers["business_goal"] == "real answer line one\nreal answer line two"
+
+
+# ---------------------------------------------------------------------------
+# Atomic save + corrupted resume recovery (reliability/interview-atomic-save)
+# ---------------------------------------------------------------------------
+
+
+def test_save_is_atomic_no_tempfile_left_behind(tmp_path: Path) -> None:
+    """save() must not leave behind the .tmp file used for the atomic rename."""
+    iv = Interview(answers_path=tmp_path / "answers.json", project_name="p")
+    iv.answers = {"k": "v"}
+    iv.save()
+    assert (tmp_path / "answers.json").exists()
+    assert not (tmp_path / "answers.json.tmp").exists()
+
+
+def test_save_does_not_leave_partial_file_on_failure(
+    tmp_path: Path, monkeypatch: Any
+) -> None:
+    """If the rename step fails, the final file should not exist in a partial
+    state. (The tempfile may exist but the target should be untouched.)"""
+    answers_path = tmp_path / "answers.json"
+    # Seed with a known good file.
+    answers_path.write_text('{"old": "value"}\n')
+
+    iv = Interview(answers_path=answers_path, project_name="p")
+    iv.answers = {"new": "value"}
+
+    # Make os.replace fail so we hit the cleanup path.
+    import socrates120x.interview as iv_mod
+    real_replace = iv_mod.os.replace
+    def boom(src: object, dst: object) -> None:
+        raise OSError("simulated replace failure")
+    monkeypatch.setattr(iv_mod.os, "replace", boom)
+
+    import contextlib  # noqa: PLC0415
+    with contextlib.suppress(OSError):
+        iv.save()  # expected to fail at the rename; verify state AFTER
+
+    # The pre-existing file must NOT have been clobbered by the failed save.
+    assert answers_path.read_text() == '{"old": "value"}\n'
+    # And no tempfile should be left behind to confuse the next run.
+    assert not (tmp_path / "answers.json.tmp").exists()
+    # Restore (paranoia).
+    monkeypatch.setattr(iv_mod.os, "replace", real_replace)
+
+
+def test_resume_with_corrupt_answers_file_warns_and_starts_fresh(
+    tmp_path: Path, capsys: Any
+) -> None:
+    """A previous Ctrl-C during save could have left a truncated/invalid
+    JSON file. Re-running with --resume must warn and start fresh
+    instead of crashing with JSONDecodeError."""
+    answers_path = tmp_path / "answers.json"
+    # Simulate a half-written file (the kind a SIGINT mid-write would leave).
+    answers_path.write_text('{"k": "v"')  # missing closing brace
+
+    iv = Interview(
+        answers_path=answers_path,
+        project_name="recoverable",
+        resume=True,
+    )
+    iv.load()  # should NOT raise
+    err = capsys.readouterr().err
+    assert "warning" in err.lower()
+    assert "answers.json" in err
+    # And answers should be empty so the interview can proceed from scratch.
+    assert iv.answers == {}
+
+
+def test_resume_with_unreadable_answers_file_warns_and_starts_fresh(
+    tmp_path: Path, capsys: Any, monkeypatch: Any
+) -> None:
+    """An OSError on read (e.g., permission denied) should also warn rather
+    than crash."""
+    answers_path = tmp_path / "answers.json"
+    answers_path.write_text('{"k": "v"}')
+
+    # Monkeypatch read_text to raise OSError to simulate permission denied.
+    real_read = Path.read_text
+    def boom(self: Path, *a: object, **kw: object) -> str:
+        if self == answers_path:
+            raise OSError("simulated permission denied")
+        return real_read(self, *a, **kw)
+    monkeypatch.setattr(Path, "read_text", boom)
+
+    iv = Interview(answers_path=answers_path, project_name="p", resume=True)
+    iv.load()  # should NOT raise
+    err = capsys.readouterr().err
+    assert "warning" in err.lower()
+    assert iv.answers == {}