-
Notifications
You must be signed in to change notification settings - Fork 0
feat(sdlc): auto-promote policy_decide on a clean shadow-week (kill the 3b cutover cliff) #3829
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,6 +30,7 @@ | |
| import os | ||
| import re | ||
| import shlex | ||
| import sys | ||
| from dataclasses import dataclass | ||
| from datetime import UTC, datetime | ||
| from pathlib import Path | ||
|
|
@@ -851,6 +852,288 @@ def evaluate_shadow_clean( | |
| } | ||
|
|
||
|
|
||
| # --- The auto-promotion state machine (reform fix: kill the manual 3b cutover) - | ||
| # | ||
| # ``evaluate_shadow_clean`` answers "is the shadow-week clean?" but nothing acted on | ||
| # a YES — ``3b-cutover`` was a MANUAL cliff a human had to step off by a deadline. A | ||
| # clean predicate that never promotes itself is the same freeze-blocks-thaw bug one | ||
| # layer up. This is the missing actuator: a reversible, version-stamped ladder | ||
| # | ||
| # shadow ──clean──▶ canary ──clean ≥24h──▶ authoritative (─not-clean──▶ shadow) | ||
| # | ||
| # that the promote timer advances one rung per clean tick and ROLLS BACK to shadow | ||
| # the instant the predicate fails (master design §4.1: "advisory-canary, reversible, | ||
| # never a hard cliff"). It only ever advances a recorded POSTURE — wiring that | ||
| # posture into the live gate verdict remains a separate, gated step (§4.1: the canary | ||
| # logs both decisions "before becoming the live verdict"). Per the permanent-canary | ||
| # discipline, any change to ``policy_decide`` (a new ``POLICY_DECIDE_FN_VERSION``) | ||
| # resets the ladder to shadow so the new logic must re-prove itself from scratch. | ||
|
|
||
| PROMOTION_SHADOW = "shadow" | ||
| PROMOTION_CANARY = "canary" | ||
| PROMOTION_AUTHORITATIVE = "authoritative" | ||
| _PROMOTION_STATES = frozenset({PROMOTION_SHADOW, PROMOTION_CANARY, PROMOTION_AUTHORITATIVE}) | ||
|
|
||
| #: The canary dwells in dual-decision mode this long before it may go authoritative. | ||
| DEFAULT_CANARY_WINDOW_SECONDS = 24 * 3600 | ||
|
|
||
| #: Current promotion posture (a projection); the audit trail is the ledger beside it. | ||
| DEFAULT_PROMOTION_STATE = Path(os.path.expanduser("~/.cache/hapax/policy-decide-promotion.json")) | ||
| DEFAULT_PROMOTION_LEDGER = Path(os.path.expanduser("~/.cache/hapax/policy-decide-promotion.jsonl")) | ||
|
|
||
| _EPOCH = datetime(1970, 1, 1, tzinfo=UTC) | ||
|
|
||
|
|
||
| def _iso(when: datetime) -> str: | ||
| return when.astimezone(UTC).strftime("%Y-%m-%dT%H:%M:%SZ") | ||
|
|
||
|
|
||
| @dataclass(frozen=True) | ||
| class PromotionState: | ||
| """The current promotion posture: which rung, stamped with the version that earned it.""" | ||
|
|
||
| state: str | ||
| policy_version: str | ||
| entered_state_at: datetime | ||
| updated_at: datetime | ||
|
|
||
|
|
||
| @dataclass(frozen=True) | ||
| class PromotionDecision: | ||
| """One transition verdict from ``decide_promotion`` — pure, carries its own version stamp.""" | ||
|
|
||
| from_state: str | ||
| to_state: str | ||
| changed: bool | ||
| reason: str | ||
| clean: bool | ||
| policy_version: str | ||
| dwell_seconds: float | ||
| now: datetime | ||
|
|
||
| def next_state(self, current: PromotionState) -> PromotionState: | ||
| """The posture to persist after this decision (entry clock resets only on a change).""" | ||
| entered = self.now if self.changed else current.entered_state_at | ||
| return PromotionState( | ||
| state=self.to_state, | ||
| policy_version=self.policy_version, | ||
| entered_state_at=entered, | ||
| updated_at=self.now, | ||
| ) | ||
|
|
||
|
|
||
| def decide_promotion( | ||
| current: PromotionState, | ||
| verdict: dict[str, object], | ||
| *, | ||
| policy_version: str, | ||
| now: datetime, | ||
| canary_window_seconds: float = DEFAULT_CANARY_WINDOW_SECONDS, | ||
| ) -> PromotionDecision: | ||
| """Pure ladder transition. ``verdict`` is an ``evaluate_shadow_clean`` result. | ||
|
|
||
| Advances one rung per clean tick, requires a ``canary_window_seconds`` dwell before | ||
| canary→authoritative, and rolls back to shadow on any not-clean verdict or any | ||
| ``policy_version`` change (the permanent-canary reset). Never raises; never skips a | ||
| rung; never hard-cuts straight to authoritative. | ||
| """ | ||
| clean = bool(verdict.get("clean")) | ||
| frm = current.state | ||
| dwell = max(0.0, (now - current.entered_state_at).total_seconds()) | ||
|
|
||
| def decision(to_state: str, changed: bool, reason: str) -> PromotionDecision: | ||
| return PromotionDecision( | ||
| from_state=frm, | ||
| to_state=to_state, | ||
| changed=changed, | ||
| reason=reason, | ||
| clean=clean, | ||
| policy_version=policy_version, | ||
| dwell_seconds=dwell, | ||
| now=now, | ||
| ) | ||
|
|
||
| # Permanent-canary discipline: a new policy_decide version must re-prove from shadow. | ||
| if current.policy_version != policy_version: | ||
| return decision( | ||
| PROMOTION_SHADOW, | ||
| True, | ||
| f"policy_version {current.policy_version}→{policy_version}: " | ||
| "re-entering shadow (permanent-canary discipline)", | ||
| ) | ||
|
|
||
| if frm == PROMOTION_SHADOW: | ||
| if clean: | ||
| return decision( | ||
| PROMOTION_CANARY, True, "shadow-week clean → canary (dual-decision window opens)" | ||
| ) | ||
| return decision(PROMOTION_SHADOW, False, _hold_reason(verdict)) | ||
|
|
||
| if frm == PROMOTION_CANARY: | ||
| if not clean: | ||
| return decision( | ||
| PROMOTION_SHADOW, | ||
| True, | ||
| f"canary regression ({_hold_reason(verdict)}) → rollback to shadow", | ||
| ) | ||
| if dwell >= canary_window_seconds: | ||
| return decision( | ||
| PROMOTION_AUTHORITATIVE, | ||
| True, | ||
| f"canary clean ≥{canary_window_seconds / 3600:.0f}h → authoritative-ready", | ||
|
Comment on lines
+979
to
+983
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
After a clean shadow week enters canary, this branch promotes solely because wall-clock dwell elapsed while the latest Useful? React with 👍 / 👎. |
||
| ) | ||
| return decision( | ||
| PROMOTION_CANARY, | ||
| False, | ||
| f"canary clean, {dwell / 3600:.1f}h/{canary_window_seconds / 3600:.0f}h elapsed", | ||
| ) | ||
|
|
||
| if frm == PROMOTION_AUTHORITATIVE: | ||
| if not clean: | ||
| return decision( | ||
| PROMOTION_SHADOW, | ||
| True, | ||
| f"authoritative regression ({_hold_reason(verdict)}) → rollback to shadow", | ||
| ) | ||
| return decision(PROMOTION_AUTHORITATIVE, False, "authoritative steady (shadow-week clean)") | ||
|
|
||
| return decision(PROMOTION_SHADOW, True, f"unknown promotion state '{frm}' → reset to shadow") | ||
|
|
||
|
|
||
| def _hold_reason(verdict: dict[str, object]) -> str: | ||
| reasons = verdict.get("reasons") | ||
| if isinstance(reasons, list) and reasons: | ||
| return "; ".join(str(r) for r in reasons) | ||
| return "shadow-week not clean" | ||
|
|
||
|
|
||
| def load_promotion_state( | ||
| state_path: str | os.PathLike[str] = DEFAULT_PROMOTION_STATE, | ||
| ) -> PromotionState: | ||
| """Read the persisted posture; an absent/garbage file defaults to a fresh shadow rung.""" | ||
| fresh = PromotionState(PROMOTION_SHADOW, POLICY_DECIDE_FN_VERSION, _EPOCH, _EPOCH) | ||
| try: | ||
| data = json.loads(Path(state_path).read_text(encoding="utf-8")) | ||
| except (FileNotFoundError, ValueError, OSError): | ||
| return fresh | ||
| if not isinstance(data, dict): | ||
| return fresh | ||
| state = str(data.get("state") or PROMOTION_SHADOW) | ||
| if state not in _PROMOTION_STATES: | ||
| state = PROMOTION_SHADOW | ||
| entered = _parse_decision_ts(data.get("entered_state_at")) or _EPOCH | ||
| updated = _parse_decision_ts(data.get("updated_at")) or entered | ||
| return PromotionState( | ||
| state=state, | ||
| policy_version=str(data.get("policy_version") or POLICY_DECIDE_FN_VERSION), | ||
| entered_state_at=entered, | ||
| updated_at=updated, | ||
| ) | ||
|
|
||
|
|
||
| def save_promotion_state( | ||
| state: PromotionState, state_path: str | os.PathLike[str] = DEFAULT_PROMOTION_STATE | ||
| ) -> None: | ||
| """Persist the posture atomically (tmp + replace). Best-effort; never raises (advisory).""" | ||
| try: | ||
| path = Path(state_path) | ||
| path.parent.mkdir(parents=True, exist_ok=True) | ||
| tmp = path.with_suffix(path.suffix + ".promote-tmp") | ||
| tmp.write_text( | ||
| json.dumps( | ||
| { | ||
| "state": state.state, | ||
| "policy_version": state.policy_version, | ||
| "entered_state_at": _iso(state.entered_state_at), | ||
| "updated_at": _iso(state.updated_at), | ||
| } | ||
| ) | ||
| + "\n", | ||
| encoding="utf-8", | ||
| ) | ||
| tmp.replace(path) | ||
| except OSError: | ||
| pass | ||
|
|
||
|
|
||
| def append_promotion_ledger( | ||
| decision: PromotionDecision, | ||
| next_state: PromotionState, | ||
| *, | ||
| ledger_path: str | os.PathLike[str] = DEFAULT_PROMOTION_LEDGER, | ||
| ) -> None: | ||
| """Append a transition row to the audit ledger. Best-effort; never raises (advisory).""" | ||
| try: | ||
| path = Path(ledger_path) | ||
| path.parent.mkdir(parents=True, exist_ok=True) | ||
| row = { | ||
| "ts": _iso(decision.now), | ||
| "from_state": decision.from_state, | ||
| "to_state": decision.to_state, | ||
| "changed": decision.changed, | ||
| "reason": decision.reason, | ||
| "clean": decision.clean, | ||
| "policy_version": decision.policy_version, | ||
| "dwell_seconds": round(decision.dwell_seconds, 1), | ||
| "entered_state_at": _iso(next_state.entered_state_at), | ||
| } | ||
| with path.open("a", encoding="utf-8") as handle: | ||
| handle.write(json.dumps(row) + "\n") | ||
| except OSError: | ||
| pass | ||
|
|
||
|
|
||
| def run_promotion_cycle( | ||
| *, | ||
| decision_log_path: str | os.PathLike[str] = DEFAULT_DECISION_LOG, | ||
| shadow_ledger_path: str | os.PathLike[str] = DEFAULT_SHADOW_LEDGER, | ||
| state_path: str | os.PathLike[str] = DEFAULT_PROMOTION_STATE, | ||
| ledger_path: str | os.PathLike[str] = DEFAULT_PROMOTION_LEDGER, | ||
| now: datetime | None = None, | ||
| policy_version: str = POLICY_DECIDE_FN_VERSION, | ||
| canary_window_seconds: float = DEFAULT_CANARY_WINDOW_SECONDS, | ||
| min_days: float = 7.0, | ||
| min_decisions: int = 200, | ||
| replay: bool = False, | ||
| ) -> dict[str, object]: | ||
| """Evaluate the shadow-week and advance/roll-back the promotion ladder one rung. | ||
|
|
||
| Optionally refreshes the divergence ledger first (``replay=True``) so coverage and | ||
| asymmetry are read consistently. Persists the new posture every tick (so ``updated_at`` | ||
| tracks liveness) and appends to the audit ledger only on an actual transition. | ||
| Advisory: it advances a posture, never the live gate verdict. Never raises. | ||
| """ | ||
| now = now or datetime.now(UTC) | ||
| if replay: | ||
| replay_decision_log(decision_log_path, shadow_ledger_path) | ||
| verdict = evaluate_shadow_clean( | ||
| decision_log_path, shadow_ledger_path, min_days=min_days, min_decisions=min_decisions | ||
| ) | ||
| current = load_promotion_state(state_path) | ||
| decision = decide_promotion( | ||
| current, | ||
| verdict, | ||
| policy_version=policy_version, | ||
| now=now, | ||
| canary_window_seconds=canary_window_seconds, | ||
| ) | ||
| nxt = decision.next_state(current) | ||
| save_promotion_state(nxt, state_path) | ||
| if decision.changed: | ||
| append_promotion_ledger(decision, nxt, ledger_path=ledger_path) | ||
| return { | ||
| "ts": _iso(now), | ||
| "from_state": decision.from_state, | ||
| "to_state": decision.to_state, | ||
| "changed": decision.changed, | ||
| "reason": decision.reason, | ||
| "clean": decision.clean, | ||
| "policy_version": decision.policy_version, | ||
| "dwell_seconds": round(decision.dwell_seconds, 1), | ||
| "verdict": verdict, | ||
| } | ||
|
|
||
|
|
||
| # --- Advisory shadow CLI ------------------------------------------------------ | ||
|
|
||
|
|
||
|
|
@@ -919,5 +1202,44 @@ def main(argv: list[str] | None = None) -> int: | |
| return 0 # advisory: never enforces during the shadow window | ||
|
|
||
|
|
||
| def promote_main(argv: list[str] | None = None) -> int: | ||
| """Advance the promotion ladder one rung — the reform 3b AUTO-PROMOTER entrypoint. | ||
|
|
||
| Wired to ``policy-decide-promote.timer`` via ``python -m shared.policy_decide promote``. | ||
| Each tick evaluates the shadow-week and advances/rolls-back the reversible, | ||
| version-stamped posture — the missing actuator that turns a clean predicate into an | ||
| actual promotion instead of the manual 3b-cutover cliff. ``--replay`` refreshes the | ||
| divergence ledger from the decision log first so coverage + asymmetry read | ||
| consistently. ADVISORY ONLY: it moves a recorded POSTURE, never the live gate | ||
| verdict, and always exits 0. | ||
| """ | ||
| parser = argparse.ArgumentParser(prog="policy_decide promote") | ||
| parser.add_argument("--decision-log", default=str(DEFAULT_DECISION_LOG)) | ||
| parser.add_argument("--ledger", default=str(DEFAULT_SHADOW_LEDGER)) | ||
| parser.add_argument("--state", default=str(DEFAULT_PROMOTION_STATE)) | ||
| parser.add_argument("--promotion-ledger", default=str(DEFAULT_PROMOTION_LEDGER)) | ||
| parser.add_argument("--min-days", type=float, default=7.0) | ||
| parser.add_argument("--min-decisions", type=int, default=200) | ||
| parser.add_argument( | ||
| "--replay", | ||
| action="store_true", | ||
| help="refresh the divergence ledger from the decision log before evaluating", | ||
| ) | ||
| args = parser.parse_args(argv) | ||
| result = run_promotion_cycle( | ||
| decision_log_path=args.decision_log, | ||
| shadow_ledger_path=args.ledger, | ||
| state_path=args.state, | ||
| ledger_path=args.promotion_ledger, | ||
| min_days=args.min_days, | ||
| min_decisions=args.min_decisions, | ||
| replay=args.replay, | ||
| ) | ||
| print(json.dumps(result, indent=2)) | ||
| return 0 # advisory: advances a posture, never the live gate verdict | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| if len(sys.argv) > 1 and sys.argv[1] == "promote": | ||
| raise SystemExit(promote_main(sys.argv[2:])) | ||
| raise SystemExit(main()) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| [Unit] | ||
| Description=Reform 3b AUTO-PROMOTER — advance the policy_decide promotion ladder when the shadow-week is clean | ||
| Documentation=file:///home/hapax/Documents/Personal/30-areas/hapax/reform-execution-manifest.yaml | ||
| After=network.target | ||
| OnFailure=notify-failure@%n.service | ||
|
|
||
| [Service] | ||
| Type=oneshot | ||
| # Advisory auto-promoter: the missing actuator on top of the shadow producer + | ||
| # evaluator. Each tick it refreshes the divergence ledger (--replay), evaluates the | ||
| # shadow-week, and advances/rolls-back the reversible, version-stamped promotion | ||
| # posture one rung (shadow→canary→authoritative). Runs from the canonical worktree | ||
| # (the deploy target). Never enforces — it moves a recorded posture, exits 0. | ||
| ExecStart=/home/hapax/.local/bin/uv run --directory /home/hapax/projects/hapax-council python -m shared.policy_decide promote --replay | ||
| Environment=PATH=/home/hapax/.local/bin:/home/hapax/.cargo/bin:/usr/local/bin:/usr/bin:/bin | ||
| Environment=HOME=/home/hapax | ||
| Nice=10 | ||
| MemoryMax=1G | ||
| StandardOutput=journal | ||
| StandardError=journal | ||
| SyslogIdentifier=policy-decide-promote |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| [Unit] | ||
| Description=Advance the policy_decide promotion ladder hourly when the shadow-week is clean (reform 3b auto-promoter) | ||
|
|
||
| [Timer] | ||
| # Hourly is ample: the ladder advances at most one rung per tick and the canary | ||
| # dwell (24h) is the real rate limiter. First run trails the producer's 10min boot | ||
| # replay so it evaluates fresh evidence. | ||
| OnBootSec=20min | ||
| OnUnitActiveSec=1h | ||
| RandomizedDelaySec=120 | ||
| Persistent=true | ||
|
|
||
| [Install] | ||
| WantedBy=timers.target |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When
POLICY_DECIDE_FN_VERSIONchanges, this branch only rewrites the posture toshadowwith the new version; it does not invalidate or checkpoint the shadow evidence that produced the currentcleanverdict. If the decision log already contains an old clean 7-day span, the next hourlyrun_promotion_cyclewill reuse that same historical verdict and promoteshadow → canarywithout collecting a fresh shadow week for the new logic, so the advertised permanent-canary discipline is bypassed after any policy_decide change.Useful? React with 👍 / 👎.