Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,15 @@ jobs:
git fetch --quiet origin "$PUSH_BEFORE_SHA" || true
bash scripts/check-legal-name-leaks.sh --diff "$PUSH_BEFORE_SHA..HEAD"
fi
# Cross-runtime gate-manifest wiring check (reform §6 P0 — one gate-set
# SSOT). Verifies the codex hook adapter, antigrav/vibe/gemini capability
# markers, and CI jobs/run-markers still match hooks/gate-manifest.yaml.
# The live ~/.claude/settings.json check has no file on CI runners, so it
# is skipped here and enforced by the pre-commit hook on the operator
# machine where the live settings exist.
- name: Gate manifest wiring check
if: needs.post_merge_duplicate_filter.outputs.duplicate_merge_group != 'true' && needs.docs_only_filter.outputs.docs_only != 'true'
run: uv run python scripts/gate-manifest-check.py --skip-claude-settings


typecheck:
Expand Down
9 changes: 9 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,12 @@ repos:
language: system
files: ^(config/pipewire/[^/]+\.conf|scripts/check-audio-conf-names\.py)$
pass_filenames: false
- id: gate-manifest-check
name: Cross-runtime gate-manifest wiring check (incl. live claude settings)
# --require-claude-settings checks the live ~/.claude/settings.json (the
# most drift-prone wiring) against hooks/gate-manifest.yaml. Runs on the
# operator machine where the live settings exist; CI checks the rest.
entry: uv run python scripts/gate-manifest-check.py --require-claude-settings
language: system
files: ^(hooks/gate-manifest\.yaml|hooks/scripts/(codex|antigrav)-hook-adapter\.sh|scripts/hapax-(codex|antigrav|vibe|gemini)|config/codex/config\.toml|\.github/workflows/ci\.yml)$
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Run the live settings pre-commit check unconditionally

Because this hook has a files: filter and no always_run: true, pre-commit only invokes the --require-claude-settings live-settings check when one of these repo paths is staged. The drift-prone file it is meant to protect (~/.claude/settings.json) is outside the repo, so settings drift followed by an ordinary source/docs commit skips the check entirely instead of enforcing the advertised live wiring validation.

Useful? React with 👍 / 👎.

pass_filenames: false
1 change: 1 addition & 0 deletions hooks/gate-manifest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -264,3 +264,4 @@ runtimes:
- 'uv run --no-project --with pyrefly==0.62.0 pyrefly check'
- 'uv run --no-project --with pytest==9.0.2 --with pyyaml pytest'
- 'uv run --no-sync python scripts/ci_governance_coverage_gate.py'
- 'uv run python scripts/gate-manifest-check.py --skip-claude-settings'
64 changes: 55 additions & 9 deletions scripts/cc-pr-autoqueue.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,13 @@
FleetThrottlePolicy,
ThrottleDecision,
active_quarantined_pr_numbers,
bisection_plan_for_failed_runs,
decide_fleet_throttle,
read_jsonl_records,
read_quarantine,
recommend_max_entries_to_build,
reconcile_flake_quarantines,
write_quarantine,
)
from shared.release_gate import evaluate_avsdlc_release_gate # noqa: E402
from shared.sdlc_lifecycle import ( # noqa: E402
Expand Down Expand Up @@ -88,7 +91,12 @@
CI_REPAIR_KINDS = {"cicd-speedup", "ci-repair", "ci-speedup", "merge-queue-repair"}
CI_REPAIR_TAGS = {"cicd", "ci", "autoqueue"}
INDEPENDENT_QUEUE_ADMISSION = {"independent", "independent_route"}
DEFAULT_STORM_OPEN_PR_THRESHOLD = 8
# Open-PR COUNT is advisory-only — it raises a "busy" signal but NEVER freezes
# admission (FM-3). The only freeze is failure-RATE based (decide_fleet_throttle).
# The old ``*_STORM_OPEN_PR_THRESHOLD`` naming implied a count freeze that no
# longer exists; the advisory name is canonical, the storm alias is deprecated.
DEFAULT_ADVISORY_OPEN_PR_COUNT = 8
DEFAULT_STORM_OPEN_PR_THRESHOLD = DEFAULT_ADVISORY_OPEN_PR_COUNT # deprecated alias
DEFAULT_STORM_FAILED_MERGE_GROUP_THRESHOLD = 1
DEFAULT_STORM_RECENT_RUN_LIMIT = 20
STORM_MAX_ENTRIES_TO_BUILD = 1
Expand Down Expand Up @@ -198,6 +206,7 @@ class StormMode:
failure_rate: float
failure_rate_samples: int
rate_frozen: bool
recommended_bisections: tuple[dict[str, Any], ...] = ()

def as_dict(self, *, repo: str) -> dict[str, Any]:
return {
Expand All @@ -210,6 +219,7 @@ def as_dict(self, *, repo: str) -> dict[str, Any]:
"blocked_queued_prs": list(self.blocked_queued_prs),
"failed_recent_merge_group_run_count": len(self.failed_recent_merge_group_runs),
"failed_recent_merge_group_runs": list(self.failed_recent_merge_group_runs),
"recommended_bisections": list(self.recommended_bisections),
"failure_rate": self.failure_rate,
"failure_rate_samples": self.failure_rate_samples,
"rate_frozen": self.rate_frozen,
Expand Down Expand Up @@ -1002,6 +1012,9 @@ def _build_storm_mode(
failure_rate=throttle_decision.failure_rate,
failure_rate_samples=throttle_decision.samples,
rate_frozen=throttle_decision.frozen,
recommended_bisections=tuple(
bisection_plan_for_failed_runs(failed_recent_merge_group_runs)
),
)


Expand All @@ -1016,8 +1029,9 @@ def run_reconciler(
required_checks: tuple[str, ...] = DEFAULT_REQUIRED_CHECKS,
limit: int = 100,
lineage_ledger_path: Path | None = DEFAULT_LEDGER_PATH,
quarantine_path: Path = DEFAULT_QUARANTINE_PATH,
storm_mode_enabled: bool = True,
storm_open_pr_threshold: int = DEFAULT_STORM_OPEN_PR_THRESHOLD,
advisory_open_pr_count: int = DEFAULT_ADVISORY_OPEN_PR_COUNT,
storm_failed_merge_group_threshold: int = DEFAULT_STORM_FAILED_MERGE_GROUP_THRESHOLD,
storm_recent_run_limit: int = DEFAULT_STORM_RECENT_RUN_LIMIT,
auto_arm_ledger_path: Path | None = None,
Expand Down Expand Up @@ -1051,10 +1065,24 @@ def run_reconciler(
]
now = datetime.now(UTC)
lineage_records = read_jsonl_records(lineage_ledger_path) if lineage_ledger_path else []
quarantined_prs = active_quarantined_pr_numbers(
read_quarantine(DEFAULT_QUARANTINE_PATH), now=now
throttle_policy = FleetThrottlePolicy(advisory_open_pr_count=advisory_open_pr_count)
# Quarantine WRITE side (FM-3/FM-4 reversible quarantine): open quarantines for
# PRs over the failure threshold, lift expired ones, and persist (apply mode
# only). PRs already quarantined ON ENTRY are excluded from THIS tick's
# failure-rate signal; PRs newly quarantined this tick are persisted now and
# take effect next tick — isolating a flaky PR converges without a one-tick
# regression in fleet protection.
existing_quarantine = read_quarantine(quarantine_path)
quarantined_prs = active_quarantined_pr_numbers(existing_quarantine, now=now)
quarantine_reconciliation = reconcile_flake_quarantines(
Comment on lines +1075 to +1077
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Apply quarantines to PR admission decisions

Please feed the active flake quarantine into classify_pr/the mutation loop, not just the fleet-rate calculation. With an existing active quarantine for a PR whose checks and task metadata are otherwise ready, these new lines only exclude it from decide_fleet_throttle/recommend_max_entries_to_build; the later classify_pr(...) calls receive no quarantine context and can still return queue or enable_auto_merge, so apply=True will call gh pr merge for the quarantined flaky PR and defeat the quarantine.

Useful? React with 👍 / 👎.

existing_quarantine,
lineage_records,
candidate_prs={pr.number for pr in prs},
policy=throttle_policy,
now=now,
)
throttle_policy = FleetThrottlePolicy(advisory_open_pr_count=storm_open_pr_threshold)
if apply and (quarantine_reconciliation.newly_quarantined or quarantine_reconciliation.lifted):
write_quarantine(quarantine_path, quarantine_reconciliation.records)
Comment on lines +1075 to +1085
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | 🏗️ Heavy lift

Serialize the quarantine read/modify/write cycle.

This new write-side path reads the ledger, reconciles in memory, then overwrites the whole file without any lock. Two overlapping --apply runs can start from the same snapshot and the later write_quarantine() will silently drop the earlier run's newly opened/lifted records.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@scripts/cc-pr-autoqueue.py` around lines 987 - 997, The read/modify/write of
the quarantine ledger is racy: the sequence using read_quarantine(...),
reconcile_flake_quarantines(...), and write_quarantine(...) must be serialized
so two concurrent --apply runs don't clobber each other; wrap the full read →
reconcile → write sequence with an exclusive file lock on quarantine_path (or
implement an optimistic read-verify-retry loop that re-reads the file before
writing and retries if it changed) so that the code that calls read_quarantine,
reconcile_flake_quarantines, and write_quarantine holds the lock until
write_quarantine completes; ensure the lock is acquired before calling
read_quarantine and released only after write_quarantine returns (use a
cross-platform file lock helper or retry-on-conflict loop).

throttle_decision = decide_fleet_throttle(
lineage_records,
open_pr_count=len(prs),
Expand Down Expand Up @@ -1190,6 +1218,18 @@ def run_reconciler(
"active_ci_repair_task_ids": list(active_ci_repair_task_ids),
"storm_mode_enabled": storm_mode_enabled,
"storm_mode": storm_mode.as_dict(repo=repo),
"flake_quarantine": {
"path": str(quarantine_path),
"active": quarantine_reconciliation.active,
"newly_quarantined": quarantine_reconciliation.newly_quarantined,
"lifted": quarantine_reconciliation.lifted,
"written": bool(
apply
and (
quarantine_reconciliation.newly_quarantined or quarantine_reconciliation.lifted
)
),
},
"lineage_ledger_path": str(lineage_ledger_path) if lineage_ledger_path else None,
"open_pr_count": len(prs),
"queued_prs": sorted(queued_prs),
Expand Down Expand Up @@ -1258,10 +1298,16 @@ def main(argv: list[str] | None = None) -> int:
help="Report storm/admission pressure but do not add storm admission holds.",
)
parser.add_argument(
"--storm-open-pr-threshold",
"--advisory-open-pr-count",
"--storm-open-pr-threshold", # deprecated alias
type=int,
default=DEFAULT_STORM_OPEN_PR_THRESHOLD,
help="Open PR count at or above which storm admission pressure is active.",
dest="advisory_open_pr_count",
default=DEFAULT_ADVISORY_OPEN_PR_COUNT,
help=(
"Open PR count at or above which the queue reports an advisory 'busy' "
"signal. Advisory only — it never freezes admission (the only freeze is "
"failure-rate based). --storm-open-pr-threshold is a deprecated alias."
),
)
parser.add_argument(
"--storm-failed-merge-group-threshold",
Expand Down Expand Up @@ -1302,7 +1348,7 @@ def main(argv: list[str] | None = None) -> int:
limit=args.limit,
lineage_ledger_path=args.lineage_ledger_path,
storm_mode_enabled=not args.disable_storm_mode,
storm_open_pr_threshold=args.storm_open_pr_threshold,
advisory_open_pr_count=args.advisory_open_pr_count,
storm_failed_merge_group_threshold=args.storm_failed_merge_group_threshold,
storm_recent_run_limit=args.storm_recent_run_limit,
)
Expand Down
139 changes: 139 additions & 0 deletions scripts/executor_contract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
"""Executor adapter contract — the one capability surface every runtime conforms
to (reform §6 P1).

Each launcher (Claude, Codex, Gemini, Vibe, Antigrav) speaks a common adapter
CLI; their genuine differences (Gemini read-only, Antigrav's IDE-surface hook
gap, which runtimes have a real headless path) are reported as machine-legible
*capability flags* by :func:`capabilities`, NOT branched in the dispatcher. The
dispatcher consumes :func:`supports_route` to decide launchability instead of a
hard ``(platform, mode)`` if-ladder, and ``hapax-executor-capabilities`` /
``hapax-methodology-dispatch --capabilities`` emit the registry as JSON so the
CLOG cockpit and other clients read the same contract.

Colocated with the dispatcher and the ``hapax-executor-capabilities`` probe under
``scripts/`` so all three share one definition.
"""

from __future__ import annotations

from pydantic import BaseModel

# The canonical adapter CLI every launcher accepts (quirks live in the flags
# below, not in extra options). Order is informational.
ADAPTER_CLI_CONTRACT: tuple[str, ...] = (
"--lane",
"--task",
"--mode", # headless | interactive | receipt-only
"--prompt",
"--no-claim",
"--force",
Comment on lines +23 to +29
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Align the advertised adapter flags with launchers

The contract emitted by hapax-executor-capabilities --contract is not accepted by the new Codex headless adapter: hapax-codex-headless only parses --task, --no-claim, and --force before positional <cx-session> <governed-initial-message>, so a client following this machine-readable contract with --lane cx-red --prompt ... is rejected before launch. Either implement these flags in the adapters or stop advertising them as accepted by every launcher.

Useful? React with 👍 / 👎.

)

# Dispatch modes in which an executor can actually be launched.
# ``receipt-only`` (also listed as a ``--mode`` value in the adapter CLI
# contract) is a dispatch-level validation mode that spawns nothing, so it is
# deliberately not counted as an executor capability here.
LAUNCH_MODES: tuple[str, ...] = ("headless", "interactive")


class ExecutorCapabilities(BaseModel, frozen=True):
    """Immutable, machine-readable capability record for one executor runtime.

    The model is declared ``frozen=True``, so instances cannot be mutated after
    construction. Field order is preserved because it determines the key order
    of the dumped payload.
    """

    # Runtime identifier.
    platform: str
    # Dispatch modes this runtime can be launched in.
    modes: tuple[str, ...]
    # Capability profiles the route table exposes for this runtime.
    profiles: tuple[str, ...]
    # Whether the runtime can mutate source under governance.
    mutates: bool
    # Whether the runtime participates in the cc-task claim lease.
    claims: bool
    # Whether the dispatch-launched path enforces governance hooks.
    hooks_wired: bool
    # Whether a genuine non-interactive (no tmux pane) path exists.
    headless: bool
    # Whether the runtime's default posture is read-only.
    read_only: bool = False
    # Free-form human-readable remarks.
    notes: str = ""

    def supports(self, mode: str) -> bool:
        """Report whether ``mode`` is one of this runtime's launchable modes."""
        launchable = self.modes
        return mode in launchable


EXECUTOR_REGISTRY: dict[str, ExecutorCapabilities] = {
"claude": ExecutorCapabilities(
platform="claude",
modes=("headless", "interactive"),
profiles=("full", "opus", "sonnet"),
mutates=True,
claims=True,
hooks_wired=True,
headless=True,
notes="stream-json headless lane (hapax-claude-headless) + tmux interactive",
),
"codex": ExecutorCapabilities(
platform="codex",
modes=("headless",),
profiles=("full", "spark"),
mutates=True,
claims=True,
hooks_wired=True,
headless=True,
notes=(
"codex exec headless (hapax-codex-headless). The tmux pane (hapax-codex) "
"exists for direct interactive use but is not a governed dispatch route."
),
),
"gemini": ExecutorCapabilities(
platform="gemini",
modes=("headless",),
profiles=("full", "worker", "flash", "lite"),
mutates=False,
claims=False,
Comment on lines +81 to +83
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Expose Gemini worker mutation in capabilities

This capability record lists the worker profile but still reports mutates=False and claims=False, even though hapax-methodology-dispatch has a mutable gemini/headless/worker route and launches it with HAPAX_GEMINI_APPROVAL_MODE=auto_edit. Any cockpit or scheduler using the new machine-readable capabilities will incorrectly treat the governed Gemini worker path as read-only/non-claiming and can route or display it unsafely.

Useful? React with 👍 / 👎.

hooks_wired=True,
headless=True,
read_only=True,
notes="read-only/plan-mode by policy; the worker profile is a governed auto-edit exception",
),
"vibe": ExecutorCapabilities(
platform="vibe",
modes=("headless",),
profiles=("full",),
mutates=True,
claims=True,
hooks_wired=True,
headless=True,
notes="bounded one-shot headless worker lane",
),
"antigrav": ExecutorCapabilities(
platform="antigrav",
modes=("interactive",),
profiles=("full",),
mutates=True,
claims=True,
hooks_wired=True,
headless=False,
notes=(
"agy CLI interactive; PreToolUse gate wired via antigrav-hook-adapter "
"(#3802). Residual gap: the IDE Edit/Write surface is not gated."
),
),
}


def capabilities(platform: str) -> ExecutorCapabilities | None:
    """Look up the registered capability record for ``platform``.

    Unknown platforms yield ``None`` instead of raising.
    """
    try:
        return EXECUTOR_REGISTRY[platform]
    except KeyError:
        return None


def supports_route(platform: str, mode: str) -> bool:
    """Tell whether ``platform`` can be launched in ``mode``.

    A platform with no registered capability record is never launchable.
    """
    record = capabilities(platform)
    if record is None:
        return False
    return record.supports(mode)


def capabilities_payload() -> dict[str, dict]:
    """Dump every registered executor's flags, keyed by name in sorted order.

    This is the payload behind the ``capabilities`` probe.
    """
    payload: dict[str, dict] = {}
    for name in sorted(EXECUTOR_REGISTRY):
        payload[name] = EXECUTOR_REGISTRY[name].model_dump()
    return payload


# Explicit public API: the names exported by ``from executor_contract import *``.
__all__ = [
    "ADAPTER_CLI_CONTRACT",
    "LAUNCH_MODES",
    "EXECUTOR_REGISTRY",
    "ExecutorCapabilities",
    "capabilities",
    "supports_route",
    "capabilities_payload",
]
Loading
Loading