From dfd9144a32c3b743087a25a9de73c6298f171395 Mon Sep 17 00:00:00 2001 From: Oleksii Dolhov Date: Thu, 25 Jun 2026 10:49:07 +0300 Subject: [PATCH 1/2] fix(ci): guard the Alembic (Postgres) track in schema-parity (#1342) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The schema-parity required check validated only the SQLite track (migrations.py ↔ schema.py). A schema change that ships the SQLite migration but omits the Alembic revision under src/backend/migrations/versions/ passed every required check green yet broke PostgreSQL — init_database() runs alembic_runner.upgrade_to_head(), which applies revision files only and does not autogenerate from tables.py. Two PRs reached "green CI but PG-broken" and had to be held by hand. Add a cross-track guard, folded into the existing required schema-parity job (no new required-check to manage): - scripts/ci/check_alembic_parity.py — fails a PR that ADDS schema DDL to db/{migrations,schema,tables}.py without a net-new revision file under src/backend/migrations/versions/. Pure stdlib, PR-only (diffs base...head). - Heuristic / false-positive guard: the signal is a DDL keyword on an *added, non-comment* line (SQL: CREATE/ALTER/ADD COLUMN/…; SQLAlchemy: Column(/Table(/Index(/…). Comment edits, data-only and down migrations carry no DDL keyword, so they don't trip it. Documented in the script docstring and the workflow header. - tests/unit/test_alembic_parity_guard.py — 20 tests incl. the acceptance fixtures (SQLite-only change fails; dual-tracked passes; comment/data-only pass). Wired into the parity pytest run. Also notes the enforcement in architecture.md Invariant #3. Verified locally: 24 tests pass; end-to-end smoke across clean / SQLite-only (exit 1) / paired-revision (exit 0) scenarios behaves correctly. Related to #1342 Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/schema-parity.yml | 27 ++- docs/memory/architecture.md | 2 +- scripts/ci/check_alembic_parity.py | 207 +++++++++++++++++++++++ tests/unit/test_alembic_parity_guard.py | 209 ++++++++++++++++++++++++ 4 files changed, 442 insertions(+), 3 deletions(-) create mode 100755 scripts/ci/check_alembic_parity.py create mode 100644 tests/unit/test_alembic_parity_guard.py diff --git a/.github/workflows/schema-parity.yml b/.github/workflows/schema-parity.yml index 6a73f7daf..42d6e06e1 100644 --- a/.github/workflows/schema-parity.yml +++ b/.github/workflows/schema-parity.yml @@ -17,6 +17,17 @@ name: Schema Parity Gate # doing work. When a relevant file changes, the real test runs and can fail. # Change detection is pure `git diff` — no third-party action and no token # scope beyond `contents: read`. +# +# CROSS-TRACK ALEMBIC GUARD (#1342): the pytest above guards only the SQLite +# track. PostgreSQL is a SECOND track — `init_database()` runs +# `alembic_runner.upgrade_to_head()`, which applies revision files only (it does +# NOT autogenerate from `tables.py`). A SQLite-only schema change therefore +# merges green yet PG-broken. The extra step below (`scripts/ci/check_alembic_parity.py`) +# fails a PR that ADDS schema DDL to db/{migrations,schema,tables}.py without a +# net-new revision under src/backend/migrations/versions/. Heuristic & false- +# positive guard (comments / data-only / down migrations carry no DDL keyword, +# so they don't trip) are documented in that script's module docstring. PR-only +# (it needs the PR base/head to diff); pure stdlib, no extra deps. on: pull_request: @@ -24,9 +35,12 @@ on: branches: [main, dev] paths: - 'src/backend/db/**' + - 'src/backend/migrations/**' - 'src/backend/database.py' - 'src/backend/utils/helpers.py' - 'tests/unit/test_schema_parity.py' + - 'tests/unit/test_alembic_parity_guard.py' + - 'scripts/ci/check_alembic_parity.py' - 'tests/requirements-test.txt' - '.github/workflows/schema-parity.yml' workflow_dispatch: @@ -42,6 +56,15 @@ jobs: with: fetch-depth: 0 # need base history to diff the PR's changed paths + # Cross-track guard (#1342): SQLite-track DDL must ship a paired Alembic + # revision. PR-only — needs the PR base/head SHAs to diff. Pure stdlib. + - name: Alembic (Postgres) parity guard + if: github.event_name == 'pull_request' + env: + PR_BASE_SHA: ${{ github.event.pull_request.base.sha }} + PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} + run: python3 scripts/ci/check_alembic_parity.py + - name: Detect schema-relevant changes id: changes env: @@ -64,7 +87,7 @@ jobs: fi CHANGED="$(git diff --name-only "$PR_BASE_SHA...$PR_HEAD_SHA")" echo "Changed files in PR:"; echo "$CHANGED" - if printf '%s\n' "$CHANGED" | grep -qE '^(src/backend/db/|src/backend/database\.py$|src/backend/utils/helpers\.py$|tests/unit/test_schema_parity\.py$|tests/requirements-test\.txt$|\.github/workflows/schema-parity\.yml$)'; then + if printf '%s\n' "$CHANGED" | grep -qE '^(src/backend/db/|src/backend/database\.py$|src/backend/utils/helpers\.py$|tests/unit/test_schema_parity\.py$|tests/unit/test_alembic_parity_guard\.py$|scripts/ci/check_alembic_parity\.py$|tests/requirements-test\.txt$|\.github/workflows/schema-parity\.yml$)'; then echo "relevant=true" >> "$GITHUB_OUTPUT" echo "schema-relevant files changed — running the parity check." else @@ -98,4 +121,4 @@ jobs: REDIS_URL: "redis://test:test@redis:6379" REDIS_PASSWORD: "test" REDIS_BACKEND_PASSWORD: "test" - run: pytest tests/unit/test_schema_parity.py -v --tb=short + run: pytest tests/unit/test_schema_parity.py tests/unit/test_alembic_parity_guard.py -v --tb=short diff --git a/docs/memory/architecture.md b/docs/memory/architecture.md index 25cd0115d..5e2b4ff98 100644 --- a/docs/memory/architecture.md +++ b/docs/memory/architecture.md @@ -785,7 +785,7 @@ These are structural patterns that must be preserved. Breaking them causes casca 2. **DB Layer: Class-per-domain with Mixin Composition** — Each `db/` file defines an `XOperations` class. Agent-specific settings use mixins (`db/agent_settings/`) composed into `AgentOperations`. New agent settings → new mixin, not a bigger class. -3. **Schema in `db/schema.py`, Migrations in `db/migrations.py`** — All OSS table DDL lives in `schema.py`. Schema changes require a versioned migration in `migrations.py` (tracked in the `schema_migrations` table). Never create tables ad-hoc in service code. **Runner safety (#1160):** `init_database()` wraps both migration passes + `init_schema` in a cross-process `flock` (`db/migration_lock.py`) so concurrent uvicorn workers + the scheduler container can't race the suite; table-rebuild migrations use `_atomic_rebuild` (rename-swap inside an explicit `BEGIN`/`COMMIT`) so a crash mid-rebuild rolls back instead of losing rows between an autocommitted `DROP` and the re-insert; a failed migration is named on its traceback (`add_note`) and surfaced as `first_pending` in the `/health` 503. **Backend split (#1183):** the `db/migrations.py` bespoke runner (PRAGMA + `INSERT OR IGNORE`) is **SQLite-only**. The PostgreSQL backend is owned by **Alembic** — `init_database()`'s non-SQLite branch calls `db/alembic_runner.upgrade_to_head()` (`src/backend/migrations/` + `alembic.ini`; `env.py` targets the `db/tables.py` MetaData). A fresh PG DB is built by the `0001_baseline` revision (which reuses the exact `init_schema_postgres` head DDL); a pre-Alembic PG DB is stamped at baseline, not rebuilt. The two systems coexist during the SQLite→Postgres transition, so a schema change currently lands in **both** `migrations.py` (SQLite) and a new Alembic revision (Postgres) until SQLite reaches **end-of-support on September 1, 2026** (decision #1278; migration guide `docs/migrations/SQLITE_TO_POSTGRES.md`) — after which the long-term goal (#746) is `tables.py` MetaData as the single source with autogenerated revisions. **Two-track migrations (open-core):** enterprise modules own only `enterprise_*` tables and migrate them through a **separate** runner (`enterprise/backend/_migrations.py`) tracked in `enterprise_schema_migrations` — never the OSS `schema_migrations`, so the two version-lines can't collide. Enterprise authors one file per migration in the module's `migrations/` package (`NNNN_slug.py` with `NAME` + `upgrade(cursor, conn)`, auto-discovered in filename order). Enterprise migrations may FK-into OSS tables but must **never ALTER** an OSS table — anything OSS must enforce goes through an OSS migration as an edition-agnostic primitive (e.g. `users.suspended_at`, #995). The enterprise runner is invoked from `register_enterprise` *after* OSS `init_database`, so OSS tables already exist. +3. **Schema in `db/schema.py`, Migrations in `db/migrations.py`** — All OSS table DDL lives in `schema.py`. Schema changes require a versioned migration in `migrations.py` (tracked in the `schema_migrations` table). Never create tables ad-hoc in service code. **Runner safety (#1160):** `init_database()` wraps both migration passes + `init_schema` in a cross-process `flock` (`db/migration_lock.py`) so concurrent uvicorn workers + the scheduler container can't race the suite; table-rebuild migrations use `_atomic_rebuild` (rename-swap inside an explicit `BEGIN`/`COMMIT`) so a crash mid-rebuild rolls back instead of losing rows between an autocommitted `DROP` and the re-insert; a failed migration is named on its traceback (`add_note`) and surfaced as `first_pending` in the `/health` 503. **Backend split (#1183):** the `db/migrations.py` bespoke runner (PRAGMA + `INSERT OR IGNORE`) is **SQLite-only**. The PostgreSQL backend is owned by **Alembic** — `init_database()`'s non-SQLite branch calls `db/alembic_runner.upgrade_to_head()` (`src/backend/migrations/` + `alembic.ini`; `env.py` targets the `db/tables.py` MetaData). A fresh PG DB is built by the `0001_baseline` revision (which reuses the exact `init_schema_postgres` head DDL); a pre-Alembic PG DB is stamped at baseline, not rebuilt. The two systems coexist during the SQLite→Postgres transition, so a schema change currently lands in **both** `migrations.py` (SQLite) and a new Alembic revision (Postgres) — both tracks CI-guarded by the `schema-parity` job (SQLite parity pytest + the `scripts/ci/check_alembic_parity.py` cross-track guard that fails a DDL change missing its Alembic revision, #1342) — until SQLite reaches **end-of-support on September 1, 2026** (decision #1278; migration guide `docs/migrations/SQLITE_TO_POSTGRES.md`) — after which the long-term goal (#746) is `tables.py` MetaData as the single source with autogenerated revisions. **Two-track migrations (open-core):** enterprise modules own only `enterprise_*` tables and migrate them through a **separate** runner (`enterprise/backend/_migrations.py`) tracked in `enterprise_schema_migrations` — never the OSS `schema_migrations`, so the two version-lines can't collide. Enterprise authors one file per migration in the module's `migrations/` package (`NNNN_slug.py` with `NAME` + `upgrade(cursor, conn)`, auto-discovered in filename order). Enterprise migrations may FK-into OSS tables but must **never ALTER** an OSS table — anything OSS must enforce goes through an OSS migration as an edition-agnostic primitive (e.g. `users.suspended_at`, #995). The enterprise runner is invoked from `register_enterprise` *after* OSS `init_database`, so OSS tables already exist. 4. **Router Registration Order Matters** — In `main.py`, static routes like `/api/agents/context-stats` must come before `/{name}` catch-all. New collection-level agent endpoints must be registered before parameterized routes. diff --git a/scripts/ci/check_alembic_parity.py b/scripts/ci/check_alembic_parity.py new file mode 100755 index 000000000..575b1aff2 --- /dev/null +++ b/scripts/ci/check_alembic_parity.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python3 +"""Cross-track schema-parity guard: SQLite DDL change ⇒ Alembic revision (#1342). + +The `schema-parity` pytest suite only validates the **SQLite** track +(`db/migrations.py` ↔ `db/schema.py`). Per Architectural Invariant #3, every +schema change must ALSO ship a new **Alembic** revision under +`src/backend/migrations/versions/` — otherwise PostgreSQL breaks, because +`init_database()` runs `alembic_runner.upgrade_to_head()`, which applies +revision files only (it does not autogenerate from `tables.py`). With no guard, +a SQLite-only schema change merges green yet PG-broken. + +Heuristic (documented for the false-positive guard, AC #4): + + A PR that ADDS data-definition (DDL) to any of + `src/backend/db/{migrations.py,schema.py,tables.py}` MUST also ADD a net-new + file under `src/backend/migrations/versions/`. + + "DDL" = an *added* (`+`), *non-comment* diff line carrying a schema keyword: + - SQL DDL (migrations.py / schema.py): CREATE TABLE/INDEX/TRIGGER, + ALTER TABLE, ADD/DROP COLUMN, RENAME COLUMN/TO, DROP TABLE/INDEX. + - SQLAlchemy DDL (tables.py): Column(/Table(/Index(/UniqueConstraint(/ + ForeignKey(/PrimaryKeyConstraint(. + + Because the signal is a DDL *token on an added line*, it does NOT trip on: + - comment / docstring-only edits (comment lines are skipped), + - data-only or down migrations (UPDATE/INSERT/DELETE carry no DDL keyword), + - edits that only touch an existing migration's body without new DDL. + + A real column/table add always emits `ADD COLUMN` / `CREATE TABLE` (in + migrations.py) or a new `Column(`/`Table(` (in tables.py), so it is caught. + A schema.py-only DDL edit that forgets the SQLite migration is caught + separately (red) by the existing schema-parity pytest, which then forces the + migration — and this guard then forces the revision. + +Usage: + check_alembic_parity.py + # or, with no args, reads PR_BASE_SHA / PR_HEAD_SHA from the environment. + +Exit codes: 0 = pass (no DDL, or DDL + revision present), 1 = missing revision, +2 = usage / git error. Pure stdlib; no third-party deps. +""" +from __future__ import annotations + +import os +import re +import subprocess +import sys + +# Files whose DDL changes require a paired Alembic revision. +SQL_DDL_FILES = ("src/backend/db/migrations.py", "src/backend/db/schema.py") +SQLALCHEMY_DDL_FILE = "src/backend/db/tables.py" +WATCHED_FILES = (*SQL_DDL_FILES, SQLALCHEMY_DDL_FILE) + +# Net-new revision target. +REVISION_DIR = "src/backend/migrations/versions/" + +# Raw-SQL DDL keywords (case-insensitive). Anchored on word boundaries so e.g. +# "created_at" does not match "CREATE". +_SQL_DDL = re.compile( + r"\b(" + r"CREATE\s+(?:UNIQUE\s+)?INDEX" + r"|CREATE\s+TABLE" + r"|CREATE\s+TRIGGER" + r"|ALTER\s+TABLE" + r"|ADD\s+COLUMN" + r"|DROP\s+COLUMN" + r"|DROP\s+TABLE" + r"|DROP\s+INDEX" + r"|RENAME\s+COLUMN" + r"|RENAME\s+TO" + r")\b", + re.IGNORECASE, +) + +# SQLAlchemy schema constructs in tables.py (the Alembic MetaData source). +_SQLALCHEMY_DDL = re.compile( + r"\b(Column|Table|Index|UniqueConstraint|ForeignKey|PrimaryKeyConstraint)\s*\(" +) + + +def _patterns_for(path: str) -> re.Pattern | None: + if path == SQLALCHEMY_DDL_FILE: + return _SQLALCHEMY_DDL + if path in SQL_DDL_FILES: + return _SQL_DDL + return None + + +def _is_comment(content: str) -> bool: + """A line we treat as a comment for DDL-detection purposes.""" + stripped = content.lstrip() + return stripped.startswith("#") + + +def iter_added_lines(diff_text: str): + """Yield (path, content) for every added line of a unified diff. + + Tracks the current file from `+++ b/` headers; skips the `+++`/`---` + headers themselves. Robust to multi-file diffs. + """ + current: str | None = None + for line in diff_text.splitlines(): + if line.startswith("+++ "): + target = line[4:].strip() + if target == "/dev/null": + current = None + else: + # strip the "b/" prefix git uses + current = target[2:] if target.startswith("b/") else target + continue + if line.startswith("--- ") or line.startswith("diff --git"): + continue + if line.startswith("+") and not line.startswith("+++"): + if current is not None: + yield current, line[1:] + + +def find_ddl_evidence(diff_text: str) -> list[str]: + """Return human-readable evidence strings for added DDL lines.""" + evidence: list[str] = [] + for path, content in iter_added_lines(diff_text): + pattern = _patterns_for(path) + if pattern is None or _is_comment(content): + continue + match = pattern.search(content) + if match: + evidence.append(f"{path}: + {content.strip()[:120]}") + return evidence + + +def diff_has_ddl(diff_text: str) -> bool: + return bool(find_ddl_evidence(diff_text)) + + +def added_revision_files(name_status_lines: list[str]) -> list[str]: + """Net-new (`A`) files under the Alembic versions dir, excluding __init__.""" + found: list[str] = [] + for raw in name_status_lines: + parts = raw.split("\t") + if len(parts) < 2: + continue + status, path = parts[0], parts[-1] + if not status.startswith("A"): + continue + if path.startswith(REVISION_DIR) and path.endswith(".py") and not path.endswith("__init__.py"): + found.append(path) + return found + + +def would_block(ddl_diff_text: str, name_status_lines: list[str]) -> bool: + """Core decision: DDL added on the SQLite track with no net-new revision.""" + return diff_has_ddl(ddl_diff_text) and not added_revision_files(name_status_lines) + + +def _git(args: list[str]) -> str: + return subprocess.run( + ["git", *args], check=True, capture_output=True, text=True + ).stdout + + +def main(argv: list[str]) -> int: + base = argv[1] if len(argv) > 2 else os.environ.get("PR_BASE_SHA", "") + head = argv[2] if len(argv) > 2 else os.environ.get("PR_HEAD_SHA", "") + null_sha = "0" * 40 + if not base or not head or base == null_sha: + print("alembic-parity: no PR base/head available — skipping (pass).") + return 0 + + rng = f"{base}...{head}" + try: + ddl_diff = _git(["diff", rng, "--", *WATCHED_FILES]) + name_status = _git(["diff", "--name-status", rng]).splitlines() + except subprocess.CalledProcessError as exc: + print(f"alembic-parity: git diff failed: {exc.stderr}", file=sys.stderr) + return 2 + + evidence = find_ddl_evidence(ddl_diff) + if not evidence: + print("alembic-parity: no SQLite/MetaData DDL added — nothing to guard (pass).") + return 0 + + revisions = added_revision_files(name_status) + print("alembic-parity: DDL change detected in this PR:") + for line in evidence[:20]: + print(f" • {line}") + if revisions: + print("alembic-parity: paired Alembic revision(s) added:") + for rev in revisions: + print(f" ✓ {rev}") + print("alembic-parity: PASS — both tracks updated.") + return 0 + + print( + "\nalembic-parity: FAIL — this PR adds schema DDL on the SQLite track but\n" + f"adds no new Alembic revision under {REVISION_DIR}\n" + "PostgreSQL applies revision files only (it does NOT autogenerate from\n" + "tables.py), so this change would break PG. Add a revision per\n" + "Architectural Invariant #3 (see docs/migrations/SQLITE_TO_POSTGRES.md).\n" + "If this is a data-only/down migration or a non-DDL edit, ensure the\n" + "added lines carry no DDL keyword (see the heuristic in this script).", + file=sys.stderr, + ) + return 1 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv)) diff --git a/tests/unit/test_alembic_parity_guard.py b/tests/unit/test_alembic_parity_guard.py new file mode 100644 index 000000000..6117995e2 --- /dev/null +++ b/tests/unit/test_alembic_parity_guard.py @@ -0,0 +1,209 @@ +"""Unit tests for the cross-track Alembic parity guard (#1342). + +The guard fails a PR that adds SQLite-track schema DDL without a paired net-new +Alembic revision. These tests exercise the pure decision functions against +synthetic unified diffs — the same shapes the CI step feeds from `git diff` — +covering the acceptance criteria: + + * a SQLite-only schema change FAILS, + * a properly dual-tracked change PASSES, + * comment / data-only / down-migration edits do NOT trip it (false-positive guard). +""" +import importlib.util +from pathlib import Path + +import pytest + +# Load the guard module from scripts/ci/ (not an importable package). +_GUARD_PATH = Path(__file__).resolve().parents[2] / "scripts" / "ci" / "check_alembic_parity.py" +_spec = importlib.util.spec_from_file_location("check_alembic_parity", _GUARD_PATH) +guard = importlib.util.module_from_spec(_spec) +_spec.loader.exec_module(guard) + + +def _diff(path: str, added: list[str]) -> str: + """Build a minimal unified diff adding `added` lines to `path`.""" + body = "".join(f"+{line}\n" for line in added) + return ( + f"diff --git a/{path} b/{path}\n" + f"--- a/{path}\n" + f"+++ b/{path}\n" + "@@ -1,0 +1,%d @@\n" % len(added) + ) + body + + +NEW_REVISION = "A\tsrc/backend/migrations/versions/0005_agent_loops_new_cols.py" + + +# --- DDL detection: positives ------------------------------------------------- + +def test_add_column_in_migrations_is_ddl(): + diff = _diff( + "src/backend/db/migrations.py", + [' cursor.execute("ALTER TABLE agent_loops ADD COLUMN foo TEXT")'], + ) + assert guard.diff_has_ddl(diff) is True + + +def test_create_table_in_schema_is_ddl(): + diff = _diff( + "src/backend/db/schema.py", + [" CREATE TABLE widgets ("], + ) + assert guard.diff_has_ddl(diff) is True + + +def test_create_index_in_schema_is_ddl(): + diff = _diff( + "src/backend/db/schema.py", + [" CREATE INDEX idx_widgets_name ON widgets(name);"], + ) + assert guard.diff_has_ddl(diff) is True + + +def test_new_column_in_tables_metadata_is_ddl(): + diff = _diff( + "src/backend/db/tables.py", + [' Column("foo", Text),'], + ) + assert guard.diff_has_ddl(diff) is True + + +def test_new_table_in_tables_metadata_is_ddl(): + diff = _diff( + "src/backend/db/tables.py", + ["widgets = Table("], + ) + assert guard.diff_has_ddl(diff) is True + + +# --- DDL detection: negatives (false-positive guard) -------------------------- + +def test_comment_mentioning_alter_table_is_not_ddl(): + diff = _diff( + "src/backend/db/migrations.py", + [" # this migration would ALTER TABLE if it were not a no-op"], + ) + assert guard.diff_has_ddl(diff) is False + + +def test_data_only_migration_is_not_ddl(): + diff = _diff( + "src/backend/db/migrations.py", + [' cursor.execute("UPDATE agent_ownership SET require_email = 1")'], + ) + assert guard.diff_has_ddl(diff) is False + + +def test_column_body_line_without_keyword_is_not_ddl(): + # A column row inside an existing CREATE TABLE string (schema.py) carries no + # DDL keyword on its own; the paired migrations.py ADD COLUMN is the signal. + diff = _diff("src/backend/db/schema.py", [" new_col TEXT,"]) + assert guard.diff_has_ddl(diff) is False + + +def test_created_at_does_not_match_create(): + diff = _diff("src/backend/db/schema.py", [" created_at TEXT NOT NULL,"]) + assert guard.diff_has_ddl(diff) is False + + +def test_unrelated_file_ddl_is_ignored(): + # DDL-looking text in a non-watched file must not trip the guard. + diff = _diff("src/backend/services/foo.py", [" sql = 'ALTER TABLE x ADD COLUMN y'"]) + assert guard.diff_has_ddl(diff) is False + + +# --- revision detection ------------------------------------------------------- + +def test_added_revision_detected(): + assert guard.added_revision_files([NEW_REVISION]) == [ + "src/backend/migrations/versions/0005_agent_loops_new_cols.py" + ] + + +def test_modified_revision_does_not_count(): + line = "M\tsrc/backend/migrations/versions/0004_agent_ownership_voice_name.py" + assert guard.added_revision_files([line]) == [] + + +def test_versions_init_excluded(): + line = "A\tsrc/backend/migrations/versions/__init__.py" + assert guard.added_revision_files([line]) == [] + + +def test_rename_status_with_score_handled(): + # git emits e.g. "R100\told\tnew" — last column is the path; not an add. + line = "R100\tsrc/backend/migrations/versions/0004_x.py\tsrc/backend/migrations/versions/0004_y.py" + assert guard.added_revision_files([line]) == [] + + +# --- end-to-end decision (acceptance criteria) -------------------------------- + +def test_sqlite_only_schema_change_blocks(): + """AC: a SQLite-only schema change FAILS the check.""" + ddl = _diff( + "src/backend/db/migrations.py", + [' cursor.execute("ALTER TABLE agent_loops ADD COLUMN bar TEXT")'], + ) + name_status = ["M\tsrc/backend/db/migrations.py", "M\tsrc/backend/db/schema.py"] + assert guard.would_block(ddl, name_status) is True + + +def test_dual_tracked_change_passes(): + """AC: a properly dual-tracked change PASSES.""" + ddl = _diff( + "src/backend/db/migrations.py", + [' cursor.execute("ALTER TABLE agent_loops ADD COLUMN bar TEXT")'], + ) + name_status = [ + "M\tsrc/backend/db/migrations.py", + "M\tsrc/backend/db/schema.py", + "M\tsrc/backend/db/tables.py", + NEW_REVISION, + ] + assert guard.would_block(ddl, name_status) is False + + +def test_comment_only_change_passes(): + """AC: comment edits to watched files do not trip the guard.""" + ddl = _diff("src/backend/db/migrations.py", [" # tidy up a docstring; ALTER TABLE mention"]) + name_status = ["M\tsrc/backend/db/migrations.py"] + assert guard.would_block(ddl, name_status) is False + + +def test_data_only_migration_passes_without_revision(): + """AC: data-only migration (no DDL keyword) does not require a revision.""" + ddl = _diff( + "src/backend/db/migrations.py", + [' cursor.execute("DELETE FROM idempotency_keys WHERE created_at < ?")'], + ) + name_status = ["M\tsrc/backend/db/migrations.py"] + assert guard.would_block(ddl, name_status) is False + + +# --- diff parsing ------------------------------------------------------------- + +def test_iter_added_lines_tracks_file_and_skips_headers(): + diff = ( + _diff("src/backend/db/tables.py", [' Column("a", Text),']) + + _diff("src/backend/db/schema.py", [" CREATE TABLE z ("]) + ) + pairs = list(guard.iter_added_lines(diff)) + files = {p for p, _ in pairs} + assert files == {"src/backend/db/tables.py", "src/backend/db/schema.py"} + # the `+++ b/...` header lines must not be yielded as added content + assert all(not c.startswith("++") for _, c in pairs) + + +def test_evidence_lists_offending_lines(): + diff = _diff( + "src/backend/db/migrations.py", + [' cursor.execute("ALTER TABLE agent_loops ADD COLUMN bar TEXT")'], + ) + evidence = guard.find_ddl_evidence(diff) + assert len(evidence) == 1 + assert "ADD COLUMN" in evidence[0] + + +if __name__ == "__main__": + raise SystemExit(pytest.main([__file__, "-v"])) From fc4767687cb27edc12a6e1896f8e9cc993e23e7d Mon Sep 17 00:00:00 2001 From: Oleksii Dolhov Date: Thu, 25 Jun 2026 11:17:14 +0300 Subject: [PATCH 2/2] fix(ci): tighten Alembic guard to require a new MIGRATIONS entry (#1342) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verifying against real repo history surfaced a false positive: the migration-runner refactor #1263 (_atomic_rebuild table rebuilds) re-emits CREATE TABLE / CREATE INDEX for *existing* tables in a rename-swap but adds no actual schema and no new MIGRATIONS entry — yet the original "any added DDL keyword" heuristic flagged it, violating AC #4 (non-schema edits must not trip). Tighten the signal to two conjuncts: a schema change must (1) register a net-new ("name", _migrate_fn) entry in the MIGRATIONS list AND (2) carry a DDL keyword. Runner refactors / table rebuilds add no entry → exempt; data-only new migrations carry no DDL → exempt; real column/table adds do both → caught. Validated against real commits: • #740 agent_loops, #526 agent_ownership column → FAIL (correctly blocked) • #668 compat, voice_name (both shipped an Alembic revision) → PASS • #1263 runner refactor → PASS (false positive fixed) A full post-Alembic history scan finds 0 outstanding missing revisions, so no backfill is owed; the pre-Alembic columns are already in 0001_baseline. 28 tests pass (added MIGRATIONS-entry detection + the #1263 rebuild case). Related to #1342 Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/ci/check_alembic_parity.py | 84 ++++++++++++++++++------- tests/unit/test_alembic_parity_guard.py | 62 ++++++++++++++---- 2 files changed, 113 insertions(+), 33 deletions(-) diff --git a/scripts/ci/check_alembic_parity.py b/scripts/ci/check_alembic_parity.py index 575b1aff2..d1052f3f3 100755 --- a/scripts/ci/check_alembic_parity.py +++ b/scripts/ci/check_alembic_parity.py @@ -11,26 +11,32 @@ Heuristic (documented for the false-positive guard, AC #4): - A PR that ADDS data-definition (DDL) to any of - `src/backend/db/{migrations.py,schema.py,tables.py}` MUST also ADD a net-new - file under `src/backend/migrations/versions/`. - - "DDL" = an *added* (`+`), *non-comment* diff line carrying a schema keyword: - - SQL DDL (migrations.py / schema.py): CREATE TABLE/INDEX/TRIGGER, - ALTER TABLE, ADD/DROP COLUMN, RENAME COLUMN/TO, DROP TABLE/INDEX. - - SQLAlchemy DDL (tables.py): Column(/Table(/Index(/UniqueConstraint(/ - ForeignKey(/PrimaryKeyConstraint(. - - Because the signal is a DDL *token on an added line*, it does NOT trip on: + A PR that registers a NEW SQLite migration carrying schema DDL MUST also ADD + a net-new file under `src/backend/migrations/versions/`. Two conjuncts: + + (1) NEW MIGRATION — an added line in `db/migrations.py` registers a net-new + entry in the `MIGRATIONS` list: `("", _migrate_),`. This is the + unambiguous "I'm shipping a schema change" act; runner refactors, lock + changes, and table-rebuild helpers do NOT add an entry. + (2) DDL — an *added* (`+`), *non-comment* diff line in + `db/{migrations,schema,tables}.py` carries a schema keyword: + - SQL (migrations.py / schema.py): CREATE TABLE/INDEX/TRIGGER, + ALTER TABLE, ADD/DROP COLUMN, RENAME COLUMN/TO, DROP TABLE/INDEX. + - SQLAlchemy (tables.py): Column(/Table(/Index(/UniqueConstraint(/ + ForeignKey(/PrimaryKeyConstraint(. + + Requiring BOTH is what keeps the false-positive rate down — it does NOT trip on: - comment / docstring-only edits (comment lines are skipped), - - data-only or down migrations (UPDATE/INSERT/DELETE carry no DDL keyword), - - edits that only touch an existing migration's body without new DDL. + - data-only or down migrations (a new entry, but no DDL keyword), + - migration-runner refactors / table REBUILDS that re-emit CREATE TABLE for + an *existing* table without registering a new migration (conjunct 1 false). - A real column/table add always emits `ADD COLUMN` / `CREATE TABLE` (in - migrations.py) or a new `Column(`/`Table(` (in tables.py), so it is caught. - A schema.py-only DDL edit that forgets the SQLite migration is caught - separately (red) by the existing schema-parity pytest, which then forces the - migration — and this guard then forces the revision. + A real column/table add always registers a new `MIGRATIONS` entry AND emits + `ADD COLUMN` / `CREATE TABLE` (migrations.py) or a new `Column(`/`Table(` + (tables.py), so it is caught. A schema.py-only DDL edit that forgets the + SQLite migration is caught separately (red) by the existing schema-parity + pytest, which then forces the migration — and this guard then forces the + revision. Usage: check_alembic_parity.py @@ -77,6 +83,11 @@ r"\b(Column|Table|Index|UniqueConstraint|ForeignKey|PrimaryKeyConstraint)\s*\(" ) +# A net-new entry in the `MIGRATIONS = [...]` list in migrations.py: +# ("agent_loops_tables", _migrate_agent_loops_tables), +# Trinity registers every migration here and names the fn `_migrate_*`. +_MIGRATIONS_ENTRY = re.compile(r"""\(\s*["'][^"']+["']\s*,\s*_migrate_\w+""") + def _patterns_for(path: str) -> re.Pattern | None: if path == SQLALCHEMY_DDL_FILE: @@ -132,6 +143,17 @@ def diff_has_ddl(diff_text: str) -> bool: return bool(find_ddl_evidence(diff_text)) +def added_migration_entries(diff_text: str) -> list[str]: + """Net-new `MIGRATIONS` list entries added in db/migrations.py.""" + entries: list[str] = [] + for path, content in iter_added_lines(diff_text): + if path != "src/backend/db/migrations.py" or _is_comment(content): + continue + if _MIGRATIONS_ENTRY.search(content): + entries.append(content.strip()) + return entries + + def added_revision_files(name_status_lines: list[str]) -> list[str]: """Net-new (`A`) files under the Alembic versions dir, excluding __init__.""" found: list[str] = [] @@ -147,9 +169,14 @@ def added_revision_files(name_status_lines: list[str]) -> list[str]: return found +def is_schema_change(ddl_diff_text: str) -> bool: + """A new SQLite migration registering schema DDL (conjuncts 1 AND 2).""" + return bool(added_migration_entries(ddl_diff_text)) and diff_has_ddl(ddl_diff_text) + + def would_block(ddl_diff_text: str, name_status_lines: list[str]) -> bool: - """Core decision: DDL added on the SQLite track with no net-new revision.""" - return diff_has_ddl(ddl_diff_text) and not added_revision_files(name_status_lines) + """Core decision: a DDL-bearing new migration with no net-new revision.""" + return is_schema_change(ddl_diff_text) and not added_revision_files(name_status_lines) def _git(args: list[str]) -> str: @@ -174,13 +201,26 @@ def main(argv: list[str]) -> int: print(f"alembic-parity: git diff failed: {exc.stderr}", file=sys.stderr) return 2 + new_migrations = added_migration_entries(ddl_diff) evidence = find_ddl_evidence(ddl_diff) + if not new_migrations: + print( + "alembic-parity: no net-new MIGRATIONS entry — runner refactor / rebuild /\n" + "non-schema edit, nothing to guard (pass)." + ) + return 0 if not evidence: - print("alembic-parity: no SQLite/MetaData DDL added — nothing to guard (pass).") + print( + "alembic-parity: new migration is data-only (no DDL keyword) — " + "no Alembic revision required (pass)." + ) return 0 revisions = added_revision_files(name_status) - print("alembic-parity: DDL change detected in this PR:") + print("alembic-parity: new schema migration detected in this PR:") + for entry in new_migrations[:10]: + print(f" + {entry}") + print("alembic-parity: carrying DDL:") for line in evidence[:20]: print(f" • {line}") if revisions: diff --git a/tests/unit/test_alembic_parity_guard.py b/tests/unit/test_alembic_parity_guard.py index 6117995e2..034f07faa 100644 --- a/tests/unit/test_alembic_parity_guard.py +++ b/tests/unit/test_alembic_parity_guard.py @@ -34,6 +34,15 @@ def _diff(path: str, added: list[str]) -> str: NEW_REVISION = "A\tsrc/backend/migrations/versions/0005_agent_loops_new_cols.py" +# A net-new MIGRATIONS list entry — the "I'm shipping a schema change" signal. +MIGRATIONS_ENTRY = ' ("agent_loops_new_cols", _migrate_agent_loops_new_cols),' + + +def _migration_diff(ddl_lines: list[str], *, entry: bool = True) -> str: + """A migrations.py diff: optional new MIGRATIONS entry + DDL body lines.""" + added = ([MIGRATIONS_ENTRY] if entry else []) + ddl_lines + return _diff("src/backend/db/migrations.py", added) + # --- DDL detection: positives ------------------------------------------------- @@ -137,13 +146,29 @@ def test_rename_status_with_score_handled(): assert guard.added_revision_files([line]) == [] +# --- MIGRATIONS-entry detection (conjunct 1) ---------------------------------- + +def test_new_migrations_entry_detected(): + diff = _diff("src/backend/db/migrations.py", [MIGRATIONS_ENTRY]) + assert guard.added_migration_entries(diff) == [MIGRATIONS_ENTRY.strip()] + + +def test_commented_migrations_entry_not_detected(): + diff = _diff("src/backend/db/migrations.py", [' # ("old", _migrate_old),']) + assert guard.added_migration_entries(diff) == [] + + +def test_entry_in_other_file_not_detected(): + diff = _diff("src/backend/db/schema.py", [' ("x", _migrate_x),']) + assert guard.added_migration_entries(diff) == [] + + # --- end-to-end decision (acceptance criteria) -------------------------------- def test_sqlite_only_schema_change_blocks(): - """AC: a SQLite-only schema change FAILS the check.""" - ddl = _diff( - "src/backend/db/migrations.py", - [' cursor.execute("ALTER TABLE agent_loops ADD COLUMN bar TEXT")'], + """AC: a SQLite-only schema change (new migration + DDL) FAILS the check.""" + ddl = _migration_diff( + [' cursor.execute("ALTER TABLE agent_loops ADD COLUMN bar TEXT")'] ) name_status = ["M\tsrc/backend/db/migrations.py", "M\tsrc/backend/db/schema.py"] assert guard.would_block(ddl, name_status) is True @@ -151,9 +176,8 @@ def test_sqlite_only_schema_change_blocks(): def test_dual_tracked_change_passes(): """AC: a properly dual-tracked change PASSES.""" - ddl = _diff( - "src/backend/db/migrations.py", - [' cursor.execute("ALTER TABLE agent_loops ADD COLUMN bar TEXT")'], + ddl = _migration_diff( + [' cursor.execute("ALTER TABLE agent_loops ADD COLUMN bar TEXT")'] ) name_status = [ "M\tsrc/backend/db/migrations.py", @@ -172,12 +196,28 @@ def test_comment_only_change_passes(): def test_data_only_migration_passes_without_revision(): - """AC: data-only migration (no DDL keyword) does not require a revision.""" - ddl = _diff( - "src/backend/db/migrations.py", - [' cursor.execute("DELETE FROM idempotency_keys WHERE created_at < ?")'], + """AC: data-only migration (new entry, no DDL keyword) needs no revision.""" + ddl = _migration_diff( + [' cursor.execute("DELETE FROM idempotency_keys WHERE created_at < ?")'] + ) + name_status = ["M\tsrc/backend/db/migrations.py"] + assert guard.would_block(ddl, name_status) is False + + +def test_runner_refactor_with_rebuild_ddl_passes(): + """Real #1263 class: a migration-runner refactor that re-emits CREATE TABLE + for an existing table (table rebuild) but registers NO new MIGRATIONS entry + must NOT trip the guard, even though it carries DDL keywords.""" + ddl = _migration_diff( + [ + " CREATE TABLE agent_sharing_new (", + ' cursor.execute(f"ALTER TABLE {new} RENAME TO {table}")', + ' "CREATE INDEX IF NOT EXISTS idx_agent_skills_agent ON agent_skills(agent_name)",', + ], + entry=False, # the refactor adds no new MIGRATIONS entry ) name_status = ["M\tsrc/backend/db/migrations.py"] + assert guard.is_schema_change(ddl) is False assert guard.would_block(ddl, name_status) is False