diff --git a/crates/loomweave-cli/src/doctor.rs b/crates/loomweave-cli/src/doctor.rs
index dd3d29e2..37749a9a 100644
--- a/crates/loomweave-cli/src/doctor.rs
+++ b/crates/loomweave-cli/src/doctor.rs
@@ -85,6 +85,7 @@ pub fn run(path: &Path, fix: bool, json_output: bool) -> Result {
     tally += check_db_tracked(&project_root, fix);
     tally += check_gitignore_current(&project_root, fix);
     tally += check_loomweave_dir(&project_root);
+    tally += check_index_integrity(&project_root, fix);
 
     println!("--- llm ---");
     tally += check_llm_provider(&project_root);
@@ -170,6 +171,7 @@ impl DoctorJsonCheck {
 fn json_report(project_root: &Path, fix: bool) -> DoctorJsonReport {
     let mut checks = vec![
         check_loomweave_dir_json(project_root),
+        check_index_integrity_json(project_root, fix),
         check_index_freshness_json(project_root),
         check_plugin_availability_json(),
         check_skill_json(project_root, fix),
@@ -371,6 +373,233 @@ fn check_loomweave_dir(project_root: &Path) -> Tally {
     }
 }
 
+/// Outcome of the index-integrity check (clarion-abda98c869 recovery). Shared by
+/// the text and JSON paths so they cannot drift.
+enum IntegrityOutcome {
+    /// No healthy index to check — the `.weft/loomweave.schema` check owns that
+    /// state; integrity stays silent rather than double-reporting.
+    Skipped,
+    /// Index is consistent. Also returned under `--fix` when there was nothing
+    /// to repair, so a clean index is never reported as "repaired".
+    Healthy,
+    /// Corruption found, `--fix` not requested.
+    Found {
+        stale: usize,
+        mismatches: usize,
+        sample: Vec<String>,
+    },
+    /// `--fix` removed stale rows and fully restored integrity.
+    Repaired {
+        removed_files: usize,
+        removed_entities: usize,
+    },
+    /// `--fix` ran but residual corruption remains (needs a full re-analyze);
+    /// the removed counts are zero when no stale file entity was found.
+    ResidualAfterFix {
+        removed_files: usize,
+        removed_entities: usize,
+        residual: usize,
+    },
+    /// Opening/repairing the DB errored (e.g. busy under a running `serve`).
+    Error(String),
+}
+
+/// Detect (and, under `--fix`, repair) index-integrity corruption: stale
+/// vanished-from-disk file entities and the `LMWV-INFRA-PARENT-CONTAINS-MISMATCH`
+/// invariant violations a file→package refactor leaves behind. Only runs on a
+/// healthy, migrated index (the schema check owns the other states).
+/// Under `--fix`, an index that needed no repair is reported as
+/// [`IntegrityOutcome::Healthy`], not `Repaired`.
+fn index_integrity_outcome(project_root: &Path, fix: bool) -> IntegrityOutcome {
+    if !matches!(
+        classify_index_db_health(project_root),
+        IndexDbHealth::Healthy
+    ) {
+        return IntegrityOutcome::Skipped;
+    }
+    let db_path = loomweave_core::store::db_path(project_root);
+
+    if fix {
+        match repair_index_integrity(&db_path, project_root) {
+            Ok(report) => {
+                let residual = report.residual.stale_file_entities.len()
+                    + report.residual.parent_contains_mismatches.len();
+                if residual > 0 {
+                    IntegrityOutcome::ResidualAfterFix {
+                        removed_files: report.removed_file_entities,
+                        removed_entities: report.removed_entities_total,
+                        residual,
+                    }
+                } else if report.removed_entities_total == 0 {
+                    // Nothing was removed and nothing is wrong: the index was
+                    // already consistent, so do not claim a repair happened.
+                    IntegrityOutcome::Healthy
+                } else {
+                    IntegrityOutcome::Repaired {
+                        removed_files: report.removed_file_entities,
+                        removed_entities: report.removed_entities_total,
+                    }
+                }
+            }
+            Err(err) => IntegrityOutcome::Error(err.to_string()),
+        }
+    } else {
+        match check_index_integrity_readonly(&db_path, project_root) {
+            Ok(report) if report.is_healthy() => IntegrityOutcome::Healthy,
+            Ok(report) => {
+                let sample = report
+                    .stale_file_entities
+                    .iter()
+                    .map(|s| format!("stale file: {}", s.path))
+                    .chain(
+                        report
+                            .parent_contains_mismatches
+                            .iter()
+                            .map(|m| m.detail.clone()),
+                    )
+                    .take(3)
+                    .collect();
+                IntegrityOutcome::Found {
+                    stale: report.stale_file_entities.len(),
+                    mismatches: report.parent_contains_mismatches.len(),
+                    sample,
+                }
+            }
+            Err(err) => IntegrityOutcome::Error(err.to_string()),
+        }
+    }
+}
+
+/// Open the index read-only and run the storage-layer integrity check (the
+/// non-`--fix` path).
+fn check_index_integrity_readonly(
+    db_path: &Path,
+    project_root: &Path,
+) -> Result<loomweave_storage::integrity::IntegrityReport> {
+    let conn = Connection::open_with_flags(db_path, rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY)
+        .with_context(|| format!("open index {} read-only", db_path.display()))?;
+    loomweave_storage::pragma::apply_read_pragmas(&conn).map_err(|e| anyhow::anyhow!("{e}"))?;
+    loomweave_storage::integrity::check_integrity(&conn, project_root)
+        .map_err(|e| anyhow::anyhow!("{e}"))
+}
+
+/// Open the index read-write and run the storage-layer surgical repair (the
+/// `--fix` path).
+fn repair_index_integrity(
+    db_path: &Path,
+    project_root: &Path,
+) -> Result<loomweave_storage::integrity::RepairReport> {
+    let mut conn = Connection::open(db_path)
+        .with_context(|| format!("open index {} for repair", db_path.display()))?;
+    loomweave_storage::pragma::apply_write_pragmas(&conn).map_err(|e| anyhow::anyhow!("{e}"))?;
+    loomweave_storage::integrity::repair_integrity(&mut conn, project_root)
+        .map_err(|e| anyhow::anyhow!("{e}"))
+}
+
+/// Remediation hint shared by the text and JSON paths when surgical repair
+/// leaves residual violations.
+const INTEGRITY_REBUILD_HINT: &str = "stop any running `loomweave serve`, then run `loomweave analyze --no-incremental` \
+     to fully rebuild the graph";
+
+/// Text-path index-integrity check.
+fn check_index_integrity(project_root: &Path, fix: bool) -> Tally {
+    match index_integrity_outcome(project_root, fix) {
+        IntegrityOutcome::Skipped => Tally::default(),
+        IntegrityOutcome::Healthy => {
+            ok("index integrity: no stale entities or parent/contains mismatches")
+        }
+        IntegrityOutcome::Found {
+            stale,
+            mismatches,
+            sample,
+        } => problem(
+            &format!(
+                "index integrity: {stale} stale file entit{} + {mismatches} parent/contains \
+                 mismatch{} (e.g. {})",
+                if stale == 1 { "y" } else { "ies" },
+                if mismatches == 1 { "" } else { "es" },
+                sample.first().map_or("—", String::as_str),
+            ),
+            Some("loomweave doctor --fix --path . (surgically removes stale rows)"),
+        ),
+        IntegrityOutcome::Repaired {
+            removed_files,
+            removed_entities,
+        } => ok(&format!(
+            "index integrity: repaired — removed {removed_files} stale file entit{} \
+             ({removed_entities} entit{} total); index is now consistent",
+            if removed_files == 1 { "y" } else { "ies" },
+            if removed_entities == 1 { "y" } else { "ies" },
+        )),
+        IntegrityOutcome::ResidualAfterFix {
+            removed_files,
+            removed_entities,
+            residual,
+        } => problem(
+            &format!(
+                "index integrity: removed {removed_files} stale file entit{} ({removed_entities} \
+                 total) but {residual} violation{} remain that surgical repair cannot fix",
+                if removed_files == 1 { "y" } else { "ies" },
+                if residual == 1 { "" } else { "s" },
+            ),
+            Some(INTEGRITY_REBUILD_HINT),
+        ),
+        IntegrityOutcome::Error(err) => problem(
+            &format!("index integrity: check/repair failed: {err}"),
+            Some("ensure no `loomweave serve` holds the database, then retry"),
+        ),
+    }
+}
+
+/// JSON-path twin of [`check_index_integrity`].
+fn check_index_integrity_json(project_root: &Path, fix: bool) -> DoctorJsonCheck {
+    const ID: &str = "index.integrity";
+    match index_integrity_outcome(project_root, fix) {
+        IntegrityOutcome::Skipped => {
+            DoctorJsonCheck::ok(ID, "no healthy index to check (see .weft/loomweave.schema)")
+        }
+        IntegrityOutcome::Healthy => {
+            DoctorJsonCheck::ok(ID, "no stale entities or parent/contains mismatches")
+        }
+        IntegrityOutcome::Found {
+            stale,
+            mismatches,
+            sample,
+        } => DoctorJsonCheck::problem(
+            ID,
+            format!(
+                "{stale} stale file entities + {mismatches} parent/contains mismatches \
+                 (run with --fix to repair); examples: {}",
+                sample.join("; ")
+            ),
+        ),
+        IntegrityOutcome::Repaired {
+            removed_files,
+            removed_entities,
+        } => DoctorJsonCheck::fixed(
+            ID,
+            format!(
+                "repaired — removed {removed_files} stale file entities ({removed_entities} \
+                 entities total); index is now consistent"
+            ),
+        ),
+        IntegrityOutcome::ResidualAfterFix {
+            removed_files,
+            removed_entities,
+            residual,
+        } => DoctorJsonCheck::problem(
+            ID,
+            format!(
+                "removed {removed_files} stale file entities ({removed_entities} total) but \
+                 {residual} violations remain; {INTEGRITY_REBUILD_HINT}"
+            ),
+        ),
+        IntegrityOutcome::Error(err) => {
+            DoctorJsonCheck::problem(ID, format!("check/repair failed: {err}"))
+        }
+    }
+}
+
 /// Whether the regenerable runtime DB is committed to git.
 ///
 /// `loomweave.db` mutates on every `analyze`/`scan`; tracking it leaves a
diff --git a/crates/loomweave-storage/src/integrity.rs b/crates/loomweave-storage/src/integrity.rs
new file mode 100644
index 00000000..292ffe21
--- /dev/null
+++ b/crates/loomweave-storage/src/integrity.rs
@@ -0,0 +1,258 @@
+//! Index-integrity diagnosis and surgical repair (clarion-abda98c869 recovery
+//! surface for `loomweave doctor --fix`).
+//!
+//! Two related corruptions are detected; the common, recoverable one is repaired:
+//!
+//! * **Stale file entities** — a `core:file:*` entity whose source path no longer
+//!   exists on disk (the file was deleted, renamed, or converted file↔package).
+//!   `entities` is cumulative and never run-pruned, so such rows linger until an
+//!   analyze run's SEI orphan pass retires them; until then their dangling
+//!   `contains` edges can violate the parent/contains invariant.
+//! * **Parent/contains mismatches** — the `LMWV-INFRA-PARENT-CONTAINS-MISMATCH`
+//!   invariant (ADR-026 decision 2) the writer enforces at `CommitRun`. A
+//!   file→package refactor (`m.py` → `m/__init__.py`, *same* module qualname)
+//!   leaves a stale file entity whose `contains` edge competes with the new file's,
+//!   and the run aborts at phase3 before the orphan pass can clean up.
+//!
+//! Repair removes each stale file entity and every entity anchored to it
+//! (`source_file_id`); their edges and tags cascade away (`ON DELETE CASCADE`,
+//! `foreign_keys = ON`). This is the same retirement the analyze SEI pass
+//! performs, applied proactively so a corrupted index becomes analysable again
+//! without a full rebuild. The delete runs under `defer_foreign_keys = ON` so the
+//! removal set need not be ordered; the deferred check at commit guarantees no
+//! surviving row is left dangling. Mismatches not attributable to a stale file
+//! (genuine writer corruption) are surfaced as *residual* for a full re-analyze.
+
+use std::collections::BTreeSet;
+use std::path::Path;
+
+use rusqlite::Connection;
+
+use crate::error::Result;
+
+/// A `core:file:*` entity whose source path no longer exists on disk.
+#[derive(Debug, Clone)]
+pub struct StaleFileEntity {
+    pub id: String,
+    /// Best-effort display path (relative to the project root).
+    pub path: String,
+}
+
+/// One violation of the parent/contains dual-encoding invariant.
+#[derive(Debug, Clone)]
+pub struct ParentContainsMismatch {
+    pub detail: String,
+}
+
+/// The read-only integrity verdict for an index.
+#[derive(Debug, Default)]
+pub struct IntegrityReport {
+    pub stale_file_entities: Vec<StaleFileEntity>,
+    pub parent_contains_mismatches: Vec<ParentContainsMismatch>,
+}
+
+impl IntegrityReport {
+    #[must_use]
+    pub fn is_healthy(&self) -> bool {
+        self.stale_file_entities.is_empty() && self.parent_contains_mismatches.is_empty()
+    }
+}
+
+/// Outcome of a [`repair_integrity`] pass.
+#[derive(Debug)]
+pub struct RepairReport {
+    /// Number of stale (vanished-from-disk) file entities removed.
+    pub removed_file_entities: usize,
+    /// Total entities removed (the stale files plus everything anchored to them).
+    pub removed_entities_total: usize,
+    /// Integrity re-check after the repair. A non-healthy residual means
+    /// corruption that surgical orphan-removal cannot fix — a full re-analyze
+    /// (`loomweave analyze --no-incremental`) is required.
+    pub residual: IntegrityReport,
+}
+
+/// Read-only integrity check. `project_root` resolves `core:file:*` entity paths
+/// to decide whether a file still exists on disk.
+///
+/// # Errors
+///
+/// Returns [`crate::error::StorageError::Sqlite`] on any query failure.
+pub fn check_integrity(conn: &Connection, project_root: &Path) -> Result<IntegrityReport> {
+    Ok(IntegrityReport {
+        stale_file_entities: stale_file_entities(conn, project_root)?,
+        parent_contains_mismatches: parent_contains_mismatches(conn)?,
+    })
+}
+
+/// Surgically remove stale file entities (and everything anchored to them), then
+/// re-check. Idempotent: a healthy index is left untouched.
+///
+/// # Errors
+///
+/// Returns [`crate::error::StorageError::Sqlite`] on any query/transaction
+/// failure, including a deferred foreign-key violation at commit (which rolls the
+/// repair back, leaving the index unchanged).
+pub fn repair_integrity(conn: &mut Connection, project_root: &Path) -> Result<RepairReport> {
+    let stale = stale_file_entities(conn, project_root)?;
+    let removed_file_entities = stale.len();
+    let mut removed_entities_total = 0usize;
+
+    if !stale.is_empty() {
+        let tx = conn.transaction()?;
+        // Defer FK checks to commit so the removal set need not be topologically
+        // ordered; cascade still fires for edges/tags as each entity is deleted.
+        tx.execute_batch("PRAGMA defer_foreign_keys = ON;")?;
+
+        // Stage the removal set (stale file entities + everything anchored to
+        // them via `source_file_id`) so the null-out and delete share one set.
+        tx.execute_batch(
+            "CREATE TEMP TABLE IF NOT EXISTS __lw_to_delete (id TEXT PRIMARY KEY); \
+             DELETE FROM __lw_to_delete;",
+        )?;
+        let mut to_delete: BTreeSet<String> = BTreeSet::new();
+        {
+            let mut child_stmt = tx.prepare("SELECT id FROM entities WHERE source_file_id = ?1")?;
+            for file in &stale {
+                to_delete.insert(file.id.clone());
+                let kids = child_stmt.query_map([&file.id], |row| row.get::<_, String>(0))?;
+                for kid in kids {
+                    to_delete.insert(kid?);
+                }
+            }
+            let mut ins = tx.prepare("INSERT OR IGNORE INTO __lw_to_delete (id) VALUES (?1)")?;
+            for id in &to_delete {
+                ins.execute([id])?;
+            }
+        }
+
+        // Null the four NO-ACTION foreign-key columns into `entities(id)` that do
+        // NOT cascade, where a SURVIVING row would otherwise be left pointing at a
+        // deleted entity (`edges.from_id`/`to_id`, tags, taint, caches all cascade
+        // and need no handling). `source_file_id` always names a file entity, so
+        // nulling it drops only stale provenance; a dangling `parent_id` on a
+        // survivor is corruption itself (a moved child whose old parent vanished)
+        // and is cleared so a re-analyze can re-establish it.
+        tx.execute_batch(
+            "UPDATE entities SET parent_id = NULL \
+             WHERE parent_id IN (SELECT id FROM __lw_to_delete) \
+             AND id NOT IN (SELECT id FROM __lw_to_delete); \
+             UPDATE entities SET source_file_id = NULL \
+             WHERE source_file_id IN (SELECT id FROM __lw_to_delete) \
+             AND id NOT IN (SELECT id FROM __lw_to_delete); \
+             UPDATE edges SET source_file_id = NULL \
+             WHERE source_file_id IN (SELECT id FROM __lw_to_delete); \
+             UPDATE entity_unresolved_call_sites SET source_file_id = NULL \
+             WHERE source_file_id IN (SELECT id FROM __lw_to_delete);",
+        )?;
+
+        removed_entities_total = tx.execute(
+            "DELETE FROM entities WHERE id IN (SELECT id FROM __lw_to_delete)",
+            [],
+        )?;
+        tx.execute_batch("DROP TABLE __lw_to_delete;")?;
+        tx.commit()?;
+    }
+
+    Ok(RepairReport {
+        removed_file_entities,
+        removed_entities_total,
+        residual: check_integrity(conn, project_root)?,
+    })
+}
+
+/// File entities whose source path is gone from disk. The canonical path is the
+/// `core:file:` id (ADR-003); `source_file_path` is a fallback.
+fn stale_file_entities(conn: &Connection, project_root: &Path) -> Result<Vec<StaleFileEntity>> {
+    let mut stmt = conn.prepare("SELECT id, source_file_path FROM entities WHERE kind = 'file'")?;
+    let rows = stmt.query_map([], |row| {
+        Ok((row.get::<_, String>(0)?, row.get::<_, Option<String>>(1)?))
+    })?;
+    let mut stale = Vec::new();
+    for row in rows {
+        let (id, source_path) = row?;
+        if !file_entity_exists(&id, source_path.as_deref(), project_root) {
+            let path = source_path
+                .or_else(|| id.strip_prefix("core:file:").map(ToOwned::to_owned))
+                .unwrap_or_else(|| id.clone());
+            stale.push(StaleFileEntity { id, path });
+        }
+    }
+    Ok(stale)
+}
+
+/// Does the file backing a `core:file:*` entity still exist on disk?
+fn file_entity_exists(id: &str, source_path: Option<&str>, project_root: &Path) -> bool {
+    if let Some(rel) = id.strip_prefix("core:file:")
+        && project_root.join(rel).exists()
+    {
+        return true;
+    }
+    if let Some(path) = source_path {
+        let raw = Path::new(path);
+        if raw.is_absolute() && raw.exists() {
+            return true;
+        }
+        if project_root.join(path).exists() {
+            return true;
+        }
+    }
+    false
+}
+
+/// Both directions of the parent/contains dual-encoding invariant (mirrors the
+/// writer's `CommitRun` check, ADR-026 decision 2), collecting *all* violations.
+fn parent_contains_mismatches(conn: &Connection) -> Result<Vec<ParentContainsMismatch>> {
+    let mut out = Vec::new();
+
+    // Direction 1: every entity.parent_id has a matching `contains` edge from it.
+    let mut s1 = conn.prepare(
+        "SELECT e.id, e.parent_id, ce.from_id \
+         FROM entities e \
+         LEFT JOIN edges ce ON ce.kind = 'contains' AND ce.to_id = e.id \
+         WHERE e.parent_id IS NOT NULL \
+         AND (ce.from_id IS NULL OR ce.from_id != e.parent_id)",
+    )?;
+    let r1 = s1.query_map([], |row| {
+        Ok((
+            row.get::<_, String>(0)?,
+            row.get::<_, Option<String>>(1)?,
+            row.get::<_, Option<String>>(2)?,
+        ))
+    })?;
+    for row in r1 {
+        let (eid, parent, ce_from) = row?;
+        out.push(ParentContainsMismatch {
+            detail: format!(
+                "entity {eid:?} declares parent_id={parent:?} but no matching `contains` \
+                 edge exists (closest contains.from_id={ce_from:?})"
+            ),
+        });
+    }
+
+    // Direction 2: every `contains` edge has a child whose parent_id matches.
+    let mut s2 = conn.prepare(
+        "SELECT ce.from_id, ce.to_id, e.parent_id \
+         FROM edges ce \
+         JOIN entities e ON e.id = ce.to_id \
+         WHERE ce.kind = 'contains' \
+         AND (e.parent_id IS NULL OR e.parent_id != ce.from_id)",
+    )?;
+    let r2 = s2.query_map([], |row| {
+        Ok((
+            row.get::<_, String>(0)?,
+            row.get::<_, String>(1)?,
+            row.get::<_, Option<String>>(2)?,
+        ))
+    })?;
+    for row in r2 {
+        let (from, to, parent) = row?;
+        out.push(ParentContainsMismatch {
+            detail: format!(
+                "contains edge ({from:?} -> {to:?}) has no matching child parent_id \
+                 (child.parent_id={parent:?})"
+            ),
+        });
+    }
+
+    Ok(out)
+}
diff --git a/crates/loomweave-storage/src/lib.rs b/crates/loomweave-storage/src/lib.rs
index b1719fca..7773fc50 100644
--- a/crates/loomweave-storage/src/lib.rs
+++ b/crates/loomweave-storage/src/lib.rs
@@ -11,6 +11,7 @@ pub mod error;
 pub mod findings;
 pub mod glob;
 pub mod guidance;
+pub mod integrity;
 pub mod pragma;
 pub mod prior_index;
 pub mod query;
diff --git a/crates/loomweave-storage/tests/index_integrity.rs b/crates/loomweave-storage/tests/index_integrity.rs
new file mode 100644
index 00000000..2b150bc2
--- /dev/null
+++ b/crates/loomweave-storage/tests/index_integrity.rs
@@ -0,0 +1,275 @@
+//! Index-integrity detection + surgical repair (clarion-abda98c869 recovery via
+//! `loomweave doctor --fix`). Reproduces the file→package refactor corruption
+//! (`m.py` becomes `m/__init__.py`, same module qualname) that leaves a stale
+//! file entity whose `contains` edge trips `LMWV-INFRA-PARENT-CONTAINS-MISMATCH`.
+
+use std::path::Path;
+
+use loomweave_storage::integrity::{check_integrity, repair_integrity};
+use loomweave_storage::{pragma, schema};
+use rusqlite::{Connection, params};
+
+/// Build a db seeded with the composer-style corruption against an on-disk tree
+/// where only the new package (`.../mod/__init__.py`) exists and the old module
+/// file (`.../mod.py`) has vanished.
+fn seed(project_root: &Path) -> Connection {
+    // On-disk: the new package exists; the old file does not.
+    std::fs::create_dir_all(project_root.join("src/app/mod")).unwrap();
+    std::fs::write(project_root.join("src/app/mod/__init__.py"), b"").unwrap();
+
+    let db_path = project_root.join("test.db");
+    let mut conn = Connection::open(&db_path).unwrap();
+    pragma::apply_write_pragmas(&conn).unwrap();
+    schema::apply_migrations(&mut conn).unwrap();
+
+    let insert_entity = |conn: &Connection,
+                         id: &str,
+                         kind: &str,
+                         parent: Option<&str>,
+                         source_file_id: Option<&str>,
+                         source_path: &str| {
+        conn.execute(
+            "INSERT INTO entities (id, plugin_id, kind, name, short_name, parent_id, \
+             source_file_id, source_file_path, properties, created_at, updated_at) \
+             VALUES (?1,?2,?3,?1,?1,?4,?5,?6,'{}','t','t')",
+            params![
+                id,
+                if kind == "file" { "core" } else { "python" },
+                kind,
+                parent,
+                source_file_id,
+                source_path
+            ],
+        )
+        .unwrap();
+    };
+    let insert_contains = |conn: &Connection, from: &str, to: &str| {
+        conn.execute(
+            "INSERT INTO edges (kind, from_id, to_id, confidence) VALUES ('contains',?1,?2,'resolved')",
+            params![from, to],
+        )
+        .unwrap();
+    };
+
+    let old_file = "core:file:src/app/mod.py";
+    let new_file = "core:file:src/app/mod/__init__.py";
+    let module = "python:module:app.mod";
+    let old_fn = "python:function:app.mod.legacy_helper";
+
+    // Both file entities exist in the cumulative index; only new_file is on disk.
+    insert_entity(
+        &conn,
+        old_file,
+        "file",
+        None,
+        Some(old_file),
+        "src/app/mod.py",
+    );
+    insert_entity(
+        &conn,
+        new_file,
+        "file",
+        None,
+        Some(new_file),
+        "src/app/mod/__init__.py",
+    );
+    // The module now anchors to the new __init__.py file (parent + source).
+    insert_entity(
+        &conn,
+        module,
+        "module",
+        Some(new_file),
+        Some(new_file),
+        "src/app/mod/__init__.py",
+    );
+    // A stale function still anchored to the vanished old file.
+    insert_entity(
+        &conn,
+        old_fn,
+        "function",
+        Some(module),
+        Some(old_file),
+        "src/app/mod.py",
+    );
+
+    // Two contains edges into the module — the stale one is the invariant breaker.
+    insert_contains(&conn, old_file, module); // STALE (old_file vanished)
+    insert_contains(&conn, new_file, module); // valid
+    insert_contains(&conn, module, old_fn); // stale child
+
+    conn
+}
+
+#[test]
+fn detects_stale_file_and_parent_contains_mismatch() {
+    let dir = tempfile::tempdir().unwrap();
+    let conn = seed(dir.path());
+
+    let report = check_integrity(&conn, dir.path()).unwrap();
+
+    assert!(!report.is_healthy(), "corruption must be detected");
+    // The vanished old file is the one stale file entity.
+    assert_eq!(report.stale_file_entities.len(), 1, "{report:?}");
+    assert_eq!(report.stale_file_entities[0].id, "core:file:src/app/mod.py");
+    // The stale contains edge trips the parent/contains invariant.
+    assert!(
+        !report.parent_contains_mismatches.is_empty(),
+        "parent/contains mismatch must be detected: {report:?}"
+    );
+}
+
+#[test]
+fn repair_removes_stale_rows_and_restores_integrity() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut conn = seed(dir.path());
+
+    let repair = repair_integrity(&mut conn, dir.path()).unwrap();
+
+    assert_eq!(repair.removed_file_entities, 1, "{repair:?}");
+    // old file + its stale child function removed.
+    assert_eq!(repair.removed_entities_total, 2, "{repair:?}");
+    assert!(
+        repair.residual.is_healthy(),
+        "residual: {:?}",
+        repair.residual
+    );
+
+    // The surviving module + new file are intact; the stale rows are gone.
+    let exists = |id: &str| -> bool {
+        conn.query_row("SELECT 1 FROM entities WHERE id = ?1", params![id], |_| {
+            Ok(())
+        })
+        .is_ok()
+    };
+    assert!(exists("python:module:app.mod"), "module must survive");
+    assert!(
+        exists("core:file:src/app/mod/__init__.py"),
+        "new file must survive"
+    );
+    assert!(
+        !exists("core:file:src/app/mod.py"),
+        "stale file must be gone"
+    );
+    assert!(
+        !exists("python:function:app.mod.legacy_helper"),
+        "stale fn must be gone"
+    );
+
+    // The stale contains edge cascaded away; the valid one remains.
+    let contains_into_module: i64 = conn
+        .query_row(
+            "SELECT count(*) FROM edges WHERE kind='contains' AND to_id='python:module:app.mod'",
+            [],
+            |r| r.get(0),
+        )
+        .unwrap();
+    assert_eq!(
+        contains_into_module, 1,
+        "only the valid contains edge survives"
+    );
+
+    // Re-checking a now-clean index reports healthy.
+    assert!(check_integrity(&conn, dir.path()).unwrap().is_healthy());
+}
+
+#[test]
+fn repair_nulls_dangling_edge_provenance_into_vanished_file() {
+    // The elspeth failure mode: an edge between two SURVIVING entities whose
+    // `source_file_id` points at a vanished file. `edges.source_file_id` is a
+    // NO-ACTION FK (no cascade), so naive deletion fails the FK check at commit.
+    // Repair must null the dangling provenance and keep the edge.
+    let dir = tempfile::tempdir().unwrap();
+    std::fs::create_dir_all(dir.path().join("src/app")).unwrap();
+    std::fs::write(dir.path().join("src/app/live.py"), b"").unwrap();
+    let mut conn = Connection::open(dir.path().join("test.db")).unwrap();
+    pragma::apply_write_pragmas(&conn).unwrap();
+    schema::apply_migrations(&mut conn).unwrap();
+
+    let ent = |conn: &Connection, id: &str, kind: &str, sfi: &str| {
+        conn.execute(
+            "INSERT INTO entities (id, plugin_id, kind, name, short_name, source_file_id, \
+             source_file_path, properties, created_at, updated_at) \
+             VALUES (?1, ?2, ?3, ?1, ?1, ?4, 'p', '{}', 't', 't')",
+            params![
+                id,
+                if kind == "file" { "core" } else { "python" },
+                kind,
+                sfi
+            ],
+        )
+        .unwrap();
+    };
+    let live_file = "core:file:src/app/live.py";
+    let gone_file = "core:file:src/app/gone.py";
+    ent(&conn, live_file, "file", live_file);
+    ent(&conn, gone_file, "file", gone_file);
+    // Two surviving functions in the live file…
+    ent(&conn, "python:function:app.live.a", "function", live_file);
+    ent(&conn, "python:function:app.live.b", "function", live_file);
+    // …with a calls edge whose provenance points at the vanished file.
+    conn.execute(
+        "INSERT INTO edges (kind, from_id, to_id, source_file_id, confidence) \
+         VALUES ('calls','python:function:app.live.a','python:function:app.live.b',?1,'resolved')",
+        params![gone_file],
+    )
+    .unwrap();
+
+    let repair = repair_integrity(&mut conn, dir.path()).unwrap();
+    assert_eq!(
+        repair.removed_file_entities, 1,
+        "only the vanished file is removed"
+    );
+    assert!(repair.residual.is_healthy(), "{:?}", repair.residual);
+
+    // The edge survives (relationship preserved) with provenance nulled.
+    let (cnt, sfi): (i64, Option<String>) = conn
+        .query_row(
+            "SELECT count(*), max(source_file_id) FROM edges WHERE kind='calls' \
+             AND from_id='python:function:app.live.a'",
+            [],
+            |r| Ok((r.get(0)?, r.get(1)?)),
+        )
+        .unwrap();
+    assert_eq!(cnt, 1, "the cross-file edge must be preserved");
+    assert_eq!(sfi, None, "its dangling provenance must be nulled");
+}
+
+#[test]
+fn repair_is_a_noop_on_a_healthy_index() {
+    let dir = tempfile::tempdir().unwrap();
+    // Healthy: a single on-disk file + its module, consistent parent/contains.
+    std::fs::create_dir_all(dir.path().join("src/app")).unwrap();
+    std::fs::write(dir.path().join("src/app/clean.py"), b"").unwrap();
+    let db_path = dir.path().join("test.db");
+    let mut conn = Connection::open(&db_path).unwrap();
+    pragma::apply_write_pragmas(&conn).unwrap();
+    schema::apply_migrations(&mut conn).unwrap();
+    conn.execute(
+        "INSERT INTO entities (id, plugin_id, kind, name, short_name, source_file_id, \
+         source_file_path, properties, created_at, updated_at) \
+         VALUES ('core:file:src/app/clean.py','core','file','f','f','core:file:src/app/clean.py',\
+         'src/app/clean.py','{}','t','t')",
+        [],
+    )
+    .unwrap();
+    conn.execute(
+        "INSERT INTO entities (id, plugin_id, kind, name, short_name, parent_id, source_file_id, \
+         source_file_path, properties, created_at, updated_at) \
+         VALUES ('python:module:app.clean','python','module','m','m','core:file:src/app/clean.py',\
+         'core:file:src/app/clean.py','src/app/clean.py','{}','t','t')",
+        [],
+    )
+    .unwrap();
+    conn.execute(
+        "INSERT INTO edges (kind, from_id, to_id, confidence) \
+         VALUES ('contains','core:file:src/app/clean.py','python:module:app.clean','resolved')",
+        [],
+    )
+    .unwrap();
+
+    assert!(check_integrity(&conn, dir.path()).unwrap().is_healthy());
+    let repair = repair_integrity(&mut conn, dir.path()).unwrap();
+    assert_eq!(repair.removed_file_entities, 0);
+    assert_eq!(repair.removed_entities_total, 0);
+    assert!(repair.residual.is_healthy());
+}