From 8a733289bb73e7a04f4501d3343fe490d0760301 Mon Sep 17 00:00:00 2001 From: John Morrissey <544926+tachyon-beep@users.noreply.github.com> Date: Fri, 26 Jun 2026 06:40:22 +1000 Subject: [PATCH] fix(storage): prune stale contains edge when a file_scope claim moves (clarion-abda98c869) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A file_scope entity's `parent_id` and its `contains` edge are dual encodings of one fact (ADR-026). When the claiming file moves between runs — a module/package dual-claim whose claimer flips (e.g. Python `colliding.py` vs `colliding/__init__.py` both declaring module `specimen.colliding`), or a genuine entity move — the new claimer sets `parent_id` + its own `contains` edge, but the PREVIOUS claimer's `contains` edge is never pruned. The two edges then disagree with the single `parent_id`, and `parent_contains_mismatch` aborts the entire run at flush/commit with LMWV-INFRA-PARENT-CONTAINS-MISMATCH. The collect_source_files sort (PR #57) only made the *move* case deterministic; the dual-claim case fails regardless of order, and any index already carrying a stale edge fails every subsequent full `analyze`. Fix: in the writer's `insert_entity`, after the entity upsert, prune any `contains` edge into the entity whose `from_id != parent_id` — in the same transaction as the parent write. `parent_id` is authoritative, so this only removes contradictions, never the matching edge (`from_id == parent_id`); a root entity (no parent_id) is left untouched. The result is order-independent (the matching edge is never pruned regardless of entity-vs-edge insert order) and self-healing: an already-corrupted index recovers on the next analysis that re-emits the entity. Verified: re-running the original failing repro (`analyze --no-incremental`) now completes and collapses the module's two stale contains edges to the single consistent one. 
Both existing parent-contains validations still reject genuinely-broken graphs (missing edge; orphan edge / no parent) — the prune fires only when parent_id is set and only deletes from_id != parent_id. Regression test (writer-level, deterministic, no file-order dependence): reclaiming_entity_under_a_new_parent_prunes_the_stale_contains_edge. Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/loomweave-storage/src/writer.rs | 20 ++++ .../loomweave-storage/tests/writer_actor.rs | 113 ++++++++++++++++++ 2 files changed, 133 insertions(+) diff --git a/crates/loomweave-storage/src/writer.rs b/crates/loomweave-storage/src/writer.rs index 16edfac4..ba154683 100644 --- a/crates/loomweave-storage/src/writer.rs +++ b/crates/loomweave-storage/src/writer.rs @@ -752,6 +752,26 @@ fn insert_entity( entity.updated_at, ], )?; + // ADR-026 dual-encoding self-heal (clarion-abda98c869). `parent_id` is the + // authoritative parent; a `contains` edge into this entity from any OTHER + // node is a stale claim left when the claiming file moved — a file_scope + // module/package dual-claim whose claimer flipped between runs, or a + // relocated entity whose old file's contains edge outlived the move. Such an + // edge contradicts `parent_id` and trips `parent_contains_mismatch` at + // flush/commit, aborting the whole run. Prune it here, in the same + // transaction as the parent write, so parent_id and its single contains edge + // stay consistent regardless of intra-run file order — and an already + // corrupted index self-heals on the next analysis that re-emits the entity. + // The claiming file's own contains edge (from_id == parent_id) is never + // touched, so this only removes contradictions, never the matching edge; an + // entity with no parent_id (a root) is left entirely alone. 
+    if let Some(parent_id) = entity.parent_id.as_deref() {
+        conn.execute(
+            "DELETE FROM edges \
+             WHERE kind = 'contains' AND to_id = ?1 AND from_id != ?2",
+            params![entity.id, parent_id],
+        )?;
+    }
     conn.execute(
         "DELETE FROM entity_tags WHERE entity_id = ?1 AND plugin_id = ?2",
         params![entity.id, entity.plugin_id],
diff --git a/crates/loomweave-storage/tests/writer_actor.rs b/crates/loomweave-storage/tests/writer_actor.rs
index e633c3e5..4f87a13f 100644
--- a/crates/loomweave-storage/tests/writer_actor.rs
+++ b/crates/loomweave-storage/tests/writer_actor.rs
@@ -2697,6 +2697,119 @@ async fn parent_id_without_matching_contains_edge_rejects_run() {
     assert_eq!(entity_count, 0, "rejection must roll back entity inserts");
 }
 
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn reclaiming_entity_under_a_new_parent_prunes_the_stale_contains_edge() {
+    // clarion-abda98c869: when a file_scope entity's claim moves between files
+    // across runs (a module/package dual-claim flip, or a genuine move), the
+    // PREVIOUS claimer's `contains` edge must not linger and contradict the new
+    // `parent_id`. Re-inserting the entity with a new parent prunes any stale
+    // contains edge into it, keeping the ADR-026 dual encoding consistent — so
+    // the run commits instead of aborting with LMWV-INFRA-PARENT-CONTAINS-MISMATCH.
+    let dir = tempfile::tempdir().unwrap();
+    let path = prepared_db(&dir);
+    let (writer, handle) = Writer::spawn(path.clone(), 50, 256).unwrap();
+    let tx = writer.sender();
+
+    // Run 1: file A claims module m (parent=A, contains A->m). Consistent.
+    begin_demo_run(&tx, "run-1").await;
+    send::<()>(&tx, |ack| WriterCmd::InsertEntity {
+        entity: Box::new(make_file_entity_named("core:file:a.py", "a.py")),
+        ack,
+    })
+    .await
+    .unwrap();
+    let mut module_a = make_module_entity("python:module:m");
+    module_a.parent_id = Some("core:file:a.py".to_owned());
+    send::<()>(&tx, |ack| WriterCmd::InsertEntity {
+        entity: Box::new(module_a),
+        ack,
+    })
+    .await
+    .unwrap();
+    send::<()>(&tx, |ack| WriterCmd::InsertEdge {
+        edge: Box::new(make_contains_edge("core:file:a.py", "python:module:m")),
+        ack,
+    })
+    .await
+    .unwrap();
+    send::<()>(&tx, |ack| WriterCmd::CommitRun {
+        run_id: "run-1".into(),
+        status: RunStatus::Completed,
+        completed_at: now_iso(),
+        stats_json: "{}".into(),
+        ack,
+    })
+    .await
+    .expect("run 1 is internally consistent");
+
+    // Run 2: file B re-claims module m (parent=B, contains B->m). The A->m edge
+    // from run 1 is stale and would trip the parent-contains validation; the
+    // re-claim must prune it rather than abort the run.
+    begin_demo_run(&tx, "run-2").await;
+    send::<()>(&tx, |ack| WriterCmd::InsertEntity {
+        entity: Box::new(make_file_entity_named("core:file:b.py", "b.py")),
+        ack,
+    })
+    .await
+    .unwrap();
+    let mut module_b = make_module_entity("python:module:m");
+    module_b.parent_id = Some("core:file:b.py".to_owned());
+    send::<()>(&tx, |ack| WriterCmd::InsertEntity {
+        entity: Box::new(module_b),
+        ack,
+    })
+    .await
+    .unwrap();
+    send::<()>(&tx, |ack| WriterCmd::InsertEdge {
+        edge: Box::new(make_contains_edge("core:file:b.py", "python:module:m")),
+        ack,
+    })
+    .await
+    .unwrap();
+    send::<()>(&tx, |ack| WriterCmd::CommitRun {
+        run_id: "run-2".into(),
+        status: RunStatus::Completed,
+        completed_at: now_iso(),
+        stats_json: "{}".into(),
+        ack,
+    })
+    .await
+    .expect("re-claim must prune the stale contains edge and commit, not abort");
+
+    drop(tx);
+    drop(writer);
+    handle.await.unwrap().unwrap();
+
+    // The graph reflects the new claimer only: exactly one contains edge, from B,
+    // matching the module's parent_id.
+    let pool = ReaderPool::open(&path, 1).unwrap();
+    let (froms, parent): (Vec<String>, Option<String>) = pool
+        .with_reader(|conn| {
+            let mut stmt = conn.prepare(
+                "SELECT from_id FROM edges \
+                 WHERE kind = 'contains' AND to_id = 'python:module:m' \
+                 ORDER BY from_id",
+            )?;
+            let froms = stmt
+                .query_map([], |row| row.get::<_, String>(0))?
+                .collect::<Result<Vec<_>, _>>()?;
+            let parent = conn.query_row(
+                "SELECT parent_id FROM entities WHERE id = 'python:module:m'",
+                [],
+                |row| row.get::<_, Option<String>>(0),
+            )?;
+            Ok((froms, parent))
+        })
+        .await
+        .unwrap();
+    assert_eq!(
+        froms,
+        ["core:file:b.py"],
+        "the stale A->m contains edge must be pruned; only B->m survives"
+    );
+    assert_eq!(parent.as_deref(), Some("core:file:b.py"));
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn orphan_contains_edge_with_no_matching_parent_id_rejects_run() {
     // Inverse direction of parent-id consistency: a contains edge exists but