diff --git a/crates/loomweave-cli/src/analyze.rs b/crates/loomweave-cli/src/analyze.rs index 4e6bb6ee..48482635 100644 --- a/crates/loomweave-cli/src/analyze.rs +++ b/crates/loomweave-cli/src/analyze.rs @@ -3728,6 +3728,15 @@ fn host_finding_to_record( } fn host_finding_anchor_id(hf: &HostFinding, project_root: &Path, project_anchor: &str) -> String { + // Precedence: an explicit entity anchor (clarion-48af930f2a — the + // duplicate-locator finding anchors to the colliding entity so the shadow is + // queryable from the entity read path) wins; else a file-path anchor + // resolved to its core file entity; else the file-less project anchor. The + // entity-id anchor is taken verbatim — the FK is enforced at insert, and the + // emitter only sets it for an entity it has already streamed to the store. + if let Some(entity_id) = hf.metadata.get("anchor_entity_id") { + return entity_id.clone(); + } hf.metadata .get("anchor_file_path") .and_then(|path| core_file_entity_id(project_root, Path::new(path)).ok()) diff --git a/crates/loomweave-cli/src/analyze/duplicate_guard.rs b/crates/loomweave-cli/src/analyze/duplicate_guard.rs index 72d725d2..4d860904 100644 --- a/crates/loomweave-cli/src/analyze/duplicate_guard.rs +++ b/crates/loomweave-cli/src/analyze/duplicate_guard.rs @@ -38,8 +38,10 @@ use loomweave_core::HostFinding; /// Plugin-neutral rule id (deliberately not `LMWV-RUST-*`/`LMWV-PY-*`; the /// legacy plugin-prefixed names are a known wart, clarion-a65cb18b02). /// Severity is ERROR — see `infra_severity` — because the absorbed collision -/// is silent data loss. -pub(crate) const DUPLICATE_LOCATOR_RULE_ID: &str = "LMWV-DUPLICATE-LOCATOR"; +/// is silent data loss. The string lives in `loomweave-core` so the detector +/// here and the storage-side disclosure query agree on one constant +/// (clarion-48af930f2a). +pub(crate) use loomweave_core::DUPLICATE_LOCATOR_RULE_ID; /// Per-plugin, per-run duplicate-locator tracker. 
One `HashMap` /// across the run's entities is fine at the 100k-entity scale `analyze` @@ -150,13 +152,17 @@ impl DuplicateLocatorGuard { second_path.to_owned(), ); metadata.insert("shape".to_owned(), shape.as_str().to_owned()); - // Anchor the finding to a real file (the first-seen declaration) so it - // carries a `source_file_path` and reaches Filigree's scan-results emit. - // Without this, `host_finding_anchor_id` falls back to the file-less - // project anchor (`core:project:*`), which the emit skips as - // `skipped_no_path` — leaving the duplicate-locator lacuna untrackable - // in Filigree (the residual half of the dogfood's Friction A). - metadata.insert("anchor_file_path".to_owned(), first_path.to_owned()); + // Anchor the finding to the COLLIDING ENTITY (the survivor row), not a + // file (clarion-48af930f2a). `host_finding_anchor_id` prefers this key, + // so `findings.entity_id` becomes the chimeric id — which makes the + // shadow queryable from the entity read path (`entity_finding_list` / + // the `collision` projection in `entity_json`) instead of silently + // resolving to a clean row. The entity carries its own + // `source_file_path`, so the Filigree scan-results emit still gets a real + // path via `findings_for_emit`'s join (the file-anchor workaround for the + // dogfood's Friction A is now subsumed — the entity is always a better + // anchor than its file). + metadata.insert("anchor_entity_id".to_owned(), entity_id.to_owned()); Some(HostFinding { subcode: DUPLICATE_LOCATOR_RULE_ID.to_owned(), message: format!( diff --git a/crates/loomweave-cli/tests/duplicate_locator.rs b/crates/loomweave-cli/tests/duplicate_locator.rs index ce93bbd2..83c1d2f9 100644 --- a/crates/loomweave-cli/tests/duplicate_locator.rs +++ b/crates/loomweave-cli/tests/duplicate_locator.rs @@ -3,10 +3,12 @@ //! The writer absorbs a colliding entity id via `ON CONFLICT(id) DO UPDATE` //! (last-write-wins) — deliberately, because the absorption is load-bearing //! 
for incremental upserts. These tests assert that the host's analyze path -//! nevertheless SURFACES a collision as a project-level -//! `LMWV-DUPLICATE-LOCATOR` ERROR finding, and — just as important — that the -//! alarm stays silent on every legitimate-recurrence shape (unchanged -//! re-analysis, genuine moves, the clarion-6ec7317628 module dual-claim). +//! nevertheless SURFACES a collision as an entity-anchored +//! `LMWV-DUPLICATE-LOCATOR` ERROR finding (anchored to the colliding entity +//! since clarion-48af930f2a, so the shadow is queryable from the entity read +//! path), and — just as important — that the alarm stays silent on every +//! legitimate-recurrence shape (unchanged re-analysis, genuine moves, the +//! clarion-6ec7317628 module dual-claim). //! //! Driven through the fixture plugin's content-driven `gadget ` lines //! (each emits a `fixture:gadget:` entity) and the @@ -153,6 +155,23 @@ fn single_finding(conn: &Connection, rule_id: &str) -> (String, String, String) .expect("query finding") } +/// The `entity_id` anchor column of the single `rule_id` finding. +fn finding_entity_id(conn: &Connection, rule_id: &str) -> String { + conn.query_row( + "SELECT entity_id FROM findings WHERE rule_id = ?1", + [rule_id], + |row| row.get(0), + ) + .expect("query finding entity_id") +} + +fn entity_row_count(conn: &Connection, id: &str) -> i64 { + conn.query_row("SELECT COUNT(*) FROM entities WHERE id = ?1", [id], |row| { + row.get(0) + }) + .expect("query entity row count") +} + /// In-run, same-file shape: one file emits the same gadget id twice (three /// times, in fact — proving one finding per id per run, not per occurrence). /// The run still completes (the alarm detects; it does not block). 
@@ -181,9 +200,10 @@ fn in_run_same_file_duplicate_emits_single_error_finding() { "evidence must carry the same-file shape; got {evidence}" ); assert!( - evidence.contains("anchor_file_path"), - "finding must carry anchor_file_path so it reaches Filigree's emit \ - (a file-less project anchor is skipped as skipped_no_path); got {evidence}" + evidence.contains("anchor_entity_id"), + "finding must anchor to the colliding entity (clarion-48af930f2a) so it \ + is queryable from the entity read path AND still reaches Filigree's \ + emit with a real path via the entity's own source_file_path; got {evidence}" ); // The run committed: the entity row exists despite the collision. @@ -360,3 +380,72 @@ fn file_scope_same_file_duplicate_is_flagged() { "evidence must carry the same-file shape; got {evidence}" ); } + +/// clarion-48af930f2a: the duplicate-locator finding must anchor to the +/// COLLIDING ENTITY (the survivor row), not the file — so an entity-scoped read +/// (`entity_finding_list` / the `collision` projection in `entity_json`) on the +/// shadowed declaration surfaces the chimera instead of returning a clean row. +/// And the disclosure must follow the standing finding lifecycle: it survives a +/// no-op incremental run (loomweave's normal mode) and clears only on a clean +/// full pass that re-walks the files and no longer reproduces the collision. +#[test] +fn duplicate_locator_finding_anchors_to_entity_and_follows_lifecycle() { + let fixture_bin = fixture_binary_path(); + let plugin_dir = setup_plugin_dir(&fixture_bin); + let (project_dir, new_path) = setup_project(&plugin_dir); + write_source(&project_dir, "alpha.mt", "gadget shared.item\n"); + write_source(&project_dir, "beta.mt", "gadget shared.item\n"); + + // Run 1 (full): the cross-file collision fires. 
+ analyze(&project_dir, &new_path, &[]); + { + let conn = open_db(&project_dir); + assert_eq!( + finding_count(&conn, RULE_ID), + 1, + "run 1 must surface the collision exactly once" + ); + // The anchor is the colliding ENTITY, not a `core:file:*` row. This is + // what makes the shadow queryable from the entity read path. + assert_eq!( + finding_entity_id(&conn, RULE_ID), + "fixture:gadget:shared.item", + "the finding must anchor to the colliding entity id, not the file" + ); + // The survivor row exists under that id (one row per id, by design). + assert_eq!(entity_row_count(&conn, "fixture:gadget:shared.item"), 1); + } + + // Run 2 (incremental no-op): nothing changed, so neither file is dispatched + // and the collision is not re-detected. The disclosure must NOT be swept — + // the general stale-finding sweep is gated to a clean full pass. + analyze(&project_dir, &new_path, &[]); + { + let conn = open_db(&project_dir); + assert_eq!( + finding_count(&conn, RULE_ID), + 1, + "a no-op incremental run must not retire the still-valid collision" + ); + assert_eq!( + finding_entity_id(&conn, RULE_ID), + "fixture:gadget:shared.item", + "the anchor must remain the colliding entity across an incremental no-op" + ); + } + + // Run 3 (resolution): remove one declaration and force a full re-pass. The + // collision no longer reproduces, so the stale disclosure is retired. + fs::remove_file(project_dir.path().join("beta.mt")).expect("remove beta.mt"); + analyze(&project_dir, &new_path, &["--no-incremental"]); + { + let conn = open_db(&project_dir); + assert_eq!( + finding_count(&conn, RULE_ID), + 0, + "a clean full pass that no longer reproduces the collision must clear it" + ); + // The surviving declaration is still in the graph, now uncontested. 
+ assert_eq!(entity_row_count(&conn, "fixture:gadget:shared.item"), 1); + } +} diff --git a/crates/loomweave-core/src/lib.rs b/crates/loomweave-core/src/lib.rs index 2fb82160..a3ce6582 100644 --- a/crates/loomweave-core/src/lib.rs +++ b/crates/loomweave-core/src/lib.rs @@ -41,6 +41,7 @@ pub use plugin::{ CrashLoopBreaker, CrashLoopState, // discovery (Task 5) — callers enumerate plugins + DUPLICATE_LOCATOR_RULE_ID, DiscoveredPlugin, DiscoveryError, EdgeConfidence, diff --git a/crates/loomweave-core/src/plugin/host_findings.rs b/crates/loomweave-core/src/plugin/host_findings.rs index 54f02392..f4c9840c 100644 --- a/crates/loomweave-core/src/plugin/host_findings.rs +++ b/crates/loomweave-core/src/plugin/host_findings.rs @@ -12,6 +12,17 @@ use crate::plugin::limits::{ }; use crate::plugin::protocol::UnresolvedCallSite; +/// Emitted when two source declarations assemble the SAME entity locator/id and +/// the writer's `ON CONFLICT(id) DO UPDATE` upsert absorbs the collision as +/// silent last-write-wins data loss (clarion-b19fe90c3e detection; +/// clarion-48af930f2a disclosure). Deliberately plugin-neutral (not +/// `LMWV-RUST-*`/`LMWV-PY-*`): the collision is a host-side store property that +/// every language plugin can hit. Defined here — the single source of truth — +/// so the analyze-side detector, the storage-side disclosure query, and any +/// reader agree on one string rather than three drifting copies +/// (cf. clarion-570e9ac203, the broader tag-vocabulary seam). +pub const DUPLICATE_LOCATOR_RULE_ID: &str = "LMWV-DUPLICATE-LOCATOR"; + /// Emitted when a plugin emits an entity whose `kind` is not in the manifest's /// `entity_kinds` list (ADR-022 ontology boundary). 
pub const FINDING_UNDECLARED_KIND: &str = "LMWV-INFRA-PLUGIN-UNDECLARED-KIND"; diff --git a/crates/loomweave-core/src/plugin/mod.rs b/crates/loomweave-core/src/plugin/mod.rs index dd0b7124..c16d80e0 100644 --- a/crates/loomweave-core/src/plugin/mod.rs +++ b/crates/loomweave-core/src/plugin/mod.rs @@ -30,6 +30,7 @@ pub use host::{ AcceptedEdge, AcceptedEntity, AnalyzeFileOutcome, BriefingBlockReason, HostError, HostFinding, PluginHost, RawEdge, RawEntity, }; +pub use host_findings::DUPLICATE_LOCATOR_RULE_ID; pub use jail::{JailError, jail, jail_to_string}; pub use limits::{ BreakerState, CapExceeded, ContentLengthCeiling, DEFAULT_MAX_NOFILE, DEFAULT_MAX_NPROC, diff --git a/crates/loomweave-mcp/src/lib.rs b/crates/loomweave-mcp/src/lib.rs index 24b37133..d7ca86b8 100644 --- a/crates/loomweave-mcp/src/lib.rs +++ b/crates/loomweave-mcp/src/lib.rs @@ -38,10 +38,10 @@ use loomweave_storage::{ InferredEdgeWriteStats, ReaderPool, ReferenceDirection, ReferenceEdgeMatch, RolledUpReferenceEdge, StorageError, SummaryCacheEntry, SummaryCacheKey, UnresolvedCallSiteRow, WriterCmd, call_edges_from, call_edges_targeting, containing_module_id, - entity_briefing_block_reason, entity_by_id, import_edges_for_entity, - inferred_edge_cache_key_id, module_reference_rollup, reference_edges_for_entity, - relation_edges_for_entity, resolve_entity_ref, sei_for_locator, tags_for_entity, - unresolved_call_sites_for_caller, unresolved_caller_count_for_target, + duplicate_locator_collision, entity_briefing_block_reason, entity_by_id, + import_edges_for_entity, inferred_edge_cache_key_id, module_reference_rollup, + reference_edges_for_entity, relation_edges_for_entity, resolve_entity_ref, sei_for_locator, + tags_for_entity, unresolved_call_sites_for_caller, unresolved_caller_count_for_target, unresolved_callers_for_target, }; @@ -4312,10 +4312,42 @@ fn entity_json(conn: &rusqlite::Connection, entity: &EntityRow) -> Value { "tags".to_owned(), json!(tags_for_entity(conn, 
&entity.id).unwrap_or_default()),
         );
+        // Disclose a same-locator collision the store absorbed as last-write-wins
+        // (clarion-48af930f2a): this id's row may be a chimera of two source
+        // declarations, so a consumer must not read it (or its edges) as a clean
+        // single declaration. Only present when a collision exists; absence means
+        // none. Graceful-degrade to absent on any lookup error — the disclosure
+        // is enrichment and must never fail the structural read (same posture as
+        // the SEI/tags joins above).
+        if let Some(collision) = collision_json(conn, &entity.id) {
+            object.insert("collision".to_owned(), collision);
+        }
     }
     value
 }
 
+/// Build the `collision` disclosure object for `entity_id`, or `None` when the
+/// id carries no `LMWV-DUPLICATE-LOCATOR` finding. Read regardless of finding
+/// status, so a suppressed finding still discloses the chimera. The note states
+/// the honest ceiling: this is disclosure, not recovery — the shadowed
+/// declaration's edges were collapsed at analyze and are not recoverable.
+fn collision_json(conn: &rusqlite::Connection, entity_id: &str) -> Option<Value> {
+    let collision = duplicate_locator_collision(conn, entity_id)
+        .ok()
+        .flatten()?;
+    Some(json!({
+        "shadowed": true,
+        "declarations": collision.declarations,
+        "shape": collision.shape,
+        "note": "This id resolves to more than one source declaration; the store \
+                 keeps one row per id (last-write-wins), so this row is a chimera \
+                 and one declaration is shadowed. Edges, relations, and \
+                 associations bound to the shadowed declaration were collapsed \
+                 during analyze and are NOT recoverable — treat this row's graph \
+                 neighborhood as unreliable. Detection: LMWV-DUPLICATE-LOCATOR.",
+    }))
+}
+
 /// Shannon entropy (bits/byte) of a string, used by the briefing-block guard to
 /// detect the rare case where an entity *name* is itself a high-entropy token
 /// (e.g. a generated symbol embedding a secret).
Mirrors the pre-ingest @@ -8087,6 +8119,98 @@ mod tests { ); } + #[test] + fn entity_json_discloses_locator_collision() { + // clarion-48af930f2a: the single entity read choke point must disclose a + // same-locator collision so a consumer reading the shadowed id sees the + // chimera instead of a clean-looking row. Proven at the read surface, + // and with a SUPPRESSED finding — the data-loss fact outlives a status + // flip, so suppression must not hide the disclosure. + let dir = tempfile::tempdir().unwrap(); + let db_path = dir.path().join("loomweave.db"); + let mut conn = Connection::open(&db_path).expect("open sqlite"); + pragma::apply_write_pragmas(&conn).expect("write pragmas"); + schema::apply_migrations(&mut conn).expect("apply migrations"); + + let id = "python:class:specimen.colliding.ShelfMark"; + conn.execute( + "INSERT INTO entities ( + id, plugin_id, kind, name, short_name, source_file_path, properties, + content_hash, created_at, updated_at + ) VALUES ( + ?1, 'python', 'class', 'ShelfMark', 'ShelfMark', + '/specimen/colliding/__init__.py', '{}', 'h', 't', 't' + )", + rusqlite::params![id], + ) + .expect("seed colliding entity"); + + // Before any finding: a clean row carries no `collision` field. + let entity = entity_row(id, "ShelfMark", Some("h")); + let clean = super::entity_json(&conn, &entity); + assert!( + clean.get("collision").is_none(), + "an entity with no duplicate-locator finding must not be flagged: {clean}" + ); + + // Seed a SUPPRESSED duplicate-locator finding anchored to the colliding + // entity (the post-clarion-48af930f2a anchor). 
+ conn.execute( + "INSERT OR IGNORE INTO runs (id, started_at, config, stats, status) \ + VALUES ('run-1', 't', '{}', '{}', 'completed')", + [], + ) + .unwrap(); + let evidence = serde_json::json!({ + "plugin_id": "python", + "metadata": { + "entity_id": id, + "anchor_entity_id": id, + "first_source_file_path": "/specimen/colliding/__init__.py", + "colliding_source_file_path": "/specimen/colliding.py", + "shape": "in_run_cross_file", + } + }) + .to_string(); + conn.execute( + "INSERT INTO findings ( + id, tool, tool_version, run_id, rule_id, kind, severity, + entity_id, related_entities, message, evidence, properties, + supports, supported_by, status, created_at, updated_at + ) VALUES ( + 'core:finding:infra:dup', 'loomweave', '0', 'run-1', ?1, 'defect', 'ERROR', + ?2, '[]', 'm', ?3, '{}', + '[]', '[]', 'suppressed', 't', 't' + )", + rusqlite::params![loomweave_core::DUPLICATE_LOCATOR_RULE_ID, id, evidence], + ) + .expect("seed suppressed collision finding"); + + let flagged = super::entity_json(&conn, &entity); + let collision = flagged + .get("collision") + .expect("the chimera must be disclosed even when the finding is suppressed"); + assert_eq!(collision["shadowed"], serde_json::json!(true)); + assert_eq!(collision["shape"], "in_run_cross_file"); + let declarations = collision["declarations"] + .as_array() + .expect("declarations array"); + assert_eq!(declarations.len(), 2, "both declarations: {collision}"); + assert!( + declarations.iter().any(|d| d == "/specimen/colliding.py") + && declarations + .iter() + .any(|d| d == "/specimen/colliding/__init__.py"), + "both colliding paths must be disclosed: {collision}" + ); + assert!( + collision["note"] + .as_str() + .is_some_and(|n| n.contains("NOT recoverable")), + "the note must state the disclosure-not-recovery ceiling: {collision}" + ); + } + struct BlockingProvider { release: tokio::sync::Mutex>, started: Option>, diff --git a/crates/loomweave-storage/src/lib.rs b/crates/loomweave-storage/src/lib.rs index 
c30a1622..9300a1f5 100644 --- a/crates/loomweave-storage/src/lib.rs +++ b/crates/loomweave-storage/src/lib.rs @@ -52,11 +52,12 @@ pub use prior_index::{ }; pub use query::{ CallEdgeMatch, CanonicalProjectPath, ContainedEntities, EntityRow, EntitySubsystem, - EntityVisibility, FindingForEmitRow, ModuleDependencyEdge, PRE_INGEST_SECRET_SCAN_RULE_IDS, - RELATION_EDGE_KINDS, ReferenceDirection, ReferenceEdgeMatch, RelationEdgeMatch, ResolvedFile, - ResolvedFileCatalogEntry, RolledUpReferenceEdge, SubsystemMember, UnresolvedCallSiteRow, - ancestor_chain, call_edges_from, call_edges_targeting, candidate_entities_for_unresolved_sites, - child_entity_ids, contained_entity_ids, containing_module_id, current_file_hash, edge_total, + EntityVisibility, FindingForEmitRow, LocatorCollision, ModuleDependencyEdge, + PRE_INGEST_SECRET_SCAN_RULE_IDS, RELATION_EDGE_KINDS, ReferenceDirection, ReferenceEdgeMatch, + RelationEdgeMatch, ResolvedFile, ResolvedFileCatalogEntry, RolledUpReferenceEdge, + SubsystemMember, UnresolvedCallSiteRow, ancestor_chain, call_edges_from, call_edges_targeting, + candidate_entities_for_unresolved_sites, child_entity_ids, contained_entity_ids, + containing_module_id, current_file_hash, duplicate_locator_collision, edge_total, entities_by_churn, entities_by_kind, entities_by_tag, entities_containing_line, entities_targeted_by_unresolved_call_sites, entities_with_wardline_facts, entity_at_line, entity_briefing_block_reason, entity_by_id, entity_ids_in_namespace, entity_total, diff --git a/crates/loomweave-storage/src/query.rs b/crates/loomweave-storage/src/query.rs index d8a6a8e1..6e6af97a 100644 --- a/crates/loomweave-storage/src/query.rs +++ b/crates/loomweave-storage/src/query.rs @@ -1157,6 +1157,93 @@ pub fn tags_for_entity(conn: &Connection, entity_id: &str) -> Result Ok(out) } +/// Disclosure that a same-locator collision was absorbed into this id's single +/// row by the writer's `ON CONFLICT(id) DO UPDATE` upsert (last-write-wins). 
+///
+/// This is the *honest ceiling* (clarion-48af930f2a): the stored row is a
+/// chimera of two declarations and the shadowed declaration's edges were already
+/// collapsed during analyze, so this discloses the unreliability — it does not
+/// recover the lost declaration. Surfaced on the entity read path so a consumer
+/// querying the shadowed id sees the collision instead of a clean-looking row.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct LocatorCollision {
+    /// The distinct source declarations that assemble this id (the survivor's
+    /// path plus the shadowed one), deduped and sorted for a stable projection.
+    pub declarations: Vec<String>,
+    /// Which collision rule fired (`in_run_same_file` / `in_run_cross_file` /
+    /// `cross_run_unchanged_file`); `None` if the finding carried no shape.
+    pub shape: Option<String>,
+}
+
+/// Disclose a same-locator collision for `entity_id`, derived from the standing
+/// `LMWV-DUPLICATE-LOCATOR` finding anchored to it (the finding's
+/// `entity_id` is the colliding id since clarion-48af930f2a).
+///
+/// Read **regardless of finding status**: suppressing a finding is a status
+/// flip (`open`→`suppressed`), the row persists, and the data-loss fact it
+/// records persists with it — so a suppressed finding must still disclose the
+/// chimera here rather than silently hide it. The standing full-pass stale
+/// sweep is what clears the disclosure once the collision genuinely resolves.
+///
+/// Returns `Ok(None)` when the entity has no such finding. A finding whose
+/// `evidence` JSON is unparseable degrades to a shapeless, declaration-less
+/// disclosure (`Some` with empty fields) rather than an error — the collision
+/// fact is more important than its metadata.
+///
+/// # Errors
+///
+/// Returns [`StorageError::Sqlite`] if the query fails to execute.
+pub fn duplicate_locator_collision(
+    conn: &Connection,
+    entity_id: &str,
+) -> Result<Option<LocatorCollision>> {
+    // At most one LMWV-DUPLICATE-LOCATOR finding exists per id per run (the
+    // guard dedups), and the content-keyed id collapses recurrences to one row;
+    // take the most recently updated should two distinct collision shapes ever
+    // coexist for one id.
+    let evidence: Option<String> = conn
+        .query_row(
+            "SELECT evidence FROM findings \
+             WHERE entity_id = ?1 AND rule_id = ?2 \
+             ORDER BY updated_at DESC, id DESC \
+             LIMIT 1",
+            params![entity_id, loomweave_core::DUPLICATE_LOCATOR_RULE_ID],
+            |row| row.get(0),
+        )
+        .optional()?;
+    let Some(evidence) = evidence else {
+        return Ok(None);
+    };
+    // `evidence` is `{"plugin_id": "...", "metadata": {...}}` (see the analyze
+    // host's `host_finding_to_record`). Pull the colliding paths + shape from
+    // the metadata; tolerate a missing/garbled shape rather than failing the
+    // read.
+    let metadata = serde_json::from_str::<serde_json::Value>(&evidence)
+        .ok()
+        .and_then(|v| v.get("metadata").cloned());
+    let str_field = |key: &str| -> Option<String> {
+        metadata
+            .as_ref()
+            .and_then(|m| m.get(key))
+            .and_then(|v| v.as_str())
+            .map(str::to_owned)
+    };
+    let mut declarations: Vec<String> = [
+        str_field("first_source_file_path"),
+        str_field("colliding_source_file_path"),
+    ]
+    .into_iter()
+    .flatten()
+    .filter(|p| !p.is_empty())
+    .collect();
+    declarations.sort();
+    declarations.dedup();
+    Ok(Some(LocatorCollision {
+        declarations,
+        shape: str_field("shape"),
+    }))
+}
+
 /// Faceted catalog query: entities carrying `tag` (any plugin's
 /// `entity_tags.tag`), ordered by id, materialised up to `scan_cap`. Returns
 /// `(rows, scan_truncated)`.
A blank tag is rejected; an unknown tag matches no @@ -2404,3 +2491,139 @@ mod current_file_hash_tests { assert_eq!(current_file_hash(dir.path(), "does/not/exist.py"), None); } } + +#[cfg(test)] +mod duplicate_locator_collision_tests { + use super::*; + + const COLLIDING_ID: &str = "python:class:specimen.colliding.ShelfMark"; + + /// In-memory DB with the real schema; seeds the colliding entity every + /// finding references (`foreign_keys` is ON, so the FKs are enforced). + fn migrated_conn() -> Connection { + let mut conn = Connection::open_in_memory().unwrap(); + crate::schema::apply_migrations(&mut conn).unwrap(); + conn.execute( + "INSERT INTO entities \ + (id, plugin_id, kind, name, short_name, source_file_path, properties, \ + content_hash, created_at, updated_at) \ + VALUES (?1, 'python', 'class', 'ShelfMark', 'ShelfMark', \ + '/specimen/colliding/__init__.py', '{}', 'h', 't', 't')", + params![COLLIDING_ID], + ) + .unwrap(); + conn + } + + /// Insert a duplicate-locator finding anchored to `COLLIDING_ID`, carrying + /// the analyze-shaped `evidence` envelope. `status` exercises the + /// suppression-survival contract. 
+ fn insert_collision_finding(conn: &Connection, status: &str) { + conn.execute( + "INSERT OR IGNORE INTO runs (id, started_at, config, stats, status) \ + VALUES ('run-1', 't', '{}', '{}', 'completed')", + [], + ) + .unwrap(); + let evidence = serde_json::json!({ + "plugin_id": "python", + "metadata": { + "entity_id": COLLIDING_ID, + "anchor_entity_id": COLLIDING_ID, + "first_source_file_path": "/specimen/colliding/__init__.py", + "colliding_source_file_path": "/specimen/colliding.py", + "shape": "in_run_cross_file", + } + }) + .to_string(); + conn.execute( + "INSERT INTO findings ( \ + id, tool, tool_version, run_id, rule_id, kind, severity, \ + entity_id, related_entities, message, evidence, properties, \ + supports, supported_by, status, created_at, updated_at \ + ) VALUES ( \ + 'core:finding:infra:dup', 'loomweave', '0', 'run-1', ?1, 'defect', 'ERROR', \ + ?2, '[]', 'm', ?3, '{}', \ + '[]', '[]', ?4, 't', 't' \ + )", + params![ + loomweave_core::DUPLICATE_LOCATOR_RULE_ID, + COLLIDING_ID, + evidence, + status + ], + ) + .unwrap(); + } + + #[test] + fn no_finding_means_no_collision() { + let conn = migrated_conn(); + assert_eq!( + duplicate_locator_collision(&conn, COLLIDING_ID).unwrap(), + None, + "an entity with no duplicate-locator finding discloses nothing" + ); + } + + #[test] + fn open_finding_discloses_both_declarations_and_shape() { + let conn = migrated_conn(); + insert_collision_finding(&conn, "open"); + let disclosure = duplicate_locator_collision(&conn, COLLIDING_ID) + .unwrap() + .expect("collision must be disclosed"); + assert_eq!( + disclosure.declarations, + vec![ + "/specimen/colliding.py".to_owned(), + "/specimen/colliding/__init__.py".to_owned(), + ], + "both colliding declarations are disclosed, sorted + deduped" + ); + assert_eq!(disclosure.shape.as_deref(), Some("in_run_cross_file")); + } + + #[test] + fn suppressed_finding_still_discloses_the_chimera() { + // The crux: suppression is a status flip, the row persists, and the + // data-loss 
fact persists with it — so the disclosure must survive. + let conn = migrated_conn(); + insert_collision_finding(&conn, "suppressed"); + assert!( + duplicate_locator_collision(&conn, COLLIDING_ID) + .unwrap() + .is_some(), + "a suppressed duplicate-locator finding must NOT hide the collision" + ); + } + + #[test] + fn other_rule_findings_do_not_masquerade_as_collisions() { + let conn = migrated_conn(); + conn.execute( + "INSERT OR IGNORE INTO runs (id, started_at, config, stats, status) \ + VALUES ('run-1', 't', '{}', '{}', 'completed')", + [], + ) + .unwrap(); + conn.execute( + "INSERT INTO findings ( \ + id, tool, tool_version, run_id, rule_id, kind, severity, \ + entity_id, related_entities, message, evidence, properties, \ + supports, supported_by, status, created_at, updated_at \ + ) VALUES ( \ + 'core:finding:other', 'loomweave', '0', 'run-1', 'LMWV-OTHER', 'defect', 'WARN', \ + ?1, '[]', 'm', '{}', '{}', \ + '[]', '[]', 'open', 't', 't' \ + )", + params![COLLIDING_ID], + ) + .unwrap(); + assert_eq!( + duplicate_locator_collision(&conn, COLLIDING_ID).unwrap(), + None, + "only LMWV-DUPLICATE-LOCATOR findings disclose a collision" + ); + } +}