diff --git a/adapters/grounding/opendataloader-json/src/lib.rs b/adapters/grounding/opendataloader-json/src/lib.rs index c1717c9..b7e8399 100644 --- a/adapters/grounding/opendataloader-json/src/lib.rs +++ b/adapters/grounding/opendataloader-json/src/lib.rs @@ -58,6 +58,8 @@ use serde_json::Value; /// Adapter version, reported in `ParserIdentity::adapter_version`. pub const ADAPTER_VERSION: &str = "0.1.0"; +const REAL_ODL_MAX_PAGES: u32 = 10_000; + /// Mapping failure: input is valid JSON but not recognizable ODL output. #[derive(Debug, Clone, PartialEq, Eq)] pub struct AdapterError { @@ -340,6 +342,9 @@ fn parse_real_odl(root: &Value) -> Result { if page_count == 0 { return Err(err("number of pages must be positive")); } + if page_count > REAL_ODL_MAX_PAGES { + return Err(err("number of pages exceeds adapter limit")); + } let kids = root .get("kids") .and_then(Value::as_array) @@ -816,6 +821,10 @@ mod tests { r#"{"tool":{"name":"x","version":"1"},"pages":[{"number":1,"width":612,"height":792}],"elements":[{"page":4294967296,"bbox":[1,1,2,2]}]}"#, "element.page must fit u32", ); + assert_error_contains( + r#"{"file name":"huge.pdf","number of pages":4294967295,"kids":[]}"#, + "number of pages exceeds adapter limit", + ); } #[test] diff --git a/crates/ethos-cli/src/cmd/verify.rs b/crates/ethos-cli/src/cmd/verify.rs index 956748e..52952d9 100644 --- a/crates/ethos-cli/src/cmd/verify.rs +++ b/crates/ethos-cli/src/cmd/verify.rs @@ -257,6 +257,7 @@ fn check_reason_label(reason: CheckReason) -> &'static str { CheckReason::UnsupportedClaimKind => "unsupported_claim_kind", CheckReason::StaleFingerprint => "stale_fingerprint", CheckReason::MissingSourceFingerprint => "missing_source_fingerprint", + CheckReason::MissingCitationFingerprint => "missing_citation_fingerprint", CheckReason::MissingSpanCapability => "missing_span_capability", CheckReason::MissingTableCapability => "missing_table_capability", CheckReason::UnknownCoordinateOrigin => "unknown_coordinate_origin", diff --git a/crates/ethos-cli/tests/verify.rs b/crates/ethos-cli/tests/verify.rs index 360efbd..710394b 100644 --- a/crates/ethos-cli/tests/verify.rs +++ b/crates/ethos-cli/tests/verify.rs @@ -19,6 +19,7 @@ use std::process::{Command, Output}; use std::time::{SystemTime, UNIX_EPOCH}; use ethos_core::fingerprint::source_fingerprint; +use ethos_core::model::Document; use serde_json::Value; fn ethos_bin() -> &'static str { @@ -72,6 +73,46 @@ fn json_file(path: impl AsRef) -> Value { serde_json::from_slice(&bytes).expect("JSON fixture parses") } +fn temp_split_quote_document() -> (PathBuf, String) { + let mut doc = json_file(document_example()); + doc["payload"]["elements"] = serde_json::json!([ + { + "id": "split-a", + "type": "text_block", + "page": "p0001", + "bbox": [100, 100, 400, 200], + "text": "The alpha trust loop verifies " + }, + { + "id": "split-b", + "type": "text_block", + "page": "p0001", + "bbox": [400, 100, 700, 200], + "text": "grounded evidence" + } + ]); + doc["payload"]["spans"] = serde_json::json!([]); + doc["payload"]["tables"] = serde_json::json!([]); + doc["payload"]["chunks"] = serde_json::json!([]); + doc["payload"]["regions"] = serde_json::json!([]); + doc["payload"]["security_warnings"] = serde_json::json!([]); + doc["payload"]["parser_warnings"] = serde_json::json!([]); + + let mut doc: Document = serde_json::from_value(doc).expect("split quote document parses"); + doc.payload_sha256 = doc + .compute_payload_sha256() + .expect("split quote payload hash computes"); + doc.fingerprint = doc + .compute_fingerprint() + .expect("split quote document fingerprint computes"); + let fingerprint = doc.fingerprint.clone(); + let path = temp_json( + "split-quote-native-document", + &serde_json::to_string(&doc).expect("split quote document serializes"), + ); + (path, fingerprint) +} + fn pdfium_configured() -> bool { std::env::var_os("ETHOS_PDFIUM_LIBRARY_PATH") .map(PathBuf::from) @@ -695,6 +736,48 @@ fn native_ethos_verify_produces_non_empty_checks() { assert_eq!(report["all_evidence_grounded"], false); } +#[test] +fn native_verify_grounds_split_quote_across_adjacent_elements() { + let (doc, fingerprint) = temp_split_quote_document(); + let citations = serde_json::json!({ + "document_fingerprint": fingerprint, + "claims": [ + { + "kind": "quote", + "text": "The alpha trust loop verifies grounded evidence", + "citation": { + "element_id": "split-b" + } + } + ] + }); + let citations = temp_json( + "split-quote-citations", + &serde_json::to_string(&citations).unwrap(), + ); + let report = parse_success(&[ + "verify", + doc.to_str().unwrap(), + "--citations", + citations.to_str().unwrap(), + ]); + + assert_eq!(report["all_evidence_grounded"], true); + assert_eq!(report["checks"][0]["status"], "grounded"); + assert_eq!( + report["checks"][0]["match_method"], + "normalized_text_contains" + ); + assert_eq!( + report["checks"][0]["evidence"]["text"], + "The alpha trust loop verifies grounded evidence" + ); + assert_eq!( + report["checks"][0]["evidence"]["bbox"], + serde_json::json!([100, 100, 700, 200]) + ); +} + #[test] fn opendataloader_verify_adapter_produces_capability_aware_report() { let grounding = odl_example(); @@ -1137,8 +1220,44 @@ fn bare_array_citation_input_works() { ]); assert_eq!(report["checks"].as_array().unwrap().len(), 1); - assert_eq!(report["checks"][0]["status"], "grounded"); - assert_eq!(report["all_evidence_grounded"], true); + assert_eq!(report["checks"][0]["status"], "stale"); + assert_eq!( + report["checks"][0]["reason"], + "missing_citation_fingerprint" + ); + assert_eq!(report["all_evidence_grounded"], false); +} + +#[test] +fn envelope_without_fingerprint_blocks_when_source_has_fingerprint() { + let doc = document_example(); + let citations = temp_json( + "no-fingerprint-envelope-citations", + r#"{ + "claims": [ + { + "kind": "presence", + "citation": { + "element_id": "e000002" + } + } + ] + }"#, + ); + let report = parse_success(&[ + "verify", + doc.to_str().unwrap(), + "--citations", + citations.to_str().unwrap(), + ]); + + assert_eq!(report["checks"].as_array().unwrap().len(), 1); + assert_eq!(report["checks"][0]["status"], "stale"); + assert_eq!( + report["checks"][0]["reason"], + "missing_citation_fingerprint" + ); + assert_eq!(report["all_evidence_grounded"], false); } #[test] diff --git a/crates/ethos-core/src/verify_types.rs b/crates/ethos-core/src/verify_types.rs index f69df74..3426a8f 100644 --- a/crates/ethos-core/src/verify_types.rs +++ b/crates/ethos-core/src/verify_types.rs @@ -98,6 +98,8 @@ pub enum CheckReason { StaleFingerprint, /// Citation was fingerprint-pinned but the source did not declare one. MissingSourceFingerprint, + /// Staleness policy requires a citation fingerprint, but input omitted it. + MissingCitationFingerprint, /// Span locator was used with a source that does not expose spans. MissingSpanCapability, /// Table-cell locator was used with a source that does not expose tables. diff --git a/crates/ethos-verify/src/lib.rs b/crates/ethos-verify/src/lib.rs index 6d06ff5..8a4bb3e 100644 --- a/crates/ethos-verify/src/lib.rs +++ b/crates/ethos-verify/src/lib.rs @@ -176,6 +176,9 @@ pub fn verify_claims( let fingerprint_unverifiable = config.staleness.require_fingerprint_match && citation_fingerprint.is_some() && source_fingerprint.is_none(); + let citation_fingerprint_missing = config.staleness.require_fingerprint_match + && citation_fingerprint.is_none() + && source_fingerprint.is_some(); let include_text = config.evidence.is_some_and(|e| e.include_text); let include_crops = config.evidence.is_some_and(|e| e.include_crops); let mut unsupported = Vec::new(); @@ -192,6 +195,7 @@ pub fn verify_claims( CheckContext { fingerprint_stale, fingerprint_unverifiable, + citation_fingerprint_missing, include_text, include_crops, }, @@ -225,6 +229,7 @@ pub fn verify_claims( struct CheckContext { fingerprint_stale: bool, fingerprint_unverifiable: bool, + citation_fingerprint_missing: bool, include_text: bool, include_crops: bool, } @@ -313,7 +318,20 @@ fn check_claim( }; } - let target = match resolve_target(index, &claim, config) { + if context.citation_fingerprint_missing { + return Check { + id: check_id, + claim, + status: CheckStatus::Stale, + reason: Some(CheckReason::MissingCitationFingerprint), + match_method: MatchMethod::None, + semantic_unverified: false, + evidence: None, + warnings, + }; + } + + let mut target = match resolve_target(index, &claim, config) { TargetResolution::Found(target) => target, TargetResolution::NotFound(reason) => { return Check { @@ -354,6 +372,10 @@ fn check_claim( } }; + if let Some(adjacent_target) = adjacent_quote_target(index, &claim, &target, config) { + target = adjacent_target; + } + let evidence = make_evidence(source, &target, context.include_text, context.include_crops); let (status, match_method, reason) = check_resolved_claim(claim.kind, claim.text.as_deref(), &target, config); @@ -446,6 +468,7 @@ struct FoundTarget { bbox: Option<[i64; 4]>, text: Option, from_table_cell: bool, + element_index: Option, } /// Per-run grounding snapshot used to avoid cloning full entity collections per claim. @@ -495,12 +518,6 @@ impl SourceIndex { } } - fn element(&self, id: &str) -> Option<&GroundingElement> { - self.element_by_id - .get(id) - .and_then(|index| self.elements.get(*index)) - } - fn span(&self, id: &str) -> Option<&GroundingSpan> { self.span_by_id .get(id) @@ -570,8 +587,15 @@ fn resolve_target( if let Some(element_id) = claim.citation.element_id.as_deref() { return index - .element(element_id) - .map(target_from_element) + .element_by_id + .get(element_id) + .and_then(|position| { + index + .elements + .get(*position) + .map(|element| (*position, element)) + }) + .map(|(position, element)| target_from_element(element, Some(position))) .map(TargetResolution::Found) .unwrap_or(TargetResolution::NotFound(CheckReason::ElementNotFound)); } @@ -584,8 +608,12 @@ fn resolve_target( return index .elements .iter() - .find(|element| element.page == page && contains_bbox(element.bbox, bbox, tolerance)) - .map(target_from_element) + .enumerate() + .filter(|(_, element)| { + element.page == page && contains_bbox(element.bbox, bbox, tolerance) + }) + .min_by_key(|(position, element)| (bbox_area(element.bbox), *position)) + .map(|(position, element)| target_from_element(element, Some(position))) .map(TargetResolution::Found) .unwrap_or(TargetResolution::NotFound(CheckReason::BboxNotFound)); } @@ -605,6 +633,7 @@ fn resolve_target( bbox: Some([0, 0, found.width, found.height]), text: None, from_table_cell: false, + element_index: None, }) }) .unwrap_or(TargetResolution::NotFound(CheckReason::PageNotFound)); @@ -613,12 +642,13 @@ fn resolve_target( TargetResolution::NotFound(CheckReason::MissingLocator) } -fn target_from_element(element: &GroundingElement) -> FoundTarget { +fn target_from_element(element: &GroundingElement, element_index: Option) -> FoundTarget { FoundTarget { page: Some(element.page.clone()), bbox: Some(element.bbox), text: element.text.clone(), from_table_cell: false, + element_index, } } @@ -628,6 +658,7 @@ fn target_from_span(span: &GroundingSpan) -> FoundTarget { bbox: Some(span.bbox), text: Some(span.text.clone()), from_table_cell: false, + element_index: None, } } @@ -669,7 +700,126 @@ fn target_from_cell(page: &str, cell: &GroundingCell) -> FoundTarget { bbox: Some(cell.bbox), text: Some(cell.text.clone()), from_table_cell: true, + element_index: None, + } +} + +fn adjacent_quote_target( + index: &SourceIndex, + claim: &Claim, + target: &FoundTarget, + config: &VerificationConfig, +) -> Option { + if claim.kind != ClaimKind::Quote { + return None; + } + let expected = claim.text.as_deref()?; + if target + .text + .as_deref() + .is_some_and(|actual| text_matches(ClaimKind::Quote, expected, actual, config)) + { + return None; + } + + if claim.citation.bbox.is_some() { + return None; } + + if claim.citation.element_id.is_some() { + if let Some(position) = target.element_index { + return adjacent_text_pair_for_element(index, position, expected, config); + } + } + + None +} + +fn adjacent_text_pair_for_element( + index: &SourceIndex, + position: usize, + expected: &str, + config: &VerificationConfig, +) -> Option { + let current = index.elements.get(position)?; + if let Some(second) = position + .checked_add(1) + .and_then(|next| index.elements.get(next)) + { + if let Some(target) = adjacent_text_pair_target(current, second, expected, config) { + return Some(target); + } + } + position + .checked_sub(1) + .and_then(|previous| index.elements.get(previous)) + .and_then(|first| adjacent_text_pair_target(first, current, expected, config)) +} + +fn adjacent_text_pair_target( + first: &GroundingElement, + second: &GroundingElement, + expected: &str, + config: &VerificationConfig, +) -> Option { + if first.page != second.page { + return None; + } + if !element_bboxes_are_adjacent(first.bbox, second.bbox) { + return None; + } + let first_text = first.text.as_deref()?; + let second_text = second.text.as_deref()?; + let joined = join_adjacent_text(first_text, second_text, config); + if text_matches(ClaimKind::Quote, expected, first_text, config) + || text_matches(ClaimKind::Quote, expected, second_text, config) + || !text_matches(ClaimKind::Quote, expected, &joined, config) + { + return None; + } + + Some(FoundTarget { + page: Some(first.page.clone()), + bbox: Some(union_bbox(first.bbox, second.bbox)), + text: Some(joined), + from_table_cell: false, + element_index: None, + }) +} + +fn join_adjacent_text(first: &str, second: &str, config: &VerificationConfig) -> String { + let joined = format!("{first} {second}"); + match config.matching.text_normalization { + TextNormalization::None => joined, + TextNormalization::CollapseWhitespace => normalize_quote(&joined), + } +} + +fn bbox_area(bbox: [i64; 4]) -> u128 { + let width = bbox[2].saturating_sub(bbox[0]).max(0) as u128; + let height = bbox[3].saturating_sub(bbox[1]).max(0) as u128; + width.saturating_mul(height) +} + +fn element_bboxes_are_adjacent(first: [i64; 4], second: [i64; 4]) -> bool { + let same_line = + ranges_overlap_i64(first[1], first[3], second[1], second[3]) && first[2] == second[0]; + let stacked = + ranges_overlap_i64(first[0], first[2], second[0], second[2]) && first[3] == second[1]; + same_line || stacked +} + +fn ranges_overlap_i64(a_start: i64, a_end: i64, b_start: i64, b_end: i64) -> bool { + a_start < b_end && b_start < a_end +} + +fn union_bbox(left: [i64; 4], right: [i64; 4]) -> [i64; 4] { + [ + left[0].min(right[0]), + left[1].min(right[1]), + left[2].max(right[2]), + left[3].max(right[3]), + ] } fn make_evidence( @@ -881,6 +1031,61 @@ mod tests { } } + struct ElementSource { + elements: Vec, + } + + impl GroundingSource for ElementSource { + fn parser(&self) -> ParserIdentity { + ParserIdentity { + name: "element-test-parser".into(), + version: "0.1.0".into(), + adapter: None, + adapter_version: None, + } + } + fn capabilities(&self) -> Capabilities { + Capabilities { + spans: true, + char_offsets: true, + tables: true, + fingerprint: true, + coordinate_origin: CoordinateOrigin::TopLeft, + crop_support: false, + } + } + fn fingerprint(&self) -> Option { + Some("sha256:b5d30710d0c25cc38d8dec924ecaf57ae4f81276dd5dc14d75cb3b5b6bde62d3".into()) + } + fn pages(&self) -> Vec { + vec![ + PageGeometry { + id: "p0001".into(), + index: 1, + width: 61200, + height: 79200, + rotation: 0, + }, + PageGeometry { + id: "p0002".into(), + index: 2, + width: 61200, + height: 79200, + rotation: 0, + }, + ] + } + fn elements(&self) -> Vec { + self.elements.clone() + } + fn spans(&self) -> Vec { + Vec::new() + } + fn tables(&self) -> Vec { + Vec::new() + } + } + fn claim(kind: ClaimKind, text: Option<&str>, citation: Citation) -> Claim { Claim { kind, @@ -909,6 +1114,26 @@ mod tests { verify_claims(source, input(source, claims), cfg, "0".repeat(64)) } + fn element(id: &str, page: &str, bbox: [i64; 4], text: Option<&str>) -> GroundingElement { + GroundingElement { + id: id.into(), + page: page.into(), + bbox, + kind: "text_block".into(), + text: text.map(str::to_string), + } + } + + fn verify_elements(elements: Vec, claims: Vec) -> VerificationReport { + let source = ElementSource { elements }; + let cfg = VerificationConfig::default_v1(); + let citations = CitationInput::Envelope(CitationEnvelope { + document_fingerprint: source.fingerprint(), + claims, + }); + verify_claims(&source, citations, &cfg, "0".repeat(64)) + } + #[test] fn quote_and_presence_claims_ground_with_literal_matching() { let source = TestSource::default(); @@ -954,6 +1179,313 @@ mod tests { assert_eq!(report.warnings, Vec::::new()); } + #[test] + fn quote_claim_grounds_across_adjacent_element_text_fragments() { + let report = verify_elements( + vec![ + element( + "split-a", + "p0001", + [100, 100, 400, 200], + Some("The alpha trust loop verifies "), + ), + element( + "split-b", + "p0001", + [400, 100, 700, 200], + Some("grounded evidence"), + ), + ], + vec![claim( + ClaimKind::Quote, + Some("The alpha trust loop verifies grounded evidence"), + Citation { + element_id: Some("split-a".into()), + ..Default::default() + }, + )], + ); + + assert!(report.all_evidence_grounded); + assert_eq!(report.checks[0].status, CheckStatus::Grounded); + assert_eq!( + report.checks[0].match_method, + MatchMethod::NormalizedTextContains + ); + assert_eq!( + report.checks[0] + .evidence + .as_ref() + .and_then(|e| e.text.as_deref()), + Some("The alpha trust loop verifies grounded evidence") + ); + assert_eq!( + report.checks[0].evidence.as_ref().and_then(|e| e.bbox), + Some([100, 100, 700, 200]) + ); + } + + #[test] + fn quote_claim_page_only_locator_does_not_search_adjacent_fragments() { + let report = verify_elements( + vec![ + element( + "split-a", + "p0001", + [100, 100, 400, 200], + Some("The alpha trust loop verifies "), + ), + element( + "split-b", + "p0001", + [400, 100, 700, 200], + Some("grounded evidence"), + ), + ], + vec![claim( + ClaimKind::Quote, + Some("The alpha trust loop verifies grounded evidence"), + Citation { + page: Some("p0001".into()), + ..Default::default() + }, + )], + ); + + assert!(!report.all_evidence_grounded); + assert_eq!(report.checks[0].status, CheckStatus::Mismatch); + assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch)); + } + + #[test] + fn quote_claim_grounds_when_element_id_points_to_second_adjacent_fragment() { + let report = verify_elements( + vec![ + element( + "split-a", + "p0001", + [100, 100, 400, 200], + Some("The alpha trust loop verifies "), + ), + element( + "split-b", + "p0001", + [400, 100, 700, 200], + Some("grounded evidence"), + ), + ], + vec![claim( + ClaimKind::Quote, + Some("The alpha trust loop verifies grounded evidence"), + Citation { + element_id: Some("split-b".into()), + ..Default::default() + }, + )], + ); + + assert!(report.all_evidence_grounded); + assert_eq!(report.checks[0].status, CheckStatus::Grounded); + assert_eq!( + report.checks[0] + .evidence + .as_ref() + .and_then(|e| e.text.as_deref()), + Some("The alpha trust loop verifies grounded evidence") + ); + assert_eq!( + report.checks[0].evidence.as_ref().and_then(|e| e.bbox), + Some([100, 100, 700, 200]) + ); + } + + #[test] + fn quote_claim_does_not_stitch_non_touching_element_bboxes() { + let report = verify_elements( + vec![ + element( + "split-a", + "p0001", + [100, 100, 390, 200], + Some("The alpha trust loop verifies "), + ), + element( + "split-b", + "p0001", + [400, 100, 700, 200], + Some("grounded evidence"), + ), + ], + vec![claim( + ClaimKind::Quote, + Some("The alpha trust loop verifies grounded evidence"), + Citation { + element_id: Some("split-a".into()), + ..Default::default() + }, + )], + ); + + assert!(!report.all_evidence_grounded); + assert_eq!(report.checks[0].status, CheckStatus::Mismatch); + assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch)); + } + + #[test] + fn quote_claim_bbox_locator_does_not_expand_outside_cited_region() { + let report = verify_elements( + vec![ + element( + "split-a", + "p0001", + [100, 100, 400, 200], + Some("The alpha trust loop verifies "), + ), + element( + "split-b", + "p0001", + [400, 100, 700, 200], + Some("grounded evidence"), + ), + ], + vec![claim( + ClaimKind::Quote, + Some("The alpha trust loop verifies grounded evidence"), + Citation { + page: Some("p0001".into()), + bbox: Some([120, 120, 380, 180]), + ..Default::default() + }, + )], + ); + + assert!(!report.all_evidence_grounded); + assert_eq!(report.checks[0].status, CheckStatus::Mismatch); + assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch)); + assert_eq!( + report.checks[0] + .evidence + .as_ref() + .and_then(|e| e.text.as_deref()), + Some("The alpha trust loop verifies ") + ); + assert_eq!( + report.checks[0].evidence.as_ref().and_then(|e| e.bbox), + Some([100, 100, 400, 200]) + ); + } + + #[test] + fn bbox_locator_prefers_smallest_containing_element() { + let report = verify_elements( + vec![ + element( + "container", + "p0001", + [0, 0, 1000, 1000], + Some("outer wrapper text"), + ), + element( + "inner", + "p0001", + [100, 100, 400, 200], + Some("The exact cited quote"), + ), + ], + vec![claim( + ClaimKind::Quote, + Some("The exact cited quote"), + Citation { + page: Some("p0001".into()), + bbox: Some([120, 120, 380, 180]), + ..Default::default() + }, + )], + ); + + assert!(report.all_evidence_grounded); + assert_eq!(report.checks[0].status, CheckStatus::Grounded); + assert_eq!( + report.checks[0] + .evidence + .as_ref() + .and_then(|e| e.text.as_deref()), + Some("The exact cited quote") + ); + assert_eq!( + report.checks[0].evidence.as_ref().and_then(|e| e.bbox), + Some([100, 100, 400, 200]) + ); + } + + #[test] + fn quote_claim_does_not_ground_across_non_adjacent_or_wrong_page_fragments() { + let non_adjacent = verify_elements( + vec![ + element( + "split-a", + "p0001", + [100, 100, 400, 200], + Some("The alpha trust loop verifies "), + ), + element( + "between", + "p0001", + [100, 220, 700, 320], + Some("separate evidence"), + ), + element( + "split-b", + "p0001", + [400, 100, 700, 200], + Some("grounded evidence"), + ), + ], + vec![claim( + ClaimKind::Quote, + Some("The alpha trust loop verifies grounded evidence"), + Citation { + element_id: Some("split-a".into()), + ..Default::default() + }, + )], + ); + assert!(!non_adjacent.all_evidence_grounded); + assert_eq!(non_adjacent.checks[0].status, CheckStatus::Mismatch); + assert_eq!( + non_adjacent.checks[0].reason, + Some(CheckReason::TextMismatch) + ); + + let wrong_page = verify_elements( + vec![ + element( + "split-a", + "p0001", + [100, 100, 400, 200], + Some("The alpha trust loop verifies "), + ), + element( + "split-b", + "p0002", + [400, 100, 700, 200], + Some("grounded evidence"), + ), + ], + vec![claim( + ClaimKind::Quote, + Some("The alpha trust loop verifies grounded evidence"), + Citation { + page: Some("p0001".into()), + ..Default::default() + }, + )], + ); + assert!(!wrong_page.all_evidence_grounded); + assert_eq!(wrong_page.checks[0].status, CheckStatus::Mismatch); + assert_eq!(wrong_page.checks[0].reason, Some(CheckReason::TextMismatch)); + } + #[test] fn mismatch_and_not_found_keep_gate_false() { let source = TestSource::default(); @@ -1295,6 +1827,36 @@ mod tests { assert_eq!(report.checks[0].reason, Some(CheckReason::StaleFingerprint)); } + #[test] + fn missing_citation_fingerprint_blocks_when_required() { + let source = TestSource::default(); + let cfg = VerificationConfig::default_v1(); + let report = verify_claims( + &source, + CitationInput::Envelope(CitationEnvelope { + document_fingerprint: None, + claims: vec![claim( + ClaimKind::Presence, + None, + Citation { + element_id: Some("e000002".into()), + ..Default::default() + }, + )], + }), + &cfg, + "0".repeat(64), + ); + + assert!(!report.fingerprint_stale); + assert!(!report.all_evidence_grounded); + assert_eq!(report.checks[0].status, CheckStatus::Stale); + assert_eq!( + report.checks[0].reason, + Some(CheckReason::MissingCitationFingerprint) + ); + } + #[test] fn unsupported_claim_kinds_are_explicit() { let source = TestSource::default(); diff --git a/docs/demos/verify-alpha.md b/docs/demos/verify-alpha.md index 2e0527b..3a33e8b 100644 --- a/docs/demos/verify-alpha.md +++ b/docs/demos/verify-alpha.md @@ -23,6 +23,8 @@ Ethos verifies document evidence for AI systems. The deterministic parser is one source; foreign parser output can be another grounding source through an adapter. This demo uses small checked-in fixtures so the behavior is deterministic and easy to audit. +Native table-cell grounding in this demo uses authored table data in the checked-in Ethos JSON; +fresh native parse output does not yet emit table structures into verification input. ## Native Ethos JSON diff --git a/examples/verify/README.md b/examples/verify/README.md index 50f01fc..06867f6 100644 --- a/examples/verify/README.md +++ b/examples/verify/README.md @@ -99,6 +99,7 @@ Non-grounded checks may include a stable `reason` label: | `text_mismatch` | Target text did not match the claimed text under the active literal matcher. | | `missing_table_capability` | The claim needs table-cell lookup, but the grounding source does not expose tables. | | `missing_source_fingerprint` | Citations were fingerprint-pinned, but the grounding source did not declare one. | +| `missing_citation_fingerprint` | The active staleness policy requires citation fingerprints, but the input omitted one. | | `unknown_coordinate_origin` | A bbox locator was used with a source whose coordinate origin is unknown. | | `element_not_found` | The cited element id was not found in a source that exposes element ids. | | `table_not_found` | The cited table id was not found in a source that exposes tables. | diff --git a/schemas/ethos-verification-report.schema.json b/schemas/ethos-verification-report.schema.json index bf5dedb..899de57 100644 --- a/schemas/ethos-verification-report.schema.json +++ b/schemas/ethos-verification-report.schema.json @@ -164,6 +164,7 @@ "unsupported_claim_kind", "stale_fingerprint", "missing_source_fingerprint", + "missing_citation_fingerprint", "missing_span_capability", "missing_table_capability", "unknown_coordinate_origin",