diff --git a/crates/ethos-cli/src/cmd/verify.rs b/crates/ethos-cli/src/cmd/verify.rs index b85b5f8..acfd4a4 100644 --- a/crates/ethos-cli/src/cmd/verify.rs +++ b/crates/ethos-cli/src/cmd/verify.rs @@ -610,6 +610,41 @@ fn validate_citation_input( idx + 1 ))); } + if claim.citation.table_id.is_some() != claim.citation.cell.is_some() { + return Err(Failure::Usage(format!( + "claim {} citation table_id and cell must be provided together", + idx + 1 + ))); + } + if claim.kind == ClaimKind::TableCell + && (claim.citation.table_id.is_none() || claim.citation.cell.is_none()) + { + return Err(Failure::Usage(format!( + "claim {} table_cell citation must include table_id and cell", + idx + 1 + ))); + } + if claim.citation.bbox.is_some() + && claim.citation.page.is_none() + && claim.citation.element_id.is_none() + && claim.citation.span_id.is_none() + && claim.citation.table_id.is_none() + { + return Err(Failure::Usage(format!( + "claim {} citation bbox requires page unless another target locator is present", + idx + 1 + ))); + } + if claim + .citation + .bbox + .is_some_and(|bbox| bbox[0] >= bbox[2] || bbox[1] >= bbox[3]) + { + return Err(Failure::Usage(format!( + "claim {} citation bbox must have positive area", + idx + 1 + ))); + } if matches!( claim.kind, ClaimKind::Quote | ClaimKind::Value | ClaimKind::TableCell @@ -774,6 +809,7 @@ mod tests { }, }, status: CheckStatus::Grounded, + reason: None, match_method: MatchMethod::ExactTextContains, semantic_unverified: false, evidence: Some(Evidence { @@ -799,6 +835,7 @@ mod tests { }, }, status: CheckStatus::Grounded, + reason: None, match_method: MatchMethod::PresenceOnly, semantic_unverified: false, evidence: Some(Evidence { diff --git a/crates/ethos-cli/tests/verify.rs b/crates/ethos-cli/tests/verify.rs index c1783a3..1783b01 100644 --- a/crates/ethos-cli/tests/verify.rs +++ b/crates/ethos-cli/tests/verify.rs @@ -214,6 +214,7 @@ fn real_opendataloader_ungrounded_fixture_verifies_against_golden() { 
assert_eq!(report["all_evidence_grounded"], false); assert_eq!(report["checks"][0]["status"], "mismatch"); assert_eq!(report["checks"][0]["match_method"], "normalized_text"); + assert_eq!(report["checks"][0]["reason"], "text_mismatch"); let gated = run_ethos(&[ "verify", @@ -276,6 +277,7 @@ fn fail_on_ungrounded_exits_one_after_writing_stale_report() { assert_eq!(report["fingerprint_stale"], true); assert_eq!(report["all_evidence_grounded"], false); assert_eq!(report["checks"][0]["status"], "stale"); + assert_eq!(report["checks"][0]["reason"], "stale_fingerprint"); } #[test] @@ -300,6 +302,7 @@ fn fail_on_ungrounded_exits_one_with_stdout_report_for_capability_blocked_source let report: Value = serde_json::from_slice(&output.stdout).expect("stdout is JSON"); assert_eq!(report["all_evidence_grounded"], false); assert_eq!(report["checks"][0]["status"], "capability_blocked"); + assert_eq!(report["checks"][0]["reason"], "missing_table_capability"); assert!(report["capability_limits"] .as_array() .unwrap() @@ -601,6 +604,7 @@ fn native_ethos_verify_produces_non_empty_checks() { assert_eq!(report["checks"][1]["status"], "grounded"); assert_eq!(report["checks"][1]["match_method"], "table_cell_lookup"); assert_eq!(report["checks"][2]["status"], "mismatch"); + assert_eq!(report["checks"][2]["reason"], "text_mismatch"); assert_eq!(report["all_evidence_grounded"], false); } @@ -649,6 +653,7 @@ fn opendataloader_verify_adapter_produces_capability_aware_report() { assert_eq!(report["checks"][1]["match_method"], "table_cell_lookup"); assert_eq!(report["checks"][1]["evidence"]["text"], "$12.4M"); assert_eq!(report["checks"][2]["status"], "mismatch"); + assert_eq!(report["checks"][2]["reason"], "text_mismatch"); assert_eq!(report["all_evidence_grounded"], false); } @@ -721,6 +726,7 @@ fn stale_fingerprint_is_report_level_failure() { assert_eq!(report["fingerprint_stale"], true); assert_eq!(report["checks"][0]["status"], "stale"); + assert_eq!(report["checks"][0]["reason"], 
"stale_fingerprint"); assert_eq!(report["all_evidence_grounded"], false); } @@ -808,6 +814,133 @@ fn invalid_citation_shape_is_usage_error() { .contains("claim 1 citation must contain at least one locator")); } +#[test] +fn incomplete_table_cell_locator_is_usage_error() { + let doc = document_example(); + let cases = [ + ( + "table-id-without-cell", + r#"{ + "claims": [ + { + "kind": "table_cell", + "text": "$12.4M", + "citation": { + "table_id": "t0001" + } + } + ] + }"#, + "claim 1 citation table_id and cell must be provided together", + ), + ( + "cell-without-table-id", + r#"{ + "claims": [ + { + "kind": "table_cell", + "text": "$12.4M", + "citation": { + "cell": { + "row": 1, + "col": 1 + } + } + } + ] + }"#, + "claim 1 citation table_id and cell must be provided together", + ), + ( + "table-cell-kind-without-table-cell-locator", + r#"{ + "claims": [ + { + "kind": "table_cell", + "text": "$12.4M", + "citation": { + "element_id": "e000002" + } + } + ] + }"#, + "claim 1 table_cell citation must include table_id and cell", + ), + ]; + + for (name, json, expected) in cases { + let citations = temp_json(name, json); + let output = run_ethos(&[ + "verify", + doc.to_str().unwrap(), + "--citations", + citations.to_str().unwrap(), + ]); + + assert_eq!(output.status.code(), Some(2), "case {name}"); + assert!(output.stdout.is_empty(), "case {name}"); + assert!( + String::from_utf8_lossy(&output.stderr).contains(expected), + "case {name} stderr:\n{}", + String::from_utf8_lossy(&output.stderr) + ); + } +} + +#[test] +fn unusable_bbox_locator_is_usage_error() { + let doc = document_example(); + let cases = [ + ( + "bbox-without-page", + r#"{ + "claims": [ + { + "kind": "presence", + "citation": { + "bbox": [7300, 10200, 8000, 11000] + } + } + ] + }"#, + "claim 1 citation bbox requires page unless another target locator is present", + ), + ( + "zero-width-bbox", + r#"{ + "claims": [ + { + "kind": "presence", + "citation": { + "page": "p0001", + "bbox": [7300, 10200, 7300, 
11000] + } + } + ] + }"#, + "claim 1 citation bbox must have positive area", + ), + ]; + + for (name, json, expected) in cases { + let citations = temp_json(name, json); + let output = run_ethos(&[ + "verify", + doc.to_str().unwrap(), + "--citations", + citations.to_str().unwrap(), + ]); + + assert_eq!(output.status.code(), Some(2), "case {name}"); + assert!(output.stdout.is_empty(), "case {name}"); + assert!( + String::from_utf8_lossy(&output.stderr).contains(expected), + "case {name} stderr:\n{}", + String::from_utf8_lossy(&output.stderr) + ); + } +} + #[test] fn unknown_citation_fields_are_usage_errors() { let doc = document_example(); @@ -1301,6 +1434,7 @@ fn value_substrings_do_not_ground_against_native_ethos_text() { assert_eq!(report["checks"][0]["status"], "mismatch"); assert_eq!(report["checks"][0]["match_method"], "normalized_text"); + assert_eq!(report["checks"][0]["reason"], "text_mismatch"); assert_eq!(report["all_evidence_grounded"], false); } @@ -1381,7 +1515,9 @@ fn table_cell_mismatch_and_missing_cell_fail_gate() { assert_eq!(report["checks"][0]["status"], "mismatch"); assert_eq!(report["checks"][0]["match_method"], "table_cell_lookup"); + assert_eq!(report["checks"][0]["reason"], "text_mismatch"); assert_eq!(report["checks"][1]["status"], "not_found"); + assert_eq!(report["checks"][1]["reason"], "table_cell_not_found"); assert_eq!(report["all_evidence_grounded"], false); } @@ -1496,6 +1632,7 @@ fn table_cell_is_capability_blocked_when_tables_are_missing() { ]); assert_eq!(report["checks"][0]["status"], "capability_blocked"); + assert_eq!(report["checks"][0]["reason"], "missing_table_capability"); assert_eq!(report["grounding"]["capabilities"]["tables"], false); assert_eq!( report["capability_limits"], @@ -1572,6 +1709,7 @@ fn empty_tables_are_not_found_when_table_capability_is_declared() { ]) ); assert_eq!(report["checks"][0]["status"], "not_found"); + assert_eq!(report["checks"][0]["reason"], "table_not_found"); 
assert_eq!(report["all_evidence_grounded"], false); } @@ -1612,6 +1750,7 @@ fn foreign_source_without_fingerprint_blocks_fingerprint_pinned_citations() { ]) ); assert_eq!(report["checks"][0]["status"], "capability_blocked"); + assert_eq!(report["checks"][0]["reason"], "missing_source_fingerprint"); assert_eq!( report["checks"][0]["warnings"], serde_json::json!(["capability_limited"]) @@ -1670,6 +1809,7 @@ fn config_excluded_value_claim_is_unsupported() { ]); assert_eq!(report["checks"][0]["status"], "unsupported_claim_kind"); + assert_eq!(report["checks"][0]["reason"], "unsupported_claim_kind"); assert_eq!( report["unsupported_claim_kinds"], serde_json::json!(["value"]) @@ -1768,6 +1908,7 @@ fn bbox_presence_is_capability_blocked_when_coordinate_origin_is_unknown() { ]); assert_eq!(report["checks"][0]["status"], "capability_blocked"); + assert_eq!(report["checks"][0]["reason"], "unknown_coordinate_origin"); assert_eq!( report["capability_limits"], serde_json::json!([ diff --git a/crates/ethos-core/src/verify_types.rs b/crates/ethos-core/src/verify_types.rs index 9e6336e..3a06b60 100644 --- a/crates/ethos-core/src/verify_types.rs +++ b/crates/ethos-core/src/verify_types.rs @@ -65,6 +65,49 @@ pub enum CheckStatus { Error, } +/// Stable reason for a non-grounded check outcome. +/// +/// These are diagnostic labels only. They explain why the check did not ground +/// under the active literal verifier; they do not add semantic judgment. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum CheckReason { + /// Citation had no usable locator. + MissingLocator, + /// Textual claim kind was missing required text. + MissingRequiredText, + /// Claim kind is unsupported by the verifier or active config. + UnsupportedClaimKind, + /// Citation fingerprint differs from the grounding source fingerprint. + StaleFingerprint, + /// Citation was fingerprint-pinned but the source did not declare one. 
+ MissingSourceFingerprint, + /// Span locator was used with a source that does not expose spans. + MissingSpanCapability, + /// Table-cell locator was used with a source that does not expose tables. + MissingTableCapability, + /// Bbox locator was used with a source whose coordinate origin is unknown. + UnknownCoordinateOrigin, + /// Element id was not found. + ElementNotFound, + /// Span id was not found. + SpanNotFound, + /// Page id was not found. + PageNotFound, + /// Bbox locator did not resolve to a grounding element. + BboxNotFound, + /// Bbox locator did not include a page locator. + MissingPageForBbox, + /// Table-cell citation did not include both table id and cell address. + MissingTableCellLocator, + /// Table id was not found. + TableNotFound, + /// Cell address was not found in the table. + TableCellNotFound, + /// Target text did not match the claimed text under the active matcher. + TextMismatch, +} + /// How evidence was matched. v1 is deliberately literal — nothing fuzzy, nothing semantic. #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] @@ -173,6 +216,9 @@ pub struct Check { pub claim: Claim, /// Outcome. pub status: CheckStatus, + /// Stable reason for a non-grounded outcome. + #[serde(skip_serializing_if = "Option::is_none")] + pub reason: Option<CheckReason>, /// Method used. 
pub match_method: MatchMethod, /// True when grounding would require semantic judgment beyond the declared @@ -363,6 +409,7 @@ mod tests { }, }, status, + reason: None, match_method: MatchMethod::ExactText, semantic_unverified: semantic, evidence: None, diff --git a/crates/ethos-verify/src/lib.rs b/crates/ethos-verify/src/lib.rs index 70f1c8a..5d67f5f 100644 --- a/crates/ethos-verify/src/lib.rs +++ b/crates/ethos-verify/src/lib.rs @@ -24,8 +24,9 @@ use ethos_core::grounding::{ GroundingTable, PageGeometry, }; use ethos_core::verify_types::{ - compute_all_evidence_grounded, CapabilityLimit, Check, CheckStatus, Claim, ClaimKind, Evidence, - GroundingMeta, MatchMethod, TextNormalization, VerificationConfig, VerificationReport, + compute_all_evidence_grounded, CapabilityLimit, Check, CheckReason, CheckStatus, Claim, + ClaimKind, Evidence, GroundingMeta, MatchMethod, TextNormalization, VerificationConfig, + VerificationReport, }; use serde::{Deserialize, Serialize}; @@ -229,6 +230,7 @@ fn check_claim( id: check_id, claim, status: CheckStatus::Error, + reason: Some(CheckReason::MissingLocator), match_method: MatchMethod::None, semantic_unverified: false, evidence: None, @@ -242,6 +244,7 @@ fn check_claim( id: check_id, claim, status: CheckStatus::UnsupportedClaimKind, + reason: Some(CheckReason::UnsupportedClaimKind), match_method: MatchMethod::None, semantic_unverified: false, evidence: None, @@ -259,6 +262,7 @@ fn check_claim( id: check_id, claim, status: CheckStatus::Error, + reason: Some(CheckReason::MissingRequiredText), match_method: MatchMethod::None, semantic_unverified: false, evidence: None, @@ -271,6 +275,7 @@ fn check_claim( id: check_id, claim, status: CheckStatus::Stale, + reason: Some(CheckReason::StaleFingerprint), match_method: MatchMethod::None, semantic_unverified: false, evidence: None, @@ -284,6 +289,7 @@ fn check_claim( id: check_id, claim, status: CheckStatus::CapabilityBlocked, + reason: Some(CheckReason::MissingSourceFingerprint), match_method: 
MatchMethod::None, semantic_unverified: false, evidence: None, @@ -293,23 +299,37 @@ fn check_claim( let target = match resolve_target(index, &claim, config) { TargetResolution::Found(target) => target, - TargetResolution::NotFound => { + TargetResolution::NotFound(reason) => { return Check { id: check_id, claim, status: CheckStatus::NotFound, + reason: Some(reason), match_method: MatchMethod::None, semantic_unverified: false, evidence: None, warnings, }; } - TargetResolution::CapabilityBlocked => { + TargetResolution::Invalid(reason) => { + return Check { + id: check_id, + claim, + status: CheckStatus::Error, + reason: Some(reason), + match_method: MatchMethod::None, + semantic_unverified: false, + evidence: None, + warnings, + }; + } + TargetResolution::CapabilityBlocked(reason) => { push_warning(&mut warnings, WarningCode::CapabilityLimited); return Check { id: check_id, claim, status: CheckStatus::CapabilityBlocked, + reason: Some(reason), match_method: MatchMethod::None, semantic_unverified: false, evidence: None, @@ -319,12 +339,13 @@ fn check_claim( }; let evidence = make_evidence(source, &target, context.include_text, context.include_crops); - let (status, match_method) = + let (status, match_method, reason) = check_resolved_claim(claim.kind, claim.text.as_deref(), &target, config); Check { id: check_id, claim, status, + reason, match_method, semantic_unverified: false, evidence, @@ -337,7 +358,7 @@ fn check_resolved_claim( expected_text: Option<&str>, target: &FoundTarget, config: &VerificationConfig, -) -> (CheckStatus, MatchMethod) { +) -> (CheckStatus, MatchMethod, Option<CheckReason>) { match kind { ClaimKind::Presence => check_presence_claim(), ClaimKind::Quote | ClaimKind::Value | ClaimKind::TableCell => { @@ -347,8 +368,8 @@ fn check_resolved_claim( } } -fn check_presence_claim() -> (CheckStatus, MatchMethod) { - (CheckStatus::Grounded, MatchMethod::PresenceOnly) +fn check_presence_claim() -> (CheckStatus, MatchMethod, Option<CheckReason>) { + (CheckStatus::Grounded, 
MatchMethod::PresenceOnly, None) } fn check_text_claim( @@ -356,19 +377,19 @@ fn check_text_claim( expected_text: Option<&str>, target: &FoundTarget, config: &VerificationConfig, -) -> (CheckStatus, MatchMethod) { +) -> (CheckStatus, MatchMethod, Option<CheckReason>) { let match_method = if target.from_table_cell { MatchMethod::TableCellLookup } else { text_match_method(kind, config) }; - let status = match (expected_text, target.text.as_deref()) { + let (status, reason) = match (expected_text, target.text.as_deref()) { (Some(expected), Some(actual)) if text_matches(kind, expected, actual, config) => { - CheckStatus::Grounded + (CheckStatus::Grounded, None) } - _ => CheckStatus::Mismatch, + _ => (CheckStatus::Mismatch, Some(CheckReason::TextMismatch)), }; - (status, match_method) + (status, match_method, reason) } fn is_supported_kind(kind: ClaimKind) -> bool { @@ -503,8 +524,9 @@ fn index_tables(tables: &[GroundingTable]) -> BTreeMap { enum TargetResolution { Found(FoundTarget), - NotFound, - CapabilityBlocked, + NotFound(CheckReason), + Invalid(CheckReason), + CapabilityBlocked(CheckReason), } fn resolve_target( @@ -512,19 +534,22 @@ fn resolve_target( claim: &Claim, config: &VerificationConfig, ) -> TargetResolution { - if claim.kind == ClaimKind::TableCell || claim.citation.table_id.is_some() { + if claim.kind == ClaimKind::TableCell + || claim.citation.table_id.is_some() + || claim.citation.cell.is_some() + { return resolve_table_cell(index, claim); } if let Some(span_id) = claim.citation.span_id.as_deref() { if !index.capabilities.spans { - return TargetResolution::CapabilityBlocked; + return TargetResolution::CapabilityBlocked(CheckReason::MissingSpanCapability); } return index .span(span_id) .map(target_from_span) .map(TargetResolution::Found) - .unwrap_or(TargetResolution::NotFound); + .unwrap_or(TargetResolution::NotFound(CheckReason::SpanNotFound)); } if let Some(element_id) = claim.citation.element_id.as_deref() { @@ -532,12 +557,12 @@ fn resolve_target( 
.element(element_id) .map(target_from_element) .map(TargetResolution::Found) - .unwrap_or(TargetResolution::NotFound); + .unwrap_or(TargetResolution::NotFound(CheckReason::ElementNotFound)); } if let (Some(page), Some(bbox)) = (claim.citation.page.as_deref(), claim.citation.bbox) { if index.capabilities.coordinate_origin == CoordinateOrigin::Unknown { - return TargetResolution::CapabilityBlocked; + return TargetResolution::CapabilityBlocked(CheckReason::UnknownCoordinateOrigin); } let tolerance = config.matching.bbox_containment_tolerance_q.unwrap_or(0); return index @@ -546,7 +571,11 @@ fn resolve_target( .find(|element| element.page == page && contains_bbox(element.bbox, bbox, tolerance)) .map(target_from_element) .map(TargetResolution::Found) - .unwrap_or(TargetResolution::NotFound); + .unwrap_or(TargetResolution::NotFound(CheckReason::BboxNotFound)); + } + + if claim.citation.bbox.is_some() { + return TargetResolution::Invalid(CheckReason::MissingPageForBbox); } if let Some(page) = claim.citation.page.as_deref() { @@ -562,10 +591,10 @@ fn resolve_target( from_table_cell: false, }) }) - .unwrap_or(TargetResolution::NotFound); + .unwrap_or(TargetResolution::NotFound(CheckReason::PageNotFound)); } - TargetResolution::NotFound + TargetResolution::NotFound(CheckReason::MissingLocator) } fn target_from_element(element: &GroundingElement) -> FoundTarget { @@ -588,19 +617,20 @@ fn target_from_span(span: &GroundingSpan) -> FoundTarget { fn resolve_table_cell(index: &SourceIndex, claim: &Claim) -> TargetResolution { let Some(table_id) = claim.citation.table_id.as_deref() else { - return TargetResolution::NotFound; + return TargetResolution::Invalid(CheckReason::MissingTableCellLocator); }; let Some(cell_ref) = claim.citation.cell else { - return TargetResolution::NotFound; + return TargetResolution::Invalid(CheckReason::MissingTableCellLocator); }; if !index.capabilities.tables { - return TargetResolution::CapabilityBlocked; + return 
TargetResolution::CapabilityBlocked(CheckReason::MissingTableCapability); } - index - .table(table_id) - .and_then(|table| target_from_table_cell(table, cell_ref.row, cell_ref.col)) + let Some(table) = index.table(table_id) else { + return TargetResolution::NotFound(CheckReason::TableNotFound); + }; + target_from_table_cell(table, cell_ref.row, cell_ref.col) .map(TargetResolution::Found) - .unwrap_or(TargetResolution::NotFound) + .unwrap_or(TargetResolution::NotFound(CheckReason::TableCellNotFound)) } fn target_from_table_cell(table: &GroundingTable, row: u32, col: u32) -> Option<FoundTarget> { @@ -935,7 +965,9 @@ mod tests { assert!(!report.all_evidence_grounded); assert_eq!(report.checks[0].status, CheckStatus::Mismatch); + assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch)); assert_eq!(report.checks[1].status, CheckStatus::NotFound); + assert_eq!(report.checks[1].reason, Some(CheckReason::ElementNotFound)); } #[test] @@ -976,6 +1008,7 @@ mod tests { assert!(!report.all_evidence_grounded); assert_eq!(report.checks[0].status, CheckStatus::Mismatch); + assert_eq!(report.checks[0].reason, Some(CheckReason::TextMismatch)); assert_eq!(report.checks[0].match_method, MatchMethod::NormalizedText); } @@ -1026,6 +1059,10 @@ mod tests { assert!(!report.all_evidence_grounded); assert_eq!(report.checks[0].status, CheckStatus::NotFound); + assert_eq!( + report.checks[0].reason, + Some(CheckReason::TableCellNotFound) + ); assert_eq!(report.checks[0].match_method, MatchMethod::None); } @@ -1121,6 +1158,10 @@ mod tests { ); assert_eq!(report.checks[0].status, CheckStatus::CapabilityBlocked); + assert_eq!( + report.checks[0].reason, + Some(CheckReason::MissingTableCapability) + ); assert_eq!( report.capability_limits, vec![CapabilityLimit::MissingTables] @@ -1235,6 +1276,7 @@ mod tests { assert!(report.fingerprint_stale); assert!(!report.all_evidence_grounded); assert_eq!(report.checks[0].status, CheckStatus::Stale); + assert_eq!(report.checks[0].reason, 
Some(CheckReason::StaleFingerprint)); } #[test] @@ -1254,6 +1296,10 @@ mod tests { assert!(!report.all_evidence_grounded); assert_eq!(report.checks[0].status, CheckStatus::UnsupportedClaimKind); + assert_eq!( + report.checks[0].reason, + Some(CheckReason::UnsupportedClaimKind) + ); assert_eq!(report.unsupported_claim_kinds, vec!["region"]); } @@ -1285,6 +1331,10 @@ mod tests { assert!(!report.all_evidence_grounded); assert_eq!(report.checks[0].status, CheckStatus::CapabilityBlocked); + assert_eq!( + report.checks[0].reason, + Some(CheckReason::MissingSpanCapability) + ); assert_eq!( report.capability_limits, vec![ @@ -1335,6 +1385,10 @@ mod tests { assert!(!report.fingerprint_stale); assert!(!report.all_evidence_grounded); assert_eq!(report.checks[0].status, CheckStatus::CapabilityBlocked); + assert_eq!( + report.checks[0].reason, + Some(CheckReason::MissingSourceFingerprint) + ); assert_eq!( report.capability_limits, vec![CapabilityLimit::MissingFingerprint] @@ -1362,6 +1416,10 @@ mod tests { assert!(!report.all_evidence_grounded); assert_eq!(report.checks[0].status, CheckStatus::Error); + assert_eq!( + report.checks[0].reason, + Some(CheckReason::MissingRequiredText) + ); assert_eq!(report.checks[0].match_method, MatchMethod::None); } diff --git a/examples/verify/README.md b/examples/verify/README.md index d1679c2..067b13f 100644 --- a/examples/verify/README.md +++ b/examples/verify/README.md @@ -36,7 +36,8 @@ ethos verify schemas/examples/document.example.json \ ``` Expected result: `fingerprint_stale: true`, check status `stale`, and -`all_evidence_grounded: false`. +`all_evidence_grounded: false`. Non-grounded checks include a `reason` label such as +`stale_fingerprint`, `text_mismatch`, or `missing_table_capability`. ## Capability-Limited Table Claim @@ -56,8 +57,8 @@ subset. They are not real pinned OpenDataLoader artifacts. Golden reports live i Real pinned OpenDataLoader output lives under `fixtures/foreign/opendataloader/real/`. 
That package includes both a grounded citation set and an ungrounded citation set so -`make verify-alpha` proves the accept and reject paths against a real foreign parser output. -The same target also runs native Ethos verification with `--crop-dir`, proving that emitted -crop descriptor JSON is deterministic and conforms to `schemas/ethos-crop-descriptor.schema.json`. +`make verify-alpha` exercises the accept and reject paths against a real foreign parser output. +The same target also runs native Ethos verification with `--crop-dir`, checking that emitted +crop descriptor JSON conforms to `schemas/ethos-crop-descriptor.schema.json`. When a native Ethos document is bound to its original PDF, `--crop-source-pdf` additionally emits PNG crop artifacts referenced and hashed from those descriptors. diff --git a/examples/verify/goldens/native_stale_report.json b/examples/verify/goldens/native_stale_report.json index ca0fc2d..196b766 100644 --- a/examples/verify/goldens/native_stale_report.json +++ b/examples/verify/goldens/native_stale_report.json @@ -11,6 +11,7 @@ }, "id": "v0001", "match_method": "none", + "reason": "stale_fingerprint", "semantic_unverified": false, "status": "stale", "warnings": [] diff --git a/examples/verify/goldens/opendataloader_capability_limited_report.json b/examples/verify/goldens/opendataloader_capability_limited_report.json index 72ccc69..9f3bcb7 100644 --- a/examples/verify/goldens/opendataloader_capability_limited_report.json +++ b/examples/verify/goldens/opendataloader_capability_limited_report.json @@ -22,6 +22,7 @@ }, "id": "v0001", "match_method": "none", + "reason": "missing_table_capability", "semantic_unverified": false, "status": "capability_blocked", "warnings": [ diff --git a/fixtures/foreign/opendataloader/real/expected.ungrounded.verification_report.json b/fixtures/foreign/opendataloader/real/expected.ungrounded.verification_report.json index aa3ac82..7147619 100644 --- 
a/fixtures/foreign/opendataloader/real/expected.ungrounded.verification_report.json +++ b/fixtures/foreign/opendataloader/real/expected.ungrounded.verification_report.json @@ -1 +1 @@ -{"all_evidence_grounded":false,"capability_limits":["missing_fingerprint","missing_spans","missing_char_offsets","missing_tables","unknown_coordinate_origin"],"checks":[{"claim":{"citation":{"element_id":"odl-2"},"kind":"value","text":"Lorem ipsum dolor sit amet"},"evidence":{"bbox":[8503,56794,50231,65976],"page":"page-1","text":"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."},"id":"v0001","match_method":"normalized_text","semantic_unverified":false,"status":"mismatch","warnings":[]}],"fingerprint_stale":false,"grounding":{"capabilities":{"char_offsets":false,"coordinate_origin":"unknown","crop_support":false,"fingerprint":false,"spans":false,"tables":false},"parser":{"adapter":"opendataloader-json","adapter_version":"0.1.0","name":"opendataloader-pdf","version":"unknown"}},"schema_version":"1.0.0","unsupported_claim_kinds":[],"verification_config_sha256":"4bb224166a04a25fed2dd3ecdb9638ddcc5b398658532b73f1c0547e4983d0b0","warnings":["capability_limited"]} +{"all_evidence_grounded":false,"capability_limits":["missing_fingerprint","missing_spans","missing_char_offsets","missing_tables","unknown_coordinate_origin"],"checks":[{"claim":{"citation":{"element_id":"odl-2"},"kind":"value","text":"Lorem ipsum dolor sit amet"},"evidence":{"bbox":[8503,56794,50231,65976],"page":"page-1","text":"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore 
magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."},"id":"v0001","match_method":"normalized_text","reason":"text_mismatch","semantic_unverified":false,"status":"mismatch","warnings":[]}],"fingerprint_stale":false,"grounding":{"capabilities":{"char_offsets":false,"coordinate_origin":"unknown","crop_support":false,"fingerprint":false,"spans":false,"tables":false},"parser":{"adapter":"opendataloader-json","adapter_version":"0.1.0","name":"opendataloader-pdf","version":"unknown"}},"schema_version":"1.0.0","unsupported_claim_kinds":[],"verification_config_sha256":"4bb224166a04a25fed2dd3ecdb9638ddcc5b398658532b73f1c0547e4983d0b0","warnings":["capability_limited"]} diff --git a/schemas/ethos-citations.schema.json b/schemas/ethos-citations.schema.json index 6937456..2b89bf8 100644 --- a/schemas/ethos-citations.schema.json +++ b/schemas/ethos-citations.schema.json @@ -63,6 +63,25 @@ "description": "Where the claim says the evidence lives. 
At least one locator is required; id formats follow the grounding source.", "additionalProperties": false, "minProperties": 1, + "dependentRequired": { + "table_id": ["cell"], + "cell": ["table_id"] + }, + "allOf": [ + { + "if": { + "required": ["bbox"] + }, + "then": { + "anyOf": [ + { "required": ["page"] }, + { "required": ["element_id"] }, + { "required": ["span_id"] }, + { "required": ["table_id"] } + ] + } + } + ], "properties": { "page": { "type": "string" }, "element_id": { "type": "string" }, diff --git a/schemas/ethos-verification-report.schema.json b/schemas/ethos-verification-report.schema.json index 57442d9..bf5dedb 100644 --- a/schemas/ethos-verification-report.schema.json +++ b/schemas/ethos-verification-report.schema.json @@ -116,6 +116,10 @@ "status": { "enum": ["grounded", "not_found", "mismatch", "stale", "unsupported_claim_kind", "capability_blocked", "error"] }, + "reason": { + "$ref": "#/$defs/check_reason", + "description": "Stable diagnostic reason for a non-grounded check outcome. Omitted for grounded checks." + }, "match_method": { "enum": ["exact_text", "normalized_text", "exact_text_contains", "normalized_text_contains", "table_cell_lookup", "bbox_containment", "presence_only", "none"], "description": "How evidence was matched. Equality methods require the target text to equal the claim text after the configured normalization. '*_contains' methods are explicit substring containment and are used only for quote evidence inside a larger target. 'normalized_text' uses ONLY the whitespace rule pinned in the verification config; nothing fuzzier exists in v1." 
@@ -153,6 +157,27 @@ "$defs": { "fingerprint": { "type": "string", "pattern": "^sha256:[0-9a-f]{64}$" }, "bbox": { "type": "array", "items": { "type": "integer" }, "minItems": 4, "maxItems": 4 }, + "check_reason": { + "enum": [ + "missing_locator", + "missing_required_text", + "unsupported_claim_kind", + "stale_fingerprint", + "missing_source_fingerprint", + "missing_span_capability", + "missing_table_capability", + "unknown_coordinate_origin", + "element_not_found", + "span_not_found", + "page_not_found", + "bbox_not_found", + "missing_page_for_bbox", + "missing_table_cell_locator", + "table_not_found", + "table_cell_not_found", + "text_mismatch" + ] + }, "warning_code": { "enum": [ "low_confidence_reading_order",