Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions crates/ethos-cli/src/cmd/verify.rs
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,41 @@ fn validate_citation_input(
idx + 1
)));
}
if claim.citation.table_id.is_some() != claim.citation.cell.is_some() {
return Err(Failure::Usage(format!(
"claim {} citation table_id and cell must be provided together",
idx + 1
)));
}
if claim.kind == ClaimKind::TableCell
&& (claim.citation.table_id.is_none() || claim.citation.cell.is_none())
{
return Err(Failure::Usage(format!(
"claim {} table_cell citation must include table_id and cell",
idx + 1
)));
}
if claim.citation.bbox.is_some()
&& claim.citation.page.is_none()
&& claim.citation.element_id.is_none()
&& claim.citation.span_id.is_none()
&& claim.citation.table_id.is_none()
{
return Err(Failure::Usage(format!(
"claim {} citation bbox requires page unless another target locator is present",
idx + 1
)));
}
if claim
.citation
.bbox
.is_some_and(|bbox| bbox[0] >= bbox[2] || bbox[1] >= bbox[3])
{
return Err(Failure::Usage(format!(
"claim {} citation bbox must have positive area",
idx + 1
)));
}
if matches!(
claim.kind,
ClaimKind::Quote | ClaimKind::Value | ClaimKind::TableCell
Expand Down Expand Up @@ -774,6 +809,7 @@ mod tests {
},
},
status: CheckStatus::Grounded,
reason: None,
match_method: MatchMethod::ExactTextContains,
semantic_unverified: false,
evidence: Some(Evidence {
Expand All @@ -799,6 +835,7 @@ mod tests {
},
},
status: CheckStatus::Grounded,
reason: None,
match_method: MatchMethod::PresenceOnly,
semantic_unverified: false,
evidence: Some(Evidence {
Expand Down
141 changes: 141 additions & 0 deletions crates/ethos-cli/tests/verify.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ fn real_opendataloader_ungrounded_fixture_verifies_against_golden() {
assert_eq!(report["all_evidence_grounded"], false);
assert_eq!(report["checks"][0]["status"], "mismatch");
assert_eq!(report["checks"][0]["match_method"], "normalized_text");
assert_eq!(report["checks"][0]["reason"], "text_mismatch");

let gated = run_ethos(&[
"verify",
Expand Down Expand Up @@ -276,6 +277,7 @@ fn fail_on_ungrounded_exits_one_after_writing_stale_report() {
assert_eq!(report["fingerprint_stale"], true);
assert_eq!(report["all_evidence_grounded"], false);
assert_eq!(report["checks"][0]["status"], "stale");
assert_eq!(report["checks"][0]["reason"], "stale_fingerprint");
}

#[test]
Expand All @@ -300,6 +302,7 @@ fn fail_on_ungrounded_exits_one_with_stdout_report_for_capability_blocked_source
let report: Value = serde_json::from_slice(&output.stdout).expect("stdout is JSON");
assert_eq!(report["all_evidence_grounded"], false);
assert_eq!(report["checks"][0]["status"], "capability_blocked");
assert_eq!(report["checks"][0]["reason"], "missing_table_capability");
assert!(report["capability_limits"]
.as_array()
.unwrap()
Expand Down Expand Up @@ -601,6 +604,7 @@ fn native_ethos_verify_produces_non_empty_checks() {
assert_eq!(report["checks"][1]["status"], "grounded");
assert_eq!(report["checks"][1]["match_method"], "table_cell_lookup");
assert_eq!(report["checks"][2]["status"], "mismatch");
assert_eq!(report["checks"][2]["reason"], "text_mismatch");
assert_eq!(report["all_evidence_grounded"], false);
}

Expand Down Expand Up @@ -649,6 +653,7 @@ fn opendataloader_verify_adapter_produces_capability_aware_report() {
assert_eq!(report["checks"][1]["match_method"], "table_cell_lookup");
assert_eq!(report["checks"][1]["evidence"]["text"], "$12.4M");
assert_eq!(report["checks"][2]["status"], "mismatch");
assert_eq!(report["checks"][2]["reason"], "text_mismatch");
assert_eq!(report["all_evidence_grounded"], false);
}

Expand Down Expand Up @@ -721,6 +726,7 @@ fn stale_fingerprint_is_report_level_failure() {

assert_eq!(report["fingerprint_stale"], true);
assert_eq!(report["checks"][0]["status"], "stale");
assert_eq!(report["checks"][0]["reason"], "stale_fingerprint");
assert_eq!(report["all_evidence_grounded"], false);
}

Expand Down Expand Up @@ -808,6 +814,133 @@ fn invalid_citation_shape_is_usage_error() {
.contains("claim 1 citation must contain at least one locator"));
}

/// Runs `ethos verify` with citations whose table-cell locator is incomplete
/// and checks that every one of them is rejected.
///
/// Each case must exit with status 2, leave stdout empty, and print the
/// expected message on stderr.
#[test]
fn incomplete_table_cell_locator_is_usage_error() {
    // Each entry: (temp-file name, citations JSON, message expected on stderr).
    let cases = [
        (
            "table-id-without-cell",
            r#"{
"claims": [
{
"kind": "table_cell",
"text": "$12.4M",
"citation": {
"table_id": "t0001"
}
}
]
}"#,
            "claim 1 citation table_id and cell must be provided together",
        ),
        (
            "cell-without-table-id",
            r#"{
"claims": [
{
"kind": "table_cell",
"text": "$12.4M",
"citation": {
"cell": {
"row": 1,
"col": 1
}
}
}
]
}"#,
            "claim 1 citation table_id and cell must be provided together",
        ),
        (
            "table-cell-kind-without-table-cell-locator",
            r#"{
"claims": [
{
"kind": "table_cell",
"text": "$12.4M",
"citation": {
"element_id": "e000002"
}
}
]
}"#,
            "claim 1 table_cell citation must include table_id and cell",
        ),
    ];

    let doc = document_example();
    let doc_arg = doc.to_str().unwrap();

    for &(name, fixture, expected) in &cases {
        let citations = temp_json(name, fixture);
        let output = run_ethos(&[
            "verify",
            doc_arg,
            "--citations",
            citations.to_str().unwrap(),
        ]);
        // Decode stderr once; it is used both for the check and the failure message.
        let stderr = String::from_utf8_lossy(&output.stderr);

        assert_eq!(output.status.code(), Some(2), "case {name}");
        assert!(output.stdout.is_empty(), "case {name}");
        assert!(stderr.contains(expected), "case {name} stderr:\n{}", stderr);
    }
}

/// Runs `ethos verify` with citations whose bbox locator cannot be used and
/// checks that every one of them is rejected.
///
/// Covered shapes:
/// - a bbox with no page and no other target locator,
/// - a bbox with zero width (`x0 == x1`),
/// - a bbox with zero height (`y0 == y1`), so both halves of the
///   positive-area check are exercised.
///
/// Each case must exit with status 2, leave stdout empty, and print the
/// expected message on stderr.
#[test]
fn unusable_bbox_locator_is_usage_error() {
    let doc = document_example();
    // Each entry: (temp-file name, citations JSON, message expected on stderr).
    let cases = [
        (
            "bbox-without-page",
            r#"{
"claims": [
{
"kind": "presence",
"citation": {
"bbox": [7300, 10200, 8000, 11000]
}
}
]
}"#,
            "claim 1 citation bbox requires page unless another target locator is present",
        ),
        (
            "zero-width-bbox",
            r#"{
"claims": [
{
"kind": "presence",
"citation": {
"page": "p0001",
"bbox": [7300, 10200, 7300, 11000]
}
}
]
}"#,
            "claim 1 citation bbox must have positive area",
        ),
        (
            "zero-height-bbox",
            r#"{
"claims": [
{
"kind": "presence",
"citation": {
"page": "p0001",
"bbox": [7300, 10200, 8000, 10200]
}
}
]
}"#,
            "claim 1 citation bbox must have positive area",
        ),
    ];

    for (name, json, expected) in cases {
        let citations = temp_json(name, json);
        let output = run_ethos(&[
            "verify",
            doc.to_str().unwrap(),
            "--citations",
            citations.to_str().unwrap(),
        ]);
        // Decode stderr once; it is used both for the check and the failure message.
        let stderr = String::from_utf8_lossy(&output.stderr);

        assert_eq!(output.status.code(), Some(2), "case {name}");
        assert!(output.stdout.is_empty(), "case {name}");
        assert!(stderr.contains(expected), "case {name} stderr:\n{}", stderr);
    }
}

#[test]
fn unknown_citation_fields_are_usage_errors() {
let doc = document_example();
Expand Down Expand Up @@ -1301,6 +1434,7 @@ fn value_substrings_do_not_ground_against_native_ethos_text() {

assert_eq!(report["checks"][0]["status"], "mismatch");
assert_eq!(report["checks"][0]["match_method"], "normalized_text");
assert_eq!(report["checks"][0]["reason"], "text_mismatch");
assert_eq!(report["all_evidence_grounded"], false);
}

Expand Down Expand Up @@ -1381,7 +1515,9 @@ fn table_cell_mismatch_and_missing_cell_fail_gate() {

assert_eq!(report["checks"][0]["status"], "mismatch");
assert_eq!(report["checks"][0]["match_method"], "table_cell_lookup");
assert_eq!(report["checks"][0]["reason"], "text_mismatch");
assert_eq!(report["checks"][1]["status"], "not_found");
assert_eq!(report["checks"][1]["reason"], "table_cell_not_found");
assert_eq!(report["all_evidence_grounded"], false);
}

Expand Down Expand Up @@ -1496,6 +1632,7 @@ fn table_cell_is_capability_blocked_when_tables_are_missing() {
]);

assert_eq!(report["checks"][0]["status"], "capability_blocked");
assert_eq!(report["checks"][0]["reason"], "missing_table_capability");
assert_eq!(report["grounding"]["capabilities"]["tables"], false);
assert_eq!(
report["capability_limits"],
Expand Down Expand Up @@ -1572,6 +1709,7 @@ fn empty_tables_are_not_found_when_table_capability_is_declared() {
])
);
assert_eq!(report["checks"][0]["status"], "not_found");
assert_eq!(report["checks"][0]["reason"], "table_not_found");
assert_eq!(report["all_evidence_grounded"], false);
}

Expand Down Expand Up @@ -1612,6 +1750,7 @@ fn foreign_source_without_fingerprint_blocks_fingerprint_pinned_citations() {
])
);
assert_eq!(report["checks"][0]["status"], "capability_blocked");
assert_eq!(report["checks"][0]["reason"], "missing_source_fingerprint");
assert_eq!(
report["checks"][0]["warnings"],
serde_json::json!(["capability_limited"])
Expand Down Expand Up @@ -1670,6 +1809,7 @@ fn config_excluded_value_claim_is_unsupported() {
]);

assert_eq!(report["checks"][0]["status"], "unsupported_claim_kind");
assert_eq!(report["checks"][0]["reason"], "unsupported_claim_kind");
assert_eq!(
report["unsupported_claim_kinds"],
serde_json::json!(["value"])
Expand Down Expand Up @@ -1768,6 +1908,7 @@ fn bbox_presence_is_capability_blocked_when_coordinate_origin_is_unknown() {
]);

assert_eq!(report["checks"][0]["status"], "capability_blocked");
assert_eq!(report["checks"][0]["reason"], "unknown_coordinate_origin");
assert_eq!(
report["capability_limits"],
serde_json::json!([
Expand Down
47 changes: 47 additions & 0 deletions crates/ethos-core/src/verify_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,49 @@ pub enum CheckStatus {
Error,
}

/// Stable reason for a non-grounded check outcome.
///
/// These are diagnostic labels only. They explain why the check did not ground
/// under the active literal verifier; they do not add semantic judgment.
///
/// Variants serialize as snake_case strings, so `TextMismatch` is written as
/// `"text_mismatch"` and `StaleFingerprint` as `"stale_fingerprint"`. On a
/// `Check` the reason is optional and is left out of the serialized output
/// when it is `None`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CheckReason {
/// Citation had no usable locator.
MissingLocator,
/// Textual claim kind was missing required text.
MissingRequiredText,
/// Claim kind is unsupported by the verifier or active config.
UnsupportedClaimKind,
/// Citation fingerprint differs from the grounding source fingerprint.
StaleFingerprint,
/// Citation was fingerprint-pinned but the source did not declare one.
MissingSourceFingerprint,
/// Span locator was used with a source that does not expose spans.
MissingSpanCapability,
/// Table-cell locator was used with a source that does not expose tables.
MissingTableCapability,
/// Bbox locator was used with a source whose coordinate origin is unknown.
UnknownCoordinateOrigin,
/// Element id was not found.
ElementNotFound,
/// Span id was not found.
SpanNotFound,
/// Page id was not found.
PageNotFound,
/// Bbox locator did not resolve to a grounding element.
BboxNotFound,
/// Bbox locator did not include a page locator.
MissingPageForBbox,
/// Table-cell citation did not include both table id and cell address.
MissingTableCellLocator,
/// Table id was not found.
TableNotFound,
/// Cell address was not found in the table.
TableCellNotFound,
/// Target text did not match the claimed text under the active matcher.
TextMismatch,
}

/// How evidence was matched. v1 is deliberately literal — nothing fuzzy, nothing semantic.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
Expand Down Expand Up @@ -173,6 +216,9 @@ pub struct Check {
pub claim: Claim,
/// Outcome.
pub status: CheckStatus,
/// Stable reason for a non-grounded outcome.
#[serde(skip_serializing_if = "Option::is_none")]
pub reason: Option<CheckReason>,
/// Method used.
pub match_method: MatchMethod,
/// True when grounding would require semantic judgment beyond the declared
Expand Down Expand Up @@ -363,6 +409,7 @@ mod tests {
},
},
status,
reason: None,
match_method: MatchMethod::ExactText,
semantic_unverified: semantic,
evidence: None,
Expand Down
Loading
Loading