diff --git a/Makefile b/Makefile index 76110dc..d6052e5 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,6 @@ verify-alpha: $(ETHOS_BIN) cargo test --locked -p ethos-grounding-opendataloader-json cargo test --locked -p ethos-cli --test verify $(MAKE) verify-alpha-tree - $(PYTHON) -c 'from jsonschema import Draft202012Validator' $(PYTHON) schemas/validate_examples.py $(PYTHON) examples/verify/check_verify_alpha.py --repo-root $(ROOT) --ethos-bin $(ETHOS_BIN) --out-dir $(VERIFY_ALPHA_OUT) git diff --check diff --git a/crates/ethos-cli/src/cmd/verify.rs b/crates/ethos-cli/src/cmd/verify.rs index acfd4a4..3c55619 100644 --- a/crates/ethos-cli/src/cmd/verify.rs +++ b/crates/ethos-cli/src/cmd/verify.rs @@ -11,7 +11,8 @@ use ethos_core::grounding::{ }; use ethos_core::model::Document; use ethos_core::verify_types::{ - ClaimKind, EvidenceOptions, VerificationConfig, VerificationReport, + CapabilityLimit, CheckReason, CheckStatus, ClaimKind, EvidenceOptions, MatchMethod, + VerificationConfig, VerificationReport, }; use ethos_grounding_opendataloader_json::OdlJsonSource; use ethos_pdf::PdfiumBackend; @@ -19,6 +20,7 @@ use ethos_verify::CitationInput; use crate::{ default_max_input_bytes, read_document, read_file_limited, write_output, Failure, VerifyArgs, + VerifyOutputFormat, }; pub(crate) fn verify(args: VerifyArgs) -> Result<(), Failure> { @@ -78,7 +80,7 @@ pub(crate) fn verify(args: VerifyArgs) -> Result<(), Failure> { if let Some(crop_dir) = args.crop_dir.as_deref() { write_crop_artifacts(crop_dir, &report, crop_source_pdf.as_ref())?; } - return write_report(args.out, report, args.fail_on_ungrounded); + return write_report(args.out, args.format, report, args.fail_on_ungrounded); } None => ethos_verify::verify_claims(&doc, citations, &config, config_sha256), } @@ -98,18 +100,19 @@ pub(crate) fn verify(args: VerifyArgs) -> Result<(), Failure> { } }; - write_report(args.out, report, args.fail_on_ungrounded) + write_report(args.out, args.format, report, 
args.fail_on_ungrounded) } fn write_report( out: Option<PathBuf>, + format: VerifyOutputFormat, report: VerificationReport, fail_on_ungrounded: bool, ) -> Result<(), Failure> { - let value = serde_json::to_value(&report).map_err(|e| EthosError::internal(e.to_string()))?; - let mut bytes = - ethos_core::c14n::c14n_bytes(&value).map_err(|e| EthosError::internal(e.message))?; - bytes.push(b'\n'); + let bytes = match format { + VerifyOutputFormat::Json => verification_report_json_bytes(&report)?, + VerifyOutputFormat::Summary => verification_report_summary_bytes(&report)?, + }; let all_evidence_grounded = report.all_evidence_grounded; write_output(out, &bytes)?; if fail_on_ungrounded && !all_evidence_grounded { @@ -118,6 +121,227 @@ fn write_report( Ok(()) } +fn verification_report_json_bytes(report: &VerificationReport) -> Result<Vec<u8>, Failure> { + let value = serde_json::to_value(report).map_err(|e| EthosError::internal(e.to_string()))?; + let mut bytes = + ethos_core::c14n::c14n_bytes(&value).map_err(|e| EthosError::internal(e.message))?; + bytes.push(b'\n'); + Ok(bytes) +} + +fn verification_report_summary_bytes(report: &VerificationReport) -> Result<Vec<u8>, Failure> { + let mut out = String::new(); + out.push_str("ethos verify summary\n"); + out.push_str(&format!("schema_version: {}\n", report.schema_version)); + out.push_str(&format!( + "all_evidence_grounded: {}\n", + report.all_evidence_grounded + )); + out.push_str(&format!( + "fingerprint_stale: {}\n", + report.fingerprint_stale + )); + if let Some(fingerprint) = report.document_fingerprint.as_deref() { + out.push_str(&format!("document_fingerprint: {fingerprint}\n")); + } + out.push_str(&format!( + "grounding: {} {}\n", + report.grounding.parser.name, report.grounding.parser.version + )); + if let Some(adapter) = report.grounding.parser.adapter.as_deref() { + out.push_str(&format!("grounding_adapter: {adapter}\n")); + } + if let Some(adapter_version) = 
report.grounding.parser.adapter_version.as_deref() { + 
out.push_str(&format!("grounding_adapter_version: {adapter_version}\n")); + } + out.push_str(&format!("checks_total: {}\n", report.checks.len())); + for status in [ + CheckStatus::Grounded, + CheckStatus::NotFound, + CheckStatus::Mismatch, + CheckStatus::Stale, + CheckStatus::UnsupportedClaimKind, + CheckStatus::CapabilityBlocked, + CheckStatus::Error, + ] { + let count = report + .checks + .iter() + .filter(|check| check.status == status) + .count(); + out.push_str(&format!("checks_{}: {count}\n", status_label(status))); + } + out.push_str(&format!( + "capability_limits: {}\n", + list_labels(&report.capability_limits, capability_limit_label) + )); + out.push_str(&format!( + "unsupported_claim_kinds: {}\n", + if report.unsupported_claim_kinds.is_empty() { + "none".to_string() + } else { + report.unsupported_claim_kinds.join(",") + } + )); + out.push_str(&format!( + "warnings: {}\n", + serde_label_list(&report.warnings)? + )); + out.push_str("non_grounded_checks:\n"); + let mut non_grounded = report + .checks + .iter() + .filter(|check| check.status != CheckStatus::Grounded) + .peekable(); + if non_grounded.peek().is_none() { + out.push_str("- none\n"); + } else { + for check in non_grounded { + out.push_str(&format!( + "- {} status={} reason={} kind={} locator={} match_method={}\n", + check.id, + status_label(check.status), + check + .reason + .map(check_reason_label) + .unwrap_or("unspecified"), + claim_kind_label(check.claim.kind), + citation_locator_label(&check.claim.citation), + match_method_label(check.match_method) + )); + } + } + Ok(out.into_bytes()) +} + +fn status_label(status: CheckStatus) -> &'static str { + match status { + CheckStatus::Grounded => "grounded", + CheckStatus::NotFound => "not_found", + CheckStatus::Mismatch => "mismatch", + CheckStatus::Stale => "stale", + CheckStatus::UnsupportedClaimKind => "unsupported_claim_kind", + CheckStatus::CapabilityBlocked => "capability_blocked", + CheckStatus::Error => "error", + } +} + +fn 
check_reason_label(reason: CheckReason) -> &'static str { + match reason { + CheckReason::MissingLocator => "missing_locator", + CheckReason::MissingRequiredText => "missing_required_text", + CheckReason::UnsupportedClaimKind => "unsupported_claim_kind", + CheckReason::StaleFingerprint => "stale_fingerprint", + CheckReason::MissingSourceFingerprint => "missing_source_fingerprint", + CheckReason::MissingSpanCapability => "missing_span_capability", + CheckReason::MissingTableCapability => "missing_table_capability", + CheckReason::UnknownCoordinateOrigin => "unknown_coordinate_origin", + CheckReason::ElementNotFound => "element_not_found", + CheckReason::SpanNotFound => "span_not_found", + CheckReason::PageNotFound => "page_not_found", + CheckReason::BboxNotFound => "bbox_not_found", + CheckReason::MissingPageForBbox => "missing_page_for_bbox", + CheckReason::MissingTableCellLocator => "missing_table_cell_locator", + CheckReason::TableNotFound => "table_not_found", + CheckReason::TableCellNotFound => "table_cell_not_found", + CheckReason::TextMismatch => "text_mismatch", + } +} + +fn capability_limit_label(limit: CapabilityLimit) -> &'static str { + match limit { + CapabilityLimit::MissingSpans => "missing_spans", + CapabilityLimit::MissingCharOffsets => "missing_char_offsets", + CapabilityLimit::MissingTables => "missing_tables", + CapabilityLimit::MissingFingerprint => "missing_fingerprint", + CapabilityLimit::UnknownCoordinateOrigin => "unknown_coordinate_origin", + CapabilityLimit::MissingCropSupport => "missing_crop_support", + } +} + +fn claim_kind_label(kind: ClaimKind) -> &'static str { + match kind { + ClaimKind::Quote => "quote", + ClaimKind::Value => "value", + ClaimKind::Presence => "presence", + ClaimKind::TableCell => "table_cell", + ClaimKind::Region => "region", + ClaimKind::Other => "other", + } +} + +fn match_method_label(method: MatchMethod) -> &'static str { + match method { + MatchMethod::ExactText => "exact_text", + MatchMethod::NormalizedText 
=> "normalized_text", + MatchMethod::ExactTextContains => "exact_text_contains", + MatchMethod::NormalizedTextContains => "normalized_text_contains", + MatchMethod::TableCellLookup => "table_cell_lookup", + MatchMethod::BboxContainment => "bbox_containment", + MatchMethod::PresenceOnly => "presence_only", + MatchMethod::None => "none", + } +} + +fn citation_locator_label(citation: &ethos_core::verify_types::Citation) -> String { + let mut parts = Vec::new(); + if let Some(page) = citation.page.as_deref() { + parts.push(format!("page:{page}")); + } + if let Some(element_id) = citation.element_id.as_deref() { + parts.push(format!("element_id:{element_id}")); + } + if let Some(span_id) = citation.span_id.as_deref() { + parts.push(format!("span_id:{span_id}")); + } + if let Some(table_id) = citation.table_id.as_deref() { + parts.push(format!("table_id:{table_id}")); + } + if let Some(cell) = citation.cell { + parts.push(format!("cell:{},{}", cell.row, cell.col)); + } + if let Some(bbox) = citation.bbox { + parts.push(format!( + "bbox:[{},{},{},{}]", + bbox[0], bbox[1], bbox[2], bbox[3] + )); + } + if parts.is_empty() { + "none".to_string() + } else { + parts.join(";") + } +} + +fn list_labels<T: Copy>(values: &[T], label: fn(T) -> &'static str) -> String { + if values.is_empty() { + "none".to_string() + } else { + values + .iter() + .map(|value| label(*value)) + .collect::<Vec<_>>() + .join(",") + } +} + +fn serde_label_list<T: serde::Serialize>(values: &[T]) -> Result<String, Failure> { + if values.is_empty() { + return Ok("none".to_string()); + } + let mut labels = Vec::new(); + for value in values { + let value = serde_json::to_value(value).map_err(|e| EthosError::internal(e.to_string()))?; + let Some(label) = value.as_str() else { + return Err(Failure::Ethos(EthosError::internal( + "summary label serialization did not produce a string", + ))); + }; + labels.push(label.to_string()); + } + Ok(labels.join(",")) +} + struct CropSourcePdf { bytes: 
Vec<u8>, fingerprint: String, diff --git a/crates/ethos-cli/src/main.rs 
b/crates/ethos-cli/src/main.rs index dc88e66..f190d03 100644 --- a/crates/ethos-cli/src/main.rs +++ b/crates/ethos-cli/src/main.rs @@ -177,6 +177,9 @@ pub(crate) struct VerifyArgs { /// Output path for verification_report.json (default: stdout) #[arg(long)] pub(crate) out: Option<PathBuf>, + /// Output format. JSON is the canonical report; summary is a compact text view. + #[arg(long, value_enum, default_value_t = VerifyOutputFormat::Json)] + pub(crate) format: VerifyOutputFormat, /// Directory for crop descriptor artifacts. With --crop-source-pdf, also writes rendered PNG /// crops bound by descriptor hashes. #[arg(long)] @@ -190,6 +193,12 @@ pub(crate) struct VerifyArgs { pub(crate) fail_on_ungrounded: bool, } +#[derive(Clone, Copy, ValueEnum)] +pub(crate) enum VerifyOutputFormat { + Json, + Summary, +} + /// CLI failure: stable error code or usage error, rendered deterministically. pub(crate) enum Failure { Ethos(EthosError), diff --git a/crates/ethos-cli/tests/verify.rs b/crates/ethos-cli/tests/verify.rs index 1783b01..375f2f5 100644 --- a/crates/ethos-cli/tests/verify.rs +++ b/crates/ethos-cli/tests/verify.rs @@ -310,6 +310,65 @@ fn fail_on_ungrounded_exits_one_with_stdout_report_for_capability_blocked_source .any(|limit| limit == "missing_tables")); } +#[test] +fn summary_format_reports_reason_before_fail_on_ungrounded_exit() { + let root = repo_root(); + let output = run_ethos(&[ + "verify", + root.join("examples/verify/opendataloader_no_tables.json") + .to_str() + .unwrap(), + "--grounding", + "opendataloader-json", + "--citations", + root.join("examples/verify/opendataloader_table_cell_citations.json") + .to_str() + .unwrap(), + "--format", + "summary", + "--fail-on-ungrounded", + ]); + + assert_eq!(output.status.code(), Some(1)); + assert_eq!(output.stderr, b""); + assert!( + serde_json::from_slice::<serde_json::Value>(&output.stdout).is_err(), + "summary output must not be JSON" + ); + let summary = String::from_utf8(output.stdout).expect("summary 
output is UTF-8"); + 
assert!(summary.contains("ethos verify summary\n")); + assert!(summary.contains("all_evidence_grounded: false\n")); + assert!(summary.contains("checks_capability_blocked: 1\n")); + assert!(summary.contains("capability_limits: missing_fingerprint,missing_spans,missing_char_offsets,missing_tables,unknown_coordinate_origin\n")); + assert!(summary.contains("- v0001 status=capability_blocked reason=missing_table_capability kind=table_cell locator=table_id:odl-t1;cell:1,1 match_method=none\n")); +} + +#[test] +fn summary_format_reports_no_non_grounded_checks_for_grounded_input() { + let root = repo_root(); + let output = run_ethos(&[ + "verify", + root.join("schemas/examples/document.example.json") + .to_str() + .unwrap(), + "--citations", + root.join("examples/verify/native_grounded_citations.json") + .to_str() + .unwrap(), + "--format", + "summary", + ]); + + assert_eq!(output.status.code(), Some(0)); + assert_eq!(output.stderr, b""); + let summary = String::from_utf8(output.stdout).expect("summary output is UTF-8"); + assert!(summary.contains("all_evidence_grounded: true\n")); + assert!(summary.contains("checks_grounded: 3\n")); + assert!(summary.contains("capability_limits: none\n")); + assert!(summary.contains("warnings: none\n")); + assert!(summary.contains("non_grounded_checks:\n- none\n")); +} + #[test] fn native_verify_crop_dir_writes_deterministic_crop_descriptors() { let root = repo_root(); diff --git a/examples/verify/README.md b/examples/verify/README.md index 067b13f..1e2f897 100644 --- a/examples/verify/README.md +++ b/examples/verify/README.md @@ -13,6 +13,15 @@ ethos verify schemas/examples/document.example.json \ Expected result: `all_evidence_grounded: true`. The claims verify a quote, a table cell, and page-level presence against native Ethos document JSON. +For a compact text view of the same check outcomes, use `--format summary`. JSON remains the +default and canonical verification report format. 
+ +```bash +ethos verify schemas/examples/document.example.json \ + --citations examples/verify/native_grounded_citations.json \ + --format summary +``` + ## OpenDataLoader-Style Grounding ```bash @@ -51,6 +60,21 @@ ethos verify examples/verify/opendataloader_no_tables.json \ Expected result: check status `capability_blocked`, warning `capability_limited`, and `all_evidence_grounded: false`. +## Reason Labels + +Non-grounded checks may include a stable `reason` label: + +| Reason | Meaning | +| --- | --- | +| `stale_fingerprint` | Citation fingerprint differs from the grounding source fingerprint. | +| `text_mismatch` | Target text did not match the claimed text under the active literal matcher. | +| `missing_table_capability` | The claim needs table-cell lookup, but the grounding source does not expose tables. | +| `missing_source_fingerprint` | Citations were fingerprint-pinned, but the grounding source did not declare one. | +| `unknown_coordinate_origin` | A bbox locator was used with a source whose coordinate origin is unknown. | +| `table_not_found` | The cited table id was not found in a source that exposes tables. | +| `table_cell_not_found` | The cited table exists, but the cited cell address was not found. | +| `unsupported_claim_kind` | The claim kind is unsupported by this verifier or the active config. | + The OpenDataLoader fixtures are synthetic and limited to the adapter's documented alpha subset. They are not real pinned OpenDataLoader artifacts. Golden reports live in `examples/verify/goldens/` and are covered by the CLI verification test. diff --git a/schemas/README.md b/schemas/README.md index 18c7a25..3fdf9f6 100644 --- a/schemas/README.md +++ b/schemas/README.md @@ -25,5 +25,9 @@ and bbox sanity that JSON Schema cannot express). The examples are documentation them small, valid, and mutually consistent (same fingerprints across document / chunks / security-report / verification-report examples). 
+`verification-report.example.json` shows a grounded report. +`verification-report-negative.example.json` shows a non-grounded report with a per-check +`reason` label. + Derived artifacts not schema'd here: `document.md` / `document.txt` (deterministic exports specified by the exporter config, Milestone B) and `debug.html` (Milestone C). diff --git a/schemas/examples/verification-report-negative.example.json b/schemas/examples/verification-report-negative.example.json new file mode 100644 index 0000000..6214fb9 --- /dev/null +++ b/schemas/examples/verification-report-negative.example.json @@ -0,0 +1,40 @@ +{ + "schema_version": "1.0.0", + "document_fingerprint": "sha256:b5d30710d0c25cc38d8dec924ecaf57ae4f81276dd5dc14d75cb3b5b6bde62d3", + "verification_config_sha256": "4bb224166a04a25fed2dd3ecdb9638ddcc5b398658532b73f1c0547e4983d0b0", + "grounding": { + "parser": { + "name": "ethos", + "version": "0.1.0" + }, + "capabilities": { + "spans": true, + "char_offsets": true, + "tables": true, + "fingerprint": true, + "coordinate_origin": "top-left", + "crop_support": false + } + }, + "capability_limits": [], + "fingerprint_stale": true, + "all_evidence_grounded": false, + "checks": [ + { + "id": "v0001", + "claim": { + "kind": "presence", + "citation": { + "element_id": "e000002" + } + }, + "status": "stale", + "reason": "stale_fingerprint", + "match_method": "none", + "semantic_unverified": false, + "warnings": [] + } + ], + "unsupported_claim_kinds": [], + "warnings": [] +} diff --git a/schemas/validate_examples.py b/schemas/validate_examples.py index 4122adb..a04ed86 100644 --- a/schemas/validate_examples.py +++ b/schemas/validate_examples.py @@ -44,7 +44,10 @@ EXAMPLES / "citations-array.example.json", ]), ("ethos-security-report.schema.json", [EXAMPLES / "security-report.example.json"]), - ("ethos-verification-report.schema.json", [EXAMPLES / "verification-report.example.json"]), + ("ethos-verification-report.schema.json", [ + EXAMPLES / 
"verification-report.example.json", + EXAMPLES / "verification-report-negative.example.json", + ]), ("ethos-verification-config.schema.json", [EXAMPLES / "verification-config.example.json"]), ("ethos-crop-descriptor.schema.json", [EXAMPLES / "crop-descriptor.example.json"]), ("ethos-deterministic-profile.schema.json", [ROOT / "profiles" / "ethos-deterministic-v1.json"]),