From 7bee23e8a604e5c72449497103bff56292bdc7f2 Mon Sep 17 00:00:00 2001 From: samjanny Date: Wed, 3 Jun 2026 16:38:16 +0200 Subject: [PATCH] fix: address security-audit findings in verify and content Five findings from an audit of the tool. verify exit code (critical): verify reported 'reject' but the process exited 0, so CI and scripts could treat an invalid document as valid. Commands now return an Outcome; a verify reject maps to a non-zero exit. An integration test guards the contract. verify default --now (medium): the default verified-time reference was a fixed 2026-05-07 date, already stale, which silently passed an expired canary. Default to the current system UTC clock instead (printed as a note); --now still overrides for reproducibility. content image read (medium): reading image files for the hash and dimensions is now hardened against a hostile repository - the resolved path must stay inside the assets directory (rejecting symlinks that point out), the target must be a regular file, and it must not exceed the 2 MiB image cap. Legitimate images are unaffected. verify --content-index (low): supplying --content-index without --fetched-onion silently skipped Stage 9b; it is now an explicit error, and the skip note covers the no-onion case. init scaffold (low): the generated README now shows build manifest with --key-seed-file instead of an inline --key-seed-hex, matching the seed-handling guidance. Tests cover the symlink and size-cap rejections and the verify exit code. cargo audit clean with the added time dependency. --- Cargo.lock | 1 + Cargo.toml | 3 ++ README.md | 2 +- src/cli.rs | 2 +- src/commands/build.rs | 6 +-- src/commands/content.rs | 6 +-- src/commands/init.rs | 8 ++-- src/commands/keygen.rs | 6 +-- src/commands/mod.rs | 18 +++++++- src/commands/verify.rs | 87 +++++++++++++++++++++++++++----------- src/main.rs | 6 ++- src/markdown.rs | 93 ++++++++++++++++++++++++++++++++++++++++- tests/exit_code.rs | 41 ++++++++++++++++++ 13 files changed, 235 insertions(+), 44 deletions(-) create mode 100644 tests/exit_code.rs diff --git a/Cargo.lock b/Cargo.lock index 20ea338..0492fd3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -263,6 +263,7 @@ dependencies = [ "imagesize", "pulldown-cmark", "serde_json", + "time", "zeroize", ] diff --git a/Cargo.toml b/Cargo.toml index ee24753..8e08316 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,9 @@ imagesize = "0.13" # linger in freed memory, swap, or a core dump. Already in the tree transitively # via ed25519-dalek; pinned here as a direct dependency. zeroize = "1" +# Current UTC time for verify's default --now reference. Already in the tree +# via entangled-core; pinned here for the system-clock default. +time = { version = "0.3", default-features = false, features = ["std"] } [profile.release] strip = true diff --git a/README.md b/README.md index 2096436..f731867 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ Runs the document through the `entangled-core` pipeline and prints the verdict, A manifest is driven through the full chain: signature (Stages 2-6), canary (Stage 8), origin binding (Stage 9), and content index (Stage 9b). The stages that need out-of-band context run only when it is supplied: -- `--now` sets the verified-time reference for the canary and origin-expiry checks (defaults to the corpus clock). +- `--now` sets the verified-time reference for the canary and origin-expiry checks (defaults to the current system clock). - `--fetched-onion` is the onion address the manifest was fetched from; with it, Stage 9 origin binding runs. Omit to skip Stage 9. - `--content-index` is the served `/content_index.json`; when the manifest declares `content_root`, Stage 9b verifies it (and its absence with a declared `content_root` surfaces the fetch failure). - `--expected-runtime-pubkey` is the manifest's `canary.runtime_pubkey`; for a content or transaction document, it is the key the signature is checked against. Without it, a content/transaction has no authorized key and reports a signature rejection. diff --git a/src/cli.rs b/src/cli.rs index cacc3e7..3c88f62 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -171,7 +171,7 @@ pub struct VerifyArgs { /// Verified-time reference for the canary and origin-expiry checks, /// RFC 3339 (YYYY-MM-DDTHH:MM:SSZ). A real client supplies its trusted - /// wall clock. Defaults to the corpus clock if omitted. + /// wall clock. Defaults to the current system UTC clock if omitted. #[arg(long)] pub now: Option, diff --git a/src/commands/build.rs b/src/commands/build.rs index 181f781..fc78066 100644 --- a/src/commands/build.rs +++ b/src/commands/build.rs @@ -15,9 +15,9 @@ use entangled_core::document::{ use entangled_core::types::timestamp::EntangledTimestamp; use crate::cli::{BuildArgs, DocKind}; -use crate::commands::{resolve_seed, Error}; +use crate::commands::{resolve_seed, Error, Outcome}; -pub fn run(args: BuildArgs) -> Result<(), Error> { +pub fn run(args: BuildArgs) -> Result { let raw = std::fs::read(&args.input) .map_err(|e| format!("cannot read {}: {e}", args.input.display()))?; // A signing key is mandatory for build; no fresh-entropy fallback. @@ -64,5 +64,5 @@ pub fn run(args: BuildArgs) -> Result<(), Error> { let text = String::from_utf8(signed_bytes) .map_err(|e| format!("internal: signed document is not UTF-8: {e}"))?; println!("{text}"); - Ok(()) + Ok(Outcome::Success) } diff --git a/src/commands/content.rs b/src/commands/content.rs index cd5fa97..ec37e5d 100644 --- a/src/commands/content.rs +++ b/src/commands/content.rs @@ -12,10 +12,10 @@ use entangled_core::types::path::EntangledPath; use entangled_core::types::timestamp::EntangledTimestamp; use crate::cli::ContentArgs; -use crate::commands::Error; +use crate::commands::{Error, Outcome}; use crate::markdown; -pub fn run(args: ContentArgs) -> Result<(), Error> { +pub fn run(args: ContentArgs) -> Result { let source = std::fs::read_to_string(&args.markdown) .map_err(|e| format!("cannot read {}: {e}", args.markdown.display()))?; @@ -51,5 +51,5 @@ pub fn run(args: ContentArgs) -> Result<(), Error> { let json = serde_json::to_string_pretty(&doc) .map_err(|e| format!("failed to serialize the content document: {e}"))?; println!("{json}"); - Ok(()) + Ok(Outcome::Success) } diff --git a/src/commands/init.rs b/src/commands/init.rs index c7b5a07..175e037 100644 --- a/src/commands/init.rs +++ b/src/commands/init.rs @@ -11,7 +11,7 @@ use std::fs; use std::path::Path; use crate::cli::InitArgs; -use crate::commands::Error; +use crate::commands::{Error, Outcome}; /// Starter unsigned-manifest template. Placeholders are spelled out so the /// publisher knows exactly which `keygen` output each field expects. It is not @@ -39,9 +39,9 @@ const MANIFEST_TEMPLATE: &str = r#"{ } "#; -const README_TEMPLATE: &str = "# Entangled site\n\nScaffolded by entangled-tool. Next steps:\n\n1. Run `entangled-tool keygen publisher`, `keygen runtime`, and `keygen origin`; store the seeds offline.\n2. Fill the REPLACE_WITH_ placeholders in `manifest.unsigned.json` with the printed public keys and onion address, and set the canary and updated times.\n3. Sign it: `entangled-tool build manifest --input manifest.unsigned.json --key-seed-hex --now `.\n4. Add content documents under `content/` and sign each with `build content`.\n"; +const README_TEMPLATE: &str = "# Entangled site\n\nScaffolded by entangled-tool. Next steps:\n\n1. Run `entangled-tool keygen publisher`, `keygen runtime`, and `keygen origin`; store each printed seed in a file (e.g. `publisher.seed`), kept offline with restrictive permissions.\n2. Fill the REPLACE_WITH_ placeholders in `manifest.unsigned.json` with the printed public keys and onion address, and set the canary and updated times.\n3. Sign it: `entangled-tool build manifest --input manifest.unsigned.json --key-seed-file publisher.seed --now `.\n4. Add content documents under `content/` and sign each with `build content --key-seed-file runtime.seed`.\n"; -pub fn run(args: InitArgs) -> Result<(), Error> { +pub fn run(args: InitArgs) -> Result { let dir = &args.dir; create_dir(dir)?; create_dir(&dir.join("content"))?; @@ -55,7 +55,7 @@ pub fn run(args: InitArgs) -> Result<(), Error> { ); println!(" content/ (add content documents here)"); println!(" README.md (next steps)"); - Ok(()) + Ok(Outcome::Success) } fn create_dir(path: &Path) -> Result<(), Error> { diff --git a/src/commands/keygen.rs b/src/commands/keygen.rs index c5bc86f..f030d93 100644 --- a/src/commands/keygen.rs +++ b/src/commands/keygen.rs @@ -16,9 +16,9 @@ use entangled_core::crypto::{ use entangled_core::types::manifest::OnionAddress; use crate::cli::{KeyRole, KeygenArgs}; -use crate::commands::{resolve_seed, seed_to_hex, Error}; +use crate::commands::{resolve_seed, seed_to_hex, Error, Outcome}; -pub fn run(args: KeygenArgs) -> Result<(), Error> { +pub fn run(args: KeygenArgs) -> Result { // A file or inline hex if supplied; otherwise fresh OS entropy. let seed = resolve_seed( args.seed_file.as_deref(), @@ -57,5 +57,5 @@ pub fn run(args: KeygenArgs) -> Result<(), Error> { ); } } - Ok(()) + Ok(Outcome::Success) } diff --git a/src/commands/mod.rs b/src/commands/mod.rs index d880ba5..272700f 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -1,7 +1,7 @@ //! Subcommand implementations. //! -//! Each module exposes a single `run(args) -> Result<(), Error>` entry point -//! invoked by the dispatcher in `main`. +//! Each module exposes a single `run(args) -> Result` entry +//! point invoked by the dispatcher in `main`. pub mod build; pub mod content; @@ -17,6 +17,20 @@ use zeroize::Zeroizing; /// any underlying error (I/O, parsing, a core-library `Diagnostic`) uniformly. pub type Error = Box; +/// The outcome a subcommand reports to the process exit code. Distinct from +/// `Err`: a command can complete normally (no internal error) yet report a +/// negative result the caller must see - notably `verify` rejecting a document, +/// which must not exit 0 so CI and scripts treat an invalid document as a +/// failure. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Outcome { + /// The command did what it was asked; exit 0. + Success, + /// The command ran but the result is negative (e.g. verify rejected the + /// document). The command has already reported the details; exit non-zero. + Rejected, +} + /// A 32-byte key seed that is zeroed when dropped, so secret material does not /// linger in freed memory, swap, or a core dump. pub(crate) type Seed = Zeroizing<[u8; 32]>; diff --git a/src/commands/verify.rs b/src/commands/verify.rs index 2f0b254..72557c8 100644 --- a/src/commands/verify.rs +++ b/src/commands/verify.rs @@ -6,12 +6,12 @@ //! 9b). The stages that need out-of-band context run only when that context is //! supplied: `--fetched-onion` for origin binding and `--content-index` for the //! content index. A skipped stage is reported, never silently passed. A reject -//! prints the diagnostic code, stage, and message and stops at the first -//! failing stage. +//! prints the diagnostic code, stage, and message, stops at the first failing +//! stage, and makes the process exit non-zero. //! -//! Content and transaction documents are verified through signature only here; -//! their later binding checks need the fetch path / submit body, which a future -//! revision will accept. +//! Content and transaction documents are verified against the runtime key the +//! manifest authorizes, supplied with `--expected-runtime-pubkey`; their +//! later binding checks (fetch path / submit body) are not yet wired here. use entangled_core::document::{ parse_and_verify_content, parse_and_verify_manifest, parse_and_verify_transaction, @@ -22,17 +22,12 @@ use entangled_core::types::timestamp::EntangledTimestamp; use entangled_core::validation::Diagnostic; use crate::cli::VerifyArgs; -use crate::commands::Error; +use crate::commands::{Error, Outcome}; -/// The corpus clock, used as the default verified-time reference when `--now` -/// is omitted so the common case (verifying a corpus document) just works. -const DEFAULT_NOW: &str = "2026-05-07T00:01:00Z"; - -pub fn run(args: VerifyArgs) -> Result<(), Error> { +pub fn run(args: VerifyArgs) -> Result { let bytes = std::fs::read(&args.input) .map_err(|e| format!("cannot read {}: {e}", args.input.display()))?; - let now = EntangledTimestamp::try_from(args.now.as_deref().unwrap_or(DEFAULT_NOW)) - .map_err(|e| format!("--now is not a valid RFC 3339 timestamp: {e}"))?; + let now = resolve_now(args.now.as_deref())?; // Discriminate the document kind cheaply from the wire bytes so the runner // can drive the right pipeline. The core parser re-checks this in Stage 4. @@ -45,7 +40,49 @@ pub fn run(args: VerifyArgs) -> Result<(), Error> { } } -fn verify_manifest(args: &VerifyArgs, bytes: &[u8], now: &EntangledTimestamp) -> Result<(), Error> { +/// The verified-time reference for the canary and origin-expiry checks: the +/// `--now` value when given (for reproducibility), otherwise the current system +/// UTC clock. A real client uses its own trusted clock; defaulting to "now" +/// avoids a stale fixed date silently passing an expired canary. +fn resolve_now(arg: Option<&str>) -> Result { + match arg { + Some(s) => EntangledTimestamp::try_from(s) + .map_err(|e| format!("--now is not a valid RFC 3339 timestamp: {e}").into()), + None => { + let now = time::OffsetDateTime::now_utc(); + // Format to the strict YYYY-MM-DDTHH:MM:SSZ shape the type accepts. + let s = format!( + "{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z", + now.year(), + u8::from(now.month()), + now.day(), + now.hour(), + now.minute(), + now.second(), + ); + eprintln!("note: no --now given; using the current system clock ({s})"); + EntangledTimestamp::try_from(s.as_str()) + .map_err(|e| format!("internal: bad system timestamp {s}: {e}").into()) + } + } +} + +fn verify_manifest( + args: &VerifyArgs, + bytes: &[u8], + now: &EntangledTimestamp, +) -> Result { + // Stage 9b runs only inside Stage 9 (origin binding), which needs the + // fetched onion. Reject the misleading combination up front rather than + // silently ignoring --content-index. + if args.content_index.is_some() && args.fetched_onion.is_none() { + return Err( + "--content-index requires --fetched-onion: the content index check (Stage 9b) \ + runs only after origin binding (Stage 9)" + .into(), + ); + } + // Stage 2-6: signature. let sig_verified = match parse_and_verify_manifest(bytes, now) { Ok(v) => v, @@ -92,28 +129,28 @@ fn verify_manifest(args: &VerifyArgs, bytes: &[u8], now: &EntangledTimestamp) -> println!("verdict: accept"); println!("canary_state: {canary_state:?}"); report_skips(args); - Ok(()) + Ok(Outcome::Success) } -fn verify_content(args: &VerifyArgs, bytes: &[u8]) -> Result<(), Error> { +fn verify_content(args: &VerifyArgs, bytes: &[u8]) -> Result { let (runtime_pk, has_key) = runtime_key(args)?; match parse_and_verify_content(bytes, &runtime_pk) { Ok(_) => { println!("verdict: accept"); print_runtime_note(has_key); - Ok(()) + Ok(Outcome::Success) } Err(d) => report_reject(&d), } } -fn verify_transaction(args: &VerifyArgs, bytes: &[u8]) -> Result<(), Error> { +fn verify_transaction(args: &VerifyArgs, bytes: &[u8]) -> Result { let (runtime_pk, has_key) = runtime_key(args)?; match parse_and_verify_transaction(bytes, &runtime_pk, None) { Ok(_) => { println!("verdict: accept"); print_runtime_note(has_key); - Ok(()) + Ok(Outcome::Success) } Err(d) => report_reject(&d), } @@ -143,21 +180,23 @@ fn print_runtime_note(has_key: bool) { } } -fn report_reject(diag: &Diagnostic) -> Result<(), Error> { +fn report_reject(diag: &Diagnostic) -> Result { println!("verdict: reject"); println!("diagnostic: {}", diag.code); println!("stage: {}", diag.stage); println!("message: {}", diag.message); - Ok(()) + Ok(Outcome::Rejected) } /// Print which optional manifest stages were skipped for lack of context, so an /// accept verdict is never mistaken for a full-pipeline pass. fn report_skips(args: &VerifyArgs) { if args.fetched_onion.is_none() { - println!("note: Stage 9 origin binding skipped (no --fetched-onion)"); - } - if args.content_index.is_none() { + // Without the fetched onion, Stage 9 (and so Stage 9b) does not run. + println!( + "note: Stage 9 origin binding and Stage 9b content index skipped (no --fetched-onion)" + ); + } else if args.content_index.is_none() { println!("note: Stage 9b content index skipped unless content_root forced a fetch failure (no --content-index)"); } } diff --git a/src/main.rs b/src/main.rs index ad99e5e..61672ab 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,6 +7,7 @@ mod markdown; use clap::Parser; use cli::{Cli, Command}; +use commands::Outcome; use std::process::ExitCode; fn main() -> ExitCode { @@ -19,7 +20,10 @@ fn main() -> ExitCode { Command::Content(args) => commands::content::run(args), }; match result { - Ok(()) => ExitCode::SUCCESS, + Ok(Outcome::Success) => ExitCode::SUCCESS, + // The command already printed the negative result (e.g. verify's + // `verdict: reject`); exit non-zero so callers can detect it. + Ok(Outcome::Rejected) => ExitCode::FAILURE, Err(e) => { eprintln!("error: {e}"); ExitCode::FAILURE diff --git a/src/markdown.rs b/src/markdown.rs index bb29030..39e9df3 100644 --- a/src/markdown.rs +++ b/src/markdown.rs @@ -374,8 +374,7 @@ impl Converter { // slash so '/assets/x.png' reads from '/assets/x.png'. let rel = img.src.trim_start_matches('/'); let file = self.assets_base.join(rel); - let bytes = std::fs::read(&file) - .map_err(|e| format!("cannot read image file {}: {e}", file.display()))?; + let bytes = read_image_file(&self.assets_base, &file)?; let media_type = image_media_type(&bytes) .ok_or_else(|| format!("{}: not a PNG, JPEG, or WebP image", file.display()))?; @@ -483,6 +482,51 @@ fn link_target(dest: &str) -> Result { } } +/// The protocol's per-image response cap (section 03): an image resource must +/// not exceed 2 MiB. The tool refuses larger files, both to match the protocol +/// and to bound how much it reads from an untrusted repository. +const IMAGE_MAX_BYTES: u64 = 2 * 1024 * 1024; + +/// Read an image file safely from under `base`. Hardens the read against a +/// hostile repository: +/// - the resolved path (after following symlinks) must stay inside `base`, so a +/// symlink cannot redirect the read or the content hash outside the assets; +/// - the target must be a regular file, not a symlink, FIFO, or device; +/// - the file must not exceed the 2 MiB image cap. +fn read_image_file(base: &Path, file: &Path) -> Result, Error> { + let canonical_base = base + .canonicalize() + .map_err(|e| format!("cannot resolve assets directory {}: {e}", base.display()))?; + let canonical = file + .canonicalize() + .map_err(|e| format!("cannot resolve image file {}: {e}", file.display()))?; + if !canonical.starts_with(&canonical_base) { + return Err(format!( + "image file {} resolves outside the assets directory {}", + file.display(), + base.display() + ) + .into()); + } + + let meta = std::fs::metadata(&canonical) + .map_err(|e| format!("cannot stat image file {}: {e}", file.display()))?; + if !meta.is_file() { + return Err(format!("image path {} is not a regular file", file.display()).into()); + } + if meta.len() > IMAGE_MAX_BYTES { + return Err(format!( + "image file {} is {} bytes, over the {IMAGE_MAX_BYTES}-byte image cap", + file.display(), + meta.len() + ) + .into()); + } + + std::fs::read(&canonical) + .map_err(|e| format!("cannot read image file {}: {e}", file.display()).into()) +} + /// Detect the Entangled-supported media type from an image file's magic bytes. /// Only PNG, JPEG, and WebP are valid in Entangled v1 (section 03). fn image_media_type(bytes: &[u8]) -> Option { @@ -630,6 +674,51 @@ mod tests { assert!(to_blocks("![a](/nope.png)", Path::new("/nonexistent")).is_err()); } + const PNG_1X1: &[u8] = &[ + 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, + 0x52, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x08, 0x06, 0x00, 0x00, 0x00, 0x1F, + 0x15, 0xC4, 0x89, 0x00, 0x00, 0x00, 0x0A, 0x49, 0x44, 0x41, 0x54, 0x78, 0x9C, 0x63, 0x00, + 0x01, 0x00, 0x00, 0x05, 0x00, 0x01, 0x0D, 0x0A, 0x2D, 0xB4, 0x00, 0x00, 0x00, 0x00, 0x49, + 0x45, 0x4E, 0x44, 0xAE, 0x42, 0x60, 0x82, + ]; + + #[test] + #[cfg(unix)] + fn image_symlink_outside_base_rejected() { + // The site directory (the assets base) and a target that lives OUTSIDE + // it. A symlink under assets pointing at the outside target must be + // rejected. + let root = std::env::temp_dir().join("entangled-tool-img-symlink"); + let site = root.join("site"); + std::fs::create_dir_all(site.join("assets")).unwrap(); + let outside = root.join("secret.png"); + std::fs::write(&outside, PNG_1X1).unwrap(); + let link = site.join("assets/x.png"); + let _ = std::fs::remove_file(&link); + std::os::unix::fs::symlink(&outside, &link).unwrap(); + + let err = to_blocks("![a](/assets/x.png)", &site).unwrap_err(); + assert!( + format!("{err}").contains("outside the assets directory"), + "got: {err}" + ); + std::fs::remove_dir_all(&root).ok(); + } + + #[test] + fn image_over_size_cap_rejected() { + let dir = std::env::temp_dir().join("entangled-tool-img-big"); + std::fs::create_dir_all(dir.join("assets")).unwrap(); + // A PNG header followed by enough bytes to exceed the 2 MiB cap. + let mut big = PNG_1X1.to_vec(); + big.resize(IMAGE_MAX_BYTES as usize + 1, 0); + std::fs::write(dir.join("assets/x.png"), &big).unwrap(); + + let err = to_blocks("![a](/assets/x.png)", &dir).unwrap_err(); + assert!(format!("{err}").contains("image cap"), "got: {err}"); + std::fs::remove_dir_all(&dir).ok(); + } + #[test] fn rejects_html() { assert!(t("
x
").is_err()); diff --git a/tests/exit_code.rs b/tests/exit_code.rs new file mode 100644 index 0000000..9922397 --- /dev/null +++ b/tests/exit_code.rs @@ -0,0 +1,41 @@ +//! `verify` must signal its verdict through the process exit code: a rejected +//! document exits non-zero so CI and scripts do not treat it as valid. This is +//! the regression guard for that contract. + +use std::process::Command; + +fn tool() -> Command { + Command::new(env!("CARGO_BIN_EXE_entangled-tool")) +} + +const POST: &str = "examples/blog/post.json"; +// The runtime key the example manifest authorizes (its canary.runtime_pubkey). +const RUNTIME_PUBKEY: &str = "jzFtziEJkbIdjI15I4u3ni3bBa6IFElyyjEmMVSGF7o"; + +#[test] +fn verify_reject_exits_nonzero() { + // Verifying the content standalone, with no authorized runtime key, rejects. + let status = tool() + .args(["verify", "--input", POST]) + .status() + .expect("run verify"); + assert!( + !status.success(), + "a rejected document must not exit 0 (got {status})" + ); +} + +#[test] +fn verify_accept_exits_zero() { + let status = tool() + .args([ + "verify", + "--input", + POST, + "--expected-runtime-pubkey", + RUNTIME_PUBKEY, + ]) + .status() + .expect("run verify"); + assert!(status.success(), "an accepted document must exit 0"); +}