From b9db3929b1bd3ecd302ede65da17ca7739987de2 Mon Sep 17 00:00:00 2001 From: droidnoob Date: Sat, 30 May 2026 10:49:48 +0530 Subject: [PATCH 1/8] feat(batch_plan): hew_core::batch_plan module (hew-58ac) - BatchPlan { schema_version, iter_number, task_ids, source, reason, created_at, planner_tokens } + tagged BatchSource (Agent/Planner/Skipped, snake_case on the wire) - path/read/write API; atomic write via loop_log::write_json_atomic; read returns Ok(None) on missing file and rejects mismatched SCHEMA_VERSION with a clear miette diagnostic - 9 unit tests covering zero-pad path, missing-file, all three source roundtrips, atomic temp-cleanup, wire form, pinned version, unknown- version rejection First-class artifact for the parent epic hew-lf40's batch-planner pipeline; downstream parser/planner/dispatcher consume this type. Closes hew-58ac. --- hew-core/src/batch_plan.rs | 243 +++++++++++++++++++++++++++++++++++++ hew-core/src/lib.rs | 1 + 2 files changed, 244 insertions(+) create mode 100644 hew-core/src/batch_plan.rs diff --git a/hew-core/src/batch_plan.rs b/hew-core/src/batch_plan.rs new file mode 100644 index 0000000..4b8e840 --- /dev/null +++ b/hew-core/src/batch_plan.rs @@ -0,0 +1,243 @@ +//! Per-iter batch artifact written by `hew loop run --jobs N >= 2`. +//! +//! A [`BatchPlan`] names the task ids the dispatcher should consider +//! dispatching on the *next* iter. It is one of three signals +//! (agent-suggested, planner-spawned, or skipped → fall back to +//! trust-the-graph) and persists on disk as +//! `/batch-NNN.json` so a future `hew loop graph` / +//! `hew loop summary` consumer can replay the dispatch decision after +//! the fact. +//! +//! See parent epic `hew-lf40` for the wire-up and the planner-spawn +//! pipeline that produces these files. +//! +//! Schema discipline mirrors [`crate::external_gate::GateKind`]: tagged +//! enum + `snake_case` rename, atomic write via +//! [`crate::loop_log::write_json_atomic`], and a pinned +//! 
[`SCHEMA_VERSION`] so a newer hew can reject older logs cleanly +//! instead of misparsing. + +use std::path::{Path, PathBuf}; + +use serde::{Deserialize, Serialize}; + +use crate::error::Result; +use crate::loop_log::write_json_atomic; +use crate::runner::TokenSpend; + +/// Pinned schema version for the on-disk batch-plan format. Bump iff +/// the wire shape changes; readers reject any other value. +pub const SCHEMA_VERSION: u32 = 1; + +/// Provenance of a [`BatchPlan`]. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum BatchSource { + /// The previous iter's close output named the batch via a + /// `next_iteration:` tail line. + Agent, + /// A dedicated planner subprocess produced the batch between iters. + Planner, + /// No batch was produced — dispatcher falls back to trust-the-graph + /// (`bd ready` order). `reason` on the surrounding [`BatchPlan`] + /// records why (e.g. budget exhausted, planner declined). + Skipped, +} + +/// Per-iter batch artifact. +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +pub struct BatchPlan { + pub schema_version: u32, + pub iter_number: u32, + pub task_ids: Vec, + pub source: BatchSource, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub reason: Option, + pub created_at: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub planner_tokens: Option, +} + +/// `/batch-NNN.json` with a 3-digit zero-padded iter number. +pub fn path(run_dir: &Path, iter: u32) -> PathBuf { + run_dir.join(format!("batch-{iter:03}.json")) +} + +/// Read the batch plan for `iter` from `run_dir`. Returns `Ok(None)` +/// when the file is absent (the common case for old runs or skipped +/// iters that never wrote one). 
+pub fn read(run_dir: &Path, iter: u32) -> Result> { + let p = path(run_dir, iter); + let body = match std::fs::read_to_string(&p) { + Ok(s) => s, + Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None), + Err(e) => return Err(e.into()), + }; + let plan: BatchPlan = serde_json::from_str(&body)?; + if plan.schema_version != SCHEMA_VERSION { + return Err(std::io::Error::other(format!( + "unsupported batch_plan schema_version {} (expected {})", + plan.schema_version, SCHEMA_VERSION + )) + .into()); + } + Ok(Some(plan)) +} + +/// Atomically write `plan` to `/batch-NNN.json`. +pub fn write(run_dir: &Path, plan: &BatchPlan) -> Result<()> { + write_json_atomic(&path(run_dir, plan.iter_number), plan) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn tmpdir() -> PathBuf { + let base = std::env::temp_dir().join(format!( + "hew-batch-plan-{}-{}", + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or_default() + )); + std::fs::create_dir_all(&base).unwrap(); + base + } + + #[test] + fn path_zero_pads_iter_to_three_digits() { + assert_eq!(path(Path::new("/tmp/r"), 1), Path::new("/tmp/r/batch-001.json")); + assert_eq!(path(Path::new("/tmp/r"), 42), Path::new("/tmp/r/batch-042.json")); + assert_eq!(path(Path::new("/tmp/r"), 999), Path::new("/tmp/r/batch-999.json")); + } + + #[test] + fn read_returns_none_on_missing_file() { + let dir = tmpdir(); + assert!(read(&dir, 7).unwrap().is_none()); + } + + #[test] + fn read_parses_agent_sourced_plan_roundtrip() { + let dir = tmpdir(); + let plan = BatchPlan { + schema_version: SCHEMA_VERSION, + iter_number: 3, + task_ids: vec!["hew-aaa".into(), "hew-bbb".into()], + source: BatchSource::Agent, + reason: None, + created_at: "2026-05-30T00:00:00Z".into(), + planner_tokens: None, + }; + write(&dir, &plan).unwrap(); + let parsed = read(&dir, 3).unwrap().expect("file present"); + assert_eq!(parsed, plan); + } + + #[test] + fn 
read_parses_planner_sourced_plan_with_tokens() { + let dir = tmpdir(); + let plan = BatchPlan { + schema_version: SCHEMA_VERSION, + iter_number: 5, + task_ids: vec!["hew-ccc".into()], + source: BatchSource::Planner, + reason: None, + created_at: "2026-05-30T00:00:00Z".into(), + planner_tokens: Some(TokenSpend { + input: 1000, + output: 200, + cache_read: 0, + cache_create: 0, + }), + }; + write(&dir, &plan).unwrap(); + let parsed = read(&dir, 5).unwrap().expect("file present"); + assert_eq!(parsed.source, BatchSource::Planner); + assert_eq!(parsed.planner_tokens.unwrap().input, 1000); + } + + #[test] + fn read_parses_skipped_plan_with_reason() { + let dir = tmpdir(); + let plan = BatchPlan { + schema_version: SCHEMA_VERSION, + iter_number: 9, + task_ids: Vec::new(), + source: BatchSource::Skipped, + reason: Some("planner budget exhausted".into()), + created_at: "2026-05-30T00:00:00Z".into(), + planner_tokens: None, + }; + write(&dir, &plan).unwrap(); + let parsed = read(&dir, 9).unwrap().expect("file present"); + assert_eq!(parsed.source, BatchSource::Skipped); + assert_eq!(parsed.reason.as_deref(), Some("planner budget exhausted")); + assert!(parsed.task_ids.is_empty()); + } + + #[test] + fn write_atomic_temp_then_rename_pattern() { + let dir = tmpdir(); + let plan = BatchPlan { + schema_version: SCHEMA_VERSION, + iter_number: 2, + task_ids: vec!["hew-zzz".into()], + source: BatchSource::Agent, + reason: None, + created_at: "2026-05-30T00:00:00Z".into(), + planner_tokens: None, + }; + write(&dir, &plan).unwrap(); + let final_path = path(&dir, 2); + assert!(final_path.exists()); + // The temp sibling must not linger after a successful rename. + let tmp = dir.join(".batch-002.json.tmp"); + assert!(!tmp.exists(), "atomic write must remove its temp sibling: {tmp:?}"); + } + + #[test] + fn batch_source_serde_snake_case() { + // Single-word variants render in lower-case on the wire — no + // PascalCase leakage. 
+ let cases = [ + (BatchSource::Agent, "\"agent\""), + (BatchSource::Planner, "\"planner\""), + (BatchSource::Skipped, "\"skipped\""), + ]; + for (variant, expected) in cases { + let s = serde_json::to_string(&variant).unwrap(); + assert_eq!(s, expected, "wire form for {variant:?}"); + let parsed: BatchSource = serde_json::from_str(expected).unwrap(); + assert_eq!(parsed, variant); + } + } + + #[test] + fn schema_version_pinned_to_1() { + assert_eq!(SCHEMA_VERSION, 1); + } + + #[test] + fn read_rejects_unknown_schema_version_with_clear_error() { + let dir = tmpdir(); + // Hand-rolled JSON with the wrong schema_version — serde parses + // it cleanly, the version check should reject it. + let body = r#"{ + "schema_version": 99, + "iter_number": 1, + "task_ids": ["hew-x"], + "source": "agent", + "created_at": "2026-05-30T00:00:00Z" + }"#; + std::fs::write(path(&dir, 1), body).unwrap(); + let err = read(&dir, 1).expect_err("must reject unknown schema_version"); + let msg = err.to_string(); + assert!( + msg.contains("schema_version") && msg.contains("99"), + "error must name the offending version: {msg}" + ); + } +} diff --git a/hew-core/src/lib.rs b/hew-core/src/lib.rs index bcd5acc..2e320ed 100644 --- a/hew-core/src/lib.rs +++ b/hew-core/src/lib.rs @@ -5,6 +5,7 @@ pub mod allowed_tools; pub mod backpressure; +pub mod batch_plan; pub mod bd; #[cfg(feature = "treesitter")] pub mod blast; From 81bfbb5b3767615d2590701582180912e8bfa663 Mon Sep 17 00:00:00 2001 From: droidnoob Date: Sat, 30 May 2026 10:55:41 +0530 Subject: [PATCH 2/8] feat(batch_plan_parse): extract_next_iteration from agent raw_text (hew-7klt) - New hew_core::batch_plan_parse module - Parses fenced ```next_iteration JSON-array and XML-tag CSV forms - Hand-rolled hew-id validator (no new regex dep) - Distinct None / Some(vec![]) / Some(ids) return states - 13 tests including 1000-iter adversarial fuzz Co-Authored-By: Claude Opus 4.7 (1M context) --- hew-core/src/batch_plan_parse.rs | 281 
+++++++++++++++++++++++++++++++ hew-core/src/lib.rs | 1 + 2 files changed, 282 insertions(+) create mode 100644 hew-core/src/batch_plan_parse.rs diff --git a/hew-core/src/batch_plan_parse.rs b/hew-core/src/batch_plan_parse.rs new file mode 100644 index 0000000..1802426 --- /dev/null +++ b/hew-core/src/batch_plan_parse.rs @@ -0,0 +1,281 @@ +//! Extract a `next_iteration` batch suggestion from an iter agent's +//! raw close output. +//! +//! The iter agent is invited (via the close-output template) to emit a +//! list of task ids it believes can run in parallel on the next iter. +//! Two block forms are accepted: +//! +//! 1. Fenced code block tagged `next_iteration` carrying a JSON array: +//! +//! ```text +//! ```next_iteration +//! ["hew-aaa", "hew-bbb"] +//! ``` +//! ``` +//! +//! 2. XML-style tag with CSV body: `hew-aaa, hew-bbb`. +//! +//! The parser is **best-effort**: malformed input returns `None` rather +//! than erroring, so a noisy iter never fails the loop. Absence +//! (`None`) and an explicit empty list (`Some(vec![])`) are distinct +//! signals — empty means "the agent chose to parallelize nothing," +//! while `None` means "the agent didn't say." +//! +//! Multiple blocks in the same text: the first one wins. + +/// Extract a `next_iteration` task-id batch from raw agent close text. +/// +/// Returns: +/// - `None` if no block is present, or the block's body is unparseable. +/// - `Some(vec![])` if a block is present but contains no valid task ids +/// (either an explicit empty list, or every token was rejected as +/// malformed). +/// - `Some(vec![ids...])` with whitespace stripped and each id +/// validated against `^hew-[a-z0-9]+(\.[0-9]+)*$`. Malformed tokens +/// are silently dropped; duplicates are preserved in order. 
+pub fn extract_next_iteration(raw_text: &str) -> Option> { + if let Some(ids) = extract_fenced(raw_text) { + return Some(ids); + } + extract_xml_tag(raw_text) +} + +fn extract_fenced(raw_text: &str) -> Option> { + // Locate the opening fence. Accept any number of leading backticks + // ≥3 followed by the language tag; we just look for the canonical + // ```next_iteration marker. + let start = raw_text.find("```next_iteration")?; + let after_tag = &raw_text[start + "```next_iteration".len()..]; + // Body ends at the next ``` fence. + let end = after_tag.find("```")?; + let body = after_tag[..end].trim(); + parse_json_array(body) +} + +fn extract_xml_tag(raw_text: &str) -> Option> { + let open = ""; + let close = ""; + let start = raw_text.find(open)?; + let after = &raw_text[start + open.len()..]; + let end = after.find(close)?; + let body = after[..end].trim(); + Some(parse_csv(body)) +} + +fn parse_json_array(body: &str) -> Option> { + // Strip a leading `[` and trailing `]` — anything else is malformed. + let body = body.trim(); + let inner = body.strip_prefix('[')?.strip_suffix(']')?; + let inner = inner.trim(); + if inner.is_empty() { + return Some(Vec::new()); + } + let mut out = Vec::new(); + for tok in inner.split(',') { + let t = tok.trim().trim_matches(|c: char| c == '"' || c == '\''); + if t.is_empty() { + continue; + } + if is_valid_task_id(t) { + out.push(t.to_string()); + } + } + Some(out) +} + +fn parse_csv(body: &str) -> Vec { + if body.is_empty() { + return Vec::new(); + } + body.split(',') + .map(|t| t.trim()) + .filter(|t| !t.is_empty() && is_valid_task_id(t)) + .map(|t| t.to_string()) + .collect() +} + +/// Validate against `^hew-[a-z0-9]+(\.[0-9]+)*$`. 
+fn is_valid_task_id(s: &str) -> bool { + let rest = match s.strip_prefix("hew-") { + Some(r) => r, + None => return false, + }; + if rest.is_empty() { + return false; + } + // First segment: [a-z0-9]+ + let mut chars = rest.chars().peekable(); + let mut first_seg_len = 0usize; + while let Some(&c) = chars.peek() { + if c.is_ascii_lowercase() || c.is_ascii_digit() { + chars.next(); + first_seg_len += 1; + } else { + break; + } + } + if first_seg_len == 0 { + return false; + } + // Remaining: zero or more `.[0-9]+` segments. + while let Some(&c) = chars.peek() { + if c != '.' { + return false; + } + chars.next(); + let mut seg_len = 0usize; + while let Some(&d) = chars.peek() { + if d.is_ascii_digit() { + chars.next(); + seg_len += 1; + } else { + break; + } + } + if seg_len == 0 { + return false; + } + } + true +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn extract_returns_none_when_no_block() { + assert_eq!(extract_next_iteration("just some close text\nno batch here"), None); + assert_eq!(extract_next_iteration(""), None); + } + + #[test] + fn extract_returns_empty_vec_when_block_is_empty_array() { + let fenced = "blah\n```next_iteration\n[]\n```\n"; + assert_eq!(extract_next_iteration(fenced), Some(vec![])); + let xml = ""; + assert_eq!(extract_next_iteration(xml), Some(vec![])); + } + + #[test] + fn extract_parses_fenced_json_array_form() { + let raw = "closing notes\n\n```next_iteration\n[\"hew-aaa\", \"hew-bbb\"]\n```\n"; + assert_eq!(extract_next_iteration(raw), Some(vec!["hew-aaa".into(), "hew-bbb".into()])); + } + + #[test] + fn extract_parses_xml_tag_form() { + let raw = "done.\nhew-aaa, hew-bbb, hew-ccc\n"; + assert_eq!( + extract_next_iteration(raw), + Some(vec!["hew-aaa".into(), "hew-bbb".into(), "hew-ccc".into()]) + ); + } + + #[test] + fn extract_filters_malformed_task_ids() { + let raw = "hew-aaa, not-a-task, HEW-BBB, hew-ccc"; + assert_eq!(extract_next_iteration(raw), Some(vec!["hew-aaa".into(), "hew-ccc".into()])); + } + + #[test] 
+ fn extract_first_block_wins_on_duplicates() { + // Fenced form takes priority over XML form too. + let raw = "\ +```next_iteration +[\"hew-aaa\"] +``` + +hew-zzz +"; + assert_eq!(extract_next_iteration(raw), Some(vec!["hew-aaa".into()])); + + // Two fenced blocks: first wins. + let raw2 = "\ +```next_iteration +[\"hew-first\"] +``` + +```next_iteration +[\"hew-second\"] +``` +"; + assert_eq!(extract_next_iteration(raw2), Some(vec!["hew-first".into()])); + } + + #[test] + fn extract_tolerates_leading_trailing_whitespace() { + let raw = "```next_iteration\n [ \"hew-aaa\" , \"hew-bbb\" ] \n```"; + assert_eq!(extract_next_iteration(raw), Some(vec!["hew-aaa".into(), "hew-bbb".into()])); + let raw_xml = " hew-aaa , hew-bbb "; + assert_eq!(extract_next_iteration(raw_xml), Some(vec!["hew-aaa".into(), "hew-bbb".into()])); + } + + #[test] + fn extract_handles_realistic_agent_close_output() { + let mut raw = String::from("Closing hew-7klt. Implemented batch_plan_parse module.\n\n"); + for i in 0..400 { + raw.push_str(&format!("Line {i}: some debug output here that nobody reads.\n")); + } + raw.push_str("\n## Suggested next iteration\n\n"); + raw.push_str("```next_iteration\n"); + raw.push_str("[\"hew-pxw9\", \"hew-rplg\", \"hew-7k1m.1\"]\n"); + raw.push_str("```\n\n"); + for i in 0..100 { + raw.push_str(&format!("Trailing line {i}\n")); + } + assert_eq!( + extract_next_iteration(&raw), + Some(vec!["hew-pxw9".into(), "hew-rplg".into(), "hew-7k1m.1".into()]) + ); + } + + #[test] + fn extract_subtask_dotted_ids_validated() { + let raw = "hew-a3f8.1, hew-a3f8.2.3, hew-bad., hew-.1"; + assert_eq!( + extract_next_iteration(raw), + Some(vec!["hew-a3f8.1".into(), "hew-a3f8.2.3".into()]) + ); + } + + #[test] + fn extract_returns_none_on_unparseable_fenced_body() { + // Fenced block present but body isn't a JSON-ish array → reject. 
+ let raw = "```next_iteration\nnot an array\n```"; + assert_eq!(extract_next_iteration(raw), None); + } + + #[test] + fn extract_never_panics_on_adversarial_input() { + // Deterministic LCG over a small set of bytes (control chars, + // brackets, fence-ish patterns). Goal: confirm absence of panic + // across many random shapes, not full coverage. + let alphabet: &[u8] = b" \n\t<>`/[]\",hew-iteration_nxX0123456789.{}\\"; + let mut state: u64 = 0xC0FFEE; + for _ in 0..1000 { + state = state.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407); + let len = (state as usize % 400) + 1; + let mut buf = String::with_capacity(len); + for i in 0..len { + state = state.wrapping_mul(6364136223846793005).wrapping_add(1442695040888963407); + let b = alphabet[((state >> 17) as usize + i) % alphabet.len()]; + buf.push(b as char); + } + // Must return Some or None without panicking. + let _ = extract_next_iteration(&buf); + } + } + + #[test] + fn extract_returns_none_when_fence_unclosed() { + let raw = "```next_iteration\n[\"hew-aaa\"]\n"; + assert_eq!(extract_next_iteration(raw), None); + } + + #[test] + fn extract_returns_none_when_xml_unclosed() { + let raw = "hew-aaa"; + assert_eq!(extract_next_iteration(raw), None); + } +} diff --git a/hew-core/src/lib.rs b/hew-core/src/lib.rs index 2e320ed..da4c952 100644 --- a/hew-core/src/lib.rs +++ b/hew-core/src/lib.rs @@ -6,6 +6,7 @@ pub mod allowed_tools; pub mod backpressure; pub mod batch_plan; +pub mod batch_plan_parse; pub mod bd; #[cfg(feature = "treesitter")] pub mod blast; From c4bc1cfc3fba831012c8c700dc8248b4eda75264 Mon Sep 17 00:00:00 2001 From: droidnoob Date: Sat, 30 May 2026 11:04:38 +0530 Subject: [PATCH 3/8] feat(loop): planner-spawn with budget-exceeded fallback (hew-pxw9) - spawn_planner in hew/src/commands/loop_cmd.rs: assembles a small prompt over bd_ready + recent_touches, runs a pre-spawn token budget check, drives the runtime, parses extract_next_iteration from the response. 
- Every failure path returns BatchPlan { source: Skipped, reason }: budget_exceeded (no spawn), runtime_error, parse_error. Planner must never kill the loop. - skills/data/planner-prompt.md holds the system body; embedded via include_str! and treated as a data file (not a registered skill). - skills drift test now skips skills/data/ since it ships embedded resources (.toml + .md), not skill bodies. - 6 inline unit tests cover all branches via MockSpawner + a custom Err-returning spawner. Closes hew-pxw9. --- hew-core/tests/skills.rs | 7 + hew/src/commands/loop_cmd.rs | 271 +++++++++++++++++++++++++++++++++- skills/data/planner-prompt.md | 39 +++++ 3 files changed, 316 insertions(+), 1 deletion(-) create mode 100644 skills/data/planner-prompt.md diff --git a/hew-core/tests/skills.rs b/hew-core/tests/skills.rs index 7d12d34..11d2ca5 100644 --- a/hew-core/tests/skills.rs +++ b/hew-core/tests/skills.rs @@ -20,6 +20,13 @@ fn collect_md(dir: &std::path::Path, into: &mut BTreeSet, prefix: &str) let ft = entry.file_type().expect("file_type"); let name = entry.file_name().to_string_lossy().to_string(); if ft.is_dir() { + // `skills/data/` holds embedded resource files (prompts, + // TOML catalogs) — not skill bodies. They ship via + // `include_str!` from the consuming module and aren't + // registered in `skills::CORE/BROWNFIELD/OPTIONAL`. 
+ if prefix.is_empty() && name == "data" { + continue; + } let nested_prefix = if prefix.is_empty() { name.clone() } else { format!("{prefix}/{name}") }; collect_md(&entry.path(), into, &nested_prefix); diff --git a/hew/src/commands/loop_cmd.rs b/hew/src/commands/loop_cmd.rs index a138526..8b02c17 100644 --- a/hew/src/commands/loop_cmd.rs +++ b/hew/src/commands/loop_cmd.rs @@ -22,7 +22,9 @@ use std::time::Duration; use clap::{Args as ClapArgs, Subcommand, ValueEnum}; use hew_core::backpressure::{self, GateCheck, Verdict}; -use hew_core::bd::{BdClient, RealBd}; +use hew_core::batch_plan::{BatchPlan, BatchSource, SCHEMA_VERSION as BATCH_PLAN_SCHEMA_VERSION}; +use hew_core::batch_plan_parse::extract_next_iteration; +use hew_core::bd::{BdClient, ReadyTask, RealBd}; use hew_core::config::LoopModelConfig; use hew_core::error::HewError; use hew_core::loop_log::{ @@ -1945,6 +1947,139 @@ fn print_iter(log: &IterLog) { ); } +/// Embedded planner system prompt, sourced from +/// `skills/data/planner-prompt.md`. Tuning it without a recompile is not +/// the goal: embedding via `include_str!` keeps the binary self-contained, +/// while the file on disk remains the canonical edit surface during dev. +const PLANNER_PROMPT_BODY: &str = include_str!("../../../skills/data/planner-prompt.md"); + +/// Compact view of a ready task that's safe to serialize into the +/// planner prompt. We deliberately drop `description` and `parent` so +/// the prompt stays small — the planner picks parallel-safe ids by +/// title + priority, not by re-reading the whole task graph. +#[derive(serde::Serialize)] +struct PlannerReadyView<'a> { + id: &'a str, + title: &'a str, + priority: u8, + #[serde(rename = "type")] + issue_type: &'a str, +} + +/// Build the per-iter planner prompt. The system body +/// (`PLANNER_PROMPT_BODY`) goes in the cache-prefix slot; bd-ready + +/// recent-touches JSON payloads go in the tail. 
+fn assemble_planner_prompt( + bd_ready: &[ReadyTask], + recent_touches: &[String], +) -> prompt::AssembledPrompt { + let view: Vec> = bd_ready + .iter() + .map(|t| PlannerReadyView { + id: &t.id, + title: &t.title, + priority: t.priority, + issue_type: &t.issue_type, + }) + .collect(); + let bd_ready_json = serde_json::to_string(&view).unwrap_or_else(|_| "[]".to_string()); + let touches_json = serde_json::to_string(recent_touches).unwrap_or_else(|_| "[]".to_string()); + let tail = format!("## bd_ready\n\n{bd_ready_json}\n\n## recent_touches\n\n{touches_json}\n"); + prompt::assemble(PLANNER_PROMPT_BODY, "", &tail) +} + +/// Build a `Skipped` batch plan with the given reason. Used by every +/// non-success path in [`spawn_planner`] so the caller sees one +/// shape regardless of why the planner declined. +fn skipped_plan(iter_number: u32, reason: impl Into) -> BatchPlan { + BatchPlan { + schema_version: BATCH_PLAN_SCHEMA_VERSION, + iter_number, + task_ids: Vec::new(), + source: BatchSource::Skipped, + reason: Some(reason.into()), + created_at: iso_now_utc(), + planner_tokens: None, + } +} + +/// Spawn the planner runtime to suggest a batch for `iter_number`. +/// +/// Per `hew-pxw9` acceptance: this function NEVER propagates an error — +/// every failure path returns `BatchPlan { source: Skipped, ... }`. The +/// planner is an advisory signal layered on top of trust-the-graph, and +/// a broken planner must not kill the loop. +/// +/// Pre-spawn budget check skips the subprocess entirely when the +/// assembled prompt's `token_estimate` exceeds `budget_tokens` — we +/// never truncate context to fit a budget per the plan's "refusing to +/// plan is strictly better than guessing badly" rule. 
+pub fn spawn_planner( + bd_ready: &[ReadyTask], + recent_touches: &[String], + budget_tokens: u32, + runtime: RuntimeKind, + project_root: &Path, + iter_number: u32, +) -> miette::Result { + let spawner = build_spawner_for(runtime); + Ok(spawn_planner_with( + spawner.as_ref(), + bd_ready, + recent_touches, + budget_tokens, + project_root, + iter_number, + )) +} + +/// Spawner-injected variant of [`spawn_planner`]. Production wires the +/// real runtime; unit tests pass a `MockSpawner` (or a custom error- +/// returning one) so the budget / parse / runtime-error branches are +/// each exercisable without touching a real subprocess. +fn spawn_planner_with( + spawner: &dyn RuntimeSpawner, + bd_ready: &[ReadyTask], + recent_touches: &[String], + budget_tokens: u32, + project_root: &Path, + iter_number: u32, +) -> BatchPlan { + let prompt = assemble_planner_prompt(bd_ready, recent_touches); + let estimate = prompt.token_estimate; + if estimate > budget_tokens as u64 { + return skipped_plan( + iter_number, + format!("budget_exceeded: estimated {estimate} tokens > budget {budget_tokens}"), + ); + } + let opts = SpawnOpts { model_override: None, working_dir: Some(project_root.to_path_buf()) }; + let outcome = match spawner.spawn(&prompt, &[], &opts) { + Ok(o) => o, + Err(e) => return skipped_plan(iter_number, format!("runtime_error: {e}")), + }; + match extract_next_iteration(&outcome.raw_text) { + Some(ids) => BatchPlan { + schema_version: BATCH_PLAN_SCHEMA_VERSION, + iter_number, + task_ids: ids, + source: BatchSource::Planner, + reason: None, + created_at: iso_now_utc(), + planner_tokens: Some(outcome.tokens), + }, + None => BatchPlan { + schema_version: BATCH_PLAN_SCHEMA_VERSION, + iter_number, + task_ids: Vec::new(), + source: BatchSource::Skipped, + reason: Some("parse_error: planner response missing next_iteration block".into()), + created_at: iso_now_utc(), + planner_tokens: Some(outcome.tokens), + }, + } +} + fn parse_duration(s: &str) -> Result { let s = 
s.trim(); if s.is_empty() { @@ -2149,4 +2284,138 @@ mod tests { let err = resolve_scope(&args, &non_interactive_ctx(), &bd).unwrap_err(); assert!(format!("{err:?}").contains("not found")); } + + // ---- spawn_planner (hew-pxw9) ----------------------------------- + + use hew_core::runner::TokenSpend; + use hew_core::runtime::{MockSpawner, SpawnFailureClass, SpawnOutcome}; + + fn ready(id: &str, prio: u8) -> ReadyTask { + ReadyTask { + id: id.into(), + title: format!("title for {id}"), + description: String::new(), + priority: prio, + status: "open".into(), + issue_type: "task".into(), + parent: None, + } + } + + fn planner_outcome_with(raw_text: impl Into) -> SpawnOutcome { + SpawnOutcome { + success: true, + closed_task: None, + tokens: TokenSpend { input: 1234, output: 56, cache_read: 0, cache_create: 0 }, + stderr_tail: String::new(), + raw_text: raw_text.into(), + failure_class: SpawnFailureClass::Success, + } + } + + #[test] + fn planner_skips_when_estimated_tokens_exceed_budget() { + let bd_ready = vec![ready("hew-aaa", 1), ready("hew-bbb", 2)]; + let touches = vec!["src/foo.rs:bar".to_string()]; + let mock = MockSpawner::new(planner_outcome_with("")); + let plan = + spawn_planner_with(&mock, &bd_ready, &touches, /*budget*/ 1, Path::new("/"), 7); + assert_eq!(plan.source, BatchSource::Skipped); + assert!(plan.task_ids.is_empty()); + let reason = plan.reason.as_deref().unwrap_or(""); + assert!( + reason.starts_with("budget_exceeded:") && reason.contains("budget 1"), + "reason should name the cause + budget: {reason}", + ); + // No subprocess spawned. 
+ assert!( + mock.last_args.borrow().is_none(), + "spawner must not be invoked when budget already exceeded", + ); + assert!(plan.planner_tokens.is_none(), "no spawn → no tokens accounted"); + assert_eq!(plan.iter_number, 7); + assert_eq!(plan.schema_version, BATCH_PLAN_SCHEMA_VERSION); + } + + #[test] + fn planner_returns_plan_on_clean_response() { + let bd_ready = vec![ready("hew-aaa", 1), ready("hew-bbb", 2)]; + let mock = MockSpawner::new(planner_outcome_with( + "thinking...\n\n```next_iteration\n[\"hew-aaa\", \"hew-bbb\"]\n```\n", + )); + let plan = + spawn_planner_with(&mock, &bd_ready, &[], /*budget*/ 100_000, Path::new("/"), 3); + assert_eq!(plan.source, BatchSource::Planner); + assert_eq!(plan.task_ids, vec!["hew-aaa".to_string(), "hew-bbb".to_string()]); + assert_eq!(plan.iter_number, 3); + assert!(plan.reason.is_none()); + } + + #[test] + fn planner_skips_on_parse_error() { + let mock = MockSpawner::new(planner_outcome_with("no fenced block here, just prose")); + let plan = + spawn_planner_with(&mock, &[ready("hew-aaa", 1)], &[], 100_000, Path::new("/"), 1); + assert_eq!(plan.source, BatchSource::Skipped); + assert!(plan.task_ids.is_empty()); + let reason = plan.reason.as_deref().unwrap_or(""); + assert!(reason.starts_with("parse_error:"), "got {reason:?}"); + } + + #[test] + fn planner_skips_on_runtime_error() { + #[derive(Debug)] + struct ErrSpawner; + impl RuntimeSpawner for ErrSpawner { + fn spawn( + &self, + _: &prompt::AssembledPrompt, + _: &[String], + _: &SpawnOpts, + ) -> hew_core::error::Result { + Err(std::io::Error::other("simulated spawn failure").into()) + } + } + let plan = spawn_planner_with( + &ErrSpawner, + &[ready("hew-aaa", 1)], + &[], + 100_000, + Path::new("/"), + 4, + ); + assert_eq!(plan.source, BatchSource::Skipped); + let reason = plan.reason.as_deref().unwrap_or(""); + assert!(reason.starts_with("runtime_error:"), "got {reason:?}"); + assert!(reason.contains("simulated spawn failure"), "must surface the cause: {reason}"); + } + 
+ #[test] + fn planner_prompt_includes_bd_ready_and_recent_touches() { + let bd_ready = vec![ready("hew-aaa", 1), ready("hew-bbb", 3)]; + let touches = vec!["src/dispatcher.rs:run".into(), "src/loop_log.rs:write".into()]; + let prompt = assemble_planner_prompt(&bd_ready, &touches); + // System body lands in the cache prefix. + assert!(prompt.prefix.contains("Hew loop"), "prefix must carry the system body"); + // Task tail carries both inputs. + assert!(prompt.tail.contains("hew-aaa"), "bd_ready ids missing from tail"); + assert!(prompt.tail.contains("hew-bbb")); + assert!(prompt.tail.contains("\"priority\":1"), "priority emitted"); + assert!(prompt.tail.contains("src/dispatcher.rs:run"), "touches missing from tail"); + assert!(prompt.tail.contains("src/loop_log.rs:write")); + // The full prompt is what the spawner actually receives — it + // must contain both halves. + assert!(prompt.full_text.contains("Hew loop")); + assert!(prompt.full_text.contains("hew-aaa")); + } + + #[test] + fn planner_tokens_field_populated_on_success() { + let mock = MockSpawner::new(planner_outcome_with("```next_iteration\n[\"hew-aaa\"]\n```")); + let plan = + spawn_planner_with(&mock, &[ready("hew-aaa", 1)], &[], 100_000, Path::new("/"), 2); + let tokens = plan.planner_tokens.expect("planner_tokens populated on success"); + assert_eq!(tokens.input, 1234); + assert_eq!(tokens.output, 56); + } } diff --git a/skills/data/planner-prompt.md b/skills/data/planner-prompt.md new file mode 100644 index 0000000..edc4690 --- /dev/null +++ b/skills/data/planner-prompt.md @@ -0,0 +1,39 @@ + +# Hew loop — batch planner + +You are the planner subprocess inside `hew loop run --jobs N`. Your only +job is to pick a small set of bd-ready task ids that look safe to run +in parallel on the next iter. The dispatcher will then intersect your +list with the live bd-ready set and fan out one worker per id. 
+ +## Inputs + +Two JSON blobs follow this prompt, delimited by `---`: + +- `bd_ready` — an array of `{id, title, priority, type}` for every task + currently ready. You may pick from any of these. +- `recent_touches` — an array of `:` strings the last few + iters wrote to. Tasks that look likely to touch the same paths or + symbols are NOT safe to run in parallel — drop one of them. + +## Rules + +- Pick task ids drawn ONLY from `bd_ready`. Never invent ids. +- Prefer higher-priority tasks (P0/P1 over P2/P3) when ranking. +- Aim for 2–4 ids when there are good independent candidates; + one id (or zero) is acceptable when the graph is sparse or every + candidate fights over the same files. +- When unsure, return fewer ids. The fallback (trust-the-graph) is + always safe. + +## Output format + +Respond with exactly one fenced block — no prose before or after: + +```next_iteration +["hew-aaa", "hew-bbb"] +``` + +An empty list (`[]`) is acceptable when nothing looks parallel-safe. +The dispatcher tolerates absence (you can also reply with no block), +in which case it falls back to `bd ready` order. From b8d82898f3e59027e9c19fb0260a11fd7c06db4e Mon Sep 17 00:00:00 2001 From: droidnoob Date: Sat, 30 May 2026 11:12:26 +0530 Subject: [PATCH 4/8] =?UTF-8?q?feat(dispatcher):=20thread=20BatchPlan=20+?= =?UTF-8?q?=20filter=20dispatch=5Ftick=20by=20batch=20=E2=88=A9=20bd-ready?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Dispatcher::new gains Option; field cached on the struct. - dispatch_tick narrows post-scope candidates by linear contains against plan.task_ids (typical batch <10 — avoids per-tick HashSet alloc). Filter is non-expansive: bd dep graph stays the safety floor per DECISION:loop-parallel-overlap-policy. - Source::Skipped and empty task_ids fall through to trust-the-graph with no batch_source signaled. - New DispatchTick.batch_source + Dispatcher::current_batch_source() for downstream summary aggregation. 
- 8 new tests cover the matrix; existing 13 dispatcher tests pass unchanged with batch_plan: None. Closes hew-rplg. --- hew-core/src/dispatcher.rs | 180 ++++++++++++++++++++++++++++++++--- hew/src/commands/loop_cmd.rs | 2 +- 2 files changed, 167 insertions(+), 15 deletions(-) diff --git a/hew-core/src/dispatcher.rs b/hew-core/src/dispatcher.rs index df05e6b..47397a6 100644 --- a/hew-core/src/dispatcher.rs +++ b/hew-core/src/dispatcher.rs @@ -18,6 +18,7 @@ use std::collections::HashSet; use std::path::Path; +use crate::batch_plan::{BatchPlan, BatchSource}; use crate::bd::{BdClient, ReadyTask}; use crate::error::Result; use crate::git::GitClient; @@ -46,11 +47,16 @@ pub struct DispatchTick { /// New slot assignments made this tick. pub assignments: Vec, /// Number of `bd ready` tasks visible (whether or not assigned). + /// Counts the **post-filter** set when a [`BatchPlan`] is active. pub ready_seen: usize, /// Tasks the dispatcher tried to claim but `bd` rejected — typically /// a race with another agent claiming the same id. The slot stays /// idle and will be retried next tick. pub claim_failures: Vec, + /// Provenance of the active batch plan, if any narrowed this tick. + /// `None` when no plan is set or when the plan's source is + /// [`BatchSource::Skipped`] (fall-through to trust-the-graph). + pub batch_source: Option, } /// A task that was claimed and pinned to a specific slot this tick. @@ -75,6 +81,7 @@ pub struct Dispatcher { run_id: String, base_sha: String, scope: Scope, + batch_plan: Option, } impl Dispatcher { @@ -90,6 +97,7 @@ impl Dispatcher { run_id: impl Into, base_sha: impl Into, scope: Scope, + batch_plan: Option, ) -> Self { let n = (jobs.max(1)) as usize; Self { @@ -97,6 +105,17 @@ impl Dispatcher { run_id: run_id.into(), base_sha: base_sha.into(), scope, + batch_plan, + } + } + + /// Provenance of the active batch plan, if one narrowed dispatch. 
+ /// Returns `None` when no plan is set or the plan's source is + /// [`BatchSource::Skipped`] (fall-through to trust-the-graph). + pub fn current_batch_source(&self) -> Option { + match &self.batch_plan { + Some(p) if p.source != BatchSource::Skipped && !p.task_ids.is_empty() => Some(p.source), + _ => None, } } @@ -169,7 +188,22 @@ impl Dispatcher { let ready: Vec = ready.into_iter().filter(|t| self.scope.includes(&t.id, &descendant_set)).collect(); - let mut tick = DispatchTick { ready_seen: ready.len(), ..Default::default() }; + // Batch-plan narrowing — non-expansive (batch ∩ bd-ready). The + // bd dep graph remains the safety floor per + // `DECISION:loop-parallel-overlap-policy`; a batch can only + // shrink the candidate set, never reintroduce a blocked task. + // Skipped plans and empty task_ids fall through unchanged. + let batch_source = self.current_batch_source(); + let ready: Vec = match (&self.batch_plan, batch_source) { + (Some(plan), Some(_)) => { + // Typical batch is <10 ids — linear contains is fine + // and avoids the per-tick HashSet allocation. + ready.into_iter().filter(|t| plan.task_ids.iter().any(|id| id == &t.id)).collect() + } + _ => ready, + }; + + let mut tick = DispatchTick { ready_seen: ready.len(), batch_source, ..Default::default() }; if ready.is_empty() { return Ok(tick); } @@ -366,7 +400,7 @@ mod tests { #[test] fn new_clamps_jobs_to_at_least_one() { - let d = Dispatcher::new(0, "run-x", "deadbeef", Scope::Ready); + let d = Dispatcher::new(0, "run-x", "deadbeef", Scope::Ready, None); assert_eq!(d.jobs(), 1); assert_eq!(d.slots().len(), 1); assert!(d.all_idle()); @@ -379,7 +413,7 @@ mod tests { // Regression for acceptance: N=1 picks the first ready task and // stops — identical to today's serial loop. 
let bd = MockBd::new(vec![ready("hew-a"), ready("hew-b"), ready("hew-c")]); - let mut d = Dispatcher::new(1, "run-1", "sha", Scope::Ready); + let mut d = Dispatcher::new(1, "run-1", "sha", Scope::Ready, None); let tick = d.dispatch_tick(&bd).expect("tick"); assert_eq!(tick.assignments.len(), 1, "exactly one slot filled"); assert_eq!(tick.assignments[0].slot_id, 0); @@ -393,7 +427,7 @@ mod tests { #[test] fn dispatcher_fills_all_slots_when_ready_has_enough() { let bd = MockBd::new(vec![ready("hew-a"), ready("hew-b"), ready("hew-c"), ready("hew-d")]); - let mut d = Dispatcher::new(3, "run-2", "sha", Scope::Ready); + let mut d = Dispatcher::new(3, "run-2", "sha", Scope::Ready, None); let tick = d.dispatch_tick(&bd).expect("tick"); assert_eq!(tick.assignments.len(), 3, "all 3 slots filled"); let ids: Vec<&str> = tick.assignments.iter().map(|a| a.task.id.as_str()).collect(); @@ -408,7 +442,7 @@ mod tests { #[test] fn dispatcher_skips_assignment_when_ready_empty() { let bd = MockBd::new(vec![]); - let mut d = Dispatcher::new(2, "run-3", "sha", Scope::Ready); + let mut d = Dispatcher::new(2, "run-3", "sha", Scope::Ready, None); let tick = d.dispatch_tick(&bd).expect("tick"); assert!(tick.assignments.is_empty()); assert_eq!(tick.ready_seen, 0); @@ -420,7 +454,7 @@ mod tests { fn dispatcher_does_nothing_when_all_slots_busy() { // No `bd ready` should even be called when capacity = 0. let bd = MockBd::new(vec![ready("hew-z")]); - let mut d = Dispatcher::new(1, "run-4", "sha", Scope::Ready); + let mut d = Dispatcher::new(1, "run-4", "sha", Scope::Ready, None); d.dispatch_tick(&bd).expect("first tick"); // Second tick: slot is full. 
let tick = d.dispatch_tick(&bd).expect("second tick"); @@ -436,7 +470,7 @@ mod tests { let bd = MockBd::new(vec![ready("hew-a"), ready("hew-b")]); bd.fail_claim("hew-a"); - let mut d = Dispatcher::new(1, "run-5", "sha", Scope::Ready); + let mut d = Dispatcher::new(1, "run-5", "sha", Scope::Ready, None); let tick = d.dispatch_tick(&bd).expect("tick"); assert_eq!(tick.claim_failures.len(), 1); @@ -454,7 +488,7 @@ mod tests { // returned the task to two `ready` queries before either // claim landed. Dispatcher must not double-assign. let bd = MockBd::new(vec![ready("hew-a")]); - let mut d = Dispatcher::new(2, "run-6", "sha", Scope::Ready); + let mut d = Dispatcher::new(2, "run-6", "sha", Scope::Ready, None); let tick1 = d.dispatch_tick(&bd).expect("first tick"); assert_eq!(tick1.assignments.len(), 1); assert_eq!(tick1.assignments[0].task.id, "hew-a"); @@ -470,7 +504,7 @@ mod tests { #[test] fn complete_returns_running_task_id_and_idles_slot() { let bd = MockBd::new(vec![ready("hew-a")]); - let mut d = Dispatcher::new(1, "run-7", "sha", Scope::Ready); + let mut d = Dispatcher::new(1, "run-7", "sha", Scope::Ready, None); d.dispatch_tick(&bd).expect("tick"); assert_eq!(d.complete(0), Some("hew-a".into())); assert!(d.all_idle()); @@ -487,7 +521,7 @@ mod tests { // calls when its iter body returns. Two workers run, both // complete in turn, capacity restores. let bd = MockBd::new(vec![ready("hew-a"), ready("hew-b"), ready("hew-c")]); - let mut d = Dispatcher::new(2, "run-8", "sha", Scope::Ready); + let mut d = Dispatcher::new(2, "run-8", "sha", Scope::Ready, None); let t1 = d.dispatch_tick(&bd).expect("tick 1"); assert_eq!(t1.assignments.len(), 2); @@ -515,7 +549,7 @@ mod tests { // Scope::Ready must surface every bd-ready task — no descendant // walk, no filtering. 
let bd = MockBd::new(vec![ready("hew-a"), ready("hew-b"), ready("hew-c")]); - let mut d = Dispatcher::new(3, "run-scope-ready", "sha", Scope::Ready); + let mut d = Dispatcher::new(3, "run-scope-ready", "sha", Scope::Ready, None); let tick = d.dispatch_tick(&bd).expect("tick"); assert_eq!(tick.ready_seen, 3); let ids: Vec<&str> = tick.assignments.iter().map(|a| a.task.id.as_str()).collect(); @@ -535,7 +569,7 @@ mod tests { .with_children("hew-child-2", &[]) .with_children("hew-stranger", &[]); let scope = Scope::Epics { epic_ids: vec!["hew-epic-a".into()] }; - let mut d = Dispatcher::new(3, "run-scope-epic", "sha", scope); + let mut d = Dispatcher::new(3, "run-scope-epic", "sha", scope, None); let tick = d.dispatch_tick(&bd).expect("tick"); assert_eq!(tick.ready_seen, 2, "stranger filtered out"); let ids: Vec<&str> = tick.assignments.iter().map(|a| a.task.id.as_str()).collect(); @@ -551,7 +585,7 @@ mod tests { let bd = MockBd::new(vec![ready("hew-stranger"), ready("hew-other")]) .with_children("hew-epic-empty", &[]); let scope = Scope::Epics { epic_ids: vec!["hew-epic-empty".into()] }; - let mut d = Dispatcher::new(2, "run-scope-empty", "sha", scope); + let mut d = Dispatcher::new(2, "run-scope-empty", "sha", scope, None); let tick = d.dispatch_tick(&bd).expect("tick"); assert_eq!(tick.ready_seen, 0); assert!(tick.assignments.is_empty()); @@ -569,7 +603,7 @@ mod tests { .with_children("hew-epic-live", &["hew-child-1"]) .with_children("hew-child-1", &[]); let scope = Scope::Epics { epic_ids: vec!["hew-epic-live".into()] }; - let mut d = Dispatcher::new(2, "run-scope-live", "sha", scope); + let mut d = Dispatcher::new(2, "run-scope-live", "sha", scope, None); let t1 = d.dispatch_tick(&bd).expect("first tick"); assert_eq!(t1.assignments.len(), 1); @@ -584,4 +618,122 @@ mod tests { assert_eq!(t2.assignments.len(), 1); assert_eq!(t2.assignments[0].task.id, "hew-child-2"); } + + // ── BatchPlan filter coverage ─────────────────────────────────── + + fn plan(iter: u32, 
source: BatchSource, ids: &[&str]) -> BatchPlan { + BatchPlan { + schema_version: crate::batch_plan::SCHEMA_VERSION, + iter_number: iter, + task_ids: ids.iter().map(|s| s.to_string()).collect(), + source, + reason: None, + created_at: "2026-05-30T00:00:00Z".into(), + planner_tokens: None, + } + } + + #[test] + fn dispatch_tick_no_plan_behaves_as_today() { + // Sanity: a `batch_plan: None` Dispatcher behaves identically + // to the pre-batch-plan world. Mirrors `n1_dispatcher_assigns_…`. + let bd = MockBd::new(vec![ready("hew-a"), ready("hew-b")]); + let mut d = Dispatcher::new(2, "run-bp-none", "sha", Scope::Ready, None); + let tick = d.dispatch_tick(&bd).expect("tick"); + assert_eq!(tick.ready_seen, 2); + assert_eq!(tick.assignments.len(), 2); + assert!(tick.batch_source.is_none(), "no plan → no batch_source"); + assert!(d.current_batch_source().is_none()); + } + + #[test] + fn dispatch_tick_agent_plan_filters_candidates() { + let bd = MockBd::new(vec![ready("hew-a"), ready("hew-b"), ready("hew-c")]); + let plan = plan(1, BatchSource::Agent, &["hew-b"]); + let mut d = Dispatcher::new(2, "run-bp-agent", "sha", Scope::Ready, Some(plan)); + let tick = d.dispatch_tick(&bd).expect("tick"); + assert_eq!(tick.ready_seen, 1, "post-filter count"); + assert_eq!(tick.assignments.len(), 1); + assert_eq!(tick.assignments[0].task.id, "hew-b"); + assert_eq!(tick.batch_source, Some(BatchSource::Agent)); + assert_eq!(bd.claimed(), vec!["hew-b"]); + } + + #[test] + fn dispatch_tick_planner_plan_filters_candidates() { + let bd = MockBd::new(vec![ready("hew-a"), ready("hew-b"), ready("hew-c")]); + let plan = plan(1, BatchSource::Planner, &["hew-a", "hew-c"]); + let mut d = Dispatcher::new(3, "run-bp-planner", "sha", Scope::Ready, Some(plan)); + let tick = d.dispatch_tick(&bd).expect("tick"); + assert_eq!(tick.ready_seen, 2); + let ids: Vec<&str> = tick.assignments.iter().map(|a| a.task.id.as_str()).collect(); + assert_eq!(ids, vec!["hew-a", "hew-c"]); + assert_eq!(tick.batch_source, 
Some(BatchSource::Planner)); + } + + #[test] + fn dispatch_tick_skipped_plan_falls_through_to_full_bd_ready() { + // Source::Skipped means trust-the-graph — no filtering, no + // batch_source on the tick. + let bd = MockBd::new(vec![ready("hew-a"), ready("hew-b")]); + let plan = plan(1, BatchSource::Skipped, &[]); + let mut d = Dispatcher::new(2, "run-bp-skip", "sha", Scope::Ready, Some(plan)); + let tick = d.dispatch_tick(&bd).expect("tick"); + assert_eq!(tick.ready_seen, 2); + assert_eq!(tick.assignments.len(), 2); + assert!(tick.batch_source.is_none()); + assert!(d.current_batch_source().is_none()); + } + + #[test] + fn dispatch_tick_empty_task_ids_falls_through_to_full_bd_ready() { + // Defensive: an Agent/Planner plan with an empty task_ids array + // is treated as no-narrowing rather than "block everything". + let bd = MockBd::new(vec![ready("hew-a"), ready("hew-b")]); + let plan = plan(1, BatchSource::Agent, &[]); + let mut d = Dispatcher::new(2, "run-bp-empty", "sha", Scope::Ready, Some(plan)); + let tick = d.dispatch_tick(&bd).expect("tick"); + assert_eq!(tick.ready_seen, 2); + assert_eq!(tick.assignments.len(), 2); + assert!(tick.batch_source.is_none(), "empty task_ids → no narrowing signaled"); + } + + #[test] + fn dispatch_tick_batch_task_id_not_in_ready_is_dropped() { + // Hard floor: a batch naming a blocked or unknown task does not + // resurrect it. batch ∩ bd-ready, never batch ∪ anything. 
+ let bd = MockBd::new(vec![ready("hew-a")]); + let plan = plan(1, BatchSource::Agent, &["hew-a", "hew-blocked", "hew-ghost"]); + let mut d = Dispatcher::new(3, "run-bp-floor", "sha", Scope::Ready, Some(plan)); + let tick = d.dispatch_tick(&bd).expect("tick"); + assert_eq!(tick.ready_seen, 1, "blocked/ghost ids dropped by intersect"); + assert_eq!(tick.assignments.len(), 1); + assert_eq!(tick.assignments[0].task.id, "hew-a"); + assert_eq!(bd.claimed(), vec!["hew-a"]); + } + + #[test] + fn dispatch_tick_ready_seen_reflects_post_filter_count() { + // Explicit pin: ready_seen is the post-filter, post-scope count + // — what downstream summary aggregation consumes. + let bd = MockBd::new(vec![ready("hew-a"), ready("hew-b"), ready("hew-c"), ready("hew-d")]); + let plan = plan(2, BatchSource::Planner, &["hew-b", "hew-c"]); + let mut d = Dispatcher::new(4, "run-bp-count", "sha", Scope::Ready, Some(plan)); + let tick = d.dispatch_tick(&bd).expect("tick"); + assert_eq!(tick.ready_seen, 2, "two of four candidates survived the filter"); + } + + #[test] + fn dispatch_tick_batch_source_captured_for_summary() { + // The summary path reads `Dispatcher::current_batch_source()` + // out-of-band of any tick; verify the accessor returns the + // active provenance and matches the per-tick field. + let bd = MockBd::new(vec![ready("hew-a")]); + let plan = plan(1, BatchSource::Agent, &["hew-a"]); + let mut d = Dispatcher::new(1, "run-bp-summary", "sha", Scope::Ready, Some(plan)); + assert_eq!(d.current_batch_source(), Some(BatchSource::Agent)); + let tick = d.dispatch_tick(&bd).expect("tick"); + assert_eq!(tick.batch_source, Some(BatchSource::Agent)); + assert_eq!(d.current_batch_source(), Some(BatchSource::Agent)); + } } diff --git a/hew/src/commands/loop_cmd.rs b/hew/src/commands/loop_cmd.rs index 8b02c17..2d7a97c 100644 --- a/hew/src/commands/loop_cmd.rs +++ b/hew/src/commands/loop_cmd.rs @@ -923,7 +923,7 @@ fn run_loop_parallel( // Dispatcher" acceptance holds across both paths. 
The scope was // resolved once at the top of `run_loop` and threaded here. let mut dispatcher = - hew_core::dispatcher::Dispatcher::new(args.jobs, &run_id, &base_sha, scope.clone()); + hew_core::dispatcher::Dispatcher::new(args.jobs, &run_id, &base_sha, scope.clone(), None); // v1 wiring: one tick to fill all slots, then drive each worker's // loop in a scoped thread. The dispatcher's slot-fill state machine From f5d45358d6fbd29e263bd41a99db9e60807beec0 Mon Sep 17 00:00:00 2001 From: droidnoob Date: Sat, 30 May 2026 11:31:30 +0530 Subject: [PATCH 5/8] feat(loop): planner config + CLI + iter-end batch hook (hew-7k1m) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - LoopPlannerConfig {enabled, budget_tokens, runtime}; default enabled=true / 10_000 tokens / runtime=None. - hew config get/set for loop.planner.{enabled,budget_tokens,runtime}. - hew loop run --no-planner / --planner-budget / --planner-runtime, resolved via resolve_planner_config (CLI > config > default). - Iter-end hook in run_worker_loop_with_scope writes /batch-NNN+1.json under --jobs >= 2 covering all four branches: Agent (raw_text named the block) → Planner (spawned) → Skipped (planner_disabled / budget_exceeded / parse_error / runtime_error) → bypass entirely when jobs == 1. - Pure resolve_iter_completion_plan helper keeps the branch arithmetic test-friendly. --- hew-core/src/config.rs | 111 ++++++++++++ hew/src/commands/loop_cmd.rs | 294 +++++++++++++++++++++++++++++++- hew/tests/loop_backpressure.rs | 73 ++++++++ hew/tests/loop_dynamic_model.rs | 3 + hew/tests/loop_parallel_e2e.rs | 3 + 5 files changed, 480 insertions(+), 4 deletions(-) diff --git a/hew-core/src/config.rs b/hew-core/src/config.rs index fd0d6a0..e45a267 100644 --- a/hew-core/src/config.rs +++ b/hew-core/src/config.rs @@ -65,6 +65,41 @@ pub struct LoopConfig { /// Per-task model selection knobs consumed by the dynamic-model /// resolver (epic `hew-1tq`). All-None / empty by default. 
pub model: LoopModelConfig, + /// Planner-spawn knobs consumed by the iter-end batch-plan hook + /// when `hew loop run --jobs N >= 2` (epic `hew-lf40` / + /// `hew-7k1m`). Disabled / `0` for jobs == 1 — the entire layer + /// is bypassed so the fast path stays free of planner overhead. + pub planner: LoopPlannerConfig, +} + +/// Planner-spawn knobs. The planner is the inter-iter advisor that +/// produces a [`crate::batch_plan::BatchPlan`] for the next iter when +/// the previous iter's agent output did not name one. All fields are +/// per-run overridable via CLI flags on `hew loop run` (see +/// `--no-planner` / `--planner-budget` / `--planner-runtime`). +#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)] +#[serde(default)] +pub struct LoopPlannerConfig { + /// Whether the planner runs at all. `false` ⇒ every iter-end that + /// doesn't find an agent-named batch writes a `Skipped` plan with + /// `reason = "planner_disabled"` instead of spawning. Default + /// `true`. + pub enabled: bool, + /// Pre-spawn token-estimate budget. The planner refuses to spawn + /// when the assembled prompt would exceed this (and emits a + /// `Skipped` plan with `reason = "budget_exceeded: ..."`). Default + /// `10_000`. `0` is treated as "always exceeded" — useful for + /// disabling planner spawns without flipping `enabled`. + pub budget_tokens: u32, + /// Runtime to drive the planner. `None` ⇒ inherit the loop's + /// primary runtime. Accepts `"claude"` / `"codex"`. + pub runtime: Option, +} + +impl Default for LoopPlannerConfig { + fn default() -> Self { + Self { enabled: true, budget_tokens: 10_000, runtime: None } + } } /// Persistent inputs for the dynamic per-task model resolver. 
Model @@ -371,6 +406,11 @@ pub fn get(cfg: &Config, key: &str) -> Option { "loop.model.by_type" | "loop.model.by-type" => { Some(format_map(&cfg.loop_cfg.model.by_type)) } + "loop.planner.enabled" => Some(cfg.loop_cfg.planner.enabled.to_string()), + "loop.planner.budget_tokens" | "loop.planner.budget-tokens" => { + Some(cfg.loop_cfg.planner.budget_tokens.to_string()) + } + "loop.planner.runtime" => cfg.loop_cfg.planner.runtime.clone(), k if k.starts_with("loop.model.by_priority.") || k.starts_with("loop.model.by-priority.") => { @@ -592,6 +632,28 @@ pub fn set(cfg: &mut Config, key: &str, value: &str) -> Result<()> { cfg.loop_cfg.model.by_type.insert(sub.to_string(), value.to_string()); } } + "loop.planner.enabled" => cfg.loop_cfg.planner.enabled = bool_val(value)?, + "loop.planner.budget_tokens" | "loop.planner.budget-tokens" => { + let n: u32 = value.parse().map_err(|_| HewError::MissingFlag { + flag: format!("value (expected non-negative integer, got `{value}`)"), + })?; + cfg.loop_cfg.planner.budget_tokens = n; + } + "loop.planner.runtime" => { + if value.is_empty() { + cfg.loop_cfg.planner.runtime = None; + } else { + if !crate::runtime::RuntimeKind::VARIANTS.contains(&value) { + return Err(HewError::MissingFlag { + flag: format!( + "value (expected one of {}, got `{value}`)", + crate::runtime::RuntimeKind::VARIANTS.join("|") + ), + }); + } + cfg.loop_cfg.planner.runtime = Some(value.to_string()); + } + } _ => { return Err(HewError::MissingFlag { flag: format!("key (unknown: {key})") }); } @@ -628,6 +690,9 @@ pub fn keys() -> &'static [&'static str] { "loop.model.default", "loop.model.by_priority", "loop.model.by_type", + "loop.planner.enabled", + "loop.planner.budget_tokens", + "loop.planner.runtime", ] } @@ -723,6 +788,9 @@ mod tests { "loop.model.default" => "sonnet-4-6", "loop.model.by_priority" => "P0=opus-4-7,P3=haiku-4-5", "loop.model.by_type" => "bug=sonnet-4-6,chore=haiku-4-5", + "loop.planner.enabled" => "true", + "loop.planner.budget_tokens" => 
"20000", + "loop.planner.runtime" => "codex", k if k.starts_with("optional-skills.") => "yes", _ => "true", }; @@ -1176,6 +1244,49 @@ fallback_runtime = "codex" assert_eq!(loaded.loop_cfg.model.by_type.get("bug").unwrap(), "sonnet-4-6"); } + // ──────── loop.planner.* ──────── + + #[test] + fn loop_planner_config_default_is_enabled_10k_tokens() { + let cfg = Config::default(); + assert!(cfg.loop_cfg.planner.enabled); + assert_eq!(cfg.loop_cfg.planner.budget_tokens, 10_000); + assert!(cfg.loop_cfg.planner.runtime.is_none()); + assert_eq!(get(&cfg, "loop.planner.enabled"), Some("true".into())); + assert_eq!(get(&cfg, "loop.planner.budget_tokens"), Some("10000".into())); + assert_eq!(get(&cfg, "loop.planner.runtime"), None); + } + + #[test] + fn config_loop_planner_get_set_roundtrip() { + let tmp = tempfile::tempdir().unwrap(); + let path = tmp.path().join("config.toml"); + let mut cfg = Config::default(); + set(&mut cfg, "loop.planner.enabled", "false").unwrap(); + set(&mut cfg, "loop.planner.budget_tokens", "25000").unwrap(); + set(&mut cfg, "loop.planner.runtime", "codex").unwrap(); + assert!(!cfg.loop_cfg.planner.enabled); + assert_eq!(cfg.loop_cfg.planner.budget_tokens, 25_000); + assert_eq!(cfg.loop_cfg.planner.runtime.as_deref(), Some("codex")); + save_to(&path, &cfg).unwrap(); + + let loaded = load_from(&path).unwrap(); + assert!(!loaded.loop_cfg.planner.enabled); + assert_eq!(loaded.loop_cfg.planner.budget_tokens, 25_000); + assert_eq!(loaded.loop_cfg.planner.runtime.as_deref(), Some("codex")); + + // Clear runtime back to None. + set(&mut cfg, "loop.planner.runtime", "").unwrap(); + assert!(cfg.loop_cfg.planner.runtime.is_none()); + // Invalid runtime rejected. + assert!(set(&mut cfg, "loop.planner.runtime", "cursor").is_err()); + // Non-numeric budget rejected. + assert!(set(&mut cfg, "loop.planner.budget_tokens", "lots").is_err()); + // Bool variants accepted. 
+ set(&mut cfg, "loop.planner.enabled", "yes").unwrap(); + assert!(cfg.loop_cfg.planner.enabled); + } + #[test] fn compact_keys_survive_disk_roundtrip() { let tmp = tempfile::tempdir().unwrap(); diff --git a/hew/src/commands/loop_cmd.rs b/hew/src/commands/loop_cmd.rs index 2d7a97c..6910fec 100644 --- a/hew/src/commands/loop_cmd.rs +++ b/hew/src/commands/loop_cmd.rs @@ -22,10 +22,11 @@ use std::time::Duration; use clap::{Args as ClapArgs, Subcommand, ValueEnum}; use hew_core::backpressure::{self, GateCheck, Verdict}; +use hew_core::batch_plan; use hew_core::batch_plan::{BatchPlan, BatchSource, SCHEMA_VERSION as BATCH_PLAN_SCHEMA_VERSION}; use hew_core::batch_plan_parse::extract_next_iteration; use hew_core::bd::{BdClient, ReadyTask, RealBd}; -use hew_core::config::LoopModelConfig; +use hew_core::config::{LoopModelConfig, LoopPlannerConfig}; use hew_core::error::HewError; use hew_core::loop_log::{ IterLog, LOOP_ROOT, Manifest, ManifestWorker, RunLog, iter_log_path, new_run_id, run_dir, @@ -382,6 +383,67 @@ pub struct Args { /// list. Example: `--epic hew-6az --epic hew-1tq`. #[arg(long = "epic", value_name = "EPIC_ID")] pub epic: Vec, + + /// Disable the inter-iter planner for this run. When set, every + /// iter-end that doesn't surface an agent-named `next_iteration:` + /// block writes a `Skipped { reason: "planner_disabled" }` batch + /// plan instead of spawning a planner subprocess. Overrides + /// `loop.planner.enabled` config. + #[arg(long, default_value_t = false, action = clap::ArgAction::SetTrue)] + pub no_planner: bool, + + /// Per-spawn token-estimate budget for the planner. Overrides + /// `loop.planner.budget_tokens` config. `0` disables planner + /// spawns without flipping `--no-planner`. Default `10000`. + #[arg(long)] + pub planner_budget: Option, + + /// Runtime to drive the planner. Overrides + /// `loop.planner.runtime` config; falling back to the loop's + /// primary runtime when unset. 
+ #[arg( + long, + value_parser = clap::builder::PossibleValuesParser::new(RuntimeKind::VARIANTS), + )] + pub planner_runtime: Option, +} + +/// Resolve the effective [`LoopPlannerConfig`] for this run. Precedence: +/// +/// 1. `--no-planner` CLI flag (sticky `enabled = false`) +/// 2. Per-flag overrides (`--planner-budget`, `--planner-runtime`) +/// 3. `loop.planner.*` config values +/// 4. Compiled-in defaults (enabled, 10_000 tokens, runtime = primary) +/// +/// Validates the planner runtime is a known [`RuntimeKind`] so a bad +/// CLI/config value fails before the run starts rather than at first +/// iter-end spawn attempt. +pub fn resolve_planner_config( + args: &Args, + base: &LoopPlannerConfig, +) -> miette::Result { + let mut out = base.clone(); + if args.no_planner { + out.enabled = false; + } + if let Some(b) = args.planner_budget { + out.budget_tokens = b; + } + if let Some(r) = args.planner_runtime.as_deref() { + // Validate against the RuntimeKind allowlist. Empty string + // clears the override. + if r.is_empty() { + out.runtime = None; + } else { + let _: RuntimeKind = r.parse().map_err(|e: String| miette::miette!("{e}"))?; + out.runtime = Some(r.to_string()); + } + } else if let Some(r) = out.runtime.as_deref() { + // Validate config-sourced runtime too — bad on-disk config + // shouldn't silently turn into a missing planner. + let _: RuntimeKind = r.parse().map_err(|e: String| miette::miette!("{e}"))?; + } + Ok(out) } /// CLI surface of [`Scope`]. 
The runtime type lives in @@ -457,6 +519,7 @@ pub fn run_loop(ctx: &Ctx, args: Args) -> miette::Result<()> { if args.dry_run { None } else { fallback.runtime.map(build_spawner_for) }; let gate = AutoGateRunner; let loop_model = cfg.loop_cfg.model.clone(); + let planner_cfg = resolve_planner_config(&args, &cfg.loop_cfg.planner)?; run_loop_with_scope( ctx, args, @@ -465,6 +528,7 @@ pub fn run_loop(ctx: &Ctx, args: Args) -> miette::Result<()> { fallback_spawner.as_deref(), fallback, loop_model, + planner_cfg, &gate, &project_root, scope, @@ -715,6 +779,7 @@ pub fn run_loop_with( fallback_spawner, fallback, loop_model, + LoopPlannerConfig::default(), gate, project_root, Scope::Ready, @@ -730,6 +795,7 @@ pub fn run_loop_with_scope( fallback_spawner: Option<&dyn RuntimeSpawner>, fallback: FallbackConfig, loop_model: LoopModelConfig, + planner_cfg: LoopPlannerConfig, gate: &dyn GateRunner, project_root: &Path, scope: Scope, @@ -747,6 +813,7 @@ pub fn run_loop_with_scope( fallback_spawner, fallback, loop_model, + planner_cfg, gate, project_root, scope, @@ -760,6 +827,7 @@ pub fn run_loop_with_scope( fallback_spawner, fallback, loop_model, + planner_cfg, gate, project_root, scope, @@ -779,6 +847,7 @@ fn run_loop_serial( fallback_spawner: Option<&dyn RuntimeSpawner>, fallback: FallbackConfig, loop_model: LoopModelConfig, + planner_cfg: LoopPlannerConfig, gate: &dyn GateRunner, project_root: &Path, scope: Scope, @@ -831,6 +900,7 @@ fn run_loop_serial( fallback_spawner, fallback, loop_model.clone(), + planner_cfg.clone(), gate, &worker, &skill, @@ -894,6 +964,7 @@ fn run_loop_parallel( fallback_spawner: Option<&dyn RuntimeSpawner>, fallback: FallbackConfig, loop_model: LoopModelConfig, + planner_cfg: LoopPlannerConfig, gate: &dyn GateRunner, project_root: &Path, scope: Scope, @@ -1006,6 +1077,7 @@ fn run_loop_parallel( fallback_spawner, fallback, loop_model.clone(), + planner_cfg.clone(), gate, worker, &skill, @@ -1138,6 +1210,7 @@ pub fn run_worker_loop( fallback_spawner, 
fallback, loop_model, + LoopPlannerConfig::default(), gate, worker, skill, @@ -1158,6 +1231,7 @@ pub fn run_worker_loop_with_scope( fallback_spawner: Option<&dyn RuntimeSpawner>, fallback: FallbackConfig, loop_model: LoopModelConfig, + planner_cfg: LoopPlannerConfig, gate: &dyn GateRunner, worker: &Worker, skill: &skills::Skill, @@ -1310,7 +1384,8 @@ pub fn run_worker_loop_with_scope( ); let spawn_opts = SpawnOpts { model_override, working_dir: None }; - let (mut outcome, tokens, mut stderr_tail, failure_class) = if let Some(s) = active_spawner + let (mut outcome, tokens, mut stderr_tail, failure_class, raw_text) = if let Some(s) = + active_spawner { match s.spawn(&assembled, allowed, &spawn_opts) { Ok(out) => { @@ -1321,7 +1396,7 @@ pub fn run_worker_loop_with_scope( } else { IterOutcome::RuntimeError }; - (oc, out.tokens, Some(out.stderr_tail), out.failure_class) + (oc, out.tokens, Some(out.stderr_tail), out.failure_class, out.raw_text) } Err(e) => { if !ctx.quiet { @@ -1332,11 +1407,18 @@ pub fn run_worker_loop_with_scope( Default::default(), Some(format!("{e}")), SpawnFailureClass::RuntimeError(hew_core::runtime::RuntimeErrorKind::Spawn), + String::new(), ) } } } else { - (IterOutcome::NoClose, Default::default(), None, SpawnFailureClass::Success) + ( + IterOutcome::NoClose, + Default::default(), + None, + SpawnFailureClass::Success, + String::new(), + ) }; // Out-of-band closure detection. detect_closed_task only @@ -1504,6 +1586,54 @@ pub fn run_worker_loop_with_scope( .map_err(|e| miette::miette!("write iter log: {e}"))?; iter_logs.push(log); + // Iter-end batch-plan hook (hew-7k1m). Only fires for the + // parallel path (`--jobs >= 2`); the N=1 fast path never writes + // a batch file. The plan describes the NEXT iter's batch and + // resolves via four branches: + // 1. Agent: previous iter's raw_text named a `next_iteration:` + // block. + // 2. 
Planner: planner runtime returns a fresh batch (Planner + // source) or declines (Skipped with parse/runtime/budget + // reason). + // 3. Skipped: planner disabled → `reason = "planner_disabled"`. + // 4. (jobs == 1): layer bypassed entirely. + if args.jobs > 1 { + let next_iter = iter_number + 1; + let planner_kind = planner_cfg + .runtime + .as_deref() + .map(|r| r.parse::()) + .transpose() + .map_err(|e: String| miette::miette!("{e}"))? + .unwrap_or(primary_kind); + let plan = resolve_iter_completion_plan(&raw_text, &planner_cfg, next_iter, |ni| { + // Reuse the active loop spawner when it matches the + // planner's runtime — avoids constructing a second + // subprocess channel for the common config-less path. + let inherited = if planner_kind == active_kind { active_spawner } else { None }; + let built; + let planner_spawner: &dyn RuntimeSpawner = if let Some(s) = inherited { + s + } else { + built = build_spawner_for(planner_kind); + &*built + }; + spawn_planner_with( + planner_spawner, + &[], + &[], + planner_cfg.budget_tokens, + &worker.worktree_dir, + ni, + ) + }); + if let Err(e) = batch_plan::write(&worker.log_dir, &plan) + && !ctx.quiet + { + eprintln!("iter {iter_number} batch-plan write failed: {e}"); + } + } + // When a fallback is wired and the cooldown machinery is // actively routing iters, swallow RuntimeError from the // stop-signal point of view — the loop should switch to the @@ -1988,6 +2118,45 @@ fn assemble_planner_prompt( prompt::assemble(PLANNER_PROMPT_BODY, "", &tail) } +/// Pure resolution of the iter-end batch plan when the parallel +/// dispatcher (`--jobs >= 2`) needs to seed the next iter. 
Splits the +/// four branches per `hew-7k1m`: +/// +/// - `Some(ids)` from `extract_next_iteration(raw_text)` → Agent +/// - planner disabled OR budget = 0 → Skipped { planner_disabled } +/// - planner enabled → returned by the injected closure (which the +/// caller wires to [`spawn_planner_with`]) +/// +/// Splitting this away from the worker loop's side-effects keeps the +/// branch arithmetic test-friendly: no git, no bd, no spawner ctor. +pub fn resolve_iter_completion_plan( + raw_text: &str, + planner_cfg: &LoopPlannerConfig, + next_iter: u32, + planner_fn: F, +) -> BatchPlan +where + F: FnOnce(u32) -> BatchPlan, +{ + if !raw_text.is_empty() + && let Some(ids) = hew_core::batch_plan_parse::extract_next_iteration(raw_text) + { + return BatchPlan { + schema_version: BATCH_PLAN_SCHEMA_VERSION, + iter_number: next_iter, + task_ids: ids, + source: BatchSource::Agent, + reason: None, + created_at: iso_now_utc(), + planner_tokens: None, + }; + } + if !planner_cfg.enabled || planner_cfg.budget_tokens == 0 { + return skipped_plan(next_iter, "planner_disabled"); + } + planner_fn(next_iter) +} + /// Build a `Skipped` batch plan with the given reason. Used by every /// non-success path in [`spawn_planner`] so the caller sees one /// shape regardless of why the planner declined. 
@@ -2132,6 +2301,9 @@ mod tests { scope: None, epics: Vec::new(), epic: Vec::new(), + no_planner: false, + planner_budget: None, + planner_runtime: None, } } @@ -2409,6 +2581,120 @@ mod tests { assert!(prompt.full_text.contains("hew-aaa")); } + // ---- iter-end batch hook (hew-7k1m) ----------------------------- + + fn agent_plan_via_fenced_block() -> &'static str { + "thinking...\n\n```next_iteration\n[\"hew-foo\", \"hew-bar\"]\n```\nDone.\n" + } + + fn never_planner(_ni: u32) -> BatchPlan { + panic!("planner closure must not run for this branch"); + } + + #[test] + fn iter_completion_writes_agent_sourced_batch_when_block_present() { + let cfg = LoopPlannerConfig::default(); + let plan = + resolve_iter_completion_plan(agent_plan_via_fenced_block(), &cfg, 7, never_planner); + assert_eq!(plan.source, BatchSource::Agent); + assert_eq!(plan.iter_number, 7); + assert_eq!(plan.task_ids, vec!["hew-foo".to_string(), "hew-bar".to_string()]); + assert!(plan.reason.is_none()); + assert!(plan.planner_tokens.is_none()); + } + + #[test] + fn iter_completion_writes_planner_sourced_batch_when_agent_silent() { + let cfg = LoopPlannerConfig::default(); + let plan = resolve_iter_completion_plan("no fenced block, just prose", &cfg, 4, |ni| { + // Stand-in for spawn_planner_with: returns a Planner-sourced plan. 
+ BatchPlan { + schema_version: BATCH_PLAN_SCHEMA_VERSION, + iter_number: ni, + task_ids: vec!["hew-planned".into()], + source: BatchSource::Planner, + reason: None, + created_at: iso_now_utc(), + planner_tokens: Some(TokenSpend { + input: 1, + output: 1, + cache_read: 0, + cache_create: 0, + }), + } + }); + assert_eq!(plan.source, BatchSource::Planner); + assert_eq!(plan.iter_number, 4); + assert_eq!(plan.task_ids, vec!["hew-planned".to_string()]); + assert!(plan.planner_tokens.is_some()); + } + + #[test] + fn iter_completion_writes_skipped_batch_when_planner_disabled() { + let cfg = LoopPlannerConfig { enabled: false, ..Default::default() }; + let plan = resolve_iter_completion_plan("", &cfg, 3, never_planner); + assert_eq!(plan.source, BatchSource::Skipped); + assert_eq!(plan.iter_number, 3); + assert!(plan.task_ids.is_empty()); + assert_eq!(plan.reason.as_deref(), Some("planner_disabled")); + } + + #[test] + fn iter_completion_skipped_when_budget_zero() { + // `--planner-budget 0` is the documented "off without flipping + // --no-planner" knob; it must short-circuit before the closure + // runs. + let cfg = LoopPlannerConfig { budget_tokens: 0, ..Default::default() }; + let plan = resolve_iter_completion_plan("", &cfg, 2, never_planner); + assert_eq!(plan.source, BatchSource::Skipped); + assert_eq!(plan.reason.as_deref(), Some("planner_disabled")); + } + + #[test] + fn iter_completion_agent_wins_over_planner() { + // Agent block in raw_text → planner closure must not fire even + // when planner is enabled. 
+ let cfg = LoopPlannerConfig::default(); + let plan = + resolve_iter_completion_plan(agent_plan_via_fenced_block(), &cfg, 9, never_planner); + assert_eq!(plan.source, BatchSource::Agent); + } + + #[test] + fn cli_no_planner_skips_planner_call_even_when_agent_silent() { + let mut args = default_args(); + args.no_planner = true; + let resolved = resolve_planner_config(&args, &LoopPlannerConfig::default()).unwrap(); + assert!(!resolved.enabled); + let plan = resolve_iter_completion_plan("", &resolved, 1, never_planner); + assert_eq!(plan.source, BatchSource::Skipped); + assert_eq!(plan.reason.as_deref(), Some("planner_disabled")); + } + + #[test] + fn cli_planner_budget_overrides_config() { + let mut args = default_args(); + args.planner_budget = Some(42); + let base = LoopPlannerConfig { budget_tokens: 999, ..Default::default() }; + let resolved = resolve_planner_config(&args, &base).unwrap(); + assert_eq!(resolved.budget_tokens, 42); + } + + #[test] + fn cli_planner_runtime_overrides_config() { + let mut args = default_args(); + args.planner_runtime = Some("codex".into()); + let resolved = resolve_planner_config(&args, &LoopPlannerConfig::default()).unwrap(); + assert_eq!(resolved.runtime.as_deref(), Some("codex")); + } + + #[test] + fn cli_planner_runtime_rejects_unknown_kind() { + let mut args = default_args(); + args.planner_runtime = Some("cursor".into()); + assert!(resolve_planner_config(&args, &LoopPlannerConfig::default()).is_err()); + } + #[test] fn planner_tokens_field_populated_on_success() { let mock = MockSpawner::new(planner_outcome_with("```next_iteration\n[\"hew-aaa\"]\n```")); diff --git a/hew/tests/loop_backpressure.rs b/hew/tests/loop_backpressure.rs index 1ae6c2a..e0d2e3c 100644 --- a/hew/tests/loop_backpressure.rs +++ b/hew/tests/loop_backpressure.rs @@ -133,6 +133,9 @@ fn args_one_iter() -> Args { scope: None, epics: Vec::new(), epic: Vec::new(), + no_planner: false, + planner_budget: None, + planner_runtime: None, } } @@ -822,6 +825,9 @@ fn 
cooldown_routes_to_fallback_for_n_iters_then_retries_primary() { scope: None, epics: Vec::new(), epic: Vec::new(), + no_planner: false, + planner_budget: None, + planner_runtime: None, }; let fallback_cfg = FallbackConfig { runtime: Some(hew_core::runtime::RuntimeKind::Codex), cooldown_iters: 3 }; @@ -976,6 +982,73 @@ fn gate_is_called_with_worker_worktree_dir() { ); } +/// hew-7k1m: the N=1 fast path bypasses the iter-end batch-plan layer +/// entirely. Even after a successful iter, no `batch-NNN.json` file is +/// written under the run dir. +#[test] +fn iter_completion_skips_layer_entirely_when_jobs_is_1() { + let tmp = tempfile::tempdir().expect("tempdir"); + let repo = tmp.path().to_path_buf(); + git(&repo, &["init", "-q", "-b", "main"]); + std::fs::write(repo.join("README.md"), b"seed\n").unwrap(); + git(&repo, &["add", "README.md"]); + git(&repo, &["commit", "-q", "-m", "seed"]); + + let bd = CapturingBd { + ready: vec![ReadyTask { + id: "hew-test".into(), + title: "synthetic ready task".into(), + description: String::new(), + priority: 1, + status: "open".into(), + issue_type: "task".into(), + parent: None, + }], + remembered: RefCell::new(Vec::new()), + }; + let spawner = CommitMakingSpawner { repo_dir: repo.clone() }; + let gate = + StaticGateRunner(GateCheck { tests_passed: true, lint_passed: true, ..Default::default() }); + + let cwd_guard = std::env::current_dir().unwrap(); + std::env::set_current_dir(&repo).expect("cd repo"); + let res = run_loop_with( + &ctx(), + args_one_iter(), + &bd, + Some(&spawner), + None, + FallbackConfig::default(), + LoopModelConfig::default(), + &gate, + &repo, + ); + std::env::set_current_dir(cwd_guard).ok(); + res.expect("loop runs"); + + // Walk `.hew/loop//` and confirm no batch-*.json files. 
+ let loop_root = repo.join(".hew").join("loop"); + let run_dirs: Vec = std::fs::read_dir(&loop_root) + .expect("loop dir present") + .filter_map(|e| e.ok()) + .map(|e| e.path()) + .filter(|p| p.is_dir()) + .collect(); + assert_eq!(run_dirs.len(), 1, "exactly one run dir"); + let batch_files: Vec = std::fs::read_dir(&run_dirs[0]) + .expect("run dir readable") + .filter_map(|e| e.ok()) + .map(|e| e.path()) + .filter(|p| { + p.file_name() + .and_then(|s| s.to_str()) + .map(|n| n.starts_with("batch-") && n.ends_with(".json")) + .unwrap_or(false) + }) + .collect(); + assert!(batch_files.is_empty(), "jobs=1 must not write any batch-*.json, got {batch_files:?}",); +} + /// hew-j4x: the single-worker fast path must keep its prior behavior — /// `run_loop_with` constructs a `Worker` with `worktree_dir = /// project_root`, so the gate is invoked at the project root just like diff --git a/hew/tests/loop_dynamic_model.rs b/hew/tests/loop_dynamic_model.rs index 5e4ea70..e22881d 100644 --- a/hew/tests/loop_dynamic_model.rs +++ b/hew/tests/loop_dynamic_model.rs @@ -67,6 +67,9 @@ fn args_one_dry_iter() -> Args { scope: None, epics: Vec::new(), epic: Vec::new(), + no_planner: false, + planner_budget: None, + planner_runtime: None, } } diff --git a/hew/tests/loop_parallel_e2e.rs b/hew/tests/loop_parallel_e2e.rs index 446d979..508bb5f 100644 --- a/hew/tests/loop_parallel_e2e.rs +++ b/hew/tests/loop_parallel_e2e.rs @@ -149,6 +149,9 @@ fn args_parallel(jobs: u32) -> Args { scope: None, epics: Vec::new(), epic: Vec::new(), + no_planner: false, + planner_budget: None, + planner_runtime: None, } } From 15e068ec5053348a3333200c85ccbdad7b57b0a2 Mon Sep 17 00:00:00 2001 From: droidnoob Date: Sat, 30 May 2026 11:41:45 +0530 Subject: [PATCH 6/8] feat(loop_summary): planner counts line + LOOP.md "Batch planner" (hew-z7rz) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Summary gains PlannerCounts{agent,planner,skipped} + scan_planner_counts(run_dir) 
helper that walks batch-NNN.json artifacts; render emits 'planner: agent=N, runtime=M, fallback=K' between scope and tokens, omits when zero - loop_cmd::print_summary populates from run_dir so live, replay, and parallel-aggregate paths all carry it - docs/LOOP.md '## Batch planner' section: agent→planner→trust-the-graph cascade, batch-NNN.json schema, summary line, --no-planner / loop.planner.* surface - CHANGELOG [Unreleased] entry; DECISION:loop-batch-planner-floor memory persisted - 5 new lib tests; fmt+clippy clean; 712 lib tests green --- CHANGELOG.md | 18 ++++ docs/LOOP.md | 85 +++++++++++++++++++ hew-core/src/loop_summary.rs | 160 +++++++++++++++++++++++++++++++++++ hew/src/commands/loop_cmd.rs | 1 + 4 files changed, 264 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 467b6a6..3adfab7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,24 @@ versioning follows [Semantic Versioning](https://semver.org/). ### Added +- **Batch planner for `hew loop run --jobs N` (epic `hew-lf40`).** + Parallel runs now layer two informed signals on top of `bd ready` to + choose each iter's dispatch batch: (1) a `next_iteration:` block in + the iter agent's close output (cheapest, in-band), and (2) a + dedicated planner subprocess spawned between iters when (1) is + absent — capped by `loop.planner.budget_tokens` (default `10_000`) + and skipped rather than truncated when over budget. `bd ready` + remains the safety floor: agent / planner suggestions can only + narrow the candidate set, never expand it + (`DECISION:loop-batch-planner-floor`). Each iter persists a + `batch-NNN.json` artifact (`schema_version: 1`) under the run dir; a + future `hew loop graph` (`hew-m7lq`) replays them. The end-of-run + summary gains a single-line `planner: agent=N, runtime=M, + fallback=K` row right after `scope:` (omitted entirely for legacy / + serial runs). CLI: `--no-planner`, `--planner-budget`. Config: + `[loop.planner] enabled = true`, `budget_tokens = 10_000`. 
v1 only + triggers under `--jobs >= 2`; `--jobs=1` skips the layer. See + `docs/LOOP.md` § Batch planner. - **`hew loop run --scope={ready|epics}` — scoped run queue (`hew-b3yl`).** Operators (and calling agents) now declare which slice of `bd ready` counts as the queue for a run: `--scope=ready` diff --git a/docs/LOOP.md b/docs/LOOP.md index 2a7d078..34d95a7 100644 --- a/docs/LOOP.md +++ b/docs/LOOP.md @@ -456,6 +456,91 @@ epic-scoped (per `hew-6n0v`) and stays out of this surface. --- +## Batch planner + +Parallel runs (`--jobs N >= 2`) need to choose *which* of the bd-ready +tasks dispatch this iter. The dispatcher layers two informed signals on +top of `bd ready`, with `bd ready` itself as the safety floor: + +1. **Iter agent's `next_iteration:` block.** The previous iter's close + output can name task ids the dispatcher should consider next. Cheapest + signal — already part of the iter's token budget; no extra subprocess. +2. **Planner subprocess.** Spawned between iters *only* when (1) is + absent. Bounded by `loop.planner.budget_tokens` (default `10_000`). + When the budget would be exceeded the planner skips entirely rather + than truncating its context to fit. +3. **Floor: `bd ready`.** The dispatcher always intersects the chosen + batch with the live `bd ready` set. Suggestions can only *narrow* the + candidate set, never expand it — see + `DECISION:loop-batch-planner-floor` and + `DECISION:loop-parallel-overlap-policy`. + +The cascade is **agent → planner → trust-the-graph**. If the agent +emits `next_iteration:`, that wins. Otherwise the planner runs (if +budgeted). If neither produces a usable batch (no agent block, planner +skipped or declined), the dispatcher falls through to `bd ready` order +exactly as a serial run would. + +**Each iter persists a `batch-NNN.json` artifact** to the run dir: + +``` +.hew/loop/<run-id>/batch-001.json +.hew/loop/<run-id>/batch-002.json +...
+``` + +Schema (`schema_version: 1`): + +```json +{ + "schema_version": 1, + "iter_number": 3, + "task_ids": ["hew-aaa", "hew-bbb"], + "source": "agent", // "agent" | "planner" | "skipped" + "reason": null, // populated on "skipped" (e.g. "planner budget exhausted") + "created_at": "2026-05-30T00:00:00Z", + "planner_tokens": null // {input,output,cache_read,cache_create} when source="planner" +} +``` + +A future `hew loop graph` (`hew-m7lq`) consumes these artifacts to +render the dispatch history. + +**End-of-run summary** rolls the counts up into one line, right after +`scope:`: + +``` +planner: agent=4, runtime=2, fallback=1 +``` + +`agent` = iter-suggested batches, `runtime` = planner-subprocess +batches, `fallback` = skipped batches that fell through to bd-ready +order. The line is omitted entirely when no `batch-*.json` files exist +(serial run, or a parallel run that crashed before the first iter). + +### Configuration + +```toml +[loop.planner] +enabled = true # master switch; false disables the planner subprocess layer +budget_tokens = 10_000 # hard cap; planner skips rather than truncates +``` + +CLI overrides on `hew loop run`: + +| Flag | Effect | +|--------------------------|-----------------------------------------------------| +| `--no-planner` | Disable the planner-subprocess layer for this run. The iter agent's `next_iteration:` block still drives the batch when present; otherwise the dispatcher falls through to `bd ready`. | +| `--planner-budget N` | Override `loop.planner.budget_tokens` for this run. | + +**v1 wire-up:** Only triggers when `--jobs >= 2`. `--jobs=1` skips the +planner layer entirely — there's nothing for it to narrow. + +**Non-goals (v1):** replacing trust-the-graph; static touches-overlap +analysis; cross-run batch memory; retroactive recovery of hung iters. + +--- + ## Stop signals - `hew loop cancel` — touches `.hew/loop//.stop`. 
diff --git a/hew-core/src/loop_summary.rs b/hew-core/src/loop_summary.rs index 2c7ed47..d8807c0 100644 --- a/hew-core/src/loop_summary.rs +++ b/hew-core/src/loop_summary.rs @@ -5,7 +5,9 @@ //! CLI layer prints whatever [`render`] returns. use std::collections::BTreeMap; +use std::path::Path; +use crate::batch_plan::{self, BatchSource}; use crate::loop_log::{IterLog, ManifestWorker}; use crate::runner::{Run, StopReason, TokenSpend}; use crate::scope::{self, Scope}; @@ -46,6 +48,55 @@ pub struct Summary { /// to `None` from [`summarize`]; live callers populate from /// `run.config.scope`, re-render callers from `RunLog.scope`. pub scope: Option, + /// Tally of `batch-NNN.json` artifacts produced by the run, grouped + /// by [`BatchSource`]. All zeros (the default) means the run never + /// wrote a batch plan — legacy serial run, or `--jobs=1` — and the + /// renderer hides the `planner:` line entirely. Populated by + /// callers via [`scan_planner_counts`]; [`summarize`] leaves it at + /// default so it can stay a pure function. + pub planner_counts: PlannerCounts, +} + +/// Per-source tally of `batch-NNN.json` artifacts a run produced. Zero +/// in every field == no batch files on disk; the renderer treats that +/// case as "legacy / serial run" and skips the planner line. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +pub struct PlannerCounts { + pub agent: u32, + pub planner: u32, + pub skipped: u32, +} + +impl PlannerCounts { + pub fn total(&self) -> u32 { + self.agent + self.planner + self.skipped + } +} + +/// Walk `run_dir` for `batch-NNN.json` files and tally them by source. +/// Missing/unreadable files are skipped silently — a partial run that +/// crashed mid-write shouldn't break the summary. Returns the +/// default-zero counts when the directory has no batch artifacts. 
+pub fn scan_planner_counts(run_dir: &Path) -> PlannerCounts { + let mut counts = PlannerCounts::default(); + let Ok(entries) = std::fs::read_dir(run_dir) else { return counts }; + for entry in entries.flatten() { + let Some(name) = entry.file_name().to_str().map(str::to_string) else { continue }; + if !name.starts_with("batch-") || !name.ends_with(".json") { + continue; + } + let Some(stem) = name.strip_prefix("batch-").and_then(|s| s.strip_suffix(".json")) else { + continue; + }; + let Ok(iter) = stem.parse::() else { continue }; + let Ok(Some(plan)) = batch_plan::read(run_dir, iter) else { continue }; + match plan.source { + BatchSource::Agent => counts.agent += 1, + BatchSource::Planner => counts.planner += 1, + BatchSource::Skipped => counts.skipped += 1, + } + } + counts } /// One row of the per-model breakdown table. `model` is the resolved @@ -128,6 +179,7 @@ pub fn summarize(run: &Run, iter_logs: &[IterLog]) -> Summary { symbols_touched, per_model, scope: None, + planner_counts: PlannerCounts::default(), } } @@ -224,6 +276,18 @@ pub fn render(summary: &Summary, logs_path: &str, colorize: bool) -> String { let scope_label = scope::label_optional(summary.scope.as_ref()); let _ = writeln!(s, " {bold}scope{reset}: {scope_label}"); + // Planner line — only present when the run produced batch artifacts. + // Naming on the wire: `agent` (next_iteration: from the iter agent), + // `runtime` (planner subprocess), `fallback` (skipped → trust-the-graph). + let pc = &summary.planner_counts; + if pc.total() > 0 { + let _ = writeln!( + s, + " {bold}planner{reset}: agent={}, runtime={}, fallback={}", + pc.agent, pc.planner, pc.skipped, + ); + } + // Token breakdown. 
let total = summary.cost.total(); let _ = writeln!(s, " {bold}tokens{reset}: {} total", fmt_int(total)); @@ -906,6 +970,102 @@ mod tests { ); } + fn batch_tmpdir() -> std::path::PathBuf { + use std::sync::atomic::{AtomicU64, Ordering}; + static SEQ: AtomicU64 = AtomicU64::new(0); + let n = SEQ.fetch_add(1, Ordering::Relaxed); + let base = std::env::temp_dir().join(format!( + "hew-summary-planner-{}-{}-{}", + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or_default(), + n, + )); + std::fs::create_dir_all(&base).unwrap(); + base + } + + fn write_batch(dir: &std::path::Path, iter: u32, source: BatchSource) { + let plan = crate::batch_plan::BatchPlan { + schema_version: crate::batch_plan::SCHEMA_VERSION, + iter_number: iter, + task_ids: Vec::new(), + source, + reason: None, + created_at: "2026-05-30T00:00:00Z".into(), + planner_tokens: None, + }; + crate::batch_plan::write(dir, &plan).unwrap(); + } + + #[test] + fn summary_planner_counts_aggregates_agent_planner_skipped() { + let dir = batch_tmpdir(); + write_batch(&dir, 1, BatchSource::Agent); + write_batch(&dir, 2, BatchSource::Agent); + write_batch(&dir, 3, BatchSource::Planner); + write_batch(&dir, 4, BatchSource::Skipped); + let counts = scan_planner_counts(&dir); + assert_eq!(counts.agent, 2); + assert_eq!(counts.planner, 1); + assert_eq!(counts.skipped, 1); + assert_eq!(counts.total(), 4); + } + + #[test] + fn summary_omits_planner_line_when_no_batch_files() { + let mut sum = one_iter_summary(); + sum.planner_counts = PlannerCounts::default(); + let txt = render(&sum, "/x", false); + assert!(!txt.contains("planner:"), "legacy run must omit planner row:\n{txt}"); + } + + #[test] + fn summary_planner_line_appears_after_scope_line() { + let mut sum = one_iter_summary(); + sum.scope = Some(Scope::Ready); + sum.planner_counts = PlannerCounts { agent: 4, planner: 2, skipped: 1 }; + let txt = render(&sum, "/x", false); + let scope_pos = 
txt.find("scope:").expect("scope row present"); + let planner_pos = txt.find("planner:").expect("planner row present"); + let tokens_pos = txt.find("tokens:").expect("tokens row present"); + assert!( + scope_pos < planner_pos && planner_pos < tokens_pos, + "planner must sit between scope and tokens:\n{txt}" + ); + assert!( + txt.contains("agent=4, runtime=2, fallback=1"), + "planner row format mismatch:\n{txt}" + ); + } + + #[test] + fn summary_legacy_run_without_batch_files_renders_clean() { + let dir = batch_tmpdir(); + // No batch files written. + let counts = scan_planner_counts(&dir); + assert_eq!(counts, PlannerCounts::default()); + let mut sum = one_iter_summary(); + sum.planner_counts = counts; + let txt = render(&sum, "/x", false); + assert!(!txt.contains("planner:"), "no batch files → no planner row:\n{txt}"); + } + + #[test] + fn scan_planner_counts_ignores_non_batch_files() { + let dir = batch_tmpdir(); + std::fs::write(dir.join("run.json"), "{}").unwrap(); + std::fs::write(dir.join("iter-001.json"), "{}").unwrap(); + std::fs::write(dir.join("batch-notanumber.json"), "{}").unwrap(); + write_batch(&dir, 7, BatchSource::Agent); + let counts = scan_planner_counts(&dir); + assert_eq!(counts.agent, 1); + assert_eq!(counts.planner, 0); + assert_eq!(counts.skipped, 0); + } + #[test] fn render_strips_ansi_when_colorize_false() { let logs = vec![iter_log(1, "closed", Some("h1"), TokenSpend::default())]; diff --git a/hew/src/commands/loop_cmd.rs b/hew/src/commands/loop_cmd.rs index 6910fec..84fabe2 100644 --- a/hew/src/commands/loop_cmd.rs +++ b/hew/src/commands/loop_cmd.rs @@ -1678,6 +1678,7 @@ fn print_summary( } let mut summary = hew_core::loop_summary::summarize(run, iter_logs); summary.scope = scope; + summary.planner_counts = hew_core::loop_summary::scan_planner_counts(dir); let colorize = std::env::var_os("NO_COLOR").is_none(); print!("{}", hew_core::loop_summary::render(&summary, &dir.display().to_string(), colorize),); } From 
fcc5a9ee58521a10e99807ba021e0951877083cd Mon Sep 17 00:00:00 2001 From: droidnoob Date: Sat, 30 May 2026 12:06:09 +0530 Subject: [PATCH 7/8] feat(verify): end-of-run test step for hew loop (hew-bon7) Adds an opt-in mandatory verify step that runs after the last iter (and after merge-back on --jobs >= 2) to prove the final stacked state is green. Conditional on both a resolvable test command (CLI > config > gate::detect) and an explicit opt-in. - new hew_core::verify (VerifyOutcome + resolve_command + run_verify) - new [loop.end_of_run] config block (verify_tests, verify_command, verify_budget_wall) with three settable keys - Run + RunLog gain verify_outcome with backward-compat parse - summary renderer adds a coloured "verify:" line below planner - --verify-tests / --no-verify-tests / --verify-command CLI flags - failure writes STATUS:loop-verify-failed: + non-zero exit; closed tasks are NOT rolled back - defaults byte-identical to today (verify_tests = false) - 18 new tests; docs/LOOP.md + CHANGELOG updated Closes bd-hew-bon7. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 18 ++ docs/LOOP.md | 77 ++++++ hew-core/src/config.rs | 91 ++++++++ hew-core/src/lib.rs | 1 + hew-core/src/loop_log.rs | 32 +++ hew-core/src/loop_summary.rs | 66 ++++++ hew-core/src/runner.rs | 7 + hew-core/src/verify.rs | 402 ++++++++++++++++++++++++++++++++ hew/src/commands/loop_cmd.rs | 135 ++++++++++- hew/tests/loop_backpressure.rs | 6 + hew/tests/loop_dynamic_model.rs | 3 + hew/tests/loop_parallel_e2e.rs | 3 + 12 files changed, 840 insertions(+), 1 deletion(-) create mode 100644 hew-core/src/verify.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 3adfab7..b91ac23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,24 @@ versioning follows [Semantic Versioning](https://semver.org/). 
### Added +- **End-of-run verify step for `hew loop` (`hew-bon7`).** Opt-in + mandatory test step that runs after the last iter (and after + merge-back on `--jobs N >= 2`) to prove the final stacked state is + green. Conditional on both a resolvable test command (CLI + `--verify-command` > `loop.end_of_run.verify_command` > project- + authored signals via `hew_core::gate::detect`) and an explicit + opt-in (`--verify-tests` or `loop.end_of_run.verify_tests = true`). + Outcome (`Passed` / `Failed` / `Skipped` / `TimedOut`) persists as + `Run.verify_outcome` in `run.json`, shows up as a `verify:` line in + `hew loop summary`, and on failure files a + `STATUS:loop-verify-failed:` memory + exits non-zero so CI + branches on it. Closed tasks are **not** rolled back on failure — + the memory + summary line + exit code are the durable signals. + Defaults are byte-identical to today (`verify_tests = false`). CLI: + `--verify-tests`, `--no-verify-tests`, `--verify-command=...`. + Config: `[loop.end_of_run] verify_tests`, `verify_command`, + `verify_budget_wall` (default `"10m"`). See `docs/LOOP.md` § + End-of-run verification. - **Batch planner for `hew loop run --jobs N` (epic `hew-lf40`).** Parallel runs now layer two informed signals on top of `bd ready` to choose each iter's dispatch batch: (1) a `next_iteration:` block in diff --git a/docs/LOOP.md b/docs/LOOP.md index 34d95a7..043b608 100644 --- a/docs/LOOP.md +++ b/docs/LOOP.md @@ -541,6 +541,83 @@ analysis; cross-run batch memory; retroactive recovery of hung iters. --- +## End-of-run verification + +Once every iter has closed (and, under `--jobs N >= 2`, every worker +branch has merged back onto HEAD), hew can run a single test command +to prove the *final stacked state* is green. The verify step is +**opt-in**; default runs are byte-identical to today. 
+ +Why a final-state check on top of per-iter `hew-guard` runs: the +gate inside the loop catches regressions an iter introduced, but the +sum of N green iters is not a green tree if two parallel workers +touched the same module and the merge resolved one half (see +`DECISION:loop-parallel-overlap-policy` — conflicts file +`[merge-conflict]` bug tasks but the working tree itself may still +need a final compile + test pass). + +### Wiring + +Both must hold for the step to run: + +1. A test command resolves — explicit `--verify-command`, then + `loop.end_of_run.verify_command` config, then project-authored + signals via `hew_core::gate::detect` (`justfile`, `Makefile`, + `package.json` `test`). No language-sniffing fallback — mirrors + the per-iter gate's existing philosophy. +2. The user opted in — `--verify-tests` or + `loop.end_of_run.verify_tests = true`. + +### CLI + +```sh +hew loop run --verify-tests # opt in for this run +hew loop run --verify-tests --verify-command="cargo nextest run --workspace" +hew loop run --no-verify-tests # explicit off +``` + +### TOML + +```toml +[loop.end_of_run] +verify_tests = false # default false +verify_command = "" # empty = auto-detect from gate +verify_budget_wall = "10m" # hard cap on the verify step +``` + +### Outcome + +The full stdout+stderr of the verify command is written to +`.hew/loop/<run-id>/verify.log`. The outcome (`Passed` / `Failed` / +`Skipped` / `TimedOut`) is persisted as `Run.verify_outcome` on the +run's `run.json` and shows up in the summary: + +``` + verify: passed (22s, cargo nextest run --workspace) + verify: failed (exit 3, 5s, pytest -q) + verify: skipped (no command resolved) + verify: timed out (> 600s, ...) +``` + +A failed or timed-out verify **does not unwind any closed task**. The +durable signals are: + +- The `verify:` line in `hew loop summary`. +- A `STATUS:loop-verify-failed:` memory so the next session + sees the regression on `hew prime resume`.
+- Non-zero exit code from `hew loop run` so CI / wrapper scripts can + branch on it. + +### Out of scope (v1) + +- Auto-fix on failure (could later re-queue failing tests as bd + tasks). +- Per-iter verification (the per-iter gate already covers that path; + full-suite-per-iter would triple wall-clock cost). +- Sandbox enforcement separate from the runtime's own. + +--- + ## Stop signals - `hew loop cancel` — touches `.hew/loop//.stop`. diff --git a/hew-core/src/config.rs b/hew-core/src/config.rs index e45a267..51e051a 100644 --- a/hew-core/src/config.rs +++ b/hew-core/src/config.rs @@ -70,6 +70,37 @@ pub struct LoopConfig { /// `hew-7k1m`). Disabled / `0` for jobs == 1 — the entire layer /// is bypassed so the fast path stays free of planner overhead. pub planner: LoopPlannerConfig, + /// End-of-run verification knobs. Opt-in (`verify_tests = false` + /// by default) so existing runs stay byte-identical to today. + /// See `hew-bon7`. + pub end_of_run: LoopEndOfRunConfig, +} + +/// `loop.end_of_run.*` knobs. Mandatory end-of-run test step that +/// proves the final stacked state is green before the loop reports +/// success. Off by default; flip via `loop.end_of_run.verify_tests` +/// in config or `--verify-tests` on `hew loop run`. +#[derive(Debug, Clone, Serialize, Deserialize, schemars::JsonSchema)] +#[serde(default)] +pub struct LoopEndOfRunConfig { + /// Run the verify step at all. Default `false`. + pub verify_tests: bool, + /// User-supplied verify command (e.g. `"cargo nextest run --workspace"`). + /// Empty = let [`crate::gate::detect`] resolve from project-authored + /// signals (justfile/Makefile/package.json `test`). + pub verify_command: String, + /// Wall-clock cap on the verify step. `"10m"` default. + pub verify_budget_wall: String, +} + +impl Default for LoopEndOfRunConfig { + fn default() -> Self { + Self { + verify_tests: false, + verify_command: String::new(), + verify_budget_wall: "10m".into(), + } + } } /// Planner-spawn knobs. 
The planner is the inter-iter advisor that @@ -411,6 +442,15 @@ pub fn get(cfg: &Config, key: &str) -> Option { Some(cfg.loop_cfg.planner.budget_tokens.to_string()) } "loop.planner.runtime" => cfg.loop_cfg.planner.runtime.clone(), + "loop.end_of_run.verify_tests" | "loop.end_of_run.verify-tests" => { + Some(cfg.loop_cfg.end_of_run.verify_tests.to_string()) + } + "loop.end_of_run.verify_command" | "loop.end_of_run.verify-command" => { + Some(cfg.loop_cfg.end_of_run.verify_command.clone()) + } + "loop.end_of_run.verify_budget_wall" | "loop.end_of_run.verify-budget-wall" => { + Some(cfg.loop_cfg.end_of_run.verify_budget_wall.clone()) + } k if k.starts_with("loop.model.by_priority.") || k.starts_with("loop.model.by-priority.") => { @@ -654,6 +694,25 @@ pub fn set(cfg: &mut Config, key: &str, value: &str) -> Result<()> { cfg.loop_cfg.planner.runtime = Some(value.to_string()); } } + "loop.end_of_run.verify_tests" | "loop.end_of_run.verify-tests" => { + cfg.loop_cfg.end_of_run.verify_tests = bool_val(value)?; + } + "loop.end_of_run.verify_command" | "loop.end_of_run.verify-command" => { + cfg.loop_cfg.end_of_run.verify_command = value.to_string(); + } + "loop.end_of_run.verify_budget_wall" | "loop.end_of_run.verify-budget-wall" => { + if value.is_empty() { + cfg.loop_cfg.end_of_run.verify_budget_wall = "10m".into(); + } else { + // Validate parseability — same s/m/h grammar as + // `--budget-wall`. Reject bad values at set-time so + // `hew loop run` doesn't trip on a stale config. 
+ parse_budget_wall(value).map_err(|e| HewError::MissingFlag { + flag: format!("value (expected s/m/h duration, got `{value}`: {e})"), + })?; + cfg.loop_cfg.end_of_run.verify_budget_wall = value.to_string(); + } + } _ => { return Err(HewError::MissingFlag { flag: format!("key (unknown: {key})") }); } @@ -693,9 +752,38 @@ pub fn keys() -> &'static [&'static str] { "loop.planner.enabled", "loop.planner.budget_tokens", "loop.planner.runtime", + "loop.end_of_run.verify_tests", + "loop.end_of_run.verify_command", + "loop.end_of_run.verify_budget_wall", ] } +/// Parse a `loop.end_of_run.verify_budget_wall` string into a +/// [`std::time::Duration`]. Accepts `s` / `m` / `h`. Bare +/// helper here (not the CLI's `parse_duration`) so config-side +/// validation doesn't require pulling in the binary crate. +pub fn parse_budget_wall(raw: &str) -> Result { + let raw = raw.trim(); + if raw.is_empty() { + return Err(HewError::MissingFlag { flag: "empty duration".into() }); + } + let (num, unit) = raw.split_at(raw.len() - 1); + let n: u64 = num + .parse() + .map_err(|e| HewError::MissingFlag { flag: format!("invalid number `{num}`: {e}") })?; + let dur = match unit { + "s" => std::time::Duration::from_secs(n), + "m" => std::time::Duration::from_secs(n * 60), + "h" => std::time::Duration::from_secs(n * 3600), + other => { + return Err(HewError::MissingFlag { + flag: format!("unknown duration unit `{other}` (expected s/m/h)"), + }); + } + }; + Ok(dur) +} + #[cfg(test)] mod tests { use super::*; @@ -791,6 +879,9 @@ mod tests { "loop.planner.enabled" => "true", "loop.planner.budget_tokens" => "20000", "loop.planner.runtime" => "codex", + "loop.end_of_run.verify_tests" => "true", + "loop.end_of_run.verify_command" => "cargo nextest run", + "loop.end_of_run.verify_budget_wall" => "10m", k if k.starts_with("optional-skills.") => "yes", _ => "true", }; diff --git a/hew-core/src/lib.rs b/hew-core/src/lib.rs index da4c952..48789c9 100644 --- a/hew-core/src/lib.rs +++ b/hew-core/src/lib.rs 
@@ -52,6 +52,7 @@ pub mod testing; pub mod time; pub mod treesitter; pub mod tty; +pub mod verify; pub mod worktree; pub use ctx::{Ctx, OutputMode}; diff --git a/hew-core/src/loop_log.rs b/hew-core/src/loop_log.rs index 9162728..b281fc6 100644 --- a/hew-core/src/loop_log.rs +++ b/hew-core/src/loop_log.rs @@ -167,6 +167,11 @@ pub struct RunLog { /// after `from_run` once `RunConfig.scope` lands. #[serde(default, skip_serializing_if = "Option::is_none")] pub scope: Option, + /// End-of-run verify-tests outcome. `None` on legacy `run.json` + /// files predating the field; preserved verbatim on re-parse for + /// `hew loop summary`. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub verify_outcome: Option, } impl RunLog { @@ -182,6 +187,7 @@ impl RunLog { strict: run.config.strict, interactive: run.config.interactive, scope: Some(run.config.scope.clone()), + verify_outcome: run.verify_outcome.clone(), } } } @@ -688,6 +694,32 @@ mod tests { assert_eq!(parsed.scope, Some(Scope::Epics { epic_ids: vec!["hew-6az".into()] })); } + #[test] + fn run_log_backward_compat_missing_verify_outcome_deserializes_as_none() { + // Legacy run.json files predate `verify_outcome`. The + // pre-scope fixture is also pre-verify, so re-use it. 
+ let path = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("fixtures") + .join("run-log-pre-scope.json"); + let body = std::fs::read_to_string(&path).expect("read pre-scope fixture"); + let parsed: RunLog = serde_json::from_str(&body).expect("parse pre-scope fixture"); + assert!(parsed.verify_outcome.is_none()); + } + + #[test] + fn run_log_round_trips_verify_outcome() { + let mut run = Run::new("loop-v1", "2026-05-30T00:00:00Z", RunConfig::default()); + run.verify_outcome = Some(crate::verify::VerifyOutcome::Passed { + command: "cargo test".into(), + duration_secs: 22, + }); + let log = RunLog::from_run(&run); + let json = serde_json::to_string_pretty(&log).unwrap(); + let parsed: RunLog = serde_json::from_str(&json).unwrap(); + assert_eq!(parsed.verify_outcome, run.verify_outcome); + } + #[test] fn run_log_backward_compat_missing_scope_deserializes_as_none() { let path = Path::new(env!("CARGO_MANIFEST_DIR")) diff --git a/hew-core/src/loop_summary.rs b/hew-core/src/loop_summary.rs index d8807c0..870cba0 100644 --- a/hew-core/src/loop_summary.rs +++ b/hew-core/src/loop_summary.rs @@ -55,6 +55,12 @@ pub struct Summary { /// callers via [`scan_planner_counts`]; [`summarize`] leaves it at /// default so it can stay a pure function. pub planner_counts: PlannerCounts, + /// End-of-run verify-tests outcome. `None` when verify never ran + /// (opt-in off, no command resolved, or pre-`hew-bon7` run). When + /// set, the renderer adds a `verify:` line below `planner:`. + /// Populated by `print_summary` from `Run.verify_outcome` (live) + /// or `RunLog.verify_outcome` (re-render from disk). + pub verify_outcome: Option, } /// Per-source tally of `batch-NNN.json` artifacts a run produced. 
Zero @@ -180,6 +186,7 @@ pub fn summarize(run: &Run, iter_logs: &[IterLog]) -> Summary { per_model, scope: None, planner_counts: PlannerCounts::default(), + verify_outcome: run.verify_outcome.clone(), } } @@ -288,6 +295,19 @@ pub fn render(summary: &Summary, logs_path: &str, colorize: bool) -> String { ); } + // Verify line — only present when the run ran (or recorded a + // deliberate skip of) the end-of-run verify-tests step. + if let Some(v) = &summary.verify_outcome { + let painted = if v.is_failure() { + format!("{red}{}{reset}", v.summary_line()) + } else if matches!(v, crate::verify::VerifyOutcome::Passed { .. }) { + format!("{green}{}{reset}", v.summary_line()) + } else { + v.summary_line() + }; + let _ = writeln!(s, " {bold}verify{reset}: {}", painted); + } + // Token breakdown. let total = summary.cost.total(); let _ = writeln!(s, " {bold}tokens{reset}: {} total", fmt_int(total)); @@ -1066,6 +1086,52 @@ mod tests { assert_eq!(counts.skipped, 0); } + #[test] + fn render_hides_verify_line_when_outcome_is_none() { + let sum = one_iter_summary(); + let txt = render(&sum, "/x", false); + assert!(!txt.contains("verify:"), "verify line should be absent:\n{txt}"); + } + + #[test] + fn render_shows_verify_passed_line() { + let mut sum = one_iter_summary(); + sum.verify_outcome = Some(crate::verify::VerifyOutcome::Passed { + command: "cargo test --workspace".into(), + duration_secs: 22, + }); + let txt = render(&sum, "/x", false); + assert!(txt.contains("verify:")); + assert!(txt.contains("passed")); + assert!(txt.contains("cargo test --workspace")); + } + + #[test] + fn render_shows_verify_failed_line() { + let mut sum = one_iter_summary(); + sum.verify_outcome = Some(crate::verify::VerifyOutcome::Failed { + command: "pytest -q".into(), + exit_code: 1, + duration_secs: 5, + stderr_tail: String::new(), + }); + let txt = render(&sum, "/x", false); + assert!(txt.contains("verify:")); + assert!(txt.contains("failed")); + assert!(txt.contains("exit 1")); + } + + 
#[test] + fn render_shows_verify_skipped_line_when_command_unresolved() { + let mut sum = one_iter_summary(); + sum.verify_outcome = + Some(crate::verify::VerifyOutcome::Skipped { reason: "no command resolved".into() }); + let txt = render(&sum, "/x", false); + assert!(txt.contains("verify:")); + assert!(txt.contains("skipped")); + assert!(txt.contains("no command resolved")); + } + #[test] fn render_strips_ansi_when_colorize_false() { let logs = vec![iter_log(1, "closed", Some("h1"), TokenSpend::default())]; diff --git a/hew-core/src/runner.rs b/hew-core/src/runner.rs index af4190f..7ed8524 100644 --- a/hew-core/src/runner.rs +++ b/hew-core/src/runner.rs @@ -11,6 +11,7 @@ use std::time::Duration; use crate::config::LoopModelConfig; use crate::runtime::{RuntimeSpawner, SpawnFailureClass}; use crate::scope::Scope; +use crate::verify::VerifyOutcome; /// Per-run configuration. Set once at `hew loop` invocation, immutable /// for the duration of the run. @@ -225,6 +226,11 @@ pub struct Run { pub config: RunConfig, pub iters: Vec, pub stop_reason: Option, + /// Outcome of the end-of-run verify step (`loop.end_of_run.verify_tests`). + /// `None` when verify never ran — either opt-in off or no command + /// resolved AND the loop didn't bother recording a `Skipped`. Set + /// by `hew loop run` after merge-back, before final `run.json`. + pub verify_outcome: Option, } impl Run { @@ -235,6 +241,7 @@ impl Run { config, iters: Vec::new(), stop_reason: None, + verify_outcome: None, } } diff --git a/hew-core/src/verify.rs b/hew-core/src/verify.rs new file mode 100644 index 0000000..1873a82 --- /dev/null +++ b/hew-core/src/verify.rs @@ -0,0 +1,402 @@ +//! End-of-run test verification for `hew loop`. +//! +//! The verify step runs once after the last iter (and after merge-back +//! on `--jobs >= 2`) and before the final `run.json` write. It proves +//! the *final stacked state* compiles + passes its declared tests so +//! 
merge-back / PR creation isn't shipping a green-by-construction +//! pipeline that breaks in CI. +//! +//! Conditional on **both**: +//! 1. A test command resolves (CLI override > config override > +//! [`crate::gate::detect`] auto-detect of project-authored +//! signals). +//! 2. The user opted in via `loop.end_of_run.verify_tests = true` +//! or `hew loop run --verify-tests`. +//! +//! Failure surfaces in `hew loop summary` and writes a +//! `STATUS:loop-verify-failed:` memory; it does **not** unwind +//! closed tasks. Per `DECISION:loop-parallel-overlap-policy`, +//! conflicts on merge-back already file `[merge-conflict]` bugs; +//! verify-tests is the next-layer safety net for "final state is +//! actually green". + +use std::ffi::OsStr; +use std::path::Path; +use std::process::{Command, Stdio}; +use std::time::{Duration, Instant}; + +use serde::{Deserialize, Serialize}; +use wait_timeout::ChildExt; + +use crate::gate::GateSpec; +use crate::process::spawn_with_etxtbsy_retry; + +/// Outcome of one verify-tests invocation. Persisted in `run.json` as +/// `Run.verify_outcome` and re-rendered by `hew loop summary`. +/// +/// Stays a small enum so adding outcomes later (e.g. `Cancelled` if +/// the user ctrl-Cs the verify step itself) is a non-breaking append. +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum VerifyOutcome { + /// Command exited 0. `command` is the rendered argv for the + /// summary line; `duration_secs` is wall-clock spent. + Passed { command: String, duration_secs: u64 }, + /// Command exited non-zero. `stderr_tail` is the last ~2 KiB of + /// merged stdout/stderr for the failure breadcrumb. + Failed { command: String, exit_code: i32, duration_secs: u64, stderr_tail: String }, + /// Verify was opt-in-true but no command resolved (no CLI override, + /// no config override, `gate::detect` returned empty test_cmd). 
+ /// Also covers the `verify_tests = false` path so a single field + /// captures every non-run case. + Skipped { reason: String }, + /// Wall-clock budget elapsed before the command finished. The + /// child was killed. + TimedOut { command: String, budget_secs: u64 }, +} + +impl VerifyOutcome { + /// True iff the run should exit non-zero on this outcome. Wrapper + /// scripts / CI branches on this. + pub fn is_failure(&self) -> bool { + matches!(self, Self::Failed { .. } | Self::TimedOut { .. }) + } + + /// One-line label for `hew loop summary` output. + pub fn summary_line(&self) -> String { + match self { + Self::Passed { command, duration_secs } => { + format!("passed ({}s, {})", duration_secs, command) + } + Self::Failed { command, exit_code, duration_secs, .. } => { + format!("failed (exit {}, {}s, {})", exit_code, duration_secs, command) + } + Self::Skipped { reason } => format!("skipped ({})", reason), + Self::TimedOut { command, budget_secs } => { + format!("timed out (> {}s, {})", budget_secs, command) + } + } + } +} + +/// Resolve the verify command for this run. Precedence: +/// +/// 1. `cli_override` — `--verify-command="..."` on `hew loop run`. +/// 2. `config_override` — `loop.end_of_run.verify_command` in hew config. +/// 3. `gate.test_cmd` — project-authored signals (`justfile`, +/// `Makefile`, `package.json`). Already detected by the caller; we +/// don't re-walk the filesystem. +/// +/// Returns `None` when nothing resolves — caller skips with a +/// `no_command_resolved` reason. 
+pub fn resolve_command( + cli_override: Option<&str>, + config_override: Option<&str>, + gate: &GateSpec, +) -> Option> { + if let Some(raw) = cli_override.map(str::trim).filter(|s| !s.is_empty()) { + return Some(split_command(raw)); + } + if let Some(raw) = config_override.map(str::trim).filter(|s| !s.is_empty()) { + return Some(split_command(raw)); + } + if !gate.test_cmd.is_empty() { + return Some(gate.test_cmd.clone()); + } + None +} + +/// Whitespace-split a user-supplied command string. We deliberately +/// avoid shell-style quoting parsing here — operators wanting that +/// shape pass the command through their shell instead. Mirrors +/// `gate::detect`'s naive vector shape. +fn split_command(raw: &str) -> Vec { + raw.split_whitespace().map(str::to_string).collect() +} + +/// Spawn the resolved verify command under `budget` and capture its +/// output. The command runs in `working_dir` (the project root or, on +/// parallel runs, the launch HEAD — caller decides). Combined +/// stdout+stderr is written byte-for-byte to `log_path` and the last +/// ~2 KiB of stderr returned in [`VerifyOutcome::Failed`]. +/// +/// Pure-ish: only side effects are subprocess spawn + writing the +/// log file. Callers persist [`VerifyOutcome`] into `run.json`. 
+pub fn run_verify( + command: &[String], + working_dir: &Path, + log_path: &Path, + budget: Duration, +) -> VerifyOutcome { + let rendered = command.join(" "); + if command.is_empty() { + return VerifyOutcome::Skipped { reason: "empty command".into() }; + } + let program = &command[0]; + let args: Vec<&OsStr> = command[1..].iter().map(OsStr::new).collect(); + + let mut cmd = Command::new(program); + cmd.args(&args) + .current_dir(working_dir) + .stdin(Stdio::null()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + + let start = Instant::now(); + let mut child = match spawn_with_etxtbsy_retry(&mut cmd) { + Ok(c) => c, + Err(e) => { + return VerifyOutcome::Failed { + command: rendered, + exit_code: -1, + duration_secs: 0, + stderr_tail: format!("spawn failed: {e}"), + }; + } + }; + + let status = match child.wait_timeout(budget) { + Ok(Some(s)) => s, + Ok(None) => { + let _ = child.kill(); + let _ = child.wait(); + return VerifyOutcome::TimedOut { command: rendered, budget_secs: budget.as_secs() }; + } + Err(e) => { + return VerifyOutcome::Failed { + command: rendered, + exit_code: -1, + duration_secs: start.elapsed().as_secs(), + stderr_tail: format!("wait failed: {e}"), + }; + } + }; + + let duration_secs = start.elapsed().as_secs(); + let mut stdout = Vec::new(); + let mut stderr = Vec::new(); + if let Some(mut s) = child.stdout.take() { + use std::io::Read; + let _ = s.read_to_end(&mut stdout); + } + if let Some(mut s) = child.stderr.take() { + use std::io::Read; + let _ = s.read_to_end(&mut stderr); + } + + // Best-effort log write. A failure here is not load-bearing — the + // outcome record is the durable signal. 
+ let mut combined = Vec::with_capacity(stdout.len() + stderr.len() + 16); + combined.extend_from_slice(b"=== stdout ===\n"); + combined.extend_from_slice(&stdout); + combined.extend_from_slice(b"\n=== stderr ===\n"); + combined.extend_from_slice(&stderr); + if let Some(parent) = log_path.parent() { + let _ = std::fs::create_dir_all(parent); + } + let _ = std::fs::write(log_path, &combined); + + if status.success() { + VerifyOutcome::Passed { command: rendered, duration_secs } + } else { + VerifyOutcome::Failed { + command: rendered, + exit_code: status.code().unwrap_or(-1), + duration_secs, + stderr_tail: tail_bytes(&stderr, 2048), + } + } +} + +fn tail_bytes(bytes: &[u8], cap: usize) -> String { + if bytes.len() <= cap { + return String::from_utf8_lossy(bytes).into_owned(); + } + String::from_utf8_lossy(&bytes[bytes.len() - cap..]).into_owned() +} + +#[cfg(test)] +mod tests { + use super::*; + + fn gate_with(cmd: &[&str]) -> GateSpec { + GateSpec { test_cmd: cmd.iter().map(|s| s.to_string()).collect(), lint_cmd: Vec::new() } + } + + #[test] + fn resolve_prefers_cli_over_config_over_gate() { + let gate = gate_with(&["just", "test"]); + let r = resolve_command(Some("cargo test"), Some("make test"), &gate); + assert_eq!(r, Some(vec!["cargo".into(), "test".into()])); + } + + #[test] + fn resolve_falls_through_to_config_when_cli_empty() { + let gate = gate_with(&["just", "test"]); + let r = resolve_command(Some(""), Some("make test"), &gate); + assert_eq!(r, Some(vec!["make".into(), "test".into()])); + } + + #[test] + fn resolve_falls_through_to_gate_when_overrides_absent() { + let gate = gate_with(&["just", "test"]); + let r = resolve_command(None, None, &gate); + assert_eq!(r, Some(vec!["just".into(), "test".into()])); + } + + #[test] + fn resolve_returns_none_when_nothing_set() { + let gate = GateSpec::default(); + assert!(resolve_command(None, None, &gate).is_none()); + } + + #[test] + fn resolve_trims_whitespace_only_strings_as_empty() { + let gate = 
gate_with(&["just", "test"]); + let r = resolve_command(Some(" "), None, &gate); + assert_eq!(r, Some(vec!["just".into(), "test".into()])); + } + + #[cfg(unix)] + #[test] + fn run_verify_passed_records_command_and_duration() { + let tmp = tempfile::tempdir().unwrap(); + let log = tmp.path().join("verify.log"); + let out = run_verify(&["true".into()], tmp.path(), &log, Duration::from_secs(5)); + match out { + VerifyOutcome::Passed { command, .. } => assert_eq!(command, "true"), + other => panic!("expected Passed, got {other:?}"), + } + assert!(log.exists(), "log file should be written"); + } + + #[cfg(unix)] + #[test] + fn run_verify_failed_captures_exit_code_and_stderr_tail() { + let tmp = tempfile::tempdir().unwrap(); + let log = tmp.path().join("verify.log"); + let out = run_verify( + &["sh".into(), "-c".into(), "echo boom 1>&2; exit 3".into()], + tmp.path(), + &log, + Duration::from_secs(5), + ); + match out { + VerifyOutcome::Failed { exit_code, stderr_tail, .. } => { + assert_eq!(exit_code, 3); + assert!(stderr_tail.contains("boom"), "stderr tail: {stderr_tail}"); + } + other => panic!("expected Failed, got {other:?}"), + } + } + + #[cfg(unix)] + #[test] + fn run_verify_timeout_kills_child_and_reports_budget() { + let tmp = tempfile::tempdir().unwrap(); + let log = tmp.path().join("verify.log"); + let out = run_verify( + &["sh".into(), "-c".into(), "sleep 5".into()], + tmp.path(), + &log, + Duration::from_millis(200), + ); + match out { + VerifyOutcome::TimedOut { budget_secs, .. } => assert_eq!(budget_secs, 0), + other => panic!("expected TimedOut, got {other:?}"), + } + } + + #[test] + fn run_verify_empty_command_skips() { + let tmp = tempfile::tempdir().unwrap(); + let log = tmp.path().join("verify.log"); + let out = run_verify(&[], tmp.path(), &log, Duration::from_secs(1)); + assert!(matches!(out, VerifyOutcome::Skipped { .. 
})); + } + + #[test] + fn run_verify_spawn_failure_returns_failed_with_negative_exit_code() { + let tmp = tempfile::tempdir().unwrap(); + let log = tmp.path().join("verify.log"); + let out = run_verify( + &["this-binary-does-not-exist-xyz".into()], + tmp.path(), + &log, + Duration::from_secs(1), + ); + match out { + VerifyOutcome::Failed { exit_code, stderr_tail, .. } => { + assert_eq!(exit_code, -1); + assert!(stderr_tail.contains("spawn failed")); + } + other => panic!("expected Failed, got {other:?}"), + } + } + + #[test] + fn verify_outcome_serde_round_trip_passed() { + let v = VerifyOutcome::Passed { command: "cargo test".into(), duration_secs: 22 }; + let s = serde_json::to_string(&v).unwrap(); + let back: VerifyOutcome = serde_json::from_str(&s).unwrap(); + assert_eq!(v, back); + } + + #[test] + fn verify_outcome_serde_round_trip_failed_has_tail() { + let v = VerifyOutcome::Failed { + command: "cargo test".into(), + exit_code: 101, + duration_secs: 5, + stderr_tail: "thread 'main' panicked".into(), + }; + let s = serde_json::to_string(&v).unwrap(); + let back: VerifyOutcome = serde_json::from_str(&s).unwrap(); + assert_eq!(v, back); + } + + #[test] + fn verify_outcome_is_failure_classifies_correctly() { + assert!(!VerifyOutcome::Passed { command: "x".into(), duration_secs: 1 }.is_failure()); + assert!(!VerifyOutcome::Skipped { reason: "off".into() }.is_failure()); + assert!( + VerifyOutcome::Failed { + command: "x".into(), + exit_code: 1, + duration_secs: 0, + stderr_tail: String::new(), + } + .is_failure() + ); + assert!(VerifyOutcome::TimedOut { command: "x".into(), budget_secs: 1 }.is_failure()); + } + + #[test] + fn summary_line_renders_each_variant() { + let p = VerifyOutcome::Passed { command: "cargo test".into(), duration_secs: 22 }; + assert!(p.summary_line().contains("passed")); + assert!(p.summary_line().contains("cargo test")); + let f = VerifyOutcome::Failed { + command: "cargo test".into(), + exit_code: 1, + duration_secs: 5, + stderr_tail: 
String::new(), + }; + assert!(f.summary_line().contains("failed")); + let s = VerifyOutcome::Skipped { reason: "no command".into() }; + assert!(s.summary_line().contains("skipped")); + let t = VerifyOutcome::TimedOut { command: "x".into(), budget_secs: 600 }; + assert!(t.summary_line().contains("timed out")); + } + + #[test] + fn tail_bytes_under_cap_returns_all() { + assert_eq!(tail_bytes(b"hello", 100), "hello"); + } + + #[test] + fn tail_bytes_over_cap_returns_suffix() { + let b = b"abcdefghij"; + assert_eq!(tail_bytes(b, 4), "ghij"); + } +} diff --git a/hew/src/commands/loop_cmd.rs b/hew/src/commands/loop_cmd.rs index 84fabe2..6d8631a 100644 --- a/hew/src/commands/loop_cmd.rs +++ b/hew/src/commands/loop_cmd.rs @@ -203,6 +203,7 @@ pub struct LoopCmd { } #[derive(Debug, Subcommand)] +#[allow(clippy::large_enum_variant)] pub enum LoopSub { /// Drive the autonomous outer loop until a stop signal fires. Run(Args), @@ -406,6 +407,27 @@ pub struct Args { value_parser = clap::builder::PossibleValuesParser::new(RuntimeKind::VARIANTS), )] pub planner_runtime: Option, + + /// Run a mandatory end-of-run test command after the last iter + /// (and after merge-back on `--jobs >= 2`) to prove the final + /// stacked state is green. Overrides + /// `loop.end_of_run.verify_tests` config. Default `false`. + #[arg(long, default_value_t = false, action = clap::ArgAction::SetTrue)] + pub verify_tests: bool, + + /// Explicit-off for the end-of-run verify step, takes precedence + /// over `--verify-tests` and `loop.end_of_run.verify_tests` + /// config. Useful when a config opts in globally but a particular + /// run shouldn't pay the verify cost (e.g. dry-run experiments). + #[arg(long, default_value_t = false, action = clap::ArgAction::SetTrue)] + pub no_verify_tests: bool, + + /// Override the resolved verify command for this run. 
Empty = + /// fall back to `loop.end_of_run.verify_command` config, then to + /// project-authored signals (justfile/Makefile/package.json + /// `test`) via `hew_core::gate::detect`. + #[arg(long)] + pub verify_command: Option, } /// Resolve the effective [`LoopPlannerConfig`] for this run. Precedence: @@ -834,6 +856,79 @@ pub fn run_loop_with_scope( ) } +/// End-of-run verify step (`hew-bon7`). Opt-in via `--verify-tests` +/// or `loop.end_of_run.verify_tests = true`. Resolves the command +/// from CLI > config > [`hew_core::gate::detect`], spawns it under +/// the configured wall budget, records the outcome onto `run.verify_outcome`, +/// re-writes `run.json` so the persisted summary matches, and writes +/// a `STATUS:loop-verify-failed:` memory on failure so the +/// next session sees the regression. No-op when verify is disabled +/// (no record written, summary line absent). +fn maybe_run_verify_step( + ctx: &Ctx, + args: &Args, + bd: &dyn BdClient, + run: &mut Run, + working_dir: &Path, + run_dir: &Path, + worker_n: Option, +) { + // CLI > config > defaults. `--no-verify-tests` always wins so a + // global config opt-in can be vetoed per-run. 
+ let cfg = match hew_core::config::load() { + Ok(c) => c.loop_cfg.end_of_run, + Err(_) => hew_core::config::LoopEndOfRunConfig::default(), + }; + if args.no_verify_tests { + return; + } + let enabled = args.verify_tests || cfg.verify_tests; + if !enabled { + return; + } + + let gate = hew_core::gate::detect(working_dir); + let command = hew_core::verify::resolve_command( + args.verify_command.as_deref(), + Some(&cfg.verify_command), + &gate, + ); + let outcome = match command { + None => hew_core::verify::VerifyOutcome::Skipped { reason: "no command resolved".into() }, + Some(cmd) => { + let budget = hew_core::config::parse_budget_wall(&cfg.verify_budget_wall) + .unwrap_or_else(|_| Duration::from_secs(600)); + let log_path = run_dir.join("verify.log"); + if !ctx.quiet { + eprintln!("hew loop verify: {} (budget {}s)", cmd.join(" "), budget.as_secs()); + } + hew_core::verify::run_verify(&cmd, working_dir, &log_path, budget) + } + }; + + // Persist on the in-memory Run before re-writing run.json so the + // summary line + manifest see a consistent state. + run.verify_outcome = Some(outcome.clone()); + let _ = write_json_atomic(&run_log_path(run_dir, worker_n), &RunLog::from_run(run)); + + // STATUS memory on failure — survives across sessions so the next + // resume sees the regression. We deliberately do NOT file a bd + // task because closed work is not rolled back; the memory is the + // breadcrumb, the user decides on follow-up. + if outcome.is_failure() { + let summary = outcome.summary_line(); + let body = format!( + "STATUS:loop-verify-failed:{} — {} (run-dir={})", + run.id, + summary, + run_dir.display(), + ); + if let Err(e) = bd.remember(&body) { + tracing::warn!("failed to file STATUS:loop-verify-failed memory: {e}"); + } + } +} + /// Today's single-worker loop, factored out so [`run_loop_with`] can /// branch on `--jobs N` without touching the existing code path. 
The /// body below is the original `run_loop_with` verbatim — the rename is @@ -892,7 +987,7 @@ fn run_loop_serial( }; let started_at = iso_now_utc(); - let outcome = run_worker_loop_with_scope( + let mut outcome = run_worker_loop_with_scope( ctx, &args, bd, @@ -911,6 +1006,13 @@ fn run_loop_serial( scope.clone(), )?; + // End-of-run verify (hew-bon7): opt-in. Runs in the project root + // for the serial path; on `--jobs N>=2` the parallel path runs it + // after merge-back below. Records the outcome onto `Run.verify_outcome` + // so the summary line + STATUS memory + exit code branch on a + // single value. + maybe_run_verify_step(ctx, &args, bd, &mut outcome.run, project_root, &dir, worker.worker_n); + // Dispatcher-shutdown manifest: lists every worker that // participated in the run + their final outcome. v1 has a single // worker; the future parallel dispatcher folds N outcomes into the @@ -928,6 +1030,14 @@ fn run_loop_serial( let scope = Some(outcome.run.config.scope.clone()); print_summary(ctx, &outcome.run, &outcome.iter_logs, &dir, scope); + + // Verify failure ⇒ non-zero exit (acceptance: "CI / wrapper scripts + // can branch on this"). Closed tasks are NOT rolled back; the + // STATUS:loop-verify-failed memory + summary line + non-zero exit + // are the durable signals. + if outcome.run.verify_outcome.as_ref().is_some_and(|v| v.is_failure()) { + return Err(miette::miette!("verify-tests failed")); + } Ok(()) } @@ -1147,6 +1257,17 @@ fn run_loop_parallel( } } + // End-of-run verify (hew-bon7). On the parallel path the verify + // command runs in `project_root` (post-merge-back HEAD) and the + // outcome is recorded on the first worker's Run so the existing + // `print_summary(first, ...)` contract picks it up. Per-worker + // outcomes are not duplicated — the verify proves the stacked + // post-merge state, not any one worker's branch. 
+ if let Some(first) = worker_outcomes.first_mut() { + let worker_n = workers.first().and_then(|w| w.worker_n); + maybe_run_verify_step(ctx, &args, bd, &mut first.run, project_root, &dir, worker_n); + } + // Per-worker manifest rows; jobs reflects the dispatcher's slot // count (matches the user's --jobs N, post-clamp). let manifest_rows: Vec = workers @@ -1166,10 +1287,17 @@ fn run_loop_parallel( // v1: print the first worker's summary as a stand-in for the full // per-worker breakdown (that's hew-h0tu). Honors the existing // "print summary at end" contract so nothing downstream regresses. + let verify_failed = worker_outcomes + .first() + .and_then(|f| f.run.verify_outcome.as_ref()) + .is_some_and(|v| v.is_failure()); if let Some(first) = worker_outcomes.first() { let scope = Some(first.run.config.scope.clone()); print_summary(ctx, &first.run, &first.iter_logs, &dir, scope); } + if verify_failed { + return Err(miette::miette!("verify-tests failed")); + } Ok(()) } @@ -1844,6 +1972,7 @@ pub fn run_summary(ctx: &Ctx, args: SummaryArgs) -> miette::Result<()> { }) .collect(), stop_reason: rl.stop_reason.as_deref().and_then(hew_core::runner::StopReason::from_label), + verify_outcome: rl.verify_outcome.clone(), }; print_summary(ctx, &run, &iter_logs, &dir, rl.scope.clone()); @@ -1915,6 +2044,7 @@ fn run_summary_parallel(ctx: &Ctx, dir: &Path, manifest_path: &Path) -> miette:: config: RunConfig::default(), iters, stop_reason, + verify_outcome: None, }; // Scope is dispatcher-level and identical across workers; read it @@ -2305,6 +2435,9 @@ mod tests { no_planner: false, planner_budget: None, planner_runtime: None, + verify_tests: false, + no_verify_tests: false, + verify_command: None, } } diff --git a/hew/tests/loop_backpressure.rs b/hew/tests/loop_backpressure.rs index e0d2e3c..215c4dd 100644 --- a/hew/tests/loop_backpressure.rs +++ b/hew/tests/loop_backpressure.rs @@ -136,6 +136,9 @@ fn args_one_iter() -> Args { no_planner: false, planner_budget: None, 
planner_runtime: None, + verify_tests: false, + no_verify_tests: false, + verify_command: None, } } @@ -828,6 +831,9 @@ fn cooldown_routes_to_fallback_for_n_iters_then_retries_primary() { no_planner: false, planner_budget: None, planner_runtime: None, + verify_tests: false, + no_verify_tests: false, + verify_command: None, }; let fallback_cfg = FallbackConfig { runtime: Some(hew_core::runtime::RuntimeKind::Codex), cooldown_iters: 3 }; diff --git a/hew/tests/loop_dynamic_model.rs b/hew/tests/loop_dynamic_model.rs index e22881d..e295d85 100644 --- a/hew/tests/loop_dynamic_model.rs +++ b/hew/tests/loop_dynamic_model.rs @@ -70,6 +70,9 @@ fn args_one_dry_iter() -> Args { no_planner: false, planner_budget: None, planner_runtime: None, + verify_tests: false, + no_verify_tests: false, + verify_command: None, } } diff --git a/hew/tests/loop_parallel_e2e.rs b/hew/tests/loop_parallel_e2e.rs index 508bb5f..05b0f04 100644 --- a/hew/tests/loop_parallel_e2e.rs +++ b/hew/tests/loop_parallel_e2e.rs @@ -152,6 +152,9 @@ fn args_parallel(jobs: u32) -> Args { no_planner: false, planner_budget: None, planner_runtime: None, + verify_tests: false, + no_verify_tests: false, + verify_command: None, } } From e9c23c0d3b2cf96ecca70ac73246beb3b5a68022 Mon Sep 17 00:00:00 2001 From: droidnoob Date: Sat, 30 May 2026 12:18:51 +0530 Subject: [PATCH 8/8] feat(loop): hew loop graph DAG renderer (hew-m7lq) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - hew_core::loop_graph IR + mermaid/dot/ascii renderers (pure, no I/O) - builders read iter*.json, batch*.json, run.json, manifest.json - unhappy paths render distinctly: incomplete (dashed), cancelled (⊘), runtime-error+empty-stderr ("possibly hung"), backpressure rollback (↺ self-edge), verify outcomes (passed/failed/skipped) - parallel runs lay out per-worker swimlanes from manifest.json - pre-batch-plan legacy runs render with sequential edges only - CLI: hew loop graph [--run-id ID] [--format ...] 
[--out PATH] [--all] - 13 unit tests covering each acceptance criterion + 5 e2e CLI tests - docs/LOOP.md § Loop graph section + CHANGELOG entry Closes epic hew-lf40 (8/8 children). --- CHANGELOG.md | 19 + docs/LOOP.md | 95 +++ hew-core/src/lib.rs | 1 + hew-core/src/loop_graph.rs | 1259 ++++++++++++++++++++++++++++++++++ hew/src/commands/loop_cmd.rs | 85 +++ hew/tests/loop_graph_e2e.rs | 177 +++++ 6 files changed, 1636 insertions(+) create mode 100644 hew-core/src/loop_graph.rs create mode 100644 hew/tests/loop_graph_e2e.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index b91ac23..8f18447 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,25 @@ versioning follows [Semantic Versioning](https://semver.org/). ### Added +- **`hew loop graph` DAG renderer (`hew-m7lq`).** Renders the loop's + iter + batch + run + manifest history as a directed graph in + mermaid (default), GraphViz `dot`, or terminal ASCII. Each iter is + a node labelled with task id, outcome glyph, duration, and tokens; + edges distinguish sequential next-iter, agent-suggested, + planner-suggested, fallback (`bd ready`), and backpressure + rollbacks. Unhappy paths render distinctly: incomplete iters get a + dashed border (`⋯`), cancelled-mid-run iters get `⊘` with the stop + timestamp, runtime errors with empty stderr annotate as `possibly + hung`, backpressure failures draw a `↺ rolled back` self-edge, and + verify outcomes get a coloured tail node (`Verify ✓` / `Verify ✗` / + `Verify (skipped)`). Parallel runs (`--jobs >= 2`) render + per-worker swimlanes from `manifest.json`. CLI: + `hew loop graph [--run-id ID] [--format mermaid|dot|ascii] + [--out PATH] [--all]`; `--out` ending in `.md` wraps the mermaid + body in a fenced \`\`\`mermaid block. `--all` aggregates every run + under `.hew/loop/` into one document with each as its own subgraph. + Pre-batch-plan legacy runs render with sequential edges only. See + `docs/LOOP.md` § Loop graph. Closes epic `hew-lf40`. 
- **End-of-run verify step for `hew loop` (`hew-bon7`).** Opt-in mandatory test step that runs after the last iter (and after merge-back on `--jobs N >= 2`) to prove the final stacked state is diff --git a/docs/LOOP.md b/docs/LOOP.md index 043b608..a2c3006 100644 --- a/docs/LOOP.md +++ b/docs/LOOP.md @@ -618,6 +618,101 @@ durable signals are: --- +## Loop graph + +After a run finishes (or even while it's still in flight), the +collection of iter/batch/run/manifest JSON under `.hew/loop//` +*is* a directed acyclic graph: iters connected by sequential +succession, batch suggestions, and parallel-worker swimlanes. +`hew loop graph` renders that DAG so the run's behavior — including +the unhappy paths the planner doesn't fix — is auditable at a glance. + +```sh +hew loop graph # latest run, mermaid to stdout +hew loop graph --run-id loop-2026... # specific run +hew loop graph --format=dot --out=run.dot # GraphViz +hew loop graph --out=run.md # mermaid wrapped in ```mermaid fence +hew loop graph --format=ascii # terminal-only, no unicode glyphs +hew loop graph --all # timeline across every run in .hew/loop/ +``` + +### Outcome glyphs + +| Outcome | Glyph | Mermaid class | dot color | meaning | +|---------------------|-------|--------------------|-----------|----------------------------------------| +| `closed` | ✓ | `iter-closed` | green | task closed cleanly | +| `no_close` | ◐ | `iter-no-close` | orange | spawner exited; no task closed | +| `runtime_error` | ✗ | `iter-runtime-err` | red | spawner returned a hard error | +| `backpressure_fail` | ↺ | `iter-backpressure`| red | tests/lint failed; commits reverted | +| **cancelled** | ⊘ | `iter-cancelled` | gray | `.stop` fired while this iter was live | +| **incomplete** | ⋯ | `iter-incomplete` | gray/dashed | started, never ended (crash mid-iter)| + +### Edge kinds + +| Edge | Mermaid syntax | Source | 
+|----------------------------|-----------------------------|--------------------------------------------------------------| +| Sequential next-iter | `iter1 --> iter2` | default dispatcher order | +| Agent-suggested | `iter1 -. agent .-> iter2` | previous iter's `next_iteration:` emit | +| Planner-suggested | `iter1 -. planner .-> iter2`| inter-iter planner subprocess | +| Fallback (trust-the-graph) | `iter1 == fallback ==> iter2` | no batch — dispatcher used `bd ready` | +| Rollback (backpressure) | `iter2 -.rolled back.-> iter1`| `↺` self-edge back to the iter before the failure | +| Verify | `iter_last --> verify` | end-of-run verify-tests node | + +### Unhappy paths + +The renderer makes the cases the planner can't fix legible: + +1. **Incomplete iter** (started, no `ended_at`) — `⋯` node with a + dashed border. The dispatcher crashed or was killed mid-iter. +2. **Cancelled mid-run** — when `run.stop_reason = cancelled` the + in-flight iter gets `⊘` instead of `⋯` and an annotation + `cancelled @ <ts>`. +3. **Runtime error with empty stderr** — annotated + `(no stderr — possibly hung)` so the operator can spot the + pattern of a runaway runtime. +4. **Backpressure with rollback** — `↺` self-edge from the failing + iter back to its predecessor with `rolled back` annotation. +5. **Verify failed** — the verify node renders red; the first three + matching lines from `verify.log` annotate it as failed-test + breadcrumbs. + +### Pre-batch-plan runs + +Runs from before the planner epic shipped have no `batch-*.json` +files. The renderer falls back to plain Sequential edges in that +case — no agent/planner/fallback styling shown. + +### Worked example + +A parallel `--jobs=2` run with one agent suggestion, one planner +pick, and a verify pass renders to: + +```mermaid +flowchart TD + subgraph worker-0 + w0_iter1["iter-1<br/>hew-a<br/>✓ 12s 1200t"] + w0_iter2["iter-2<br/>hew-b<br/>✓ 9s 980t"] + end + subgraph worker-1 + w1_iter1["iter-1<br/>hew-c<br/>✓ 14s 1100t"] + end + verify["Verify ✓"] + w0_iter1 -. agent .-> w0_iter2 + w0_iter2 --> verify + class w0_iter1 iter-closed; + class w0_iter2 iter-closed; + class w1_iter1 iter-closed; + class verify verify-passed; +``` + +### Out of scope (v1) + +- Live-updating diagrams (websocket / fswatch). Static snapshot only. +- Click-through navigation from a node to its iter log. +- Auto-export to GitHub Action artifacts. +- Task descriptions in node labels — only `task_id + iter_number` + ship today to avoid leaking private text into shareable diagrams. + ## Stop signals - `hew loop cancel` — touches `.hew/loop/<run-id>/.stop`. diff --git a/hew-core/src/lib.rs b/hew-core/src/lib.rs index 48789c9..6a99a4d 100644 --- a/hew-core/src/lib.rs +++ b/hew-core/src/lib.rs @@ -26,6 +26,7 @@ pub mod gate; pub mod git; pub mod guard; pub mod install; +pub mod loop_graph; pub mod loop_log; pub mod loop_model; pub mod loop_summary; diff --git a/hew-core/src/loop_graph.rs b/hew-core/src/loop_graph.rs new file mode 100644 index 0000000..81ba286 --- /dev/null +++ b/hew-core/src/loop_graph.rs @@ -0,0 +1,1259 @@ +//! `hew loop graph` — render a single (or multi-) run's history as a DAG. +//! +//! Two layers in one module: +//! * [`LoopGraph`] IR + pure renderers ([`render_mermaid`], +//! [`render_dot`], [`render_ascii`]) — no I/O. +//! * [`build_from_run_dir`] / [`build_from_loop_root`] — read iter + +//! batch + run + manifest JSON and lift them into the IR. +//! +//! The split keeps snapshot tests trivial: assemble an IR by hand, call +//! the renderer, compare against a fixed expected string. +//! +//! Unhappy paths the renderer must distinguish (per the task body): +//! * incomplete iter — `started_at` but no `ended_at` (⋯ dashed) +//! * cancelled mid-run — run stopped via `.stop`; the in-flight iter +//! gets ⊘ +//! * runtime error with empty stderr — possibly hung; annotate +//! * backpressure with rollback — ↺ self-edge with `rolled back` note +//! 
* verify failed — verify node renders red + failed test names +//! * pre-batchplan legacy runs — no `batch-*.json` files; sequential +//! edges only + +use std::collections::BTreeMap; +use std::fs; +use std::path::Path; + +use crate::batch_plan::BatchSource; +use crate::error::Result; +use crate::loop_log::{IterLog, Manifest, RunLog}; +use crate::verify::VerifyOutcome; + +/// Wire-format-agnostic outcome glyph for a node. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum OutcomeGlyph { + Closed, + NoClose, + RuntimeError, + BackpressureFail, + Cancelled, + Incomplete, +} + +impl OutcomeGlyph { + pub fn glyph(self) -> &'static str { + match self { + Self::Closed => "✓", + Self::NoClose => "◐", + Self::RuntimeError => "✗", + Self::BackpressureFail => "↺", + Self::Cancelled => "⊘", + Self::Incomplete => "⋯", + } + } + + pub fn ascii_glyph(self) -> &'static str { + match self { + Self::Closed => "OK", + Self::NoClose => "NC", + Self::RuntimeError => "ER", + Self::BackpressureFail => "BP", + Self::Cancelled => "CX", + Self::Incomplete => "..", + } + } + + /// Mermaid classDef name. Stable; LOOP.md docs depend on the spelling. + pub fn mermaid_class(self) -> &'static str { + match self { + Self::Closed => "iter-closed", + Self::NoClose => "iter-no-close", + Self::RuntimeError => "iter-runtime-err", + Self::BackpressureFail => "iter-backpressure", + Self::Cancelled => "iter-cancelled", + Self::Incomplete => "iter-incomplete", + } + } + + pub fn dot_color(self) -> &'static str { + match self { + Self::Closed => "green", + Self::NoClose => "orange", + Self::RuntimeError | Self::BackpressureFail => "red", + Self::Cancelled | Self::Incomplete => "gray", + } + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum EdgeKind { + /// Default sequential next-iter edge. + Sequential, + /// Previous iter's agent emitted `next_iteration:` containing this iter's task. + BatchAgent, + /// Planner sub-process picked this iter's task. 
+ BatchPlanner, + /// No batch — dispatcher used `bd ready`. + Fallback, + /// Backpressure rollback target. Carries the short sha in `annotation`. + Rollback, + /// Final verify-tests edge into the verify node. + Verify, +} + +#[derive(Clone, Debug)] +pub struct Node { + pub id: String, + pub iter_number: u32, + pub worker_n: Option, + pub task_id: Option, + pub outcome: OutcomeGlyph, + pub tokens: u64, + pub duration_secs: Option, + /// True when the iter is a runtime error with empty stderr — strong + /// hint that the subprocess hung. Surface as label annotation. + pub stderr_hung: bool, +} + +#[derive(Clone, Debug)] +pub struct VerifyNode { + pub outcome: VerifyOutcome, + /// Top 3 failed test names, when known. Annotates the verify node + /// in the renderer. + pub failure_lines: Vec, +} + +#[derive(Clone, Debug)] +pub struct Edge { + pub from: String, + pub to: String, + pub kind: EdgeKind, + pub annotation: Option, +} + +/// In-memory representation of one run's DAG. +#[derive(Clone, Debug, Default)] +pub struct LoopGraph { + pub run_id: String, + pub nodes: Vec, + pub edges: Vec, + pub verify: Option, + /// ISO timestamp of the cancel signal, when the run terminated via + /// `.stop`. Annotates the cancelled node. + pub cancelled_at: Option, + /// Worker IDs present in the run, sorted. Empty for the `--jobs=1` + /// fast path; >=2 entries for parallel runs (subgraphs). + pub workers: Vec, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Format { + Mermaid, + Dot, + Ascii, +} + +// --------------------------------------------------------------------------- +// Builders +// --------------------------------------------------------------------------- + +/// Read a single run-dir and build its [`LoopGraph`]. 
+pub fn build_from_run_dir(run_dir: &Path) -> Result { + let run_log = read_run_log(run_dir)?; + let manifest = read_manifest(run_dir); + + let mut g = LoopGraph { + run_id: run_log.as_ref().map(|r| r.id.clone()).unwrap_or_else(|| { + run_dir.file_name().and_then(|s| s.to_str()).unwrap_or("loop-unknown").to_string() + }), + ..LoopGraph::default() + }; + + let cancelled = run_log.as_ref().and_then(|r| r.stop_reason.as_deref()) == Some("cancelled"); + + if let Some(m) = manifest.as_ref() { + g.workers = m.workers.iter().map(|w| w.id).collect(); + g.workers.sort_unstable(); + for w in &m.workers { + let dir = match w.log_subdir.as_deref() { + Some(sub) => run_dir.join(sub), + None => run_dir.to_path_buf(), + }; + ingest_worker(&mut g, &dir, Some(w.id), cancelled)?; + } + } else { + ingest_worker(&mut g, run_dir, None, cancelled)?; + } + + // Mark cancellation annotation timestamp (best-effort: use the + // last_updated_at from run.json since the .stop file mtime is the + // ground truth but reading it adds I/O for a cosmetic annotation). + if cancelled && let Some(rl) = run_log.as_ref() { + g.cancelled_at = Some(rl.last_updated_at.clone()); + } + + // Verify node sits at the tail. + if let Some(rl) = run_log.as_ref() + && let Some(out) = rl.verify_outcome.clone() + { + let failure_lines = read_verify_failure_lines(run_dir); + g.verify = Some(VerifyNode { outcome: out, failure_lines }); + // Wire the last seen iter into the verify node. 
+ if let Some(last) = g.nodes.last() { + g.edges.push(Edge { + from: last.id.clone(), + to: "verify".into(), + kind: EdgeKind::Verify, + annotation: None, + }); + } + } + + Ok(g) +} + +fn ingest_worker( + g: &mut LoopGraph, + dir: &Path, + worker_n: Option, + cancelled: bool, +) -> Result<()> { + let iter_logs = collect_iter_logs(dir)?; + if iter_logs.is_empty() { + return Ok(()); + } + let last_idx = iter_logs.len() - 1; + + let prefix = match worker_n { + Some(n) => format!("w{n}_"), + None => String::new(), + }; + + let node_ids: Vec = + iter_logs.iter().map(|l| format!("{prefix}iter{}", l.number)).collect(); + + for (idx, log) in iter_logs.iter().enumerate() { + let is_last = idx == last_idx; + let outcome = classify_outcome(log, cancelled && is_last); + let stderr_hung = matches!(log.outcome.as_deref(), Some("runtime_error")) + && log.stderr_tail.as_deref().is_none_or(str::is_empty); + + g.nodes.push(Node { + id: node_ids[idx].clone(), + iter_number: log.number, + worker_n, + task_id: log.task_id.clone(), + outcome, + tokens: log.cost.total(), + duration_secs: duration_secs(log), + stderr_hung, + }); + + if idx > 0 { + let from = node_ids[idx - 1].clone(); + let to = node_ids[idx].clone(); + // Determine edge kind from the batch plan for this iter (if any). 
+ let edge_kind = batch_edge_kind(dir, log.number); + g.edges.push(Edge { from, to, kind: edge_kind, annotation: None }); + } + + if matches!(outcome, OutcomeGlyph::BackpressureFail) && idx > 0 { + g.edges.push(Edge { + from: node_ids[idx].clone(), + to: node_ids[idx - 1].clone(), + kind: EdgeKind::Rollback, + annotation: Some("rolled back".into()), + }); + } + } + Ok(()) +} + +fn classify_outcome(log: &IterLog, cancelled_in_flight: bool) -> OutcomeGlyph { + if log.ended_at.is_none() { + return if cancelled_in_flight { + OutcomeGlyph::Cancelled + } else { + OutcomeGlyph::Incomplete + }; + } + match log.outcome.as_deref() { + Some("closed") => OutcomeGlyph::Closed, + Some("no_close") => OutcomeGlyph::NoClose, + Some("runtime_error") => OutcomeGlyph::RuntimeError, + Some("backpressure_fail") => OutcomeGlyph::BackpressureFail, + _ if cancelled_in_flight => OutcomeGlyph::Cancelled, + _ => OutcomeGlyph::NoClose, + } +} + +fn duration_secs(log: &IterLog) -> Option { + let start = parse_iso(&log.started_at)?; + let end = parse_iso(log.ended_at.as_deref()?)?; + Some(end.saturating_sub(start)) +} + +/// Truncated ISO 8601 parser sufficient for `YYYY-MM-DDTHH:MM:SSZ`. +/// Returns seconds since unix epoch. +fn parse_iso(s: &str) -> Option { + // YYYY-MM-DDTHH:MM:SSZ — 20 chars + if s.len() < 19 { + return None; + } + let y: i64 = s.get(0..4)?.parse().ok()?; + let mo: u32 = s.get(5..7)?.parse().ok()?; + let d: u32 = s.get(8..10)?.parse().ok()?; + let h: u32 = s.get(11..13)?.parse().ok()?; + let mi: u32 = s.get(14..16)?.parse().ok()?; + let se: u32 = s.get(17..19)?.parse().ok()?; + // Days-from-civil: Howard Hinnant. Good for the 1970..3000 range we care about. 
+ let y = if mo <= 2 { y - 1 } else { y }; + let era = if y >= 0 { y / 400 } else { (y - 399) / 400 }; + let yoe = (y - era * 400) as u64; + let doy: u64 = (153 * (if mo > 2 { mo - 3 } else { mo + 9 }) as u64 + 2) / 5 + d as u64 - 1; + let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; + let days = era * 146_097 + doe as i64 - 719_468; + if days < 0 { + return None; + } + Some((days as u64) * 86_400 + (h as u64) * 3_600 + (mi as u64) * 60 + se as u64) +} + +fn batch_edge_kind(dir: &Path, iter_number: u32) -> EdgeKind { + match crate::batch_plan::read(dir, iter_number) { + Ok(Some(plan)) => match plan.source { + BatchSource::Agent => EdgeKind::BatchAgent, + BatchSource::Planner => EdgeKind::BatchPlanner, + BatchSource::Skipped => EdgeKind::Fallback, + }, + _ => EdgeKind::Sequential, + } +} + +fn collect_iter_logs(dir: &Path) -> Result> { + let mut out: Vec = Vec::new(); + let entries = match fs::read_dir(dir) { + Ok(it) => it, + Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(out), + Err(e) => return Err(e.into()), + }; + for entry in entries.flatten() { + let path = entry.path(); + let Some(name) = path.file_name().and_then(|s| s.to_str()) else { continue }; + if !name.starts_with("iter-") || !name.ends_with(".json") { + continue; + } + if let Ok(body) = fs::read_to_string(&path) + && let Ok(log) = serde_json::from_str::(&body) + { + out.push(log); + } + } + out.sort_by_key(|l| l.number); + Ok(out) +} + +fn read_run_log(dir: &Path) -> Result> { + let path = dir.join("run.json"); + let body = match fs::read_to_string(&path) { + Ok(s) => s, + Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None), + Err(e) => return Err(e.into()), + }; + Ok(serde_json::from_str(&body).ok()) +} + +fn read_manifest(dir: &Path) -> Option { + let path = dir.join("manifest.json"); + let body = fs::read_to_string(&path).ok()?; + serde_json::from_str(&body).ok() +} + +fn read_verify_failure_lines(dir: &Path) -> Vec { + let path = dir.join("verify.log"); + let 
body = match fs::read_to_string(&path) { + Ok(s) => s, + Err(_) => return Vec::new(), + }; + body.lines() + .filter(|l| l.contains("FAILED") || l.contains("failed") || l.contains("test ")) + .map(str::to_string) + .take(3) + .collect() +} + +/// Build one graph per run under `loop_root`, sorted by run-id. +pub fn build_from_loop_root(loop_root: &Path) -> Result> { + let mut out = Vec::new(); + let entries = match fs::read_dir(loop_root) { + Ok(it) => it, + Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(out), + Err(e) => return Err(e.into()), + }; + let mut dirs: Vec<_> = entries + .flatten() + .map(|e| e.path()) + .filter(|p| { + p.is_dir() + && p.file_name().and_then(|n| n.to_str()).is_some_and(|n| n.starts_with("loop-")) + }) + .collect(); + dirs.sort(); + for d in dirs { + out.push(build_from_run_dir(&d)?); + } + Ok(out) +} + +// --------------------------------------------------------------------------- +// Rendering +// --------------------------------------------------------------------------- + +/// Entry point: render one graph in the given format. +pub fn render(g: &LoopGraph, format: Format) -> String { + match format { + Format::Mermaid => render_mermaid(g), + Format::Dot => render_dot(g), + Format::Ascii => render_ascii(g), + } +} + +/// Render N graphs as a single document. `--all` mode wraps each run as +/// its own subgraph (mermaid/dot) or a stacked section (ascii). 
+pub fn render_all(graphs: &[LoopGraph], format: Format) -> String { + match format { + Format::Mermaid => render_all_mermaid(graphs), + Format::Dot => render_all_dot(graphs), + Format::Ascii => render_all_ascii(graphs), + } +} + +fn render_mermaid(g: &LoopGraph) -> String { + let mut out = String::new(); + out.push_str("flowchart TD\n"); + render_mermaid_body(&mut out, g, ""); + out +} + +fn render_all_mermaid(graphs: &[LoopGraph]) -> String { + let mut out = String::new(); + out.push_str("flowchart TD\n"); + for g in graphs { + let safe = sanitize_id(&g.run_id); + out.push_str(&format!(" subgraph {safe}[\"{}\"]\n", g.run_id)); + render_mermaid_body(&mut out, g, " "); + out.push_str(" end\n"); + } + out +} + +fn render_mermaid_body(out: &mut String, g: &LoopGraph, indent: &str) { + // Group nodes by worker for swimlanes when parallel. + let nodes_by_worker = group_nodes_by_worker(g); + let parallel = g.workers.len() >= 2; + + for (worker, nodes) in &nodes_by_worker { + if parallel && let Some(w) = worker { + out.push_str(&format!("{indent} subgraph worker-{w}\n")); + } + for n in nodes { + let label = mermaid_label(g, n); + let shape = if matches!(n.outcome, OutcomeGlyph::Incomplete) { + format!("{}[/\"{}\"\\]", n.id, label) + } else { + format!("{}[\"{}\"]", n.id, label) + }; + let pad = if parallel { " " } else { " " }; + out.push_str(&format!("{indent}{pad}{shape}\n")); + } + if parallel && worker.is_some() { + out.push_str(&format!("{indent} end\n")); + } + } + + // Verify node. + if let Some(v) = &g.verify { + let label = verify_label(v); + out.push_str(&format!("{indent} verify[\"{label}\"]\n")); + } + + // Edges. + for e in &g.edges { + let arrow = match e.kind { + EdgeKind::Sequential => format!("{} --> {}", e.from, e.to), + EdgeKind::BatchAgent => format!("{} -. agent .-> {}", e.from, e.to), + EdgeKind::BatchPlanner => format!("{} -. 
planner .-> {}", e.from, e.to), + EdgeKind::Fallback => format!("{} == fallback ==> {}", e.from, e.to), + EdgeKind::Rollback => { + let ann = e.annotation.as_deref().unwrap_or("rollback"); + format!("{} -.{ann}.-> {}", e.from, e.to) + } + EdgeKind::Verify => format!("{} --> {}", e.from, e.to), + }; + out.push_str(&format!("{indent} {arrow}\n")); + } + + // Class assignments. + for n in &g.nodes { + out.push_str(&format!("{indent} class {} {};\n", n.id, n.outcome.mermaid_class())); + } + if let Some(v) = &g.verify { + let cls = match &v.outcome { + VerifyOutcome::Passed { .. } => "verify-passed", + VerifyOutcome::Failed { .. } | VerifyOutcome::TimedOut { .. } => "verify-failed", + VerifyOutcome::Skipped { .. } => "verify-skipped", + }; + out.push_str(&format!("{indent} class verify {cls};\n")); + } +} + +fn mermaid_label(g: &LoopGraph, n: &Node) -> String { + let glyph = n.outcome.glyph(); + let dur = n.duration_secs.map(|s| format!("{s}s")).unwrap_or_else(|| "-".into()); + let task = n.task_id.as_deref().unwrap_or("-"); + let mut s = format!("iter-{}
{}
{} {} {}t", n.iter_number, task, glyph, dur, n.tokens); + if n.stderr_hung { + s.push_str("
(no stderr — possibly hung)"); + } + if matches!(n.outcome, OutcomeGlyph::Cancelled) + && let Some(ts) = g.cancelled_at.as_deref() + { + s.push_str(&format!("
cancelled @ {ts}")); + } + s +} + +fn verify_label(v: &VerifyNode) -> String { + let head = match &v.outcome { + VerifyOutcome::Passed { .. } => "Verify ✓", + VerifyOutcome::Failed { .. } => "Verify ✗", + VerifyOutcome::Skipped { .. } => "Verify (skipped)", + VerifyOutcome::TimedOut { .. } => "Verify ⏱", + }; + let mut s = head.to_string(); + if !v.failure_lines.is_empty() { + s.push_str("
"); + for (i, l) in v.failure_lines.iter().enumerate() { + if i > 0 { + s.push_str("
"); + } + // Escape any embedded quotes for the mermaid string. + s.push_str(&l.replace('"', "'")); + } + } + s +} + +fn render_dot(g: &LoopGraph) -> String { + let mut out = String::new(); + out.push_str("digraph loop {\n"); + out.push_str(" rankdir=TB;\n"); + out.push_str(" node [shape=box, fontname=\"Helvetica\"];\n"); + render_dot_body(&mut out, g, ""); + out.push_str("}\n"); + out +} + +fn render_all_dot(graphs: &[LoopGraph]) -> String { + let mut out = String::new(); + out.push_str("digraph loop_all {\n"); + out.push_str(" rankdir=TB;\n"); + out.push_str(" node [shape=box, fontname=\"Helvetica\"];\n"); + for g in graphs { + let safe = sanitize_id(&g.run_id); + out.push_str(&format!(" subgraph cluster_{safe} {{\n")); + out.push_str(&format!(" label=\"{}\";\n", g.run_id)); + render_dot_body(&mut out, g, " "); + out.push_str(" }\n"); + } + out.push_str("}\n"); + out +} + +fn render_dot_body(out: &mut String, g: &LoopGraph, indent: &str) { + let nodes_by_worker = group_nodes_by_worker(g); + let parallel = g.workers.len() >= 2; + for (worker, nodes) in &nodes_by_worker { + if parallel && let Some(w) = worker { + out.push_str(&format!("{indent} subgraph cluster_worker_{w} {{\n")); + out.push_str(&format!("{indent} label=\"worker-{w}\";\n")); + } + for n in nodes { + let label = dot_label(g, n); + let style = + if matches!(n.outcome, OutcomeGlyph::Incomplete) { ", style=dashed" } else { "" }; + out.push_str(&format!( + "{indent} {} [label=\"{}\", color={}{}];\n", + n.id, + label, + n.outcome.dot_color(), + style + )); + } + if parallel && worker.is_some() { + out.push_str(&format!("{indent} }}\n")); + } + } + if let Some(v) = &g.verify { + let color = match &v.outcome { + VerifyOutcome::Passed { .. } => "green", + VerifyOutcome::Failed { .. } | VerifyOutcome::TimedOut { .. } => "red", + VerifyOutcome::Skipped { .. } => "gray", + }; + let label = verify_label(v).replace("
", "\\n"); + out.push_str(&format!("{indent} verify [label=\"{label}\", color={color}];\n")); + } + for e in &g.edges { + let style = match e.kind { + EdgeKind::Sequential | EdgeKind::Verify => "", + EdgeKind::BatchAgent => " [style=dotted, label=\"agent\"]", + EdgeKind::BatchPlanner => " [style=dotted, label=\"planner\"]", + EdgeKind::Fallback => " [style=bold, label=\"fallback\"]", + EdgeKind::Rollback => " [style=dashed, label=\"rolled back\"]", + }; + out.push_str(&format!("{indent} {} -> {}{};\n", e.from, e.to, style)); + } +} + +fn dot_label(g: &LoopGraph, n: &Node) -> String { + let glyph = n.outcome.glyph(); + let dur = n.duration_secs.map(|s| format!("{s}s")).unwrap_or_else(|| "-".into()); + let task = n.task_id.as_deref().unwrap_or("-"); + let mut s = format!("iter-{}\\n{}\\n{} {} {}t", n.iter_number, task, glyph, dur, n.tokens); + if n.stderr_hung { + s.push_str("\\n(no stderr — possibly hung)"); + } + if matches!(n.outcome, OutcomeGlyph::Cancelled) + && let Some(ts) = g.cancelled_at.as_deref() + { + s.push_str(&format!("\\ncancelled @ {ts}")); + } + s +} + +fn render_ascii(g: &LoopGraph) -> String { + let mut out = String::new(); + out.push_str(&format!("run: {}\n", g.run_id)); + render_ascii_body(&mut out, g); + out +} + +fn render_all_ascii(graphs: &[LoopGraph]) -> String { + let mut out = String::new(); + for (i, g) in graphs.iter().enumerate() { + if i > 0 { + out.push('\n'); + } + out.push_str(&format!("=== {} ===\n", g.run_id)); + render_ascii_body(&mut out, g); + } + out +} + +fn render_ascii_body(out: &mut String, g: &LoopGraph) { + if g.nodes.is_empty() { + out.push_str(" (no iters)\n"); + } + let nodes_by_worker = group_nodes_by_worker(g); + let parallel = g.workers.len() >= 2; + for (worker, nodes) in &nodes_by_worker { + if parallel && let Some(w) = worker { + out.push_str(&format!("worker-{w}:\n")); + } + for (i, n) in nodes.iter().enumerate() { + let task = n.task_id.as_deref().unwrap_or("-"); + let dur = n.duration_secs.map(|s| 
format!("{s}s")).unwrap_or_else(|| "-".into()); + let glyph = n.outcome.ascii_glyph(); + let mut line = format!( + " [{:>2}] iter-{:>3} {} task={} {} {}t", + glyph, n.iter_number, glyph, task, dur, n.tokens + ); + if n.stderr_hung { + line.push_str(" (no stderr — possibly hung)"); + } + if matches!(n.outcome, OutcomeGlyph::Cancelled) + && let Some(ts) = g.cancelled_at.as_deref() + { + line.push_str(&format!(" cancelled @ {ts}")); + } + out.push_str(&line); + out.push('\n'); + if i + 1 < nodes.len() { + let edge = edge_between(g, &nodes[i].id, &nodes[i + 1].id); + out.push_str(&format!(" {}\n", ascii_edge_label(edge))); + } + } + } + if let Some(v) = &g.verify { + let head = match &v.outcome { + VerifyOutcome::Passed { .. } => "verify: OK", + VerifyOutcome::Failed { .. } => "verify: FAIL", + VerifyOutcome::Skipped { .. } => "verify: skipped", + VerifyOutcome::TimedOut { .. } => "verify: timed out", + }; + out.push_str(&format!(" {head}\n")); + for l in &v.failure_lines { + out.push_str(&format!(" {l}\n")); + } + } +} + +fn edge_between(g: &LoopGraph, from: &str, to: &str) -> Option { + g.edges + .iter() + .find(|e| e.from == from && e.to == to && !matches!(e.kind, EdgeKind::Rollback)) + .map(|e| e.kind) +} + +fn ascii_edge_label(kind: Option) -> &'static str { + match kind { + Some(EdgeKind::BatchAgent) => "| (agent)", + Some(EdgeKind::BatchPlanner) => "| (planner)", + Some(EdgeKind::Fallback) => "|| (fallback)", + Some(EdgeKind::Rollback) => "↺ (rolled back)", + Some(EdgeKind::Verify) => "|", + _ => "|", + } +} + +fn group_nodes_by_worker(g: &LoopGraph) -> Vec<(Option, Vec<&Node>)> { + // Preserve workers order from g.workers; ungrouped (worker_n=None) + // nodes come first when present. 
+ let mut buckets: BTreeMap, Vec<&Node>> = BTreeMap::new(); + for n in &g.nodes { + buckets.entry(n.worker_n).or_default().push(n); + } + let mut out: Vec<(Option, Vec<&Node>)> = Vec::new(); + if let Some(nodes) = buckets.remove(&None) { + out.push((None, nodes)); + } + for w in &g.workers { + if let Some(nodes) = buckets.remove(&Some(*w)) { + out.push((Some(*w), nodes)); + } + } + out +} + +fn sanitize_id(s: &str) -> String { + s.chars().map(|c| if c.is_ascii_alphanumeric() { c } else { '_' }).collect() +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use crate::batch_plan::{BatchPlan, SCHEMA_VERSION}; + use crate::loop_log::{ManifestWorker, write_json_atomic}; + use crate::runner::TokenSpend; + use std::path::PathBuf; + + fn tmpdir() -> PathBuf { + let base = std::env::temp_dir().join(format!( + "hew-loop-graph-{}-{}-{}", + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or_default(), + COUNTER.fetch_add(1, std::sync::atomic::Ordering::SeqCst), + )); + std::fs::create_dir_all(&base).unwrap(); + base + } + + static COUNTER: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0); + + fn write_iter( + dir: &Path, + n: u32, + task: &str, + outcome: &str, + started: &str, + ended: Option<&str>, + ) { + let log = IterLog { + number: n, + task_id: Some(task.into()), + started_at: started.into(), + ended_at: ended.map(str::to_string), + outcome: Some(outcome.into()), + prompt_prefix_hash: None, + cost: TokenSpend { input: 100, output: 50, cache_read: 0, cache_create: 0 }, + decisions: Vec::new(), + deferred: Vec::new(), + tool_calls: Vec::new(), + stderr_tail: None, + symbols_touched: Vec::new(), + runtime_used: None, + cooldown_engaged: false, + model: None, + }; + 
write_json_atomic(&dir.join(format!("iter-{n:03}.json")), &log).unwrap(); + } + + fn write_run(dir: &Path, id: &str, stop_reason: Option<&str>, verify: Option) { + let mut rl = RunLog { + id: id.into(), + started_at: "2026-05-30T00:00:00Z".into(), + last_updated_at: "2026-05-30T00:01:00Z".into(), + iter_count: 3, + cumulative_tokens: 450, + stop_reason: stop_reason.map(str::to_string), + max_iter: None, + strict: false, + interactive: false, + scope: None, + verify_outcome: verify, + }; + // suppress unused-field warnings; serialization carries everything. + rl.last_updated_at = "2026-05-30T00:01:00Z".into(); + write_json_atomic(&dir.join("run.json"), &rl).unwrap(); + } + + #[test] + fn graph_renders_simple_3_iter_run_as_mermaid() { + let dir = tmpdir(); + write_iter( + &dir, + 1, + "hew-a", + "closed", + "2026-05-30T00:00:00Z", + Some("2026-05-30T00:00:10Z"), + ); + write_iter( + &dir, + 2, + "hew-b", + "closed", + "2026-05-30T00:00:10Z", + Some("2026-05-30T00:00:20Z"), + ); + write_iter( + &dir, + 3, + "hew-c", + "closed", + "2026-05-30T00:00:20Z", + Some("2026-05-30T00:00:30Z"), + ); + write_run(&dir, "loop-simple", Some("ready_empty"), None); + + let g = build_from_run_dir(&dir).unwrap(); + let out = render(&g, Format::Mermaid); + assert!(out.starts_with("flowchart TD\n")); + assert!(out.contains("iter1[\"iter-1
hew-a
✓ 10s 150t\"]")); + assert!(out.contains("iter1 --> iter2")); + assert!(out.contains("iter2 --> iter3")); + assert!(out.contains("class iter1 iter-closed;")); + } + + #[test] + fn graph_renders_same_run_as_dot() { + let dir = tmpdir(); + write_iter( + &dir, + 1, + "hew-a", + "closed", + "2026-05-30T00:00:00Z", + Some("2026-05-30T00:00:10Z"), + ); + write_iter( + &dir, + 2, + "hew-b", + "closed", + "2026-05-30T00:00:10Z", + Some("2026-05-30T00:00:20Z"), + ); + write_run(&dir, "loop-dot", Some("ready_empty"), None); + + let g = build_from_run_dir(&dir).unwrap(); + let out = render(&g, Format::Dot); + assert!(out.starts_with("digraph loop {\n")); + assert!(out.contains("iter1 [label=\"iter-1\\nhew-a\\n✓ 10s 150t\", color=green];")); + assert!(out.contains("iter1 -> iter2;")); + assert!(out.ends_with("}\n")); + } + + #[test] + fn graph_handles_incomplete_iter_with_dashed_border() { + let dir = tmpdir(); + write_iter( + &dir, + 1, + "hew-a", + "closed", + "2026-05-30T00:00:00Z", + Some("2026-05-30T00:00:10Z"), + ); + // iter-2 started but never ended (no ended_at). + write_iter(&dir, 2, "hew-b", "closed", "2026-05-30T00:00:10Z", None); + write_run(&dir, "loop-incomplete", None, None); + + let g = build_from_run_dir(&dir).unwrap(); + let mermaid = render(&g, Format::Mermaid); + assert!(mermaid.contains("iter2[/\"")); + assert!(mermaid.contains("⋯")); + assert!(mermaid.contains("class iter2 iter-incomplete;")); + + let dot = render(&g, Format::Dot); + assert!(dot.contains("style=dashed")); + } + + #[test] + fn graph_handles_cancelled_run_with_annotation() { + let dir = tmpdir(); + write_iter( + &dir, + 1, + "hew-a", + "closed", + "2026-05-30T00:00:00Z", + Some("2026-05-30T00:00:10Z"), + ); + // iter-2 was running when .stop fired — no ended_at AND run.stop_reason=cancelled. 
+ write_iter(&dir, 2, "hew-b", "closed", "2026-05-30T00:00:10Z", None); + write_run(&dir, "loop-cancel", Some("cancelled"), None); + + let g = build_from_run_dir(&dir).unwrap(); + // The in-flight iter must classify as Cancelled (not Incomplete). + let last = g.nodes.last().unwrap(); + assert_eq!(last.outcome, OutcomeGlyph::Cancelled); + let out = render(&g, Format::Mermaid); + assert!(out.contains("⊘")); + assert!(out.contains("cancelled @")); + } + + #[test] + fn graph_renders_batch_source_edges_distinctly() { + let dir = tmpdir(); + write_iter( + &dir, + 1, + "hew-a", + "closed", + "2026-05-30T00:00:00Z", + Some("2026-05-30T00:00:10Z"), + ); + write_iter( + &dir, + 2, + "hew-b", + "closed", + "2026-05-30T00:00:10Z", + Some("2026-05-30T00:00:20Z"), + ); + write_iter( + &dir, + 3, + "hew-c", + "closed", + "2026-05-30T00:00:20Z", + Some("2026-05-30T00:00:30Z"), + ); + write_iter( + &dir, + 4, + "hew-d", + "closed", + "2026-05-30T00:00:30Z", + Some("2026-05-30T00:00:40Z"), + ); + write_run(&dir, "loop-batches", Some("ready_empty"), None); + // iter-2 was chosen by previous iter's agent emit + crate::batch_plan::write( + &dir, + &BatchPlan { + schema_version: SCHEMA_VERSION, + iter_number: 2, + task_ids: vec!["hew-b".into()], + source: BatchSource::Agent, + reason: None, + created_at: "2026-05-30T00:00:09Z".into(), + planner_tokens: None, + }, + ) + .unwrap(); + // iter-3 was chosen by the planner subprocess + crate::batch_plan::write( + &dir, + &BatchPlan { + schema_version: SCHEMA_VERSION, + iter_number: 3, + task_ids: vec!["hew-c".into()], + source: BatchSource::Planner, + reason: None, + created_at: "2026-05-30T00:00:19Z".into(), + planner_tokens: None, + }, + ) + .unwrap(); + // iter-4 fell back to trust-the-graph + crate::batch_plan::write( + &dir, + &BatchPlan { + schema_version: SCHEMA_VERSION, + iter_number: 4, + task_ids: Vec::new(), + source: BatchSource::Skipped, + reason: Some("planner_disabled".into()), + created_at: "2026-05-30T00:00:29Z".into(), + 
planner_tokens: None, + }, + ) + .unwrap(); + + let g = build_from_run_dir(&dir).unwrap(); + let out = render(&g, Format::Mermaid); + assert!(out.contains("iter1 -. agent .-> iter2"), "got: {out}"); + assert!(out.contains("iter2 -. planner .-> iter3")); + assert!(out.contains("iter3 == fallback ==> iter4")); + } + + #[test] + fn graph_renders_worker_swimlanes_for_parallel_run() { + let dir = tmpdir(); + let w0 = dir.join("worker-0"); + let w1 = dir.join("worker-1"); + std::fs::create_dir_all(&w0).unwrap(); + std::fs::create_dir_all(&w1).unwrap(); + write_iter(&w0, 1, "hew-a", "closed", "2026-05-30T00:00:00Z", Some("2026-05-30T00:00:10Z")); + write_iter(&w0, 2, "hew-b", "closed", "2026-05-30T00:00:10Z", Some("2026-05-30T00:00:20Z")); + write_iter(&w1, 1, "hew-c", "closed", "2026-05-30T00:00:00Z", Some("2026-05-30T00:00:15Z")); + let manifest = Manifest { + run_id: "loop-par".into(), + jobs: 2, + started_at: "2026-05-30T00:00:00Z".into(), + completed_at: "2026-05-30T00:00:20Z".into(), + workers: vec![ + ManifestWorker { + id: 0, + branch: "loop/par/w0".into(), + log_subdir: Some("worker-0".into()), + iter_count: 2, + cumulative_tokens: 300, + stop_reason: Some("ready_empty".into()), + }, + ManifestWorker { + id: 1, + branch: "loop/par/w1".into(), + log_subdir: Some("worker-1".into()), + iter_count: 1, + cumulative_tokens: 150, + stop_reason: Some("ready_empty".into()), + }, + ], + }; + crate::loop_log::write_manifest(&dir, &manifest).unwrap(); + write_run(&dir, "loop-par", Some("ready_empty"), None); + + let g = build_from_run_dir(&dir).unwrap(); + assert_eq!(g.workers, vec![0, 1]); + let out = render(&g, Format::Mermaid); + assert!(out.contains("subgraph worker-0")); + assert!(out.contains("subgraph worker-1")); + assert!(out.contains("w0_iter1")); + assert!(out.contains("w1_iter1")); + } + + #[test] + fn graph_renders_backpressure_rollback_edge_with_target() { + let dir = tmpdir(); + write_iter( + &dir, + 1, + "hew-a", + "closed", + "2026-05-30T00:00:00Z", + 
Some("2026-05-30T00:00:10Z"), + ); + write_iter( + &dir, + 2, + "hew-b", + "backpressure_fail", + "2026-05-30T00:00:10Z", + Some("2026-05-30T00:00:20Z"), + ); + write_run(&dir, "loop-bp", Some("guard_trip"), None); + + let g = build_from_run_dir(&dir).unwrap(); + let bp = g.nodes.last().unwrap(); + assert_eq!(bp.outcome, OutcomeGlyph::BackpressureFail); + let out = render(&g, Format::Mermaid); + assert!(out.contains("↺") || out.contains("iter-backpressure")); + // Rollback edge from bp back to the previous iter. + assert!(out.contains("iter2 -.rolled back.-> iter1")); + } + + #[test] + fn graph_renders_verify_node_passed_failed_skipped() { + for (out_outcome, expected_class, expected_glyph) in [ + ( + VerifyOutcome::Passed { command: "cargo test".into(), duration_secs: 12 }, + "verify-passed", + "Verify ✓", + ), + ( + VerifyOutcome::Failed { + command: "cargo test".into(), + exit_code: 1, + duration_secs: 22, + stderr_tail: "boom".into(), + }, + "verify-failed", + "Verify ✗", + ), + ( + VerifyOutcome::Skipped { reason: "no test cmd".into() }, + "verify-skipped", + "Verify (skipped)", + ), + ] { + let dir = tmpdir(); + write_iter( + &dir, + 1, + "hew-a", + "closed", + "2026-05-30T00:00:00Z", + Some("2026-05-30T00:00:10Z"), + ); + write_run(&dir, "loop-verify", Some("ready_empty"), Some(out_outcome)); + let g = build_from_run_dir(&dir).unwrap(); + let out = render(&g, Format::Mermaid); + assert!(out.contains(expected_class), "missing {expected_class} in: {out}"); + assert!(out.contains(expected_glyph), "missing {expected_glyph} in: {out}"); + } + } + + #[test] + fn graph_renders_runtime_error_with_hung_annotation() { + let dir = tmpdir(); + write_iter( + &dir, + 1, + "hew-a", + "closed", + "2026-05-30T00:00:00Z", + Some("2026-05-30T00:00:10Z"), + ); + // runtime_error with no stderr_tail set → "possibly hung" + write_iter( + &dir, + 2, + "hew-b", + "runtime_error", + "2026-05-30T00:00:10Z", + Some("2026-05-30T00:00:20Z"), + ); + write_run(&dir, "loop-hung", 
Some("runtime_error"), None); + + let g = build_from_run_dir(&dir).unwrap(); + assert!(g.nodes[1].stderr_hung); + let out = render(&g, Format::Mermaid); + assert!(out.contains("no stderr — possibly hung")); + } + + #[test] + fn graph_all_mode_renders_each_run_as_subgraph() { + let root = tmpdir(); + let loop_root = root.join(".hew/loop"); + std::fs::create_dir_all(&loop_root).unwrap(); + for id in ["loop-aaa", "loop-bbb"] { + let d = loop_root.join(id); + std::fs::create_dir_all(&d).unwrap(); + write_iter( + &d, + 1, + "hew-x", + "closed", + "2026-05-30T00:00:00Z", + Some("2026-05-30T00:00:10Z"), + ); + write_run(&d, id, Some("ready_empty"), None); + } + let graphs = build_from_loop_root(&loop_root).unwrap(); + assert_eq!(graphs.len(), 2); + let out = render_all(&graphs, Format::Mermaid); + assert!(out.contains("subgraph loop_aaa")); + assert!(out.contains("subgraph loop_bbb")); + } + + #[test] + fn graph_handles_pre_batchplan_legacy_run() { + // No batch-*.json files written; edges should be Sequential + // (no agent/planner/fallback styling). 
+ let dir = tmpdir(); + write_iter( + &dir, + 1, + "hew-a", + "closed", + "2026-05-30T00:00:00Z", + Some("2026-05-30T00:00:10Z"), + ); + write_iter( + &dir, + 2, + "hew-b", + "closed", + "2026-05-30T00:00:10Z", + Some("2026-05-30T00:00:20Z"), + ); + write_run(&dir, "loop-legacy", Some("ready_empty"), None); + + let g = build_from_run_dir(&dir).unwrap(); + assert_eq!(g.edges.len(), 1); + assert!(matches!(g.edges[0].kind, EdgeKind::Sequential)); + let out = render(&g, Format::Mermaid); + assert!(!out.contains("agent")); + assert!(!out.contains("planner")); + assert!(!out.contains("fallback")); + assert!(out.contains("iter1 --> iter2")); + } + + #[test] + fn ascii_renderer_produces_terminal_friendly_output() { + let dir = tmpdir(); + write_iter( + &dir, + 1, + "hew-a", + "closed", + "2026-05-30T00:00:00Z", + Some("2026-05-30T00:00:10Z"), + ); + write_iter( + &dir, + 2, + "hew-b", + "closed", + "2026-05-30T00:00:10Z", + Some("2026-05-30T00:00:20Z"), + ); + write_run(&dir, "loop-ascii", Some("ready_empty"), None); + + let g = build_from_run_dir(&dir).unwrap(); + let out = render(&g, Format::Ascii); + assert!(out.starts_with("run: loop-ascii\n")); + // No unicode in the ASCII renderer. + assert!(out.is_ascii(), "ascii output contained non-ascii: {out}"); + } + + #[test] + fn parse_iso_handles_canonical_format() { + assert_eq!(parse_iso("1970-01-01T00:00:00Z"), Some(0)); + assert_eq!(parse_iso("1970-01-01T00:00:10Z"), Some(10)); + assert_eq!(parse_iso("2026-05-30T00:00:00Z"), parse_iso("2026-05-30T00:00:00Z")); + // Difference of one day: + let a = parse_iso("2026-05-30T00:00:00Z").unwrap(); + let b = parse_iso("2026-05-31T00:00:00Z").unwrap(); + assert_eq!(b - a, 86_400); + } +} diff --git a/hew/src/commands/loop_cmd.rs b/hew/src/commands/loop_cmd.rs index 6d8631a..0b833ec 100644 --- a/hew/src/commands/loop_cmd.rs +++ b/hew/src/commands/loop_cmd.rs @@ -224,6 +224,38 @@ pub enum LoopSub { /// records a `stop_reason`). 
Defaults to listing what would be /// removed; pass `--apply` to actually delete. PruneWorktrees(PruneWorktreesArgs), + /// Render the loop's iter+batch+run history as a DAG diagram + /// (mermaid by default; dot or ascii on opt-in). Defaults to the + /// most recent run; `--all` aggregates every run under + /// `.hew/loop/` into a single document. + Graph(GraphArgs), +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)] +#[clap(rename_all = "lowercase")] +pub enum GraphFormatArg { + Mermaid, + Dot, + Ascii, +} + +#[derive(Debug, ClapArgs)] +pub struct GraphArgs { + /// Run-id to render. Defaults to the most recent run. + #[arg(long)] + pub run_id: Option, + /// Output format. Defaults to `mermaid` for markdown embedding. + #[arg(long, value_enum, default_value_t = GraphFormatArg::Mermaid)] + pub format: GraphFormatArg, + /// Write to a file instead of stdout. When the path ends in `.md` + /// and format is mermaid, the output is wrapped in a fenced + /// ```mermaid block. + #[arg(long = "out")] + pub output_file: Option, + /// Aggregate every run under `.hew/loop/` into one document. Each + /// run renders as its own subgraph. 
+ #[arg(long, default_value_t = false, action = clap::ArgAction::SetTrue)] + pub all: bool, } #[derive(Debug, ClapArgs)] @@ -278,9 +310,62 @@ pub fn run(ctx: &Ctx, cmd: LoopCmd) -> miette::Result<()> { LoopSub::List(a) => run_list(ctx, a), LoopSub::Summary(a) => run_summary(ctx, a), LoopSub::PruneWorktrees(a) => run_prune_worktrees(ctx, a), + LoopSub::Graph(a) => run_graph(ctx, a), } } +pub fn run_graph(ctx: &Ctx, args: GraphArgs) -> miette::Result<()> { + let project_root = std::env::current_dir().map_err(|e| miette::miette!("cwd: {e}"))?; + let format = match args.format { + GraphFormatArg::Mermaid => hew_core::loop_graph::Format::Mermaid, + GraphFormatArg::Dot => hew_core::loop_graph::Format::Dot, + GraphFormatArg::Ascii => hew_core::loop_graph::Format::Ascii, + }; + + let body = if args.all { + let root = loop_root(&project_root); + let graphs = hew_core::loop_graph::build_from_loop_root(&root) + .map_err(|e| miette::miette!("build graphs: {e}"))?; + if graphs.is_empty() { + return Err(miette::miette!("no loop runs found in {}", root.display())); + } + hew_core::loop_graph::render_all(&graphs, format) + } else { + let run_id = match args.run_id { + Some(id) => id, + None => latest_run_id(&project_root)?, + }; + let dir = loop_root(&project_root).join(&run_id); + if !dir.exists() { + return Err(miette::miette!("run-dir not found: {}", dir.display())); + } + let g = hew_core::loop_graph::build_from_run_dir(&dir) + .map_err(|e| miette::miette!("build graph: {e}"))?; + hew_core::loop_graph::render(&g, format) + }; + + let wrapped = if matches!(args.format, GraphFormatArg::Mermaid) + && args.output_file.as_ref().is_some_and(|p| { + p.extension().and_then(|s| s.to_str()).is_some_and(|e| e.eq_ignore_ascii_case("md")) + }) { + format!("```mermaid\n{}```\n", body) + } else { + body + }; + + match args.output_file { + Some(path) => { + std::fs::write(&path, &wrapped) + .map_err(|e| miette::miette!("write {}: {e}", path.display()))?; + if !ctx.quiet { + println!("wrote 
{}", path.display()); + } + } + None => print!("{}", wrapped), + } + Ok(()) +} + #[derive(Debug, ClapArgs)] pub struct Args { /// Hard cap on iterations. Omit for unlimited (stop via other signals). diff --git a/hew/tests/loop_graph_e2e.rs b/hew/tests/loop_graph_e2e.rs new file mode 100644 index 0000000..b67d6b5 --- /dev/null +++ b/hew/tests/loop_graph_e2e.rs @@ -0,0 +1,177 @@ +//! `hew loop graph` end-to-end. Plants a tiny 2-iter run-dir with a +//! batch-plan + run.json and asserts the subcommand renders mermaid by +//! default + writes to `--out` when requested. +//! +//! Task: hew-m7lq. + +use std::path::Path; + +use assert_cmd::Command as AssertCmd; +use hew_core::batch_plan::{BatchPlan, BatchSource, SCHEMA_VERSION}; +use hew_core::loop_log::{IterLog, RunLog, run_dir, run_log_path, write_json_atomic}; +use hew_core::runner::TokenSpend; +use predicates::str::contains; + +fn write_iter(dir: &Path, n: u32, task: &str, started: &str, ended: &str) { + let log = IterLog { + number: n, + task_id: Some(task.into()), + started_at: started.into(), + ended_at: Some(ended.into()), + outcome: Some("closed".into()), + prompt_prefix_hash: None, + cost: TokenSpend { input: 100, output: 50, cache_read: 0, cache_create: 0 }, + decisions: Vec::new(), + deferred: Vec::new(), + tool_calls: Vec::new(), + stderr_tail: None, + symbols_touched: Vec::new(), + runtime_used: None, + cooldown_engaged: false, + model: None, + }; + write_json_atomic(&dir.join(format!("iter-{n:03}.json")), &log).unwrap(); +} + +fn write_run(dir: &Path, id: &str) { + let rl = RunLog { + id: id.into(), + started_at: "2026-05-30T00:00:00Z".into(), + last_updated_at: "2026-05-30T00:01:00Z".into(), + iter_count: 2, + cumulative_tokens: 300, + stop_reason: Some("ready_empty".into()), + max_iter: None, + strict: false, + interactive: false, + scope: None, + verify_outcome: None, + }; + write_json_atomic(&run_log_path(dir, None), &rl).unwrap(); +} + +fn plant_run(project_root: &Path, run_id: &str) { + let dir = 
run_dir(project_root, run_id).unwrap(); + write_iter(&dir, 1, "hew-a", "2026-05-30T00:00:00Z", "2026-05-30T00:00:10Z"); + write_iter(&dir, 2, "hew-b", "2026-05-30T00:00:10Z", "2026-05-30T00:00:20Z"); + hew_core::batch_plan::write( + &dir, + &BatchPlan { + schema_version: SCHEMA_VERSION, + iter_number: 2, + task_ids: vec!["hew-b".into()], + source: BatchSource::Agent, + reason: None, + created_at: "2026-05-30T00:00:09Z".into(), + planner_tokens: None, + }, + ) + .unwrap(); + write_run(&dir, run_id); +} + +#[test] +fn cli_loop_graph_renders_latest_run_to_stdout_as_mermaid() { + let tmp = tempfile::tempdir().unwrap(); + plant_run(tmp.path(), "loop-graph-e2e"); + + AssertCmd::cargo_bin("hew") + .unwrap() + .current_dir(tmp.path()) + .args(["loop", "graph"]) + .env("HEW_NO_UPDATE_CHECK", "1") + .env("NO_COLOR", "1") + .env("TERM", "dumb") + .assert() + .success() + .stdout(contains("flowchart TD")) + .stdout(contains("iter1 -. agent .-> iter2")); +} + +#[test] +fn cli_loop_graph_writes_to_output_file_when_provided() { + let tmp = tempfile::tempdir().unwrap(); + plant_run(tmp.path(), "loop-graph-e2e-out"); + let out_path = tmp.path().join("graph.md"); + + AssertCmd::cargo_bin("hew") + .unwrap() + .current_dir(tmp.path()) + .args(["loop", "graph", "--out", out_path.to_str().unwrap()]) + .env("HEW_NO_UPDATE_CHECK", "1") + .env("NO_COLOR", "1") + .env("TERM", "dumb") + .assert() + .success() + .stdout(contains("wrote")); + + let body = std::fs::read_to_string(&out_path).unwrap(); + // .md output wraps mermaid in a fenced block. 
+ assert!(body.starts_with("```mermaid\n"), "body: {body}"); + assert!(body.contains("flowchart TD")); + assert!(body.trim_end().ends_with("```")); +} + +#[test] +fn cli_loop_graph_supports_dot_and_ascii_formats() { + let tmp = tempfile::tempdir().unwrap(); + plant_run(tmp.path(), "loop-graph-e2e-fmt"); + + AssertCmd::cargo_bin("hew") + .unwrap() + .current_dir(tmp.path()) + .args(["loop", "graph", "--format", "dot"]) + .env("HEW_NO_UPDATE_CHECK", "1") + .env("NO_COLOR", "1") + .env("TERM", "dumb") + .assert() + .success() + .stdout(contains("digraph loop")) + .stdout(contains("iter1 -> iter2")); + + AssertCmd::cargo_bin("hew") + .unwrap() + .current_dir(tmp.path()) + .args(["loop", "graph", "--format", "ascii"]) + .env("HEW_NO_UPDATE_CHECK", "1") + .env("NO_COLOR", "1") + .env("TERM", "dumb") + .assert() + .success() + .stdout(contains("run: loop-graph-e2e-fmt")); +} + +#[test] +fn cli_loop_graph_all_aggregates_multiple_runs() { + let tmp = tempfile::tempdir().unwrap(); + plant_run(tmp.path(), "loop-graph-all-aaa"); + plant_run(tmp.path(), "loop-graph-all-bbb"); + + AssertCmd::cargo_bin("hew") + .unwrap() + .current_dir(tmp.path()) + .args(["loop", "graph", "--all"]) + .env("HEW_NO_UPDATE_CHECK", "1") + .env("NO_COLOR", "1") + .env("TERM", "dumb") + .assert() + .success() + .stdout(contains("subgraph loop_graph_all_aaa")) + .stdout(contains("subgraph loop_graph_all_bbb")); +} + +#[test] +fn cli_loop_graph_errors_when_run_dir_missing() { + let tmp = tempfile::tempdir().unwrap(); + // No runs planted. + + AssertCmd::cargo_bin("hew") + .unwrap() + .current_dir(tmp.path()) + .args(["loop", "graph"]) + .env("HEW_NO_UPDATE_CHECK", "1") + .env("NO_COLOR", "1") + .env("TERM", "dumb") + .assert() + .failure(); +}