brunojm · brunojm · Apr 17, 2026 · Apr 17, 2026 · Apr 17, 2026 · chatgpt-codex-connector
diff --git a/README.md b/README.md
@@ -62,10 +62,17 @@ Filters combine with AND logic.
 
 | Flag | Description |
 |------|-------------|
-| `--output <FORMAT>` | `json` (default), `jsonl`, or `summary` |
-| `--fields <FIELDS>` | Comma-separated. Valid names: `url`, `method`, `status`, `status-text`, `time`, `mime-type`, `started-date-time`. CLI names are kebab-case; emitted JSON keys preserve HAR camelCase (`statusText`, `mimeType`). Unknown names error at parse time. |
+| `--output <FORMAT>` | `json` (default, pretty in a TTY, compact when piped), `jsonl`, or `summary`. |
+| `--fields <FIELDS>` | Comma-separated. Valid names: `id`, `url`, `method`, `status`, `status-text`, `time`, `mime-type`, `started-date-time`. CLI names are kebab-case; emitted JSON keys preserve HAR camelCase (`statusText`, `mimeType`). Unknown names error at parse time. |
 | `--count` | Print only the count of matching entries. Conflicts with `--fields`, `--no-body`, `--output`. |
-| `--no-body` | Exclude request/response bodies |
+| `--overview` | Print a single JSON dashboard of the filtered HAR: entry count, status/method/MIME histograms, top 10 domains, total body size, total time. Replaces a cascade of exploratory queries with one call. Conflicts with `--count`, `--fields`, `--entry`, `--no-body`, `--include-all-bodies`, `--output`. |
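+| `--entry <N>` | Fetch a single entry by id (its original 0-indexed position in the HAR). Returns a JSON object, not an array. This is a direct lookup, not a filter: it conflicts with the filter flags (such as `--method` or `--status-range`) and with `--count`, `--fields`, `--output`, `--overview`. An out-of-range id is an error. |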
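+| `--no-body` | Exclude all request/response body text. Conflicts with `--count`, `--include-all-bodies`, `--overview`. |
+| `--include-all-bodies` | Include bodies for static-asset MIME types (CSS/JS/images/fonts/WASM). By default those are stripped to save tokens. Conflicts with `--count`, `--no-body`, `--overview`. |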
+
+Every output entry includes an `id` field — the entry's original 0-indexed position in the HAR. IDs are stable across filter changes, so you can list matches with `--fields id,url,status` and then drill into a specific one with `--entry N`.
+
+Static-asset response bodies (images, fonts, CSS, JS, WASM, video, audio) are stripped by default, since they dominate HAR size but rarely help debug API behaviour. Use `--include-all-bodies` to keep them, or `--no-body` to strip everything.
 
 ### Utility
 
@@ -95,6 +102,13 @@ hargrep --method POST --count session.har
 # LLM-friendly: just URLs, statuses, timings, no bodies
 hargrep --fields url,status,time --no-body --output jsonl recording.har
 
+# One-shot overview of a HAR: entry count, histograms, top domains, totals
+hargrep --overview recording.har
+
+# Narrow with filters, list IDs, then fetch one entry in full
+hargrep --status-range 5xx --fields id,url,status --output jsonl recording.har
+hargrep --entry 42 recording.har
+
 # Validate before processing
 hargrep --validate untrusted.har
 
@@ -109,20 +123,22 @@ hargrep --header 'Authorization:Bearer' --fields url,status recording.har
 
 `hargrep` is designed to fit into agent pipelines:
 
-- **Predictable schema** — every output mode produces deterministic, well-formed JSON or compact text
-- **`--fields`** — request only the columns you need so the output stays small
-- **`--no-body`** — strip base64 images and large response bodies
-- **`--count`** — check scope cheaply before committing context to a full query
-- **`--jsonl`** — stream one entry per line, easy to chunk
+- **Predictable schema** — every output mode produces deterministic, well-formed JSON or compact text.
+- **Stable entry IDs** — every entry includes an `id` field (its original HAR index). List matches cheaply, then fetch specific entries with `--entry N`.
+- **`--overview`** — one call returns a dashboard of the (optionally filtered) HAR. Replaces several exploratory queries.
+- **`--fields`** — request only the columns you need so the output stays small.
+- **Asset bodies stripped by default** — CSS/JS/images/fonts/WASM response bodies are dropped automatically since they dominate HAR size. `--include-all-bodies` disables this; `--no-body` strips everything.
+- **`--count`** — check scope cheaply before committing context to a full query.
+- **`--output jsonl`** — stream one entry per line, easy to chunk. Default JSON is compact when piped and pretty in a TTY.
 - **Fails fast** — CLI arguments are validated before any file is read. Unknown `--fields` names, invalid `--status-range`, bad `--url-regex`, and conflicting flags (e.g. `--count --fields`) all error with exit code 2 and a descriptive message on stderr. Typos surface immediately instead of producing empty results.
 
-Typical agent flow: validate → count → filter narrowly → read specific entries.
+Typical agent flow: overview → filter → fetch specific entries.
 
 ```bash
-hargrep --validate recording.har                          # check it parses
-hargrep --count --status-range 5xx recording.har          # probe the scope
-hargrep --status-range 5xx --fields url,status,time \
-  --output jsonl recording.har                            # pull just what's needed
+hargrep --overview recording.har                          # shape + scope in one call
+hargrep --status-range 5xx --fields id,url,status \
+  --output jsonl recording.har                            # list candidates
+hargrep --entry 42 recording.har                          # pull the full entry for one id
 ```
 
 ## HAR format

diff --git a/src/filter.rs b/src/filter.rs
@@ -108,10 +108,15 @@ pub struct FilterOptions {
     pub min_time: Option<f64>,
 }
 
-pub fn filter_entries(entries: Vec<Entry>, opts: &FilterOptions) -> Vec<Entry> {
+/// Keep only the entries that satisfy every filter in `opts`, returning each
+/// survivor as `(index, entry)` where `index` is its original 0-based position
+/// in the HAR. Downstream formatters emit this index as `id`, so an agent can
+/// list matches and later fetch one by id even after the filter set changes.
+pub fn filter_entries(entries: Vec<Entry>, opts: &FilterOptions) -> Vec<(usize, Entry)> {
     entries
         .into_iter()
-        .filter(|entry| matches_all(entry, opts))
+        .enumerate()
+        .filter(|(_, entry)| matches_all(entry, opts))
         .collect()
 }
 
@@ -273,7 +278,7 @@ mod tests {
         };
         let result = filter_entries(entries, &opts);
         assert_eq!(result.len(), 1);
-        assert_eq!(result[0].request.method, "POST");
+        assert_eq!(result[0].1.request.method, "POST");
     }
 
     #[test]
@@ -296,7 +301,7 @@ mod tests {
         };
         let result = filter_entries(entries, &opts);
         assert_eq!(result.len(), 1);
-        assert_eq!(result[0].response.status, 404);
+        assert_eq!(result[0].1.response.status, 404);
     }
 
     #[test]
@@ -308,7 +313,7 @@ mod tests {
         };
         let result = filter_entries(entries, &opts);
         assert_eq!(result.len(), 1);
-        assert_eq!(result[0].response.status, 404);
+        assert_eq!(result[0].1.response.status, 404);
     }
 
     #[test]
@@ -342,7 +347,7 @@ mod tests {
         };
         let result = filter_entries(entries, &opts);
         assert_eq!(result.len(), 1);
-        assert!(result[0].request.url.contains("/users/999"));
+        assert!(result[0].1.request.url.contains("/users/999"));
     }
 
     #[test]
@@ -354,7 +359,7 @@ mod tests {
         };
         let result = filter_entries(entries, &opts);
         assert_eq!(result.len(), 1);
-        assert_eq!(result[0].request.method, "POST");
+        assert_eq!(result[0].1.request.method, "POST");
     }
 
     #[test]
@@ -366,7 +371,7 @@ mod tests {
         };
         let result = filter_entries(entries, &opts);
         assert_eq!(result.len(), 1);
-        assert_eq!(result[0].request.method, "POST");
+        assert_eq!(result[0].1.request.method, "POST");
     }
 
     #[test]
@@ -389,7 +394,7 @@ mod tests {
         };
         let result = filter_entries(entries, &opts);
         assert_eq!(result.len(), 1);
-        assert!(result[0].request.url.contains("image.png"));
+        assert!(result[0].1.request.url.contains("image.png"));
     }
 
     #[test]

diff --git a/src/main.rs b/src/main.rs
@@ -2,11 +2,12 @@ mod filter;
 mod har;
 mod input;
 mod output;
+mod overview;
 
 use anyhow::Result;
 use clap::Parser;
 use filter::{FilterOptions, HeaderFilter, StatusRange};
-use output::{Field, OutputFormat, OutputMode};
+use output::{BodyMode, Field, OutputFormat, OutputMode};
 use regex::Regex;
 use std::path::PathBuf;
 use std::process;
@@ -62,10 +63,40 @@ struct Cli {
     #[arg(long)]
     count: bool,
 
+    /// Print a single-shot JSON dashboard of the filtered HAR: entry count,
+    /// status/method/MIME histograms, top domains, total body size, total time.
+    /// Replaces a cascade of exploratory queries with one call.
+    #[arg(
+        long,
+        conflicts_with_all = ["count", "fields", "entry", "no_body", "include_all_bodies", "output"]
+    )]
+    overview: bool,
+
+    /// Fetch a single entry by id (the original 0-indexed position in the HAR).
+    /// Returns a JSON object, not an array. Useful after listing entries with
+    /// `--fields id,url,status` and then zeroing in on one. `--entry` is a
+    /// direct lookup, not a filter operation — it conflicts with filter flags
+    /// so an agent can't accidentally combine them and get surprising results.
+    #[arg(
+        long,
+        conflicts_with_all = [
+            "count", "fields", "output",
+            "method", "status", "status_range", "url", "url_regex",
+            "header", "mime", "min_time",
+        ]
+    )]
+    entry: Option<usize>,
+
     /// Exclude request/response bodies from output
-    #[arg(long, conflicts_with = "count")]
+    #[arg(long, conflicts_with_all = ["count", "include_all_bodies"])]
     no_body: bool,
 
+    /// Include bodies for static-asset MIME types (CSS/JS/images/fonts/WASM)
+    /// that would otherwise be stripped by default. Use when you actually need
+    /// to inspect an asset payload.
+    #[arg(long, conflicts_with = "count")]
+    include_all_bodies: bool,
+
     /// Validate HAR only, don't query
     #[arg(long)]
     validate: bool,
@@ -100,6 +131,24 @@ fn run(cli: Cli) -> Result<i32> {
         return Ok(0);
     }
 
+    let body_mode = if cli.no_body {
+        BodyMode::StripAll
+    } else if cli.include_all_bodies {
+        BodyMode::IncludeAll
+    } else {
+        BodyMode::SkipAssets
+    };
+
+    if let Some(id) = cli.entry {
+        let total = har.log.entries.len();
+        let entry = har.log.entries.into_iter().nth(id).ok_or_else(|| {
+            anyhow::anyhow!("entry id {id} out of range (HAR has {total} entries)")
+        })?;
+        let output = output::format_single_entry(id, &entry, body_mode)?;
+        print!("{output}");
+        return Ok(0);
+    }
+
     let filter_opts = FilterOptions {
         method: cli.method,
         status: cli.status,
@@ -114,13 +163,25 @@ fn run(cli: Cli) -> Result<i32> {
     let filtered = filter::filter_entries(har.log.entries, &filter_opts);
     let exit_code = if filtered.is_empty() { 1 } else { 0 };
 
+    if cli.overview {
+        let doc = overview::build_overview(&filtered);
+        let serialized = if std::io::IsTerminal::is_terminal(&std::io::stdout()) {
+            serde_json::to_string_pretty(&doc)?
+        } else {
+            serde_json::to_string(&doc)?
+        };
+        println!("{serialized}");
+        // Keep grep-like exit semantics: empty filtered set → exit 1.
+        return Ok(exit_code);
+    }
+
     let mode = if cli.count {
         OutputMode::Count
     } else {
         OutputMode::Formatted {
             format: cli.output,
             fields: cli.fields,
-            no_body: cli.no_body,
+            body: body_mode,
         }
     };