Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ Reads from stdin if no file is given.
| `--header <NAME:VALUE>` | `--header 'Authorization:Bearer'` |
| `--mime <SUBSTRING>` | `--mime application/json` (matches `application/json; charset=utf-8` too) |
| `--min-time <MS>` | `--min-time 500` |
| `--body-grep <SUBSTRING>` | `--body-grep 'session expired'` (case-sensitive substring match against request or response body text) |

Filters combine with AND logic.

Expand All @@ -66,6 +67,9 @@ Filters combine with AND logic.
| `--fields <FIELDS>` | Comma-separated. Valid names: `id`, `url`, `method`, `status`, `status-text`, `time`, `mime-type`, `started-date-time`. CLI names are kebab-case; emitted JSON keys preserve HAR camelCase (`statusText`, `mimeType`). Unknown names error at parse time. |
| `--count` | Print only the count of matching entries. Conflicts with `--fields`, `--no-body`, `--output`. |
| `--overview` | Print a single JSON dashboard of the filtered HAR: entry count, status/method/MIME histograms, top 10 domains, total body size, total time. Replaces a cascade of exploratory queries with one call. |
| `--domains` | Emit `[{domain, count}]` sorted by count desc. Respects filters. |
| `--size-by-type` | Emit `[{mime_type, total_bytes, count}]` sorted by total_bytes desc. Respects filters. |
| `--redirects` | Emit `[{id, url, status, location}]` for every 3xx entry. Respects filters. |
| `--entry <N>` | Fetch a single entry by id (its original 0-indexed position in the HAR). Returns a JSON object, not an array. |
| `--no-body` | Exclude all request/response body text. |
| `--include-all-bodies` | Include bodies for static-asset MIME types (CSS/JS/images/fonts/WASM). By default those are stripped to save tokens. |
Expand Down Expand Up @@ -109,6 +113,14 @@ hargrep --overview recording.har
hargrep --status-range 5xx --fields id,url,status --output jsonl recording.har
hargrep --entry 42 recording.har

# Aggregate views — one call each
hargrep --domains recording.har # which hosts?
hargrep --size-by-type recording.har # where's the bandwidth going?
hargrep --redirects recording.har # all 3xx + Location headers

# Body search that understands the HAR schema (matches request and response body text)
hargrep --body-grep 'session expired' --fields id,url,status recording.har

# Validate before processing
hargrep --validate untrusted.har

Expand Down
235 changes: 235 additions & 0 deletions src/aggregates.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
//! Standalone aggregate views — focused answers that would otherwise require
//! an agent to synthesize across multiple `hargrep` calls.
//!
//! Each view emits a single JSON document: an array of aggregate rows or, for
//! `--redirects`, a flat list of 3xx entries with their Location headers.
//! Every view respects the filter pipeline, so you can scope it with any of
//! the existing filter flags.

use crate::har::Entry;
use crate::overview::extract_host;
use serde_json::{Value, json};
use std::collections::BTreeMap;

/// `--domains`: one `{domain, count}` row per distinct request host.
///
/// Rows are ordered by `count` descending; hosts with equal counts are
/// ordered by `domain` ascending. Entries for which `extract_host` returns
/// `None` are not counted.
pub fn domains(entries: &[(usize, Entry)]) -> Value {
    let mut tally: BTreeMap<String, u64> = BTreeMap::new();
    let hosts = entries
        .iter()
        .filter_map(|(_, entry)| extract_host(&entry.request.url));
    for host in hosts {
        *tally.entry(host).or_default() += 1;
    }

    let mut ranked: Vec<(String, u64)> = tally.into_iter().collect();
    // Highest count first; fall back to the host name so output is deterministic.
    ranked.sort_by(|(lhs_host, lhs_hits), (rhs_host, rhs_hits)| {
        rhs_hits
            .cmp(lhs_hits)
            .then_with(|| lhs_host.cmp(rhs_host))
    });

    let mut out = Vec::with_capacity(ranked.len());
    for (domain, count) in ranked {
        out.push(json!({ "domain": domain, "count": count }));
    }
    Value::Array(out)
}

/// `--size-by-type`: [{mime_type, total_bytes, count}] sorted by total_bytes
/// desc, then mime_type asc.
///
/// Uses the full MIME string (including charset) so `application/json` and
/// `application/json; charset=utf-8` are separate rows — matches how the HAR
/// actually labelled them. Agents that want to collapse variants can do so.
///
/// Entries without a MIME type are grouped under `"unknown"`. Negative
/// content sizes are counted as zero bytes. Totals saturate at `i64::MAX`
/// instead of overflowing, so absurd sizes in a malformed HAR cannot panic
/// (debug builds) or wrap around to a negative total (release builds).
pub fn size_by_type(entries: &[(usize, Entry)]) -> Value {
    let mut by_mime: BTreeMap<String, (i64, u64)> = BTreeMap::new();
    for (_, entry) in entries {
        let content = &entry.response.content;
        let mime = content.mime_type.as_deref().unwrap_or("unknown");
        // Clamp negative sizes to zero so they never reduce a bucket's total.
        let size = content.size.max(0);
        let (total_bytes, count) = by_mime.entry(mime.to_string()).or_default();
        // Saturate rather than overflow: sizes come straight from the HAR file.
        *total_bytes = total_bytes.saturating_add(size);
        *count += 1;
    }

    let mut rows: Vec<(String, i64, u64)> = by_mime
        .into_iter()
        .map(|(mime, (bytes, count))| (mime, bytes, count))
        .collect();
    // Largest total first; MIME string breaks ties for deterministic output.
    rows.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));

    Value::Array(
        rows.into_iter()
            .map(|(mime_type, total_bytes, count)| {
                json!({ "mime_type": mime_type, "total_bytes": total_bytes, "count": count })
            })
            .collect(),
    )
}

/// `--redirects`: every entry whose response status is in `300..=399`, in
/// input order, as `{id, url, status, location}`.
///
/// `location` is the value of the response's `Location` header, or an empty
/// string when the header is absent. No attempt is made to stitch rows into
/// redirect chains; callers can do that from the raw url/location pairs.
pub fn redirects(entries: &[(usize, Entry)]) -> Value {
    let rows: Vec<Value> = entries
        .iter()
        .filter(|(_, entry)| matches!(entry.response.status, 300..=399))
        .map(|(id, entry)| {
            // Missing Location header is reported as "" rather than omitted.
            let location = find_location_header(entry).unwrap_or_default();
            json!({
                "id": id,
                "url": entry.request.url,
                "status": entry.response.status,
                "location": location,
            })
        })
        .collect();
    Value::Array(rows)
}

/// Returns a copy of the first response header whose name equals `location`
/// (ASCII case-insensitive), or `None` if the response has no such header.
fn find_location_header(entry: &Entry) -> Option<String> {
    entry.response.headers.iter().find_map(|header| {
        header
            .name
            .eq_ignore_ascii_case("location")
            .then(|| header.value.clone())
    })
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::har::{Content, Entry, Header, Request, Response, Timings};

    /// Builds an `Entry` fixture with the given method, URL, status, MIME
    /// type and content size; every other field is a fixed placeholder.
    fn make_entry(method: &str, url: &str, status: u16, mime: &str, body_size: i64) -> Entry {
        let request = Request {
            method: method.to_string(),
            url: url.to_string(),
            http_version: "HTTP/1.1".to_string(),
            headers: Vec::new(),
            query_string: Vec::new(),
            headers_size: -1,
            body_size: -1,
            post_data: None,
        };
        let content = Content {
            size: body_size,
            mime_type: Some(mime.to_string()),
            text: None,
        };
        let response = Response {
            status,
            status_text: String::new(),
            http_version: "HTTP/1.1".to_string(),
            headers: Vec::new(),
            content,
            redirect_url: String::new(),
            headers_size: -1,
            body_size: 0,
        };
        let timings = Timings {
            send: 0.0,
            wait: 10.0,
            receive: 0.0,
        };
        Entry {
            started_date_time: "2026-01-15T10:00:00.000Z".to_string(),
            time: 10.0,
            request,
            response,
            timings,
            cache: None,
        }
    }

    /// Appends a `Location` response header to `entry` and hands it back.
    fn with_location(mut entry: Entry, location: &str) -> Entry {
        let header = Header {
            name: "Location".to_string(),
            value: location.to_string(),
        };
        entry.response.headers.push(header);
        entry
    }

    /// Pairs each entry with its position, mimicking the ids the filter
    /// pipeline attaches.
    fn indexed(entries: Vec<Entry>) -> Vec<(usize, Entry)> {
        let mut pairs = Vec::with_capacity(entries.len());
        for (position, entry) in entries.into_iter().enumerate() {
            pairs.push((position, entry));
        }
        pairs
    }

    #[test]
    fn domains_counts_and_sorts() {
        let input = indexed(vec![
            make_entry("GET", "https://a.example/x", 200, "application/json", 10),
            make_entry("GET", "https://a.example/y", 200, "application/json", 10),
            make_entry("GET", "https://b.example/z", 200, "application/json", 10),
        ]);
        let rows = domains(&input);
        let arr = rows.as_array().unwrap();

        let expected = [("a.example", 2u64), ("b.example", 1u64)];
        assert_eq!(arr.len(), expected.len());
        for (row, (domain, count)) in arr.iter().zip(expected) {
            assert_eq!(row["domain"], domain);
            assert_eq!(row["count"], count);
        }
    }

    #[test]
    fn size_by_type_sums_and_sorts() {
        let input = indexed(vec![
            make_entry("GET", "u", 200, "image/png", 1000),
            make_entry("GET", "u", 200, "application/json", 50),
            make_entry("GET", "u", 200, "application/json", 150),
        ]);
        let rows = size_by_type(&input);
        let arr = rows.as_array().unwrap();

        let expected = [
            ("image/png", 1000i64, 1u64),
            ("application/json", 200i64, 2u64),
        ];
        assert_eq!(arr.len(), expected.len());
        for (row, (mime_type, total_bytes, count)) in arr.iter().zip(expected) {
            assert_eq!(row["mime_type"], mime_type);
            assert_eq!(row["total_bytes"], total_bytes);
            assert_eq!(row["count"], count);
        }
    }

    #[test]
    fn size_by_type_treats_unknown_mime_as_unknown_bucket() {
        let mut entry = make_entry("GET", "u", 200, "application/json", 10);
        entry.response.content.mime_type = None;

        let rows = size_by_type(&indexed(vec![entry]));
        let first = &rows.as_array().unwrap()[0];
        assert_eq!(first["mime_type"], "unknown");
    }

    #[test]
    fn size_by_type_treats_negative_sizes_as_zero() {
        let entry = make_entry("GET", "u", 200, "application/json", -1);
        let rows = size_by_type(&indexed(vec![entry]));
        let first = &rows.as_array().unwrap()[0];
        assert_eq!(first["total_bytes"], 0);
    }

    #[test]
    fn redirects_only_includes_3xx() {
        let ok = make_entry("GET", "https://x/home", 200, "text/html", 0);
        let moved = with_location(
            make_entry("GET", "https://x/", 301, "text/html", 0),
            "https://x/home",
        );
        let found = with_location(
            make_entry("GET", "https://x/old", 302, "text/html", 0),
            "https://x/new",
        );
        let missing = make_entry("GET", "https://x/y", 404, "text/html", 0);

        let rows = redirects(&indexed(vec![ok, moved, found, missing]));
        let arr = rows.as_array().unwrap();
        assert_eq!(arr.len(), 2);

        let (first, second) = (&arr[0], &arr[1]);
        assert_eq!(first["status"], 301);
        assert_eq!(first["id"], 1);
        assert_eq!(first["location"], "https://x/home");
        assert_eq!(second["status"], 302);
    }

    #[test]
    fn redirects_handles_missing_location_header() {
        let bare_redirect = make_entry("GET", "https://x/", 301, "text/html", 0);
        let rows = redirects(&indexed(vec![bare_redirect]));
        let arr = rows.as_array().unwrap();
        assert_eq!(arr.len(), 1);
        assert_eq!(arr[0]["location"], "");
    }
}
24 changes: 24 additions & 0 deletions src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,10 @@ pub struct FilterOptions {
pub header: Option<HeaderFilter>,
pub mime: Option<String>,
pub min_time: Option<f64>,
/// Substring match against request postData.text OR response content.text.
/// Matches if either contains the pattern. Agents fall through to
/// `grep`/`rg` on raw HAR otherwise, which is noisy and unreliable.
pub body_grep: Option<String>,
}

/// Filter entries against the provided options, preserving each entry's
Expand Down Expand Up @@ -165,9 +169,29 @@ fn matches_all(entry: &Entry, opts: &FilterOptions) -> bool {
{
return false;
}
if let Some(ref pat) = opts.body_grep
&& !body_contains(entry, pat)
{
return false;
}
true
}

fn body_contains(entry: &Entry, pat: &str) -> bool {
if let Some(resp_text) = entry.response.content.text.as_deref()
&& resp_text.contains(pat)
{
return true;
}
if let Some(post_data) = &entry.request.post_data
&& let Some(req_text) = post_data.text.as_deref()
&& req_text.contains(pat)
{
return true;
}
false
}

fn has_header(entry: &Entry, hf: &HeaderFilter) -> bool {
entry
.request
Expand Down
Loading