Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ Reads from stdin if no file is given.
| `--mime <SUBSTRING>` | `--mime application/json` (matches `application/json; charset=utf-8` too) |
| `--min-time <MS>` | `--min-time 500` |
| `--body-grep <SUBSTRING>` | Match against request or response body text (case-sensitive). |
| `--body-regex <REGEX>` | Regex match against request or response body text. Use `(?i)pattern` for case-insensitive matching. |

Filters combine with AND logic.

Expand All @@ -70,7 +71,7 @@ Filters combine with AND logic.
| `--domains` | Emit `[{domain, count}]` sorted by count desc. Respects filters. |
| `--size-by-type` | Emit `[{mime_type, total_bytes, count}]` sorted by total_bytes desc. Respects filters. |
| `--redirects` | Emit `[{id, url, status, location}]` for every 3xx entry. Respects filters. |
| `--entry <N>` | Fetch a single entry by id (its original 0-indexed position in the HAR). Returns a JSON object, not an array. |
| `--entry <N>` | Fetch a single entry by id (its original 0-indexed position in the HAR). Returns a JSON object, not an array. As a direct lookup, `--entry` conflicts with every filter flag; combine them and the command errors at parse time. |
| `--no-body` | Exclude all request/response body text. |
| `--include-all-bodies` | Include bodies for static-asset MIME types (CSS/JS/images/fonts/WASM). By default those are stripped to save tokens. |

Expand All @@ -84,6 +85,7 @@ Static-asset response bodies (images, fonts, CSS, JS, WASM, video, audio) are st
|------|-------------|
| `--validate` | Validate HAR only, no query |
| `-v`, `--verbose` | Print parsing info to stderr |
| `--help-llm` | Print a compact, LLM-tuned cheatsheet of every flag (~1.5 KB vs ~3.5 KB for `--help`) and exit. |

### Exit codes

Expand Down Expand Up @@ -120,6 +122,10 @@ hargrep --redirects recording.har # all 3xx + Location h

# Body search that actually knows about HAR schema
hargrep --body-grep 'session expired' --fields id,url,status recording.har
hargrep --body-regex '(?i)timeout|deadline' --status-range 5xx recording.har

# Compact flag reference for LLM agents (~1.5 KB vs ~3.5 KB for --help)
hargrep --help-llm

# Validate before processing
hargrep --validate untrusted.har
Expand Down
23 changes: 23 additions & 0 deletions src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ pub struct FilterOptions {
/// Matches if either contains the pattern. Agents fall through to
/// `grep`/`rg` on raw HAR otherwise, which is noisy and unreliable.
pub body_grep: Option<String>,
/// Regex variant of `body_grep`. Compiled at CLI parse time (invalid
/// patterns error with exit code 2 before any file is read).
pub body_regex: Option<Regex>,
}

/// Filter entries against the provided options, preserving each entry's
Expand Down Expand Up @@ -174,9 +177,29 @@ fn matches_all(entry: &Entry, opts: &FilterOptions) -> bool {
{
return false;
}
if let Some(ref re) = opts.body_regex
&& !body_matches_regex(entry, re)
{
return false;
}
true
}

fn body_matches_regex(entry: &Entry, re: &Regex) -> bool {
if let Some(resp_text) = entry.response.content.text.as_deref()
&& re.is_match(resp_text)
{
return true;
}
if let Some(post_data) = &entry.request.post_data
&& let Some(req_text) = post_data.text.as_deref()
&& re.is_match(req_text)
{
return true;
}
false
}

fn body_contains(entry: &Entry, pat: &str) -> bool {
if let Some(resp_text) = entry.response.content.text.as_deref()
&& resp_text.contains(pat)
Expand Down
55 changes: 54 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ struct Cli {
#[arg(long)]
body_grep: Option<String>,

/// Filter by regex match against request or response body text.
/// Use `(?i)pattern` for case-insensitive matching.
#[arg(long)]
body_regex: Option<Regex>,
Comment on lines +61 to +62

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Declare --body-regex incompatible with --entry

The new --body-regex option is defined as a filter, but it can still be passed with --entry; in run, the --entry path returns before any filters are applied, so the regex is silently ignored (for example, --entry 0 --body-regex '^nope$' still returns entry 0). This creates surprising behavior for the new flag and should be prevented by adding a conflict with --entry at argument definition time.

Useful? React with 👍 / 👎.


/// Output format
#[arg(long, value_enum, default_value_t = OutputFormat::Json, conflicts_with = "count")]
output: OutputFormat,
Expand Down Expand Up @@ -112,7 +117,7 @@ struct Cli {
conflicts_with_all = [
"count", "fields", "output",
"method", "status", "status_range", "url", "url_regex",
"header", "mime", "min_time",
"header", "mime", "min_time", "body_grep", "body_regex",
]
)]
entry: Option<usize>,
Expand All @@ -131,6 +136,12 @@ struct Cli {
#[arg(long)]
validate: bool,

/// Print a compact, LLM-tuned cheatsheet of every flag and exit. Unlike
/// `--help`, this omits clap's formatting and examples so an agent pays
/// a few hundred tokens instead of a few thousand for the reference.
#[arg(long)]
help_llm: bool,

/// Show parsing info on stderr
#[arg(short, long)]
verbose: bool,
Expand All @@ -139,7 +150,48 @@ struct Cli {
file: Option<PathBuf>,
}

/// Compact plain-text flag reference printed by `run` when `--help-llm` is
/// passed, as a cheaper alternative to clap's generated `--help` output.
///
/// The text is a hand-maintained literal emitted as-is, so it has to be kept
/// in sync with the `Cli` flags manually. The integration suite asserts the
/// output stays under 2000 bytes and still names the main filter, output and
/// body flags.
///
/// The backslash right after the opening quote is a line continuation, so the
/// literal starts at "hargrep" rather than with a newline.
const HELP_LLM: &str = "\
hargrep — HAR query CLI. Reads FILE (or stdin).

FILTERS (AND-combined):
--method GET|POST|... --status CODE
--status-range 4xx|200-299 --url SUBSTR
--url-regex REGEX --header 'NAME[:VALUE]'
--mime SUBSTR --min-time MS
--body-grep SUBSTR --body-regex REGEX

OUTPUT (mutually exclusive):
(default) Filtered entries as JSON (pretty in TTY, compact when piped).
--output json|jsonl|summary
--fields F,F,... id,url,method,status,status-text,time,mime-type,started-date-time
--count Matching entry count.
--overview {entries,status,methods,mime_types,top_domains,total_body_size_bytes,total_time_ms}
--domains [{domain,count}] sorted by count desc.
--size-by-type [{mime_type,total_bytes,count}] sorted by total_bytes desc.
--redirects [{id,url,status,location}] for every 3xx.
--entry N One entry by id (original 0-indexed HAR position).

BODY:
(default) Keep JSON/HTML/XML/text; strip CSS/JS/images/fonts/WASM.
--no-body Strip ALL body text.
--include-all-bodies Keep ALL bodies, including static assets.

UTIL: --validate -v/--verbose --help --help-llm --version

Every entry output includes `id` (stable across filters). Agent flow:
hargrep --overview FILE
hargrep --status-range 5xx --fields id,url,status FILE # list
hargrep --entry N FILE # drill in

EXIT: 0=matches 1=no matches 2=error (bad args, invalid HAR, IO).
";

fn run(cli: Cli) -> Result<i32> {
if cli.help_llm {
print!("{HELP_LLM}");
return Ok(0);
}

let raw = input::read_input(cli.file.as_deref())?;

let har: har::Har = serde_json::from_str(&raw).map_err(|e| {
Expand Down Expand Up @@ -189,6 +241,7 @@ fn run(cli: Cli) -> Result<i32> {
mime: cli.mime,
min_time: cli.min_time,
body_grep: cli.body_grep,
body_regex: cli.body_regex,
};

let filtered = filter::filter_entries(har.log.entries, &filter_opts);
Expand Down
92 changes: 92 additions & 0 deletions tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,96 @@ fn test_body_grep_composes_with_other_filters() {
assert_eq!(parsed[0]["request"]["method"], "POST");
}

#[test]
fn test_body_regex_matches_response_body() {
    // Regex features (`\s*`, `\w+`) that a plain substring search could not
    // express; the fixture is expected to yield exactly one hit, entry id 1.
    let pattern = r#""name":\s*"Al\w+""#;
    let (stdout, _stderr, _code) = hargrep(&["--body-regex", pattern, "tests/fixtures/valid.har"]);

    let entries: Vec<serde_json::Value> = serde_json::from_str(&stdout).unwrap();
    assert_eq!(entries.len(), 1);
    assert_eq!(entries[0]["id"], 1);
}

#[test]
fn test_body_regex_matches_request_post_body() {
    // NOTE(review): entry 1's response body appears to match a similar
    // pattern in the response-body test, so this assertion may pass without
    // the request post-data path being exercised — confirm against the fixture.
    let args = ["--body-regex", "Al.ce", "tests/fixtures/valid.har"];
    let (stdout, _stderr, _code) = hargrep(&args);

    let entries: Vec<serde_json::Value> = serde_json::from_str(&stdout).unwrap();
    let found_entry_one = entries.iter().any(|entry| entry["id"] == 1);
    assert!(found_entry_one);
}

#[test]
fn test_body_regex_invalid_pattern_errors_at_parse() {
    // An unterminated character class is not a valid regex; the run must end
    // with exit code 2 and an error that names the offending flag.
    let args = ["--body-regex", "[unclosed", "tests/fixtures/valid.har"];
    let (_stdout, stderr, code) = hargrep(&args);

    assert_eq!(code, 2);
    let mentions_flag = stderr.to_lowercase().contains("body-regex");
    assert!(mentions_flag, "expected body-regex error, got: {stderr}");
}

#[test]
fn test_body_regex_composes_with_body_grep_as_and() {
    // Both body filters are supplied and an entry has to satisfy BOTH of them
    // (AND, like every other filter). The regex here is meant to match
    // nothing, so the run is expected to report "no matches" via exit code 1.
    let args = [
        "--body-grep",
        "Alice",
        "--body-regex",
        "^no_match_$",
        "tests/fixtures/valid.har",
    ];
    let (_stdout, _stderr, exit_code) = hargrep(&args);
    assert_eq!(exit_code, 1);
}

// --- --help-llm ---

#[test]
fn test_help_llm_emits_compact_cheatsheet() {
    // `--help-llm` prints the cheatsheet and exits successfully.
    let (stdout, _, code) = hargrep(&["--help-llm"]);
    assert_eq!(code, 0);
    // Must fit in roughly one screen; serves LLM agents, not humans.
    assert!(
        stdout.len() < 2000,
        "--help-llm output should be compact (<2KB); got {} bytes",
        stdout.len()
    );
    // Sanity: lists every top-level flag category we want an agent to know.
    // Note that `--status` and `--url` are substrings of `--status-range` and
    // `--url-regex`, so those two needles only prove the longer flags exist.
    for needle in [
        "--method",
        "--status",
        "--status-range",
        "--url",
        "--url-regex",
        "--header",
        "--mime",
        "--min-time",
        "--body-grep",
        "--body-regex",
        "--count",
        "--overview",
        "--domains",
        "--size-by-type",
        "--redirects",
        "--entry",
        "--fields",
        "--output",
        "--no-body",
        "--include-all-bodies",
    ] {
        assert!(
            stdout.contains(needle),
            "--help-llm missing {needle:?}; output:\n{stdout}"
        );
    }
    // Exit codes should be documented. Match the `CODE=meaning` pairs rather
    // than bare digits: a lone '0' or '2' also occurs in "0-indexed" and
    // "200-299", so a digit-only check barely constrains the output.
    for needle in ["0=matches", "1=no matches", "2=error"] {
        assert!(
            stdout.contains(needle),
            "--help-llm missing exit-code entry {needle:?}; output:\n{stdout}"
        );
    }
}

#[test]
fn test_help_llm_does_not_require_a_file() {
    // Like `--help`, `--help-llm` is self-contained: invoked with no FILE
    // argument it must still exit with code 0.
    let (_stdout, _stderr, exit_code) = hargrep(&["--help-llm"]);
    assert_eq!(exit_code, 0);
}

// --- --overview dashboard ---

#[test]
Expand Down Expand Up @@ -668,6 +758,8 @@ fn test_entry_flag_conflicts_with_filter_flags() {
&["--mime", "json"],
&["--min-time", "100"],
&["--header", "Authorization"],
&["--body-grep", "Alice"],
&["--body-regex", "Al.ce"],
];
for filter_args in cases {
let mut args = vec!["--entry", "0"];
Expand Down
Loading