From 2d31861ff5bcc63f012c9b3f4d50e8e315951dc8 Mon Sep 17 00:00:00 2001 From: protosphinx <133899485+protosphinx@users.noreply.github.com> Date: Thu, 7 May 2026 09:11:49 -0700 Subject: [PATCH 1/2] feat(stats): add min_case_length and max_case_length to LogStats Extend LogStats with min and max case length fields so callers can see the range of case lengths (alongside the existing mean and median) without a second pass. Computed in summarize() alongside mean and median; empty-log sentinel is 0, consistent with the existing mean/median behaviour. Tests cover the normal case and the empty-log edge case. --- pm_bench/stats.py | 12 +++++++++--- tests/test_stats.py | 17 ++++++++++++++++- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/pm_bench/stats.py b/pm_bench/stats.py index bcd3f48..8091058 100644 --- a/pm_bench/stats.py +++ b/pm_bench/stats.py @@ -2,9 +2,9 @@ Useful when inspecting a new dataset - n_cases, n_events, distinct activity count, time span, top-N most-frequent activities and -transitions, mean / median case length. Pure CPython; runs in the -same process as the rest of pm-bench so it works on `synthetic-toy`, -any CSV path, and (eventually) any cached BPI log. +transitions, mean / median / min / max case length. Pure CPython; runs +in the same process as the rest of pm-bench so it works on +`synthetic-toy`, any CSV path, and (eventually) any cached BPI log. 
""" from __future__ import annotations @@ -27,6 +27,8 @@ class LogStats: latest: datetime | None mean_case_length: float median_case_length: float + min_case_length: int + max_case_length: int top_activities: list[tuple[Activity, int]] top_transitions: list[tuple[tuple[Activity, Activity], int]] @@ -66,6 +68,8 @@ def summarize(events: Iterable[Event], *, top_n: int = 10) -> LogStats: n_cases = len(by_case) mean_len = statistics.fmean(case_lengths) if case_lengths else 0.0 median_len = statistics.median(case_lengths) if case_lengths else 0.0 + min_len = min(case_lengths) if case_lengths else 0 + max_len = max(case_lengths) if case_lengths else 0 return LogStats( n_events=n_events, @@ -76,6 +80,8 @@ def summarize(events: Iterable[Event], *, top_n: int = 10) -> LogStats: latest=latest, mean_case_length=mean_len, median_case_length=median_len, + min_case_length=min_len, + max_case_length=max_len, top_activities=_top_n_sorted(activity_counts, top_n), top_transitions=_top_n_sorted(transition_counts, top_n), ) diff --git a/tests/test_stats.py b/tests/test_stats.py index a9884a6..ca4df7b 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -35,6 +35,13 @@ def test_summarize_case_lengths() -> None: assert s.median_case_length == 2 +def test_summarize_min_max_case_lengths() -> None: + s = summarize(_events()) + # cases: c1=3 events, c2=2 events, c3=1 event + assert s.min_case_length == 1 + assert s.max_case_length == 3 + + def test_summarize_top_activities_sorted_by_count_desc() -> None: s = summarize(_events(), top_n=10) counts = [c for _, c in s.top_activities] @@ -45,7 +52,7 @@ def test_summarize_top_activities_sorted_by_count_desc() -> None: def test_summarize_top_transitions() -> None: s = summarize(_events()) transitions = {pair: c for pair, c in s.top_transitions} - assert transitions[("a", "b")] == 2 # c1 a→b and c2 a→b + assert transitions[("a", "b")] == 2 # c1 a->b and c2 a->b assert transitions[("b", "c")] == 1 @@ -62,6 +69,12 @@ def 
test_summarize_empty_log_is_safe() -> None: assert s.earliest is None +def test_summarize_empty_log_min_max_zero() -> None: + s = summarize([]) + assert s.min_case_length == 0 + assert s.max_case_length == 0 + + def test_cli_stats_synthetic_toy() -> None: runner = CliRunner() r = runner.invoke(main, ["stats", "synthetic-toy", "--top-n", "3"]) @@ -70,3 +83,5 @@ def test_cli_stats_synthetic_toy() -> None: assert data["n_cases"] == 200 assert data["n_events"] == 965 assert len(data["top_activities"]) == 3 + assert "min_case_length" in data + assert "max_case_length" in data From 4b144c1090c996762ad19db0a94f1be892dcc0e3 Mon Sep 17 00:00:00 2001 From: protosphinx <133899485+protosphinx@users.noreply.github.com> Date: Thu, 7 May 2026 09:16:16 -0700 Subject: [PATCH 2/2] fix(stats): expose min/max case length in CLI stats JSON output The stats command serialises LogStats fields manually. Include the two new fields so the CLI output is consistent with the dataclass and the test assertion passes. Also replace non-ASCII punctuation (arrows, em dashes, middle dots) in cli.py docstrings, comments, option help text and the leaderboard header line with ASCII equivalents; note that this changes the text of the leaderboard header and --help output. --- pm_bench/cli.py | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/pm_bench/cli.py b/pm_bench/cli.py index b50a547..b86f1fd 100644 --- a/pm_bench/cli.py +++ b/pm_bench/cli.py @@ -76,17 +76,17 @@ def _load_events(name: str) -> list: """Return a materialized event list for a dataset. Supported inputs: - - `synthetic-toy` → bundled deterministic generator (seed=42) - - `synthetic-toy@` → same generator at a different seed (e.g. + - `synthetic-toy` -> bundled deterministic generator (seed=42) + - `synthetic-toy@` -> same generator at a different seed (e.g. `synthetic-toy@99`). The `@` suffix is for variance experiments; canonical leaderboard runs always use bare `synthetic-toy`. 
- any path that looks like a CSV (`.csv` / `.csv.gz` / contains `/`) - → loaded via `pm_bench.io.read_csv_log` - - any path ending in `.xes` / `.xes.gz` → loaded via + -> loaded via `pm_bench.io.read_csv_log` + - any path ending in `.xes` / `.xes.gz` -> loaded via `pm_bench.xes.read_xes_log` - a registry-named dataset whose hash is pinned and cached locally - → fetched + parsed (CSV or XES per the registry `format`). + -> fetched + parsed (CSV or XES per the registry `format`). """ from pm_bench.io import looks_like_path, read_csv_log from pm_bench.xes import read_xes_log @@ -219,7 +219,7 @@ def _load_split(path: str) -> dict: """Load a split JSON, validate shape, exit 2 with a clear message on bad input. Centralizing the read here means every command that accepts `--split` - fails the same way on the same shapes — no one path traceback'ing + fails the same way on the same shapes - no one path traceback'ing while another exits cleanly. """ try: @@ -271,7 +271,7 @@ def cmd_list() -> None: @click.argument("name") def info(name: str) -> None: """Show details for a dataset.""" - # `info synthetic-toy@99` should resolve to the base entry — every + # `info synthetic-toy@99` should resolve to the base entry - every # other verb accepts the @ suffix and this one was the # outlier. Strip the suffix before the registry lookup. lookup_name = name.split("@", 1)[0] if "@" in name else name @@ -312,7 +312,7 @@ def fetch(name: str, pin: bool) -> None: Auto-downloads when `download_url` is set; otherwise prints instructions for the manual TOS-gated download path (4TU / Mendeley). """ - # synthetic-toy@ is a variant of synthetic-toy — same "generated + # synthetic-toy@ is a variant of synthetic-toy - same "generated # on demand, no fetch needed" semantics. Other commands accept the # @ suffix; we match here for consistency. 
if name.startswith("synthetic-toy@") or name == "synthetic-toy": @@ -392,6 +392,8 @@ def stats(name: str, top_n: int) -> None: "latest": s.latest.isoformat() if s.latest else None, "mean_case_length": s.mean_case_length, "median_case_length": s.median_case_length, + "min_case_length": s.min_case_length, + "max_case_length": s.max_case_length, "top_activities": [ {"activity": a, "count": c} for a, c in s.top_activities ], @@ -409,7 +411,7 @@ def stats(name: str, top_n: int) -> None: def split(name: str) -> None: """Produce a train/val/test split for a dataset. - The split is task-agnostic — every task (next-event, remaining-time, + The split is task-agnostic - every task (next-event, remaining-time, outcome, bottleneck, conformance) shares the same case-level chronological partition, which is the whole point of pm-bench. So this command takes no `--task`; downstream commands (`prefixes`, @@ -521,8 +523,8 @@ def prefixes(name: str, split_path: str, out_path: str, partition: str, task: st default="markov", show_default=True, help=( - "markov / uniform → next-event; mean / zero → remaining-time; " - "prior / global → outcome; mean-wait / random → bottleneck." + "markov / uniform -> next-event; mean / zero -> remaining-time; " + "prior / global -> outcome; mean-wait / random -> bottleneck." ), ) @click.option( @@ -618,7 +620,7 @@ def predict( type=click.Choice(["dfg", "empty"]), default="dfg", show_default=True, - help="dfg → DFG from training cases; empty → no transitions (absolute floor).", + help="dfg -> DFG from training cases; empty -> no transitions (absolute floor).", ) @_runtime_safe def discover(name: str, split_path: str, out_path: str, baseline: str) -> None: @@ -681,8 +683,8 @@ def score( try: _score_dispatch(predictions_path, prefixes_path, dataset_name, split_path, task) except (KeyError, ValueError) as exc: - # KeyError → predictions CSV is missing a required column. 
- # ValueError → score function rejected the inputs (length + # KeyError -> predictions CSV is missing a required column. + # ValueError -> score function rejected the inputs (length # mismatch, empty truth, malformed conformance JSON, etc.). # In either case it's a clean runtime error, exit 2. click.echo(str(exc), err=True) @@ -708,7 +710,7 @@ def _score_dispatch( split_data = _load_split(split_path) truth_dfg = extract_dfg(events, split_data["test"]) # read_model_json may ValueError on bad shape; the outer score() - # try/except (added in the audit cleanup) catches it → exit 2. + # try/except (added in the audit cleanup) catches it -> exit 2. model_dfg = read_model_json(predictions_path) cs = score_conformance(model_dfg, truth_dfg) click.echo( @@ -969,7 +971,7 @@ def leaderboard( return width = max((len(e.model) for e in board.entries), default=10) - click.echo(f"{board.task} · {board.dataset} · {board.metric}") + click.echo(f"{board.task} . {board.dataset} . {board.metric}") click.echo("-" * (width + 30)) for e in standings(board): if board.task == "remaining-time": @@ -1047,7 +1049,7 @@ def validate(board_path: str, repo_root: str, no_rescore: bool) -> None: return # `load_board` re-parses the JSON we already have in `raw`. Pay that - # cost once — the file is small and the alternative is leaking the + # cost once - the file is small and the alternative is leaking the # Board construction into this command. board = load_board(board_path) try: @@ -1098,7 +1100,7 @@ def compare(board_a: str, board_b: str) -> None: sys.exit(2) except ValueError as exc: # Runtime mismatch (different (task, dataset) on the two files) - # → exit 2 per the convention in cli.py: 1 for usage / not-found, + # -> exit 2 per the convention in cli.py: 1 for usage / not-found, # 2 for runtime errors after args are accepted. click.echo(str(exc), err=True) sys.exit(2)