diff --git a/pm_bench/cli.py b/pm_bench/cli.py index b50a547..b86f1fd 100644 --- a/pm_bench/cli.py +++ b/pm_bench/cli.py @@ -76,17 +76,17 @@ def _load_events(name: str) -> list: """Return a materialized event list for a dataset. Supported inputs: - - `synthetic-toy` → bundled deterministic generator (seed=42) - - `synthetic-toy@` → same generator at a different seed (e.g. + - `synthetic-toy` -> bundled deterministic generator (seed=42) + - `synthetic-toy@` -> same generator at a different seed (e.g. `synthetic-toy@99`). The `@` suffix is for variance experiments; canonical leaderboard runs always use bare `synthetic-toy`. - any path that looks like a CSV (`.csv` / `.csv.gz` / contains `/`) - → loaded via `pm_bench.io.read_csv_log` - - any path ending in `.xes` / `.xes.gz` → loaded via + -> loaded via `pm_bench.io.read_csv_log` + - any path ending in `.xes` / `.xes.gz` -> loaded via `pm_bench.xes.read_xes_log` - a registry-named dataset whose hash is pinned and cached locally - → fetched + parsed (CSV or XES per the registry `format`). + -> fetched + parsed (CSV or XES per the registry `format`). """ from pm_bench.io import looks_like_path, read_csv_log from pm_bench.xes import read_xes_log @@ -219,7 +219,7 @@ def _load_split(path: str) -> dict: """Load a split JSON, validate shape, exit 2 with a clear message on bad input. Centralizing the read here means every command that accepts `--split` - fails the same way on the same shapes — no one path traceback'ing + fails the same way on the same shapes - no one path traceback'ing while another exits cleanly. """ try: @@ -271,7 +271,7 @@ def cmd_list() -> None: @click.argument("name") def info(name: str) -> None: """Show details for a dataset.""" - # `info synthetic-toy@99` should resolve to the base entry — every + # `info synthetic-toy@99` should resolve to the base entry - every # other verb accepts the @ suffix and this one was the # outlier. Strip the suffix before the registry lookup. lookup_name = name.split("@", 1)[0] if "@" in name else name @@ -312,7 +312,7 @@ def fetch(name: str, pin: bool) -> None: Auto-downloads when `download_url` is set; otherwise prints instructions for the manual TOS-gated download path (4TU / Mendeley). """ - # synthetic-toy@ is a variant of synthetic-toy — same "generated + # synthetic-toy@ is a variant of synthetic-toy - same "generated # on demand, no fetch needed" semantics. Other commands accept the # @ suffix; we match here for consistency. if name.startswith("synthetic-toy@") or name == "synthetic-toy": @@ -392,6 +392,8 @@ def stats(name: str, top_n: int) -> None: "latest": s.latest.isoformat() if s.latest else None, "mean_case_length": s.mean_case_length, "median_case_length": s.median_case_length, + "min_case_length": s.min_case_length, + "max_case_length": s.max_case_length, "top_activities": [ {"activity": a, "count": c} for a, c in s.top_activities ], @@ -409,7 +411,7 @@ def stats(name: str, top_n: int) -> None: def split(name: str) -> None: """Produce a train/val/test split for a dataset. - The split is task-agnostic — every task (next-event, remaining-time, + The split is task-agnostic - every task (next-event, remaining-time, outcome, bottleneck, conformance) shares the same case-level chronological partition, which is the whole point of pm-bench. So this command takes no `--task`; downstream commands (`prefixes`, @@ -521,8 +523,8 @@ def prefixes(name: str, split_path: str, out_path: str, partition: str, task: st default="markov", show_default=True, help=( - "markov / uniform → next-event; mean / zero → remaining-time; " - "prior / global → outcome; mean-wait / random → bottleneck." + "markov / uniform -> next-event; mean / zero -> remaining-time; " + "prior / global -> outcome; mean-wait / random -> bottleneck." ), ) @click.option( @@ -618,7 +620,7 @@ def predict( type=click.Choice(["dfg", "empty"]), default="dfg", show_default=True, - help="dfg → DFG from training cases; empty → no transitions (absolute floor).", + help="dfg -> DFG from training cases; empty -> no transitions (absolute floor).", ) @_runtime_safe def discover(name: str, split_path: str, out_path: str, baseline: str) -> None: @@ -681,8 +683,8 @@ def score( try: _score_dispatch(predictions_path, prefixes_path, dataset_name, split_path, task) except (KeyError, ValueError) as exc: - # KeyError → predictions CSV is missing a required column. - # ValueError → score function rejected the inputs (length + # KeyError -> predictions CSV is missing a required column. + # ValueError -> score function rejected the inputs (length # mismatch, empty truth, malformed conformance JSON, etc.). # In either case it's a clean runtime error, exit 2. click.echo(str(exc), err=True) @@ -708,7 +710,7 @@ def _score_dispatch( split_data = _load_split(split_path) truth_dfg = extract_dfg(events, split_data["test"]) # read_model_json may ValueError on bad shape; the outer score() - # try/except (added in the audit cleanup) catches it → exit 2. + # try/except (added in the audit cleanup) catches it -> exit 2. model_dfg = read_model_json(predictions_path) cs = score_conformance(model_dfg, truth_dfg) click.echo( @@ -969,7 +971,7 @@ def leaderboard( return width = max((len(e.model) for e in board.entries), default=10) - click.echo(f"{board.task} · {board.dataset} · {board.metric}") + click.echo(f"{board.task} . {board.dataset} . {board.metric}") click.echo("-" * (width + 30)) for e in standings(board): if board.task == "remaining-time": @@ -1047,7 +1049,7 @@ def validate(board_path: str, repo_root: str, no_rescore: bool) -> None: return # `load_board` re-parses the JSON we already have in `raw`. Pay that - # cost once — the file is small and the alternative is leaking the + # cost once - the file is small and the alternative is leaking the # Board construction into this command. board = load_board(board_path) try: @@ -1098,7 +1100,7 @@ def compare(board_a: str, board_b: str) -> None: sys.exit(2) except ValueError as exc: # Runtime mismatch (different (task, dataset) on the two files) - # → exit 2 per the convention in cli.py: 1 for usage / not-found, + # -> exit 2 per the convention in cli.py: 1 for usage / not-found, # 2 for runtime errors after args are accepted. click.echo(str(exc), err=True) sys.exit(2) diff --git a/pm_bench/stats.py b/pm_bench/stats.py index bcd3f48..8091058 100644 --- a/pm_bench/stats.py +++ b/pm_bench/stats.py @@ -2,9 +2,9 @@ Useful when inspecting a new dataset - n_cases, n_events, distinct activity count, time span, top-N most-frequent activities and -transitions, mean / median case length. Pure CPython; runs in the -same process as the rest of pm-bench so it works on `synthetic-toy`, -any CSV path, and (eventually) any cached BPI log. +transitions, mean / median / min / max case length. Pure CPython; runs +in the same process as the rest of pm-bench so it works on +`synthetic-toy`, any CSV path, and (eventually) any cached BPI log. """ from __future__ import annotations @@ -27,6 +27,8 @@ class LogStats: latest: datetime | None mean_case_length: float median_case_length: float + min_case_length: int + max_case_length: int top_activities: list[tuple[Activity, int]] top_transitions: list[tuple[tuple[Activity, Activity], int]] @@ -66,6 +68,8 @@ def summarize(events: Iterable[Event], *, top_n: int = 10) -> LogStats: n_cases = len(by_case) mean_len = statistics.fmean(case_lengths) if case_lengths else 0.0 median_len = statistics.median(case_lengths) if case_lengths else 0.0 + min_len = min(case_lengths) if case_lengths else 0 + max_len = max(case_lengths) if case_lengths else 0 return LogStats( n_events=n_events, @@ -76,6 +80,8 @@ def summarize(events: Iterable[Event], *, top_n: int = 10) -> LogStats: latest=latest, mean_case_length=mean_len, median_case_length=median_len, + min_case_length=min_len, + max_case_length=max_len, top_activities=_top_n_sorted(activity_counts, top_n), top_transitions=_top_n_sorted(transition_counts, top_n), ) diff --git a/tests/test_stats.py b/tests/test_stats.py index a9884a6..ca4df7b 100644 --- a/tests/test_stats.py +++ b/tests/test_stats.py @@ -35,6 +35,13 @@ def test_summarize_case_lengths() -> None: assert s.median_case_length == 2 +def test_summarize_min_max_case_lengths() -> None: + s = summarize(_events()) + # cases: c1=3 events, c2=2 events, c3=1 event + assert s.min_case_length == 1 + assert s.max_case_length == 3 + + def test_summarize_top_activities_sorted_by_count_desc() -> None: s = summarize(_events(), top_n=10) counts = [c for _, c in s.top_activities] @@ -45,7 +52,7 @@ def test_summarize_top_activities_sorted_by_count_desc() -> None: def test_summarize_top_transitions() -> None: s = summarize(_events()) transitions = {pair: c for pair, c in s.top_transitions} - assert transitions[("a", "b")] == 2 # c1 a→b and c2 a→b + assert transitions[("a", "b")] == 2 # c1 a->b and c2 a->b assert transitions[("b", "c")] == 1 @@ -62,6 +69,12 @@ def test_summarize_empty_log_is_safe() -> None: assert s.earliest is None +def test_summarize_empty_log_min_max_zero() -> None: + s = summarize([]) + assert s.min_case_length == 0 + assert s.max_case_length == 0 + + def test_cli_stats_synthetic_toy() -> None: runner = CliRunner() r = runner.invoke(main, ["stats", "synthetic-toy", "--top-n", "3"]) @@ -70,3 +83,5 @@ def test_cli_stats_synthetic_toy() -> None: assert data["n_cases"] == 200 assert data["n_events"] == 965 assert len(data["top_activities"]) == 3 + assert "min_case_length" in data + assert "max_case_length" in data