Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 20 additions & 18 deletions pm_bench/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,17 @@ def _load_events(name: str) -> list:
"""Return a materialized event list for a dataset.

Supported inputs:
- `synthetic-toy` bundled deterministic generator (seed=42)
- `synthetic-toy@<seed>` same generator at a different seed (e.g.
- `synthetic-toy` -> bundled deterministic generator (seed=42)
- `synthetic-toy@<seed>` -> same generator at a different seed (e.g.
`synthetic-toy@99`). The `@<seed>` suffix is for variance
experiments; canonical leaderboard runs always use bare
`synthetic-toy`.
- any path that looks like a CSV (`.csv` / `.csv.gz` / contains `/`)
loaded via `pm_bench.io.read_csv_log`
- any path ending in `.xes` / `.xes.gz` loaded via
-> loaded via `pm_bench.io.read_csv_log`
- any path ending in `.xes` / `.xes.gz` -> loaded via
`pm_bench.xes.read_xes_log`
- a registry-named dataset whose hash is pinned and cached locally
fetched + parsed (CSV or XES per the registry `format`).
-> fetched + parsed (CSV or XES per the registry `format`).
"""
from pm_bench.io import looks_like_path, read_csv_log
from pm_bench.xes import read_xes_log
Expand Down Expand Up @@ -219,7 +219,7 @@ def _load_split(path: str) -> dict:
"""Load a split JSON, validate shape, exit 2 with a clear message on bad input.

Centralizing the read here means every command that accepts `--split`
fails the same way on the same shapes no one path traceback'ing
fails the same way on the same shapes - no one path traceback'ing
while another exits cleanly.
"""
try:
Expand Down Expand Up @@ -271,7 +271,7 @@ def cmd_list() -> None:
@click.argument("name")
def info(name: str) -> None:
"""Show details for a dataset."""
# `info synthetic-toy@99` should resolve to the base entry every
# `info synthetic-toy@99` should resolve to the base entry - every
# other verb accepts the @<seed> suffix and this one was the
# outlier. Strip the suffix before the registry lookup.
lookup_name = name.split("@", 1)[0] if "@" in name else name
Expand Down Expand Up @@ -312,7 +312,7 @@ def fetch(name: str, pin: bool) -> None:
Auto-downloads when `download_url` is set; otherwise prints
instructions for the manual TOS-gated download path (4TU / Mendeley).
"""
# synthetic-toy@<seed> is a variant of synthetic-toy same "generated
# synthetic-toy@<seed> is a variant of synthetic-toy - same "generated
# on demand, no fetch needed" semantics. Other commands accept the
# @<seed> suffix; we match here for consistency.
if name.startswith("synthetic-toy@") or name == "synthetic-toy":
Expand Down Expand Up @@ -392,6 +392,8 @@ def stats(name: str, top_n: int) -> None:
"latest": s.latest.isoformat() if s.latest else None,
"mean_case_length": s.mean_case_length,
"median_case_length": s.median_case_length,
"min_case_length": s.min_case_length,
"max_case_length": s.max_case_length,
"top_activities": [
{"activity": a, "count": c} for a, c in s.top_activities
],
Expand All @@ -409,7 +411,7 @@ def stats(name: str, top_n: int) -> None:
def split(name: str) -> None:
"""Produce a train/val/test split for a dataset.

The split is task-agnostic every task (next-event, remaining-time,
The split is task-agnostic - every task (next-event, remaining-time,
outcome, bottleneck, conformance) shares the same case-level
chronological partition, which is the whole point of pm-bench. So
this command takes no `--task`; downstream commands (`prefixes`,
Expand Down Expand Up @@ -521,8 +523,8 @@ def prefixes(name: str, split_path: str, out_path: str, partition: str, task: st
default="markov",
show_default=True,
help=(
"markov / uniform next-event; mean / zero remaining-time; "
"prior / global outcome; mean-wait / random bottleneck."
"markov / uniform -> next-event; mean / zero -> remaining-time; "
"prior / global -> outcome; mean-wait / random -> bottleneck."
),
)
@click.option(
Expand Down Expand Up @@ -618,7 +620,7 @@ def predict(
type=click.Choice(["dfg", "empty"]),
default="dfg",
show_default=True,
help="dfg DFG from training cases; empty no transitions (absolute floor).",
help="dfg -> DFG from training cases; empty -> no transitions (absolute floor).",
)
@_runtime_safe
def discover(name: str, split_path: str, out_path: str, baseline: str) -> None:
Expand Down Expand Up @@ -681,8 +683,8 @@ def score(
try:
_score_dispatch(predictions_path, prefixes_path, dataset_name, split_path, task)
except (KeyError, ValueError) as exc:
# KeyError predictions CSV is missing a required column.
# ValueError score function rejected the inputs (length
# KeyError -> predictions CSV is missing a required column.
# ValueError -> score function rejected the inputs (length
# mismatch, empty truth, malformed conformance JSON, etc.).
# In either case it's a clean runtime error, exit 2.
click.echo(str(exc), err=True)
Expand All @@ -708,7 +710,7 @@ def _score_dispatch(
split_data = _load_split(split_path)
truth_dfg = extract_dfg(events, split_data["test"])
# read_model_json may ValueError on bad shape; the outer score()
# try/except (added in the audit cleanup) catches it exit 2.
# try/except (added in the audit cleanup) catches it -> exit 2.
model_dfg = read_model_json(predictions_path)
cs = score_conformance(model_dfg, truth_dfg)
click.echo(
Expand Down Expand Up @@ -969,7 +971,7 @@ def leaderboard(
return

width = max((len(e.model) for e in board.entries), default=10)
click.echo(f"{board.task} · {board.dataset} · {board.metric}")
click.echo(f"{board.task} . {board.dataset} . {board.metric}")
click.echo("-" * (width + 30))
for e in standings(board):
if board.task == "remaining-time":
Expand Down Expand Up @@ -1047,7 +1049,7 @@ def validate(board_path: str, repo_root: str, no_rescore: bool) -> None:
return

# `load_board` re-parses the JSON we already have in `raw`. Pay that
# cost once the file is small and the alternative is leaking the
# cost once - the file is small and the alternative is leaking the
# Board construction into this command.
board = load_board(board_path)
try:
Expand Down Expand Up @@ -1098,7 +1100,7 @@ def compare(board_a: str, board_b: str) -> None:
sys.exit(2)
except ValueError as exc:
# Runtime mismatch (different (task, dataset) on the two files)
# exit 2 per the convention in cli.py: 1 for usage / not-found,
# -> exit 2 per the convention in cli.py: 1 for usage / not-found,
# 2 for runtime errors after args are accepted.
click.echo(str(exc), err=True)
sys.exit(2)
Expand Down
12 changes: 9 additions & 3 deletions pm_bench/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

Useful when inspecting a new dataset - n_cases, n_events, distinct
activity count, time span, top-N most-frequent activities and
transitions, mean / median case length. Pure CPython; runs in the
same process as the rest of pm-bench so it works on `synthetic-toy`,
any CSV path, and (eventually) any cached BPI log.
transitions, mean / median / min / max case length. Pure CPython; runs
in the same process as the rest of pm-bench so it works on
`synthetic-toy`, any CSV path, and (eventually) any cached BPI log.
"""
from __future__ import annotations

Expand All @@ -27,6 +27,8 @@ class LogStats:
latest: datetime | None
mean_case_length: float
median_case_length: float
min_case_length: int
max_case_length: int
top_activities: list[tuple[Activity, int]]
top_transitions: list[tuple[tuple[Activity, Activity], int]]

Expand Down Expand Up @@ -66,6 +68,8 @@ def summarize(events: Iterable[Event], *, top_n: int = 10) -> LogStats:
n_cases = len(by_case)
mean_len = statistics.fmean(case_lengths) if case_lengths else 0.0
median_len = statistics.median(case_lengths) if case_lengths else 0.0
min_len = min(case_lengths) if case_lengths else 0
max_len = max(case_lengths) if case_lengths else 0

return LogStats(
n_events=n_events,
Expand All @@ -76,6 +80,8 @@ def summarize(events: Iterable[Event], *, top_n: int = 10) -> LogStats:
latest=latest,
mean_case_length=mean_len,
median_case_length=median_len,
min_case_length=min_len,
max_case_length=max_len,
top_activities=_top_n_sorted(activity_counts, top_n),
top_transitions=_top_n_sorted(transition_counts, top_n),
)
Expand Down
17 changes: 16 additions & 1 deletion tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ def test_summarize_case_lengths() -> None:
assert s.median_case_length == 2


def test_summarize_min_max_case_lengths() -> None:
    """The shortest and longest case lengths of the fixture log are reported."""
    stats = summarize(_events())
    # Fixture cases: c1 has 3 events, c2 has 2 events, c3 has 1 event.
    observed = (stats.min_case_length, stats.max_case_length)
    assert observed == (1, 3)


def test_summarize_top_activities_sorted_by_count_desc() -> None:
s = summarize(_events(), top_n=10)
counts = [c for _, c in s.top_activities]
Expand All @@ -45,7 +52,7 @@ def test_summarize_top_activities_sorted_by_count_desc() -> None:
def test_summarize_top_transitions() -> None:
    """Transition counts are aggregated across the cases of the fixture log."""
    s = summarize(_events())
    # top_transitions is a list of ((src, dst), count) pairs, so dict()
    # gives a direct pair -> count lookup.
    transitions = dict(s.top_transitions)
    assert transitions[("a", "b")] == 2  # c1 a->b and c2 a->b
    assert transitions[("b", "c")] == 1


Expand All @@ -62,6 +69,12 @@ def test_summarize_empty_log_is_safe() -> None:
assert s.earliest is None


def test_summarize_empty_log_min_max_zero() -> None:
    """An empty event list yields zero for both case-length extremes."""
    empty_stats = summarize([])
    for field in ("min_case_length", "max_case_length"):
        assert getattr(empty_stats, field) == 0


def test_cli_stats_synthetic_toy() -> None:
runner = CliRunner()
r = runner.invoke(main, ["stats", "synthetic-toy", "--top-n", "3"])
Expand All @@ -70,3 +83,5 @@ def test_cli_stats_synthetic_toy() -> None:
assert data["n_cases"] == 200
assert data["n_events"] == 965
assert len(data["top_activities"]) == 3
assert "min_case_length" in data
assert "max_case_length" in data
Loading