Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions docs/runtime-telemetry-history.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,12 @@ normal same-condition comparability gate passes.

Runtime telemetry coverage context is copied into
`runtime_telemetry_context.<baseline|candidate>.telemetry_coverage` and, when
provided through the history artifact, `history_telemetry_coverage`. The history
artifact also exposes a producer-side `telemetry_coverage` summary with
`run_summaries` and `missing_field_runs`, so Lab or AIGuard consumers can reuse
EdgeEnv's replay summary instead of recomputing coverage gaps. This makes
coverage gaps visible downstream without allowing coverage to override
EdgeEnv's comparability-first regression policy.

Replay edge cases are preserved as evidence context:

Expand Down
4 changes: 4 additions & 0 deletions inferedge_env/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,10 @@ def inspect_runtime_telemetry_history_command(
f"{', '.join(coverage.get('missing_fields', [])) or '-'}",
soft_wrap=True,
)
console.print(
"Telemetry coverage missing field runs: "
f"{coverage.get('missing_field_run_count', 0)}"
)
console.print(
"Orchestrator context runs: "
f"{len(replay.get('orchestrator_context_run_ids', []))}"
Expand Down
3 changes: 3 additions & 0 deletions inferedge_env/compare/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,9 @@ def _maybe_runtime_telemetry_context(
"schema_version": telemetry_history.get("schema_version"),
"summary": telemetry_history.get("summary", {}),
}
telemetry_coverage = telemetry_history.get("telemetry_coverage")
if isinstance(telemetry_coverage, dict):
context["history"]["telemetry_coverage"] = telemetry_coverage
return context


Expand Down
55 changes: 51 additions & 4 deletions inferedge_env/result/telemetry_history.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def build_runtime_telemetry_history(
)
)
missing.sort(key=lambda item: item["run_id"])
telemetry_coverage = _telemetry_coverage_summary(entries)
return {
"schema_version": RUNTIME_TELEMETRY_HISTORY_SCHEMA_VERSION,
"generated_at": generated.isoformat(),
Expand All @@ -82,6 +83,7 @@ def build_runtime_telemetry_history(
"missing_telemetry_runs": len(missing),
"orchestrator_feed_runs": len(orchestrator_contexts),
},
"telemetry_coverage": telemetry_coverage,
"runs": entries,
"missing_telemetry": missing,
"notes": [
Expand Down Expand Up @@ -158,6 +160,13 @@ def validate_runtime_telemetry_history(
raise RuntimeTelemetryHistoryError(
f"Runtime telemetry history missing_telemetry must be a list: {label}"
)
if "telemetry_coverage" in payload and not isinstance(
payload.get("telemetry_coverage"),
dict,
):
raise RuntimeTelemetryHistoryError(
f"Runtime telemetry history telemetry_coverage must be an object: {label}"
)
for index, entry in enumerate(payload["runs"]):
if not isinstance(entry, dict):
raise RuntimeTelemetryHistoryError(
Expand Down Expand Up @@ -200,6 +209,9 @@ def inspect_runtime_telemetry_history(payload: dict[str, Any]) -> dict[str, Any]
numeric_sequence_ids = [
value for value in sequence_ids if isinstance(value, (int, float))
]
telemetry_coverage = payload.get("telemetry_coverage")
if not isinstance(telemetry_coverage, dict):
telemetry_coverage = _telemetry_coverage_summary(runs)
return {
"schema_version": payload["schema_version"],
"valid": True,
Expand All @@ -208,7 +220,7 @@ def inspect_runtime_telemetry_history(payload: dict[str, Any]) -> dict[str, Any]
"replay": {
"run_ids": run_ids,
"telemetry_fields": _telemetry_fields(runs),
"telemetry_coverage": _telemetry_coverage_summary(runs),
"telemetry_coverage": telemetry_coverage,
"orchestrator_context_run_ids": [
entry["run_id"]
for entry in runs
Expand Down Expand Up @@ -398,6 +410,8 @@ def _telemetry_coverage_summary(entries: list[dict[str, Any]]) -> dict[str, Any]
missing_fields: set[str] = set()
ratios: list[float] = []
missing_telemetry_failure_values: set[bool] = set()
run_summaries: list[dict[str, Any]] = []
missing_field_runs: list[dict[str, Any]] = []

for entry in entries:
telemetry = entry.get("runtime_telemetry")
Expand All @@ -406,19 +420,48 @@ def _telemetry_coverage_summary(entries: list[dict[str, Any]]) -> dict[str, Any]
coverage = telemetry.get("coverage")
if not isinstance(coverage, dict):
continue
run_id = entry.get("run_id")
run_id_value = run_id if isinstance(run_id, str) else ""
coverage_entries.append(coverage)
expected_fields.update(_string_items(coverage.get("expected_fields")))
observed_fields.update(_string_items(coverage.get("observed_fields")))
missing_fields.update(_string_items(coverage.get("missing_fields")))
expected = _string_items(coverage.get("expected_fields"))
observed = _string_items(coverage.get("observed_fields"))
missing = _string_items(coverage.get("missing_fields"))
expected_fields.update(expected)
observed_fields.update(observed)
missing_fields.update(missing)
ratio = coverage.get("coverage_ratio")
ratio_value = float(ratio) if isinstance(ratio, (int, float)) else None
if isinstance(ratio, (int, float)):
ratios.append(float(ratio))
missing_telemetry_is_failure = coverage.get("missing_telemetry_is_failure")
if isinstance(missing_telemetry_is_failure, bool):
missing_telemetry_failure_values.add(missing_telemetry_is_failure)
run_summary = {
"run_id": run_id_value,
"coverage_present": True,
"expected_fields": sorted(expected),
"observed_fields": sorted(observed),
"missing_fields": sorted(missing),
"expected_field_count": coverage.get("expected_field_count"),
"observed_field_count": coverage.get("observed_field_count"),
"missing_field_count": coverage.get("missing_field_count"),
"coverage_ratio": ratio_value,
"missing_telemetry_is_failure": missing_telemetry_is_failure,
}
run_summaries.append(run_summary)
if missing:
missing_field_runs.append(
{
"run_id": run_id_value,
"missing_fields": sorted(missing),
"missing_field_count": len(missing),
"missing_telemetry_is_failure": missing_telemetry_is_failure,
}
)

return {
"runs_with_coverage": len(coverage_entries),
"runs_without_coverage": max(len(entries) - len(coverage_entries), 0),
"expected_fields": sorted(expected_fields),
"observed_fields": sorted(observed_fields),
"missing_fields": sorted(missing_fields),
Expand All @@ -427,6 +470,10 @@ def _telemetry_coverage_summary(entries: list[dict[str, Any]]) -> dict[str, Any]
"missing_telemetry_is_failure_values": sorted(
missing_telemetry_failure_values
),
"any_missing_telemetry_is_failure": any(missing_telemetry_failure_values),
"missing_field_run_count": len(missing_field_runs),
"missing_field_runs": missing_field_runs,
"run_summaries": run_summaries,
}


Expand Down
32 changes: 32 additions & 0 deletions tests/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,30 @@ def test_regression_attaches_runtime_telemetry_history_context(
"telemetry_runs": 2,
"missing_telemetry_runs": 0,
},
"telemetry_coverage": {
"runs_with_coverage": 1,
"runs_without_coverage": 1,
"expected_fields": [
"gpu_temperature",
"queue_depth",
"telemetry_timestamp",
],
"observed_fields": ["gpu_temperature", "telemetry_timestamp"],
"missing_fields": ["queue_depth"],
"coverage_ratio_min": 0.666667,
"coverage_ratio_max": 0.666667,
"missing_telemetry_is_failure_values": [False],
"any_missing_telemetry_is_failure": False,
"missing_field_run_count": 1,
"missing_field_runs": [
{
"run_id": "candidate",
"missing_fields": ["queue_depth"],
"missing_field_count": 1,
"missing_telemetry_is_failure": False,
}
],
},
"runs": [
{
"run_id": "baseline",
Expand Down Expand Up @@ -133,6 +157,14 @@ def test_regression_attaches_runtime_telemetry_history_context(
assert context["history"]["schema_version"] == (
"edgeenv.runtime-telemetry-history.v1"
)
assert context["history"]["telemetry_coverage"]["missing_field_runs"] == [
{
"run_id": "candidate",
"missing_fields": ["queue_depth"],
"missing_field_count": 1,
"missing_telemetry_is_failure": False,
}
]
assert context["baseline"]["result_telemetry_present"] is True
assert context["baseline"]["history_entry_present"] is True
assert context["candidate"]["execution_sequence_id"] == 2
Expand Down
73 changes: 73 additions & 0 deletions tests/test_runtime_telemetry_history.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,28 @@ def test_build_runtime_telemetry_history_records_entries_and_missing_gaps(
"comparability_owner": "edgeenv",
"missing_telemetry_is_failure": False,
}
assert payload["telemetry_coverage"]["missing_field_runs"] == [
{
"run_id": "run-with-telemetry",
"missing_fields": ["queue_depth"],
"missing_field_count": 1,
"missing_telemetry_is_failure": False,
}
]
assert payload["telemetry_coverage"]["run_summaries"] == [
{
"run_id": "run-with-telemetry",
"coverage_present": True,
"expected_fields": ["gpu_temperature", "queue_depth"],
"observed_fields": ["gpu_temperature"],
"missing_fields": ["queue_depth"],
"expected_field_count": 2,
"observed_field_count": 1,
"missing_field_count": 1,
"coverage_ratio": 0.5,
"missing_telemetry_is_failure": False,
}
]
assert payload["runs"][0]["protocol"]["repeat_runs"] == 10
assert payload["missing_telemetry"] == [
{
Expand Down Expand Up @@ -299,12 +321,55 @@ def test_inspect_runtime_telemetry_history_reports_replay_summary(
assert "operation" in summary["replay"]["telemetry_fields"]
assert summary["replay"]["telemetry_coverage"] == {
"runs_with_coverage": 2,
"runs_without_coverage": 0,
"expected_fields": ["gpu_temperature", "queue_depth"],
"observed_fields": ["gpu_temperature"],
"missing_fields": ["queue_depth"],
"coverage_ratio_min": 0.5,
"coverage_ratio_max": 0.5,
"missing_telemetry_is_failure_values": [False],
"any_missing_telemetry_is_failure": False,
"missing_field_run_count": 2,
"missing_field_runs": [
{
"run_id": "run-a",
"missing_fields": ["queue_depth"],
"missing_field_count": 1,
"missing_telemetry_is_failure": False,
},
{
"run_id": "run-b",
"missing_fields": ["queue_depth"],
"missing_field_count": 1,
"missing_telemetry_is_failure": False,
},
],
"run_summaries": [
{
"run_id": "run-a",
"coverage_present": True,
"expected_fields": ["gpu_temperature", "queue_depth"],
"observed_fields": ["gpu_temperature"],
"missing_fields": ["queue_depth"],
"expected_field_count": 2,
"observed_field_count": 1,
"missing_field_count": 1,
"coverage_ratio": 0.5,
"missing_telemetry_is_failure": False,
},
{
"run_id": "run-b",
"coverage_present": True,
"expected_fields": ["gpu_temperature", "queue_depth"],
"observed_fields": ["gpu_temperature"],
"missing_fields": ["queue_depth"],
"expected_field_count": 2,
"observed_field_count": 1,
"missing_field_count": 1,
"coverage_ratio": 0.5,
"missing_telemetry_is_failure": False,
},
],
}
assert summary["replay"]["orchestrator_context_run_ids"] == []
assert "not production monitoring" in summary["notes"][2]
Expand Down Expand Up @@ -466,6 +531,14 @@ def test_cli_runs_telemetry_inspect_history_json_output(
assert payload["replay"]["telemetry_coverage"]["missing_fields"] == [
"queue_depth"
]
assert payload["replay"]["telemetry_coverage"]["missing_field_runs"] == [
{
"run_id": "run-cli-json",
"missing_fields": ["queue_depth"],
"missing_field_count": 1,
"missing_telemetry_is_failure": False,
}
]


def _write_registered_run(
Expand Down
Loading