diff --git a/docs/runtime-telemetry-history.md b/docs/runtime-telemetry-history.md index acda04d..61dc07e 100644 --- a/docs/runtime-telemetry-history.md +++ b/docs/runtime-telemetry-history.md @@ -161,9 +161,12 @@ normal same-condition comparability gate passes. Runtime telemetry coverage context is copied into `runtime_telemetry_context..telemetry_coverage` and, when -provided through the history artifact, `history_telemetry_coverage`. This makes -coverage gaps visible to Lab or AIGuard consumers without allowing coverage to -override EdgeEnv's comparability-first regression policy. +provided through the history artifact, `history_telemetry_coverage`. The history +artifact also exposes a producer-side `telemetry_coverage` summary with +`run_summaries` and `missing_field_runs`, so Lab or AIGuard consumers can reuse +EdgeEnv's replay summary instead of recomputing coverage gaps. This makes +coverage gaps visible downstream without allowing coverage to override +EdgeEnv's comparability-first regression policy. Replay edge cases are preserved as evidence context: diff --git a/inferedge_env/cli.py b/inferedge_env/cli.py index 96e5767..53666f6 100644 --- a/inferedge_env/cli.py +++ b/inferedge_env/cli.py @@ -455,6 +455,10 @@ def inspect_runtime_telemetry_history_command( f"{', '.join(coverage.get('missing_fields', [])) or '-'}", soft_wrap=True, ) + console.print( + "Telemetry coverage missing field runs: " + f"{coverage.get('missing_field_run_count', 0)}" + ) console.print( "Orchestrator context runs: " f"{len(replay.get('orchestrator_context_run_ids', []))}" diff --git a/inferedge_env/compare/regression.py b/inferedge_env/compare/regression.py index eee9428..3eac7e1 100644 --- a/inferedge_env/compare/regression.py +++ b/inferedge_env/compare/regression.py @@ -252,6 +252,9 @@ def _maybe_runtime_telemetry_context( "schema_version": telemetry_history.get("schema_version"), "summary": telemetry_history.get("summary", {}), } + telemetry_coverage = telemetry_history.get("telemetry_coverage") + if isinstance(telemetry_coverage, dict): + context["history"]["telemetry_coverage"] = telemetry_coverage return context diff --git a/inferedge_env/result/telemetry_history.py b/inferedge_env/result/telemetry_history.py index 0c5a1ba..280f0e5 100644 --- a/inferedge_env/result/telemetry_history.py +++ b/inferedge_env/result/telemetry_history.py @@ -68,6 +68,7 @@ def build_runtime_telemetry_history( ) ) missing.sort(key=lambda item: item["run_id"]) + telemetry_coverage = _telemetry_coverage_summary(entries) return { "schema_version": RUNTIME_TELEMETRY_HISTORY_SCHEMA_VERSION, "generated_at": generated.isoformat(), @@ -82,6 +83,7 @@ def build_runtime_telemetry_history( "missing_telemetry_runs": len(missing), "orchestrator_feed_runs": len(orchestrator_contexts), }, + "telemetry_coverage": telemetry_coverage, "runs": entries, "missing_telemetry": missing, "notes": [ @@ -158,6 +160,13 @@ def validate_runtime_telemetry_history( raise RuntimeTelemetryHistoryError( f"Runtime telemetry history missing_telemetry must be a list: {label}" ) + if "telemetry_coverage" in payload and not isinstance( + payload.get("telemetry_coverage"), + dict, + ): + raise RuntimeTelemetryHistoryError( + f"Runtime telemetry history telemetry_coverage must be an object: {label}" + ) for index, entry in enumerate(payload["runs"]): if not isinstance(entry, dict): raise RuntimeTelemetryHistoryError( @@ -200,6 +209,9 @@ def inspect_runtime_telemetry_history(payload: dict[str, Any]) -> dict[str, Any] numeric_sequence_ids = [ value for value in sequence_ids if isinstance(value, (int, float)) ] + telemetry_coverage = payload.get("telemetry_coverage") + if not isinstance(telemetry_coverage, dict): + telemetry_coverage = _telemetry_coverage_summary(runs) return { "schema_version": payload["schema_version"], "valid": True, @@ -208,7 +220,7 @@ def inspect_runtime_telemetry_history(payload: dict[str, Any]) -> dict[str, Any] "replay": { "run_ids": run_ids, "telemetry_fields": _telemetry_fields(runs), - "telemetry_coverage": _telemetry_coverage_summary(runs), + "telemetry_coverage": telemetry_coverage, "orchestrator_context_run_ids": [ entry["run_id"] for entry in runs @@ -398,6 +410,8 @@ def _telemetry_coverage_summary(entries: list[dict[str, Any]]) -> dict[str, Any] missing_fields: set[str] = set() ratios: list[float] = [] missing_telemetry_failure_values: set[bool] = set() + run_summaries: list[dict[str, Any]] = [] + missing_field_runs: list[dict[str, Any]] = [] for entry in entries: telemetry = entry.get("runtime_telemetry") @@ -406,19 +420,48 @@ def _telemetry_coverage_summary(entries: list[dict[str, Any]]) -> dict[str, Any] coverage = telemetry.get("coverage") if not isinstance(coverage, dict): continue + run_id = entry.get("run_id") + run_id_value = run_id if isinstance(run_id, str) else "" coverage_entries.append(coverage) - expected_fields.update(_string_items(coverage.get("expected_fields"))) - observed_fields.update(_string_items(coverage.get("observed_fields"))) - missing_fields.update(_string_items(coverage.get("missing_fields"))) + expected = _string_items(coverage.get("expected_fields")) + observed = _string_items(coverage.get("observed_fields")) + missing = _string_items(coverage.get("missing_fields")) + expected_fields.update(expected) + observed_fields.update(observed) + missing_fields.update(missing) ratio = coverage.get("coverage_ratio") + ratio_value = float(ratio) if isinstance(ratio, (int, float)) else None if isinstance(ratio, (int, float)): ratios.append(float(ratio)) missing_telemetry_is_failure = coverage.get("missing_telemetry_is_failure") if isinstance(missing_telemetry_is_failure, bool): missing_telemetry_failure_values.add(missing_telemetry_is_failure) + run_summary = { + "run_id": run_id_value, + "coverage_present": True, + "expected_fields": sorted(expected), + "observed_fields": sorted(observed), + "missing_fields": sorted(missing), + "expected_field_count": coverage.get("expected_field_count"), + "observed_field_count": coverage.get("observed_field_count"), + "missing_field_count": coverage.get("missing_field_count"), + "coverage_ratio": ratio_value, + "missing_telemetry_is_failure": missing_telemetry_is_failure, + } + run_summaries.append(run_summary) + if missing: + missing_field_runs.append( + { + "run_id": run_id_value, + "missing_fields": sorted(missing), + "missing_field_count": len(missing), + "missing_telemetry_is_failure": missing_telemetry_is_failure, + } + ) return { "runs_with_coverage": len(coverage_entries), + "runs_without_coverage": max(len(entries) - len(coverage_entries), 0), "expected_fields": sorted(expected_fields), "observed_fields": sorted(observed_fields), "missing_fields": sorted(missing_fields), @@ -427,6 +470,10 @@ def _telemetry_coverage_summary(entries: list[dict[str, Any]]) -> dict[str, Any] "missing_telemetry_is_failure_values": sorted( missing_telemetry_failure_values ), + "any_missing_telemetry_is_failure": any(missing_telemetry_failure_values), + "missing_field_run_count": len(missing_field_runs), + "missing_field_runs": missing_field_runs, + "run_summaries": run_summaries, } diff --git a/tests/test_regression.py b/tests/test_regression.py index 2fba3bc..65c7d38 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -105,6 +105,30 @@ def test_regression_attaches_runtime_telemetry_history_context( "telemetry_runs": 2, "missing_telemetry_runs": 0, }, + "telemetry_coverage": { + "runs_with_coverage": 1, + "runs_without_coverage": 1, + "expected_fields": [ + "gpu_temperature", + "queue_depth", + "telemetry_timestamp", + ], + "observed_fields": ["gpu_temperature", "telemetry_timestamp"], + "missing_fields": ["queue_depth"], + "coverage_ratio_min": 0.666667, + "coverage_ratio_max": 0.666667, + "missing_telemetry_is_failure_values": [False], + "any_missing_telemetry_is_failure": False, + "missing_field_run_count": 1, + "missing_field_runs": [ + { + "run_id": "candidate", + "missing_fields": ["queue_depth"], + "missing_field_count": 1, + "missing_telemetry_is_failure": False, + } + ], + }, "runs": [ { "run_id": "baseline", @@ -133,6 +157,14 @@ def test_regression_attaches_runtime_telemetry_history_context( assert context["history"]["schema_version"] == ( "edgeenv.runtime-telemetry-history.v1" ) + assert context["history"]["telemetry_coverage"]["missing_field_runs"] == [ + { + "run_id": "candidate", + "missing_fields": ["queue_depth"], + "missing_field_count": 1, + "missing_telemetry_is_failure": False, + } + ] assert context["baseline"]["result_telemetry_present"] is True assert context["baseline"]["history_entry_present"] is True assert context["candidate"]["execution_sequence_id"] == 2 diff --git a/tests/test_runtime_telemetry_history.py b/tests/test_runtime_telemetry_history.py index c7cea3f..5271b62 100644 --- a/tests/test_runtime_telemetry_history.py +++ b/tests/test_runtime_telemetry_history.py @@ -77,6 +77,28 @@ def test_build_runtime_telemetry_history_records_entries_and_missing_gaps( "comparability_owner": "edgeenv", "missing_telemetry_is_failure": False, } + assert payload["telemetry_coverage"]["missing_field_runs"] == [ + { + "run_id": "run-with-telemetry", + "missing_fields": ["queue_depth"], + "missing_field_count": 1, + "missing_telemetry_is_failure": False, + } + ] + assert payload["telemetry_coverage"]["run_summaries"] == [ + { + "run_id": "run-with-telemetry", + "coverage_present": True, + "expected_fields": ["gpu_temperature", "queue_depth"], + "observed_fields": ["gpu_temperature"], + "missing_fields": ["queue_depth"], + "expected_field_count": 2, + "observed_field_count": 1, + "missing_field_count": 1, + "coverage_ratio": 0.5, + "missing_telemetry_is_failure": False, + } + ] assert payload["runs"][0]["protocol"]["repeat_runs"] == 10 assert payload["missing_telemetry"] == [ { @@ -299,12 +321,55 @@ def test_inspect_runtime_telemetry_history_reports_replay_summary( assert "operation" in summary["replay"]["telemetry_fields"] assert summary["replay"]["telemetry_coverage"] == { "runs_with_coverage": 2, + "runs_without_coverage": 0, "expected_fields": ["gpu_temperature", "queue_depth"], "observed_fields": ["gpu_temperature"], "missing_fields": ["queue_depth"], "coverage_ratio_min": 0.5, "coverage_ratio_max": 0.5, "missing_telemetry_is_failure_values": [False], + "any_missing_telemetry_is_failure": False, + "missing_field_run_count": 2, + "missing_field_runs": [ + { + "run_id": "run-a", + "missing_fields": ["queue_depth"], + "missing_field_count": 1, + "missing_telemetry_is_failure": False, + }, + { + "run_id": "run-b", + "missing_fields": ["queue_depth"], + "missing_field_count": 1, + "missing_telemetry_is_failure": False, + }, + ], + "run_summaries": [ + { + "run_id": "run-a", + "coverage_present": True, + "expected_fields": ["gpu_temperature", "queue_depth"], + "observed_fields": ["gpu_temperature"], + "missing_fields": ["queue_depth"], + "expected_field_count": 2, + "observed_field_count": 1, + "missing_field_count": 1, + "coverage_ratio": 0.5, + "missing_telemetry_is_failure": False, + }, + { + "run_id": "run-b", + "coverage_present": True, + "expected_fields": ["gpu_temperature", "queue_depth"], + "observed_fields": ["gpu_temperature"], + "missing_fields": ["queue_depth"], + "expected_field_count": 2, + "observed_field_count": 1, + "missing_field_count": 1, + "coverage_ratio": 0.5, + "missing_telemetry_is_failure": False, + }, + ], } assert summary["replay"]["orchestrator_context_run_ids"] == [] assert "not production monitoring" in summary["notes"][2] @@ -466,6 +531,14 @@ def test_cli_runs_telemetry_inspect_history_json_output( assert payload["replay"]["telemetry_coverage"]["missing_fields"] == [ "queue_depth" ] + assert payload["replay"]["telemetry_coverage"]["missing_field_runs"] == [ + { + "run_id": "run-cli-json", + "missing_fields": ["queue_depth"], + "missing_field_count": 1, + "missing_telemetry_is_failure": False, + } + ] def _write_registered_run(