From cafc60e3a566bb920e24e80329c767a6e3906d21 Mon Sep 17 00:00:00 2001
From: hyeokjun32 <ksjm0417@naver.com>
Date: Sat, 23 May 2026 22:29:54 +0900
Subject: [PATCH] Preserve runtime telemetry coverage context

---
 README.md                                 |  4 ++
 docs/ko/README.md                         |  5 ++-
 docs/runtime-telemetry-history.md         | 26 +++++++++++--
 inferedge_env/cli.py                      | 36 ++++++++++++++++++
 inferedge_env/compare/regression.py       | 29 ++++++++++++++
 inferedge_env/result/telemetry_history.py | 46 +++++++++++++++++++++++
 tests/test_regression.py                  | 46 ++++++++++++++++++++++-
 tests/test_runtime_telemetry_history.py   | 39 +++++++++++++++++++
 8 files changed, 225 insertions(+), 6 deletions(-)
diff --git a/README.md b/README.md
index 6888105..77b1fdb 100644
--- a/README.md
+++ b/README.md
@@ -313,6 +313,10 @@ Use `edgeenv runs telemetry inspect-history <path>` to validate and summarize
 that replay artifact before attaching it to a regression report. The intended
 local flow is export history, inspect the replay artifact, then pass it to
 `report regression --telemetry-history`.
+If Runtime includes `runtime_telemetry.coverage`, EdgeEnv preserves it in the
+history artifact and inspect summary as evidence quality metadata. Missing
+coverage fields are visible as coverage gaps, but they do not fail the run or
+change comparability.
 
 `report regression` reuses the same comparability gate. It only computes
 mean/p95/p99/FPS/resource deltas for `Comparable: Yes` with
diff --git a/docs/ko/README.md b/docs/ko/README.md
index 480ef60..6709865 100644
--- a/docs/ko/README.md
+++ b/docs/ko/README.md
@@ -80,7 +80,10 @@ runtime telemetry history artifact가 있으면 `--telemetry-history`로 연결
 report에 telemetry coverage와 evidence gap을 보조 context로 첨부할 수 있다.
 이 context는 same-condition comparability gate를 우회하지 않는다.
 `edgeenv runs telemetry inspect-history <path>`로 history artifact의 schema,
-replay run, telemetry field, evidence gap을 먼저 확인할 수 있다.
+replay run, telemetry field, coverage metadata, evidence gap을 먼저 확인할 수
+있다. Runtime이 `runtime_telemetry.coverage`를 제공하면 EdgeEnv는 이를
+evidence quality metadata로 보존하지만, coverage 누락을 run 실패나 regression
+judgement로 승격하지 않는다.
 
 ## EdgeEnv가 아닌 것
 
diff --git a/docs/runtime-telemetry-history.md b/docs/runtime-telemetry-history.md
index de1039d..acda04d 100644
--- a/docs/runtime-telemetry-history.md
+++ b/docs/runtime-telemetry-history.md
@@ -37,6 +37,9 @@ The payload is intentionally additive and minimally validated:
 - it must be a JSON object
 - `schema_version`, when present, must be a string
 - unknown telemetry fields are preserved instead of normalized into registry columns
+- Runtime telemetry `coverage`, when present, is preserved as evidence quality
+  metadata. Missing coverage fields are surfaced as coverage gaps in that
+  metadata, not as a failed run or comparability failure.
 
 When present, EdgeEnv stores the payload in two places:
 
@@ -95,7 +98,16 @@ The history artifact uses this top-level shape:
     {
       "run_id": "run-20260522-000000-12345678",
       "runtime_telemetry": {
-        "schema_version": "inferedge-runtime-telemetry-v1"
+        "schema_version": "inferedge-runtime-telemetry-v1",
+        "coverage": {
+          "schema_version": "inferedge-runtime-telemetry-coverage-v1",
+          "expected_fields": ["queue_depth", "gpu_temperature"],
+          "observed_fields": ["gpu_temperature"],
+          "missing_fields": ["queue_depth"],
+          "coverage_ratio": 0.5,
+          "comparability_owner": "edgeenv",
+          "missing_telemetry_is_failure": false
+        }
       },
       "orchestrator_operation_context": {
         "schema_version": "inferedge-orchestrator-edgeenv-runtime-telemetry-feed-v1",
@@ -117,8 +129,9 @@ This is a replay dataset seed. It records evidence gaps explicitly and does not
 
 `inspect-history` is a read-only validation step for that seed artifact. It
 checks the schema, summarizes replay run IDs, available telemetry fields,
-execution sequence IDs, and missing telemetry evidence gaps. It does not mutate
-the registry, change comparability judgement, or act as a monitoring alert.
+execution sequence IDs, telemetry coverage metadata, and missing telemetry
+evidence gaps. It does not mutate the registry, change comparability judgement,
+or act as a monitoring alert.
 
 Regression reports can attach this artifact as supplemental context:
 
@@ -146,6 +159,12 @@ The regression report records telemetry coverage and evidence gaps for the
 baseline/candidate pair. It still calculates regression deltas only after the
 normal same-condition comparability gate passes.
 
+Coverage from each run result is copied into
+`runtime_telemetry_context.<baseline|candidate>.telemetry_coverage`; coverage
+from the history artifact is copied next to it as `history_telemetry_coverage`.
+This makes coverage gaps visible to Lab or AIGuard consumers without allowing
+coverage to override EdgeEnv's comparability-first regression policy.
+
 Replay edge cases are preserved as evidence context:
 
 - If the compared candidate is missing runtime telemetry, the regression report
@@ -195,6 +214,7 @@ and Lab remains the deployment decision owner.
 
 - Do not make runtime telemetry required for a successful run.
 - Do not treat missing telemetry as a comparability failure.
+- Do not treat missing telemetry coverage fields as a regression judgement.
 - Do not add telemetry columns to `runs.db` before a query/report requirement is proven.
 - Do not describe this as production observability, cloud monitoring, distributed tracing, or real-time data drift detection.
 - Do not use telemetry to bypass the existing comparability-first regression policy.
diff --git a/inferedge_env/cli.py b/inferedge_env/cli.py
index bce368c..96e5767 100644
--- a/inferedge_env/cli.py
+++ b/inferedge_env/cli.py
@@ -444,6 +444,17 @@ def inspect_runtime_telemetry_history_command(
     console.print(f"Schema: {summary['schema_version']}")
     console.print(f"Replay runs: {len(replay['run_ids'])}")
     console.print(f"Telemetry fields: {', '.join(replay['telemetry_fields']) or '-'}")
+    coverage = replay.get("telemetry_coverage", {})
+    if isinstance(coverage, dict):
+        console.print(
+            "Telemetry coverage runs: "
+            f"{coverage.get('runs_with_coverage', 0)}"
+        )
+        console.print(
+            "Telemetry coverage missing fields: "
+            f"{', '.join(coverage.get('missing_fields', [])) or '-'}",
+            soft_wrap=True,
+        )
     console.print(
         "Orchestrator context runs: "
         f"{len(replay.get('orchestrator_context_run_ids', []))}"
@@ -1159,6 +1170,7 @@ def _print_runtime_telemetry_context(context: dict[str, Any]) -> None:
             f"history={str(bool(baseline.get('history_entry_present'))).lower()}",
             soft_wrap=True,
         )
+        _print_runtime_telemetry_coverage("baseline", baseline)
     if isinstance(candidate, dict):
         console.print(
             "- candidate: "
@@ -1166,6 +1178,7 @@ def _print_runtime_telemetry_context(context: dict[str, Any]) -> None:
             f"history={str(bool(candidate.get('history_entry_present'))).lower()}",
             soft_wrap=True,
         )
+        _print_runtime_telemetry_coverage("candidate", candidate)
     gaps = context.get("evidence_gaps", [])
     if gaps:
         console.print("- evidence_gaps:")
@@ -1181,6 +1194,29 @@ def _print_runtime_telemetry_context(context: dict[str, Any]) -> None:
     console.print("- role: supplemental context, not a comparability gate")
 
 
+def _print_runtime_telemetry_coverage(label: str, context: dict[str, Any]) -> None:
+    """Print a one-line coverage summary for *label*, or nothing without coverage.
+
+    Result coverage is preferred; the history artifact's copy is the fallback.
+    """
+    for source_key in ("telemetry_coverage", "history_telemetry_coverage"):
+        coverage = context.get(source_key)
+        if isinstance(coverage, dict):
+            break
+    else:
+        return
+    missing = coverage.get("missing_fields")
+    names = [str(item) for item in missing] if isinstance(missing, list) else []
+    observed = coverage.get("observed_field_count", "-")
+    expected = coverage.get("expected_field_count", "-")
+    failure = str(bool(coverage.get("missing_telemetry_is_failure"))).lower()
+    console.print(
+        f"- {label} coverage: observed={observed}/{expected}, "
+        f"missing={', '.join(names) or '-'}, missing_is_failure={failure}",
+        soft_wrap=True,
+    )
+
+
 def _metric_delta_line(field: str, left: float, right: float, unit: str) -> str:
     delta = right - left
     percent = _format_percent_delta(left, delta)
diff --git a/inferedge_env/compare/regression.py b/inferedge_env/compare/regression.py
index 6212ec5..eee9428 100644
--- a/inferedge_env/compare/regression.py
+++ b/inferedge_env/compare/regression.py
@@ -315,6 +315,9 @@ def _telemetry_run_context(
                 ),
             }
         )
+        coverage = _telemetry_coverage_context(telemetry.get("coverage"))
+        if coverage is not None:
+            context["telemetry_coverage"] = coverage
     if history_entry is not None:
         context["history_telemetry_timestamp"] = history_entry.get(
             "telemetry_timestamp"
@@ -322,6 +325,11 @@ def _telemetry_run_context(
         context["history_execution_sequence_id"] = history_entry.get(
             "execution_sequence_id"
         )
+        history_telemetry = history_entry.get("runtime_telemetry")
+        if isinstance(history_telemetry, dict):
+            coverage = _telemetry_coverage_context(history_telemetry.get("coverage"))
+            if coverage is not None:
+                context["history_telemetry_coverage"] = coverage
         _attach_orchestrator_context(context, history_entry)
     if missing_entry is not None:
         context["history_missing_reason"] = missing_entry.get("reason")
@@ -354,6 +362,27 @@ def _telemetry_source(telemetry: dict[str, Any]) -> str | None:
     return source if isinstance(source, str) else None
 
 
+def _telemetry_coverage_context(value: Any) -> dict[str, Any] | None:
+    """Return the recognised coverage keys of *value*, or ``None`` for a non-dict."""
+    if not isinstance(value, dict):
+        return None
+    known_keys = (
+        "schema_version",
+        "expected_fields",
+        "observed_fields",
+        "missing_fields",
+        "expected_field_count",
+        "observed_field_count",
+        "missing_field_count",
+        "coverage_ratio",
+        "comparability_owner",
+        "missing_telemetry_is_failure",
+    )
+    # Unknown keys are dropped and absent keys are omitted (never defaulted);
+    # values are passed through by reference, unvalidated.
+    return {name: value[name] for name in known_keys if name in value}
+
+
 def _telemetry_gaps(context: dict[str, Any]) -> list[dict[str, str]]:
     gaps: list[dict[str, str]] = []
     if not context["result_telemetry_present"]:
diff --git a/inferedge_env/result/telemetry_history.py b/inferedge_env/result/telemetry_history.py
index 816cee3..0c5a1ba 100644
--- a/inferedge_env/result/telemetry_history.py
+++ b/inferedge_env/result/telemetry_history.py
@@ -208,6 +208,7 @@ def inspect_runtime_telemetry_history(payload: dict[str, Any]) -> dict[str, Any]
         "replay": {
             "run_ids": run_ids,
             "telemetry_fields": _telemetry_fields(runs),
+            "telemetry_coverage": _telemetry_coverage_summary(runs),
             "orchestrator_context_run_ids": [
                 entry["run_id"]
                 for entry in runs
@@ -390,5 +391,50 @@ def _telemetry_fields(entries: list[dict[str, Any]]) -> list[str]:
     return sorted(fields)
 
 
+def _telemetry_coverage_summary(entries: list[dict[str, Any]]) -> dict[str, Any]:
+    """Aggregate per-run ``runtime_telemetry.coverage`` blocks into one summary.
+
+    Runs lacking a dict ``coverage`` are skipped; field lists are unioned and
+    sorted; ratio min/max are ``None`` when no run reports a numeric ratio.
+    """
+    runs_with_coverage = 0
+    expected_fields: set[str] = set()
+    observed_fields: set[str] = set()
+    missing_fields: set[str] = set()
+    ratios: list[float] = []
+    failure_flags: set[bool] = set()
+    for entry in entries:
+        telemetry = entry.get("runtime_telemetry")
+        coverage = telemetry.get("coverage") if isinstance(telemetry, dict) else None
+        if not isinstance(coverage, dict):
+            continue
+        runs_with_coverage += 1
+        expected_fields.update(_string_items(coverage.get("expected_fields")))
+        observed_fields.update(_string_items(coverage.get("observed_fields")))
+        missing_fields.update(_string_items(coverage.get("missing_fields")))
+        ratio = coverage.get("coverage_ratio")
+        # bool is an int subclass; a stray ``true`` must not be read as ratio 1.0.
+        if isinstance(ratio, (int, float)) and not isinstance(ratio, bool):
+            ratios.append(float(ratio))
+        failure_flag = coverage.get("missing_telemetry_is_failure")
+        if isinstance(failure_flag, bool):
+            failure_flags.add(failure_flag)
+    return {
+        "runs_with_coverage": runs_with_coverage,
+        "expected_fields": sorted(expected_fields),
+        "observed_fields": sorted(observed_fields),
+        "missing_fields": sorted(missing_fields),
+        "coverage_ratio_min": min(ratios) if ratios else None,
+        "coverage_ratio_max": max(ratios) if ratios else None,
+        "missing_telemetry_is_failure_values": sorted(failure_flags),
+    }
+
+
+def _string_items(value: Any) -> list[str]:
+    """Return the ``str`` members of *value*, or ``[]`` when it is not a list."""
+    candidates = value if isinstance(value, list) else []
+    return [member for member in candidates if isinstance(member, str)]
+
+
 def _is_monotonic(values: list[int | float]) -> bool:
     return all(left <= right for left, right in zip(values, values[1:]))
diff --git a/tests/test_regression.py b/tests/test_regression.py
index 90c2989..2fba3bc 100644
--- a/tests/test_regression.py
+++ b/tests/test_regression.py
@@ -92,7 +92,10 @@ def test_regression_attaches_runtime_telemetry_history_context(
             p95=125.0,
             p99=135.0,
             fps=48.0,
-            runtime_telemetry=_runtime_telemetry(sequence_id=2),
+            runtime_telemetry=_runtime_telemetry(
+                sequence_id=2,
+                missing_fields=["queue_depth"],
+            ),
         ),
     )
     telemetry_history = {
@@ -112,6 +115,7 @@ def test_regression_attaches_runtime_telemetry_history_context(
                 "run_id": "candidate",
                 "telemetry_timestamp": "2026-05-22T00:00:02Z",
                 "execution_sequence_id": 2,
+                "runtime_telemetry": candidate.runtime_telemetry,
             },
         ],
         "missing_telemetry": [],
@@ -132,6 +136,25 @@ def test_regression_attaches_runtime_telemetry_history_context(
     assert context["baseline"]["result_telemetry_present"] is True
     assert context["baseline"]["history_entry_present"] is True
     assert context["candidate"]["execution_sequence_id"] == 2
+    assert context["candidate"]["telemetry_coverage"] == {
+        "schema_version": "inferedge-runtime-telemetry-coverage-v1",
+        "expected_fields": [
+            "gpu_temperature",
+            "queue_depth",
+            "telemetry_timestamp",
+        ],
+        "observed_fields": ["gpu_temperature", "telemetry_timestamp"],
+        "missing_fields": ["queue_depth"],
+        "expected_field_count": 3,
+        "observed_field_count": 2,
+        "missing_field_count": 1,
+        "coverage_ratio": 0.666667,
+        "comparability_owner": "edgeenv",
+        "missing_telemetry_is_failure": False,
+    }
+    assert context["candidate"]["history_telemetry_coverage"]["missing_fields"] == [
+        "queue_depth"
+    ]
     assert context["evidence_gaps"] == []
     assert report.evidence["mean_delta_pct"] == 12.0
 
@@ -859,7 +882,14 @@ def _write_registered_run(
     RunRegistry(edgeenv_root / "runs.db").insert(result, run_dir / "result.json")
 
 
-def _runtime_telemetry(sequence_id: int) -> dict:
+def _runtime_telemetry(
+    sequence_id: int,
+    *,
+    missing_fields: list[str] | None = None,
+) -> dict:
+    missing = missing_fields or []
+    expected_fields = ["gpu_temperature", "queue_depth", "telemetry_timestamp"]
+    observed_fields = [field for field in expected_fields if field not in missing]
     return {
         "schema_version": "inferedge-runtime-telemetry-v1",
         "telemetry_timestamp": f"2026-05-22T00:00:0{sequence_id}Z",
@@ -874,6 +904,18 @@ def _runtime_telemetry(sequence_id: int) -> dict:
         "operation": {
             "timeout_observed": False,
         },
+        "coverage": {
+            "schema_version": "inferedge-runtime-telemetry-coverage-v1",
+            "expected_fields": expected_fields,
+            "observed_fields": observed_fields,
+            "missing_fields": missing,
+            "expected_field_count": len(expected_fields),
+            "observed_field_count": len(observed_fields),
+            "missing_field_count": len(missing),
+            "coverage_ratio": round(len(observed_fields) / len(expected_fields), 6),
+            "comparability_owner": "edgeenv",
+            "missing_telemetry_is_failure": False,
+        },
     }
 
 
diff --git a/tests/test_runtime_telemetry_history.py b/tests/test_runtime_telemetry_history.py
index 68416bc..c7cea3f 100644
--- a/tests/test_runtime_telemetry_history.py
+++ b/tests/test_runtime_telemetry_history.py
@@ -65,6 +65,18 @@ def test_build_runtime_telemetry_history_records_entries_and_missing_gaps(
     assert payload["runs"][0]["runtime_telemetry"]["resource"] == {
         "telemetry_source": "runtime-result"
     }
+    assert payload["runs"][0]["runtime_telemetry"]["coverage"] == {
+        "schema_version": "inferedge-runtime-telemetry-coverage-v1",
+        "expected_fields": ["queue_depth", "gpu_temperature"],
+        "observed_fields": ["gpu_temperature"],
+        "missing_fields": ["queue_depth"],
+        "expected_field_count": 2,
+        "observed_field_count": 1,
+        "missing_field_count": 1,
+        "coverage_ratio": 0.5,
+        "comparability_owner": "edgeenv",
+        "missing_telemetry_is_failure": False,
+    }
     assert payload["runs"][0]["protocol"]["repeat_runs"] == 10
     assert payload["missing_telemetry"] == [
         {
@@ -285,6 +297,15 @@ def test_inspect_runtime_telemetry_history_reports_replay_summary(
     assert summary["replay"]["missing_run_ids"] == ["run-without-telemetry"]
     assert "latency" in summary["replay"]["telemetry_fields"]
     assert "operation" in summary["replay"]["telemetry_fields"]
+    assert summary["replay"]["telemetry_coverage"] == {
+        "runs_with_coverage": 2,
+        "expected_fields": ["gpu_temperature", "queue_depth"],
+        "observed_fields": ["gpu_temperature"],
+        "missing_fields": ["queue_depth"],
+        "coverage_ratio_min": 0.5,
+        "coverage_ratio_max": 0.5,
+        "missing_telemetry_is_failure_values": [False],
+    }
     assert summary["replay"]["orchestrator_context_run_ids"] == []
     assert "not production monitoring" in summary["notes"][2]
 
@@ -398,6 +419,8 @@ def test_cli_runs_telemetry_inspect_history_validates_replay_artifact(
     assert "Runtime telemetry history valid" in result.output
     assert "Replay runs: 1" in result.output
     assert "Telemetry fields:" in result.output
+    assert "Telemetry coverage runs: 1" in result.output
+    assert "Telemetry coverage missing fields: queue_depth" in result.output
     assert "latency" in result.output
     assert "Evidence gaps: 1" in result.output
     assert "run-cli-without-telemetry" in result.output
@@ -439,6 +462,10 @@ def test_cli_runs_telemetry_inspect_history_json_output(
     assert payload["valid"] is True
     assert payload["replay"]["run_ids"] == ["run-cli-json"]
     assert payload["replay"]["execution_sequence_ids"] == [3]
+    assert payload["replay"]["telemetry_coverage"]["runs_with_coverage"] == 1
+    assert payload["replay"]["telemetry_coverage"]["missing_fields"] == [
+        "queue_depth"
+    ]
 
 
 def _write_registered_run(
@@ -487,6 +514,18 @@ def _runtime_telemetry_payload(sequence_id: int = 7) -> dict:
         "operation": {
             "timeout_observed": False,
         },
+        "coverage": {
+            "schema_version": "inferedge-runtime-telemetry-coverage-v1",
+            "expected_fields": ["queue_depth", "gpu_temperature"],
+            "observed_fields": ["gpu_temperature"],
+            "missing_fields": ["queue_depth"],
+            "expected_field_count": 2,
+            "observed_field_count": 1,
+            "missing_field_count": 1,
+            "coverage_ratio": 0.5,
+            "comparability_owner": "edgeenv",
+            "missing_telemetry_is_failure": False,
+        },
         "missing_fields": ["queue_depth"],
         "production_monitoring": False,
     }