gwonxhj · hyeokjun32 · May 19, 2026 · May 19, 2026
diff --git a/README.md b/README.md
@@ -128,7 +128,8 @@ The Lab decision surface now also exposes `policy_version`, `triggered_rules`, a
 `agent-runtime-report` is an additive reliable edge agent runtime report path.
 It bundles Orchestrator scheduling evidence and AIGuard runtime reliability `guard_analysis` into a Lab-owned agent deployment decision context without changing existing Runtime result or compare contracts.
 The current bundled evidence is a synthetic/dummy sustained high-load 3-agent scenario.
-The report preserves sustained queue-depth, worker health, runtime event summary/timeline, policy decision reason, and `sustained_overload_risk` evidence as local-first deployment review context.
+The report preserves sustained queue-depth, worker health, optional Runtime result health/error/event evidence, Orchestrator runtime event summary/timeline, policy decision reason, and `sustained_overload_risk` evidence as local-first deployment review context.
+When a Runtime result JSON with `runtime_health_snapshot` / `runtime_events` is available, add `--runtime-result <path>` to include Runtime-side operation context in the same Lab report.
 
 ![InferEdge Local Studio demo evidence](assets/images/local-studio-demo-evidence.png)
 

diff --git a/docs/portfolio/agent_runtime_reliability_report.md b/docs/portfolio/agent_runtime_reliability_report.md
@@ -67,11 +67,16 @@ runtime operation review:
 - `runtime_event_summary` for event type counts.
 - `runtime_event_timeline` sample rows for queue snapshots, policy decisions,
   drops, and execution outcomes.
+- Optional Runtime result operation evidence through `--runtime-result`,
+  including `runtime_health_snapshot`, `runtime_error_classification`, and
+  `runtime_events`.
 
 These fields make the report path explicit:
 
 ```text
-Orchestrator operation evidence -> AIGuard reliability explanation -> Lab-owned deployment risk context
+Runtime result operation evidence + Orchestrator operation evidence
+-> AIGuard reliability explanation
+-> Lab-owned deployment risk context
 ```
 
 ## Lab Decision Context

diff --git a/inferedgelab/commands/agent_runtime_report.py b/inferedgelab/commands/agent_runtime_report.py
@@ -23,12 +23,18 @@ def agent_runtime_report_cmd(
         "--guard-analysis",
         help="Optional AIGuard runtime reliability guard_analysis JSON",
     ),
+    runtime_result: str = typer.Option(
+        "",
+        "--runtime-result",
+        help="Optional InferEdge-Runtime result JSON with runtime_health_snapshot/runtime_events",
+    ),
     format: str = typer.Option("text", "--format", "-f", help="text/json/markdown"),
     output: str = typer.Option("", "--output", "-o", help="Optional output path"),
 ) -> None:
     report = load_agent_runtime_reliability_bundle(
         orchestration_summary_path=orchestration_summary,
         guard_analysis_path=guard_analysis or None,
+        runtime_result_path=runtime_result or None,
     )
     normalized_format = format.strip().lower()
     if normalized_format == "json":
@@ -53,6 +59,9 @@ def _text_summary(report: dict) -> str:
     metrics = report["agent_runtime_summary"]["metrics"]
     decision = report["agent_deployment_decision"]
     guard = report["guard_summary"]
+    runtime_context = report["agent_runtime_summary"].get("runtime_result_context") or {}
+    health = runtime_context.get("runtime_health_snapshot") or {}
+    error = runtime_context.get("runtime_error_classification") or {}
     lines = [
         "InferEdge Agent Runtime Reliability Report",
         f"schema_version: {report['schema_version']}",
@@ -63,6 +72,8 @@ def _text_summary(report: dict) -> str:
         f"drop_rate: {metrics['drop_rate']:.6g}",
         f"fallback_rate: {metrics['fallback_rate']:.6g}",
         f"deadline_miss_rate: {metrics['deadline_miss_rate']:.6g}",
+        f"runtime_health_status: {health.get('status')}",
+        f"runtime_error_category: {error.get('category')}",
         "triggered_rules:",
     ]
     lines.extend(f"- {rule}" for rule in decision["triggered_rules"])

diff --git a/inferedgelab/services/agent_runtime_report.py b/inferedgelab/services/agent_runtime_report.py
@@ -90,6 +90,7 @@ def build_agent_runtime_reliability_report(
     *,
     orchestration_summary: dict[str, Any],
     guard_analysis: dict[str, Any] | None = None,
+    runtime_result: dict[str, Any] | None = None,
     source: dict[str, Any] | None = None,
     thresholds: dict[str, float] | None = None,
 ) -> dict[str, Any]:
@@ -98,6 +99,7 @@ def build_agent_runtime_reliability_report(
     policy = {**DEFAULT_AGENT_RUNTIME_THRESHOLDS, **(thresholds or {})}
     metrics = compute_agent_runtime_metrics(orchestration_summary)
     runtime_summary = _agent_runtime_summary(orchestration_summary)
+    runtime_result_context = _runtime_result_operation_context(runtime_result)
     decision = build_agent_runtime_deployment_decision(
         metrics=metrics,
         guard_analysis=guard_analysis,
@@ -119,6 +121,11 @@ def build_agent_runtime_reliability_report(
                 if isinstance(guard_analysis, dict)
                 else None
             ),
+            "runtime_result": (
+                runtime_result.get("schema_version")
+                if isinstance(runtime_result, dict)
+                else None
+            ),
             "source_contracts": runtime_summary.get("source_contracts", {}),
         },
         "agent_runtime_summary": {
@@ -127,6 +134,7 @@ def build_agent_runtime_reliability_report(
             "metrics": metrics,
             "timeline_summary": _timeline_summary(orchestration_summary, metrics),
             "operation_context": _operation_context(orchestration_summary, metrics),
+            "runtime_result_context": runtime_result_context,
             "policy_decision_reasons": metrics["policy_decision_reasons"],
             "policy_decision_log_count": len(_policy_log(orchestration_summary)),
         },
@@ -332,17 +340,23 @@ def load_agent_runtime_reliability_bundle(
     *,
     orchestration_summary_path: str | Path,
     guard_analysis_path: str | Path | None = None,
+    runtime_result_path: str | Path | None = None,
 ) -> dict[str, Any]:
     orchestration_summary = _load_json_dict(orchestration_summary_path)
     guard_analysis = _load_json_dict(guard_analysis_path) if guard_analysis_path else None
+    runtime_result = _load_json_dict(runtime_result_path) if runtime_result_path else None
     return build_agent_runtime_reliability_report(
         orchestration_summary=orchestration_summary,
         guard_analysis=guard_analysis,
+        runtime_result=runtime_result,
         source={
             "orchestration_summary_path": str(orchestration_summary_path),
             "guard_analysis_path": str(guard_analysis_path)
             if guard_analysis_path
             else None,
+            "runtime_result_path": str(runtime_result_path)
+            if runtime_result_path
+            else None,
         },
     )
 
@@ -352,6 +366,10 @@ def build_agent_runtime_reliability_markdown(report: dict[str, Any]) -> str:
     metrics = runtime["metrics"]
     decision = report["agent_deployment_decision"]
     guard = report["guard_summary"]
+    runtime_result_context = runtime.get("runtime_result_context") or {}
+    runtime_health = runtime_result_context.get("runtime_health_snapshot") or {}
+    runtime_error = runtime_result_context.get("runtime_error_classification") or {}
+    runtime_event_summary = runtime_result_context.get("runtime_event_summary") or {}
 
     lines = [
         "# InferEdge Agent Runtime Reliability Report",
@@ -466,6 +484,33 @@ def build_agent_runtime_reliability_markdown(report: dict[str, Any]) -> str:
                 ]
             ],
             "",
+            "## Runtime Result Operation Evidence",
+            "",
+            "| Field | Value |",
+            "|---|---|",
+            f"| runtime_result_schema | {runtime_result_context.get('source_schema_version') or '-'} |",
+            f"| compare_key | {runtime_result_context.get('compare_key') or '-'} |",
+            f"| backend_key | {runtime_result_context.get('backend_key') or '-'} |",
+            f"| runtime_status | {runtime_health.get('status') or runtime_result_context.get('status') or '-'} |",
+            f"| runtime_error_category | {runtime_error.get('category') or '-'} |",
+            f"| timeout_observed | {runtime_health.get('timeout_observed', runtime_error.get('timeout_observed', '-'))} |",
+            f"| runtime_event_count | {_fmt_number(runtime_event_summary.get('event_count'))} |",
+            "",
+            "Runtime result event sample:",
+            "",
+            "| # | Type | Status | Detail |",
+            "|---:|---|---|---|",
+            *[
+                "| "
+                f"{index} | "
+                f"{event.get('type') or event.get('event_type') or '-'} | "
+                f"{event.get('status') or '-'} | "
+                f"{event.get('category') or event.get('reason') or event.get('engine_backend') or '-'} |"
+                for index, event in enumerate(
+                    runtime_result_context.get("runtime_event_sample") or []
+                )
+            ],
+            "",
             "## AIGuard Runtime Reliability Evidence",
             "",
             f"- guard_status: `{guard.get('status')}`",
@@ -685,6 +730,46 @@ def _operation_context(
     }
 
 
+def _runtime_result_operation_context(  # Builds the report's "runtime_result_context" section from an optional Runtime result dict.
+    runtime_result: dict[str, Any] | None,
+) -> dict[str, Any]:
+    if not isinstance(runtime_result, dict):  # No usable Runtime result: return the same key layout with empty placeholders.
+        return {
+            "source_schema_version": None,
+            "compare_key": None,
+            "backend_key": None,
+            "status": None,
+            "success": None,
+            "runtime_health_snapshot": {},
+            "runtime_error_classification": {},
+            "runtime_event_summary": {
+                "schema_version": None,  # The summary schema tag is only set when a Runtime result dict was supplied.
+                "event_count": 0,
+                "event_type_counts": {},
+            },
+            "runtime_event_sample": [],
+        }
+
+    health = runtime_result.get("runtime_health_snapshot")
+    error = runtime_result.get("runtime_error_classification")
+    runtime_events = _dict_list(runtime_result.get("runtime_events"))  # NOTE(review): _dict_list is defined outside this view; presumably keeps only dict entries - confirm.
+    return {
+        "source_schema_version": runtime_result.get("schema_version"),
+        "compare_key": runtime_result.get("compare_key"),
+        "backend_key": runtime_result.get("backend_key"),
+        "status": runtime_result.get("status"),
+        "success": runtime_result.get("success"),
+        "runtime_health_snapshot": dict(health) if isinstance(health, dict) else {},  # Shallow copy; any non-dict value becomes {}.
+        "runtime_error_classification": dict(error) if isinstance(error, dict) else {},  # Shallow copy; any non-dict value becomes {}.
+        "runtime_event_summary": {
+            "schema_version": "inferedgelab-runtime-result-event-summary-v1",
+            "event_count": len(runtime_events),  # Counts all events, not just the sample below.
+            "event_type_counts": _runtime_result_event_type_counts(runtime_events),
+        },
+        "runtime_event_sample": runtime_events[:8],  # Only the first 8 events are carried into the report as a sample.
+    }
+
+
 def _queue_state_summary(orchestration_summary: dict[str, Any]) -> dict[str, Any]:
     value = orchestration_summary.get("queue_state_summary")
     if isinstance(value, dict):
@@ -749,6 +834,16 @@ def _runtime_event_type_counts(runtime_events: list[dict[str, Any]]) -> dict[str
     return counts
 
 
+def _runtime_result_event_type_counts(runtime_events: list[dict[str, Any]]) -> dict[str, int]:  # Tallies events per event type.
+    counts: dict[str, int] = {}
+    for event in runtime_events:
+        event_type = event.get("type") or event.get("event_type")  # "type" wins; "event_type" is used only when "type" is missing or falsy.
+        if not isinstance(event_type, str) or not event_type:  # Missing, empty, or non-string types share one bucket.
+            event_type = "unknown"
+        counts[event_type] = counts.get(event_type, 0) + 1
+    return counts
+
+
 def _worker_health_counts(worker_health_snapshot: dict[str, Any]) -> dict[str, int]:
     workers = worker_health_snapshot.get("workers")
     if not isinstance(workers, dict):

diff --git a/tests/test_agent_runtime_report.py b/tests/test_agent_runtime_report.py
@@ -218,6 +218,66 @@ def guard_analysis() -> dict:
     }
 
 
+def runtime_result_with_operation_evidence() -> dict:  # Test fixture: a skipped Runtime result with a health snapshot, an error classification, and three runtime events.
+    return {
+        "schema_version": "inferedge-runtime-result-v1",
+        "compare_key": "yolov8n__b1__h224w224__fp32",
+        "backend_key": "onnxruntime__cpu",
+        "status": "skipped",
+        "success": False,
+        "runtime_health_snapshot": {
+            "schema_version": "inferedge-runtime-health-v1",
+            "status": "degraded",
+            "engine_backend": "onnxruntime",
+            "device": "cpu",
+            "input_mode": "dummy",
+            "input_preprocess": "synthetic",
+            "warmup": 1,
+            "runs": 1,
+            "run_once": False,
+            "success": False,
+            "latency_mean_ms": 0.0,
+            "latency_p95_ms": 0.0,
+            "latency_p99_ms": 0.0,
+            "fps": 0.0,
+            "power_mode": "unknown",
+            "jetson_clocks": "unknown",
+            "timeout_policy": "not_configured",
+            "timeout_observed": False,
+        },
+        "runtime_error_classification": {
+            "schema_version": "inferedge-runtime-error-v1",
+            "status": "classified",
+            "category": "runtime_execution_skipped",
+            "message": "backend is not available in this build",
+            "timeout_observed": False,
+            "retryable": False,
+        },
+        "runtime_events": [
+            {
+                "type": "runtime_configured",
+                "status": "ok",
+                "engine_backend": "onnxruntime",
+                "device": "cpu",
+                "input_mode": "dummy",
+            },
+            {
+                "type": "benchmark_completed",
+                "status": "skipped",
+                "success": False,
+                "warmup": 1,
+                "runs": 1,
+                "mean_ms": 0.0,
+            },
+            {
+                "type": "runtime_error_classified",
+                "status": "classified",
+                "category": "runtime_execution_skipped",
+            },
+        ],
+    }
+
+
 def sustained_guard_analysis() -> dict:
     data = guard_analysis()
     data["evidence"].append(
@@ -285,6 +345,7 @@ def test_agent_runtime_report_blocks_when_guard_blocks():
     report = build_agent_runtime_reliability_report(
         orchestration_summary=orchestration_summary(),
         guard_analysis=sustained_guard_analysis(),
+        runtime_result=runtime_result_with_operation_evidence(),
     )
 
     decision = report["agent_deployment_decision"]
@@ -295,6 +356,7 @@ def test_agent_runtime_report_blocks_when_guard_blocks():
     assert report["contracts"]["aiguard_guard_analysis"] == (
         "inferedge-aiguard-diagnosis-v1"
     )
+    assert report["contracts"]["runtime_result"] == "inferedge-runtime-result-v1"
     assert decision["policy_version"] == AGENT_RUNTIME_POLICY_VERSION
     assert decision["decision"] == "blocked"
     assert "guard_blocked_runtime_block" in decision["triggered_rules"]
@@ -326,6 +388,18 @@ def test_agent_runtime_report_blocks_when_guard_blocks():
     assert operation_context["runtime_event_timeline_sample"][1]["event_type"] == (
         "policy_decision"
     )
+    runtime_context = report["agent_runtime_summary"]["runtime_result_context"]
+    assert runtime_context["source_schema_version"] == "inferedge-runtime-result-v1"
+    assert runtime_context["runtime_health_snapshot"]["status"] == "degraded"
+    assert runtime_context["runtime_error_classification"]["category"] == (
+        "runtime_execution_skipped"
+    )
+    assert runtime_context["runtime_event_summary"]["event_count"] == 3
+    assert runtime_context["runtime_event_summary"]["event_type_counts"] == {
+        "runtime_configured": 1,
+        "benchmark_completed": 1,
+        "runtime_error_classified": 1,
+    }
 
 
 def test_agent_runtime_report_keeps_legacy_orchestrator_summary_compatible():
@@ -352,6 +426,7 @@ def test_agent_runtime_report_markdown_contains_sections():
     report = build_agent_runtime_reliability_report(
         orchestration_summary=orchestration_summary(),
         guard_analysis=sustained_guard_analysis(),
+        runtime_result=runtime_result_with_operation_evidence(),
     )
     markdown = build_agent_runtime_reliability_markdown(report)
 
@@ -362,6 +437,8 @@ def test_agent_runtime_report_markdown_contains_sections():
     assert "Queue State" in markdown
     assert "Worker Health" in markdown
     assert "Runtime Event Summary" in markdown
+    assert "Runtime Result Operation Evidence" in markdown
+    assert "runtime_execution_skipped" in markdown
     assert "queue_pressure_state" in markdown
     assert "policy_decision" in markdown
     assert "AIGuard Runtime Reliability Evidence" in markdown
@@ -383,10 +460,15 @@ def test_agent_runtime_report_loads_committed_fixtures():
     assert len(report["agent_runtime_summary"]["agents"]) == 3
 
 
-def test_agent_runtime_report_command_outputs_json(capsys):
+def test_agent_runtime_report_command_outputs_json(tmp_path, capsys):
+    runtime_result_path = tmp_path / "runtime_operation_result.json"
+    with runtime_result_path.open("w", encoding="utf-8") as file:
+        json.dump(runtime_result_with_operation_evidence(), file)
+
     agent_runtime_report_cmd(
         orchestration_summary="examples/agent_runtime/agent_3_orchestration_summary.json",
         guard_analysis="examples/agent_runtime/aiguard_runtime_guard_analysis.json",
+        runtime_result=str(runtime_result_path),
         format="json",
         output="",
     )
@@ -395,3 +477,5 @@ def test_agent_runtime_report_command_outputs_json(capsys):
 
     assert report["schema_version"] == AGENT_RUNTIME_REPORT_SCHEMA_VERSION
     assert report["agent_deployment_decision"]["decision"] == "blocked"
+    runtime_context = report["agent_runtime_summary"]["runtime_result_context"]
+    assert runtime_context["runtime_health_snapshot"]["status"] == "degraded"