gwonxhj · hyeokjun32 · May 20, 2026 · May 20, 2026
diff --git a/docs/portfolio/agent_runtime_reliability_report.md b/docs/portfolio/agent_runtime_reliability_report.md
@@ -79,6 +79,11 @@ runtime operation review:
   treated as Lab `review_required` evidence because it means the configured
   latency threshold was breached; it does not claim production request
   cancellation.
+- AIGuard Runtime operation evidence, including
+  `runtime_backend_unavailable`, `runtime_latency_budget_overrun`,
+  `runtime_error_classification`,
+  `runtime_thermal_memory_evidence_missing`, and `runtime_operation_health`,
+  when Runtime health/error/event fields are analyzed by AIGuard.
 
 These fields make the report path explicit:
 
@@ -117,14 +122,20 @@ Triggered rules:
 - `sustained_overload_review`
 - `runtime_timeout_observed_review` when a Runtime result reports a latency
   timeout observation threshold breach.
+- `runtime_operation_guard_block` when AIGuard reports failed Runtime operation
+  evidence that is high/critical severity, or any failed backend unavailable or
+  latency budget overrun evidence.
+- `runtime_operation_guard_review` when AIGuard reports non-blocking failed or
+  warning-level Runtime operation evidence such as missing Jetson
+  thermal/memory context.
 
 ## Boundary
 
 - Orchestrator records scheduling and policy evidence.
 - Orchestrator operation-health fields are displayed as local runtime evidence.
 - Orchestrator remote dispatch result fields are displayed as plan-only worker
   selection evidence when provided.
-- AIGuard explains runtime reliability risk.
+- AIGuard explains runtime reliability risk, including additive Runtime
+  health/error/event warning evidence when provided.
 - Lab remains the final deployment decision owner.
 - This report is an additive agent-runtime path and does not change existing
   Runtime result, compare output, or classic deployment decision contracts.
diff --git a/inferedgelab/services/agent_runtime_report.py b/inferedgelab/services/agent_runtime_report.py
@@ -19,6 +19,14 @@
 AIGUARD_DIAGNOSIS_SCHEMA_VERSION = "inferedge-aiguard-diagnosis-v1"
 REMOTE_DISPATCH_SCHEMA_VERSION = "inferedge-remote-dispatch-result-v1"
 
+# Evidence ``type`` values treated as AIGuard Runtime operation evidence; guard
+# evidence items are filtered against this set when building the runtime
+# operation guard summary.
+RUNTIME_OPERATION_GUARD_EVIDENCE_TYPES = {
+    "runtime_backend_unavailable",
+    "runtime_latency_budget_overrun",
+    "runtime_error_classification",
+    "runtime_thermal_memory_evidence_missing",
+    "runtime_operation_health",
+}
+
 DEFAULT_AGENT_RUNTIME_THRESHOLDS = {
     "deadline_miss_rate_review": 0.05,
     "deadline_miss_rate_blocked": 0.20,
@@ -84,6 +92,14 @@
         "effect": "review_required",
         "description": "Runtime result reported a latency timeout observation threshold breach.",
     },
+    "runtime_operation_guard_block": {
+        "effect": "blocked",
+        "description": "AIGuard Runtime operation evidence reported failed backend, latency, or error-classification risk.",
+    },
+    "runtime_operation_guard_review": {
+        "effect": "review_required",
+        "description": "AIGuard Runtime operation evidence reported warning-level runtime context risk.",
+    },
     "runtime_reliability_pass_note": {
         "effect": "deployable_with_note",
         "description": "Runtime reliability evidence stayed within configured thresholds.",
@@ -107,10 +123,12 @@ def build_agent_runtime_reliability_report(
     runtime_summary = _agent_runtime_summary(orchestration_summary)
     runtime_result_context = _runtime_result_operation_context(runtime_result)
     remote_dispatch_context = _remote_dispatch_context(remote_dispatch)
+    runtime_operation_guard_summary = _runtime_operation_guard_summary(guard_analysis)
     decision = build_agent_runtime_deployment_decision(
         metrics=metrics,
         guard_analysis=guard_analysis,
         runtime_result_context=runtime_result_context,
+        runtime_operation_guard_summary=runtime_operation_guard_summary,
         thresholds=policy,
     )
 
@@ -153,6 +171,7 @@ def build_agent_runtime_reliability_report(
             "policy_decision_log_count": len(_policy_log(orchestration_summary)),
         },
         "guard_summary": _guard_summary(guard_analysis),
+        "runtime_operation_guard_summary": runtime_operation_guard_summary,
         "runtime_reliability_evidence": _runtime_reliability_evidence(guard_analysis),
         "agent_deployment_decision": decision,
         "notes": [
@@ -168,6 +187,7 @@ def build_agent_runtime_deployment_decision(
     metrics: dict[str, Any],
     guard_analysis: dict[str, Any] | None,
     runtime_result_context: dict[str, Any] | None = None,
+    runtime_operation_guard_summary: dict[str, Any] | None = None,
     thresholds: dict[str, float] | None = None,
 ) -> dict[str, Any]:
     policy = {**DEFAULT_AGENT_RUNTIME_THRESHOLDS, **(thresholds or {})}
@@ -225,6 +245,13 @@ def build_agent_runtime_deployment_decision(
     )
     if _runtime_timeout_observed(runtime_result_context):
         triggered_rules.append("runtime_timeout_observed_review")
+    runtime_guard = runtime_operation_guard_summary
+    if runtime_guard is None:
+        runtime_guard = _runtime_operation_guard_summary(guard_analysis)
+    if _runtime_operation_guard_blocking(runtime_guard):
+        triggered_rules.append("runtime_operation_guard_block")
+    elif _runtime_operation_guard_review(runtime_guard):
+        triggered_rules.append("runtime_operation_guard_review")
 
     if not triggered_rules:
         triggered_rules.append("runtime_reliability_pass_note")
@@ -391,6 +418,7 @@ def build_agent_runtime_reliability_markdown(report: dict[str, Any]) -> str:
     metrics = runtime["metrics"]
     decision = report["agent_deployment_decision"]
     guard = report["guard_summary"]
+    runtime_guard = report.get("runtime_operation_guard_summary") or {}
     runtime_result_context = runtime.get("runtime_result_context") or {}
     remote_dispatch_context = runtime.get("remote_dispatch_context") or {}
     runtime_health = runtime_result_context.get("runtime_health_snapshot") or {}
@@ -528,6 +556,31 @@ def build_agent_runtime_reliability_markdown(report: dict[str, Any]) -> str:
             f"| runtime_timeout_observed | {runtime_result_context.get('runtime_timeout_observed', False)} |",
             f"| runtime_event_count | {_fmt_number(runtime_event_summary.get('event_count'))} |",
             "",
+            "## AIGuard Runtime Operation Evidence",
+            "",
+            "| Field | Value |",
+            "|---|---|",
+            f"| evidence_count | {_fmt_number(runtime_guard.get('evidence_count'))} |",
+            f"| failed_count | {_fmt_number(runtime_guard.get('failed_count'))} |",
+            f"| warning_count | {_fmt_number(runtime_guard.get('warning_count'))} |",
+            f"| evidence_types | {', '.join(runtime_guard.get('evidence_types') or []) or '-'} |",
+            f"| retry_hints | {', '.join(runtime_guard.get('retry_hints') or []) or '-'} |",
+            "",
+            "Runtime operation guard evidence:",
+            "",
+            "| Type | Metric | Observed | Severity | Status | Recommendation |",
+            "|---|---|---:|---|---|---|",
+            *[
+                "| "
+                f"{item.get('type') or '-'} | "
+                f"{item.get('metric_name') or '-'} | "
+                f"{_fmt_number(item.get('observed_value'))} | "
+                f"{item.get('severity') or '-'} | "
+                f"{item.get('status') or '-'} | "
+                f"{item.get('recommendation') or '-'} |"
+                for item in runtime_guard.get("evidence", [])
+            ],
+            "",
             "Runtime result event sample:",
             "",
             "| # | Type | Status | Detail |",
@@ -788,6 +841,87 @@ def _runtime_reliability_evidence(
     ]
 
 
+def _runtime_operation_guard_summary(
+    guard_analysis: dict[str, Any] | None,
+) -> dict[str, Any]:
+    """Summarize AIGuard evidence items that describe Runtime operation risk.
+
+    Keeps only dict evidence items whose ``type`` is a string listed in
+    ``RUNTIME_OPERATION_GUARD_EVIDENCE_TYPES``.
+
+    Returns:
+        A dict with ``evidence_count``, ``failed_count``, ``warning_count``,
+        the ``evidence_types`` and ``metric_names`` seen (in evidence order),
+        the sorted unique ``retry_hints``, and ``evidence``: a trimmed copy of
+        each matching item with its resolved ``retry_hint``.
+    """
+    evidence = [
+        item
+        for item in guard_evidence_items(guard_analysis)
+        if isinstance(item, dict)
+        # Check for str first: an unhashable ``type`` (e.g. a list or dict in
+        # malformed input) would raise TypeError in the set membership test.
+        and isinstance(item.get("type"), str)
+        and item.get("type") in RUNTIME_OPERATION_GUARD_EVIDENCE_TYPES
+    ]
+    failed = [item for item in evidence if item.get("status") == "failed"]
+    warnings = [item for item in evidence if item.get("status") == "warning"]
+    # Resolve each hint once; the helper returns a non-empty str or None, and
+    # the result feeds both the unique hint list and the per-item rows.
+    hints = [_runtime_operation_retry_hint(item) for item in evidence]
+    return {
+        "evidence_count": len(evidence),
+        "failed_count": len(failed),
+        "warning_count": len(warnings),
+        # Every kept item already has a str ``type`` (see the filter above).
+        "evidence_types": [item.get("type") for item in evidence],
+        "metric_names": [
+            item.get("metric_name")
+            for item in evidence
+            if isinstance(item.get("metric_name"), str)
+        ],
+        "retry_hints": sorted({hint for hint in hints if hint}),
+        "evidence": [
+            {
+                "type": item.get("type"),
+                "metric_name": item.get("metric_name"),
+                "observed_value": item.get("observed_value"),
+                "threshold": item.get("threshold"),
+                "severity": item.get("severity"),
+                "status": item.get("status"),
+                "explanation": item.get("explanation"),
+                "recommendation": item.get("recommendation"),
+                "why_it_matters": item.get("why_it_matters"),
+                "retry_hint": hint,
+            }
+            for item, hint in zip(evidence, hints)
+        ],
+    }
+
+
+def _runtime_operation_guard_blocking(summary: dict[str, Any]) -> bool:
+    """Return True when any failed evidence item triggers the block rule.
+
+    A failed item blocks when its severity is ``high`` or ``critical``, or when
+    its type is backend-unavailable or latency-budget-overrun regardless of the
+    reported severity. Items whose status is not ``failed`` are ignored.
+    """
+    return any(
+        entry.get("severity") in {"high", "critical"}
+        or entry.get("type")
+        in {"runtime_backend_unavailable", "runtime_latency_budget_overrun"}
+        for entry in _dict_list(summary.get("evidence"))
+        if entry.get("status") == "failed"
+    )
+
+
+def _runtime_operation_guard_review(summary: dict[str, Any]) -> bool:
+    """Return True when the summary warrants review but does not block.
+
+    Review applies when the summary reports any failed or warning evidence and
+    the blocking check does not already apply to it.
+    """
+    is_blocking = _runtime_operation_guard_blocking(summary)
+    finding_count = summary.get("failed_count") or summary.get("warning_count")
+    return not is_blocking and bool(finding_count)
+
+
+def _runtime_operation_retry_hint(evidence_item: dict[str, Any]) -> str | None:
+    """Return the item's ``raw_context.runtime_operation.retry_hint`` value.
+
+    Yields None when ``raw_context`` or ``runtime_operation`` is not a dict, or
+    when the hint is missing, empty, or not a string.
+    """
+    context = evidence_item.get("raw_context")
+    operation = context.get("runtime_operation") if isinstance(context, dict) else None
+    hint = operation.get("retry_hint") if isinstance(operation, dict) else None
+    if isinstance(hint, str) and hint:
+        return hint
+    return None
+
+
 def _timeline_summary(
     orchestration_summary: dict[str, Any],
     metrics: dict[str, Any],

diff --git a/tests/test_agent_runtime_report.py b/tests/test_agent_runtime_report.py
@@ -218,6 +218,78 @@ def guard_analysis() -> dict:
     }
 
 
+def runtime_operation_guard_analysis() -> dict:
+    """Return the base guard analysis extended with Runtime operation evidence.
+
+    Appends two failed, high-severity evidence items: a backend-unavailable
+    item that carries a retry hint and a latency budget overrun item that does
+    not. Also overrides the primary reason, suspected causes, and
+    recommendations to match.
+    """
+    backend_unavailable = {
+        "type": "runtime_backend_unavailable",
+        "metric_name": "engine_available",
+        "observed_value": 0,
+        "baseline_value": None,
+        "threshold": 1,
+        "delta": None,
+        "delta_pct": None,
+        "increase_factor": None,
+        "severity": "high",
+        "status": "failed",
+        "explanation": "Runtime could not confirm backend availability.",
+        "why_it_matters": (
+            "Runtime backend availability is required before using the "
+            "result as deployment evidence."
+        ),
+        "suspected_causes": ["backend_runtime_unavailable"],
+        "recommendation": "Check backend installation and engine load logs.",
+        "raw_context": {
+            "runtime_operation": {
+                "engine_available": False,
+                "retry_hint": "check_backend_availability",
+            }
+        },
+    }
+    latency_overrun = {
+        "type": "runtime_latency_budget_overrun",
+        "metric_name": "latency_budget_exceeded",
+        "observed_value": 1,
+        "baseline_value": None,
+        "threshold": 50.0,
+        "delta": 22.5,
+        "delta_pct": 0.45,
+        "increase_factor": None,
+        "severity": "high",
+        "status": "failed",
+        "explanation": "Runtime latency exceeded the configured budget.",
+        "why_it_matters": (
+            "Latency budget overrun means the runtime result did not "
+            "satisfy the expected timing contract."
+        ),
+        "suspected_causes": ["runtime_latency_spike"],
+        "recommendation": "Review runtime event log and fallback policy.",
+        "raw_context": {
+            "runtime_operation": {
+                "latency_budget_ms": 50.0,
+                "observed_mean_ms": 72.5,
+            }
+        },
+    }
+
+    analysis = guard_analysis()
+    analysis["primary_reason"] = (
+        "runtime_error_severity indicates runtime reliability risk."
+    )
+    analysis["evidence"].extend([backend_unavailable, latency_overrun])
+    analysis["suspected_causes"] = [
+        "queue_backlog",
+        "backend_runtime_unavailable",
+        "runtime_latency_spike",
+    ]
+    analysis["recommendations"] = [
+        "Tune scheduling policy.",
+        "Check backend installation and engine load logs.",
+        "Review runtime event log and fallback policy.",
+    ]
+    return analysis
+
+
 def runtime_result_with_operation_evidence() -> dict:
     return {
         "schema_version": "inferedge-runtime-result-v1",
@@ -512,7 +584,7 @@ def test_compute_agent_runtime_metrics_from_orchestrator_summary():
 def test_agent_runtime_report_blocks_when_guard_blocks():
     report = build_agent_runtime_reliability_report(
         orchestration_summary=orchestration_summary(),
-        guard_analysis=sustained_guard_analysis(),
+        guard_analysis=runtime_operation_guard_analysis(),
         runtime_result=runtime_result_with_operation_evidence(),
         remote_dispatch=remote_dispatch_result(),
     )
@@ -534,8 +606,19 @@ def test_agent_runtime_report_blocks_when_guard_blocks():
     assert "guard_blocked_runtime_block" in decision["triggered_rules"]
     assert "drop_rate_block" in decision["triggered_rules"]
     assert "sustained_overload_review" in decision["triggered_rules"]
+    assert "runtime_operation_guard_block" in decision["triggered_rules"]
     assert report["guard_summary"]["guard_verdict"] == "blocked"
-    assert "sustained_overload_risk" in report["guard_summary"]["evidence_types"]
+    assert "runtime_backend_unavailable" in report["guard_summary"]["evidence_types"]
+    runtime_guard = report["runtime_operation_guard_summary"]
+    assert runtime_guard["evidence_count"] == 2
+    assert runtime_guard["failed_count"] == 2
+    assert runtime_guard["retry_hints"] == ["check_backend_availability"]
+    assert {
+        item["type"] for item in runtime_guard["evidence"]
+    } == {
+        "runtime_backend_unavailable",
+        "runtime_latency_budget_overrun",
+    }
     assert report["agent_runtime_summary"]["timeline_summary"] == {
         "scenario_mode": "sustained_high_load",
         "queue_depth_sample_count": 1,
@@ -548,7 +631,11 @@ def test_agent_runtime_report_blocks_when_guard_blocks():
     }
     assert {
         item["type"] for item in report["runtime_reliability_evidence"]
-    } == {"excessive_drop_rate", "sustained_overload_risk"}
+    } == {
+        "excessive_drop_rate",
+        "runtime_backend_unavailable",
+        "runtime_latency_budget_overrun",
+    }
     operation_context = report["agent_runtime_summary"]["operation_context"]
     assert operation_context["queue_state_summary"]["queue_pressure_state"] == "overloaded"
     assert operation_context["worker_health_counts"] == {
@@ -630,7 +717,7 @@ def test_agent_runtime_report_keeps_legacy_orchestrator_summary_compatible():
 def test_agent_runtime_report_markdown_contains_sections():
     report = build_agent_runtime_reliability_report(
         orchestration_summary=orchestration_summary(),
-        guard_analysis=sustained_guard_analysis(),
+        guard_analysis=runtime_operation_guard_analysis(),
         runtime_result=runtime_result_with_operation_evidence(),
         remote_dispatch=remote_dispatch_result(),
     )
@@ -644,6 +731,10 @@ def test_agent_runtime_report_markdown_contains_sections():
     assert "Worker Health" in markdown
     assert "Runtime Event Summary" in markdown
     assert "Runtime Result Operation Evidence" in markdown
+    assert "AIGuard Runtime Operation Evidence" in markdown
+    assert "runtime_backend_unavailable" in markdown
+    assert "runtime_latency_budget_overrun" in markdown
+    assert "check_backend_availability" in markdown
     assert "Remote Dispatch Context" in markdown
     assert "jetson-nano-01" in markdown
     assert "plan_only" in markdown
@@ -655,7 +746,7 @@ def test_agent_runtime_report_markdown_contains_sections():
     assert "AIGuard Runtime Reliability Evidence" in markdown
     assert "Lab Agent Deployment Decision" in markdown
     assert "guard_blocked_runtime_block" in markdown
-    assert "sustained_overload_risk" in markdown
+    assert "runtime_operation_guard_block" in markdown
     assert "max_total_queue_depth" in markdown
     assert "not a production cloud orchestration dashboard" in markdown