gwonxhj · hyeokjun32 · May 19, 2026 · May 19, 2026
diff --git a/docs/portfolio/agent_runtime_reliability_report.md b/docs/portfolio/agent_runtime_reliability_report.md
@@ -70,6 +70,11 @@ runtime operation review:
 - Optional Runtime result operation evidence through `--runtime-result`,
   including `runtime_health_snapshot`, `runtime_error_classification`, and
   `runtime_events`.
+- Runtime timeout observation context, including `timeout_policy`,
+  `timeout_budget_ms`, and `runtime_timeout_observed`. A timeout observation is
+  treated as Lab `review_required` evidence because it means the configured
+  latency threshold was breached; it does not claim production request
+  cancellation.
 
 These fields make the report path explicit:
 
@@ -101,6 +106,8 @@ Triggered rules:
 - `deadline_miss_review`
 - `queue_backlog_review`
 - `sustained_overload_review`
+- `runtime_timeout_observed_review` when a Runtime result reports that latency
+  breached the configured timeout observation threshold.
 
 ## Boundary
 

diff --git a/inferedgelab/services/agent_runtime_report.py b/inferedgelab/services/agent_runtime_report.py
@@ -79,6 +79,10 @@
         "effect": "review_required",
         "description": "Sustained queue depth crossed the review threshold.",
     },
+    "runtime_timeout_observed_review": {
+        "effect": "review_required",
+        "description": "Runtime result reported a latency timeout observation threshold breach.",
+    },
     "runtime_reliability_pass_note": {
         "effect": "deployable_with_note",
         "description": "Runtime reliability evidence stayed within configured thresholds.",
@@ -103,6 +107,7 @@ def build_agent_runtime_reliability_report(
     decision = build_agent_runtime_deployment_decision(
         metrics=metrics,
         guard_analysis=guard_analysis,
+        runtime_result_context=runtime_result_context,
         thresholds=policy,
     )
 
@@ -153,6 +158,7 @@ def build_agent_runtime_deployment_decision(
     *,
     metrics: dict[str, Any],
     guard_analysis: dict[str, Any] | None,
+    runtime_result_context: dict[str, Any] | None = None,
     thresholds: dict[str, float] | None = None,
 ) -> dict[str, Any]:
     policy = {**DEFAULT_AGENT_RUNTIME_THRESHOLDS, **(thresholds or {})}
@@ -208,6 +214,8 @@ def build_agent_runtime_deployment_decision(
         review_rule="sustained_overload_review",
         blocked_rule="sustained_overload_block",
     )
+    if _runtime_timeout_observed(runtime_result_context):
+        triggered_rules.append("runtime_timeout_observed_review")
 
     if not triggered_rules:
         triggered_rules.append("runtime_reliability_pass_note")
@@ -493,7 +501,9 @@ def build_agent_runtime_reliability_markdown(report: dict[str, Any]) -> str:
             f"| backend_key | {runtime_result_context.get('backend_key') or '-'} |",
             f"| runtime_status | {runtime_health.get('status') or runtime_result_context.get('status') or '-'} |",
             f"| runtime_error_category | {runtime_error.get('category') or '-'} |",
-            f"| timeout_observed | {runtime_health.get('timeout_observed', runtime_error.get('timeout_observed', '-'))} |",
+            f"| timeout_policy | {runtime_health.get('timeout_policy', runtime_error.get('timeout_policy', '-'))} |",
+            f"| timeout_budget_ms | {_fmt_number(runtime_health.get('timeout_budget_ms', runtime_error.get('timeout_budget_ms')))} |",
+            f"| runtime_timeout_observed | {runtime_result_context.get('runtime_timeout_observed', False)} |",
             f"| runtime_event_count | {_fmt_number(runtime_event_summary.get('event_count'))} |",
             "",
             "Runtime result event sample:",
@@ -617,6 +627,39 @@ def _rule_effect(rule: str) -> str:
     return AGENT_RUNTIME_POLICY_RULES.get(rule, {}).get("effect", "unknown")
 
 
+def _runtime_timeout_observed(runtime_result_context: dict[str, Any] | None) -> bool:
+    """Report whether the context's stored flag or its raw parts show a timeout."""
+    if not isinstance(runtime_result_context, dict):
+        return False
+    lookup = runtime_result_context.get
+    return bool(lookup("runtime_timeout_observed")) or _runtime_timeout_observed_from_parts(
+        health=lookup("runtime_health_snapshot"),
+        error=lookup("runtime_error_classification"),
+        runtime_events=_dict_list(lookup("runtime_event_sample")),
+    )
+
+
+def _runtime_timeout_observed_from_parts(
+    *,
+    health: Any,
+    error: Any,
+    runtime_events: list[dict[str, Any]],
+) -> bool:
+    """Return True when health, error, or any event reports a timeout observation.
+
+    Errors and events also count by ``category``; non-dict parts are ignored.
+    """
+    if isinstance(health, dict) and health.get("timeout_observed"):
+        return True
+    for part in (error, *(runtime_events or ())):
+        # Skip malformed entries the same way non-dict health/error are skipped.
+        if isinstance(part, dict) and (
+            part.get("timeout_observed") or part.get("category") == "runtime_timeout_observed"
+        ):
+            return True
+    return False
+
+
 def _policy_log(orchestration_summary: dict[str, Any]) -> list[dict[str, Any]]:
     value = orchestration_summary.get("policy_decision_log")
     if not isinstance(value, list):
@@ -740,6 +783,7 @@ def _runtime_result_operation_context(
             "backend_key": None,
             "status": None,
             "success": None,
+            "runtime_timeout_observed": False,
             "runtime_health_snapshot": {},
             "runtime_error_classification": {},
             "runtime_event_summary": {
@@ -759,6 +803,11 @@ def _runtime_result_operation_context(
         "backend_key": runtime_result.get("backend_key"),
         "status": runtime_result.get("status"),
         "success": runtime_result.get("success"),
+        "runtime_timeout_observed": _runtime_timeout_observed_from_parts(
+            health=health,
+            error=error,
+            runtime_events=runtime_events,
+        ),
         "runtime_health_snapshot": dict(health) if isinstance(health, dict) else {},
         "runtime_error_classification": dict(error) if isinstance(error, dict) else {},
         "runtime_event_summary": {

diff --git a/tests/test_agent_runtime_report.py b/tests/test_agent_runtime_report.py
@@ -278,6 +278,112 @@ def runtime_result_with_operation_evidence() -> dict:
     }
 
 
+def runtime_result_with_timeout_observed() -> dict:
+    """Return a completed Runtime result whose evidence reports a timeout.
+
+    Health, error classification, and the last event are all marked observed.
+    """
+    result = runtime_result_with_operation_evidence()
+    result.update(status="completed", success=True)
+    # Still successful, but degraded: 12.5 ms mean latency against a 10 ms budget.
+    result["runtime_health_snapshot"].update(
+        status="degraded",
+        success=True,
+        latency_mean_ms=12.5,
+        latency_p95_ms=15.0,
+        latency_p99_ms=18.0,
+        fps=80.0,
+        timeout_policy="latency_threshold",
+        timeout_budget_ms=10,
+        timeout_observed=True,
+    )
+    # The classification flags the observation as retryable.
+    result["runtime_error_classification"].update(
+        category="runtime_timeout_observed",
+        message="mean latency exceeded configured timeout observation threshold",
+        timeout_observed=True,
+        retryable=True,
+    )
+    final_event = result["runtime_events"][-1]
+    final_event.update(
+        category="runtime_timeout_observed",
+        timeout_policy="latency_threshold",
+        timeout_observed=True,
+    )
+    return result
+
+
+def quiet_orchestration_summary() -> dict:
+    """Return an orchestration summary describing an idle, healthy run.
+
+    Starts from ``orchestration_summary()`` and overwrites the totals, queue,
+    latency, policy, and runtime event sections so that nothing was dropped,
+    missed, or queued and no policy decision or runtime event was recorded.
+    """
+    agent_names = ("vision_agent", "voice_command_agent", "safety_monitor_agent")
+
+    def empty_queues() -> dict:
+        # A fresh dict per call so the sections never share one mutable object.
+        return dict.fromkeys(agent_names, 0)
+
+    summary = orchestration_summary()
+    # Ten executions with no drops, deadline misses, fallbacks, or overload events.
+    summary["agent_runtime_summary"]["totals"] = dict(
+        executed_count=10,
+        dropped_count=0,
+        deadline_missed_count=0,
+        fallback_count=0,
+        policy_decision_count=0,
+        overload_event_count=0,
+    )
+    summary["sustained_runtime_summary"].update(
+        scenario_mode="normal",
+        queue_depth_sample_count=1,
+        latency_sample_count=1,
+        max_total_queue_depth=0,
+    )
+    # A single ``before_policy`` sample with every queue empty.
+    summary["queue_depth_timeline"] = [
+        dict(
+            cycle=1,
+            stage="before_policy",
+            queue_depth=empty_queues(),
+            total_queue_depth=0,
+        )
+    ]
+    # No latency samples or policy decisions are recorded.
+    summary["latency_timeline"] = []
+    summary["policy_decision_log"] = []
+    summary["queue_state_summary"].update(
+        max_total_queue_depth=0,
+        average_total_queue_depth=0.0,
+        final_queue_depth=empty_queues(),
+        max_queue_depth_by_task=empty_queues(),
+        queue_pressure_state="normal",
+    )
+    # The runtime event stream is empty.
+    summary["runtime_event_summary"] = dict(
+        schema_version="inferedge-orchestrator-runtime-event-summary-v1",
+        event_count=0,
+        event_type_counts={},
+    )
+    summary["runtime_event_timeline"] = []
+    return summary
+
+
+def passing_guard_analysis() -> dict:
+    """Return a low-severity guard diagnosis whose status and verdict are pass."""
+    # Fields that express the guard outcome itself.
+    outcome = dict(status="pass", guard_verdict="pass", severity="low", confidence=0.96)
+    return {
+        "schema_version": "inferedge-aiguard-diagnosis-v1",
+        **outcome,
+        "primary_reason": "Runtime reliability guard evidence stayed within thresholds.",
+        "evidence": [],
+        "created_at": "2026-05-17T00:00:00Z",
+    }
+
+
 def sustained_guard_analysis() -> dict:
     data = guard_analysis()
     data["evidence"].append(
@@ -402,6 +508,29 @@ def test_agent_runtime_report_blocks_when_guard_blocks():
     }
 
 
+def test_agent_runtime_report_marks_runtime_timeout_as_review():
+    """Runtime timeout evidence yields ``review_required`` without a guard block."""
+    report = build_agent_runtime_reliability_report(
+        orchestration_summary=quiet_orchestration_summary(),
+        guard_analysis=passing_guard_analysis(),
+        runtime_result=runtime_result_with_timeout_observed(),
+    )
+    outcome = report["agent_deployment_decision"]
+    assert outcome["decision"] == "review_required"
+    fired_rules = outcome["triggered_rules"]
+    assert "runtime_timeout_observed_review" in fired_rules
+    assert "guard_blocked_runtime_block" not in fired_rules
+    context = report["agent_runtime_summary"]["runtime_result_context"]
+    assert context["runtime_timeout_observed"] is True
+    assert context["runtime_health_snapshot"]["timeout_policy"] == "latency_threshold"
+
+    # The rendered markdown must surface the flag, the policy, and the rule.
+    rendered = build_agent_runtime_reliability_markdown(report)
+    for needle in ("runtime_timeout_observed", "latency_threshold"):
+        assert needle in rendered
+    assert "runtime_timeout_observed_review" in rendered
+
+
 def test_agent_runtime_report_keeps_legacy_orchestrator_summary_compatible():
     legacy_summary = orchestration_summary()
     legacy_summary.pop("queue_state_summary")