diff --git a/docs/portfolio/agent_runtime_reliability_report.md b/docs/portfolio/agent_runtime_reliability_report.md index 6c26cae..7c7b189 100644 --- a/docs/portfolio/agent_runtime_reliability_report.md +++ b/docs/portfolio/agent_runtime_reliability_report.md @@ -70,6 +70,11 @@ runtime operation review: - Optional Runtime result operation evidence through `--runtime-result`, including `runtime_health_snapshot`, `runtime_error_classification`, and `runtime_events`. +- Runtime timeout observation context, including `timeout_policy`, + `timeout_budget_ms`, and `runtime_timeout_observed`. A timeout observation is + treated as Lab `review_required` evidence because it means the configured + latency threshold was breached; it does not claim production request + cancellation. These fields make the report path explicit: @@ -101,6 +106,8 @@ Triggered rules: - `deadline_miss_review` - `queue_backlog_review` - `sustained_overload_review` +- `runtime_timeout_observed_review` when a Runtime result reports a latency + timeout observation threshold breach. ## Boundary diff --git a/inferedgelab/services/agent_runtime_report.py b/inferedgelab/services/agent_runtime_report.py index b193ed2..78350a7 100644 --- a/inferedgelab/services/agent_runtime_report.py +++ b/inferedgelab/services/agent_runtime_report.py @@ -79,6 +79,10 @@ "effect": "review_required", "description": "Sustained queue depth crossed the review threshold.", }, + "runtime_timeout_observed_review": { + "effect": "review_required", + "description": "Runtime result reported a latency timeout observation threshold breach.", + }, "runtime_reliability_pass_note": { "effect": "deployable_with_note", "description": "Runtime reliability evidence stayed within configured thresholds.", @@ -103,6 +107,7 @@ def build_agent_runtime_reliability_report( decision = build_agent_runtime_deployment_decision( metrics=metrics, guard_analysis=guard_analysis, + runtime_result_context=runtime_result_context, thresholds=policy, ) @@ -153,6 +158,7 @@ def build_agent_runtime_deployment_decision( *, metrics: dict[str, Any], guard_analysis: dict[str, Any] | None, + runtime_result_context: dict[str, Any] | None = None, thresholds: dict[str, float] | None = None, ) -> dict[str, Any]: policy = {**DEFAULT_AGENT_RUNTIME_THRESHOLDS, **(thresholds or {})} @@ -208,6 +214,8 @@ def build_agent_runtime_deployment_decision( review_rule="sustained_overload_review", blocked_rule="sustained_overload_block", ) + if _runtime_timeout_observed(runtime_result_context): + triggered_rules.append("runtime_timeout_observed_review") if not triggered_rules: triggered_rules.append("runtime_reliability_pass_note") @@ -493,7 +501,9 @@ def build_agent_runtime_reliability_markdown(report: dict[str, Any]) -> str: f"| backend_key | {runtime_result_context.get('backend_key') or '-'} |", f"| runtime_status | {runtime_health.get('status') or runtime_result_context.get('status') or '-'} |", f"| runtime_error_category | {runtime_error.get('category') or '-'} |", - f"| timeout_observed | {runtime_health.get('timeout_observed', runtime_error.get('timeout_observed', '-'))} |", + f"| timeout_policy | {runtime_health.get('timeout_policy', runtime_error.get('timeout_policy', '-'))} |", + f"| timeout_budget_ms | {_fmt_number(runtime_health.get('timeout_budget_ms', runtime_error.get('timeout_budget_ms')))} |", + f"| runtime_timeout_observed | {runtime_result_context.get('runtime_timeout_observed', False)} |", f"| runtime_event_count | {_fmt_number(runtime_event_summary.get('event_count'))} |", "", "Runtime result event sample:", @@ -617,6 +627,39 @@ def _rule_effect(rule: str) -> str: return AGENT_RUNTIME_POLICY_RULES.get(rule, {}).get("effect", "unknown") +def _runtime_timeout_observed(runtime_result_context: dict[str, Any] | None) -> bool: + if not isinstance(runtime_result_context, dict): + return False + if bool(runtime_result_context.get("runtime_timeout_observed")): + return True + return _runtime_timeout_observed_from_parts( + health=runtime_result_context.get("runtime_health_snapshot"), + error=runtime_result_context.get("runtime_error_classification"), + runtime_events=_dict_list(runtime_result_context.get("runtime_event_sample")), + ) + + +def _runtime_timeout_observed_from_parts( + *, + health: Any, + error: Any, + runtime_events: list[dict[str, Any]], +) -> bool: + if isinstance(health, dict) and bool(health.get("timeout_observed")): + return True + if isinstance(error, dict): + if bool(error.get("timeout_observed")): + return True + if error.get("category") == "runtime_timeout_observed": + return True + for event in runtime_events: + if bool(event.get("timeout_observed")): + return True + if event.get("category") == "runtime_timeout_observed": + return True + return False + + def _policy_log(orchestration_summary: dict[str, Any]) -> list[dict[str, Any]]: value = orchestration_summary.get("policy_decision_log") if not isinstance(value, list): @@ -740,6 +783,7 @@ def _runtime_result_operation_context( "backend_key": None, "status": None, "success": None, + "runtime_timeout_observed": False, "runtime_health_snapshot": {}, "runtime_error_classification": {}, "runtime_event_summary": { @@ -759,6 +803,11 @@ def _runtime_result_operation_context( "backend_key": runtime_result.get("backend_key"), "status": runtime_result.get("status"), "success": runtime_result.get("success"), + "runtime_timeout_observed": _runtime_timeout_observed_from_parts( + health=health, + error=error, + runtime_events=runtime_events, + ), "runtime_health_snapshot": dict(health) if isinstance(health, dict) else {}, "runtime_error_classification": dict(error) if isinstance(error, dict) else {}, "runtime_event_summary": { diff --git a/tests/test_agent_runtime_report.py b/tests/test_agent_runtime_report.py index 30a4bb7..c468c1c 100644 --- a/tests/test_agent_runtime_report.py +++ b/tests/test_agent_runtime_report.py @@ -278,6 +278,112 @@ def runtime_result_with_operation_evidence() -> dict: } +def runtime_result_with_timeout_observed() -> dict: + data = runtime_result_with_operation_evidence() + data["status"] = "completed" + data["success"] = True + data["runtime_health_snapshot"].update( + { + "status": "degraded", + "success": True, + "latency_mean_ms": 12.5, + "latency_p95_ms": 15.0, + "latency_p99_ms": 18.0, + "fps": 80.0, + "timeout_policy": "latency_threshold", + "timeout_budget_ms": 10, + "timeout_observed": True, + } + ) + data["runtime_error_classification"].update( + { + "category": "runtime_timeout_observed", + "message": "mean latency exceeded configured timeout observation threshold", + "timeout_observed": True, + "retryable": True, + } + ) + data["runtime_events"][-1].update( + { + "category": "runtime_timeout_observed", + "timeout_policy": "latency_threshold", + "timeout_observed": True, + } + ) + return data + + +def quiet_orchestration_summary() -> dict: + data = orchestration_summary() + data["agent_runtime_summary"]["totals"] = { + "executed_count": 10, + "dropped_count": 0, + "deadline_missed_count": 0, + "fallback_count": 0, + "policy_decision_count": 0, + "overload_event_count": 0, + } + data["sustained_runtime_summary"].update( + { + "scenario_mode": "normal", + "queue_depth_sample_count": 1, + "latency_sample_count": 1, + "max_total_queue_depth": 0, + } + ) + data["queue_depth_timeline"] = [ + { + "cycle": 1, + "stage": "before_policy", + "queue_depth": { + "vision_agent": 0, + "voice_command_agent": 0, + "safety_monitor_agent": 0, + }, + "total_queue_depth": 0, + } + ] + data["latency_timeline"] = [] + data["policy_decision_log"] = [] + data["queue_state_summary"].update( + { + "max_total_queue_depth": 0, + "average_total_queue_depth": 0.0, + "final_queue_depth": { + "vision_agent": 0, + "voice_command_agent": 0, + "safety_monitor_agent": 0, + }, + "max_queue_depth_by_task": { + "vision_agent": 0, + "voice_command_agent": 0, + "safety_monitor_agent": 0, + }, + "queue_pressure_state": "normal", + } + ) + data["runtime_event_summary"] = { + "schema_version": "inferedge-orchestrator-runtime-event-summary-v1", + "event_count": 0, + "event_type_counts": {}, + } + data["runtime_event_timeline"] = [] + return data + + +def passing_guard_analysis() -> dict: + return { + "schema_version": "inferedge-aiguard-diagnosis-v1", + "status": "pass", + "guard_verdict": "pass", + "severity": "low", + "confidence": 0.96, + "primary_reason": "Runtime reliability guard evidence stayed within thresholds.", + "evidence": [], + "created_at": "2026-05-17T00:00:00Z", + } + + def sustained_guard_analysis() -> dict: data = guard_analysis() data["evidence"].append( @@ -402,6 +508,29 @@ def test_agent_runtime_report_blocks_when_guard_blocks(): } +def test_agent_runtime_report_marks_runtime_timeout_as_review(): + report = build_agent_runtime_reliability_report( + orchestration_summary=quiet_orchestration_summary(), + guard_analysis=passing_guard_analysis(), + runtime_result=runtime_result_with_timeout_observed(), + ) + + decision = report["agent_deployment_decision"] + assert decision["decision"] == "review_required" + assert "runtime_timeout_observed_review" in decision["triggered_rules"] + assert "guard_blocked_runtime_block" not in decision["triggered_rules"] + runtime_context = report["agent_runtime_summary"]["runtime_result_context"] + assert runtime_context["runtime_timeout_observed"] is True + assert runtime_context["runtime_health_snapshot"]["timeout_policy"] == ( + "latency_threshold" + ) + + markdown = build_agent_runtime_reliability_markdown(report) + assert "runtime_timeout_observed" in markdown + assert "latency_threshold" in markdown + assert "runtime_timeout_observed_review" in markdown + + def test_agent_runtime_report_keeps_legacy_orchestrator_summary_compatible(): legacy_summary = orchestration_summary() legacy_summary.pop("queue_state_summary")