Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/portfolio/agent_runtime_reliability_report.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ runtime operation review:
- Optional Runtime result operation evidence through `--runtime-result`,
including `runtime_health_snapshot`, `runtime_error_classification`, and
`runtime_events`.
- Runtime timeout observation context, including `timeout_policy`,
`timeout_budget_ms`, and `runtime_timeout_observed`. A timeout observation is
treated as Lab `review_required` evidence because it means the configured
latency threshold was breached; it does not claim production request
cancellation.

These fields make the report path explicit:

Expand Down Expand Up @@ -101,6 +106,8 @@ Triggered rules:
- `deadline_miss_review`
- `queue_backlog_review`
- `sustained_overload_review`
- `runtime_timeout_observed_review` when a Runtime result reports a latency
timeout observation threshold breach.

## Boundary

Expand Down
51 changes: 50 additions & 1 deletion inferedgelab/services/agent_runtime_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@
"effect": "review_required",
"description": "Sustained queue depth crossed the review threshold.",
},
"runtime_timeout_observed_review": {
"effect": "review_required",
"description": "Runtime result reported a latency timeout observation threshold breach.",
},
"runtime_reliability_pass_note": {
"effect": "deployable_with_note",
"description": "Runtime reliability evidence stayed within configured thresholds.",
Expand All @@ -103,6 +107,7 @@ def build_agent_runtime_reliability_report(
decision = build_agent_runtime_deployment_decision(
metrics=metrics,
guard_analysis=guard_analysis,
runtime_result_context=runtime_result_context,
thresholds=policy,
)

Expand Down Expand Up @@ -153,6 +158,7 @@ def build_agent_runtime_deployment_decision(
*,
metrics: dict[str, Any],
guard_analysis: dict[str, Any] | None,
runtime_result_context: dict[str, Any] | None = None,
thresholds: dict[str, float] | None = None,
) -> dict[str, Any]:
policy = {**DEFAULT_AGENT_RUNTIME_THRESHOLDS, **(thresholds or {})}
Expand Down Expand Up @@ -208,6 +214,8 @@ def build_agent_runtime_deployment_decision(
review_rule="sustained_overload_review",
blocked_rule="sustained_overload_block",
)
if _runtime_timeout_observed(runtime_result_context):
triggered_rules.append("runtime_timeout_observed_review")

if not triggered_rules:
triggered_rules.append("runtime_reliability_pass_note")
Expand Down Expand Up @@ -493,7 +501,9 @@ def build_agent_runtime_reliability_markdown(report: dict[str, Any]) -> str:
f"| backend_key | {runtime_result_context.get('backend_key') or '-'} |",
f"| runtime_status | {runtime_health.get('status') or runtime_result_context.get('status') or '-'} |",
f"| runtime_error_category | {runtime_error.get('category') or '-'} |",
f"| timeout_observed | {runtime_health.get('timeout_observed', runtime_error.get('timeout_observed', '-'))} |",
f"| timeout_policy | {runtime_health.get('timeout_policy', runtime_error.get('timeout_policy', '-'))} |",
f"| timeout_budget_ms | {_fmt_number(runtime_health.get('timeout_budget_ms', runtime_error.get('timeout_budget_ms')))} |",
f"| runtime_timeout_observed | {runtime_result_context.get('runtime_timeout_observed', False)} |",
f"| runtime_event_count | {_fmt_number(runtime_event_summary.get('event_count'))} |",
"",
"Runtime result event sample:",
Expand Down Expand Up @@ -617,6 +627,39 @@ def _rule_effect(rule: str) -> str:
return AGENT_RUNTIME_POLICY_RULES.get(rule, {}).get("effect", "unknown")


def _runtime_timeout_observed(runtime_result_context: dict[str, Any] | None) -> bool:
if not isinstance(runtime_result_context, dict):
return False
if bool(runtime_result_context.get("runtime_timeout_observed")):
return True
return _runtime_timeout_observed_from_parts(
health=runtime_result_context.get("runtime_health_snapshot"),
error=runtime_result_context.get("runtime_error_classification"),
runtime_events=_dict_list(runtime_result_context.get("runtime_event_sample")),
)


def _runtime_timeout_observed_from_parts(
*,
health: Any,
error: Any,
runtime_events: list[dict[str, Any]],
) -> bool:
if isinstance(health, dict) and bool(health.get("timeout_observed")):
return True
if isinstance(error, dict):
if bool(error.get("timeout_observed")):
return True
if error.get("category") == "runtime_timeout_observed":
return True
for event in runtime_events:
if bool(event.get("timeout_observed")):
return True
if event.get("category") == "runtime_timeout_observed":
return True
return False


def _policy_log(orchestration_summary: dict[str, Any]) -> list[dict[str, Any]]:
value = orchestration_summary.get("policy_decision_log")
if not isinstance(value, list):
Expand Down Expand Up @@ -740,6 +783,7 @@ def _runtime_result_operation_context(
"backend_key": None,
"status": None,
"success": None,
"runtime_timeout_observed": False,
"runtime_health_snapshot": {},
"runtime_error_classification": {},
"runtime_event_summary": {
Expand All @@ -759,6 +803,11 @@ def _runtime_result_operation_context(
"backend_key": runtime_result.get("backend_key"),
"status": runtime_result.get("status"),
"success": runtime_result.get("success"),
"runtime_timeout_observed": _runtime_timeout_observed_from_parts(
health=health,
error=error,
runtime_events=runtime_events,
),
"runtime_health_snapshot": dict(health) if isinstance(health, dict) else {},
"runtime_error_classification": dict(error) if isinstance(error, dict) else {},
"runtime_event_summary": {
Expand Down
129 changes: 129 additions & 0 deletions tests/test_agent_runtime_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,112 @@ def runtime_result_with_operation_evidence() -> dict:
}


def runtime_result_with_timeout_observed() -> dict:
    """Build a Runtime result fixture that completed but reports a timeout observation.

    Starts from the operation-evidence fixture and overlays timeout fields on
    the health snapshot, the error classification and the last runtime event.
    """
    result = runtime_result_with_operation_evidence()
    result["status"] = "completed"
    result["success"] = True

    health_snapshot = result["runtime_health_snapshot"]
    health_snapshot.update(
        status="degraded",
        success=True,
        latency_mean_ms=12.5,
        latency_p95_ms=15.0,
        latency_p99_ms=18.0,
        fps=80.0,
        timeout_policy="latency_threshold",
        timeout_budget_ms=10,
        timeout_observed=True,
    )

    error_classification = result["runtime_error_classification"]
    error_classification.update(
        category="runtime_timeout_observed",
        message="mean latency exceeded configured timeout observation threshold",
        timeout_observed=True,
        retryable=True,
    )

    last_event = result["runtime_events"][-1]
    last_event.update(
        category="runtime_timeout_observed",
        timeout_policy="latency_threshold",
        timeout_observed=True,
    )
    return result


def quiet_orchestration_summary() -> dict:
    """Build an orchestration summary fixture with zeroed load counters.

    Starts from the base orchestration summary and overwrites the totals,
    sustained-runtime fields, timelines, queue state and event summary with
    idle values.
    """
    agent_names = ("vision_agent", "voice_command_agent", "safety_monitor_agent")

    def idle_depths() -> dict:
        # A fresh mapping per use so fixture sections never alias each other.
        return {agent: 0 for agent in agent_names}

    summary = orchestration_summary()

    summary["agent_runtime_summary"]["totals"] = dict(
        executed_count=10,
        dropped_count=0,
        deadline_missed_count=0,
        fallback_count=0,
        policy_decision_count=0,
        overload_event_count=0,
    )

    summary["sustained_runtime_summary"].update(
        scenario_mode="normal",
        queue_depth_sample_count=1,
        latency_sample_count=1,
        max_total_queue_depth=0,
    )

    first_sample = dict(
        cycle=1,
        stage="before_policy",
        queue_depth=idle_depths(),
        total_queue_depth=0,
    )
    summary["queue_depth_timeline"] = [first_sample]
    summary["latency_timeline"] = []
    summary["policy_decision_log"] = []

    summary["queue_state_summary"].update(
        max_total_queue_depth=0,
        average_total_queue_depth=0.0,
        final_queue_depth=idle_depths(),
        max_queue_depth_by_task=idle_depths(),
        queue_pressure_state="normal",
    )

    summary["runtime_event_summary"] = dict(
        schema_version="inferedge-orchestrator-runtime-event-summary-v1",
        event_count=0,
        event_type_counts={},
    )
    summary["runtime_event_timeline"] = []
    return summary


def passing_guard_analysis() -> dict:
    """Build a guard analysis fixture with a passing verdict and no evidence entries."""
    analysis = dict(
        schema_version="inferedge-aiguard-diagnosis-v1",
        status="pass",
        guard_verdict="pass",
        severity="low",
        confidence=0.96,
        primary_reason="Runtime reliability guard evidence stayed within thresholds.",
        evidence=[],
        created_at="2026-05-17T00:00:00Z",
    )
    return analysis


def sustained_guard_analysis() -> dict:
data = guard_analysis()
data["evidence"].append(
Expand Down Expand Up @@ -402,6 +508,29 @@ def test_agent_runtime_report_blocks_when_guard_blocks():
}


def test_agent_runtime_report_marks_runtime_timeout_as_review():
    """A timeout observation on a quiet run yields review_required, not a guard block."""
    report = build_agent_runtime_reliability_report(
        orchestration_summary=quiet_orchestration_summary(),
        guard_analysis=passing_guard_analysis(),
        runtime_result=runtime_result_with_timeout_observed(),
    )

    # Decision: the timeout rule fires and the guard block rule does not.
    deployment_decision = report["agent_deployment_decision"]
    fired_rules = deployment_decision["triggered_rules"]
    assert deployment_decision["decision"] == "review_required"
    assert "runtime_timeout_observed_review" in fired_rules
    assert "guard_blocked_runtime_block" not in fired_rules

    # Context: the flag and the timeout policy are carried into the summary.
    result_context = report["agent_runtime_summary"]["runtime_result_context"]
    assert result_context["runtime_timeout_observed"] is True
    health_snapshot = result_context["runtime_health_snapshot"]
    assert health_snapshot["timeout_policy"] == "latency_threshold"

    # Markdown: the rendered report mentions the flag, the policy and the rule.
    rendered = build_agent_runtime_reliability_markdown(report)
    for fragment in (
        "runtime_timeout_observed",
        "latency_threshold",
        "runtime_timeout_observed_review",
    ):
        assert fragment in rendered


def test_agent_runtime_report_keeps_legacy_orchestrator_summary_compatible():
legacy_summary = orchestration_summary()
legacy_summary.pop("queue_state_summary")
Expand Down
Loading