Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion docs/portfolio/agent_runtime_reliability_report.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ runtime operation review:
treated as Lab `review_required` evidence because it means the configured
latency threshold was breached; it does not claim production request
cancellation.
- AIGuard Runtime operation evidence, including
`runtime_backend_unavailable`, `runtime_latency_budget_overrun`,
`runtime_error_classification`, and
`runtime_thermal_memory_evidence_missing` when Runtime health/error/event
fields are analyzed by AIGuard.

These fields make the report path explicit:

Expand Down Expand Up @@ -117,14 +122,20 @@ Triggered rules:
- `sustained_overload_review`
- `runtime_timeout_observed_review` when a Runtime result reports a latency
timeout observation threshold breach.
- `runtime_operation_guard_block` when AIGuard reports failed high-severity
Runtime operation evidence such as backend unavailable or latency budget
overrun.
- `runtime_operation_guard_review` when AIGuard reports warning-level Runtime
operation evidence such as missing Jetson thermal/memory context.

## Boundary

- Orchestrator records scheduling and policy evidence.
- Orchestrator operation-health fields are displayed as local runtime evidence.
- Orchestrator remote dispatch result fields are displayed as plan-only worker
selection evidence when provided.
- AIGuard explains runtime reliability risk.
- AIGuard explains runtime reliability risk, including additive Runtime
health/error/event warning evidence when provided.
- Lab remains the final deployment decision owner.
- This report is an additive agent-runtime path and does not change existing
Runtime result, compare output, or classic deployment decision contracts.
134 changes: 134 additions & 0 deletions inferedgelab/services/agent_runtime_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@
AIGUARD_DIAGNOSIS_SCHEMA_VERSION = "inferedge-aiguard-diagnosis-v1"
REMOTE_DISPATCH_SCHEMA_VERSION = "inferedge-remote-dispatch-result-v1"

# AIGuard evidence ``type`` values that are treated as Runtime operation
# evidence. _runtime_operation_guard_summary keeps only evidence items whose
# ``type`` is a member of this set.
RUNTIME_OPERATION_GUARD_EVIDENCE_TYPES = {
    "runtime_backend_unavailable",
    "runtime_latency_budget_overrun",
    "runtime_error_classification",
    "runtime_thermal_memory_evidence_missing",
    "runtime_operation_health",
}

DEFAULT_AGENT_RUNTIME_THRESHOLDS = {
"deadline_miss_rate_review": 0.05,
"deadline_miss_rate_blocked": 0.20,
Expand Down Expand Up @@ -84,6 +92,14 @@
"effect": "review_required",
"description": "Runtime result reported a latency timeout observation threshold breach.",
},
"runtime_operation_guard_block": {
"effect": "blocked",
"description": "AIGuard Runtime operation evidence reported failed backend, latency, or error-classification risk.",
},
"runtime_operation_guard_review": {
"effect": "review_required",
"description": "AIGuard Runtime operation evidence reported warning-level runtime context risk.",
},
"runtime_reliability_pass_note": {
"effect": "deployable_with_note",
"description": "Runtime reliability evidence stayed within configured thresholds.",
Expand All @@ -107,10 +123,12 @@ def build_agent_runtime_reliability_report(
runtime_summary = _agent_runtime_summary(orchestration_summary)
runtime_result_context = _runtime_result_operation_context(runtime_result)
remote_dispatch_context = _remote_dispatch_context(remote_dispatch)
runtime_operation_guard_summary = _runtime_operation_guard_summary(guard_analysis)
decision = build_agent_runtime_deployment_decision(
metrics=metrics,
guard_analysis=guard_analysis,
runtime_result_context=runtime_result_context,
runtime_operation_guard_summary=runtime_operation_guard_summary,
thresholds=policy,
)

Expand Down Expand Up @@ -153,6 +171,7 @@ def build_agent_runtime_reliability_report(
"policy_decision_log_count": len(_policy_log(orchestration_summary)),
},
"guard_summary": _guard_summary(guard_analysis),
"runtime_operation_guard_summary": runtime_operation_guard_summary,
"runtime_reliability_evidence": _runtime_reliability_evidence(guard_analysis),
"agent_deployment_decision": decision,
"notes": [
Expand All @@ -168,6 +187,7 @@ def build_agent_runtime_deployment_decision(
metrics: dict[str, Any],
guard_analysis: dict[str, Any] | None,
runtime_result_context: dict[str, Any] | None = None,
runtime_operation_guard_summary: dict[str, Any] | None = None,
thresholds: dict[str, float] | None = None,
) -> dict[str, Any]:
policy = {**DEFAULT_AGENT_RUNTIME_THRESHOLDS, **(thresholds or {})}
Expand Down Expand Up @@ -225,6 +245,13 @@ def build_agent_runtime_deployment_decision(
)
if _runtime_timeout_observed(runtime_result_context):
triggered_rules.append("runtime_timeout_observed_review")
runtime_guard = runtime_operation_guard_summary
if runtime_guard is None:
runtime_guard = _runtime_operation_guard_summary(guard_analysis)
if _runtime_operation_guard_blocking(runtime_guard):
triggered_rules.append("runtime_operation_guard_block")
elif _runtime_operation_guard_review(runtime_guard):
triggered_rules.append("runtime_operation_guard_review")

if not triggered_rules:
triggered_rules.append("runtime_reliability_pass_note")
Expand Down Expand Up @@ -391,6 +418,7 @@ def build_agent_runtime_reliability_markdown(report: dict[str, Any]) -> str:
metrics = runtime["metrics"]
decision = report["agent_deployment_decision"]
guard = report["guard_summary"]
runtime_guard = report.get("runtime_operation_guard_summary") or {}
runtime_result_context = runtime.get("runtime_result_context") or {}
remote_dispatch_context = runtime.get("remote_dispatch_context") or {}
runtime_health = runtime_result_context.get("runtime_health_snapshot") or {}
Expand Down Expand Up @@ -528,6 +556,31 @@ def build_agent_runtime_reliability_markdown(report: dict[str, Any]) -> str:
f"| runtime_timeout_observed | {runtime_result_context.get('runtime_timeout_observed', False)} |",
f"| runtime_event_count | {_fmt_number(runtime_event_summary.get('event_count'))} |",
"",
"## AIGuard Runtime Operation Evidence",
"",
"| Field | Value |",
"|---|---|",
f"| evidence_count | {_fmt_number(runtime_guard.get('evidence_count'))} |",
f"| failed_count | {_fmt_number(runtime_guard.get('failed_count'))} |",
f"| warning_count | {_fmt_number(runtime_guard.get('warning_count'))} |",
f"| evidence_types | {', '.join(runtime_guard.get('evidence_types') or []) or '-'} |",
f"| retry_hints | {', '.join(runtime_guard.get('retry_hints') or []) or '-'} |",
"",
"Runtime operation guard evidence:",
"",
"| Type | Metric | Observed | Severity | Status | Recommendation |",
"|---|---|---:|---|---|---|",
*[
"| "
f"{item.get('type') or '-'} | "
f"{item.get('metric_name') or '-'} | "
f"{_fmt_number(item.get('observed_value'))} | "
f"{item.get('severity') or '-'} | "
f"{item.get('status') or '-'} | "
f"{item.get('recommendation') or '-'} |"
for item in runtime_guard.get("evidence", [])
],
"",
"Runtime result event sample:",
"",
"| # | Type | Status | Detail |",
Expand Down Expand Up @@ -788,6 +841,87 @@ def _runtime_reliability_evidence(
]


def _runtime_operation_guard_summary(
    guard_analysis: dict[str, Any] | None,
) -> dict[str, Any]:
    """Summarize the AIGuard evidence items that describe Runtime operation.

    Only dict evidence items whose ``type`` is a string listed in
    ``RUNTIME_OPERATION_GUARD_EVIDENCE_TYPES`` are kept. They are condensed
    into counts, type/metric listings, the sorted distinct retry hints, and a
    trimmed copy of each item.

    Args:
        guard_analysis: AIGuard analysis payload, or ``None``; passed to
            ``guard_evidence_items`` unchanged.

    Returns:
        A dict with the keys ``evidence_count``, ``failed_count``,
        ``warning_count``, ``evidence_types``, ``metric_names``,
        ``retry_hints`` and ``evidence``.
    """
    evidence = []
    for item in guard_evidence_items(guard_analysis):
        if not isinstance(item, dict):
            continue
        evidence_type = item.get("type")
        # Check the type is a string before the set lookup: an unhashable
        # value (list/dict) from a malformed payload would otherwise raise
        # TypeError and abort the whole report.
        if (
            isinstance(evidence_type, str)
            and evidence_type in RUNTIME_OPERATION_GUARD_EVIDENCE_TYPES
        ):
            evidence.append(item)

    # Resolve each retry hint once; it feeds both the distinct hint list and
    # the per-item entries below.
    hints = [_runtime_operation_retry_hint(item) for item in evidence]
    retry_hints = sorted(
        {hint for hint in hints if isinstance(hint, str) and hint}
    )

    failed_count = sum(1 for item in evidence if item.get("status") == "failed")
    warning_count = sum(1 for item in evidence if item.get("status") == "warning")

    return {
        "evidence_count": len(evidence),
        "failed_count": failed_count,
        "warning_count": warning_count,
        # Every kept item has a string type (enforced by the filter above).
        "evidence_types": [item.get("type") for item in evidence],
        "metric_names": [
            item.get("metric_name")
            for item in evidence
            if isinstance(item.get("metric_name"), str)
        ],
        "retry_hints": retry_hints,
        "evidence": [
            {
                "type": item.get("type"),
                "metric_name": item.get("metric_name"),
                "observed_value": item.get("observed_value"),
                "threshold": item.get("threshold"),
                "severity": item.get("severity"),
                "status": item.get("status"),
                "explanation": item.get("explanation"),
                "recommendation": item.get("recommendation"),
                "why_it_matters": item.get("why_it_matters"),
                "retry_hint": hint,
            }
            for item, hint in zip(evidence, hints)
        ],
    }


def _runtime_operation_guard_blocking(summary: dict[str, Any]) -> bool:
    """Return True when the summary holds failed evidence that must block.

    A failed evidence item blocks when its ``severity`` is ``high`` or
    ``critical``, or when its ``type`` is ``runtime_backend_unavailable`` or
    ``runtime_latency_budget_overrun`` regardless of severity.

    Args:
        summary: Runtime operation guard summary; only its ``evidence`` entry
            is read, via ``_dict_list`` (presumably that helper yields only
            the dict entries - confirm against its definition).

    Returns:
        True if at least one failed item meets a blocking condition.
    """
    # Tuples rather than sets: membership then compares by equality, so an
    # unhashable severity/type value (list/dict) from a malformed payload is
    # simply "not a match" instead of raising TypeError.
    blocking_severities = ("high", "critical")
    blocking_types = (
        "runtime_backend_unavailable",
        "runtime_latency_budget_overrun",
    )
    for item in _dict_list(summary.get("evidence")):
        if item.get("status") != "failed":
            continue
        if item.get("severity") in blocking_severities:
            return True
        if item.get("type") in blocking_types:
            return True
    return False


def _runtime_operation_guard_review(summary: dict[str, Any]) -> bool:
    """Return True when the summary warrants review but is not blocking.

    Review applies when the blocking check is negative and the summary reports
    a truthy ``failed_count`` or ``warning_count``.
    """
    # The blocking check runs first, so a blocking summary never also
    # reports as review.
    return not _runtime_operation_guard_blocking(summary) and bool(
        summary.get("failed_count") or summary.get("warning_count")
    )


def _runtime_operation_retry_hint(evidence_item: dict[str, Any]) -> str | None:
raw_context = evidence_item.get("raw_context")
if not isinstance(raw_context, dict):
return None
runtime_operation = raw_context.get("runtime_operation")
if not isinstance(runtime_operation, dict):
return None
retry_hint = runtime_operation.get("retry_hint")
return retry_hint if isinstance(retry_hint, str) and retry_hint else None


def _timeline_summary(
orchestration_summary: dict[str, Any],
metrics: dict[str, Any],
Expand Down
101 changes: 96 additions & 5 deletions tests/test_agent_runtime_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,78 @@ def guard_analysis() -> dict:
}


def runtime_operation_guard_analysis() -> dict:
    """Return the base guard analysis extended with failed Runtime operation evidence.

    Starts from ``guard_analysis()``, overrides ``primary_reason``,
    ``suspected_causes`` and ``recommendations``, and appends two failed,
    high-severity evidence items: a backend-unavailable item (which carries a
    ``retry_hint``) and a latency-budget-overrun item (which does not).
    """
    backend_unavailable = {
        "type": "runtime_backend_unavailable",
        "metric_name": "engine_available",
        "observed_value": 0,
        "baseline_value": None,
        "threshold": 1,
        "delta": None,
        "delta_pct": None,
        "increase_factor": None,
        "severity": "high",
        "status": "failed",
        "explanation": "Runtime could not confirm backend availability.",
        "why_it_matters": (
            "Runtime backend availability is required before using the "
            "result as deployment evidence."
        ),
        "suspected_causes": ["backend_runtime_unavailable"],
        "recommendation": "Check backend installation and engine load logs.",
        "raw_context": {
            "runtime_operation": {
                "engine_available": False,
                "retry_hint": "check_backend_availability",
            }
        },
    }
    latency_overrun = {
        "type": "runtime_latency_budget_overrun",
        "metric_name": "latency_budget_exceeded",
        "observed_value": 1,
        "baseline_value": None,
        "threshold": 50.0,
        "delta": 22.5,
        "delta_pct": 0.45,
        "increase_factor": None,
        "severity": "high",
        "status": "failed",
        "explanation": "Runtime latency exceeded the configured budget.",
        "why_it_matters": (
            "Latency budget overrun means the runtime result did not "
            "satisfy the expected timing contract."
        ),
        "suspected_causes": ["runtime_latency_spike"],
        "recommendation": "Review runtime event log and fallback policy.",
        "raw_context": {
            "runtime_operation": {
                "latency_budget_ms": 50.0,
                "observed_mean_ms": 72.5,
            }
        },
    }

    analysis = guard_analysis()
    analysis["primary_reason"] = (
        "runtime_error_severity indicates runtime reliability risk."
    )
    # Append in place so the base fixture's existing evidence is kept.
    evidence_list = analysis["evidence"]
    evidence_list.append(backend_unavailable)
    evidence_list.append(latency_overrun)
    analysis["suspected_causes"] = [
        "queue_backlog",
        "backend_runtime_unavailable",
        "runtime_latency_spike",
    ]
    analysis["recommendations"] = [
        "Tune scheduling policy.",
        "Check backend installation and engine load logs.",
        "Review runtime event log and fallback policy.",
    ]
    return analysis


def runtime_result_with_operation_evidence() -> dict:
return {
"schema_version": "inferedge-runtime-result-v1",
Expand Down Expand Up @@ -512,7 +584,7 @@ def test_compute_agent_runtime_metrics_from_orchestrator_summary():
def test_agent_runtime_report_blocks_when_guard_blocks():
report = build_agent_runtime_reliability_report(
orchestration_summary=orchestration_summary(),
guard_analysis=sustained_guard_analysis(),
guard_analysis=runtime_operation_guard_analysis(),
runtime_result=runtime_result_with_operation_evidence(),
remote_dispatch=remote_dispatch_result(),
)
Expand All @@ -534,8 +606,19 @@ def test_agent_runtime_report_blocks_when_guard_blocks():
assert "guard_blocked_runtime_block" in decision["triggered_rules"]
assert "drop_rate_block" in decision["triggered_rules"]
assert "sustained_overload_review" in decision["triggered_rules"]
assert "runtime_operation_guard_block" in decision["triggered_rules"]
assert report["guard_summary"]["guard_verdict"] == "blocked"
assert "sustained_overload_risk" in report["guard_summary"]["evidence_types"]
assert "runtime_backend_unavailable" in report["guard_summary"]["evidence_types"]
runtime_guard = report["runtime_operation_guard_summary"]
assert runtime_guard["evidence_count"] == 2
assert runtime_guard["failed_count"] == 2
assert runtime_guard["retry_hints"] == ["check_backend_availability"]
assert {
item["type"] for item in runtime_guard["evidence"]
} == {
"runtime_backend_unavailable",
"runtime_latency_budget_overrun",
}
assert report["agent_runtime_summary"]["timeline_summary"] == {
"scenario_mode": "sustained_high_load",
"queue_depth_sample_count": 1,
Expand All @@ -548,7 +631,11 @@ def test_agent_runtime_report_blocks_when_guard_blocks():
}
assert {
item["type"] for item in report["runtime_reliability_evidence"]
} == {"excessive_drop_rate", "sustained_overload_risk"}
} == {
"excessive_drop_rate",
"runtime_backend_unavailable",
"runtime_latency_budget_overrun",
}
operation_context = report["agent_runtime_summary"]["operation_context"]
assert operation_context["queue_state_summary"]["queue_pressure_state"] == "overloaded"
assert operation_context["worker_health_counts"] == {
Expand Down Expand Up @@ -630,7 +717,7 @@ def test_agent_runtime_report_keeps_legacy_orchestrator_summary_compatible():
def test_agent_runtime_report_markdown_contains_sections():
report = build_agent_runtime_reliability_report(
orchestration_summary=orchestration_summary(),
guard_analysis=sustained_guard_analysis(),
guard_analysis=runtime_operation_guard_analysis(),
runtime_result=runtime_result_with_operation_evidence(),
remote_dispatch=remote_dispatch_result(),
)
Expand All @@ -644,6 +731,10 @@ def test_agent_runtime_report_markdown_contains_sections():
assert "Worker Health" in markdown
assert "Runtime Event Summary" in markdown
assert "Runtime Result Operation Evidence" in markdown
assert "AIGuard Runtime Operation Evidence" in markdown
assert "runtime_backend_unavailable" in markdown
assert "runtime_latency_budget_overrun" in markdown
assert "check_backend_availability" in markdown
assert "Remote Dispatch Context" in markdown
assert "jetson-nano-01" in markdown
assert "plan_only" in markdown
Expand All @@ -655,7 +746,7 @@ def test_agent_runtime_report_markdown_contains_sections():
assert "AIGuard Runtime Reliability Evidence" in markdown
assert "Lab Agent Deployment Decision" in markdown
assert "guard_blocked_runtime_block" in markdown
assert "sustained_overload_risk" in markdown
assert "runtime_operation_guard_block" in markdown
assert "max_total_queue_depth" in markdown
assert "not a production cloud orchestration dashboard" in markdown

Expand Down
Loading