Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ The Lab decision surface now also exposes `policy_version`, `triggered_rules`, a
`agent-runtime-report` is an additive reliable edge agent runtime report path.
It bundles Orchestrator scheduling evidence and AIGuard runtime reliability `guard_analysis` into a Lab-owned agent deployment decision context without changing existing Runtime result or compare contracts.
The current bundled evidence is a synthetic/dummy sustained high-load 3-agent scenario.
The report preserves sustained queue-depth, worker health, runtime event summary/timeline, policy decision reason, and `sustained_overload_risk` evidence as local-first deployment review context.
The report preserves sustained queue-depth, worker health, Runtime result health/error/event evidence, runtime event summary/timeline, policy decision reason, and `sustained_overload_risk` evidence as local-first deployment review context.
When a Runtime result JSON with `runtime_health_snapshot` / `runtime_events` is available, add `--runtime-result <path>` to include Runtime-side operation context in the same Lab report.

![InferEdge Local Studio demo evidence](assets/images/local-studio-demo-evidence.png)

Expand Down
7 changes: 6 additions & 1 deletion docs/portfolio/agent_runtime_reliability_report.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,16 @@ runtime operation review:
- `runtime_event_summary` for event type counts.
- `runtime_event_timeline` sample rows for queue snapshots, policy decisions,
drops, and execution outcomes.
- Optional Runtime result operation evidence through `--runtime-result`,
including `runtime_health_snapshot`, `runtime_error_classification`, and
`runtime_events`.

These fields make the report path explicit:

```text
Orchestrator operation evidence -> AIGuard reliability explanation -> Lab-owned deployment risk context
Runtime result operation evidence + Orchestrator operation evidence
-> AIGuard reliability explanation
-> Lab-owned deployment risk context
```

## Lab Decision Context
Expand Down
11 changes: 11 additions & 0 deletions inferedgelab/commands/agent_runtime_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,18 @@ def agent_runtime_report_cmd(
"--guard-analysis",
help="Optional AIGuard runtime reliability guard_analysis JSON",
),
runtime_result: str = typer.Option(
"",
"--runtime-result",
help="Optional InferEdge-Runtime result JSON with runtime_health_snapshot/runtime_events",
),
format: str = typer.Option("text", "--format", "-f", help="text/json/markdown"),
output: str = typer.Option("", "--output", "-o", help="Optional output path"),
) -> None:
report = load_agent_runtime_reliability_bundle(
orchestration_summary_path=orchestration_summary,
guard_analysis_path=guard_analysis or None,
runtime_result_path=runtime_result or None,
)
normalized_format = format.strip().lower()
if normalized_format == "json":
Expand All @@ -53,6 +59,9 @@ def _text_summary(report: dict) -> str:
metrics = report["agent_runtime_summary"]["metrics"]
decision = report["agent_deployment_decision"]
guard = report["guard_summary"]
runtime_context = report["agent_runtime_summary"].get("runtime_result_context") or {}
health = runtime_context.get("runtime_health_snapshot") or {}
error = runtime_context.get("runtime_error_classification") or {}
lines = [
"InferEdge Agent Runtime Reliability Report",
f"schema_version: {report['schema_version']}",
Expand All @@ -63,6 +72,8 @@ def _text_summary(report: dict) -> str:
f"drop_rate: {metrics['drop_rate']:.6g}",
f"fallback_rate: {metrics['fallback_rate']:.6g}",
f"deadline_miss_rate: {metrics['deadline_miss_rate']:.6g}",
f"runtime_health_status: {health.get('status')}",
f"runtime_error_category: {error.get('category')}",
"triggered_rules:",
]
lines.extend(f"- {rule}" for rule in decision["triggered_rules"])
Expand Down
95 changes: 95 additions & 0 deletions inferedgelab/services/agent_runtime_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def build_agent_runtime_reliability_report(
*,
orchestration_summary: dict[str, Any],
guard_analysis: dict[str, Any] | None = None,
runtime_result: dict[str, Any] | None = None,
source: dict[str, Any] | None = None,
thresholds: dict[str, float] | None = None,
) -> dict[str, Any]:
Expand All @@ -98,6 +99,7 @@ def build_agent_runtime_reliability_report(
policy = {**DEFAULT_AGENT_RUNTIME_THRESHOLDS, **(thresholds or {})}
metrics = compute_agent_runtime_metrics(orchestration_summary)
runtime_summary = _agent_runtime_summary(orchestration_summary)
runtime_result_context = _runtime_result_operation_context(runtime_result)
decision = build_agent_runtime_deployment_decision(
metrics=metrics,
guard_analysis=guard_analysis,
Expand All @@ -119,6 +121,11 @@ def build_agent_runtime_reliability_report(
if isinstance(guard_analysis, dict)
else None
),
"runtime_result": (
runtime_result.get("schema_version")
if isinstance(runtime_result, dict)
else None
),
"source_contracts": runtime_summary.get("source_contracts", {}),
},
"agent_runtime_summary": {
Expand All @@ -127,6 +134,7 @@ def build_agent_runtime_reliability_report(
"metrics": metrics,
"timeline_summary": _timeline_summary(orchestration_summary, metrics),
"operation_context": _operation_context(orchestration_summary, metrics),
"runtime_result_context": runtime_result_context,
"policy_decision_reasons": metrics["policy_decision_reasons"],
"policy_decision_log_count": len(_policy_log(orchestration_summary)),
},
Expand Down Expand Up @@ -332,17 +340,23 @@ def load_agent_runtime_reliability_bundle(
*,
orchestration_summary_path: str | Path,
guard_analysis_path: str | Path | None = None,
runtime_result_path: str | Path | None = None,
) -> dict[str, Any]:
orchestration_summary = _load_json_dict(orchestration_summary_path)
guard_analysis = _load_json_dict(guard_analysis_path) if guard_analysis_path else None
runtime_result = _load_json_dict(runtime_result_path) if runtime_result_path else None
return build_agent_runtime_reliability_report(
orchestration_summary=orchestration_summary,
guard_analysis=guard_analysis,
runtime_result=runtime_result,
source={
"orchestration_summary_path": str(orchestration_summary_path),
"guard_analysis_path": str(guard_analysis_path)
if guard_analysis_path
else None,
"runtime_result_path": str(runtime_result_path)
if runtime_result_path
else None,
},
)

Expand All @@ -352,6 +366,10 @@ def build_agent_runtime_reliability_markdown(report: dict[str, Any]) -> str:
metrics = runtime["metrics"]
decision = report["agent_deployment_decision"]
guard = report["guard_summary"]
runtime_result_context = runtime.get("runtime_result_context") or {}
runtime_health = runtime_result_context.get("runtime_health_snapshot") or {}
runtime_error = runtime_result_context.get("runtime_error_classification") or {}
runtime_event_summary = runtime_result_context.get("runtime_event_summary") or {}

lines = [
"# InferEdge Agent Runtime Reliability Report",
Expand Down Expand Up @@ -466,6 +484,33 @@ def build_agent_runtime_reliability_markdown(report: dict[str, Any]) -> str:
]
],
"",
"## Runtime Result Operation Evidence",
"",
"| Field | Value |",
"|---|---|",
f"| runtime_result_schema | {runtime_result_context.get('source_schema_version') or '-'} |",
f"| compare_key | {runtime_result_context.get('compare_key') or '-'} |",
f"| backend_key | {runtime_result_context.get('backend_key') or '-'} |",
f"| runtime_status | {runtime_health.get('status') or runtime_result_context.get('status') or '-'} |",
f"| runtime_error_category | {runtime_error.get('category') or '-'} |",
f"| timeout_observed | {runtime_health.get('timeout_observed', runtime_error.get('timeout_observed', '-'))} |",
f"| runtime_event_count | {_fmt_number(runtime_event_summary.get('event_count'))} |",
"",
"Runtime result event sample:",
"",
"| # | Type | Status | Detail |",
"|---:|---|---|---|",
*[
"| "
f"{index} | "
f"{event.get('type') or event.get('event_type') or '-'} | "
f"{event.get('status') or '-'} | "
f"{event.get('category') or event.get('reason') or event.get('engine_backend') or '-'} |"
for index, event in enumerate(
runtime_result_context.get("runtime_event_sample") or []
)
],
"",
"## AIGuard Runtime Reliability Evidence",
"",
f"- guard_status: `{guard.get('status')}`",
Expand Down Expand Up @@ -685,6 +730,46 @@ def _operation_context(
}


def _runtime_result_operation_context(
runtime_result: dict[str, Any] | None,
) -> dict[str, Any]:
if not isinstance(runtime_result, dict):
return {
"source_schema_version": None,
"compare_key": None,
"backend_key": None,
"status": None,
"success": None,
"runtime_health_snapshot": {},
"runtime_error_classification": {},
"runtime_event_summary": {
"schema_version": None,
"event_count": 0,
"event_type_counts": {},
},
"runtime_event_sample": [],
}

health = runtime_result.get("runtime_health_snapshot")
error = runtime_result.get("runtime_error_classification")
runtime_events = _dict_list(runtime_result.get("runtime_events"))
return {
"source_schema_version": runtime_result.get("schema_version"),
"compare_key": runtime_result.get("compare_key"),
"backend_key": runtime_result.get("backend_key"),
"status": runtime_result.get("status"),
"success": runtime_result.get("success"),
"runtime_health_snapshot": dict(health) if isinstance(health, dict) else {},
"runtime_error_classification": dict(error) if isinstance(error, dict) else {},
"runtime_event_summary": {
"schema_version": "inferedgelab-runtime-result-event-summary-v1",
"event_count": len(runtime_events),
"event_type_counts": _runtime_result_event_type_counts(runtime_events),
},
"runtime_event_sample": runtime_events[:8],
}


def _queue_state_summary(orchestration_summary: dict[str, Any]) -> dict[str, Any]:
value = orchestration_summary.get("queue_state_summary")
if isinstance(value, dict):
Expand Down Expand Up @@ -749,6 +834,16 @@ def _runtime_event_type_counts(runtime_events: list[dict[str, Any]]) -> dict[str
return counts


def _runtime_result_event_type_counts(runtime_events: list[dict[str, Any]]) -> dict[str, int]:
counts: dict[str, int] = {}
for event in runtime_events:
event_type = event.get("type") or event.get("event_type")
if not isinstance(event_type, str) or not event_type:
event_type = "unknown"
counts[event_type] = counts.get(event_type, 0) + 1
return counts


def _worker_health_counts(worker_health_snapshot: dict[str, Any]) -> dict[str, int]:
workers = worker_health_snapshot.get("workers")
if not isinstance(workers, dict):
Expand Down
86 changes: 85 additions & 1 deletion tests/test_agent_runtime_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,66 @@ def guard_analysis() -> dict:
}


def runtime_result_with_operation_evidence() -> dict:
    """Build a sample Runtime result carrying operation evidence.

    The payload describes a skipped benchmark run: top-level status
    ``skipped``, a ``degraded`` health snapshot, an error classified as
    ``runtime_execution_skipped``, and three runtime events. A fresh dict is
    built on every call so tests can mutate it freely.
    """
    health_snapshot = {
        "schema_version": "inferedge-runtime-health-v1",
        "status": "degraded",
        "engine_backend": "onnxruntime",
        "device": "cpu",
        "input_mode": "dummy",
        "input_preprocess": "synthetic",
        "warmup": 1,
        "runs": 1,
        "run_once": False,
        "success": False,
        "latency_mean_ms": 0.0,
        "latency_p95_ms": 0.0,
        "latency_p99_ms": 0.0,
        "fps": 0.0,
        "power_mode": "unknown",
        "jetson_clocks": "unknown",
        "timeout_policy": "not_configured",
        "timeout_observed": False,
    }
    error_classification = {
        "schema_version": "inferedge-runtime-error-v1",
        "status": "classified",
        "category": "runtime_execution_skipped",
        "message": "backend is not available in this build",
        "timeout_observed": False,
        "retryable": False,
    }

    # Events in emission order: configuration, benchmark outcome, error.
    configured_event = {
        "type": "runtime_configured",
        "status": "ok",
        "engine_backend": "onnxruntime",
        "device": "cpu",
        "input_mode": "dummy",
    }
    benchmark_event = {
        "type": "benchmark_completed",
        "status": "skipped",
        "success": False,
        "warmup": 1,
        "runs": 1,
        "mean_ms": 0.0,
    }
    error_event = {
        "type": "runtime_error_classified",
        "status": "classified",
        "category": "runtime_execution_skipped",
    }

    payload = {
        "schema_version": "inferedge-runtime-result-v1",
        "compare_key": "yolov8n__b1__h224w224__fp32",
        "backend_key": "onnxruntime__cpu",
        "status": "skipped",
        "success": False,
    }
    payload["runtime_health_snapshot"] = health_snapshot
    payload["runtime_error_classification"] = error_classification
    payload["runtime_events"] = [configured_event, benchmark_event, error_event]
    return payload


def sustained_guard_analysis() -> dict:
data = guard_analysis()
data["evidence"].append(
Expand Down Expand Up @@ -285,6 +345,7 @@ def test_agent_runtime_report_blocks_when_guard_blocks():
report = build_agent_runtime_reliability_report(
orchestration_summary=orchestration_summary(),
guard_analysis=sustained_guard_analysis(),
runtime_result=runtime_result_with_operation_evidence(),
)

decision = report["agent_deployment_decision"]
Expand All @@ -295,6 +356,7 @@ def test_agent_runtime_report_blocks_when_guard_blocks():
assert report["contracts"]["aiguard_guard_analysis"] == (
"inferedge-aiguard-diagnosis-v1"
)
assert report["contracts"]["runtime_result"] == "inferedge-runtime-result-v1"
assert decision["policy_version"] == AGENT_RUNTIME_POLICY_VERSION
assert decision["decision"] == "blocked"
assert "guard_blocked_runtime_block" in decision["triggered_rules"]
Expand Down Expand Up @@ -326,6 +388,18 @@ def test_agent_runtime_report_blocks_when_guard_blocks():
assert operation_context["runtime_event_timeline_sample"][1]["event_type"] == (
"policy_decision"
)
runtime_context = report["agent_runtime_summary"]["runtime_result_context"]
assert runtime_context["source_schema_version"] == "inferedge-runtime-result-v1"
assert runtime_context["runtime_health_snapshot"]["status"] == "degraded"
assert runtime_context["runtime_error_classification"]["category"] == (
"runtime_execution_skipped"
)
assert runtime_context["runtime_event_summary"]["event_count"] == 3
assert runtime_context["runtime_event_summary"]["event_type_counts"] == {
"runtime_configured": 1,
"benchmark_completed": 1,
"runtime_error_classified": 1,
}


def test_agent_runtime_report_keeps_legacy_orchestrator_summary_compatible():
Expand All @@ -352,6 +426,7 @@ def test_agent_runtime_report_markdown_contains_sections():
report = build_agent_runtime_reliability_report(
orchestration_summary=orchestration_summary(),
guard_analysis=sustained_guard_analysis(),
runtime_result=runtime_result_with_operation_evidence(),
)
markdown = build_agent_runtime_reliability_markdown(report)

Expand All @@ -362,6 +437,8 @@ def test_agent_runtime_report_markdown_contains_sections():
assert "Queue State" in markdown
assert "Worker Health" in markdown
assert "Runtime Event Summary" in markdown
assert "Runtime Result Operation Evidence" in markdown
assert "runtime_execution_skipped" in markdown
assert "queue_pressure_state" in markdown
assert "policy_decision" in markdown
assert "AIGuard Runtime Reliability Evidence" in markdown
Expand All @@ -383,10 +460,15 @@ def test_agent_runtime_report_loads_committed_fixtures():
assert len(report["agent_runtime_summary"]["agents"]) == 3


def test_agent_runtime_report_command_outputs_json(capsys):
def test_agent_runtime_report_command_outputs_json(tmp_path, capsys):
runtime_result_path = tmp_path / "runtime_operation_result.json"
with runtime_result_path.open("w", encoding="utf-8") as file:
json.dump(runtime_result_with_operation_evidence(), file)

agent_runtime_report_cmd(
orchestration_summary="examples/agent_runtime/agent_3_orchestration_summary.json",
guard_analysis="examples/agent_runtime/aiguard_runtime_guard_analysis.json",
runtime_result=str(runtime_result_path),
format="json",
output="",
)
Expand All @@ -395,3 +477,5 @@ def test_agent_runtime_report_command_outputs_json(capsys):

assert report["schema_version"] == AGENT_RUNTIME_REPORT_SCHEMA_VERSION
assert report["agent_deployment_decision"]["decision"] == "blocked"
runtime_context = report["agent_runtime_summary"]["runtime_result_context"]
assert runtime_context["runtime_health_snapshot"]["status"] == "degraded"
Loading