diff --git a/README.md b/README.md index 2824cb5..c497b57 100644 --- a/README.md +++ b/README.md @@ -128,7 +128,8 @@ The Lab decision surface now also exposes `policy_version`, `triggered_rules`, a `agent-runtime-report` is an additive reliable edge agent runtime report path. It bundles Orchestrator scheduling evidence and AIGuard runtime reliability `guard_analysis` into a Lab-owned agent deployment decision context without changing existing Runtime result or compare contracts. The current bundled evidence is a synthetic/dummy sustained high-load 3-agent scenario. -The report preserves sustained queue-depth, worker health, runtime event summary/timeline, policy decision reason, and `sustained_overload_risk` evidence as local-first deployment review context. +The report preserves sustained queue-depth, worker health, Runtime result health/error/event evidence, runtime event summary/timeline, policy decision reason, and `sustained_overload_risk` evidence as local-first deployment review context. +When a Runtime result JSON with `runtime_health_snapshot` / `runtime_events` is available, add `--runtime-result <path>` to include Runtime-side operation context in the same Lab report. ![InferEdge Local Studio demo evidence](assets/images/local-studio-demo-evidence.png) diff --git a/docs/portfolio/agent_runtime_reliability_report.md b/docs/portfolio/agent_runtime_reliability_report.md index 24c9830..6c26cae 100644 --- a/docs/portfolio/agent_runtime_reliability_report.md +++ b/docs/portfolio/agent_runtime_reliability_report.md @@ -67,11 +67,16 @@ runtime operation review: - `runtime_event_summary` for event type counts. - `runtime_event_timeline` sample rows for queue snapshots, policy decisions, drops, and execution outcomes. +- Optional Runtime result operation evidence through `--runtime-result`, + including `runtime_health_snapshot`, `runtime_error_classification`, and + `runtime_events`. 
These fields make the report path explicit: ```text -Orchestrator operation evidence -> AIGuard reliability explanation -> Lab-owned deployment risk context +Runtime result operation evidence + Orchestrator operation evidence +-> AIGuard reliability explanation +-> Lab-owned deployment risk context ``` ## Lab Decision Context diff --git a/inferedgelab/commands/agent_runtime_report.py b/inferedgelab/commands/agent_runtime_report.py index 844c7d3..d146b7a 100644 --- a/inferedgelab/commands/agent_runtime_report.py +++ b/inferedgelab/commands/agent_runtime_report.py @@ -23,12 +23,18 @@ def agent_runtime_report_cmd( "--guard-analysis", help="Optional AIGuard runtime reliability guard_analysis JSON", ), + runtime_result: str = typer.Option( + "", + "--runtime-result", + help="Optional InferEdge-Runtime result JSON with runtime_health_snapshot/runtime_events", + ), format: str = typer.Option("text", "--format", "-f", help="text/json/markdown"), output: str = typer.Option("", "--output", "-o", help="Optional output path"), ) -> None: report = load_agent_runtime_reliability_bundle( orchestration_summary_path=orchestration_summary, guard_analysis_path=guard_analysis or None, + runtime_result_path=runtime_result or None, ) normalized_format = format.strip().lower() if normalized_format == "json": @@ -53,6 +59,9 @@ def _text_summary(report: dict) -> str: metrics = report["agent_runtime_summary"]["metrics"] decision = report["agent_deployment_decision"] guard = report["guard_summary"] + runtime_context = report["agent_runtime_summary"].get("runtime_result_context") or {} + health = runtime_context.get("runtime_health_snapshot") or {} + error = runtime_context.get("runtime_error_classification") or {} lines = [ "InferEdge Agent Runtime Reliability Report", f"schema_version: {report['schema_version']}", @@ -63,6 +72,8 @@ def _text_summary(report: dict) -> str: f"drop_rate: {metrics['drop_rate']:.6g}", f"fallback_rate: {metrics['fallback_rate']:.6g}", f"deadline_miss_rate: 
{metrics['deadline_miss_rate']:.6g}", + f"runtime_health_status: {health.get('status')}", + f"runtime_error_category: {error.get('category')}", "triggered_rules:", ] lines.extend(f"- {rule}" for rule in decision["triggered_rules"]) diff --git a/inferedgelab/services/agent_runtime_report.py b/inferedgelab/services/agent_runtime_report.py index cdb530b..b193ed2 100644 --- a/inferedgelab/services/agent_runtime_report.py +++ b/inferedgelab/services/agent_runtime_report.py @@ -90,6 +90,7 @@ def build_agent_runtime_reliability_report( *, orchestration_summary: dict[str, Any], guard_analysis: dict[str, Any] | None = None, + runtime_result: dict[str, Any] | None = None, source: dict[str, Any] | None = None, thresholds: dict[str, float] | None = None, ) -> dict[str, Any]: @@ -98,6 +99,7 @@ def build_agent_runtime_reliability_report( policy = {**DEFAULT_AGENT_RUNTIME_THRESHOLDS, **(thresholds or {})} metrics = compute_agent_runtime_metrics(orchestration_summary) runtime_summary = _agent_runtime_summary(orchestration_summary) + runtime_result_context = _runtime_result_operation_context(runtime_result) decision = build_agent_runtime_deployment_decision( metrics=metrics, guard_analysis=guard_analysis, @@ -119,6 +121,11 @@ def build_agent_runtime_reliability_report( if isinstance(guard_analysis, dict) else None ), + "runtime_result": ( + runtime_result.get("schema_version") + if isinstance(runtime_result, dict) + else None + ), "source_contracts": runtime_summary.get("source_contracts", {}), }, "agent_runtime_summary": { @@ -127,6 +134,7 @@ def build_agent_runtime_reliability_report( "metrics": metrics, "timeline_summary": _timeline_summary(orchestration_summary, metrics), "operation_context": _operation_context(orchestration_summary, metrics), + "runtime_result_context": runtime_result_context, "policy_decision_reasons": metrics["policy_decision_reasons"], "policy_decision_log_count": len(_policy_log(orchestration_summary)), }, @@ -332,17 +340,23 @@ def 
load_agent_runtime_reliability_bundle( *, orchestration_summary_path: str | Path, guard_analysis_path: str | Path | None = None, + runtime_result_path: str | Path | None = None, ) -> dict[str, Any]: orchestration_summary = _load_json_dict(orchestration_summary_path) guard_analysis = _load_json_dict(guard_analysis_path) if guard_analysis_path else None + runtime_result = _load_json_dict(runtime_result_path) if runtime_result_path else None return build_agent_runtime_reliability_report( orchestration_summary=orchestration_summary, guard_analysis=guard_analysis, + runtime_result=runtime_result, source={ "orchestration_summary_path": str(orchestration_summary_path), "guard_analysis_path": str(guard_analysis_path) if guard_analysis_path else None, + "runtime_result_path": str(runtime_result_path) + if runtime_result_path + else None, }, ) @@ -352,6 +366,10 @@ def build_agent_runtime_reliability_markdown(report: dict[str, Any]) -> str: metrics = runtime["metrics"] decision = report["agent_deployment_decision"] guard = report["guard_summary"] + runtime_result_context = runtime.get("runtime_result_context") or {} + runtime_health = runtime_result_context.get("runtime_health_snapshot") or {} + runtime_error = runtime_result_context.get("runtime_error_classification") or {} + runtime_event_summary = runtime_result_context.get("runtime_event_summary") or {} lines = [ "# InferEdge Agent Runtime Reliability Report", @@ -466,6 +484,33 @@ def build_agent_runtime_reliability_markdown(report: dict[str, Any]) -> str: ] ], "", + "## Runtime Result Operation Evidence", + "", + "| Field | Value |", + "|---|---|", + f"| runtime_result_schema | {runtime_result_context.get('source_schema_version') or '-'} |", + f"| compare_key | {runtime_result_context.get('compare_key') or '-'} |", + f"| backend_key | {runtime_result_context.get('backend_key') or '-'} |", + f"| runtime_status | {runtime_health.get('status') or runtime_result_context.get('status') or '-'} |", + f"| runtime_error_category 
| {runtime_error.get('category') or '-'} |", + f"| timeout_observed | {runtime_health.get('timeout_observed', runtime_error.get('timeout_observed', '-'))} |", + f"| runtime_event_count | {_fmt_number(runtime_event_summary.get('event_count'))} |", + "", + "Runtime result event sample:", + "", + "| # | Type | Status | Detail |", + "|---:|---|---|---|", + *[ + "| " + f"{index} | " + f"{event.get('type') or event.get('event_type') or '-'} | " + f"{event.get('status') or '-'} | " + f"{event.get('category') or event.get('reason') or event.get('engine_backend') or '-'} |" + for index, event in enumerate( + runtime_result_context.get("runtime_event_sample") or [] + ) + ], + "", "## AIGuard Runtime Reliability Evidence", "", f"- guard_status: `{guard.get('status')}`", @@ -685,6 +730,46 @@ def _operation_context( } +def _runtime_result_operation_context( + runtime_result: dict[str, Any] | None, +) -> dict[str, Any]: + if not isinstance(runtime_result, dict): + return { + "source_schema_version": None, + "compare_key": None, + "backend_key": None, + "status": None, + "success": None, + "runtime_health_snapshot": {}, + "runtime_error_classification": {}, + "runtime_event_summary": { + "schema_version": None, + "event_count": 0, + "event_type_counts": {}, + }, + "runtime_event_sample": [], + } + + health = runtime_result.get("runtime_health_snapshot") + error = runtime_result.get("runtime_error_classification") + runtime_events = _dict_list(runtime_result.get("runtime_events")) + return { + "source_schema_version": runtime_result.get("schema_version"), + "compare_key": runtime_result.get("compare_key"), + "backend_key": runtime_result.get("backend_key"), + "status": runtime_result.get("status"), + "success": runtime_result.get("success"), + "runtime_health_snapshot": dict(health) if isinstance(health, dict) else {}, + "runtime_error_classification": dict(error) if isinstance(error, dict) else {}, + "runtime_event_summary": { + "schema_version": 
"inferedgelab-runtime-result-event-summary-v1", + "event_count": len(runtime_events), + "event_type_counts": _runtime_result_event_type_counts(runtime_events), + }, + "runtime_event_sample": runtime_events[:8], + } + + def _queue_state_summary(orchestration_summary: dict[str, Any]) -> dict[str, Any]: value = orchestration_summary.get("queue_state_summary") if isinstance(value, dict): @@ -749,6 +834,16 @@ def _runtime_event_type_counts(runtime_events: list[dict[str, Any]]) -> dict[str return counts +def _runtime_result_event_type_counts(runtime_events: list[dict[str, Any]]) -> dict[str, int]: + counts: dict[str, int] = {} + for event in runtime_events: + event_type = event.get("type") or event.get("event_type") + if not isinstance(event_type, str) or not event_type: + event_type = "unknown" + counts[event_type] = counts.get(event_type, 0) + 1 + return counts + + def _worker_health_counts(worker_health_snapshot: dict[str, Any]) -> dict[str, int]: workers = worker_health_snapshot.get("workers") if not isinstance(workers, dict): diff --git a/tests/test_agent_runtime_report.py b/tests/test_agent_runtime_report.py index 14fa93d..30a4bb7 100644 --- a/tests/test_agent_runtime_report.py +++ b/tests/test_agent_runtime_report.py @@ -218,6 +218,66 @@ def guard_analysis() -> dict: } +def runtime_result_with_operation_evidence() -> dict: + return { + "schema_version": "inferedge-runtime-result-v1", + "compare_key": "yolov8n__b1__h224w224__fp32", + "backend_key": "onnxruntime__cpu", + "status": "skipped", + "success": False, + "runtime_health_snapshot": { + "schema_version": "inferedge-runtime-health-v1", + "status": "degraded", + "engine_backend": "onnxruntime", + "device": "cpu", + "input_mode": "dummy", + "input_preprocess": "synthetic", + "warmup": 1, + "runs": 1, + "run_once": False, + "success": False, + "latency_mean_ms": 0.0, + "latency_p95_ms": 0.0, + "latency_p99_ms": 0.0, + "fps": 0.0, + "power_mode": "unknown", + "jetson_clocks": "unknown", + "timeout_policy": 
"not_configured", + "timeout_observed": False, + }, + "runtime_error_classification": { + "schema_version": "inferedge-runtime-error-v1", + "status": "classified", + "category": "runtime_execution_skipped", + "message": "backend is not available in this build", + "timeout_observed": False, + "retryable": False, + }, + "runtime_events": [ + { + "type": "runtime_configured", + "status": "ok", + "engine_backend": "onnxruntime", + "device": "cpu", + "input_mode": "dummy", + }, + { + "type": "benchmark_completed", + "status": "skipped", + "success": False, + "warmup": 1, + "runs": 1, + "mean_ms": 0.0, + }, + { + "type": "runtime_error_classified", + "status": "classified", + "category": "runtime_execution_skipped", + }, + ], + } + + def sustained_guard_analysis() -> dict: data = guard_analysis() data["evidence"].append( @@ -285,6 +345,7 @@ def test_agent_runtime_report_blocks_when_guard_blocks(): report = build_agent_runtime_reliability_report( orchestration_summary=orchestration_summary(), guard_analysis=sustained_guard_analysis(), + runtime_result=runtime_result_with_operation_evidence(), ) decision = report["agent_deployment_decision"] @@ -295,6 +356,7 @@ def test_agent_runtime_report_blocks_when_guard_blocks(): assert report["contracts"]["aiguard_guard_analysis"] == ( "inferedge-aiguard-diagnosis-v1" ) + assert report["contracts"]["runtime_result"] == "inferedge-runtime-result-v1" assert decision["policy_version"] == AGENT_RUNTIME_POLICY_VERSION assert decision["decision"] == "blocked" assert "guard_blocked_runtime_block" in decision["triggered_rules"] @@ -326,6 +388,18 @@ def test_agent_runtime_report_blocks_when_guard_blocks(): assert operation_context["runtime_event_timeline_sample"][1]["event_type"] == ( "policy_decision" ) + runtime_context = report["agent_runtime_summary"]["runtime_result_context"] + assert runtime_context["source_schema_version"] == "inferedge-runtime-result-v1" + assert runtime_context["runtime_health_snapshot"]["status"] == "degraded" + 
assert runtime_context["runtime_error_classification"]["category"] == ( + "runtime_execution_skipped" + ) + assert runtime_context["runtime_event_summary"]["event_count"] == 3 + assert runtime_context["runtime_event_summary"]["event_type_counts"] == { + "runtime_configured": 1, + "benchmark_completed": 1, + "runtime_error_classified": 1, + } def test_agent_runtime_report_keeps_legacy_orchestrator_summary_compatible(): @@ -352,6 +426,7 @@ def test_agent_runtime_report_markdown_contains_sections(): report = build_agent_runtime_reliability_report( orchestration_summary=orchestration_summary(), guard_analysis=sustained_guard_analysis(), + runtime_result=runtime_result_with_operation_evidence(), ) markdown = build_agent_runtime_reliability_markdown(report) @@ -362,6 +437,8 @@ def test_agent_runtime_report_markdown_contains_sections(): assert "Queue State" in markdown assert "Worker Health" in markdown assert "Runtime Event Summary" in markdown + assert "Runtime Result Operation Evidence" in markdown + assert "runtime_execution_skipped" in markdown assert "queue_pressure_state" in markdown assert "policy_decision" in markdown assert "AIGuard Runtime Reliability Evidence" in markdown @@ -383,10 +460,15 @@ def test_agent_runtime_report_loads_committed_fixtures(): assert len(report["agent_runtime_summary"]["agents"]) == 3 -def test_agent_runtime_report_command_outputs_json(capsys): +def test_agent_runtime_report_command_outputs_json(tmp_path, capsys): + runtime_result_path = tmp_path / "runtime_operation_result.json" + with runtime_result_path.open("w", encoding="utf-8") as file: + json.dump(runtime_result_with_operation_evidence(), file) + agent_runtime_report_cmd( orchestration_summary="examples/agent_runtime/agent_3_orchestration_summary.json", guard_analysis="examples/agent_runtime/aiguard_runtime_guard_analysis.json", + runtime_result=str(runtime_result_path), format="json", output="", ) @@ -395,3 +477,5 @@ def test_agent_runtime_report_command_outputs_json(capsys): 
assert report["schema_version"] == AGENT_RUNTIME_REPORT_SCHEMA_VERSION assert report["agent_deployment_decision"]["decision"] == "blocked" + runtime_context = report["agent_runtime_summary"]["runtime_result_context"] + assert runtime_context["runtime_health_snapshot"]["status"] == "degraded"