From 52bce612d51218342980817bff4fd9a20a5516db Mon Sep 17 00:00:00 2001
From: hyeokjun32 <ksjm0417@naver.com>
Date: Sun, 17 May 2026 00:49:00 +0900
Subject: [PATCH] feat: add agent runtime reliability report

---
 README.ko.md                                  |   5 +
 README.md                                     |   6 +
 .../agent_runtime_reliability_report.md       |  77 +++
 .../agent_3_orchestration_summary.json        |  69 +++
 .../aiguard_runtime_guard_analysis.json       |  57 +++
 inferedgelab/cli.py                           |   4 +
 inferedgelab/commands/agent_runtime_report.py |  70 +++
 inferedgelab/services/agent_runtime_report.py | 459 ++++++++++++++++++
 tests/test_agent_runtime_report.py            | 161 ++++++
 9 files changed, 908 insertions(+)
 create mode 100644 docs/portfolio/agent_runtime_reliability_report.md
 create mode 100644 examples/agent_runtime/agent_3_orchestration_summary.json
 create mode 100644 examples/agent_runtime/aiguard_runtime_guard_analysis.json
 create mode 100644 inferedgelab/commands/agent_runtime_report.py
 create mode 100644 inferedgelab/services/agent_runtime_report.py
 create mode 100644 tests/test_agent_runtime_report.py

diff --git a/README.ko.md b/README.ko.md
index bdfad63..daf1c84 100644
--- a/README.ko.md
+++ b/README.ko.md
@@ -58,12 +58,17 @@ Recommended demo flow:
 ```bash
 poetry run inferedgelab demo-evidence-summary
 poetry run inferedgelab demo-evidence-summary --format json
+poetry run inferedgelab agent-runtime-report \
+  --orchestration-summary examples/agent_runtime/agent_3_orchestration_summary.json \
+  --guard-analysis examples/agent_runtime/aiguard_runtime_guard_analysis.json
 poetry run inferedgelab export-demo-evidence --output reports/studio_demo_evidence.md
 ```
 
 Load Demo Evidence는 bundled ONNX Runtime CPU / TensorRT Jetson result fixture를 불러오고, Run / Import / Jetson Helper는 기존 CLI/API workflow를 local UI로 확장하는 보조 기능입니다.
 Studio evidence와 jobs는 in-memory이며 local server process가 재시작되면 초기화됩니다.
 
+`agent-runtime-report`는 Orchestrator scheduling evidence와 AIGuard runtime reliability `guard_analysis`를 Lab-owned agent deployment decision context로 묶는 additive report path입니다. 기존 Runtime result나 compare contract는 변경하지 않습니다.
+
 ## 이 레포의 역할
 
 - Runtime benchmark/result JSON을 읽어 compare/report를 생성합니다.
diff --git a/README.md b/README.md
index 458fc4c..b4befb2 100644
--- a/README.md
+++ b/README.md
@@ -113,6 +113,9 @@ poetry run inferedgelab demo-evidence-summary
 poetry run inferedgelab demo-evidence-summary --format json
 poetry run inferedgelab portfolio-demo-check
 poetry run inferedgelab core4-conformance-check
+poetry run inferedgelab agent-runtime-report \
+  --orchestration-summary examples/agent_runtime/agent_3_orchestration_summary.json \
+  --guard-analysis examples/agent_runtime/aiguard_runtime_guard_analysis.json
 poetry run inferedgelab export-demo-evidence --output reports/studio_demo_evidence.md
 ```
 
@@ -122,6 +125,9 @@ It validates the committed Studio fixtures, expected README/PPT metrics, portfol
 It validates the bundled Forge manifest/metadata fixture, Runtime result JSON, Lab compare/deployment decision surface, and AIGuard `guard_analysis` evidence without mutating existing schemas.
 The Lab decision surface now also exposes `policy_version`, `triggered_rules`, and `policy_summary` so reviewers can see which local policy rules produced deploy/review/block/unknown outcomes.
 
+`agent-runtime-report` is an additive report path for the reliable edge agent runtime.
+It bundles Orchestrator scheduling evidence and AIGuard runtime reliability `guard_analysis` into a Lab-owned agent deployment decision context without changing existing Runtime result or compare contracts.
+
 ![InferEdge Local Studio demo evidence](assets/images/local-studio-demo-evidence.png)
 
 Verified demo fixture values:
diff --git a/docs/portfolio/agent_runtime_reliability_report.md b/docs/portfolio/agent_runtime_reliability_report.md
new file mode 100644
index 0000000..d74cf19
--- /dev/null
+++ b/docs/portfolio/agent_runtime_reliability_report.md
@@ -0,0 +1,77 @@
+# Agent Runtime Reliability Report
+
+## Scope
+
+This report is the first Lab-side bundle view for the reliable edge agent
+runtime path.
+
+It connects:
+
+- Forge `agent_manifest.json` metadata
+- Runtime `result.agent` metadata
+- Orchestrator `inferedge-orchestration-summary-v1`
+- AIGuard `inferedge-aiguard-diagnosis-v1`
+- Lab-owned agent deployment decision context
+
+This is a local-first report path. It is not a production cloud orchestration
+dashboard and does not add DB/queue/auth/billing behavior.
+
+## Demo Bundle
+
+Committed lightweight fixtures:
+
+- `examples/agent_runtime/agent_3_orchestration_summary.json`
+- `examples/agent_runtime/aiguard_runtime_guard_analysis.json`
+
+Generate a Markdown report:
+
+```bash
+poetry run inferedgelab agent-runtime-report \
+  --orchestration-summary examples/agent_runtime/agent_3_orchestration_summary.json \
+  --guard-analysis examples/agent_runtime/aiguard_runtime_guard_analysis.json \
+  --format markdown \
+  --output reports/agent_runtime_reliability_report.md
+```
+
+## Evidence Summary
+
+| Evidence | Value |
+|---|---:|
+| executed_count | 10 |
+| dropped_count | 14 |
+| deadline_missed_count | 1 |
+| fallback_count | 14 |
+| drop_rate | 0.583333 |
+| fallback_rate | 0.583333 |
+| deadline_miss_rate | 0.1 |
+| queue_backlog_policy_decision_count | 1 |
+
+## Lab Decision Context
+
+Expected decision:
+
+```text
+blocked
+```
+
+Primary reason:
+
+```text
+Agent runtime reliability evidence indicates blocked deployment risk.
+```
+
+Triggered rules:
+
+- `guard_blocked_runtime_block`
+- `drop_rate_block`
+- `fallback_rate_block`
+- `deadline_miss_review`
+- `queue_backlog_review`
+
+## Boundary
+
+- Orchestrator records scheduling and policy evidence.
+- AIGuard explains runtime reliability risk.
+- Lab remains the final deployment decision owner.
+- This report is an additive agent-runtime path and does not change existing
+  Runtime result, compare output, or classic deployment decision contracts.
diff --git a/examples/agent_runtime/agent_3_orchestration_summary.json b/examples/agent_runtime/agent_3_orchestration_summary.json
new file mode 100644
index 0000000..898891b
--- /dev/null
+++ b/examples/agent_runtime/agent_3_orchestration_summary.json
@@ -0,0 +1,69 @@
+{
+  "schema_version": "inferedge-orchestration-summary-v1",
+  "agent_runtime_summary": {
+    "schema_version": "inferedge-orchestration-summary-v1",
+    "source_contracts": {
+      "forge_agent_manifest": "inferedge-agent-manifest-v1",
+      "runtime_agent_result": "inferedge-runtime-agent-task-v1"
+    },
+    "agents": {
+      "safety_monitor_agent": {
+        "agent_id": "safety_monitor_agent",
+        "agent_type": "safety",
+        "priority": 100,
+        "latency_budget_ms": 20.0,
+        "fallback_policy": "protect",
+        "task_id": "task_safety_monitor_agent"
+      },
+      "vision_agent": {
+        "agent_id": "vision_agent",
+        "agent_type": "vision",
+        "priority": 90,
+        "latency_budget_ms": 33.0,
+        "fallback_policy": "drop_stale",
+        "task_id": "task_vision_agent"
+      },
+      "voice_command_agent": {
+        "agent_id": "voice_command_agent",
+        "agent_type": "voice",
+        "priority": 50,
+        "latency_budget_ms": 120.0,
+        "fallback_policy": "defer",
+        "task_id": "task_voice_command_agent"
+      }
+    },
+    "totals": {
+      "executed_count": 10,
+      "dropped_count": 14,
+      "deadline_missed_count": 1,
+      "fallback_count": 14,
+      "policy_decision_count": 14,
+      "overload_event_count": 14
+    }
+  },
+  "policy_decision_log": [
+    {
+      "agent_id": "vision_agent",
+      "task_id": "task_vision_agent",
+      "decision": "load_shedding",
+      "reason": "queue_backlog_threshold_exceeded",
+      "fallback_used": true,
+      "protected_agent_id": "safety_monitor_agent"
+    }
+  ],
+  "drop_events": [
+    {
+      "agent_id": "vision_agent",
+      "task_id": "task_vision_agent",
+      "reason": "load_shedding_backlog_threshold_exceeded"
+    }
+  ],
+  "overload_events": [
+    {
+      "agent_id": "vision_agent",
+      "task_id": "task_vision_agent",
+      "fallback_used": true,
+      "reason": "queue_backlog_threshold_exceeded"
+    }
+  ]
+}
diff --git a/examples/agent_runtime/aiguard_runtime_guard_analysis.json b/examples/agent_runtime/aiguard_runtime_guard_analysis.json
new file mode 100644
index 0000000..65de9b6
--- /dev/null
+++ b/examples/agent_runtime/aiguard_runtime_guard_analysis.json
@@ -0,0 +1,57 @@
+{
+  "schema_version": "inferedge-aiguard-diagnosis-v1",
+  "source": {
+    "orchestration_summary_schema_version": "inferedge-orchestration-summary-v1"
+  },
+  "guard_verdict": "blocked",
+  "severity": "high",
+  "confidence": 0.88,
+  "primary_reason": "drop_rate indicates runtime reliability risk under orchestrated multi-agent load.",
+  "evidence": [
+    {
+      "type": "excessive_drop_rate",
+      "metric_name": "drop_rate",
+      "observed_value": 0.5833333333333334,
+      "baseline_value": null,
+      "threshold": 0.2,
+      "delta": null,
+      "delta_pct": null,
+      "increase_factor": null,
+      "severity": "high",
+      "status": "failed",
+      "explanation": "Drop rate crossed the configured review threshold under synthetic 3-agent load.",
+      "why_it_matters": "High drop rate can make camera or command workloads stale even if selected high-priority tasks are protected.",
+      "suspected_causes": [
+        "queue_backlog",
+        "overload_load_shedding",
+        "producer_rate_exceeds_runtime_capacity"
+      ],
+      "recommendation": "Tune target FPS, queue size, drop policy, or fallback policy for affected agents.",
+      "raw_context": {
+        "executed_count": 10,
+        "dropped_count": 14
+      }
+    }
+  ],
+  "suspected_causes": [
+    "queue_backlog",
+    "overload_load_shedding",
+    "producer_rate_exceeds_runtime_capacity"
+  ],
+  "recommendations": [
+    "Tune target FPS, queue size, drop policy, or fallback policy for affected agents."
+  ],
+  "thresholds": {
+    "drop_rate_review": 0.2,
+    "drop_rate_blocked": 0.5
+  },
+  "baseline_summary": {},
+  "candidate_summary": {
+    "runtime_reliability": {
+      "drop_rate": 0.5833333333333334,
+      "fallback_rate": 0.5833333333333334,
+      "deadline_miss_rate": 0.1
+    }
+  },
+  "created_at": "2026-05-17T00:00:00Z"
+}
diff --git a/inferedgelab/cli.py b/inferedgelab/cli.py
index 3ae1f06..4065487 100644
--- a/inferedgelab/cli.py
+++ b/inferedgelab/cli.py
@@ -16,6 +16,7 @@
 from inferedgelab.commands.demo_evidence import export_demo_evidence_cmd
 from inferedgelab.commands.demo_evidence import portfolio_demo_check_cmd
 from inferedgelab.commands.core4_conformance import core4_conformance_check_cmd
+from inferedgelab.commands.agent_runtime_report import agent_runtime_report_cmd
 from inferedgelab.commands.list_results import list_results_cmd
 from inferedgelab.commands.history_report import history_report_cmd
 from inferedgelab.commands.serve import serve_cmd
@@ -52,6 +53,9 @@ def version_cmd() -> None:
 app.command("core4-conformance-check", help="Validate Forge/Runtime/Lab/AIGuard contract conformance")(
     core4_conformance_check_cmd
 )
+app.command("agent-runtime-report", help="Generate Agent Runtime Reliability report from Orchestrator/AIGuard evidence")(
+    agent_runtime_report_cmd
+)
 app.command("list-results", help="List recent structured benchmark results")(list_results_cmd)
 app.command("history-report", help="Generate HTML history report from structured benchmark results")(history_report_cmd)
 app.command("serve", help="Run InferEdgeLab FastAPI server")(serve_cmd)
diff --git a/inferedgelab/commands/agent_runtime_report.py b/inferedgelab/commands/agent_runtime_report.py
new file mode 100644
index 0000000..844c7d3
--- /dev/null
+++ b/inferedgelab/commands/agent_runtime_report.py
@@ -0,0 +1,70 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+import typer
+from rich import print as rprint
+
+from inferedgelab.services.agent_runtime_report import (
+    agent_runtime_reliability_json,
+    build_agent_runtime_reliability_markdown,
+    load_agent_runtime_reliability_bundle,
+)
+
+
+def agent_runtime_report_cmd(
+    orchestration_summary: str = typer.Option(
+        ...,
+        "--orchestration-summary",
+        help="Path to InferEdgeOrchestrator orchestration_summary JSON",
+    ),
+    guard_analysis: str = typer.Option(
+        "",
+        "--guard-analysis",
+        help="Optional AIGuard runtime reliability guard_analysis JSON",
+    ),
+    format: str = typer.Option("text", "--format", "-f", help="text/json/markdown"),
+    output: str = typer.Option("", "--output", "-o", help="Optional output path"),
+) -> None:
+    # CLI entry point: build the report, render it in the requested format, then print or save it.
+    report = load_agent_runtime_reliability_bundle(
+        orchestration_summary_path=orchestration_summary,
+        guard_analysis_path=guard_analysis or None,
+    )
+    chosen = format.strip().lower()
+    if chosen not in {"text", "json", "markdown", "md"}:
+        raise typer.BadParameter("--format must be one of: text, json, markdown")
+    if chosen == "json":
+        text = agent_runtime_reliability_json(report)
+    else:
+        text = _text_summary(report) if chosen == "text" else build_agent_runtime_reliability_markdown(report)
+
+    # Without --output the rendered text goes to stdout unchanged; otherwise it is saved as UTF-8.
+    if not output:
+        print(text, end="")
+        return
+    path = Path(output)
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(text, encoding="utf-8")
+    rprint(f"[green]Saved[/green]: {path}")
+
+
+def _text_summary(report: dict) -> str:
+    """Render the plain-text summary: headline fields, three rates, then one line per rule."""
+    metrics = report["agent_runtime_summary"]["metrics"]
+    decision = report["agent_deployment_decision"]
+    guard = report["guard_summary"]
+    headline = [
+        "InferEdge Agent Runtime Reliability Report",
+        f"schema_version: {report['schema_version']}",
+        f"decision: {decision['decision']}",
+        f"policy_version: {decision['policy_version']}",
+        f"reason: {decision['reason']}",
+        f"guard_verdict: {guard.get('guard_verdict')}",
+        f"drop_rate: {metrics['drop_rate']:.6g}",
+        f"fallback_rate: {metrics['fallback_rate']:.6g}",
+        f"deadline_miss_rate: {metrics['deadline_miss_rate']:.6g}",
+        "triggered_rules:",
+    ]
+    rule_lines = [f"- {rule}" for rule in decision["triggered_rules"]]
+    return "\n".join([*headline, *rule_lines, ""])
diff --git a/inferedgelab/services/agent_runtime_report.py b/inferedgelab/services/agent_runtime_report.py
new file mode 100644
index 0000000..233cdcf
--- /dev/null
+++ b/inferedgelab/services/agent_runtime_report.py
@@ -0,0 +1,459 @@
+from __future__ import annotations
+
+import json
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+from inferedgelab.services.guard_analysis import (
+    guard_evidence_items,
+    guard_primary_reason,
+    guard_status,
+    guard_verdict,
+)
+
+
+AGENT_RUNTIME_REPORT_SCHEMA_VERSION = "inferedgelab-agent-runtime-reliability-report-v1"  # report schema_version
+AGENT_RUNTIME_POLICY_VERSION = "inferedge-lab-agent-runtime-policy-v1"  # decision policy_version
+ORCHESTRATION_SCHEMA_VERSION = "inferedge-orchestration-summary-v1"  # NOTE(review): not checked against inputs in this module
+AIGUARD_DIAGNOSIS_SCHEMA_VERSION = "inferedge-aiguard-diagnosis-v1"  # NOTE(review): not checked against inputs in this module
+
+DEFAULT_AGENT_RUNTIME_THRESHOLDS = {  # a metric at or above a value triggers the matching review/block rule
+    "deadline_miss_rate_review": 0.05,
+    "deadline_miss_rate_blocked": 0.20,
+    "drop_rate_review": 0.20,
+    "drop_rate_blocked": 0.50,
+    "fallback_rate_review": 0.20,
+    "fallback_rate_blocked": 0.50,
+    "queue_backlog_policy_decision_count_review": 1,  # a count of log entries, not a rate
+}
+
+AGENT_RUNTIME_POLICY_RULES: dict[str, dict[str, str]] = {  # rule name -> {"effect", "description"}
+    "guard_blocked_runtime_block": {
+        "effect": "blocked",
+        "description": "AIGuard runtime reliability evidence reported blocked/error status.",
+    },
+    "guard_warning_runtime_review": {
+        "effect": "review_required",
+        "description": "AIGuard runtime reliability evidence requires deployment review.",
+    },
+    "guard_missing_unknown": {
+        "effect": "unknown",  # decides the outcome only when no blocked/review_required rule fired
+        "description": "AIGuard runtime reliability evidence is missing.",
+    },
+    "deadline_miss_block": {
+        "effect": "blocked",
+        "description": "Deadline miss rate crossed the blocking threshold.",
+    },
+    "deadline_miss_review": {
+        "effect": "review_required",
+        "description": "Deadline miss rate crossed the review threshold.",
+    },
+    "drop_rate_block": {
+        "effect": "blocked",
+        "description": "Drop rate crossed the blocking threshold.",
+    },
+    "drop_rate_review": {
+        "effect": "review_required",
+        "description": "Drop rate crossed the review threshold.",
+    },
+    "fallback_rate_block": {
+        "effect": "blocked",
+        "description": "Fallback usage crossed the blocking threshold.",
+    },
+    "fallback_rate_review": {
+        "effect": "review_required",
+        "description": "Fallback usage crossed the review threshold.",
+    },
+    "queue_backlog_review": {
+        "effect": "review_required",
+        "description": "Queue backlog policy intervention was observed.",
+    },
+    "runtime_reliability_pass_note": {  # added only when no other rule fired
+        "effect": "deployable_with_note",
+        "description": "Runtime reliability evidence stayed within configured thresholds.",
+    },
+}
+
+
+def build_agent_runtime_reliability_report(
+    *,
+    orchestration_summary: dict[str, Any],
+    guard_analysis: dict[str, Any] | None = None,
+    source: dict[str, Any] | None = None,
+    thresholds: dict[str, float] | None = None,
+) -> dict[str, Any]:
+    """Assemble the Lab-owned agent runtime reliability report.
+
+    The report holds derived metrics, the per-agent summary, a condensed view of the
+    optional AIGuard ``guard_analysis`` and the resulting deployment decision.
+    """
+    # Caller-supplied thresholds override the defaults key by key.
+    effective_thresholds = {**DEFAULT_AGENT_RUNTIME_THRESHOLDS, **(thresholds or {})}
+    metrics = compute_agent_runtime_metrics(orchestration_summary)
+    summary_section = _agent_runtime_summary(orchestration_summary)
+    decision = build_agent_runtime_deployment_decision(
+        metrics=metrics,
+        guard_analysis=guard_analysis,
+        thresholds=effective_thresholds,
+    )
+
+    # Prefer the top-level schema version and fall back to the nested summary's value.
+    top_level_schema = orchestration_summary.get("schema_version")
+    guard_is_dict = isinstance(guard_analysis, dict)
+    return {
+        "schema_version": AGENT_RUNTIME_REPORT_SCHEMA_VERSION,
+        "generated_at": _utc_now_iso(),
+        "scope": "local-first agent runtime reliability report",
+        "source": dict(source or {}),
+        "contracts": {
+            "orchestration_summary": top_level_schema or summary_section.get("schema_version"),
+            "aiguard_guard_analysis": guard_analysis.get("schema_version") if guard_is_dict else None,
+            "source_contracts": summary_section.get("source_contracts", {}),
+        },
+        "agent_runtime_summary": {
+            "agents": _agent_summaries(summary_section),
+            "totals": _totals(summary_section),
+            "metrics": metrics,
+            "policy_decision_log_count": len(_policy_log(orchestration_summary)),
+        },
+        "guard_summary": _guard_summary(guard_analysis),
+        "agent_deployment_decision": decision,
+        "notes": [
+            "This report is local-first runtime reliability evidence, not a production cloud orchestration dashboard.",
+            "InferEdgeLab remains the final deployment decision owner.",
+            "AIGuard and Orchestrator provide optional evidence; they do not overwrite Lab policy.",
+        ],
+    }
+
+
+def build_agent_runtime_deployment_decision(
+    *,
+    metrics: dict[str, Any],
+    guard_analysis: dict[str, Any] | None,
+    thresholds: dict[str, float] | None = None,
+) -> dict[str, Any]:
+    """Apply the Lab agent-runtime policy and return the deployment decision.
+
+    Rules are collected in a fixed order: guard evidence, deadline misses, drops,
+    fallbacks, queue backlog. The decision is the most severe outcome among them:
+    ``blocked``, then ``review_required``, then ``unknown`` (guard evidence missing),
+    otherwise ``deployable_with_note``.
+    """
+    limits = {**DEFAULT_AGENT_RUNTIME_THRESHOLDS, **(thresholds or {})}
+    status = guard_status(guard_analysis)
+    verdict = guard_verdict(guard_analysis)
+    fired: list[str] = []
+
+    # Guard evidence contributes at most one rule.
+    if status == "error" or verdict == "blocked":
+        fired.append("guard_blocked_runtime_block")
+    elif status == "warning" or verdict in {"suspicious", "review_required"}:
+        fired.append("guard_warning_runtime_review")
+    elif status is None and verdict is None:
+        fired.append("guard_missing_unknown")
+
+    # Each rate metric adds its block rule, or else its review rule, once a threshold is reached.
+    rate_rules = (
+        ("deadline_miss_rate", "deadline_miss_review", "deadline_miss_block"),
+        ("drop_rate", "drop_rate_review", "drop_rate_block"),
+        ("fallback_rate", "fallback_rate_review", "fallback_rate_block"),
+    )
+    for metric_name, review_rule, blocked_rule in rate_rules:
+        _append_metric_rules(
+            fired,
+            metric_value=metrics[metric_name],
+            review=limits[f"{metric_name}_review"],
+            blocked=limits[f"{metric_name}_blocked"],
+            review_rule=review_rule,
+            blocked_rule=blocked_rule,
+        )
+
+    # Backlog-related policy decisions at or above the threshold require review.
+    backlog_hits = metrics["queue_backlog_policy_decision_count"]
+    if backlog_hits >= limits["queue_backlog_policy_decision_count_review"]:
+        fired.append("queue_backlog_review")
+    # Nothing fired at all: record the explicit pass note instead of an empty list.
+    if not fired:
+        fired.append("runtime_reliability_pass_note")
+
+    # Severity order: blocked, then review_required, then missing guard evidence, then pass.
+    effects = {_rule_effect(rule) for rule in fired}
+    if "blocked" in effects:
+        decision, reason, recommended_action = (
+            "blocked",
+            "Agent runtime reliability evidence indicates blocked deployment risk.",
+            "Do not deploy until deadline, drop, fallback, and guard evidence are reviewed.",
+        )
+    elif "review_required" in effects:
+        decision, reason, recommended_action = (
+            "review_required",
+            "Agent runtime reliability evidence requires deployment review.",
+            "Review Orchestrator policy decisions, AIGuard evidence, and agent priority budgets.",
+        )
+    elif "guard_missing_unknown" in fired:
+        decision, reason, recommended_action = (
+            "unknown",
+            "AIGuard runtime reliability evidence is unavailable.",
+            "Run AIGuard runtime reliability analysis before using this report for deployment.",
+        )
+    else:
+        decision, reason, recommended_action = (
+            "deployable_with_note",
+            "Agent runtime reliability evidence stayed within configured thresholds.",
+            "Deployment can proceed with runtime monitoring and the local evidence note retained.",
+        )
+
+    # Echo every fired rule with its effect and description so the outcome can be audited.
+    policy_summary = [
+        {
+            "rule": rule,
+            "effect": _rule_effect(rule),
+            "description": AGENT_RUNTIME_POLICY_RULES[rule]["description"],
+        }
+        for rule in fired
+    ]
+
+    return {
+        "policy_version": AGENT_RUNTIME_POLICY_VERSION,
+        "decision": decision,
+        "reason": reason,
+        "guard_status": status,
+        "guard_verdict": verdict,
+        "recommended_action": recommended_action,
+        "triggered_rules": fired,
+        "policy_summary": policy_summary,
+    }
+
+
+def compute_agent_runtime_metrics(orchestration_summary: dict[str, Any]) -> dict[str, Any]:
+    """Derive counts and rates from the summary totals and the policy decision log.
+
+    Drop/fallback rates divide by executed + dropped; the deadline-miss rate divides by executed.
+    """
+    totals = _totals(_agent_runtime_summary(orchestration_summary))
+    executed, dropped, missed, fallbacks = (
+        _non_negative_number(totals.get(name))
+        for name in ("executed_count", "dropped_count", "deadline_missed_count", "fallback_count")
+    )
+    attempted = executed + dropped
+    # A log entry counts as backlog-related when "backlog" appears in its reason or decision.
+    backlog_entries = [
+        entry
+        for entry in _policy_log(orchestration_summary)
+        if any("backlog" in str(entry.get(field, "")).lower() for field in ("reason", "decision"))
+    ]
+    return {
+        "executed_count": executed,
+        "dropped_count": dropped,
+        "deadline_missed_count": missed,
+        "fallback_count": fallbacks,
+        "policy_decision_count": _non_negative_number(totals.get("policy_decision_count")),
+        "overload_event_count": _non_negative_number(totals.get("overload_event_count")),
+        "total_task_events": attempted,
+        "deadline_miss_rate": _ratio(missed, executed),
+        "drop_rate": _ratio(dropped, attempted),
+        "fallback_rate": _ratio(fallbacks, attempted),
+        "queue_backlog_policy_decision_count": len(backlog_entries),
+    }
+
+
+def load_agent_runtime_reliability_bundle(
+    *,
+    orchestration_summary_path: str | Path,
+    guard_analysis_path: str | Path | None = None,
+) -> dict[str, Any]:
+    """Load the orchestration summary (required) and guard analysis (optional) and build the report."""
+    # Record where each input came from; paths are stored as strings.
+    provenance = {
+        "orchestration_summary_path": str(orchestration_summary_path),
+        "guard_analysis_path": str(guard_analysis_path) if guard_analysis_path else None,
+    }
+    # A falsy guard path means no AIGuard evidence: nothing is loaded and None is passed on.
+    return build_agent_runtime_reliability_report(
+        orchestration_summary=_load_json_dict(orchestration_summary_path),
+        guard_analysis=_load_json_dict(guard_analysis_path) if guard_analysis_path else None,
+        source=provenance,
+    )
+
+
+def build_agent_runtime_reliability_markdown(report: dict[str, Any]) -> str:
+    """Render a report dict as a Markdown document that ends with a newline.
+
+    Sections, in order: scope, per-agent table, reliability metrics table, AIGuard
+    evidence, Lab deployment decision, and notes.
+    """
+    runtime = report["agent_runtime_summary"]
+    metrics = runtime["metrics"]
+    decision = report["agent_deployment_decision"]
+    guard = report["guard_summary"]
+
+    doc = [
+        "# InferEdge Agent Runtime Reliability Report",
+        "",
+        "## Scope",
+        "",
+        f"- schema_version: `{report['schema_version']}`",
+        f"- generated_at: `{report['generated_at']}`",
+        f"- scope: {report['scope']}",
+        "- This is local-first report evidence, not a production cloud orchestration dashboard.",
+        "",
+        "## Agent Runtime Summary",
+        "",
+        "| Agent | Type | Priority | Latency Budget ms | Fallback Policy |",
+        "|---|---|---:|---:|---|",
+    ]
+    # One table row per agent; priority and latency budget are formatted by _fmt_number.
+    doc += [
+        f"| {agent.get('agent_id', '')} | {agent.get('agent_type', '')} "
+        f"| {_fmt_number(agent.get('priority'))} | {_fmt_number(agent.get('latency_budget_ms'))} "
+        f"| {agent.get('fallback_policy', '')} |"
+        for agent in runtime["agents"]
+    ]
+
+    # Metric rows are emitted in this fixed order.
+    metric_order = (
+        "executed_count",
+        "dropped_count",
+        "deadline_missed_count",
+        "fallback_count",
+        "deadline_miss_rate",
+        "drop_rate",
+        "fallback_rate",
+        "queue_backlog_policy_decision_count",
+    )
+    doc += ["", "## Runtime Reliability Metrics", "", "| Metric | Value |", "|---|---:|"]
+    doc += [f"| {name} | {_fmt_number(metrics[name])} |" for name in metric_order]
+
+    doc += [
+        "",
+        "## AIGuard Runtime Reliability Evidence",
+        "",
+        f"- guard_status: `{guard.get('status')}`",
+        f"- guard_verdict: `{guard.get('guard_verdict')}`",
+        f"- severity: `{guard.get('severity')}`",
+        f"- primary_reason: {guard.get('primary_reason')}",
+        f"- evidence_count: `{guard.get('evidence_count')}`",
+        "",
+        "## Lab Agent Deployment Decision",
+        "",
+        f"- policy_version: `{decision['policy_version']}`",
+        f"- decision: `{decision['decision']}`",
+        f"- reason: {decision['reason']}",
+        f"- recommended_action: {decision['recommended_action']}",
+        "- triggered_rules:",
+    ]
+    doc += [f"  - `{rule}`" for rule in decision["triggered_rules"]]
+    doc += ["", "## Notes", ""]
+    doc += [f"- {note}" for note in report["notes"]]
+    return "\n".join(doc) + "\n"
+
+
+def agent_runtime_reliability_json(report: dict[str, Any]) -> str:  # 2-space indented JSON plus a final newline
+    return f"{json.dumps(report, indent=2, ensure_ascii=False)}\n"
+
+
+def write_agent_runtime_reliability_markdown(report: dict[str, Any], output: str | Path) -> Path:
+    """Render ``report`` as Markdown, save it at ``output`` as UTF-8, and return that path."""
+    destination = Path(output)
+    # Missing parent directories are created before the report is rendered and written.
+    destination.parent.mkdir(parents=True, exist_ok=True)
+    markdown = build_agent_runtime_reliability_markdown(report)
+    destination.write_text(markdown, encoding="utf-8")
+    return destination
+
+
+def _agent_runtime_summary(orchestration_summary: dict[str, Any]) -> dict[str, Any]:
+    section = orchestration_summary.get("agent_runtime_summary")  # absent or non-dict -> {}
+    return {} if not isinstance(section, dict) else section
+
+
+def _totals(runtime_summary: dict[str, Any]) -> dict[str, Any]:
+    """Return ``runtime_summary["totals"]`` when it is a dict, otherwise an empty dict."""
+    return runtime_summary["totals"] if isinstance(runtime_summary.get("totals"), dict) else {}
+
+
+def _agent_summaries(runtime_summary: dict[str, Any]) -> list[dict[str, Any]]:
+    """List per-agent dicts from ``agents`` (a mapping or a list), ordered by stringified agent_id."""
+    container = runtime_summary.get("agents")
+    if isinstance(container, dict):
+        candidates = list(container.values())
+    else:
+        candidates = container if isinstance(container, list) else []
+    entries = [entry for entry in candidates if isinstance(entry, dict)]
+    return sorted(entries, key=lambda entry: str(entry.get("agent_id", "")))
+
+
+def _guard_summary(guard_analysis: dict[str, Any] | None) -> dict[str, Any]:
+    """Condense the optional AIGuard payload into the fields shown in the report."""
+    raw = guard_analysis if isinstance(guard_analysis, dict) else {}
+    return {
+        "schema_version": raw.get("schema_version"),
+        "status": guard_status(guard_analysis),
+        "guard_verdict": guard_verdict(guard_analysis),
+        "severity": raw.get("severity"),
+        "primary_reason": guard_primary_reason(guard_analysis),
+        "evidence_count": len(guard_evidence_items(guard_analysis)),
+    }
+
+
+def _append_metric_rules(
+    rules: list[str],
+    *,
+    metric_value: float,
+    review: float,
+    blocked: float,
+    review_rule: str,
+    blocked_rule: str,
+) -> None:
+    """Append ``blocked_rule``, or else ``review_rule``, to ``rules`` when its threshold is reached."""
+    # The blocking threshold is checked first, so one metric never contributes both rules.
+    # Thresholds are inclusive: a metric equal to the threshold triggers the rule.
+    rules.extend([blocked_rule] if metric_value >= blocked else [review_rule] if metric_value >= review else [])
+
+
+def _rule_effect(rule: str) -> str:  # effect of a policy rule; names missing from the table map to "unknown"
+    return AGENT_RUNTIME_POLICY_RULES.get(rule, {}).get("effect", "unknown")
+
+
+def _policy_log(orchestration_summary: dict[str, Any]) -> list[dict[str, Any]]:
+    """Return the dict entries of ``policy_decision_log``, or of ``policy_decisions`` as a fallback."""
+    for key in ("policy_decision_log", "policy_decisions"):
+        entries = orchestration_summary.get(key)
+        if isinstance(entries, list):
+            return [entry for entry in entries if isinstance(entry, dict)]
+    return []
+
+
+def _load_json_dict(path: str | Path | None) -> dict[str, Any] | None:
+    """Read a UTF-8 JSON file whose top-level value must be an object; ``None`` passes through."""
+    if path is None:
+        return None
+    payload = json.loads(Path(path).read_text(encoding="utf-8"))
+    if isinstance(payload, dict):
+        return payload
+    raise ValueError(f"Expected JSON object: {path}")
+
+
+def _non_negative_number(value: Any) -> float:
+    """Coerce a count to a finite float >= 0.0 (bools, non-numbers, NaN and +/-inf become 0.0)."""
+    is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
+    return max(float(value), 0.0) if is_number and abs(value) < float("inf") else 0.0
+
+
+def _ratio(numerator: float, denominator: float) -> float:
+    """Divide numerator by denominator, returning 0.0 when the denominator is zero or negative."""
+    # The guard is evaluated first, so a zero denominator never reaches the division.
+    return 0.0 if denominator <= 0 else numerator / denominator
+
+
+def _utc_now_iso() -> str:  # current UTC time at second precision with a "Z" suffix
+    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+
+def _fmt_number(value: Any) -> str:
+    """Render a table cell: ``-`` for None, six significant digits for floats, ``str()`` otherwise."""
+    if isinstance(value, float):
+        return format(value, ".6g")
+    # None marks a missing value; everything else (ints, strings, ...) is shown verbatim.
+    return "-" if value is None else str(value)
diff --git a/tests/test_agent_runtime_report.py b/tests/test_agent_runtime_report.py
new file mode 100644
index 0000000..b4c2903
--- /dev/null
+++ b/tests/test_agent_runtime_report.py
@@ -0,0 +1,161 @@
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from inferedgelab.commands.agent_runtime_report import agent_runtime_report_cmd
+from inferedgelab.services.agent_runtime_report import (
+    AGENT_RUNTIME_POLICY_VERSION,
+    AGENT_RUNTIME_REPORT_SCHEMA_VERSION,
+    build_agent_runtime_reliability_markdown,
+    build_agent_runtime_reliability_report,
+    compute_agent_runtime_metrics,
+    load_agent_runtime_reliability_bundle,
+)
+
+
+def orchestration_summary() -> dict:
+    """Build a fresh orchestration-summary payload used as test input.
+
+    The payload describes two agents, totals of 10 executed and 14 dropped
+    tasks with one deadline miss, and a policy log holding a single
+    queue-backlog load-shedding decision.
+    """
+    contract = "inferedge-orchestration-summary-v1"
+    safety_agent = {
+        "agent_id": "safety_monitor_agent",
+        "agent_type": "safety",
+        "priority": 100,
+        "latency_budget_ms": 20.0,
+        "fallback_policy": "protect",
+    }
+    vision_agent = {
+        "agent_id": "vision_agent",
+        "agent_type": "vision",
+        "priority": 90,
+        "latency_budget_ms": 33.0,
+        "fallback_policy": "drop_stale",
+    }
+    totals = {
+        "executed_count": 10,
+        "dropped_count": 14,
+        "deadline_missed_count": 1,
+        "fallback_count": 14,
+        "policy_decision_count": 14,
+        "overload_event_count": 14,
+    }
+    load_shedding_decision = {
+        "agent_id": "vision_agent",
+        "decision": "load_shedding",
+        "reason": "queue_backlog_threshold_exceeded",
+        "protected_agent_id": "safety_monitor_agent",
+    }
+    runtime_summary = {
+        "schema_version": contract,
+        "source_contracts": {
+            "forge_agent_manifest": "inferedge-agent-manifest-v1",
+            "runtime_agent_result": "inferedge-runtime-agent-task-v1",
+        },
+        "agents": {
+            "safety_monitor_agent": safety_agent,
+            "vision_agent": vision_agent,
+        },
+        "totals": totals,
+    }
+    return {
+        "schema_version": contract,
+        "agent_runtime_summary": runtime_summary,
+        "policy_decision_log": [load_shedding_decision],
+    }
+
+
+def guard_analysis() -> dict:
+    """Build a fresh AIGuard diagnosis payload with a ``blocked`` verdict.
+
+    It carries one failed ``drop_rate`` evidence item whose observed value
+    (14 / 24) is above its 0.2 threshold.
+    """
+    drop_rate_evidence = {
+        "type": "excessive_drop_rate",
+        "metric_name": "drop_rate",
+        "observed_value": 14 / 24,
+        "baseline_value": None,
+        "threshold": 0.2,
+        "delta": None,
+        "delta_pct": None,
+        "increase_factor": None,
+        "severity": "high",
+        "status": "failed",
+        "explanation": "Drop rate crossed threshold.",
+        "why_it_matters": "Dropped work may become stale.",
+        "suspected_causes": ["queue_backlog"],
+        "recommendation": "Tune scheduling policy.",
+        "raw_context": {},
+    }
+    diagnosis = {
+        "schema_version": "inferedge-aiguard-diagnosis-v1",
+        "guard_verdict": "blocked",
+        "severity": "high",
+        "confidence": 0.88,
+        "primary_reason": "drop_rate indicates runtime reliability risk.",
+        "evidence": [drop_rate_evidence],
+        "created_at": "2026-05-17T00:00:00Z",
+    }
+    return diagnosis
+
+
+def test_compute_agent_runtime_metrics_from_orchestrator_summary():
+    """Metrics computed from the orchestration fixture match the expected rates and count."""
+    computed = compute_agent_runtime_metrics(orchestration_summary())
+
+    expected_rates = {
+        "deadline_miss_rate": 0.1,
+        "drop_rate": 14 / 24,
+        "fallback_rate": 14 / 24,
+    }
+    for metric_name, expected in expected_rates.items():
+        assert computed[metric_name] == pytest.approx(expected)
+    assert computed["queue_backlog_policy_decision_count"] == 1
+
+
+def test_agent_runtime_report_blocks_when_guard_blocks():
+    """A blocked guard verdict produces a blocked decision listing both block rules."""
+    report = build_agent_runtime_reliability_report(
+        orchestration_summary=orchestration_summary(),
+        guard_analysis=guard_analysis(),
+    )
+    decision = report["agent_deployment_decision"]
+
+    assert report["schema_version"] == AGENT_RUNTIME_REPORT_SCHEMA_VERSION
+
+    # Input schema versions are echoed back under "contracts".
+    contracts = report["contracts"]
+    assert contracts["orchestration_summary"] == "inferedge-orchestration-summary-v1"
+    assert contracts["aiguard_guard_analysis"] == "inferedge-aiguard-diagnosis-v1"
+
+    assert decision["policy_version"] == AGENT_RUNTIME_POLICY_VERSION
+    assert decision["decision"] == "blocked"
+    for expected_rule in ("guard_blocked_runtime_block", "drop_rate_block"):
+        assert expected_rule in decision["triggered_rules"]
+    assert report["guard_summary"]["guard_verdict"] == "blocked"
+
+
+def test_agent_runtime_report_markdown_contains_sections():
+    """The rendered markdown contains every expected heading and marker string."""
+    markdown = build_agent_runtime_reliability_markdown(
+        build_agent_runtime_reliability_report(
+            orchestration_summary=orchestration_summary(),
+            guard_analysis=guard_analysis(),
+        )
+    )
+
+    expected_fragments = (
+        "# InferEdge Agent Runtime Reliability Report",
+        "Agent Runtime Summary",
+        "Runtime Reliability Metrics",
+        "AIGuard Runtime Reliability Evidence",
+        "Lab Agent Deployment Decision",
+        "guard_blocked_runtime_block",
+        "not a production cloud orchestration dashboard",
+    )
+    for fragment in expected_fragments:
+        assert fragment in markdown
+
+
+def test_agent_runtime_report_loads_committed_fixtures():
+    """The committed example fixtures load into a blocked report with three agents.
+
+    Fixture paths are anchored to the repository root (the parent of this
+    ``tests`` directory) so the test does not depend on the directory pytest
+    is started from.
+    """
+    from pathlib import Path
+
+    fixtures = Path(__file__).resolve().parents[1] / "examples" / "agent_runtime"
+    report = load_agent_runtime_reliability_bundle(
+        orchestration_summary_path=str(fixtures / "agent_3_orchestration_summary.json"),
+        guard_analysis_path=str(fixtures / "aiguard_runtime_guard_analysis.json"),
+    )
+
+    assert report["agent_deployment_decision"]["decision"] == "blocked"
+    assert report["agent_runtime_summary"]["metrics"]["drop_rate"] == pytest.approx(14 / 24)
+    assert len(report["agent_runtime_summary"]["agents"]) == 3
+
+
+def test_agent_runtime_report_command_outputs_json(capsys):
+    """The command prints a JSON report to stdout when ``format`` is ``"json"``.
+
+    Fixture paths are anchored to the repository root (the parent of this
+    ``tests`` directory) so the test does not depend on the directory pytest
+    is started from.
+    """
+    from pathlib import Path
+
+    fixtures = Path(__file__).resolve().parents[1] / "examples" / "agent_runtime"
+    agent_runtime_report_cmd(
+        orchestration_summary=str(fixtures / "agent_3_orchestration_summary.json"),
+        guard_analysis=str(fixtures / "aiguard_runtime_guard_analysis.json"),
+        format="json",
+        output="",
+    )
+    # Everything captured on stdout must parse as a single JSON document.
+    report = json.loads(capsys.readouterr().out)
+
+    assert report["schema_version"] == AGENT_RUNTIME_REPORT_SCHEMA_VERSION
+    assert report["agent_deployment_decision"]["decision"] == "blocked"