diff --git a/docs/ci/runtime_intelligence_gitlab_artifacts.md b/docs/ci/runtime_intelligence_gitlab_artifacts.md index 18b33b8..7906e15 100644 --- a/docs/ci/runtime_intelligence_gitlab_artifacts.md +++ b/docs/ci/runtime_intelligence_gitlab_artifacts.md @@ -120,6 +120,10 @@ The initial gate is conservative: - the EdgeEnv handoff `runtime_telemetry_history` file must exist and preserve the EdgeEnv history schema, telemetry coverage summary, and Runtime history seed ownership markers +- the same handoff history may include runs with missing Runtime telemetry; the + gate treats them as evidence gaps, but requires any preserved Orchestrator + context on those entries to keep source repository, artifact role, producer + contract, owner boundary flags, and EdgeEnv mapping hints intact The bundle manifest gate is implemented by `scripts/check_runtime_intelligence_bundle_manifest.py`. It verifies that the bundle contains baseline/candidate Runtime results, EdgeEnv regression evidence, AIGuard guard evidence, and explicit owner/boundary metadata before Lab generates the report. In this template it also consumes `--edgeenv-handoff examples/runtime_intelligence_chain/edgeenv_lab_handoff_manifest.json` to verify EdgeEnv producer-side file/source/role/schema alignment. The same gate now also checks `source_repositories`, `artifact_roles`, and `producer_contracts` so the smoke remains a cross-repo handoff fixture rather than a Lab-only report sample. 
diff --git a/docs/portfolio/edgeenv_runtime_regression_lab_handoff.md b/docs/portfolio/edgeenv_runtime_regression_lab_handoff.md index 86dc5de..bb9740d 100644 --- a/docs/portfolio/edgeenv_runtime_regression_lab_handoff.md +++ b/docs/portfolio/edgeenv_runtime_regression_lab_handoff.md @@ -105,7 +105,7 @@ This second smoke uses committed lightweight artifacts to represent the cross-re - `examples/runtime_intelligence_chain/bundle_manifest.json` declares the local-first artifact bundle, file paths, source repositories, artifact roles, producer contracts, owners, and boundary flags. - `examples/runtime_intelligence_chain/edgeenv_lab_handoff_manifest.json` mirrors the EdgeEnv producer-side handoff manifest and its `lab_bundle_alignment` metadata, so Lab can verify EdgeEnv-produced file keys separately from external AIGuard evidence. -- `examples/runtime_intelligence_chain/runtime_telemetry_history.json` is the EdgeEnv producer-side telemetry history artifact referenced by the handoff manifest. +- `examples/runtime_intelligence_chain/runtime_telemetry_history.json` is the EdgeEnv producer-side telemetry history artifact referenced by the handoff manifest. It includes a missing-telemetry run as an evidence gap and preserves Orchestrator context on that entry without turning Orchestrator into a regression or deployment decision owner. - Orchestrator context is preserved inside the EdgeEnv regression artifact as `orchestrator_operation_context`. - AIGuard deterministic queue/thermal evidence is passed as a precomputed `guard_analysis` artifact that mirrors the AIGuard producer-side diagnosis v1 evidence shape. - Lab owns the combined report and deployment decision. @@ -129,6 +129,7 @@ Expected Lab behavior: - The same gate requires EdgeEnv-preserved Orchestrator producer markers to carry `source_repository=InferEdgeOrchestrator`, `artifact_role=orchestrator-supplemental-operation-context`, and `producer_contract=inferedge-orchestrator-edgeenv-runtime-telemetry-feed-v1`. 
- When an EdgeEnv handoff manifest is provided, the bundle gate requires EdgeEnv-produced file keys, external AIGuard file keys, source repository mapping, artifact roles, producer contracts, and boundary flags to match Lab's Runtime Intelligence bundle contract. - The same handoff gate verifies that the referenced `runtime_telemetry_history` artifact exists and preserves EdgeEnv history schema, telemetry coverage, and Runtime history seed ownership markers. +- The same handoff gate verifies that missing telemetry entries remain evidence gaps while preserving Orchestrator producer markers, owner boundary flags, and EdgeEnv mapping hints when Orchestrator context is attached. - The bundle gate also requires AIGuard coverage evidence raw context to preserve the same Orchestrator mapping hint and producer markers, proving that AIGuard kept EdgeEnv/Orchestrator ownership markers as diagnosis context rather than recomputing coverage or owning deployment policy. - Additional Lab test fixtures under `tests/fixtures/edgeenv_regression/` mirror EdgeEnv replay examples for candidate telemetry gaps and execution sequence inversion. These fixture smokes verify that replay warnings become Lab-owned report context without making Lab recompute EdgeEnv comparability. - Markdown/HTML reports include a `Runtime Intelligence Risk Summary` that summarizes EdgeEnv comparability/regression, telemetry replay gaps, Runtime history seed traceability, AIGuard deterministic evidence, and the Lab-owned deployment decision in one reviewer-facing table. 
diff --git a/examples/runtime_intelligence_chain/runtime_telemetry_history.json b/examples/runtime_intelligence_chain/runtime_telemetry_history.json index f8fc155..9ed1ce5 100644 --- a/examples/runtime_intelligence_chain/runtime_telemetry_history.json +++ b/examples/runtime_intelligence_chain/runtime_telemetry_history.json @@ -1,15 +1,15 @@ { "schema_version": "edgeenv.runtime-telemetry-history.v1", "summary": { - "registered_runs": 2, + "registered_runs": 3, "telemetry_runs": 2, - "missing_telemetry_runs": 0, - "orchestrator_feed_runs": 1, + "missing_telemetry_runs": 1, + "orchestrator_feed_runs": 2, "history_seed_runs": 2 }, "telemetry_coverage": { "runs_with_coverage": 2, - "runs_without_coverage": 0, + "runs_without_coverage": 1, "expected_fields": [ "gpu_temperature", "queue_depth", @@ -151,5 +151,57 @@ ] } } + ], + "missing_telemetry": [ + { + "run_id": "edgeenv-smoke-missing", + "reason": "runtime_telemetry_missing", + "missing_telemetry_is_failure": false, + "orchestrator_context_present": true, + "orchestrator_operation_context": { + "schema_version": "inferedge-orchestrator-edgeenv-runtime-telemetry-feed-v1", + "role": "orchestrator_operation_context_for_edgeenv", + "source_repository": "InferEdgeOrchestrator", + "artifact_role": "orchestrator-supplemental-operation-context", + "producer_contract": "inferedge-orchestrator-edgeenv-runtime-telemetry-feed-v1", + "source": "orchestration_summary", + "run_id": "edgeenv-smoke-missing", + "not_a_regression_judgement": true, + "not_a_comparability_gate": true, + "decision_owner": "lab", + "regression_owner": "edgeenv", + "candidate_context": { + "run_id": "edgeenv-smoke-missing", + "telemetry_source": "inferedge_orchestrator_operation_summary", + "queue_depth": 4, + "operation": { + "queue_depth": 4, + "deadline_missed_count": 1, + "fallback_count": 0 + }, + "resource": { + "source": "runtime_health_snapshot", + "gpu_temperature": 72.0, + "throttling_detected": false + } + }, + "edgeenv_mapping_hint": { + 
"copy_candidate_context_to": "runtime_telemetry_context.candidate", + "operation_context_role": "supplemental", + "coverage_summary_owner": "edgeenv", + "coverage_summary_path": "runtime_telemetry_context.history.telemetry_coverage", + "candidate_context_required_fields": [ + "run_id", + "telemetry_source", + "operation", + "resource" + ], + "aiguard_evidence_candidates": [ + "runtime_queue_overload", + "runtime_thermal_instability" + ] + } + } + } ] } diff --git a/scripts/check_runtime_intelligence_bundle_manifest.py b/scripts/check_runtime_intelligence_bundle_manifest.py index 3db5ec6..a95a0f2 100644 --- a/scripts/check_runtime_intelligence_bundle_manifest.py +++ b/scripts/check_runtime_intelligence_bundle_manifest.py @@ -125,6 +125,7 @@ EDGEENV_HANDOFF_SUMMARY_CONTRACT_MARKERS = ( "edgeenv_handoff: lab_bundle_alignment validated", "edgeenv_handoff: runtime_telemetry_history validated", + "edgeenv_handoff: missing_telemetry_orchestrator_context validated", ) @@ -434,9 +435,9 @@ def _validate_edgeenv_runtime_history_artifact( ) summary = history.get("summary") or {} _record( - summary.get("registered_runs") == 2, + summary.get("registered_runs") == 3, errors, - "EdgeEnv handoff runtime_telemetry_history.summary.registered_runs must be 2", + "EdgeEnv handoff runtime_telemetry_history.summary.registered_runs must be 3", ) _record( summary.get("telemetry_runs") == 2, @@ -444,10 +445,16 @@ def _validate_edgeenv_runtime_history_artifact( "EdgeEnv handoff runtime_telemetry_history.summary.telemetry_runs must be 2", ) _record( - summary.get("orchestrator_feed_runs") == 1, + summary.get("missing_telemetry_runs") == 1, errors, "EdgeEnv handoff runtime_telemetry_history.summary." - "orchestrator_feed_runs must be 1", + "missing_telemetry_runs must be 1", + ) + _record( + summary.get("orchestrator_feed_runs") == 2, + errors, + "EdgeEnv handoff runtime_telemetry_history.summary." 
def _validate_edgeenv_missing_telemetry_orchestrator_context(
    history: dict[str, Any],
    errors: list[str],
) -> None:
    """Validate the ``edgeenv-smoke-missing`` entry of the handoff history.

    The entry is looked up under ``history["missing_telemetry"]``. Every
    check is reported through ``_record`` together with ``errors``; the
    function stops early when the list, the entry, or its
    ``orchestrator_operation_context`` mapping is absent, because the
    remaining checks would have nothing to inspect.

    Args:
        history: Parsed ``runtime_telemetry_history`` artifact.
        errors: Error collection handed to ``_record`` and the sibling
            validators.
    """
    location = "EdgeEnv handoff runtime_telemetry_history.missing_telemetry"
    entry_label = f"{location}[edgeenv-smoke-missing]"

    entries = history.get("missing_telemetry")
    entries_is_list = isinstance(entries, list)
    _record(entries_is_list, errors, f"{location} must be a list")
    if not entries_is_list:
        return

    # First dict entry carrying the smoke run id; non-dict items are ignored.
    smoke_entry = None
    for candidate in entries:
        if not isinstance(candidate, dict):
            continue
        if candidate.get("run_id") == "edgeenv-smoke-missing":
            smoke_entry = candidate
            break
    entry_found = isinstance(smoke_entry, dict)
    _record(
        entry_found,
        errors,
        f"{location} must include edgeenv-smoke-missing",
    )
    if not entry_found:
        return

    _record(
        smoke_entry.get("reason") == "runtime_telemetry_missing",
        errors,
        f"{entry_label}.reason must be runtime_telemetry_missing",
    )

    context = smoke_entry.get("orchestrator_operation_context")
    context_is_mapping = isinstance(context, dict)
    _record(
        context_is_mapping,
        errors,
        f"{entry_label} must include orchestrator_operation_context",
    )
    if not context_is_mapping:
        return

    prefix = f"{entry_label}.orchestrator_operation_context"
    expected_schema = REQUIRED_PRODUCER_CONTRACTS["orchestrator_feed_schema"]
    _record(
        context.get("schema_version") == expected_schema,
        errors,
        f"{prefix}.schema_version must be {expected_schema}",
    )
    _validate_orchestrator_producer_markers(context, errors, prefix)

    # Boundary flags must be the literal ``True``, not merely truthy.
    for flag in ("not_a_regression_judgement", "not_a_comparability_gate"):
        _record(
            context.get(flag) is True,
            errors,
            f"{prefix}.{flag} must be true",
        )

    # Ownership fields and the owner each one has to name.
    required_owners = (
        ("decision_owner", "lab"),
        ("regression_owner", "edgeenv"),
    )
    for field, owner in required_owners:
        _record(
            context.get(field) == owner,
            errors,
            f"{prefix}.{field} must be {owner}",
        )

    _validate_orchestrator_mapping_hint(context, errors)
def test_runtime_intelligence_bundle_manifest_gate_fails_for_bad_missing_history_context(
    tmp_path,
):
    """Gate returns 2 when the missing-telemetry Orchestrator context is altered.

    A copy of the committed history artifact is written to ``tmp_path`` with
    ``artifact_role`` and ``edgeenv_mapping_hint.coverage_summary_owner``
    overwritten on the first ``missing_telemetry`` entry; the gate summary
    must name both violations.
    """
    handoff = json.loads(EDGEENV_HANDOFF.read_text(encoding="utf-8"))
    chain_dir = REPO_ROOT / "examples" / "runtime_intelligence_chain"
    history_source = chain_dir / handoff["files"]["runtime_telemetry_history"]
    history = json.loads(history_source.read_text(encoding="utf-8"))

    # Break one producer marker and one mapping-hint owner on the entry.
    context = history["missing_telemetry"][0]["orchestrator_operation_context"]
    context["artifact_role"] = "edgeenv-owned-regression-context"
    context["edgeenv_mapping_hint"]["coverage_summary_owner"] = "orchestrator"

    # Point a temporary handoff manifest at the altered history copy.
    history_copy = tmp_path / "runtime_telemetry_history.json"
    history_copy.write_text(json.dumps(history), encoding="utf-8")
    handoff["files"]["runtime_telemetry_history"] = str(history_copy)
    handoff_copy = tmp_path / "edgeenv_lab_handoff_manifest.json"
    handoff_copy.write_text(json.dumps(handoff), encoding="utf-8")
    summary_file = tmp_path / "bundle_manifest_gate_summary.md"

    exit_code = manifest_gate(
        manifest=str(MANIFEST),
        edgeenv_handoff=str(handoff_copy),
        summary_out=str(summary_file),
    )

    assert exit_code == 2
    summary = summary_file.read_text(encoding="utf-8")
    expected_fragments = (
        "missing_telemetry[edgeenv-smoke-missing].orchestrator_operation_context"
        ".artifact_role must be orchestrator-supplemental-operation-context",
        "orchestrator_operation_context.edgeenv_mapping_hint."
        "coverage_summary_owner must be edgeenv",
    )
    for fragment in expected_fragments:
        assert fragment in summary