From 7ef6095271069b0834570a9a98497b1e4a23a49e Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Thu, 18 Jun 2026 07:29:42 +0900 Subject: [PATCH 1/8] fix: classify external opencode check failures --- .github/workflows/opencode-review.yml | 69 +++++++++ scripts/ci/classify_failed_check_evidence.py | 146 ++++++++++++++++++ .../tests/test_supply_chain_policy.py | 74 +++++++++ 3 files changed, 289 insertions(+) create mode 100644 scripts/ci/classify_failed_check_evidence.py diff --git a/.github/workflows/opencode-review.yml b/.github/workflows/opencode-review.yml index 5df2a530..9bb9f163 100644 --- a/.github/workflows/opencode-review.yml +++ b/.github/workflows/opencode-review.yml @@ -1299,6 +1299,67 @@ jobs: } >"$body_file" } + build_external_failed_check_body() { + local failed_checks_file="$1" + local classification_file="$2" + local body_file="$3" + local reason + local signals + + reason="$(jq -r '.reason // "external GitHub check failure"' "$classification_file")" + signals="$( + jq -r ' + (.signals // []) + | map("- " + .) + | join("\n") + ' "$classification_file" + )" + if [ -z "$signals" ]; then + signals="- external check failure was classified without additional signals" + fi + + { + printf '## Pull request overview\n\n' + printf 'OpenCode completed its review pass, but the only failed current-head check is external infrastructure rather than a source-backed repository defect.\n\n' + printf '## Findings\n\n' + printf 'No blocking source finding was submitted. Re-run the failed workflow job so the required GitHub check can report a clean current-head result.\n\n' + printf '## Verification\n\n' + printf -- '- Result: EXTERNAL_CHECK_FAILURE\n' + printf -- '- Reason: %s\n\n' "$reason" + printf '## Gate evidence\n\n' + printf -- "- Head SHA: \`%s\`\n" "$HEAD_SHA" + printf -- '- Workflow run: %s\n' "$RUN_ID" + printf -- '- Workflow attempt: %s\n\n' "$RUN_ATTEMPT" + printf 'Failed checks:\n' + cat "$failed_checks_file" + printf '\n\nExternal infrastructure signals:\n%s\n' "$signals" + } >"$body_file" + } + + stop_for_external_failed_check_if_needed() { + local failed_checks_file="$1" + local evidence_file="$2" + local body_file="$3" + local classification_file + local classification + + classification_file="$(mktemp)" + if ! python3 scripts/ci/classify_failed_check_evidence.py "$evidence_file" >"$classification_file"; then + rm -f "$classification_file" + return 1 + fi + + classification="$(jq -r '.classification // empty' "$classification_file")" + if [ "$classification" != "external_infrastructure" ]; then + rm -f "$classification_file" + return 1 + fi + + build_external_failed_check_body "$failed_checks_file" "$classification_file" "$body_file" + rm -f "$classification_file" + stop_approval_without_review "EXTERNAL_CHECK_FAILURE" "$(cat "$body_file")" + } + normalize_opencode_output() { local output_file="$1" @@ -1708,6 +1769,9 @@ jobs: if ! scripts/ci/collect_failed_check_evidence.sh "$failed_check_evidence_file"; then printf "Failed GitHub Check evidence could not be collected for current head \`%s\`.\n" "$HEAD_SHA" >"$failed_check_evidence_file" fi + if stop_for_external_failed_check_if_needed "$failed_checks_file" "$failed_check_evidence_file" "$failed_check_review_body_file"; then + : + fi if run_failed_check_diagnosis "$failed_checks_file" "$failed_check_evidence_file" "$failed_check_review_body_file"; then create_pull_review "REQUEST_CHANGES" "$(cat "$failed_check_review_body_file")" else @@ -1835,6 +1899,9 @@ jobs: if ! scripts/ci/collect_failed_check_evidence.sh "$failed_check_evidence_file"; then printf "Failed GitHub Check evidence could not be collected for current head \`%s\`.\n" "$HEAD_SHA" >"$failed_check_evidence_file" fi + if stop_for_external_failed_check_if_needed "$failed_checks_file" "$failed_check_evidence_file" "$failed_check_review_body_file"; then + : + fi if run_failed_check_diagnosis "$failed_checks_file" "$failed_check_evidence_file" "$failed_check_review_body_file"; then create_pull_review "REQUEST_CHANGES" "$(cat "$failed_check_review_body_file")" else @@ -1891,6 +1958,8 @@ jobs: if scripts/ci/validate_opencode_failed_check_review.sh "$control_json" "$failed_checks_file" "$failed_check_evidence_file"; then format_request_changes_body "$control_json" "$failed_check_review_body_file" create_pull_review "REQUEST_CHANGES" "$(cat "$failed_check_review_body_file")" + elif stop_for_external_failed_check_if_needed "$failed_checks_file" "$failed_check_evidence_file" "$failed_check_review_body_file"; then + : elif run_failed_check_diagnosis "$failed_checks_file" "$failed_check_evidence_file" "$failed_check_review_body_file"; then create_pull_review "REQUEST_CHANGES" "$(cat "$failed_check_review_body_file")" else diff --git a/scripts/ci/classify_failed_check_evidence.py b/scripts/ci/classify_failed_check_evidence.py new file mode 100644 index 00000000..edb8b980 --- /dev/null +++ b/scripts/ci/classify_failed_check_evidence.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +"""Classify failed-check evidence before OpenCode changes PR review state.""" + +from __future__ import annotations + +import json +import re +import sys +from pathlib import Path +from typing import Any + + +FAILED_CHECK_HEADING = re.compile(r"^## Failed check:\s*(.+)$", re.MULTILINE) +UPLOAD_ARTIFACT_STEP = re.compile( + r"^- step \d+:\s+Upload .+ artifact \(failure\)$", + re.IGNORECASE | re.MULTILINE, +) +ARTIFACT_UPLOAD_INFRA_PATTERNS = ( + ( + "artifact upload finalize request reset", + re.compile( + r"Failed to FinalizeArtifact:\s+Unable to make request:\s+ECONNRESET", + re.IGNORECASE, + ), + ), + ( + "artifact service request reset", + re.compile(r"Unable to make request:\s+ECONNRESET", re.IGNORECASE), + ), +) +ARTIFACT_UPLOAD_CONFIRMATION_PATTERNS = ( + re.compile(r"actions/upload-artifact@", re.IGNORECASE), + re.compile(r"Finished uploading artifact content", re.IGNORECASE), + re.compile(r"Finalizing artifact upload", re.IGNORECASE), +) +BUILD_OR_PACKAGE_SUCCESS_PATTERNS = ( + re.compile(r"Finished `release` profile", re.IGNORECASE), + re.compile(r"Built application at:", re.IGNORECASE), + re.compile(r"Packaged .+ to artifacts/", re.IGNORECASE), +) + + +def unknown(reason: str, *, signals: list[str] | None = None) -> dict[str, Any]: + """Return the default actionable-or-unknown classification.""" + return { + "classification": "actionable_or_unknown", + "reason": reason, + "signals": signals or [], + } + + +def external(reason: str, *, signals: list[str]) -> dict[str, Any]: + """Return a classification for failures outside repository source control.""" + return { + "classification": "external_infrastructure", + "reason": reason, + "signals": signals, + } + + +def classify_failed_check_evidence(evidence_text: str) -> dict[str, Any]: + """Classify whether failed check evidence is safe to withhold as non-source.""" + failed_checks = FAILED_CHECK_HEADING.findall(evidence_text) + if not failed_checks: + return unknown("no failed check headings were present") + if len(failed_checks) != 1: + return unknown( + "multiple failed checks require per-check source diagnosis", + signals=failed_checks, + ) + + failed_check = failed_checks[0].strip() + upload_step_match = UPLOAD_ARTIFACT_STEP.search(evidence_text) + if upload_step_match is None: + return unknown( + "the failed job step was not an artifact upload step", + signals=[failed_check], + ) + + matched_infra_signals = [ + label + for label, pattern in ARTIFACT_UPLOAD_INFRA_PATTERNS + if pattern.search(evidence_text) + ] + if not matched_infra_signals: + return unknown( + "no known external artifact upload infrastructure signal was present", + signals=[failed_check, upload_step_match.group(0)], + ) + + if not any( + pattern.search(evidence_text) + for pattern in ARTIFACT_UPLOAD_CONFIRMATION_PATTERNS + ): + return unknown( + "artifact upload context was missing from the failed-check evidence", + signals=[failed_check, upload_step_match.group(0), *matched_infra_signals], + ) + + build_success_signals = [ + pattern.pattern + for pattern in BUILD_OR_PACKAGE_SUCCESS_PATTERNS + if pattern.search(evidence_text) + ] + if not build_success_signals: + return unknown( + "build or package success was not visible before artifact upload failed", + signals=[failed_check, upload_step_match.group(0), *matched_infra_signals], + ) + + return external( + ( + "the only failed check is a GitHub artifact upload finalization/network " + "failure after build/package output was produced; rerun the failed " + "workflow job instead of requesting source changes" + ), + signals=[ + failed_check, + upload_step_match.group(0), + *matched_infra_signals, + *build_success_signals, + ], + ) + + +def main(argv: list[str]) -> int: + """Classify a failed-check evidence file and print JSON.""" + if len(argv) != 2: + print( + "usage: classify_failed_check_evidence.py ", file=sys.stderr + ) + return 64 + + evidence_file = Path(argv[1]) + try: + evidence_text = evidence_file.read_text(encoding="utf-8") + except OSError as exc: + print(f"cannot read failed-check evidence file: {exc}", file=sys.stderr) + return 65 + + print(json.dumps(classify_failed_check_evidence(evidence_text), ensure_ascii=True)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv)) diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py index 67d2577d..ec9c0a77 100644 --- a/services/analysis-engine/tests/test_supply_chain_policy.py +++ b/services/analysis-engine/tests/test_supply_chain_policy.py @@ -4825,6 +4825,80 @@ def test_opencode_review_gate_ignores_review_agent_status_contexts() -> None: assert workflow.count("select(opencode_review_agent_status | not)") >= 3 +def test_opencode_classifies_artifact_upload_reset_as_external() -> None: + """Ensure transient artifact upload finalization resets do not request changes.""" + classifier = load_module( + "scripts/ci/classify_failed_check_evidence.py", + "classify_failed_check_evidence", + ) + evidence = """ +# Failed GitHub Check Evidence + +## Failed check: build-baseline/build / macos / amd64 + +### Failed job steps + +- step 13: Upload macOS amd64 artifact (failure) + +### Failed log excerpt + +```text +Finished `release` profile [optimized] target(s) in 6m 56s +Packaged BandScope_0.1.3_x64.dmg to artifacts/bandscope-macos-amd64.dmg +Run actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a +Finished uploading artifact content to blob storage! +Finalizing artifact upload +##[error]Failed to FinalizeArtifact: Unable to make request: ECONNRESET +``` +""".strip() + + result = classifier.classify_failed_check_evidence(evidence) + + assert result["classification"] == "external_infrastructure" + assert "rerun the failed workflow job" in result["reason"] + assert "build-baseline/build / macos / amd64" in result["signals"] + + +def test_opencode_keeps_test_failures_actionable() -> None: + """Ensure ordinary failed checks still require source-backed diagnosis.""" + classifier = load_module( + "scripts/ci/classify_failed_check_evidence.py", + "classify_failed_check_evidence_actionable", + ) + evidence = """ +# Failed GitHub Check Evidence + +## Failed check: ci/ci / build-and-test + +### Failed job steps + +- step 7: Run tests (failure) + +### Failed log excerpt + +```text +FAIL apps/desktop/src/App.test.tsx +##[error]Process completed with exit code 1. +``` +""".strip() + + result = classifier.classify_failed_check_evidence(evidence) + + assert result["classification"] == "actionable_or_unknown" + + +def test_opencode_review_stops_external_check_failures_without_review() -> None: + """Ensure external check failures update overview instead of review state.""" + repo_root = Path(__file__).resolve().parents[3] + workflow = (repo_root / ".github" / "workflows" / "opencode-review.yml").read_text( + encoding="utf-8" + ) + + assert "scripts/ci/classify_failed_check_evidence.py" in workflow + assert "stop_for_external_failed_check_if_needed" in workflow + assert 'stop_approval_without_review "EXTERNAL_CHECK_FAILURE"' in workflow + + def test_opencode_normalizer_defaults_missing_approve_findings(tmp_path: Path) -> None: """Ensure APPROVE control payloads without findings normalize to findings:[].""" normalizer = load_module( From 6c0bdf2fdb4c95c02cc1680f0aebdba143b8771d Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Thu, 18 Jun 2026 07:39:18 +0900 Subject: [PATCH 2/8] fix: report matched opencode evidence lines --- scripts/ci/classify_failed_check_evidence.py | 22 ++++++++++++++----- .../tests/test_supply_chain_policy.py | 5 +++++ 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/scripts/ci/classify_failed_check_evidence.py b/scripts/ci/classify_failed_check_evidence.py index edb8b980..ddc02b68 100644 --- a/scripts/ci/classify_failed_check_evidence.py +++ b/scripts/ci/classify_failed_check_evidence.py @@ -58,6 +58,19 @@ def external(reason: str, *, signals: list[str]) -> dict[str, Any]: } +def matching_evidence_lines( + evidence_text: str, patterns: tuple[re.Pattern[str], ...] +) -> list[str]: + """Return concrete evidence lines matched by the given patterns.""" + matches: list[str] = [] + for pattern in patterns: + for line in evidence_text.splitlines(): + if pattern.search(line): + matches.append(line.strip()) + break + return matches + + def classify_failed_check_evidence(evidence_text: str) -> dict[str, Any]: """Classify whether failed check evidence is safe to withhold as non-source.""" failed_checks = FAILED_CHECK_HEADING.findall(evidence_text) @@ -97,11 +110,10 @@ def classify_failed_check_evidence(evidence_text: str) -> dict[str, Any]: signals=[failed_check, upload_step_match.group(0), *matched_infra_signals], ) - build_success_signals = [ - pattern.pattern - for pattern in BUILD_OR_PACKAGE_SUCCESS_PATTERNS - if pattern.search(evidence_text) - ] + build_success_signals = matching_evidence_lines( + evidence_text, + BUILD_OR_PACKAGE_SUCCESS_PATTERNS, + ) if not build_success_signals: return unknown( "build or package success was not visible before artifact upload failed", diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py index ec9c0a77..f4cb9a04 100644 --- a/services/analysis-engine/tests/test_supply_chain_policy.py +++ b/services/analysis-engine/tests/test_supply_chain_policy.py @@ -4857,6 +4857,11 @@ def test_opencode_classifies_artifact_upload_reset_as_external() -> None: assert result["classification"] == "external_infrastructure" assert "rerun the failed workflow job" in result["reason"] assert "build-baseline/build / macos / amd64" in result["signals"] + assert "Packaged .+ to artifacts/" not in result["signals"] + assert ( + "Packaged BandScope_0.1.3_x64.dmg to artifacts/bandscope-macos-amd64.dmg" + in result["signals"] + ) def test_opencode_keeps_test_failures_actionable() -> None: From c88aac1d6de87da747d6db1c3608fd82f2940497 Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Thu, 18 Jun 2026 07:56:50 +0900 Subject: [PATCH 3/8] fix: classify tauri binary release download failures --- scripts/ci/classify_failed_check_evidence.py | 121 +++++++++++++++--- .../tests/test_supply_chain_policy.py | 34 +++++ 2 files changed, 135 insertions(+), 20 deletions(-) diff --git a/scripts/ci/classify_failed_check_evidence.py b/scripts/ci/classify_failed_check_evidence.py index ddc02b68..32212f84 100644 --- a/scripts/ci/classify_failed_check_evidence.py +++ b/scripts/ci/classify_failed_check_evidence.py @@ -15,6 +15,10 @@ r"^- step \d+:\s+Upload .+ artifact \(failure\)$", re.IGNORECASE | re.MULTILINE, ) +BUILD_NATIVE_SHELL_STEP = re.compile( + r"^- step \d+:\s+Build native shell \(failure\)$", + re.IGNORECASE | re.MULTILINE, +) ARTIFACT_UPLOAD_INFRA_PATTERNS = ( ( "artifact upload finalize request reset", @@ -33,6 +37,21 @@ re.compile(r"Finished uploading artifact content", re.IGNORECASE), re.compile(r"Finalizing artifact upload", re.IGNORECASE), ) +TAURI_BINARY_RELEASE_DOWNLOAD_PATTERNS = ( + re.compile( + r"Downloading https://github\.com/tauri-apps/binary-releases/", + re.IGNORECASE, + ), +) +TAURI_BUNDLE_INFRA_PATTERNS = ( + ( + "tauri binary release download server error", + re.compile( + r"failed to bundle project `http status:\s*50[0-9]`", + re.IGNORECASE, + ), + ), +) BUILD_OR_PACKAGE_SUCCESS_PATTERNS = ( re.compile(r"Finished `release` profile", re.IGNORECASE), re.compile(r"Built application at:", re.IGNORECASE), @@ -84,52 +103,114 @@ def classify_failed_check_evidence(evidence_text: str) -> dict[str, Any]: failed_check = failed_checks[0].strip() upload_step_match = UPLOAD_ARTIFACT_STEP.search(evidence_text) - if upload_step_match is None: + build_success_signals = matching_evidence_lines( + evidence_text, + BUILD_OR_PACKAGE_SUCCESS_PATTERNS, + ) + if upload_step_match is not None: + matched_infra_signals = [ + label + for label, pattern in ARTIFACT_UPLOAD_INFRA_PATTERNS + if pattern.search(evidence_text) + ] + if not matched_infra_signals: + return unknown( + "no known external artifact upload infrastructure signal was present", + signals=[failed_check, upload_step_match.group(0)], + ) + + if not any( + pattern.search(evidence_text) + for pattern in ARTIFACT_UPLOAD_CONFIRMATION_PATTERNS + ): + return unknown( + "artifact upload context was missing from the failed-check evidence", + signals=[ + failed_check, + upload_step_match.group(0), + *matched_infra_signals, + ], + ) + + if not build_success_signals: + return unknown( + "build or package success was not visible before artifact upload failed", + signals=[ + failed_check, + upload_step_match.group(0), + *matched_infra_signals, + ], + ) + + return external( + ( + "the only failed check is a GitHub artifact upload " + "finalization/network failure after build/package output was " + "produced; rerun the failed workflow job instead of requesting " + "source changes" + ), + signals=[ + failed_check, + upload_step_match.group(0), + *matched_infra_signals, + *build_success_signals, + ], + ) + + native_shell_step_match = BUILD_NATIVE_SHELL_STEP.search(evidence_text) + if native_shell_step_match is None: return unknown( - "the failed job step was not an artifact upload step", + "no known external failed job step pattern was present", signals=[failed_check], ) matched_infra_signals = [ label - for label, pattern in ARTIFACT_UPLOAD_INFRA_PATTERNS + for label, pattern in TAURI_BUNDLE_INFRA_PATTERNS if pattern.search(evidence_text) ] if not matched_infra_signals: return unknown( - "no known external artifact upload infrastructure signal was present", - signals=[failed_check, upload_step_match.group(0)], + "no known external native-shell infrastructure signal was present", + signals=[failed_check, native_shell_step_match.group(0)], ) - if not any( - pattern.search(evidence_text) - for pattern in ARTIFACT_UPLOAD_CONFIRMATION_PATTERNS - ): + tauri_download_signals = matching_evidence_lines( + evidence_text, + TAURI_BINARY_RELEASE_DOWNLOAD_PATTERNS, + ) + if not tauri_download_signals: return unknown( - "artifact upload context was missing from the failed-check evidence", - signals=[failed_check, upload_step_match.group(0), *matched_infra_signals], + "Tauri binary release download context was missing from the evidence", + signals=[ + failed_check, + native_shell_step_match.group(0), + *matched_infra_signals, + ], ) - build_success_signals = matching_evidence_lines( - evidence_text, - BUILD_OR_PACKAGE_SUCCESS_PATTERNS, - ) if not build_success_signals: return unknown( - "build or package success was not visible before artifact upload failed", - signals=[failed_check, upload_step_match.group(0), *matched_infra_signals], + "build success was not visible before native-shell bundling failed", + signals=[ + failed_check, + native_shell_step_match.group(0), + *matched_infra_signals, + *tauri_download_signals, + ], ) return external( ( - "the only failed check is a GitHub artifact upload finalization/network " - "failure after build/package output was produced; rerun the failed " + "the only failed check is a Tauri binary release download server " + "error after the native app binary was built; rerun the failed " "workflow job instead of requesting source changes" ), signals=[ failed_check, - upload_step_match.group(0), + native_shell_step_match.group(0), *matched_infra_signals, + *tauri_download_signals, *build_success_signals, ], ) diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py index 86533d4e..0db04719 100644 --- a/services/analysis-engine/tests/test_supply_chain_policy.py +++ b/services/analysis-engine/tests/test_supply_chain_policy.py @@ -4917,6 +4917,40 @@ def test_opencode_classifies_artifact_upload_reset_as_external() -> None: ) +def test_opencode_classifies_tauri_binary_release_502_as_external() -> None: + """Ensure Tauri binary release server errors do not request source changes.""" + classifier = load_module( + "scripts/ci/classify_failed_check_evidence.py", + "classify_failed_check_evidence_tauri_binary_release", + ) + evidence = """ +# Failed GitHub Check Evidence + +## Failed check: build-baseline/build / windows / amd64 + +### Failed job steps + +- step 12: Build native shell (failure) + +### Failed log excerpt + +```text +Finished `release` profile [optimized] target(s) in 4m 53s +Built application at: D:\\a\\bandscope\\target\\release\\bandscope-desktop.exe +Downloading https://github.com/tauri-apps/binary-releases/releases/download/nsis-3.11/nsis-3.11.zip +failed to bundle project `http status: 502` +Error failed to bundle project `http status: 502` +``` +""".strip() + + result = classifier.classify_failed_check_evidence(evidence) + + assert result["classification"] == "external_infrastructure" + assert "Tauri binary release download server error" in result["reason"] + assert "build-baseline/build / windows / amd64" in result["signals"] + assert any("tauri-apps/binary-releases" in signal for signal in result["signals"]) + + def test_opencode_keeps_test_failures_actionable() -> None: """Ensure ordinary failed checks still require source-backed diagnosis.""" classifier = load_module( From 45fbedc1148940fa121f709642df4c19431659b2 Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Thu, 18 Jun 2026 08:02:05 +0900 Subject: [PATCH 4/8] fix: classify setup-uv fetch failures --- scripts/ci/classify_failed_check_evidence.py | 58 +++++++++++++++++++ .../tests/test_supply_chain_policy.py | 33 +++++++++++ 2 files changed, 91 insertions(+) diff --git a/scripts/ci/classify_failed_check_evidence.py b/scripts/ci/classify_failed_check_evidence.py index 32212f84..b445c36c 100644 --- a/scripts/ci/classify_failed_check_evidence.py +++ b/scripts/ci/classify_failed_check_evidence.py @@ -19,6 +19,10 @@ r"^- step \d+:\s+Build native shell \(failure\)$", re.IGNORECASE | re.MULTILINE, ) +SETUP_UV_STEP = re.compile( + r"^- step \d+:\s+Run astral-sh/setup-uv@.+ \(failure\)$", + re.IGNORECASE | re.MULTILINE, +) ARTIFACT_UPLOAD_INFRA_PATTERNS = ( ( "artifact upload finalize request reset", @@ -52,6 +56,19 @@ ), ), ) +SETUP_UV_MANIFEST_FETCH_PATTERNS = ( + re.compile( + r"Fetching manifest data from " + r"https://raw\.githubusercontent\.com/astral-sh/versions/", + re.IGNORECASE, + ), +) +SETUP_UV_INFRA_PATTERNS = ( + ( + "setup-uv manifest fetch failed", + re.compile(r"##\[error\]fetch failed", re.IGNORECASE), + ), +) BUILD_OR_PACKAGE_SUCCESS_PATTERNS = ( re.compile(r"Finished `release` profile", re.IGNORECASE), re.compile(r"Built application at:", re.IGNORECASE), @@ -157,6 +174,47 @@ def classify_failed_check_evidence(evidence_text: str) -> dict[str, Any]: ], ) + setup_uv_step_match = SETUP_UV_STEP.search(evidence_text) + if setup_uv_step_match is not None: + matched_infra_signals = [ + label + for label, pattern in SETUP_UV_INFRA_PATTERNS + if pattern.search(evidence_text) + ] + if not matched_infra_signals: + return unknown( + "no known external setup-uv infrastructure signal was present", + signals=[failed_check, setup_uv_step_match.group(0)], + ) + + setup_uv_fetch_signals = matching_evidence_lines( + evidence_text, + SETUP_UV_MANIFEST_FETCH_PATTERNS, + ) + if not setup_uv_fetch_signals: + return unknown( + "setup-uv manifest fetch context was missing from the evidence", + signals=[ + failed_check, + setup_uv_step_match.group(0), + *matched_infra_signals, + ], + ) + + return external( + ( + "the only failed check is a setup-uv manifest fetch failure " + "before repository build steps ran; rerun the failed workflow " + "job instead of requesting source changes" + ), + signals=[ + failed_check, + setup_uv_step_match.group(0), + *matched_infra_signals, + *setup_uv_fetch_signals, + ], + ) + native_shell_step_match = BUILD_NATIVE_SHELL_STEP.search(evidence_text) if native_shell_step_match is None: return unknown( diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py index 0db04719..94454503 100644 --- a/services/analysis-engine/tests/test_supply_chain_policy.py +++ b/services/analysis-engine/tests/test_supply_chain_policy.py @@ -4951,6 +4951,39 @@ def test_opencode_classifies_tauri_binary_release_502_as_external() -> None: assert any("tauri-apps/binary-releases" in signal for signal in result["signals"]) +def test_opencode_classifies_setup_uv_manifest_fetch_as_external() -> None: + """Ensure setup-uv manifest fetch failures do not request source changes.""" + classifier = load_module( + "scripts/ci/classify_failed_check_evidence.py", + "classify_failed_check_evidence_setup_uv_fetch", + ) + evidence = """ +# Failed GitHub Check Evidence + +## Failed check: build-baseline/build / macos / amd64 + +### Failed job steps + +- step 5: Run astral-sh/setup-uv@fac544c07dec837d0ccb6301d7b5580bf5edae39 (failure) + +### Failed log excerpt + +```text +Fetching manifest data from https://raw.githubusercontent.com/astral-sh/versions/ +##[error]fetch failed +``` +""".strip() + + result = classifier.classify_failed_check_evidence(evidence) + + assert result["classification"] == "external_infrastructure" + assert "setup-uv manifest fetch failure" in result["reason"] + assert "build-baseline/build / macos / amd64" in result["signals"] + assert any( + "raw.githubusercontent.com/astral-sh/versions" in signal for signal in result["signals"] + ) + + def test_opencode_keeps_test_failures_actionable() -> None: """Ensure ordinary failed checks still require source-backed diagnosis.""" classifier = load_module( From f4ae8ab5f3a43bda2c72e04337b30de3dc1c9132 Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Thu, 18 Jun 2026 08:12:48 +0900 Subject: [PATCH 5/8] fix: keep scorecard analysis for nonblocking cii --- scripts/checks/normalize_scorecard_sarif.py | 43 ++++++++++++++++--- .../tests/test_supply_chain_policy.py | 17 ++++++-- 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/scripts/checks/normalize_scorecard_sarif.py b/scripts/checks/normalize_scorecard_sarif.py index 05b87a3c..a803dc34 100644 --- a/scripts/checks/normalize_scorecard_sarif.py +++ b/scripts/checks/normalize_scorecard_sarif.py @@ -21,6 +21,41 @@ def is_non_blocking_scorecard_result(result: object) -> bool: ) +def downgrade_non_blocking_scorecard_result(result: dict) -> int: + """Keep a non-blocking Scorecard result visible without tripping gates.""" + rewritten = 0 + if result.get("level") != "note": + result["level"] = "note" + rewritten += 1 + + properties = result.get("properties") + if not isinstance(properties, dict): + properties = {} + result["properties"] = properties + rewritten += 1 + if properties.get("bandscopeNonBlockingScorecardSignal") is not True: + properties["bandscopeNonBlockingScorecardSignal"] = True + rewritten += 1 + + locations = result.get("locations") + if isinstance(locations, list) and locations: + return rewritten + + result["locations"] = [ + { + "physicalLocation": { + "artifactLocation": {"uri": SCORECARD_WORKFLOW_URI}, + "region": {"startLine": 1}, + "properties": { + "bandscopeNonBlockingScorecardSignal": True, + "bandscopeRepositoryLevelFinding": True, + }, + } + } + ] + return rewritten + 1 + + def normalize_scorecard_sarif(source: Path, target: Path) -> int: """Normalize Scorecard SARIF locations/results and return the change count.""" sarif = json.loads(source.read_text(encoding="utf-8")) @@ -35,14 +70,11 @@ def normalize_scorecard_sarif(source: Path, target: Path) -> int: results = run.get("results", []) if not isinstance(results, list): continue - retained_results = [] for result in results: - if is_non_blocking_scorecard_result(result): - rewritten += 1 - continue - retained_results.append(result) if not isinstance(result, dict): continue + if is_non_blocking_scorecard_result(result): + rewritten += downgrade_non_blocking_scorecard_result(result) locations = result.get("locations", []) if not isinstance(locations, list): continue @@ -74,7 +106,6 @@ def normalize_scorecard_sarif(source: Path, target: Path) -> int: ) properties["bandscopeRepositoryLevelFinding"] = True rewritten += 1 - run["results"] = retained_results target.write_text( json.dumps(sarif, indent=2, sort_keys=True) + "\n", encoding="utf-8" diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py index 94454503..5eb3a7e6 100644 --- a/services/analysis-engine/tests/test_supply_chain_policy.py +++ b/services/analysis-engine/tests/test_supply_chain_policy.py @@ -2711,10 +2711,10 @@ def test_scorecard_sarif_normalizer_preserves_file_locations(tmp_path: Path) -> assert "properties" not in location -def test_scorecard_sarif_normalizer_drops_non_blocking_cii_badge_result( +def test_scorecard_sarif_normalizer_downgrades_non_blocking_cii_badge_result( tmp_path: Path, ) -> None: - """Ensure the external OpenSSF badge signal does not block code scanning gates.""" + """Ensure the badge signal keeps Scorecard analysis without blocking gates.""" normalizer = load_module( "scripts/checks/normalize_scorecard_sarif.py", "normalize_scorecard_sarif_cii_badge", @@ -2759,9 +2759,18 @@ def test_scorecard_sarif_normalizer_drops_non_blocking_cii_badge_result( rewritten = normalizer.normalize_scorecard_sarif(source, target) normalized = json.loads(target.read_text(encoding="utf-8")) results = normalized["runs"][0]["results"] + cii_result = results[0] + cii_location = cii_result["locations"][0]["physicalLocation"] - assert rewritten == 2 - assert [result["ruleId"] for result in results] == ["TokenPermissionsID"] + assert rewritten == 5 + assert [result["ruleId"] for result in results] == [ + "CIIBestPracticesID", + "TokenPermissionsID", + ] + assert cii_result["level"] == "note" + assert cii_result["properties"]["bandscopeNonBlockingScorecardSignal"] is True + assert cii_location["artifactLocation"]["uri"] == ".github/workflows/ossf-scorecard.yml" + assert cii_location["region"]["startLine"] == 1 def test_scorecard_sarif_normalizer_fills_existing_region_start_line( From d99de88ef6ce21cb8b3e3249fff24f3d6e5cb052 Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Thu, 18 Jun 2026 08:18:57 +0900 Subject: [PATCH 6/8] Fix external check evidence classification --- .github/workflows/opencode-review.yml | 7 +++- scripts/ci/classify_failed_check_evidence.py | 40 ++++++++++++------- .../tests/test_supply_chain_policy.py | 10 +++++ 3 files changed, 41 insertions(+), 16 deletions(-) diff --git a/.github/workflows/opencode-review.yml b/.github/workflows/opencode-review.yml index 9bb9f163..ee7509cd 100644 --- a/.github/workflows/opencode-review.yml +++ b/.github/workflows/opencode-review.yml @@ -1349,7 +1349,12 @@ jobs: return 1 fi - classification="$(jq -r '.classification // empty' "$classification_file")" + if ! classification="$( + jq -r '.classification // empty' "$classification_file" 2>/dev/null + )"; then + rm -f "$classification_file" + return 1 + fi if [ "$classification" != "external_infrastructure" ]; then rm -f "$classification_file" return 1 diff --git a/scripts/ci/classify_failed_check_evidence.py b/scripts/ci/classify_failed_check_evidence.py index b445c36c..8947995d 100644 --- a/scripts/ci/classify_failed_check_evidence.py +++ b/scripts/ci/classify_failed_check_evidence.py @@ -107,6 +107,19 @@ def matching_evidence_lines( return matches +def matching_labeled_evidence_lines( + evidence_text: str, patterns: tuple[tuple[str, re.Pattern[str]], ...] +) -> list[str]: + """Return labeled concrete evidence lines matched by the given patterns.""" + matches: list[str] = [] + for label, pattern in patterns: + for line in evidence_text.splitlines(): + if pattern.search(line): + matches.append(f"{label}: {line.strip()}") + break + return matches + + def classify_failed_check_evidence(evidence_text: str) -> dict[str, Any]: """Classify whether failed check evidence is safe to withhold as non-source.""" failed_checks = FAILED_CHECK_HEADING.findall(evidence_text) @@ -125,11 +138,10 @@ def classify_failed_check_evidence(evidence_text: str) -> dict[str, Any]: BUILD_OR_PACKAGE_SUCCESS_PATTERNS, ) if upload_step_match is not None: - matched_infra_signals = [ - label - for label, pattern in ARTIFACT_UPLOAD_INFRA_PATTERNS - if pattern.search(evidence_text) - ] + matched_infra_signals = matching_labeled_evidence_lines( + evidence_text, + ARTIFACT_UPLOAD_INFRA_PATTERNS, + ) if not matched_infra_signals: return unknown( "no known external artifact upload infrastructure signal was present", @@ -176,11 +188,10 @@ def classify_failed_check_evidence(evidence_text: str) -> dict[str, Any]: setup_uv_step_match = SETUP_UV_STEP.search(evidence_text) if setup_uv_step_match is not None: - matched_infra_signals = [ - label - for label, pattern in SETUP_UV_INFRA_PATTERNS - if pattern.search(evidence_text) - ] + matched_infra_signals = matching_labeled_evidence_lines( + evidence_text, + SETUP_UV_INFRA_PATTERNS, + ) if not matched_infra_signals: return unknown( "no known external setup-uv infrastructure signal was present", @@ -222,11 +233,10 @@ def classify_failed_check_evidence(evidence_text: str) -> dict[str, Any]: signals=[failed_check], ) - matched_infra_signals = [ - label - for label, pattern in TAURI_BUNDLE_INFRA_PATTERNS - if pattern.search(evidence_text) - ] + matched_infra_signals = matching_labeled_evidence_lines( + evidence_text, + TAURI_BUNDLE_INFRA_PATTERNS, + ) if not matched_infra_signals: return unknown( "no known external native-shell infrastructure signal was present", diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py index 5eb3a7e6..c383cd04 100644 --- a/services/analysis-engine/tests/test_supply_chain_policy.py +++ b/services/analysis-engine/tests/test_supply_chain_policy.py @@ -4920,6 +4920,10 @@ def test_opencode_classifies_artifact_upload_reset_as_external() -> None: assert "rerun the failed workflow job" in result["reason"] assert "build-baseline/build / macos / amd64" in result["signals"] assert "Packaged .+ to artifacts/" not in result["signals"] + assert any( + "Failed to FinalizeArtifact: Unable to make request: ECONNRESET" in signal + for signal in result["signals"] + ) assert ( "Packaged BandScope_0.1.3_x64.dmg to artifacts/bandscope-macos-amd64.dmg" in result["signals"] @@ -4958,6 +4962,9 @@ def test_opencode_classifies_tauri_binary_release_502_as_external() -> None: assert "Tauri binary release download server error" in result["reason"] assert "build-baseline/build / windows / amd64" in result["signals"] assert any("tauri-apps/binary-releases" in signal for signal in result["signals"]) + assert any( + "failed to bundle project `http status: 502`" in signal for signal in result["signals"] + ) def test_opencode_classifies_setup_uv_manifest_fetch_as_external() -> None: @@ -4988,6 +4995,7 @@ def test_opencode_classifies_setup_uv_manifest_fetch_as_external() -> None: assert result["classification"] == "external_infrastructure" assert "setup-uv manifest fetch failure" in result["reason"] assert "build-baseline/build / macos / amd64" in result["signals"] + assert any("##[error]fetch failed" in signal for signal in result["signals"]) assert any( "raw.githubusercontent.com/astral-sh/versions" in signal for signal in result["signals"] ) @@ -5031,6 +5039,8 @@ def test_opencode_review_stops_external_check_failures_without_review() -> None: assert "scripts/ci/classify_failed_check_evidence.py" in workflow assert "stop_for_external_failed_check_if_needed" in workflow assert 'stop_approval_without_review "EXTERNAL_CHECK_FAILURE"' in workflow + assert 'if ! classification="$(' in workflow + assert "jq -r '.classification // empty' \"$classification_file\" 2>/dev/null" in workflow def test_opencode_normalizer_defaults_missing_approve_findings(tmp_path: Path) -> None: From 584fed2a95e3f49a1e3e95a883afc6e220a5fdfd Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Thu, 18 Jun 2026 08:28:21 +0900 Subject: [PATCH 7/8] Polish external check review evidence --- .github/workflows/opencode-review.yml | 2 +- scripts/ci/classify_failed_check_evidence.py | 6 +++++- .../analysis-engine/tests/test_supply_chain_policy.py | 10 ++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/opencode-review.yml b/.github/workflows/opencode-review.yml index ee7509cd..2b79a0cf 100644 --- a/.github/workflows/opencode-review.yml +++ b/.github/workflows/opencode-review.yml @@ -1310,7 +1310,7 @@ jobs: signals="$( jq -r ' (.signals // []) - | map("- " + .) + | map(tostring | ltrimstr("- ") | "- " + .) | join("\n") ' "$classification_file" )" diff --git a/scripts/ci/classify_failed_check_evidence.py b/scripts/ci/classify_failed_check_evidence.py index 8947995d..1ecf342a 100644 --- a/scripts/ci/classify_failed_check_evidence.py +++ b/scripts/ci/classify_failed_check_evidence.py @@ -112,10 +112,14 @@ def matching_labeled_evidence_lines( ) -> list[str]: """Return labeled concrete evidence lines matched by the given patterns.""" matches: list[str] = [] + matched_lines: set[str] = set() for label, pattern in patterns: for line in evidence_text.splitlines(): if pattern.search(line): - matches.append(f"{label}: {line.strip()}") + matched_line = line.strip() + if matched_line not in matched_lines: + matches.append(f"{label}: {matched_line}") + matched_lines.add(matched_line) break return matches diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py index c383cd04..60023783 100644 --- a/services/analysis-engine/tests/test_supply_chain_policy.py +++ b/services/analysis-engine/tests/test_supply_chain_policy.py @@ -4920,6 +4920,15 @@ def test_opencode_classifies_artifact_upload_reset_as_external() -> None: assert "rerun the failed workflow job" in result["reason"] assert "build-baseline/build / macos / amd64" in result["signals"] assert "Packaged .+ to artifacts/" not in result["signals"] + artifact_finalize_signals = [ + signal + for signal in result["signals"] + if "Failed to FinalizeArtifact: Unable to make request: ECONNRESET" in signal + ] + assert artifact_finalize_signals == [ + "artifact upload finalize request reset: " + "##[error]Failed to FinalizeArtifact: Unable to make request: ECONNRESET" + ] assert any( "Failed to FinalizeArtifact: Unable to make request: ECONNRESET" in signal for signal in result["signals"] @@ -5039,6 +5048,7 @@ def test_opencode_review_stops_external_check_failures_without_review() -> None: assert "scripts/ci/classify_failed_check_evidence.py" in workflow assert "stop_for_external_failed_check_if_needed" in workflow assert 'stop_approval_without_review "EXTERNAL_CHECK_FAILURE"' in workflow + assert 'map(tostring | ltrimstr("- ") | "- " + .)' in workflow assert 'if ! classification="$(' in workflow assert "jq -r '.classification // empty' \"$classification_file\" 2>/dev/null" in workflow From af3c6b7c4c06285c41b388d58229bdc13046b6aa Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Thu, 18 Jun 2026 08:50:56 +0900 Subject: [PATCH 8/8] Recover OpenCode publish gate normalization --- .github/workflows/opencode-review.yml | 21 ++++++++++++++++--- .../tests/test_supply_chain_policy.py | 3 +++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/.github/workflows/opencode-review.yml b/.github/workflows/opencode-review.yml index 2b79a0cf..552550d6 100644 --- a/.github/workflows/opencode-review.yml +++ b/.github/workflows/opencode-review.yml @@ -895,15 +895,30 @@ jobs: ' "$clean_output" >"$comment_body_file" if [ ! -s "$comment_body_file" ]; then - echo "OpenCode output did not include the required sentinel." - cat "$clean_output" - exit 0 + if python3 scripts/ci/opencode_review_normalize_output.py \ + "$HEAD_SHA" "$RUN_ID" "$RUN_ATTEMPT" "$clean_output"; then + cp "$clean_output" "$comment_body_file" + else + echo "OpenCode output did not include the required sentinel." + cat "$clean_output" + exit 0 + fi fi gate_status=0 gate_result="$( bash scripts/ci/opencode_review_approve_gate.sh "$HEAD_SHA" "$RUN_ID" "$RUN_ATTEMPT" "$comment_body_file" "$normalized_comment_json" )" || gate_status=$? + if [ "$gate_status" -ne 0 ]; then + if python3 scripts/ci/opencode_review_normalize_output.py \ + "$HEAD_SHA" "$RUN_ID" "$RUN_ATTEMPT" "$clean_output"; then + cp "$clean_output" "$comment_body_file" + gate_status=0 + gate_result="$( + bash scripts/ci/opencode_review_approve_gate.sh "$HEAD_SHA" "$RUN_ID" "$RUN_ATTEMPT" "$comment_body_file" "$normalized_comment_json" + )" || gate_status=$? + fi + fi printf 'OpenCode comment gate result: %s (exit %s)\n' "$gate_result" "$gate_status" if [ "$gate_status" -eq 0 ]; then { diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py index 60023783..16383241 100644 --- a/services/analysis-engine/tests/test_supply_chain_policy.py +++ b/services/analysis-engine/tests/test_supply_chain_policy.py @@ -5049,6 +5049,9 @@ def test_opencode_review_stops_external_check_failures_without_review() -> None: assert "stop_for_external_failed_check_if_needed" in workflow assert 'stop_approval_without_review "EXTERNAL_CHECK_FAILURE"' in workflow assert 'map(tostring | ltrimstr("- ") | "- " + .)' in workflow + assert 'if [ "$gate_status" -ne 0 ]; then' in workflow + assert "python3 scripts/ci/opencode_review_normalize_output.py" in workflow + assert '"$HEAD_SHA" "$RUN_ID" "$RUN_ATTEMPT" "$clean_output"' in workflow assert 'if ! classification="$(' in workflow assert "jq -r '.classification // empty' \"$classification_file\" 2>/dev/null" in workflow