From 2bdbf4a78de3b56af8ccc3f1fa78acc00d258a1a Mon Sep 17 00:00:00 2001
From: Seongho Bae <me@seonghobae.me>
Date: Mon, 22 Jun 2026 02:04:15 +0900
Subject: [PATCH] harden opencode agent review evidence gate

---
 .github/workflows/opencode-review.yml         | 19 ++++++-
 .../ci/opencode_review_normalize_output.py    | 55 ++++++++++++++-----
 .../tests/test_supply_chain_policy.py         | 14 +++--
 3 files changed, 69 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/opencode-review.yml b/.github/workflows/opencode-review.yml
index 875fc403..6554b878 100644
--- a/.github/workflows/opencode-review.yml
+++ b/.github/workflows/opencode-review.yml
@@ -568,6 +568,11 @@ jobs:
           <opencode-evidence>
           $(head -c "$prompt_evidence_bytes" "$OPENCODE_EVIDENCE_FILE")
           </opencode-evidence>
+          Use CodeGraph for blast-radius, call graph, and test-coverage questions before broad local reads.
+          Prefer deletion, stdlib/native platform features, and already-installed dependencies before proposing new code or packages, but do not simplify away trust-boundary validation, data-loss handling, security, accessibility, or required tests.
+          For Korean prose, preserve facts, identifiers, numbers, and quotes while removing only formulaic filler or translationese.
+          When Strix evidence supports it, name the concrete CWE/KISA-style class such as injection, auth/authz, secrets, crypto, path traversal/file upload, XSS/CSRF/SSRF, error disclosure, or debug/deployment config; do not invent a category without evidence.
+          Before APPROVE, the summary must include at least one exact changed file path inspected as changed-file evidence. Never approve with a reason or summary that says no changes, no files, or no actionable changes were found when bounded evidence lists changed files; that control block is invalid.
           First line exactly:
           <!-- opencode-review-gate head_sha=${HEAD_SHA} run_id=${RUN_ID} run_attempt=${RUN_ATTEMPT} -->
           Then exactly one control block:
@@ -673,6 +678,11 @@ jobs:
           <opencode-evidence>
           $(head -c "$prompt_evidence_bytes" "$OPENCODE_EVIDENCE_FILE")
           </opencode-evidence>
+          Use CodeGraph for blast-radius, call graph, and test-coverage questions before broad local reads.
+          Prefer deletion, stdlib/native platform features, and already-installed dependencies before proposing new code or packages, but do not simplify away trust-boundary validation, data-loss handling, security, accessibility, or required tests.
+          For Korean prose, preserve facts, identifiers, numbers, and quotes while removing only formulaic filler or translationese.
+          When Strix evidence supports it, name the concrete CWE/KISA-style class such as injection, auth/authz, secrets, crypto, path traversal/file upload, XSS/CSRF/SSRF, error disclosure, or debug/deployment config; do not invent a category without evidence.
+          Before APPROVE, the summary must include at least one exact changed file path inspected as changed-file evidence. Never approve with a reason or summary that says no changes, no files, or no actionable changes were found when bounded evidence lists changed files; that control block is invalid.
           First line exactly:
           <!-- opencode-review-gate head_sha=${HEAD_SHA} run_id=${RUN_ID} run_attempt=${RUN_ATTEMPT} -->
           Then exactly one control block:
@@ -778,6 +788,11 @@ jobs:
           <opencode-evidence>
           $(head -c "$prompt_evidence_bytes" "$OPENCODE_EVIDENCE_FILE")
           </opencode-evidence>
+          Use CodeGraph for blast-radius, call graph, and test-coverage questions before broad local reads.
+          Prefer deletion, stdlib/native platform features, and already-installed dependencies before proposing new code or packages, but do not simplify away trust-boundary validation, data-loss handling, security, accessibility, or required tests.
+          For Korean prose, preserve facts, identifiers, numbers, and quotes while removing only formulaic filler or translationese.
+          When Strix evidence supports it, name the concrete CWE/KISA-style class such as injection, auth/authz, secrets, crypto, path traversal/file upload, XSS/CSRF/SSRF, error disclosure, or debug/deployment config; do not invent a category without evidence.
+          Before APPROVE, the summary must include at least one exact changed file path inspected as changed-file evidence. Never approve with a reason or summary that says no changes, no files, or no actionable changes were found when bounded evidence lists changed files; that control block is invalid.
           First line exactly:
           <!-- opencode-review-gate head_sha=${HEAD_SHA} run_id=${RUN_ID} run_attempt=${RUN_ATTEMPT} -->
           Then exactly one control block:
@@ -2186,8 +2201,8 @@ jobs:
             failed_checks_file="$(mktemp)"
             failed_check_evidence_file="$(mktemp)"
             failed_check_review_body_file="$(mktemp)"
-            failed_check_review_payload_file=""
-            failed_check_inline_failure_body_file=""
+            failed_check_review_payload_file="$(mktemp)"
+            failed_check_inline_failure_body_file="$(mktemp)"
             pending_checks_file=""
             # shellcheck disable=SC2329
             cleanup_failed_outcome_files() {
diff --git a/scripts/ci/opencode_review_normalize_output.py b/scripts/ci/opencode_review_normalize_output.py
index c7bd9c41..32145f8f 100755
--- a/scripts/ci/opencode_review_normalize_output.py
+++ b/scripts/ci/opencode_review_normalize_output.py
@@ -25,9 +25,6 @@
     "structural exploration is unnecessary",
     "structural analysis is unnecessary",
     "structural review is unnecessary",
-    "could not be reviewed",
-    "could not inspect",
-    "could not be inspected",
     "changed files could not be inspected",
     "source files could not be inspected",
     "required files could not be inspected",
@@ -37,13 +34,17 @@
     "could not access the source files",
     "could not access required files",
     "could not access required evidence",
-    "file access issues",
-    "file inaccessibility",
     "evidence was truncated",
-    "not provided in evidence",
     "truncated evidence",
-    "unable to inspect",
-    "insufficient evidence",
+    "no changes detected",
+    "no changes were detected",
+    "no changes found",
+    "no changes were found",
+    "no files or changes were found",
+    "no files or changes found",
+    "no actionable changes to review",
+    "no changes to review",
+    "no changed files",
 )
 
 STRUCTURAL_FAILURE_PATTERNS = (
@@ -61,6 +62,20 @@
         r"\b(?:structural\s+(?:exploration|analysis|review))\s+"
         r"(?:was\s+)?(?:unavailable|incomplete|blocked|not possible)\b"
     ),
+    re.compile(
+        r"\bno\s+(?:files?\s+or\s+)?changes?\s+"
+        r"(?:were\s+)?(?:detected|found|present)\b"
+    ),
+    re.compile(r"\bno\s+(?:actionable\s+)?changes?\s+to\s+review\b"),
+    re.compile(r"\b(?:no|zero)\s+changed\s+files?\b"),
+)
+
+CHANGED_FILE_EVIDENCE_PATTERN = re.compile(
+    r"(?<![A-Za-z0-9_])(?:[A-Za-z0-9_.-]+/)+[A-Za-z0-9_.@+-]+"
+    r"|(?<![A-Za-z0-9_])[A-Za-z0-9_.-]+\."
+    r"(?:py|js|jsx|ts|tsx|mjs|cjs|sh|bash|yml|yaml|json|jsonc|toml|lock|md|txt|css|scss|html|sql|go|rs|java|kt|swift|rb|php|cs|xml|ini|cfg)"
+    r"(?![A-Za-z0-9_])"
+    r"|(?<![A-Za-z0-9_])(?:Dockerfile|Makefile|README|LICENSE|AGENTS\.md)(?![A-Za-z0-9_])"
 )
 
 
@@ -72,8 +87,13 @@ def admits_missing_structural_review(reason: str, summary: str) -> bool:
     )
 
 
+def mentions_changed_file_evidence(reason: str, summary: str) -> bool:
+    """Return whether an approval names at least one concrete changed file/path."""
+    return bool(CHANGED_FILE_EVIDENCE_PATTERN.search(f"{reason}\n{summary}"))
+
+
 def check_structural_approval(control_file: Path) -> int:
-    """Reject approvals whose control JSON admits missing structural review."""
+    """Validate an already-normalized control block before publishing approval."""
     try:
         value = json.loads(control_file.read_text(encoding="utf-8"))
     except (OSError, json.JSONDecodeError) as exc:
@@ -90,6 +110,12 @@ def check_structural_approval(control_file: Path) -> int:
     ):
         print("NO_CONCLUSION", file=sys.stderr)
         return 4
+    if value.get("result") == "APPROVE" and not mentions_changed_file_evidence(
+        str(value.get("reason", "")),
+        str(value.get("summary", "")),
+    ):
+        print("NO_CONCLUSION", file=sys.stderr)
+        return 4
 
     return 0
 
@@ -101,7 +127,7 @@ def valid_control(
     expected_run_id: str,
     expected_run_attempt: str,
 ) -> dict[str, Any] | None:
-    """Return a normalized review control object when all gate fields are valid."""
+    """Return a normalized control block when it matches the current run."""
     if not isinstance(value, dict):
         return None
 
@@ -134,6 +160,8 @@ def valid_control(
         return None
     if result == "APPROVE" and admits_missing_structural_review(reason, summary):
         return None
+    if result == "APPROVE" and not mentions_changed_file_evidence(reason, summary):
+        return None
 
     required_finding_fields = (
         "path",
@@ -148,7 +176,8 @@ def valid_control(
     for finding in findings:
         if not isinstance(finding, dict):
             return None
-        if not isinstance(finding.get("line"), int) or finding["line"] <= 0:
+        line = finding.get("line")
+        if isinstance(line, bool) or not isinstance(line, int) or line <= 0:
             return None
         for field in required_finding_fields:
             if not isinstance(finding.get(field), str) or not finding[field].strip():
@@ -166,7 +195,7 @@ def valid_control(
 
 
 def iter_json_objects(text: str) -> list[Any]:
-    """Extract JSON objects from possibly noisy OpenCode output text."""
+    """Extract JSON objects from raw OpenCode output that may include prose."""
     decoder = json.JSONDecoder()
     values: list[Any] = []
 
@@ -189,7 +218,7 @@ def iter_json_objects(text: str) -> list[Any]:
 
 
 def main(argv: list[str]) -> int:
-    """Normalize an OpenCode output file for the shell approval gate."""
+    """Run the normalizer CLI and write the publishable control block."""
     if len(argv) == 3 and argv[1] == "--check-structural-approval":
         return check_structural_approval(Path(argv[2]))
 
diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py
index c471caac..08ebc48e 100644
--- a/services/analysis-engine/tests/test_supply_chain_policy.py
+++ b/services/analysis-engine/tests/test_supply_chain_policy.py
@@ -5165,8 +5165,11 @@ def test_opencode_normalizer_defaults_missing_approve_findings(tmp_path: Path) -
             [
                 "review text",
                 '{"head_sha":"abc123","run_id":"456","run_attempt":"1",'
-                '"result":"APPROVE","reason":"checks and review passed",'
-                '"summary":"no source-backed blockers found"}',
+                '"result":"APPROVE",'
+                '"reason":"checks passed for '
+                'scripts/ci/opencode_review_normalize_output.py",'
+                '"summary":"no blockers in '
+                'scripts/ci/opencode_review_normalize_output.py"}',
             ]
         ),
         encoding="utf-8",
@@ -5198,8 +5201,11 @@ def test_opencode_review_gate_defaults_missing_approve_findings(tmp_path: Path)
                 "",
                 "<!-- opencode-review-control-v1",
                 '{"head_sha":"abc123","run_id":"456","run_attempt":"1",'
-                '"result":"APPROVE","reason":"checks and review passed",'
-                '"summary":"no source-backed blockers found"}',
+                '"result":"APPROVE",'
+                '"reason":"checks passed for '
+                'scripts/ci/opencode_review_normalize_output.py",'
+                '"summary":"no blockers in '
+                'scripts/ci/opencode_review_normalize_output.py"}',
                 "-->",
                 "",
             ]