From 2bdbf4a78de3b56af8ccc3f1fa78acc00d258a1a Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Mon, 22 Jun 2026 02:04:15 +0900 Subject: [PATCH] harden opencode agent review evidence gate --- .github/workflows/opencode-review.yml | 19 ++++++- .../ci/opencode_review_normalize_output.py | 55 ++++++++++++++----- .../tests/test_supply_chain_policy.py | 14 +++-- 3 files changed, 69 insertions(+), 19 deletions(-) diff --git a/.github/workflows/opencode-review.yml b/.github/workflows/opencode-review.yml index 875fc403..6554b878 100644 --- a/.github/workflows/opencode-review.yml +++ b/.github/workflows/opencode-review.yml @@ -568,6 +568,11 @@ jobs: $(head -c "$prompt_evidence_bytes" "$OPENCODE_EVIDENCE_FILE") + Use CodeGraph for blast-radius, call graph, and test-coverage questions before broad local reads. + Prefer deletion, stdlib/native platform features, and already-installed dependencies before proposing new code or packages, but do not simplify away trust-boundary validation, data-loss handling, security, accessibility, or required tests. + For Korean prose, preserve facts, identifiers, numbers, and quotes while removing only formulaic filler or translationese. + When Strix evidence supports it, name the concrete CWE/KISA-style class such as injection, auth/authz, secrets, crypto, path traversal/file upload, XSS/CSRF/SSRF, error disclosure, or debug/deployment config; do not invent a category without evidence. + Before APPROVE, the summary must include at least one exact changed file path inspected as changed-file evidence. Never approve with a reason or summary that says no changes, no files, or no actionable changes were found when bounded evidence lists changed files; that control block is invalid. First line exactly: Then exactly one control block: @@ -673,6 +678,11 @@ jobs: $(head -c "$prompt_evidence_bytes" "$OPENCODE_EVIDENCE_FILE") + Use CodeGraph for blast-radius, call graph, and test-coverage questions before broad local reads. + Prefer deletion, stdlib/native platform features, and already-installed dependencies before proposing new code or packages, but do not simplify away trust-boundary validation, data-loss handling, security, accessibility, or required tests. + For Korean prose, preserve facts, identifiers, numbers, and quotes while removing only formulaic filler or translationese. + When Strix evidence supports it, name the concrete CWE/KISA-style class such as injection, auth/authz, secrets, crypto, path traversal/file upload, XSS/CSRF/SSRF, error disclosure, or debug/deployment config; do not invent a category without evidence. + Before APPROVE, the summary must include at least one exact changed file path inspected as changed-file evidence. Never approve with a reason or summary that says no changes, no files, or no actionable changes were found when bounded evidence lists changed files; that control block is invalid. First line exactly: Then exactly one control block: @@ -778,6 +788,11 @@ jobs: $(head -c "$prompt_evidence_bytes" "$OPENCODE_EVIDENCE_FILE") + Use CodeGraph for blast-radius, call graph, and test-coverage questions before broad local reads. + Prefer deletion, stdlib/native platform features, and already-installed dependencies before proposing new code or packages, but do not simplify away trust-boundary validation, data-loss handling, security, accessibility, or required tests. + For Korean prose, preserve facts, identifiers, numbers, and quotes while removing only formulaic filler or translationese. + When Strix evidence supports it, name the concrete CWE/KISA-style class such as injection, auth/authz, secrets, crypto, path traversal/file upload, XSS/CSRF/SSRF, error disclosure, or debug/deployment config; do not invent a category without evidence. + Before APPROVE, the summary must include at least one exact changed file path inspected as changed-file evidence. Never approve with a reason or summary that says no changes, no files, or no actionable changes were found when bounded evidence lists changed files; that control block is invalid. First line exactly: Then exactly one control block: @@ -2186,8 +2201,8 @@ jobs: failed_checks_file="$(mktemp)" failed_check_evidence_file="$(mktemp)" failed_check_review_body_file="$(mktemp)" - failed_check_review_payload_file="" - failed_check_inline_failure_body_file="" + failed_check_review_payload_file="$(mktemp)" + failed_check_inline_failure_body_file="$(mktemp)" pending_checks_file="" # shellcheck disable=SC2329 cleanup_failed_outcome_files() { diff --git a/scripts/ci/opencode_review_normalize_output.py b/scripts/ci/opencode_review_normalize_output.py index c7bd9c41..32145f8f 100755 --- a/scripts/ci/opencode_review_normalize_output.py +++ b/scripts/ci/opencode_review_normalize_output.py @@ -25,9 +25,6 @@ "structural exploration is unnecessary", "structural analysis is unnecessary", "structural review is unnecessary", - "could not be reviewed", - "could not inspect", - "could not be inspected", "changed files could not be inspected", "source files could not be inspected", "required files could not be inspected", @@ -37,13 +34,17 @@ "could not access the source files", "could not access required files", "could not access required evidence", - "file access issues", - "file inaccessibility", "evidence was truncated", - "not provided in evidence", "truncated evidence", - "unable to inspect", - "insufficient evidence", + "no changes detected", + "no changes were detected", + "no changes found", + "no changes were found", + "no files or changes were found", + "no files or changes found", + "no actionable changes to review", + "no changes to review", + "no changed files", ) STRUCTURAL_FAILURE_PATTERNS = ( @@ -61,6 +62,20 @@ r"\b(?:structural\s+(?:exploration|analysis|review))\s+" r"(?:was\s+)?(?:unavailable|incomplete|blocked|not possible)\b" ), + re.compile( + r"\bno\s+(?:files?\s+or\s+)?changes?\s+" + r"(?:were\s+)?(?:detected|found|present)\b" + ), + re.compile(r"\bno\s+(?:actionable\s+)?changes?\s+to\s+review\b"), + re.compile(r"\b(?:no|zero)\s+changed\s+files?\b"), +) + +CHANGED_FILE_EVIDENCE_PATTERN = re.compile( + r"(? bool: ) +def mentions_changed_file_evidence(reason: str, summary: str) -> bool: + """Return whether an approval names at least one concrete changed file/path.""" + return bool(CHANGED_FILE_EVIDENCE_PATTERN.search(f"{reason}\n{summary}")) + + def check_structural_approval(control_file: Path) -> int: - """Reject approvals whose control JSON admits missing structural review.""" + """Validate an already-normalized control block before publishing approval.""" try: value = json.loads(control_file.read_text(encoding="utf-8")) except (OSError, json.JSONDecodeError) as exc: @@ -90,6 +110,12 @@ def check_structural_approval(control_file: Path) -> int: ): print("NO_CONCLUSION", file=sys.stderr) return 4 + if value.get("result") == "APPROVE" and not mentions_changed_file_evidence( + str(value.get("reason", "")), + str(value.get("summary", "")), + ): + print("NO_CONCLUSION", file=sys.stderr) + return 4 return 0 @@ -101,7 +127,7 @@ def valid_control( expected_run_id: str, expected_run_attempt: str, ) -> dict[str, Any] | None: - """Return a normalized review control object when all gate fields are valid.""" + """Return a normalized control block when it matches the current run.""" if not isinstance(value, dict): return None @@ -134,6 +160,8 @@ def valid_control( return None if result == "APPROVE" and admits_missing_structural_review(reason, summary): return None + if result == "APPROVE" and not mentions_changed_file_evidence(reason, summary): + return None required_finding_fields = ( "path", @@ -148,7 +176,8 @@ def valid_control( for finding in findings: if not isinstance(finding, dict): return None - if not isinstance(finding.get("line"), int) or finding["line"] <= 0: + line = finding.get("line") + if isinstance(line, bool) or not isinstance(line, int) or line <= 0: return None for field in required_finding_fields: if not isinstance(finding.get(field), str) or not finding[field].strip(): @@ -166,7 +195,7 @@ def valid_control( def iter_json_objects(text: str) -> list[Any]: - """Extract JSON objects from possibly noisy OpenCode output text.""" + """Extract JSON objects from raw OpenCode output that may include prose.""" decoder = json.JSONDecoder() values: list[Any] = [] @@ -189,7 +218,7 @@ def iter_json_objects(text: str) -> list[Any]: def main(argv: list[str]) -> int: - """Normalize an OpenCode output file for the shell approval gate.""" + """Run the normalizer CLI and write the publishable control block.""" if len(argv) == 3 and argv[1] == "--check-structural-approval": return check_structural_approval(Path(argv[2])) diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py index c471caac..08ebc48e 100644 --- a/services/analysis-engine/tests/test_supply_chain_policy.py +++ b/services/analysis-engine/tests/test_supply_chain_policy.py @@ -5165,8 +5165,11 @@ def test_opencode_normalizer_defaults_missing_approve_findings(tmp_path: Path) - [ "review text", '{"head_sha":"abc123","run_id":"456","run_attempt":"1",' - '"result":"APPROVE","reason":"checks and review passed",' - '"summary":"no source-backed blockers found"}', + '"result":"APPROVE",' + '"reason":"checks passed for ' + 'scripts/ci/opencode_review_normalize_output.py",' + '"summary":"no blockers in ' + 'scripts/ci/opencode_review_normalize_output.py"}', ] ), encoding="utf-8", @@ -5198,8 +5201,11 @@ def test_opencode_review_gate_defaults_missing_approve_findings(tmp_path: Path) "", "", "", ]