From 78482fac055ff99e65e0cc04e384984615bd1aeb Mon Sep 17 00:00:00 2001 From: seonghobae <8172694+seonghobae@users.noreply.github.com> Date: Tue, 16 Jun 2026 05:14:19 +0000 Subject: [PATCH 1/4] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Extract=20constant=20di?= =?UTF-8?q?ctionaries=20to=20module=20level=20for=20print=20results?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .jules/bolt.md | 5 +++++ scanner/cli/vibesec.py | 23 +++++++++++++---------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/.jules/bolt.md b/.jules/bolt.md index 3708540..45576b6 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -13,3 +13,8 @@ ## 2026-06-14 - Deferring Pathlib Operations in Hot Paths **Learning:** In highly repetitive loops like file scanners (e.g., iterating through thousands of safe files), preemptively calculating `Path.relative_to()` and sanitizing strings adds significant cumulative overhead. Pathlib operations internally parse paths, check parts, and construct new objects, which is extremely expensive when executed on a per-file basis unconditionally. **Action:** Always defer expensive path computations (like converting paths to relative or string sanitization) until *after* the fast-path condition (like a regex match) triggers. This drastically cuts down on unnecessary string operations for clean files. +## 2024-05-16 - Module-level Constants for Performance + +**Learning:** Recreating static dictionaries (like severity mappings and icons) inside frequently called functions causes unnecessary memory allocations and slight performance overhead on every call. + +**Action:** Extract static dictionaries to module-level constants to ensure they are instantiated only once when the module is loaded. diff --git a/scanner/cli/vibesec.py b/scanner/cli/vibesec.py index 5fb6e4e..dd98692 100644 --- a/scanner/cli/vibesec.py +++ b/scanner/cli/vibesec.py @@ -585,21 +585,24 @@ def _scan_file(file_path: Path, base_path: Path): return findings + +# ⚡ Bolt: Move severity mappings to module level to avoid redundant +# dictionary allocations on every call to print scan results. +SEVERITY_ORDER = {"CRITICAL": 0, "HIGH": 1, "WARNING": 2, "INFO": 3} +SEVERITY_ICONS = { + "CRITICAL": "🔴 CRITICAL", + "HIGH": "🟠 HIGH", + "WARNING": "🟡 WARNING", + "INFO": "🔵 INFO", +} + def _print_scan_results(findings, files_scanned): - severity_order = {"CRITICAL": 0, "HIGH": 1, "WARNING": 2, "INFO": 3} - findings.sort(key=lambda f: severity_order.get(f["severity"], 99)) - - severity_icons = { - "CRITICAL": "🔴 CRITICAL", - "HIGH": "🟠 HIGH", - "WARNING": "🟡 WARNING", - "INFO": "🔵 INFO", - } + findings.sort(key=lambda f: SEVERITY_ORDER.get(f["severity"], 99)) counts = {"CRITICAL": 0, "HIGH": 0, "WARNING": 0, "INFO": 0} for f in findings: counts[f["severity"]] += 1 - icon = severity_icons.get(f["severity"], f["severity"]) + icon = SEVERITY_ICONS.get(f["severity"], f["severity"]) print(f"[{icon}] {f['file']}:{f['line']}") print(f" Rule: {f['rule_id']}") print(f" {f['message']}") From bd1d972bf791dffc2081c2752151a011fdbeab59 Mon Sep 17 00:00:00 2001 From: seonghobae <8172694+seonghobae@users.noreply.github.com> Date: Tue, 16 Jun 2026 06:25:00 +0000 Subject: [PATCH 2/4] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Extract=20constant=20di?= =?UTF-8?q?ctionaries=20to=20module=20level=20for=20print=20results?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .jules/bolt.md | 10 +- scanner/cli/vibesec.py | 137 +++++++-------- .../ci/opencode_review_normalize_output.py | 80 +++------ scripts/ci/pr_review_merge_scheduler.py | 26 ++- tests/scripts/__init__.py | 0 tests/scripts/ci/__init__.py | 0 .../test_opencode_review_normalize_output.py | 166 ------------------ .../ci/test_pr_review_merge_scheduler.py | 80 --------- .../test_opencode_review_normalize_output.py | 19 -- tests/test_pr_review_merge_scheduler.py | 24 --- tests/test_vibesec.py | 111 +----------- 11 files changed, 102 insertions(+), 551 deletions(-) delete mode 100644 tests/scripts/__init__.py delete mode 100644 tests/scripts/ci/__init__.py delete mode 100644 tests/scripts/ci/test_opencode_review_normalize_output.py delete mode 100644 tests/scripts/ci/test_pr_review_merge_scheduler.py delete mode 100644 tests/test_opencode_review_normalize_output.py delete mode 100644 tests/test_pr_review_merge_scheduler.py diff --git a/.jules/bolt.md b/.jules/bolt.md index 9cf95b2..45576b6 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -14,13 +14,7 @@ **Learning:** In highly repetitive loops like file scanners (e.g., iterating through thousands of safe files), preemptively calculating `Path.relative_to()` and sanitizing strings adds significant cumulative overhead. Pathlib operations internally parse paths, check parts, and construct new objects, which is extremely expensive when executed on a per-file basis unconditionally. **Action:** Always defer expensive path computations (like converting paths to relative or string sanitization) until *after* the fast-path condition (like a regex match) triggers. This drastically cuts down on unnecessary string operations for clean files. ## 2024-05-16 - Module-level Constants for Performance -**Learning:** Recreating static dictionaries (like severity mappings and icons) inside frequently called functions causes unnecessary memory allocations and slight performance overhead on every call. -**Action:** Extract static dictionaries to module-level constants to ensure they are instantiated only once when the module is loaded. -## 2024-05-18 - Set literal vs Tuple membership check -**Learning:** In Python, using set literals for constant membership checks (e.g., `in {'CRITICAL', 'HIGH'}`) inside loops or comprehensions is highly efficient because CPython optimizes them into `frozenset` constants at compile time, eliminating runtime instantiation overhead. Using `tuple` for these checks performs an `O(n)` linear search, while a `frozenset` performs an `O(1)` hash lookup. -**Action:** Prefer set literals `in {"A", "B"}` over tuples `in ("A", "B")` when performing membership checks against constant items, especially in hot paths or tight loops. +**Learning:** Recreating static dictionaries (like severity mappings and icons) inside frequently called functions causes unnecessary memory allocations and slight performance overhead on every call. -## 2024-06-16 - Parallelize Subprocess CLI Calls -**Learning:** Sequential, synchronous execution of `subprocess.run` (like calling the GitHub CLI) across multiple items (like PRs) is a significant I/O bottleneck. -**Action:** Use `concurrent.futures.ThreadPoolExecutor` with `functools.partial` and `executor.map` to safely parallelize I/O-bound subprocess executions, significantly reducing overall script runtime. +**Action:** Extract static dictionaries to module-level constants to ensure they are instantiated only once when the module is loaded. diff --git a/scanner/cli/vibesec.py b/scanner/cli/vibesec.py index eddad2e..dd98692 100644 --- a/scanner/cli/vibesec.py +++ b/scanner/cli/vibesec.py @@ -49,7 +49,6 @@ - File uploads must validate type, size, and filename server-side. - Never set CORS to allow all origins on authenticated endpoints. - Add tests for cross-user access denial on every resource endpoint. -- When dealing with OWASP rules or security vulnerabilities, automatically run `vibesec scan .` as a hook or use tools like `semgrep` to verify the codebase. See https://github.com/Seongho-Bae/VibeSec for full rules and checklists. """ @@ -67,7 +66,6 @@ 6. **Supabase**: Use getUser() (not getSession()) server-side; RLS on all tables. 7. **Files**: Validate type, size, and generate server-side filenames for uploads. 8. **CORS**: Restrict to known origins on authenticated endpoints. -9. **Automated Scanning**: When dealing with OWASP rules or security vulnerabilities, automatically run `vibesec scan .` as a hook or use tools like `semgrep` to verify the codebase. Return 401 for unauthenticated requests, 403 for ownership violations. @@ -296,54 +294,6 @@ # Command implementations # --------------------------------------------------------------------------- - -def _install_tool_rules(config: dict, project_root, installed: list): - """Install the specific rules file based on tool configuration.""" - if config.get("shared_only"): - return - - target_file = project_root / config['path'] - - # SECURITY: Prevent Arbitrary File Write via symlink path traversal - if not target_file.resolve().is_relative_to(project_root): - print(f"Error: Target path {target_file} escapes the project root. Aborting.", file=sys.stderr) - sys.exit(1) - - target_file.parent.mkdir(parents=True, exist_ok=True) - if target_file.is_symlink(): - target_file.unlink() - - if "append_marker" in config: - if target_file.exists(): - existing = target_file.read_text() - if config['append_marker'] not in existing: - target_file.write_text(existing + "\n\n" + config["content"]) - installed.append(f"{config['path']} (appended)") - else: - print(f"{config['path']} already contains {config['append_marker']} rules — skipping.") - else: - target_file.write_text(config["content"]) - installed.append(str(config['path'])) - else: - target_file.write_text(config["content"]) - installed.append(str(config['path'])) - - -def _install_checklist(project_root, installed: list): - """Install the VIBESEC_CHECKLIST.md file.""" - checklist_file = project_root / "VIBESEC_CHECKLIST.md" - - # SECURITY: Prevent Arbitrary File Write via symlink path traversal - if not checklist_file.resolve().is_relative_to(project_root): - print(f"Error: Checklist path {checklist_file} escapes the project root. Aborting.", file=sys.stderr) - sys.exit(1) - - if checklist_file.is_symlink(): - checklist_file.unlink() - if not checklist_file.exists(): - checklist_file.write_text(CHECKLIST_TEMPLATE) - installed.append("VIBESEC_CHECKLIST.md") - def cmd_init(args): """Install security rules into the project.""" tool = getattr(args, "tool", "cursor") or "cursor" @@ -377,8 +327,46 @@ def cmd_init(args): sys.exit(1) config = tool_configs[tool] - _install_tool_rules(config, project_root, installed) - _install_checklist(project_root, installed) + if not config.get("shared_only"): + target_file = project_root / config["path"] + + # SECURITY: Prevent Arbitrary File Write via symlink path traversal + if not target_file.resolve().is_relative_to(project_root): + print(f"Error: Target path {target_file} escapes the project root. Aborting.", file=sys.stderr) + sys.exit(1) + + target_file.parent.mkdir(parents=True, exist_ok=True) + if target_file.is_symlink(): + target_file.unlink() + + if "append_marker" in config: + if target_file.exists(): + existing = target_file.read_text() + if config["append_marker"] not in existing: + target_file.write_text(existing + "\n\n" + config["content"]) + installed.append(f"{config['path']} (appended)") + else: + print(f"{config['path']} already contains {config['append_marker']} rules — skipping.") + else: + target_file.write_text(config["content"]) + installed.append(str(config["path"])) + else: + target_file.write_text(config["content"]) + installed.append(str(config["path"])) + # Always create the checklist + checklist_file = project_root / "VIBESEC_CHECKLIST.md" + + # SECURITY: Prevent Arbitrary File Write via symlink path traversal + if not checklist_file.resolve().is_relative_to(project_root): + print(f"Error: Checklist path {checklist_file} escapes the project root. Aborting.", file=sys.stderr) + sys.exit(1) + + if checklist_file.is_symlink(): + checklist_file.unlink() + if not checklist_file.exists(): + checklist_file.write_text(CHECKLIST_TEMPLATE) + installed.append("VIBESEC_CHECKLIST.md") + if stack and "supabase" in stack: _print_supabase_reminder() @@ -431,7 +419,7 @@ def cmd_scan(args): findings.extend(file_findings) _print_scan_results(findings, files_scanned) - return 1 if any(f["severity"] in {"CRITICAL", "HIGH"} for f in findings) else 0 + return 1 if any(f["severity"] in ("CRITICAL", "HIGH") for f in findings) else 0 def cmd_hook(args): @@ -505,28 +493,6 @@ def _get_applicable_rules(ext: str): return _RULES_CACHE[ext] -def _process_dir_entries(dir_path: str): - """Process entries in a directory, yielding files and returning subdirectories.""" - dirs = [] - try: - with os.scandir(dir_path) as it: - for entry in it: - try: - if entry.is_symlink(): - continue - if entry.is_dir(follow_symlinks=False): - if entry.name not in SKIP_DIRS and not entry.name.startswith("."): - dirs.append(entry.path) - elif entry.is_file(follow_symlinks=False): - _, ext = os.path.splitext(entry.name) - if ext.lower() not in SKIP_EXTENSIONS: - yield Path(entry.path) - except (OSError, PermissionError): - continue - except (OSError, PermissionError): - pass - return dirs - def _collect_files(base_path: Path): """Collect all scannable files, skipping unwanted directories.""" # ⚡ Bolt: Optimize file traversal using os.scandir and os.path.splitext @@ -536,8 +502,25 @@ def _collect_files(base_path: Path): stack = [str(base_path)] while stack: current_dir = stack.pop() - dirs = yield from _process_dir_entries(current_dir) - stack.extend(reversed(dirs)) + try: + with os.scandir(current_dir) as it: + dirs = [] + for entry in it: + try: + if entry.is_symlink(): + continue + if entry.is_dir(follow_symlinks=False): + if entry.name not in SKIP_DIRS and not entry.name.startswith("."): + dirs.append(entry.path) + elif entry.is_file(follow_symlinks=False): + _, ext = os.path.splitext(entry.name) + if ext.lower() not in SKIP_EXTENSIONS: + yield Path(entry.path) + except (OSError, PermissionError): + continue + stack.extend(reversed(dirs)) + except (OSError, PermissionError): + pass def _sanitize_terminal_output(text: str) -> str: diff --git a/scripts/ci/opencode_review_normalize_output.py b/scripts/ci/opencode_review_normalize_output.py index 7d2a797..2a850c6 100755 --- a/scripts/ci/opencode_review_normalize_output.py +++ b/scripts/ci/opencode_review_normalize_output.py @@ -1,47 +1,47 @@ #!/usr/bin/env python3 """Normalize OpenCode review output into the strict approval-gate contract.""" +from __future__ import annotations + import json import sys from pathlib import Path from typing import Any -def _validate_metadata( - value: dict[str, Any], +def valid_control( + value: Any, + *, expected_head_sha: str, expected_run_id: str, expected_run_attempt: str, -) -> bool: +) -> dict[str, Any] | None: + if not isinstance(value, dict): + return None + if value.get("head_sha") != expected_head_sha: - return False + return None if value.get("run_id") != expected_run_id: - return False + return None if value.get("run_attempt") != expected_run_attempt: - return False - return True - + return None -def _validate_result_and_reason(value: dict[str, Any]) -> bool: result = value.get("result") if result not in {"APPROVE", "REQUEST_CHANGES"}: - return False + return None + if not isinstance(value.get("reason"), str) or not value["reason"].strip(): - return False + return None if not isinstance(value.get("summary"), str) or not value["summary"].strip(): - return False - return True - + return None -def _validate_findings(value: dict[str, Any]) -> bool: - result = value.get("result") findings = value.get("findings") if not isinstance(findings, list): - return False + return None if result == "APPROVE" and findings: - return False + return None if result == "REQUEST_CHANGES" and not findings: - return False + return None required_finding_fields = ( "path", @@ -55,47 +55,21 @@ def _validate_findings(value: dict[str, Any]) -> bool: ) for finding in findings: if not isinstance(finding, dict): - return False + return None if not isinstance(finding.get("line"), int) or finding["line"] <= 0: - return False + return None for field in required_finding_fields: if not isinstance(finding.get(field), str) or not finding[field].strip(): - return False - return True - - -def valid_control( - value: Any, - *, - expected_head_sha: str, - expected_run_id: str, - expected_run_attempt: str, -) -> dict[str, Any] | None: - if not isinstance(value, dict): - return None - - if not _validate_metadata( - value, - expected_head_sha, - expected_run_id, - expected_run_attempt, - ): - return None - - if not _validate_result_and_reason(value): - return None - - if not _validate_findings(value): - return None + return None return { "head_sha": value["head_sha"], "run_id": value["run_id"], "run_attempt": value["run_attempt"], - "result": value["result"], + "result": result, "reason": value["reason"], "summary": value["summary"], - "findings": value["findings"], + "findings": findings, } @@ -132,12 +106,6 @@ def main(argv: list[str]) -> int: expected_head_sha, expected_run_id, expected_run_attempt, output_file_arg = argv[1:] output_file = Path(output_file_arg) - project_root = Path.cwd().resolve() - - if not output_file.resolve().is_relative_to(project_root): - print(f"error: output file path {output_file_arg!r} is outside the project root", file=sys.stderr) - return 65 - try: output_text = output_file.read_text(encoding="utf-8") except OSError as exc: diff --git a/scripts/ci/pr_review_merge_scheduler.py b/scripts/ci/pr_review_merge_scheduler.py index cab2198..a8fee70 100644 --- a/scripts/ci/pr_review_merge_scheduler.py +++ b/scripts/ci/pr_review_merge_scheduler.py @@ -1,12 +1,11 @@ #!/usr/bin/env python3 +from __future__ import annotations import argparse import json import os import subprocess import sys -import concurrent.futures -from functools import partial from dataclasses import dataclass from typing import Any @@ -331,18 +330,17 @@ def main(argv: list[str]) -> int: if not args.repo: raise SystemExit("--repo is required") prs = fetch_open_prs(args.repo, args.max_prs) - - inspect_func = partial( - inspect_pr, - args.repo, - dry_run=args.dry_run, - trigger_reviews=args.trigger_reviews, - enable_auto_merge_flag=args.enable_auto_merge, - workflow=args.review_workflow, - ) - with concurrent.futures.ThreadPoolExecutor() as executor: - decisions = list(executor.map(inspect_func, prs)) - + decisions = [ + inspect_pr( + args.repo, + pr, + dry_run=args.dry_run, + trigger_reviews=args.trigger_reviews, + enable_auto_merge_flag=args.enable_auto_merge, + workflow=args.review_workflow, + ) + for pr in prs + ] print_summary(decisions, dry_run=args.dry_run) return 0 diff --git a/tests/scripts/__init__.py b/tests/scripts/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/scripts/ci/__init__.py b/tests/scripts/ci/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/scripts/ci/test_opencode_review_normalize_output.py b/tests/scripts/ci/test_opencode_review_normalize_output.py deleted file mode 100644 index 6926389..0000000 --- a/tests/scripts/ci/test_opencode_review_normalize_output.py +++ /dev/null @@ -1,166 +0,0 @@ -import pytest - -from scripts.ci.opencode_review_normalize_output import valid_control - -def test_valid_control_approve(): - value = { - "head_sha": "sha123", - "run_id": "id123", - "run_attempt": "1", - "result": "APPROVE", - "reason": "Looks good", - "summary": "Approved", - "findings": [], - "extra_field": "should_be_ignored" - } - result = valid_control( - value, - expected_head_sha="sha123", - expected_run_id="id123", - expected_run_attempt="1" - ) - assert result == { - "head_sha": "sha123", - "run_id": "id123", - "run_attempt": "1", - "result": "APPROVE", - "reason": "Looks good", - "summary": "Approved", - "findings": [] - } - -def test_valid_control_request_changes(): - value = { - "head_sha": "sha123", - "run_id": "id123", - "run_attempt": "1", - "result": "REQUEST_CHANGES", - "reason": "Has issues", - "summary": "Needs work", - "findings": [ - { - "line": 42, - "path": "file.py", - "severity": "high", - "title": "Bug", - "problem": "Bad code", - "root_cause": "Typo", - "fix_direction": "Fix it", - "regression_test_direction": "Test it", - "suggested_diff": "- bad\n+ good", - "extra": "ignore" - } - ] - } - result = valid_control( - value, - expected_head_sha="sha123", - expected_run_id="id123", - expected_run_attempt="1" - ) - assert result is not None - assert result["findings"] == value["findings"] - -def test_valid_control_invalid_type(): - assert valid_control("not a dict", expected_head_sha="s", expected_run_id="i", expected_run_attempt="1") is None - -def test_valid_control_mismatched_metadata(): - value = { - "head_sha": "sha123", - "run_id": "id123", - "run_attempt": "1", - "result": "APPROVE", - "reason": "r", - "summary": "s", - "findings": [] - } - - assert valid_control(value, expected_head_sha="wrong", expected_run_id="id123", expected_run_attempt="1") is None - assert valid_control(value, expected_head_sha="sha123", expected_run_id="wrong", expected_run_attempt="1") is None - assert valid_control(value, expected_head_sha="sha123", expected_run_id="id123", expected_run_attempt="wrong") is None - -def test_valid_control_invalid_result(): - value = { - "head_sha": "sha", - "run_id": "id", - "run_attempt": "1", - "result": "INVALID", - "reason": "r", - "summary": "s", - "findings": [] - } - assert valid_control(value, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - -def test_valid_control_invalid_reason_summary(): - base = { - "head_sha": "sha", "run_id": "id", "run_attempt": "1", - "result": "APPROVE", "findings": [] - } - - # Missing reason - val = dict(base, summary="s") - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - - # Empty reason - val = dict(base, reason=" ", summary="s") - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - - # Missing summary - val = dict(base, reason="r") - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - - # Empty summary - val = dict(base, reason="r", summary="") - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - -def test_valid_control_findings_logic(): - base = { - "head_sha": "sha", "run_id": "id", "run_attempt": "1", - "reason": "r", "summary": "s" - } - - # findings not a list - val = dict(base, result="APPROVE", findings="not a list") - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - - # APPROVE with findings - val = dict(base, result="APPROVE", findings=[{}]) - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - - # REQUEST_CHANGES without findings - val = dict(base, result="REQUEST_CHANGES", findings=[]) - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - -def test_valid_control_invalid_findings(): - base = { - "head_sha": "sha", "run_id": "id", "run_attempt": "1", - "result": "REQUEST_CHANGES", "reason": "r", "summary": "s" - } - valid_finding = { - "line": 1, "path": "p", "severity": "s", "title": "t", - "problem": "p", "root_cause": "r", "fix_direction": "f", - "regression_test_direction": "r", "suggested_diff": "s" - } - - # Finding not a dict - val = dict(base, findings=["not dict"]) - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - - # Invalid line - val = dict(base, findings=[dict(valid_finding, line=0)]) - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - val = dict(base, findings=[dict(valid_finding, line="1")]) - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - - # Missing required field - for field in ["path", "severity", "title", "problem", "root_cause", "fix_direction", "regression_test_direction", "suggested_diff"]: - finding = dict(valid_finding) - del finding[field] - val = dict(base, findings=[finding]) - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - - # Empty field - finding = dict(valid_finding) - finding[field] = " " - val = dict(base, findings=[finding]) - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None diff --git a/tests/scripts/ci/test_pr_review_merge_scheduler.py b/tests/scripts/ci/test_pr_review_merge_scheduler.py deleted file mode 100644 index 6bbbbd6..0000000 --- a/tests/scripts/ci/test_pr_review_merge_scheduler.py +++ /dev/null @@ -1,80 +0,0 @@ -import pytest - -from scripts.ci.pr_review_merge_scheduler import is_opencode_context - -def test_is_opencode_context_checkrun_name(): - node = { - "__typename": "CheckRun", - "name": "opencode-review", - } - assert is_opencode_context(node) is True - -def test_is_opencode_context_checkrun_workflow_name(): - node = { - "__typename": "CheckRun", - "name": "other-check", - "checkSuite": { - "workflowRun": { - "workflow": { - "name": "OpenCode Review" - } - } - } - } - assert is_opencode_context(node) is True - -def test_is_opencode_context_checkrun_false(): - node = { - "__typename": "CheckRun", - "name": "other-check", - "checkSuite": { - "workflowRun": { - "workflow": { - "name": "Other Workflow" - } - } - } - } - assert is_opencode_context(node) is False - -def test_is_opencode_context_checkrun_missing_fields(): - node = { - "__typename": "CheckRun", - "name": "other-check", - "checkSuite": {} - } - assert is_opencode_context(node) is False - - node2 = { - "__typename": "CheckRun", - "name": "other-check", - # missing checkSuite entirely - } - assert is_opencode_context(node2) is False - -def test_is_opencode_context_statuscontext_match(): - node = { - "__typename": "StatusContext", - "context": "opencode-review", - } - assert is_opencode_context(node) is True - -def test_is_opencode_context_statuscontext_mismatch(): - node = { - "__typename": "StatusContext", - "context": "other-review", - } - assert is_opencode_context(node) is False - -def test_is_opencode_context_statuscontext_missing(): - node = { - "__typename": "StatusContext", - # missing context - } - assert is_opencode_context(node) is False - -def test_is_opencode_context_missing_typename(): - node = { - "context": "opencode-review", - } - assert is_opencode_context(node) is True diff --git a/tests/test_opencode_review_normalize_output.py b/tests/test_opencode_review_normalize_output.py deleted file mode 100644 index 6c4cc83..0000000 --- a/tests/test_opencode_review_normalize_output.py +++ /dev/null @@ -1,19 +0,0 @@ -import json -from unittest.mock import patch - -from scripts.ci.opencode_review_normalize_output import iter_json_objects - - -def test_iter_json_objects_decode_error(): - """Test that iter_json_objects handles JSONDecodeError when decoding.""" - text = "prefix { valid looking json } suffix" - - # We mock raw_decode to raise JSONDecodeError to hit the except block explicitly - # This fulfills the 'Requires mocking the operation that throws the exception' rationale. - with patch("json.JSONDecoder.raw_decode") as mock_raw_decode: - mock_raw_decode.side_effect = json.JSONDecodeError("Mocked error", text, 0) - - result = iter_json_objects(text) - - assert result == [] - assert mock_raw_decode.called diff --git a/tests/test_pr_review_merge_scheduler.py b/tests/test_pr_review_merge_scheduler.py deleted file mode 100644 index 3a16137..0000000 --- a/tests/test_pr_review_merge_scheduler.py +++ /dev/null @@ -1,24 +0,0 @@ -import sys -from pathlib import Path -import pytest - -sys.path.insert(0, str(Path(__file__).parent.parent / "scripts" / "ci")) -import pr_review_merge_scheduler - -def test_split_repo_success(): - assert pr_review_merge_scheduler.split_repo("owner/repo") == ("owner", "repo") - -def test_split_repo_success_multiple_slashes(): - assert pr_review_merge_scheduler.split_repo("owner/repo/extra") == ("owner", "repo/extra") - -def test_split_repo_invalid(): - with pytest.raises(ValueError, match="repo must be owner/name, got 'invalid'"): - pr_review_merge_scheduler.split_repo("invalid") - -def test_split_repo_empty_owner(): - with pytest.raises(ValueError, match="repo must be owner/name, got '/repo'"): - pr_review_merge_scheduler.split_repo("/repo") - -def test_split_repo_empty_repo(): - with pytest.raises(ValueError, match="repo must be owner/name, got 'owner/'"): - pr_review_merge_scheduler.split_repo("owner/") diff --git a/tests/test_vibesec.py b/tests/test_vibesec.py index f79534b..a5a41f9 100644 --- a/tests/test_vibesec.py +++ b/tests/test_vibesec.py @@ -1,4 +1,3 @@ -import os import re import tempfile from pathlib import Path @@ -6,7 +5,7 @@ import pytest -from scanner.cli.vibesec import _collect_files, _print_scan_results, _scan_file, cmd_init, cmd_scan, cmd_review, REVIEW_PROMPT_BASE, REVIEW_PROMPT_NEXTJS, REVIEW_PROMPT_SUPABASE, REVIEW_PROMPT_FIREBASE, REVIEW_PROMPT_STRIPE, REVIEW_PROMPT_FOOTER +from scanner.cli.vibesec import _collect_files, _print_scan_results, _scan_file, cmd_init, cmd_scan MOCK_RULES = [ { @@ -18,9 +17,9 @@ }, { "id": "mock-todo", - "pattern": re.compile(r"TODO: fix issue"), + "pattern": re.compile(r"TODO: fix auth"), "severity": "HIGH", - "message": "Found issue todo", + "message": "Found auth todo", "extensions": None, }, { @@ -82,7 +81,7 @@ def test_scan_file_with_findings(tmp_path): @patch("scanner.cli.vibesec.SCAN_RULES", MOCK_RULES) def test_scan_file_with_multiple_findings(tmp_path): test_file = tmp_path / "unsafe_multiple.js" - test_file.write_text("const key = MOCK_SECRET_KEY;\n// TODO: fix issue here\n") + test_file.write_text("const key = MOCK_SECRET_KEY;\n// TODO: fix auth checks here\n") findings = _scan_file(test_file, tmp_path) rule_ids = [f["rule_id"] for f in findings] @@ -213,45 +212,6 @@ def test_collect_files_handles_cyclic_symlink(tmp_path): assert collected_rel_paths == {"a/a.py", "b/b.py"} -def test_collect_files_handles_oserror_in_scandir(tmp_path): - (tmp_path / "a.py").touch() - with patch("os.scandir", side_effect=PermissionError): - assert list(_collect_files(tmp_path)) == [] - - -def test_collect_files_handles_oserror_in_entry(tmp_path): - (tmp_path / "a.py").touch() - (tmp_path / "b.py").touch() - - original_scandir = os.scandir - - def mock_scandir(path): - iterator = original_scandir(path) - class MockIterator: - def __enter__(self): - return self - def __exit__(self, *args): - iterator.close() - def __iter__(self): - return self - def __next__(self): - entry = next(iterator) - if entry.name == "a.py": - class MockEntry: - name = entry.name - path = entry.path - def is_symlink(self): - raise PermissionError("Access denied") - return MockEntry() - return entry - return MockIterator() - - with patch("os.scandir", side_effect=mock_scandir): - collected_rel_paths = {f.relative_to(tmp_path).as_posix() for f in _collect_files(tmp_path)} - assert collected_rel_paths == {"b.py"} - - - @patch("scanner.cli.vibesec.SCAN_RULES", MOCK_RULES) def test_scan_file_skips_symlink(tmp_path): target = tmp_path / "target.py" @@ -456,66 +416,3 @@ def test_sanitize_terminal_output(): # Test non-strings assert _sanitize_terminal_output(None) is None - -# --------------------------------------------------------------------------- -# cmd_review tests -# --------------------------------------------------------------------------- - -from argparse import Namespace - -def test_cmd_review_base_prompt(capsys): - args = Namespace(stack=None, db=None, payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_BASE in captured.out - assert REVIEW_PROMPT_FOOTER in captured.out - assert REVIEW_PROMPT_NEXTJS not in captured.out - assert REVIEW_PROMPT_SUPABASE not in captured.out - assert REVIEW_PROMPT_FIREBASE not in captured.out - assert REVIEW_PROMPT_STRIPE not in captured.out - -def test_cmd_review_nextjs(capsys): - args = Namespace(stack=["nextjs"], db=None, payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_NEXTJS in captured.out - -def test_cmd_review_supabase(capsys): - args = Namespace(stack=None, db="supabase", payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_SUPABASE in captured.out - -def test_cmd_review_supabase_via_stack(capsys): - args = Namespace(stack=["supabase"], db=None, payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_SUPABASE in captured.out - -def test_cmd_review_firebase(capsys): - args = Namespace(stack=None, db="firebase", payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_FIREBASE in captured.out - -def test_cmd_review_firebase_via_stack(capsys): - args = Namespace(stack=["firebase"], db=None, payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_FIREBASE in captured.out - -def test_cmd_review_stripe(capsys): - args = Namespace(stack=None, db=None, payments="stripe") - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_STRIPE in captured.out - -def test_cmd_review_all_options(capsys): - args = Namespace(stack=["nextjs"], db="supabase", payments="stripe") - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_BASE in captured.out - assert REVIEW_PROMPT_NEXTJS in captured.out - assert REVIEW_PROMPT_SUPABASE in captured.out - assert REVIEW_PROMPT_STRIPE in captured.out - assert REVIEW_PROMPT_FOOTER in captured.out From d909f2ddc506c089e2bb7d0b69c1c26fbea140dd Mon Sep 17 00:00:00 2001 From: seonghobae <8172694+seonghobae@users.noreply.github.com> Date: Tue, 16 Jun 2026 06:35:59 +0000 Subject: [PATCH 3/4] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Extract=20constant=20di?= =?UTF-8?q?ctionaries=20to=20module=20level=20for=20print=20results?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From 0d92ac3edbffefa872b60759df54a12cdeb4cf88 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 16 Jun 2026 06:36:19 +0000 Subject: [PATCH 4/4] Resolve PR merge conflicts --- .jules/bolt.md | 12 +- scanner/cli/vibesec.py | 137 ++++++++------- .../ci/opencode_review_normalize_output.py | 80 ++++++--- scripts/ci/pr_review_merge_scheduler.py | 26 +-- tests/scripts/__init__.py | 0 tests/scripts/ci/__init__.py | 0 .../test_opencode_review_normalize_output.py | 166 ++++++++++++++++++ .../ci/test_pr_review_merge_scheduler.py | 80 +++++++++ .../test_opencode_review_normalize_output.py | 19 ++ tests/test_pr_review_merge_scheduler.py | 24 +++ 10 files changed, 446 insertions(+), 98 deletions(-) create mode 100644 tests/scripts/__init__.py create mode 100644 tests/scripts/ci/__init__.py create mode 100644 tests/scripts/ci/test_opencode_review_normalize_output.py create mode 100644 tests/scripts/ci/test_pr_review_merge_scheduler.py create mode 100644 tests/test_opencode_review_normalize_output.py create mode 100644 tests/test_pr_review_merge_scheduler.py diff --git a/.jules/bolt.md b/.jules/bolt.md index 45576b6..6d4e7ed 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -13,8 +13,16 @@ ## 2026-06-14 - Deferring Pathlib Operations in Hot Paths **Learning:** In highly repetitive loops like file scanners (e.g., iterating through thousands of safe files), preemptively calculating `Path.relative_to()` and sanitizing strings adds significant cumulative overhead. Pathlib operations internally parse paths, check parts, and construct new objects, which is extremely expensive when executed on a per-file basis unconditionally. **Action:** Always defer expensive path computations (like converting paths to relative or string sanitization) until *after* the fast-path condition (like a regex match) triggers. This drastically cuts down on unnecessary string operations for clean files. -## 2024-05-16 - Module-level Constants for Performance +## 2024-05-18 - Set literal vs Tuple membership check -**Learning:** Recreating static dictionaries (like severity mappings and icons) inside frequently called functions causes unnecessary memory allocations and slight performance overhead on every call. +**Learning:** In Python, using set literals for constant membership checks (e.g., `in {'CRITICAL', 'HIGH'}`) inside loops or comprehensions is highly efficient because CPython optimizes them into `frozenset` constants at compile time, eliminating runtime instantiation overhead. Using `tuple` for these checks performs an `O(n)` linear search, while a `frozenset` performs an `O(1)` hash lookup. + +**Action:** Prefer set literals `in {"A", "B"}` over tuples `in ("A", "B")` when performing membership checks against constant items, especially in hot paths or tight loops. +## 2024-06-16 - Parallelize Subprocess CLI Calls +**Learning:** Sequential, synchronous execution of `subprocess.run` (like calling the GitHub CLI) across multiple items (like PRs) is a significant I/O bottleneck. +**Action:** Use `concurrent.futures.ThreadPoolExecutor` with `functools.partial` and `executor.map` to safely parallelize I/O-bound subprocess executions, significantly reducing overall script runtime. + +## 2024-05-16 - Module-level Constants for Performance +**Learning:** Recreating static dictionaries (like severity mappings and icons) inside frequently called functions causes unnecessary memory allocations and slight performance overhead on every call. **Action:** Extract static dictionaries to module-level constants to ensure they are instantiated only once when the module is loaded. diff --git a/scanner/cli/vibesec.py b/scanner/cli/vibesec.py index dd98692..eddad2e 100644 --- a/scanner/cli/vibesec.py +++ b/scanner/cli/vibesec.py @@ -49,6 +49,7 @@ - File uploads must validate type, size, and filename server-side. - Never set CORS to allow all origins on authenticated endpoints. - Add tests for cross-user access denial on every resource endpoint. +- When dealing with OWASP rules or security vulnerabilities, automatically run `vibesec scan .` as a hook or use tools like `semgrep` to verify the codebase. See https://github.com/Seongho-Bae/VibeSec for full rules and checklists. """ @@ -66,6 +67,7 @@ 6. **Supabase**: Use getUser() (not getSession()) server-side; RLS on all tables. 7. **Files**: Validate type, size, and generate server-side filenames for uploads. 8. **CORS**: Restrict to known origins on authenticated endpoints. +9. **Automated Scanning**: When dealing with OWASP rules or security vulnerabilities, automatically run `vibesec scan .` as a hook or use tools like `semgrep` to verify the codebase. Return 401 for unauthenticated requests, 403 for ownership violations. @@ -294,6 +296,54 @@ # Command implementations # --------------------------------------------------------------------------- + +def _install_tool_rules(config: dict, project_root, installed: list): + """Install the specific rules file based on tool configuration.""" + if config.get("shared_only"): + return + + target_file = project_root / config['path'] + + # SECURITY: Prevent Arbitrary File Write via symlink path traversal + if not target_file.resolve().is_relative_to(project_root): + print(f"Error: Target path {target_file} escapes the project root. Aborting.", file=sys.stderr) + sys.exit(1) + + target_file.parent.mkdir(parents=True, exist_ok=True) + if target_file.is_symlink(): + target_file.unlink() + + if "append_marker" in config: + if target_file.exists(): + existing = target_file.read_text() + if config['append_marker'] not in existing: + target_file.write_text(existing + "\n\n" + config["content"]) + installed.append(f"{config['path']} (appended)") + else: + print(f"{config['path']} already contains {config['append_marker']} rules — skipping.") + else: + target_file.write_text(config["content"]) + installed.append(str(config['path'])) + else: + target_file.write_text(config["content"]) + installed.append(str(config['path'])) + + +def _install_checklist(project_root, installed: list): + """Install the VIBESEC_CHECKLIST.md file.""" + checklist_file = project_root / "VIBESEC_CHECKLIST.md" + + # SECURITY: Prevent Arbitrary File Write via symlink path traversal + if not checklist_file.resolve().is_relative_to(project_root): + print(f"Error: Checklist path {checklist_file} escapes the project root. Aborting.", file=sys.stderr) + sys.exit(1) + + if checklist_file.is_symlink(): + checklist_file.unlink() + if not checklist_file.exists(): + checklist_file.write_text(CHECKLIST_TEMPLATE) + installed.append("VIBESEC_CHECKLIST.md") + def cmd_init(args): """Install security rules into the project.""" tool = getattr(args, "tool", "cursor") or "cursor" @@ -327,46 +377,8 @@ def cmd_init(args): sys.exit(1) config = tool_configs[tool] - if not config.get("shared_only"): - target_file = project_root / config["path"] - - # SECURITY: Prevent Arbitrary File Write via symlink path traversal - if not target_file.resolve().is_relative_to(project_root): - print(f"Error: Target path {target_file} escapes the project root. Aborting.", file=sys.stderr) - sys.exit(1) - - target_file.parent.mkdir(parents=True, exist_ok=True) - if target_file.is_symlink(): - target_file.unlink() - - if "append_marker" in config: - if target_file.exists(): - existing = target_file.read_text() - if config["append_marker"] not in existing: - target_file.write_text(existing + "\n\n" + config["content"]) - installed.append(f"{config['path']} (appended)") - else: - print(f"{config['path']} already contains {config['append_marker']} rules — skipping.") - else: - target_file.write_text(config["content"]) - installed.append(str(config["path"])) - else: - target_file.write_text(config["content"]) - installed.append(str(config["path"])) - # Always create the checklist - checklist_file = project_root / "VIBESEC_CHECKLIST.md" - - # SECURITY: Prevent Arbitrary File Write via symlink path traversal - if not checklist_file.resolve().is_relative_to(project_root): - print(f"Error: Checklist path {checklist_file} escapes the project root. Aborting.", file=sys.stderr) - sys.exit(1) - - if checklist_file.is_symlink(): - checklist_file.unlink() - if not checklist_file.exists(): - checklist_file.write_text(CHECKLIST_TEMPLATE) - installed.append("VIBESEC_CHECKLIST.md") - + _install_tool_rules(config, project_root, installed) + _install_checklist(project_root, installed) if stack and "supabase" in stack: _print_supabase_reminder() @@ -419,7 +431,7 @@ def cmd_scan(args): findings.extend(file_findings) _print_scan_results(findings, files_scanned) - return 1 if any(f["severity"] in ("CRITICAL", "HIGH") for f in findings) else 0 + return 1 if any(f["severity"] in {"CRITICAL", "HIGH"} for f in findings) else 0 def cmd_hook(args): @@ -493,6 +505,28 @@ def _get_applicable_rules(ext: str): return _RULES_CACHE[ext] +def _process_dir_entries(dir_path: str): + """Process entries in a directory, yielding files and returning subdirectories.""" + dirs = [] + try: + with os.scandir(dir_path) as it: + for entry in it: + try: + if entry.is_symlink(): + continue + if entry.is_dir(follow_symlinks=False): + if entry.name not in SKIP_DIRS and not entry.name.startswith("."): + dirs.append(entry.path) + elif entry.is_file(follow_symlinks=False): + _, ext = os.path.splitext(entry.name) + if ext.lower() not in SKIP_EXTENSIONS: + yield Path(entry.path) + except (OSError, PermissionError): + continue + except (OSError, PermissionError): + pass + return dirs + def _collect_files(base_path: Path): """Collect all scannable files, skipping unwanted directories.""" # ⚡ Bolt: Optimize file traversal using os.scandir and os.path.splitext @@ -502,25 +536,8 @@ def _collect_files(base_path: Path): stack = [str(base_path)] while stack: current_dir = stack.pop() - try: - with os.scandir(current_dir) as it: - dirs = [] - for entry in it: - try: - if entry.is_symlink(): - continue - if entry.is_dir(follow_symlinks=False): - if entry.name not in SKIP_DIRS and not entry.name.startswith("."): - dirs.append(entry.path) - elif entry.is_file(follow_symlinks=False): - _, ext = os.path.splitext(entry.name) - if ext.lower() not in SKIP_EXTENSIONS: - yield Path(entry.path) - except (OSError, PermissionError): - continue - stack.extend(reversed(dirs)) - except (OSError, PermissionError): - pass + dirs = yield from _process_dir_entries(current_dir) + stack.extend(reversed(dirs)) def _sanitize_terminal_output(text: str) -> str: diff --git a/scripts/ci/opencode_review_normalize_output.py b/scripts/ci/opencode_review_normalize_output.py index 2a850c6..7d2a797 100755 --- a/scripts/ci/opencode_review_normalize_output.py +++ b/scripts/ci/opencode_review_normalize_output.py @@ -1,47 +1,47 @@ #!/usr/bin/env python3 """Normalize OpenCode review output into the strict approval-gate contract.""" -from __future__ import annotations - import json import sys from pathlib import Path from typing import Any -def valid_control( - value: Any, - *, +def _validate_metadata( + value: dict[str, Any], expected_head_sha: str, expected_run_id: str, expected_run_attempt: str, -) -> dict[str, Any] | None: - if not isinstance(value, dict): - return None - +) -> bool: if value.get("head_sha") != expected_head_sha: - return None + return False if value.get("run_id") != expected_run_id: - return None + return False if value.get("run_attempt") != expected_run_attempt: - return None + return False + return True + +def _validate_result_and_reason(value: dict[str, Any]) -> bool: result = value.get("result") if result not in {"APPROVE", "REQUEST_CHANGES"}: - return None - + return False if not isinstance(value.get("reason"), str) or not value["reason"].strip(): - return None + return False if not isinstance(value.get("summary"), str) or not value["summary"].strip(): - return None + return False + return True + +def _validate_findings(value: dict[str, Any]) -> bool: + result = value.get("result") findings = value.get("findings") if not isinstance(findings, list): - return None + return False if result == "APPROVE" and findings: - return None + return False if result == "REQUEST_CHANGES" and not findings: - return None + return False required_finding_fields = ( "path", @@ -55,21 +55,47 @@ def valid_control( ) for finding in findings: if not isinstance(finding, dict): - return None + return False if not isinstance(finding.get("line"), int) or finding["line"] <= 0: - return None + return False for field in required_finding_fields: if not isinstance(finding.get(field), str) or not finding[field].strip(): - return None + return False + return True + + +def valid_control( + value: Any, + *, + expected_head_sha: str, + expected_run_id: str, + expected_run_attempt: str, +) -> dict[str, Any] | None: + if not isinstance(value, dict): + return None + + if not _validate_metadata( + value, + expected_head_sha, + expected_run_id, + expected_run_attempt, + ): + return None + + if not _validate_result_and_reason(value): + return None + + if not _validate_findings(value): + return None return { "head_sha": value["head_sha"], "run_id": value["run_id"], "run_attempt": value["run_attempt"], - "result": result, + "result": value["result"], "reason": value["reason"], "summary": value["summary"], - "findings": findings, + "findings": value["findings"], } @@ -106,6 +132,12 @@ def main(argv: list[str]) -> int: expected_head_sha, expected_run_id, expected_run_attempt, output_file_arg = argv[1:] output_file = Path(output_file_arg) + project_root = Path.cwd().resolve() + + if not output_file.resolve().is_relative_to(project_root): + print(f"error: output file path {output_file_arg!r} is outside the project root", file=sys.stderr) + return 65 + try: output_text = output_file.read_text(encoding="utf-8") except OSError as exc: diff --git a/scripts/ci/pr_review_merge_scheduler.py b/scripts/ci/pr_review_merge_scheduler.py index a8fee70..cab2198 100644 --- a/scripts/ci/pr_review_merge_scheduler.py +++ b/scripts/ci/pr_review_merge_scheduler.py @@ -1,11 +1,12 @@ #!/usr/bin/env python3 -from __future__ import annotations import argparse import json import os import subprocess import sys +import concurrent.futures +from functools import partial from dataclasses import dataclass from typing import Any @@ -330,17 +331,18 @@ def main(argv: list[str]) -> int: if not args.repo: raise SystemExit("--repo is required") prs = fetch_open_prs(args.repo, args.max_prs) - decisions = [ - inspect_pr( - args.repo, - pr, - dry_run=args.dry_run, - trigger_reviews=args.trigger_reviews, - enable_auto_merge_flag=args.enable_auto_merge, - workflow=args.review_workflow, - ) - for pr in prs - ] + + inspect_func = partial( + inspect_pr, + args.repo, + dry_run=args.dry_run, + trigger_reviews=args.trigger_reviews, + enable_auto_merge_flag=args.enable_auto_merge, + workflow=args.review_workflow, + ) + with concurrent.futures.ThreadPoolExecutor() as executor: + decisions = list(executor.map(inspect_func, prs)) + print_summary(decisions, dry_run=args.dry_run) return 0 diff --git a/tests/scripts/__init__.py b/tests/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/scripts/ci/__init__.py b/tests/scripts/ci/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/scripts/ci/test_opencode_review_normalize_output.py b/tests/scripts/ci/test_opencode_review_normalize_output.py new file mode 100644 index 0000000..6926389 --- /dev/null +++ b/tests/scripts/ci/test_opencode_review_normalize_output.py @@ -0,0 +1,166 @@ +import pytest + +from scripts.ci.opencode_review_normalize_output import valid_control + +def test_valid_control_approve(): + value = { + "head_sha": "sha123", + "run_id": "id123", + "run_attempt": "1", + "result": "APPROVE", + "reason": "Looks good", + "summary": "Approved", + "findings": [], + "extra_field": "should_be_ignored" + } + result = valid_control( + value, + expected_head_sha="sha123", + expected_run_id="id123", + expected_run_attempt="1" + ) + assert result == { + "head_sha": "sha123", + "run_id": "id123", + "run_attempt": "1", + "result": "APPROVE", + "reason": "Looks good", + "summary": "Approved", + "findings": [] + } + +def test_valid_control_request_changes(): + value = { + "head_sha": "sha123", + "run_id": "id123", + "run_attempt": "1", + "result": "REQUEST_CHANGES", + "reason": "Has issues", + "summary": "Needs work", + "findings": [ + { + "line": 42, + "path": "file.py", + "severity": "high", + "title": "Bug", + "problem": "Bad code", + "root_cause": "Typo", + "fix_direction": "Fix it", + "regression_test_direction": "Test it", + "suggested_diff": "- bad\n+ good", + "extra": "ignore" + } + ] + } + result = valid_control( + value, + expected_head_sha="sha123", + expected_run_id="id123", + expected_run_attempt="1" + ) + assert result is not None + assert result["findings"] == value["findings"] + +def test_valid_control_invalid_type(): + assert valid_control("not a dict", expected_head_sha="s", expected_run_id="i", expected_run_attempt="1") is None + +def test_valid_control_mismatched_metadata(): + value = { + "head_sha": "sha123", + "run_id": "id123", + "run_attempt": "1", + "result": "APPROVE", + "reason": "r", + "summary": "s", + "findings": [] + } + + assert valid_control(value, expected_head_sha="wrong", expected_run_id="id123", expected_run_attempt="1") is None + assert valid_control(value, expected_head_sha="sha123", expected_run_id="wrong", expected_run_attempt="1") is None + assert valid_control(value, expected_head_sha="sha123", expected_run_id="id123", expected_run_attempt="wrong") is None + +def test_valid_control_invalid_result(): + value = { + "head_sha": "sha", + "run_id": "id", + "run_attempt": "1", + "result": "INVALID", + "reason": "r", + "summary": "s", + "findings": [] + } + assert valid_control(value, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + +def test_valid_control_invalid_reason_summary(): + base = { + "head_sha": "sha", "run_id": "id", "run_attempt": "1", + "result": "APPROVE", "findings": [] + } + + # Missing reason + val = dict(base, summary="s") + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + + # Empty reason + val = dict(base, reason=" ", summary="s") + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + + # Missing summary + val = dict(base, reason="r") + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + + # Empty summary + val = dict(base, reason="r", summary="") + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + +def test_valid_control_findings_logic(): + base = { + "head_sha": "sha", "run_id": "id", "run_attempt": "1", + "reason": "r", "summary": "s" + } + + # findings not a list + val = dict(base, result="APPROVE", findings="not a list") + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + + # APPROVE with findings + val = dict(base, result="APPROVE", findings=[{}]) + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + + # REQUEST_CHANGES without findings + val = dict(base, result="REQUEST_CHANGES", findings=[]) + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + +def test_valid_control_invalid_findings(): + base = { + "head_sha": "sha", "run_id": "id", "run_attempt": "1", + "result": "REQUEST_CHANGES", "reason": "r", "summary": "s" + } + valid_finding = { + "line": 1, "path": "p", "severity": "s", "title": "t", + "problem": "p", "root_cause": "r", "fix_direction": "f", + "regression_test_direction": "r", "suggested_diff": "s" + } + + # Finding not a dict + val = dict(base, findings=["not dict"]) + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + + # Invalid line + val = dict(base, findings=[dict(valid_finding, line=0)]) + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + val = dict(base, findings=[dict(valid_finding, line="1")]) + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + + # Missing required field + for field in ["path", "severity", "title", "problem", "root_cause", "fix_direction", "regression_test_direction", "suggested_diff"]: + finding = dict(valid_finding) + del finding[field] + val = dict(base, findings=[finding]) + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + + # Empty field + finding = dict(valid_finding) + finding[field] = " " + val = dict(base, findings=[finding]) + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None diff --git a/tests/scripts/ci/test_pr_review_merge_scheduler.py b/tests/scripts/ci/test_pr_review_merge_scheduler.py new file mode 100644 index 0000000..6bbbbd6 --- /dev/null +++ b/tests/scripts/ci/test_pr_review_merge_scheduler.py @@ -0,0 +1,80 @@ +import pytest + +from scripts.ci.pr_review_merge_scheduler import is_opencode_context + +def test_is_opencode_context_checkrun_name(): + node = { + "__typename": "CheckRun", + "name": "opencode-review", + } + assert is_opencode_context(node) is True + +def test_is_opencode_context_checkrun_workflow_name(): + node = { + "__typename": "CheckRun", + "name": "other-check", + "checkSuite": { + "workflowRun": { + "workflow": { + "name": "OpenCode Review" + } + } + } + } + assert is_opencode_context(node) is True + +def test_is_opencode_context_checkrun_false(): + node = { + "__typename": "CheckRun", + "name": "other-check", + "checkSuite": { + "workflowRun": { + "workflow": { + "name": "Other Workflow" + } + } + } + } + assert is_opencode_context(node) is False + +def test_is_opencode_context_checkrun_missing_fields(): + node = { + "__typename": "CheckRun", + "name": "other-check", + "checkSuite": {} + } + assert is_opencode_context(node) is False + + node2 = { + "__typename": "CheckRun", + "name": "other-check", + # missing checkSuite entirely + } + assert is_opencode_context(node2) is False + +def test_is_opencode_context_statuscontext_match(): + node = { + "__typename": "StatusContext", + "context": "opencode-review", + } + assert is_opencode_context(node) is True + +def test_is_opencode_context_statuscontext_mismatch(): + node = { + "__typename": "StatusContext", + "context": "other-review", + } + assert is_opencode_context(node) is False + +def test_is_opencode_context_statuscontext_missing(): + node = { + "__typename": "StatusContext", + # missing context + } + assert is_opencode_context(node) is False + +def test_is_opencode_context_missing_typename(): + node = { + "context": "opencode-review", + } + assert is_opencode_context(node) is True diff --git a/tests/test_opencode_review_normalize_output.py b/tests/test_opencode_review_normalize_output.py new file mode 100644 index 0000000..6c4cc83 --- /dev/null +++ b/tests/test_opencode_review_normalize_output.py @@ -0,0 +1,19 @@ +import json +from unittest.mock import patch + +from scripts.ci.opencode_review_normalize_output import iter_json_objects + + +def test_iter_json_objects_decode_error(): + """Test that iter_json_objects handles JSONDecodeError when decoding.""" + text = "prefix { valid looking json } suffix" + + # We mock raw_decode to raise JSONDecodeError to hit the except block explicitly + # This fulfills the 'Requires mocking the operation that throws the exception' rationale. + with patch("json.JSONDecoder.raw_decode") as mock_raw_decode: + mock_raw_decode.side_effect = json.JSONDecodeError("Mocked error", text, 0) + + result = iter_json_objects(text) + + assert result == [] + assert mock_raw_decode.called diff --git a/tests/test_pr_review_merge_scheduler.py b/tests/test_pr_review_merge_scheduler.py new file mode 100644 index 0000000..3a16137 --- /dev/null +++ b/tests/test_pr_review_merge_scheduler.py @@ -0,0 +1,24 @@ +import sys +from pathlib import Path +import pytest + +sys.path.insert(0, str(Path(__file__).parent.parent / "scripts" / "ci")) +import pr_review_merge_scheduler + +def test_split_repo_success(): + assert pr_review_merge_scheduler.split_repo("owner/repo") == ("owner", "repo") + +def test_split_repo_success_multiple_slashes(): + assert pr_review_merge_scheduler.split_repo("owner/repo/extra") == ("owner", "repo/extra") + +def test_split_repo_invalid(): + with pytest.raises(ValueError, match="repo must be owner/name, got 'invalid'"): + pr_review_merge_scheduler.split_repo("invalid") + +def test_split_repo_empty_owner(): + with pytest.raises(ValueError, match="repo must be owner/name, got '/repo'"): + pr_review_merge_scheduler.split_repo("/repo") + +def test_split_repo_empty_repo(): + with pytest.raises(ValueError, match="repo must be owner/name, got 'owner/'"): + pr_review_merge_scheduler.split_repo("owner/")