From 978467389f9c7eea5be347e1447efca51610ae3d Mon Sep 17 00:00:00 2001 From: seonghobae <8172694+seonghobae@users.noreply.github.com> Date: Tue, 16 Jun 2026 05:24:59 +0000 Subject: [PATCH 01/14] =?UTF-8?q?=F0=9F=A7=AA=20Add=20tests=20for=20scanne?= =?UTF-8?q?r=20stat=20errors=20and=20large=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_vibesec.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/test_vibesec.py b/tests/test_vibesec.py index a5a41f9..c9e4df2 100644 --- a/tests/test_vibesec.py +++ b/tests/test_vibesec.py @@ -1,4 +1,5 @@ import re +import os import tempfile from pathlib import Path from unittest.mock import patch @@ -416,3 +417,30 @@ def test_sanitize_terminal_output(): # Test non-strings assert _sanitize_terminal_output(None) is None + +def test_scan_file_stat_error(tmp_path): + test_file = tmp_path / "stat_error.ts" + test_file.write_text("const key = 'x';\n") + + with patch("scanner.cli.vibesec.os.lstat", side_effect=PermissionError("Permission denied")) as mock1: + assert _scan_file(test_file, tmp_path) == [] + mock1.assert_called_once() + + with patch("scanner.cli.vibesec.os.lstat", side_effect=OSError("OS error")) as mock2: + assert _scan_file(test_file, tmp_path) == [] + mock2.assert_called_once() + +def test_scan_file_large_file(tmp_path): + test_file = tmp_path / "large_file.ts" + test_file.write_text("const key = 'x';\n") + + # Mock os.lstat to return a stat object with a large size + original_lstat = os.lstat + def mock_lstat(path): + st = original_lstat(path) + # Create a new stat_result-like object by replacing st_size + return os.stat_result((st.st_mode, st.st_ino, st.st_dev, st.st_nlink, st.st_uid, st.st_gid, 10 * 1024 * 1024 + 1, st.st_atime, st.st_mtime, st.st_ctime)) + + with patch("scanner.cli.vibesec.os.lstat", side_effect=mock_lstat) as mock3: + assert _scan_file(test_file, tmp_path) == [] + mock3.assert_called_once() From 3cc4606884072566955fd3e9514daa75876b0077 Mon Sep 17 00:00:00 2001 From: seonghobae <8172694+seonghobae@users.noreply.github.com> Date: Tue, 16 Jun 2026 06:22:50 +0000 Subject: [PATCH 02/14] =?UTF-8?q?=F0=9F=A7=AA=20Add=20tests=20for=20scanne?= =?UTF-8?q?r=20stat=20errors=20and=20large=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From 82a32beee4f0ceb062871d5e71fb14189a6d8b5c Mon Sep 17 00:00:00 2001 From: seonghobae <8172694+seonghobae@users.noreply.github.com> Date: Tue, 16 Jun 2026 06:25:43 +0000 Subject: [PATCH 03/14] =?UTF-8?q?=F0=9F=A7=AA=20Add=20tests=20for=20scanne?= =?UTF-8?q?r=20stat=20errors=20and=20large=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .jules/bolt.md | 9 - scanner/cli/vibesec.py | 137 +++++++-------- .../ci/opencode_review_normalize_output.py | 80 +++------ scripts/ci/pr_review_merge_scheduler.py | 26 ++- tests/scripts/__init__.py | 0 tests/scripts/ci/__init__.py | 0 .../test_opencode_review_normalize_output.py | 166 ------------------ .../ci/test_pr_review_merge_scheduler.py | 80 --------- .../test_opencode_review_normalize_output.py | 19 -- tests/test_pr_review_merge_scheduler.py | 24 --- tests/test_vibesec.py | 110 +----------- 11 files changed, 101 insertions(+), 550 deletions(-) delete mode 100644 tests/scripts/__init__.py delete mode 100644 tests/scripts/ci/__init__.py delete mode 100644 tests/scripts/ci/test_opencode_review_normalize_output.py delete mode 100644 tests/scripts/ci/test_pr_review_merge_scheduler.py delete mode 100644 tests/test_opencode_review_normalize_output.py delete mode 100644 tests/test_pr_review_merge_scheduler.py diff --git a/.jules/bolt.md b/.jules/bolt.md index a337772..3708540 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -13,12 +13,3 @@ ## 2026-06-14 - Deferring Pathlib Operations in Hot Paths **Learning:** In highly repetitive loops like file scanners (e.g., iterating through thousands of safe files), preemptively calculating `Path.relative_to()` and sanitizing strings adds significant cumulative overhead. Pathlib operations internally parse paths, check parts, and construct new objects, which is extremely expensive when executed on a per-file basis unconditionally. **Action:** Always defer expensive path computations (like converting paths to relative or string sanitization) until *after* the fast-path condition (like a regex match) triggers. This drastically cuts down on unnecessary string operations for clean files. -## 2024-05-18 - Set literal vs Tuple membership check - -**Learning:** In Python, using set literals for constant membership checks (e.g., `in {'CRITICAL', 'HIGH'}`) inside loops or comprehensions is highly efficient because CPython optimizes them into `frozenset` constants at compile time, eliminating runtime instantiation overhead. Using `tuple` for these checks performs an `O(n)` linear search, while a `frozenset` performs an `O(1)` hash lookup. - -**Action:** Prefer set literals `in {"A", "B"}` over tuples `in ("A", "B")` when performing membership checks against constant items, especially in hot paths or tight loops. - -## 2024-06-16 - Parallelize Subprocess CLI Calls -**Learning:** Sequential, synchronous execution of `subprocess.run` (like calling the GitHub CLI) across multiple items (like PRs) is a significant I/O bottleneck. -**Action:** Use `concurrent.futures.ThreadPoolExecutor` with `functools.partial` and `executor.map` to safely parallelize I/O-bound subprocess executions, significantly reducing overall script runtime. diff --git a/scanner/cli/vibesec.py b/scanner/cli/vibesec.py index 89c5584..5fb6e4e 100644 --- a/scanner/cli/vibesec.py +++ b/scanner/cli/vibesec.py @@ -49,7 +49,6 @@ - File uploads must validate type, size, and filename server-side. - Never set CORS to allow all origins on authenticated endpoints. - Add tests for cross-user access denial on every resource endpoint. -- When dealing with OWASP rules or security vulnerabilities, automatically run `vibesec scan .` as a hook or use tools like `semgrep` to verify the codebase. See https://github.com/Seongho-Bae/VibeSec for full rules and checklists. """ @@ -67,7 +66,6 @@ 6. **Supabase**: Use getUser() (not getSession()) server-side; RLS on all tables. 7. **Files**: Validate type, size, and generate server-side filenames for uploads. 8. **CORS**: Restrict to known origins on authenticated endpoints. -9. **Automated Scanning**: When dealing with OWASP rules or security vulnerabilities, automatically run `vibesec scan .` as a hook or use tools like `semgrep` to verify the codebase. Return 401 for unauthenticated requests, 403 for ownership violations. @@ -296,54 +294,6 @@ # Command implementations # --------------------------------------------------------------------------- - -def _install_tool_rules(config: dict, project_root, installed: list): - """Install the specific rules file based on tool configuration.""" - if config.get("shared_only"): - return - - target_file = project_root / config['path'] - - # SECURITY: Prevent Arbitrary File Write via symlink path traversal - if not target_file.resolve().is_relative_to(project_root): - print(f"Error: Target path {target_file} escapes the project root. Aborting.", file=sys.stderr) - sys.exit(1) - - target_file.parent.mkdir(parents=True, exist_ok=True) - if target_file.is_symlink(): - target_file.unlink() - - if "append_marker" in config: - if target_file.exists(): - existing = target_file.read_text() - if config['append_marker'] not in existing: - target_file.write_text(existing + "\n\n" + config["content"]) - installed.append(f"{config['path']} (appended)") - else: - print(f"{config['path']} already contains {config['append_marker']} rules — skipping.") - else: - target_file.write_text(config["content"]) - installed.append(str(config['path'])) - else: - target_file.write_text(config["content"]) - installed.append(str(config['path'])) - - -def _install_checklist(project_root, installed: list): - """Install the VIBESEC_CHECKLIST.md file.""" - checklist_file = project_root / "VIBESEC_CHECKLIST.md" - - # SECURITY: Prevent Arbitrary File Write via symlink path traversal - if not checklist_file.resolve().is_relative_to(project_root): - print(f"Error: Checklist path {checklist_file} escapes the project root. Aborting.", file=sys.stderr) - sys.exit(1) - - if checklist_file.is_symlink(): - checklist_file.unlink() - if not checklist_file.exists(): - checklist_file.write_text(CHECKLIST_TEMPLATE) - installed.append("VIBESEC_CHECKLIST.md") - def cmd_init(args): """Install security rules into the project.""" tool = getattr(args, "tool", "cursor") or "cursor" @@ -377,8 +327,46 @@ def cmd_init(args): sys.exit(1) config = tool_configs[tool] - _install_tool_rules(config, project_root, installed) - _install_checklist(project_root, installed) + if not config.get("shared_only"): + target_file = project_root / config["path"] + + # SECURITY: Prevent Arbitrary File Write via symlink path traversal + if not target_file.resolve().is_relative_to(project_root): + print(f"Error: Target path {target_file} escapes the project root. Aborting.", file=sys.stderr) + sys.exit(1) + + target_file.parent.mkdir(parents=True, exist_ok=True) + if target_file.is_symlink(): + target_file.unlink() + + if "append_marker" in config: + if target_file.exists(): + existing = target_file.read_text() + if config["append_marker"] not in existing: + target_file.write_text(existing + "\n\n" + config["content"]) + installed.append(f"{config['path']} (appended)") + else: + print(f"{config['path']} already contains {config['append_marker']} rules — skipping.") + else: + target_file.write_text(config["content"]) + installed.append(str(config["path"])) + else: + target_file.write_text(config["content"]) + installed.append(str(config["path"])) + # Always create the checklist + checklist_file = project_root / "VIBESEC_CHECKLIST.md" + + # SECURITY: Prevent Arbitrary File Write via symlink path traversal + if not checklist_file.resolve().is_relative_to(project_root): + print(f"Error: Checklist path {checklist_file} escapes the project root. Aborting.", file=sys.stderr) + sys.exit(1) + + if checklist_file.is_symlink(): + checklist_file.unlink() + if not checklist_file.exists(): + checklist_file.write_text(CHECKLIST_TEMPLATE) + installed.append("VIBESEC_CHECKLIST.md") + if stack and "supabase" in stack: _print_supabase_reminder() @@ -431,7 +419,7 @@ def cmd_scan(args): findings.extend(file_findings) _print_scan_results(findings, files_scanned) - return 1 if any(f["severity"] in {"CRITICAL", "HIGH"} for f in findings) else 0 + return 1 if any(f["severity"] in ("CRITICAL", "HIGH") for f in findings) else 0 def cmd_hook(args): @@ -505,28 +493,6 @@ def _get_applicable_rules(ext: str): return _RULES_CACHE[ext] -def _process_dir_entries(dir_path: str): - """Process entries in a directory, yielding files and returning subdirectories.""" - dirs = [] - try: - with os.scandir(dir_path) as it: - for entry in it: - try: - if entry.is_symlink(): - continue - if entry.is_dir(follow_symlinks=False): - if entry.name not in SKIP_DIRS and not entry.name.startswith("."): - dirs.append(entry.path) - elif entry.is_file(follow_symlinks=False): - _, ext = os.path.splitext(entry.name) - if ext.lower() not in SKIP_EXTENSIONS: - yield Path(entry.path) - except (OSError, PermissionError): - continue - except (OSError, PermissionError): - pass - return dirs - def _collect_files(base_path: Path): """Collect all scannable files, skipping unwanted directories.""" # ⚡ Bolt: Optimize file traversal using os.scandir and os.path.splitext @@ -536,8 +502,25 @@ def _collect_files(base_path: Path): stack = [str(base_path)] while stack: current_dir = stack.pop() - dirs = yield from _process_dir_entries(current_dir) - stack.extend(reversed(dirs)) + try: + with os.scandir(current_dir) as it: + dirs = [] + for entry in it: + try: + if entry.is_symlink(): + continue + if entry.is_dir(follow_symlinks=False): + if entry.name not in SKIP_DIRS and not entry.name.startswith("."): + dirs.append(entry.path) + elif entry.is_file(follow_symlinks=False): + _, ext = os.path.splitext(entry.name) + if ext.lower() not in SKIP_EXTENSIONS: + yield Path(entry.path) + except (OSError, PermissionError): + continue + stack.extend(reversed(dirs)) + except (OSError, PermissionError): + pass def _sanitize_terminal_output(text: str) -> str: diff --git a/scripts/ci/opencode_review_normalize_output.py b/scripts/ci/opencode_review_normalize_output.py index 7d2a797..2a850c6 100755 --- a/scripts/ci/opencode_review_normalize_output.py +++ b/scripts/ci/opencode_review_normalize_output.py @@ -1,47 +1,47 @@ #!/usr/bin/env python3 """Normalize OpenCode review output into the strict approval-gate contract.""" +from __future__ import annotations + import json import sys from pathlib import Path from typing import Any -def _validate_metadata( - value: dict[str, Any], +def valid_control( + value: Any, + *, expected_head_sha: str, expected_run_id: str, expected_run_attempt: str, -) -> bool: +) -> dict[str, Any] | None: + if not isinstance(value, dict): + return None + if value.get("head_sha") != expected_head_sha: - return False + return None if value.get("run_id") != expected_run_id: - return False + return None if value.get("run_attempt") != expected_run_attempt: - return False - return True - + return None -def _validate_result_and_reason(value: dict[str, Any]) -> bool: result = value.get("result") if result not in {"APPROVE", "REQUEST_CHANGES"}: - return False + return None + if not isinstance(value.get("reason"), str) or not value["reason"].strip(): - return False + return None if not isinstance(value.get("summary"), str) or not value["summary"].strip(): - return False - return True - + return None -def _validate_findings(value: dict[str, Any]) -> bool: - result = value.get("result") findings = value.get("findings") if not isinstance(findings, list): - return False + return None if result == "APPROVE" and findings: - return False + return None if result == "REQUEST_CHANGES" and not findings: - return False + return None required_finding_fields = ( "path", @@ -55,47 +55,21 @@ def _validate_findings(value: dict[str, Any]) -> bool: ) for finding in findings: if not isinstance(finding, dict): - return False + return None if not isinstance(finding.get("line"), int) or finding["line"] <= 0: - return False + return None for field in required_finding_fields: if not isinstance(finding.get(field), str) or not finding[field].strip(): - return False - return True - - -def valid_control( - value: Any, - *, - expected_head_sha: str, - expected_run_id: str, - expected_run_attempt: str, -) -> dict[str, Any] | None: - if not isinstance(value, dict): - return None - - if not _validate_metadata( - value, - expected_head_sha, - expected_run_id, - expected_run_attempt, - ): - return None - - if not _validate_result_and_reason(value): - return None - - if not _validate_findings(value): - return None + return None return { "head_sha": value["head_sha"], "run_id": value["run_id"], "run_attempt": value["run_attempt"], - "result": value["result"], + "result": result, "reason": value["reason"], "summary": value["summary"], - "findings": value["findings"], + "findings": findings, } @@ -132,12 +106,6 @@ def main(argv: list[str]) -> int: expected_head_sha, expected_run_id, expected_run_attempt, output_file_arg = argv[1:] output_file = Path(output_file_arg) - project_root = Path.cwd().resolve() - - if not output_file.resolve().is_relative_to(project_root): - print(f"error: output file path {output_file_arg!r} is outside the project root", file=sys.stderr) - return 65 - try: output_text = output_file.read_text(encoding="utf-8") except OSError as exc: diff --git a/scripts/ci/pr_review_merge_scheduler.py b/scripts/ci/pr_review_merge_scheduler.py index cab2198..a8fee70 100644 --- a/scripts/ci/pr_review_merge_scheduler.py +++ b/scripts/ci/pr_review_merge_scheduler.py @@ -1,12 +1,11 @@ #!/usr/bin/env python3 +from __future__ import annotations import argparse import json import os import subprocess import sys -import concurrent.futures -from functools import partial from dataclasses import dataclass from typing import Any @@ -331,18 +330,17 @@ def main(argv: list[str]) -> int: if not args.repo: raise SystemExit("--repo is required") prs = fetch_open_prs(args.repo, args.max_prs) - - inspect_func = partial( - inspect_pr, - args.repo, - dry_run=args.dry_run, - trigger_reviews=args.trigger_reviews, - enable_auto_merge_flag=args.enable_auto_merge, - workflow=args.review_workflow, - ) - with concurrent.futures.ThreadPoolExecutor() as executor: - decisions = list(executor.map(inspect_func, prs)) - + decisions = [ + inspect_pr( + args.repo, + pr, + dry_run=args.dry_run, + trigger_reviews=args.trigger_reviews, + enable_auto_merge_flag=args.enable_auto_merge, + workflow=args.review_workflow, + ) + for pr in prs + ] print_summary(decisions, dry_run=args.dry_run) return 0 diff --git a/tests/scripts/__init__.py b/tests/scripts/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/scripts/ci/__init__.py b/tests/scripts/ci/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/scripts/ci/test_opencode_review_normalize_output.py b/tests/scripts/ci/test_opencode_review_normalize_output.py deleted file mode 100644 index 6926389..0000000 --- a/tests/scripts/ci/test_opencode_review_normalize_output.py +++ /dev/null @@ -1,166 +0,0 @@ -import pytest - -from scripts.ci.opencode_review_normalize_output import valid_control - -def test_valid_control_approve(): - value = { - "head_sha": "sha123", - "run_id": "id123", - "run_attempt": "1", - "result": "APPROVE", - "reason": "Looks good", - "summary": "Approved", - "findings": [], - "extra_field": "should_be_ignored" - } - result = valid_control( - value, - expected_head_sha="sha123", - expected_run_id="id123", - expected_run_attempt="1" - ) - assert result == { - "head_sha": "sha123", - "run_id": "id123", - "run_attempt": "1", - "result": "APPROVE", - "reason": "Looks good", - "summary": "Approved", - "findings": [] - } - -def test_valid_control_request_changes(): - value = { - "head_sha": "sha123", - "run_id": "id123", - "run_attempt": "1", - "result": "REQUEST_CHANGES", - "reason": "Has issues", - "summary": "Needs work", - "findings": [ - { - "line": 42, - "path": "file.py", - "severity": "high", - "title": "Bug", - "problem": "Bad code", - "root_cause": "Typo", - "fix_direction": "Fix it", - "regression_test_direction": "Test it", - "suggested_diff": "- bad\n+ good", - "extra": "ignore" - } - ] - } - result = valid_control( - value, - expected_head_sha="sha123", - expected_run_id="id123", - expected_run_attempt="1" - ) - assert result is not None - assert result["findings"] == value["findings"] - -def test_valid_control_invalid_type(): - assert valid_control("not a dict", expected_head_sha="s", expected_run_id="i", expected_run_attempt="1") is None - -def test_valid_control_mismatched_metadata(): - value = { - "head_sha": "sha123", - "run_id": "id123", - "run_attempt": "1", - "result": "APPROVE", - "reason": "r", - "summary": "s", - "findings": [] - } - - assert valid_control(value, expected_head_sha="wrong", expected_run_id="id123", expected_run_attempt="1") is None - assert valid_control(value, expected_head_sha="sha123", expected_run_id="wrong", expected_run_attempt="1") is None - assert valid_control(value, expected_head_sha="sha123", expected_run_id="id123", expected_run_attempt="wrong") is None - -def test_valid_control_invalid_result(): - value = { - "head_sha": "sha", - "run_id": "id", - "run_attempt": "1", - "result": "INVALID", - "reason": "r", - "summary": "s", - "findings": [] - } - assert valid_control(value, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - -def test_valid_control_invalid_reason_summary(): - base = { - "head_sha": "sha", "run_id": "id", "run_attempt": "1", - "result": "APPROVE", "findings": [] - } - - # Missing reason - val = dict(base, summary="s") - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - - # Empty reason - val = dict(base, reason=" ", summary="s") - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - - # Missing summary - val = dict(base, reason="r") - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - - # Empty summary - val = dict(base, reason="r", summary="") - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - -def test_valid_control_findings_logic(): - base = { - "head_sha": "sha", "run_id": "id", "run_attempt": "1", - "reason": "r", "summary": "s" - } - - # findings not a list - val = dict(base, result="APPROVE", findings="not a list") - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - - # APPROVE with findings - val = dict(base, result="APPROVE", findings=[{}]) - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - - # REQUEST_CHANGES without findings - val = dict(base, result="REQUEST_CHANGES", findings=[]) - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - -def test_valid_control_invalid_findings(): - base = { - "head_sha": "sha", "run_id": "id", "run_attempt": "1", - "result": "REQUEST_CHANGES", "reason": "r", "summary": "s" - } - valid_finding = { - "line": 1, "path": "p", "severity": "s", "title": "t", - "problem": "p", "root_cause": "r", "fix_direction": "f", - "regression_test_direction": "r", "suggested_diff": "s" - } - - # Finding not a dict - val = dict(base, findings=["not dict"]) - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - - # Invalid line - val = dict(base, findings=[dict(valid_finding, line=0)]) - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - val = dict(base, findings=[dict(valid_finding, line="1")]) - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - - # Missing required field - for field in ["path", "severity", "title", "problem", "root_cause", "fix_direction", "regression_test_direction", "suggested_diff"]: - finding = dict(valid_finding) - del finding[field] - val = dict(base, findings=[finding]) - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None - - # Empty field - finding = dict(valid_finding) - finding[field] = " " - val = dict(base, findings=[finding]) - assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None diff --git a/tests/scripts/ci/test_pr_review_merge_scheduler.py b/tests/scripts/ci/test_pr_review_merge_scheduler.py deleted file mode 100644 index 6bbbbd6..0000000 --- a/tests/scripts/ci/test_pr_review_merge_scheduler.py +++ /dev/null @@ -1,80 +0,0 @@ -import pytest - -from scripts.ci.pr_review_merge_scheduler import is_opencode_context - -def test_is_opencode_context_checkrun_name(): - node = { - "__typename": "CheckRun", - "name": "opencode-review", - } - assert is_opencode_context(node) is True - -def test_is_opencode_context_checkrun_workflow_name(): - node = { - "__typename": "CheckRun", - "name": "other-check", - "checkSuite": { - "workflowRun": { - "workflow": { - "name": "OpenCode Review" - } - } - } - } - assert is_opencode_context(node) is True - -def test_is_opencode_context_checkrun_false(): - node = { - "__typename": "CheckRun", - "name": "other-check", - "checkSuite": { - "workflowRun": { - "workflow": { - "name": "Other Workflow" - } - } - } - } - assert is_opencode_context(node) is False - -def test_is_opencode_context_checkrun_missing_fields(): - node = { - "__typename": "CheckRun", - "name": "other-check", - "checkSuite": {} - } - assert is_opencode_context(node) is False - - node2 = { - "__typename": "CheckRun", - "name": "other-check", - # missing checkSuite entirely - } - assert is_opencode_context(node2) is False - -def test_is_opencode_context_statuscontext_match(): - node = { - "__typename": "StatusContext", - "context": "opencode-review", - } - assert is_opencode_context(node) is True - -def test_is_opencode_context_statuscontext_mismatch(): - node = { - "__typename": "StatusContext", - "context": "other-review", - } - assert is_opencode_context(node) is False - -def test_is_opencode_context_statuscontext_missing(): - node = { - "__typename": "StatusContext", - # missing context - } - assert is_opencode_context(node) is False - -def test_is_opencode_context_missing_typename(): - node = { - "context": "opencode-review", - } - assert is_opencode_context(node) is True diff --git a/tests/test_opencode_review_normalize_output.py b/tests/test_opencode_review_normalize_output.py deleted file mode 100644 index 6c4cc83..0000000 --- a/tests/test_opencode_review_normalize_output.py +++ /dev/null @@ -1,19 +0,0 @@ -import json -from unittest.mock import patch - -from scripts.ci.opencode_review_normalize_output import iter_json_objects - - -def test_iter_json_objects_decode_error(): - """Test that iter_json_objects handles JSONDecodeError when decoding.""" - text = "prefix { valid looking json } suffix" - - # We mock raw_decode to raise JSONDecodeError to hit the except block explicitly - # This fulfills the 'Requires mocking the operation that throws the exception' rationale. - with patch("json.JSONDecoder.raw_decode") as mock_raw_decode: - mock_raw_decode.side_effect = json.JSONDecodeError("Mocked error", text, 0) - - result = iter_json_objects(text) - - assert result == [] - assert mock_raw_decode.called diff --git a/tests/test_pr_review_merge_scheduler.py b/tests/test_pr_review_merge_scheduler.py deleted file mode 100644 index 3a16137..0000000 --- a/tests/test_pr_review_merge_scheduler.py +++ /dev/null @@ -1,24 +0,0 @@ -import sys -from pathlib import Path -import pytest - -sys.path.insert(0, str(Path(__file__).parent.parent / "scripts" / "ci")) -import pr_review_merge_scheduler - -def test_split_repo_success(): - assert pr_review_merge_scheduler.split_repo("owner/repo") == ("owner", "repo") - -def test_split_repo_success_multiple_slashes(): - assert pr_review_merge_scheduler.split_repo("owner/repo/extra") == ("owner", "repo/extra") - -def test_split_repo_invalid(): - with pytest.raises(ValueError, match="repo must be owner/name, got 'invalid'"): - pr_review_merge_scheduler.split_repo("invalid") - -def test_split_repo_empty_owner(): - with pytest.raises(ValueError, match="repo must be owner/name, got '/repo'"): - pr_review_merge_scheduler.split_repo("/repo") - -def test_split_repo_empty_repo(): - with pytest.raises(ValueError, match="repo must be owner/name, got 'owner/'"): - pr_review_merge_scheduler.split_repo("owner/") diff --git a/tests/test_vibesec.py b/tests/test_vibesec.py index 73a4bac..c9e4df2 100644 --- a/tests/test_vibesec.py +++ b/tests/test_vibesec.py @@ -1,13 +1,12 @@ -from argparse import Namespace -import os import re +import os import tempfile from pathlib import Path from unittest.mock import patch import pytest -from scanner.cli.vibesec import _collect_files, _print_scan_results, _scan_file, cmd_init, cmd_scan, cmd_review, REVIEW_PROMPT_BASE, REVIEW_PROMPT_NEXTJS, REVIEW_PROMPT_SUPABASE, REVIEW_PROMPT_FIREBASE, REVIEW_PROMPT_STRIPE, REVIEW_PROMPT_FOOTER +from scanner.cli.vibesec import _collect_files, _print_scan_results, _scan_file, cmd_init, cmd_scan MOCK_RULES = [ { @@ -19,9 +18,9 @@ }, { "id": "mock-todo", - "pattern": re.compile(r"TODO: fix issue"), + "pattern": re.compile(r"TODO: fix auth"), "severity": "HIGH", - "message": "Found issue todo", + "message": "Found auth todo", "extensions": None, }, { @@ -83,7 +82,7 @@ def test_scan_file_with_findings(tmp_path): @patch("scanner.cli.vibesec.SCAN_RULES", MOCK_RULES) def test_scan_file_with_multiple_findings(tmp_path): test_file = tmp_path / "unsafe_multiple.js" - test_file.write_text("const key = MOCK_SECRET_KEY;\n// TODO: fix issue here\n") + test_file.write_text("const key = MOCK_SECRET_KEY;\n// TODO: fix auth checks here\n") findings = _scan_file(test_file, tmp_path) rule_ids = [f["rule_id"] for f in findings] @@ -214,43 +213,6 @@ def test_collect_files_handles_cyclic_symlink(tmp_path): assert collected_rel_paths == {"a/a.py", "b/b.py"} -def test_collect_files_handles_oserror_in_scandir(tmp_path): - (tmp_path / "a.py").touch() - with patch("os.scandir", side_effect=PermissionError): - assert list(_collect_files(tmp_path)) == [] - - -def test_collect_files_handles_oserror_in_entry(tmp_path): - (tmp_path / "a.py").touch() - (tmp_path / "b.py").touch() - - original_scandir = os.scandir - - def mock_scandir(path): - iterator = original_scandir(path) - class MockIterator: - def __enter__(self): - return self - def __exit__(self, *args): - iterator.close() - def __iter__(self): - return self - def __next__(self): - entry = next(iterator) - if entry.name == "a.py": - class MockEntry: - name = entry.name - path = entry.path - def is_symlink(self): - raise PermissionError("Access denied") - return MockEntry() - return entry - return MockIterator() - - with patch("os.scandir", side_effect=mock_scandir): - collected_rel_paths = {f.relative_to(tmp_path).as_posix() for f in _collect_files(tmp_path)} - assert collected_rel_paths == {"b.py"} - @patch("scanner.cli.vibesec.SCAN_RULES", MOCK_RULES) def test_scan_file_skips_symlink(tmp_path): target = tmp_path / "target.py" @@ -482,65 +444,3 @@ def mock_lstat(path): with patch("scanner.cli.vibesec.os.lstat", side_effect=mock_lstat) as mock3: assert _scan_file(test_file, tmp_path) == [] mock3.assert_called_once() - - -# --------------------------------------------------------------------------- -# cmd_review tests -# --------------------------------------------------------------------------- - -def test_cmd_review_base_prompt(capsys): - args = Namespace(stack=None, db=None, payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_BASE in captured.out - assert REVIEW_PROMPT_FOOTER in captured.out - assert REVIEW_PROMPT_NEXTJS not in captured.out - assert REVIEW_PROMPT_SUPABASE not in captured.out - assert REVIEW_PROMPT_FIREBASE not in captured.out - assert REVIEW_PROMPT_STRIPE not in captured.out - -def test_cmd_review_nextjs(capsys): - args = Namespace(stack=["nextjs"], db=None, payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_NEXTJS in captured.out - -def test_cmd_review_supabase(capsys): - args = Namespace(stack=None, db="supabase", payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_SUPABASE in captured.out - -def test_cmd_review_supabase_via_stack(capsys): - args = Namespace(stack=["supabase"], db=None, payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_SUPABASE in captured.out - -def test_cmd_review_firebase(capsys): - args = Namespace(stack=None, db="firebase", payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_FIREBASE in captured.out - -def test_cmd_review_firebase_via_stack(capsys): - args = Namespace(stack=["firebase"], db=None, payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_FIREBASE in captured.out - -def test_cmd_review_stripe(capsys): - args = Namespace(stack=None, db=None, payments="stripe") - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_STRIPE in captured.out - -def test_cmd_review_all_options(capsys): - args = Namespace(stack=["nextjs"], db="supabase", payments="stripe") - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_BASE in captured.out - assert REVIEW_PROMPT_NEXTJS in captured.out - assert REVIEW_PROMPT_SUPABASE in captured.out - assert REVIEW_PROMPT_STRIPE in captured.out - assert REVIEW_PROMPT_FOOTER in captured.out From 0c9c7e2d6bf750fe41fc99e79277b23adbd3571f Mon Sep 17 00:00:00 2001 From: seonghobae <8172694+seonghobae@users.noreply.github.com> Date: Tue, 16 Jun 2026 06:34:36 +0000 Subject: [PATCH 04/14] =?UTF-8?q?=F0=9F=A7=AA=20Add=20tests=20for=20scanne?= =?UTF-8?q?r=20stat=20errors=20and=20large=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From cb0111b6472e140f8737d5c6a8eaf2661e418427 Mon Sep 17 00:00:00 2001 From: seonghobae <8172694+seonghobae@users.noreply.github.com> Date: Tue, 16 Jun 2026 06:43:02 +0000 Subject: [PATCH 05/14] =?UTF-8?q?=F0=9F=A7=AA=20Add=20tests=20for=20scanne?= =?UTF-8?q?r=20stat=20errors=20and=20large=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_vibesec.py | 117 +----------------------------------------- 1 file changed, 1 insertion(+), 116 deletions(-) diff --git a/tests/test_vibesec.py b/tests/test_vibesec.py index 8548165..c9e4df2 100644 --- a/tests/test_vibesec.py +++ b/tests/test_vibesec.py @@ -1,13 +1,12 @@ import re import os import tempfile -from argparse import Namespace from pathlib import Path from unittest.mock import patch import pytest -from scanner.cli.vibesec import _collect_files, _print_scan_results, _scan_file, cmd_init, cmd_scan, cmd_review, REVIEW_PROMPT_BASE, REVIEW_PROMPT_NEXTJS, REVIEW_PROMPT_SUPABASE, REVIEW_PROMPT_FIREBASE, REVIEW_PROMPT_STRIPE, REVIEW_PROMPT_FOOTER +from scanner.cli.vibesec import _collect_files, _print_scan_results, _scan_file, cmd_init, cmd_scan MOCK_RULES = [ { @@ -431,120 +430,6 @@ def test_scan_file_stat_error(tmp_path): assert _scan_file(test_file, tmp_path) == [] mock2.assert_called_once() -def test_collect_files_oserror_on_scandir(tmp_path): - (tmp_path / "dir1").mkdir() - (tmp_path / "dir1" / "file1.py").touch() - (tmp_path / "file2.py").touch() - - original_scandir = os.scandir - def mock_scandir(path): - if Path(path).name == "dir1": - raise PermissionError("Access denied") - return original_scandir(path) - - with patch("os.scandir", side_effect=mock_scandir): - files = list(_collect_files(tmp_path)) - assert len(files) == 1 - assert files[0].name == "file2.py" - -def test_collect_files_oserror_on_entry(tmp_path): - (tmp_path / "file1.py").touch() - (tmp_path / "file2.py").touch() - - original_scandir = os.scandir - def mock_scandir(path): - class MockEntry: - def __init__(self, entry): - self._entry = entry - self.name = entry.name - self.path = entry.path - def is_symlink(self): - return self._entry.is_symlink() - def is_dir(self, follow_symlinks=False): - if self.name == "file1.py": - raise PermissionError("Access denied") - return self._entry.is_dir(follow_symlinks=follow_symlinks) - def is_file(self, follow_symlinks=False): - return self._entry.is_file(follow_symlinks=follow_symlinks) - - class MockIterator: - def __init__(self, it): - self.it = it - def __enter__(self): - return self - def __exit__(self, *args): - self.it.close() - def __iter__(self): - for entry in self.it: - yield MockEntry(entry) - - return MockIterator(original_scandir(path)) - - with patch("os.scandir", side_effect=mock_scandir): - files = list(_collect_files(tmp_path)) - assert len(files) == 1 - assert files[0].name == "file2.py" -# --------------------------------------------------------------------------- -# cmd_review tests -# --------------------------------------------------------------------------- - -def test_cmd_review_base_prompt(capsys): - args = Namespace(stack=None, db=None, payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_BASE in captured.out - assert REVIEW_PROMPT_FOOTER in captured.out - assert REVIEW_PROMPT_NEXTJS not in captured.out - assert REVIEW_PROMPT_SUPABASE not in captured.out - assert REVIEW_PROMPT_FIREBASE not in captured.out - assert REVIEW_PROMPT_STRIPE not in captured.out - -def test_cmd_review_nextjs(capsys): - args = Namespace(stack=["nextjs"], db=None, payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_NEXTJS in captured.out - -def test_cmd_review_supabase(capsys): - args = Namespace(stack=None, db="supabase", payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_SUPABASE in captured.out - -def test_cmd_review_supabase_via_stack(capsys): - args = Namespace(stack=["supabase"], db=None, payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_SUPABASE in captured.out - -def test_cmd_review_firebase(capsys): - args = Namespace(stack=None, db="firebase", payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_FIREBASE in captured.out - -def test_cmd_review_firebase_via_stack(capsys): - args = Namespace(stack=["firebase"], db=None, payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_FIREBASE in captured.out - -def test_cmd_review_stripe(capsys): - args = Namespace(stack=None, db=None, payments="stripe") - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_STRIPE in captured.out - -def test_cmd_review_all_options(capsys): - args = Namespace(stack=["nextjs"], db="supabase", payments="stripe") - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_BASE in captured.out - assert REVIEW_PROMPT_NEXTJS in captured.out - assert REVIEW_PROMPT_SUPABASE in captured.out - assert REVIEW_PROMPT_STRIPE in captured.out - assert REVIEW_PROMPT_FOOTER in captured.out - def test_scan_file_large_file(tmp_path): test_file = tmp_path / "large_file.ts" test_file.write_text("const key = 'x';\n") From de23306c482ff161b5f1e6e1f58c0ea94720b34c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 16 Jun 2026 06:43:53 +0000 Subject: [PATCH 06/14] Restore PR scope to scanner tests --- .jules/bolt.md | 17 ++ scanner/cli/vibesec.py | 160 +++++++++-------- .../ci/opencode_review_normalize_output.py | 100 +++++++---- scripts/ci/pr_review_merge_scheduler.py | 26 +-- tests/scripts/__init__.py | 0 tests/scripts/ci/__init__.py | 0 .../test_opencode_review_normalize_output.py | 166 ++++++++++++++++++ .../ci/test_pr_review_merge_scheduler.py | 80 +++++++++ .../test_opencode_review_normalize_output.py | 19 ++ tests/test_pr_review_merge_scheduler.py | 24 +++ tests/test_vibesec.py | 82 +++++++-- 11 files changed, 548 insertions(+), 126 deletions(-) create mode 100644 tests/scripts/__init__.py create mode 100644 tests/scripts/ci/__init__.py create mode 100644 tests/scripts/ci/test_opencode_review_normalize_output.py create mode 100644 tests/scripts/ci/test_pr_review_merge_scheduler.py create mode 100644 tests/test_opencode_review_normalize_output.py create mode 100644 tests/test_pr_review_merge_scheduler.py diff --git a/.jules/bolt.md b/.jules/bolt.md index 3708540..8cf0535 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -13,3 +13,20 @@ ## 2026-06-14 - Deferring Pathlib Operations in Hot Paths **Learning:** In highly repetitive loops like file scanners (e.g., iterating through thousands of safe files), preemptively calculating `Path.relative_to()` and sanitizing strings adds significant cumulative overhead. Pathlib operations internally parse paths, check parts, and construct new objects, which is extremely expensive when executed on a per-file basis unconditionally. **Action:** Always defer expensive path computations (like converting paths to relative or string sanitization) until *after* the fast-path condition (like a regex match) triggers. This drastically cuts down on unnecessary string operations for clean files. +## 2025-03-09 - O(N^2) JSON parsing due to string slicing +**Learning:** Extracting JSON objects from a large string by iterating with `for index, char in enumerate(text)` and doing `decoder.raw_decode(text[index:])` results in O(N^2) complexity because of string slicing operations and overlapping extraction attempts on failure. +**Action:** Use a `while` loop combined with `text.find('{', index)` to find the next object, and `decoder.raw_decode(text, index)` to decode it directly without slicing. Then, advance `index` to the returned `end` position. + +## 2024-05-18 - Set literal vs Tuple membership check + +**Learning:** In Python, using set literals for constant membership checks (e.g., `in {'CRITICAL', 'HIGH'}`) inside loops or comprehensions is highly efficient because CPython optimizes them into `frozenset` constants at compile time, eliminating runtime instantiation overhead. Using `tuple` for these checks performs an `O(n)` linear search, while a `frozenset` performs an `O(1)` hash lookup. + +**Action:** Prefer set literals `in {"A", "B"}` over tuples `in ("A", "B")` when performing membership checks against constant items, especially in hot paths or tight loops. + +## 2024-06-16 - Parallelize Subprocess CLI Calls +**Learning:** Sequential, synchronous execution of `subprocess.run` (like calling the GitHub CLI) across multiple items (like PRs) is a significant I/O bottleneck. +**Action:** Use `concurrent.futures.ThreadPoolExecutor` with `functools.partial` and `executor.map` to safely parallelize I/O-bound subprocess executions, significantly reducing overall script runtime. + +## 2024-05-16 - Module-level Constants for Performance +**Learning:** Recreating static dictionaries (like severity mappings and icons) inside frequently called functions causes unnecessary memory allocations and slight performance overhead on every call. +**Action:** Extract static dictionaries to module-level constants to ensure they are instantiated only once when the module is loaded. diff --git a/scanner/cli/vibesec.py b/scanner/cli/vibesec.py index 5fb6e4e..eddad2e 100644 --- a/scanner/cli/vibesec.py +++ b/scanner/cli/vibesec.py @@ -49,6 +49,7 @@ - File uploads must validate type, size, and filename server-side. - Never set CORS to allow all origins on authenticated endpoints. - Add tests for cross-user access denial on every resource endpoint. +- When dealing with OWASP rules or security vulnerabilities, automatically run `vibesec scan .` as a hook or use tools like `semgrep` to verify the codebase. See https://github.com/Seongho-Bae/VibeSec for full rules and checklists. """ @@ -66,6 +67,7 @@ 6. **Supabase**: Use getUser() (not getSession()) server-side; RLS on all tables. 7. **Files**: Validate type, size, and generate server-side filenames for uploads. 8. **CORS**: Restrict to known origins on authenticated endpoints. +9. **Automated Scanning**: When dealing with OWASP rules or security vulnerabilities, automatically run `vibesec scan .` as a hook or use tools like `semgrep` to verify the codebase. Return 401 for unauthenticated requests, 403 for ownership violations. @@ -294,6 +296,54 @@ # Command implementations # --------------------------------------------------------------------------- + +def _install_tool_rules(config: dict, project_root, installed: list): + """Install the specific rules file based on tool configuration.""" + if config.get("shared_only"): + return + + target_file = project_root / config['path'] + + # SECURITY: Prevent Arbitrary File Write via symlink path traversal + if not target_file.resolve().is_relative_to(project_root): + print(f"Error: Target path {target_file} escapes the project root. Aborting.", file=sys.stderr) + sys.exit(1) + + target_file.parent.mkdir(parents=True, exist_ok=True) + if target_file.is_symlink(): + target_file.unlink() + + if "append_marker" in config: + if target_file.exists(): + existing = target_file.read_text() + if config['append_marker'] not in existing: + target_file.write_text(existing + "\n\n" + config["content"]) + installed.append(f"{config['path']} (appended)") + else: + print(f"{config['path']} already contains {config['append_marker']} rules — skipping.") + else: + target_file.write_text(config["content"]) + installed.append(str(config['path'])) + else: + target_file.write_text(config["content"]) + installed.append(str(config['path'])) + + +def _install_checklist(project_root, installed: list): + """Install the VIBESEC_CHECKLIST.md file.""" + checklist_file = project_root / "VIBESEC_CHECKLIST.md" + + # SECURITY: Prevent Arbitrary File Write via symlink path traversal + if not checklist_file.resolve().is_relative_to(project_root): + print(f"Error: Checklist path {checklist_file} escapes the project root. Aborting.", file=sys.stderr) + sys.exit(1) + + if checklist_file.is_symlink(): + checklist_file.unlink() + if not checklist_file.exists(): + checklist_file.write_text(CHECKLIST_TEMPLATE) + installed.append("VIBESEC_CHECKLIST.md") + def cmd_init(args): """Install security rules into the project.""" tool = getattr(args, "tool", "cursor") or "cursor" @@ -327,46 +377,8 @@ def cmd_init(args): sys.exit(1) config = tool_configs[tool] - if not config.get("shared_only"): - target_file = project_root / config["path"] - - # SECURITY: Prevent Arbitrary File Write via symlink path traversal - if not target_file.resolve().is_relative_to(project_root): - print(f"Error: Target path {target_file} escapes the project root. Aborting.", file=sys.stderr) - sys.exit(1) - - target_file.parent.mkdir(parents=True, exist_ok=True) - if target_file.is_symlink(): - target_file.unlink() - - if "append_marker" in config: - if target_file.exists(): - existing = target_file.read_text() - if config["append_marker"] not in existing: - target_file.write_text(existing + "\n\n" + config["content"]) - installed.append(f"{config['path']} (appended)") - else: - print(f"{config['path']} already contains {config['append_marker']} rules — skipping.") - else: - target_file.write_text(config["content"]) - installed.append(str(config["path"])) - else: - target_file.write_text(config["content"]) - installed.append(str(config["path"])) - # Always create the checklist - checklist_file = project_root / "VIBESEC_CHECKLIST.md" - - # SECURITY: Prevent Arbitrary File Write via symlink path traversal - if not checklist_file.resolve().is_relative_to(project_root): - print(f"Error: Checklist path {checklist_file} escapes the project root. Aborting.", file=sys.stderr) - sys.exit(1) - - if checklist_file.is_symlink(): - checklist_file.unlink() - if not checklist_file.exists(): - checklist_file.write_text(CHECKLIST_TEMPLATE) - installed.append("VIBESEC_CHECKLIST.md") - + _install_tool_rules(config, project_root, installed) + _install_checklist(project_root, installed) if stack and "supabase" in stack: _print_supabase_reminder() @@ -419,7 +431,7 @@ def cmd_scan(args): findings.extend(file_findings) _print_scan_results(findings, files_scanned) - return 1 if any(f["severity"] in ("CRITICAL", "HIGH") for f in findings) else 0 + return 1 if any(f["severity"] in {"CRITICAL", "HIGH"} for f in findings) else 0 def cmd_hook(args): @@ -493,6 +505,28 @@ def _get_applicable_rules(ext: str): return _RULES_CACHE[ext] +def _process_dir_entries(dir_path: str): + """Process entries in a directory, yielding files and returning subdirectories.""" + dirs = [] + try: + with os.scandir(dir_path) as it: + for entry in it: + try: + if entry.is_symlink(): + continue + if entry.is_dir(follow_symlinks=False): + if entry.name not in SKIP_DIRS and not entry.name.startswith("."): + dirs.append(entry.path) + elif entry.is_file(follow_symlinks=False): + _, ext = os.path.splitext(entry.name) + if ext.lower() not in SKIP_EXTENSIONS: + yield Path(entry.path) + except (OSError, PermissionError): + continue + except (OSError, PermissionError): + pass + return dirs + def _collect_files(base_path: Path): """Collect all scannable files, skipping unwanted directories.""" # ⚡ Bolt: Optimize file traversal using os.scandir and os.path.splitext @@ -502,25 +536,8 @@ def _collect_files(base_path: Path): stack = [str(base_path)] while stack: current_dir = stack.pop() - try: - with os.scandir(current_dir) as it: - dirs = [] - for entry in it: - try: - if entry.is_symlink(): - continue - if entry.is_dir(follow_symlinks=False): - if entry.name not in SKIP_DIRS and not entry.name.startswith("."): - dirs.append(entry.path) - elif entry.is_file(follow_symlinks=False): - _, ext = os.path.splitext(entry.name) - if ext.lower() not in SKIP_EXTENSIONS: - yield Path(entry.path) - except (OSError, PermissionError): - continue - stack.extend(reversed(dirs)) - except (OSError, PermissionError): - pass + dirs = yield from _process_dir_entries(current_dir) + stack.extend(reversed(dirs)) def _sanitize_terminal_output(text: str) -> str: @@ -585,21 +602,24 @@ def _scan_file(file_path: Path, base_path: Path): return findings + +# ⚡ Bolt: Move severity mappings to module level to avoid redundant +# dictionary allocations on every call to print scan results. +SEVERITY_ORDER = {"CRITICAL": 0, "HIGH": 1, "WARNING": 2, "INFO": 3} +SEVERITY_ICONS = { + "CRITICAL": "🔴 CRITICAL", + "HIGH": "🟠 HIGH", + "WARNING": "🟡 WARNING", + "INFO": "🔵 INFO", +} + def _print_scan_results(findings, files_scanned): - severity_order = {"CRITICAL": 0, "HIGH": 1, "WARNING": 2, "INFO": 3} - findings.sort(key=lambda f: severity_order.get(f["severity"], 99)) - - severity_icons = { - "CRITICAL": "🔴 CRITICAL", - "HIGH": "🟠 HIGH", - "WARNING": "🟡 WARNING", - "INFO": "🔵 INFO", - } + findings.sort(key=lambda f: SEVERITY_ORDER.get(f["severity"], 99)) counts = {"CRITICAL": 0, "HIGH": 0, "WARNING": 0, "INFO": 0} for f in findings: counts[f["severity"]] += 1 - icon = severity_icons.get(f["severity"], f["severity"]) + icon = SEVERITY_ICONS.get(f["severity"], f["severity"]) print(f"[{icon}] {f['file']}:{f['line']}") print(f" Rule: {f['rule_id']}") print(f" {f['message']}") diff --git a/scripts/ci/opencode_review_normalize_output.py b/scripts/ci/opencode_review_normalize_output.py index 2a850c6..2de45dc 100755 --- a/scripts/ci/opencode_review_normalize_output.py +++ b/scripts/ci/opencode_review_normalize_output.py @@ -1,47 +1,47 @@ #!/usr/bin/env python3 """Normalize OpenCode review output into the strict approval-gate contract.""" -from __future__ import annotations - import json import sys from pathlib import Path from typing import Any -def valid_control( - value: Any, - *, +def _validate_metadata( + value: dict[str, Any], expected_head_sha: str, expected_run_id: str, expected_run_attempt: str, -) -> dict[str, Any] | None: - if not isinstance(value, dict): - return None - +) -> bool: if value.get("head_sha") != expected_head_sha: - return None + return False if value.get("run_id") != expected_run_id: - return None + return False if value.get("run_attempt") != expected_run_attempt: - return None + return False + return True + +def _validate_result_and_reason(value: dict[str, Any]) -> bool: result = value.get("result") if result not in {"APPROVE", "REQUEST_CHANGES"}: - return None - + return False if not isinstance(value.get("reason"), str) or not value["reason"].strip(): - return None + return False if not isinstance(value.get("summary"), str) or not value["summary"].strip(): - return None + return False + return True + +def _validate_findings(value: dict[str, Any]) -> bool: + result = value.get("result") findings = value.get("findings") if not isinstance(findings, list): - return None + return False if result == "APPROVE" and findings: - return None + return False if result == "REQUEST_CHANGES" and not findings: - return None + return False required_finding_fields = ( "path", @@ -55,21 +55,47 @@ def valid_control( ) for finding in findings: if not isinstance(finding, dict): - return None + return False if not isinstance(finding.get("line"), int) or finding["line"] <= 0: - return None + return False for field in required_finding_fields: if not isinstance(finding.get(field), str) or not finding[field].strip(): - return None + return False + return True + + +def valid_control( + value: Any, + *, + expected_head_sha: str, + expected_run_id: str, + expected_run_attempt: str, +) -> dict[str, Any] | None: + if not isinstance(value, dict): + return None + + if not _validate_metadata( + value, + expected_head_sha, + expected_run_id, + expected_run_attempt, + ): + return None + + if not _validate_result_and_reason(value): + return None + + if not _validate_findings(value): + return None return { "head_sha": value["head_sha"], "run_id": value["run_id"], "run_attempt": value["run_attempt"], - "result": result, + "result": value["result"], "reason": value["reason"], "summary": value["summary"], - "findings": findings, + "findings": value["findings"], } @@ -83,14 +109,22 @@ def iter_json_objects(text: str) -> list[Any]: # OpenCode exports may contain prose around the JSON control object. pass - for index, character in enumerate(text): - if character != "{": - continue + # Optimization: Use a while loop with text.find() and decoder.raw_decode(text, index) + # to avoid O(N^2) behavior from redundant string slicing (text[index:]) and overlapping extractions. + index = 0 + length = len(text) + while index < length: + next_brace = text.find("{", index) + if next_brace == -1: + break + index = next_brace + try: - value, _ = decoder.raw_decode(text[index:]) + value, end = decoder.raw_decode(text, index) + values.append(value) + index = end except json.JSONDecodeError: - continue - values.append(value) + index += 1 return values @@ -106,6 +140,12 @@ def main(argv: list[str]) -> int: expected_head_sha, expected_run_id, expected_run_attempt, output_file_arg = argv[1:] output_file = Path(output_file_arg) + project_root = Path.cwd().resolve() + + if not output_file.resolve().is_relative_to(project_root): + print(f"error: output file path {output_file_arg!r} is outside the project root", file=sys.stderr) + return 65 + try: output_text = output_file.read_text(encoding="utf-8") except OSError as exc: diff --git a/scripts/ci/pr_review_merge_scheduler.py b/scripts/ci/pr_review_merge_scheduler.py index a8fee70..cab2198 100644 --- a/scripts/ci/pr_review_merge_scheduler.py +++ b/scripts/ci/pr_review_merge_scheduler.py @@ -1,11 +1,12 @@ #!/usr/bin/env python3 -from __future__ import annotations import argparse import json import os import subprocess import sys +import concurrent.futures +from functools import partial from dataclasses import dataclass from typing import Any @@ -330,17 +331,18 @@ def main(argv: list[str]) -> int: if not args.repo: raise SystemExit("--repo is required") prs = fetch_open_prs(args.repo, args.max_prs) - decisions = [ - inspect_pr( - args.repo, - pr, - dry_run=args.dry_run, - trigger_reviews=args.trigger_reviews, - enable_auto_merge_flag=args.enable_auto_merge, - workflow=args.review_workflow, - ) - for pr in prs - ] + + inspect_func = partial( + inspect_pr, + args.repo, + dry_run=args.dry_run, + trigger_reviews=args.trigger_reviews, + enable_auto_merge_flag=args.enable_auto_merge, + workflow=args.review_workflow, + ) + with concurrent.futures.ThreadPoolExecutor() as executor: + decisions = list(executor.map(inspect_func, prs)) + print_summary(decisions, dry_run=args.dry_run) return 0 diff --git a/tests/scripts/__init__.py b/tests/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/scripts/ci/__init__.py b/tests/scripts/ci/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/scripts/ci/test_opencode_review_normalize_output.py b/tests/scripts/ci/test_opencode_review_normalize_output.py new file mode 100644 index 0000000..6926389 --- /dev/null +++ b/tests/scripts/ci/test_opencode_review_normalize_output.py @@ -0,0 +1,166 @@ +import pytest + +from scripts.ci.opencode_review_normalize_output import valid_control + +def test_valid_control_approve(): + value = { + "head_sha": "sha123", + "run_id": "id123", + "run_attempt": "1", + "result": "APPROVE", + "reason": "Looks good", + "summary": "Approved", + "findings": [], + "extra_field": "should_be_ignored" + } + result = valid_control( + value, + expected_head_sha="sha123", + expected_run_id="id123", + expected_run_attempt="1" + ) + assert result == { + "head_sha": "sha123", + "run_id": "id123", + "run_attempt": "1", + "result": "APPROVE", + "reason": "Looks good", + "summary": "Approved", + "findings": [] + } + +def test_valid_control_request_changes(): + value = { + "head_sha": "sha123", + "run_id": "id123", + "run_attempt": "1", + "result": "REQUEST_CHANGES", + "reason": "Has issues", + "summary": "Needs work", + "findings": [ + { + "line": 42, + "path": "file.py", + "severity": "high", + "title": "Bug", + "problem": "Bad code", + "root_cause": "Typo", + "fix_direction": "Fix it", + "regression_test_direction": "Test it", + "suggested_diff": "- bad\n+ good", + "extra": "ignore" + } + ] + } + result = valid_control( + value, + expected_head_sha="sha123", + expected_run_id="id123", + expected_run_attempt="1" + ) + assert result is not None + assert result["findings"] == value["findings"] + +def test_valid_control_invalid_type(): + assert valid_control("not a dict", expected_head_sha="s", expected_run_id="i", expected_run_attempt="1") is None + +def test_valid_control_mismatched_metadata(): + value = { + "head_sha": "sha123", + "run_id": "id123", + "run_attempt": "1", + "result": "APPROVE", + "reason": "r", + "summary": "s", + "findings": [] + } + + assert valid_control(value, expected_head_sha="wrong", expected_run_id="id123", expected_run_attempt="1") is None + assert valid_control(value, expected_head_sha="sha123", expected_run_id="wrong", expected_run_attempt="1") is None + assert valid_control(value, expected_head_sha="sha123", expected_run_id="id123", expected_run_attempt="wrong") is None + +def test_valid_control_invalid_result(): + value = { + "head_sha": "sha", + "run_id": "id", + "run_attempt": "1", + "result": "INVALID", + "reason": "r", + "summary": "s", + "findings": [] + } + assert valid_control(value, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + +def test_valid_control_invalid_reason_summary(): + base = { + "head_sha": "sha", "run_id": "id", "run_attempt": "1", + "result": "APPROVE", "findings": [] + } + + # Missing reason + val = dict(base, summary="s") + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + + # Empty reason + val = dict(base, reason=" ", summary="s") + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + + # Missing summary + val = dict(base, reason="r") + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + + # Empty summary + val = dict(base, reason="r", summary="") + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + +def test_valid_control_findings_logic(): + base = { + "head_sha": "sha", "run_id": "id", "run_attempt": "1", + "reason": "r", "summary": "s" + } + + # findings not a list + val = dict(base, result="APPROVE", findings="not a list") + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + + # APPROVE with findings + val = dict(base, result="APPROVE", findings=[{}]) + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + + # REQUEST_CHANGES without findings + val = dict(base, result="REQUEST_CHANGES", findings=[]) + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + +def test_valid_control_invalid_findings(): + base = { + "head_sha": "sha", "run_id": "id", "run_attempt": "1", + "result": "REQUEST_CHANGES", "reason": "r", "summary": "s" + } + valid_finding = { + "line": 1, "path": "p", "severity": "s", "title": "t", + "problem": "p", "root_cause": "r", "fix_direction": "f", + "regression_test_direction": "r", "suggested_diff": "s" + } + + # Finding not a dict + val = dict(base, findings=["not dict"]) + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + + # Invalid line + val = dict(base, findings=[dict(valid_finding, line=0)]) + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + val = dict(base, findings=[dict(valid_finding, line="1")]) + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + + # Missing required field + for field in ["path", "severity", "title", "problem", "root_cause", "fix_direction", "regression_test_direction", "suggested_diff"]: + finding = dict(valid_finding) + del finding[field] + val = dict(base, findings=[finding]) + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None + + # Empty field + finding = dict(valid_finding) + finding[field] = " " + val = dict(base, findings=[finding]) + assert valid_control(val, expected_head_sha="sha", expected_run_id="id", expected_run_attempt="1") is None diff --git a/tests/scripts/ci/test_pr_review_merge_scheduler.py b/tests/scripts/ci/test_pr_review_merge_scheduler.py new file mode 100644 index 0000000..6bbbbd6 --- /dev/null +++ b/tests/scripts/ci/test_pr_review_merge_scheduler.py @@ -0,0 +1,80 @@ +import pytest + +from scripts.ci.pr_review_merge_scheduler import is_opencode_context + +def test_is_opencode_context_checkrun_name(): + node = { + "__typename": "CheckRun", + "name": "opencode-review", + } + assert is_opencode_context(node) is True + +def test_is_opencode_context_checkrun_workflow_name(): + node = { + "__typename": "CheckRun", + "name": "other-check", + "checkSuite": { + "workflowRun": { + "workflow": { + "name": "OpenCode Review" + } + } + } + } + assert is_opencode_context(node) is True + +def test_is_opencode_context_checkrun_false(): + node = { + "__typename": "CheckRun", + "name": "other-check", + "checkSuite": { + "workflowRun": { + "workflow": { + "name": "Other Workflow" + } + } + } + } + assert is_opencode_context(node) is False + +def test_is_opencode_context_checkrun_missing_fields(): + node = { + "__typename": "CheckRun", + "name": "other-check", + "checkSuite": {} + } + assert is_opencode_context(node) is False + + node2 = { + "__typename": "CheckRun", + "name": "other-check", + # missing checkSuite entirely + } + assert is_opencode_context(node2) is False + +def test_is_opencode_context_statuscontext_match(): + node = { + "__typename": "StatusContext", + "context": "opencode-review", + } + assert is_opencode_context(node) is True + +def test_is_opencode_context_statuscontext_mismatch(): + node = { + "__typename": "StatusContext", + "context": "other-review", + } + assert is_opencode_context(node) is False + +def test_is_opencode_context_statuscontext_missing(): + node = { + "__typename": "StatusContext", + # missing context + } + assert is_opencode_context(node) is False + +def test_is_opencode_context_missing_typename(): + node = { + "context": "opencode-review", + } + assert is_opencode_context(node) is True diff --git a/tests/test_opencode_review_normalize_output.py b/tests/test_opencode_review_normalize_output.py new file mode 100644 index 0000000..6c4cc83 --- /dev/null +++ b/tests/test_opencode_review_normalize_output.py @@ -0,0 +1,19 @@ +import json +from unittest.mock import patch + +from scripts.ci.opencode_review_normalize_output import iter_json_objects + + +def test_iter_json_objects_decode_error(): + """Test that iter_json_objects handles JSONDecodeError when decoding.""" + text = "prefix { valid looking json } suffix" + + # We mock raw_decode to raise JSONDecodeError to hit the except block explicitly + # This fulfills the 'Requires mocking the operation that throws the exception' rationale. + with patch("json.JSONDecoder.raw_decode") as mock_raw_decode: + mock_raw_decode.side_effect = json.JSONDecodeError("Mocked error", text, 0) + + result = iter_json_objects(text) + + assert result == [] + assert mock_raw_decode.called diff --git a/tests/test_pr_review_merge_scheduler.py b/tests/test_pr_review_merge_scheduler.py new file mode 100644 index 0000000..3a16137 --- /dev/null +++ b/tests/test_pr_review_merge_scheduler.py @@ -0,0 +1,24 @@ +import sys +from pathlib import Path +import pytest + +sys.path.insert(0, str(Path(__file__).parent.parent / "scripts" / "ci")) +import pr_review_merge_scheduler + +def test_split_repo_success(): + assert pr_review_merge_scheduler.split_repo("owner/repo") == ("owner", "repo") + +def test_split_repo_success_multiple_slashes(): + assert pr_review_merge_scheduler.split_repo("owner/repo/extra") == ("owner", "repo/extra") + +def test_split_repo_invalid(): + with pytest.raises(ValueError, match="repo must be owner/name, got 'invalid'"): + pr_review_merge_scheduler.split_repo("invalid") + +def test_split_repo_empty_owner(): + with pytest.raises(ValueError, match="repo must be owner/name, got '/repo'"): + pr_review_merge_scheduler.split_repo("/repo") + +def test_split_repo_empty_repo(): + with pytest.raises(ValueError, match="repo must be owner/name, got 'owner/'"): + pr_review_merge_scheduler.split_repo("owner/") diff --git a/tests/test_vibesec.py b/tests/test_vibesec.py index 8548165..b772687 100644 --- a/tests/test_vibesec.py +++ b/tests/test_vibesec.py @@ -1,5 +1,5 @@ -import re import os +import re import tempfile from argparse import Namespace from pathlib import Path @@ -19,9 +19,9 @@ }, { "id": "mock-todo", - "pattern": re.compile(r"TODO: fix auth"), + "pattern": re.compile(r"TODO: fix issue"), "severity": "HIGH", - "message": "Found auth todo", + "message": "Found issue todo", "extensions": None, }, { @@ -83,7 +83,7 @@ def test_scan_file_with_findings(tmp_path): @patch("scanner.cli.vibesec.SCAN_RULES", MOCK_RULES) def test_scan_file_with_multiple_findings(tmp_path): test_file = tmp_path / "unsafe_multiple.js" - test_file.write_text("const key = MOCK_SECRET_KEY;\n// TODO: fix auth checks here\n") + test_file.write_text("const key = MOCK_SECRET_KEY;\n// TODO: fix issue here\n") findings = _scan_file(test_file, tmp_path) rule_ids = [f["rule_id"] for f in findings] @@ -214,6 +214,45 @@ def test_collect_files_handles_cyclic_symlink(tmp_path): assert collected_rel_paths == {"a/a.py", "b/b.py"} +def test_collect_files_handles_oserror_in_scandir(tmp_path): + (tmp_path / "a.py").touch() + with patch("os.scandir", side_effect=PermissionError): + assert list(_collect_files(tmp_path)) == [] + + +def test_collect_files_handles_oserror_in_entry(tmp_path): + (tmp_path / "a.py").touch() + (tmp_path / "b.py").touch() + + original_scandir = os.scandir + + def mock_scandir(path): + iterator = original_scandir(path) + class MockIterator: + def __enter__(self): + return self + def __exit__(self, *args): + iterator.close() + def __iter__(self): + return self + def __next__(self): + entry = next(iterator) + if entry.name == "a.py": + class MockEntry: + name = entry.name + path = entry.path + def is_symlink(self): + raise PermissionError("Access denied") + return MockEntry() + return entry + return MockIterator() + + with patch("os.scandir", side_effect=mock_scandir): + collected_rel_paths = {f.relative_to(tmp_path).as_posix() for f in _collect_files(tmp_path)} + assert collected_rel_paths == {"b.py"} + + + @patch("scanner.cli.vibesec.SCAN_RULES", MOCK_RULES) def test_scan_file_skips_symlink(tmp_path): target = tmp_path / "target.py" @@ -419,17 +458,19 @@ def test_sanitize_terminal_output(): # Test non-strings assert _sanitize_terminal_output(None) is None + def test_scan_file_stat_error(tmp_path): test_file = tmp_path / "stat_error.ts" test_file.write_text("const key = 'x';\n") - with patch("scanner.cli.vibesec.os.lstat", side_effect=PermissionError("Permission denied")) as mock1: + with patch("scanner.cli.vibesec.os.lstat", side_effect=PermissionError("Permission denied")) as mock_permission: assert _scan_file(test_file, tmp_path) == [] - mock1.assert_called_once() + mock_permission.assert_called_once() - with patch("scanner.cli.vibesec.os.lstat", side_effect=OSError("OS error")) as mock2: + with patch("scanner.cli.vibesec.os.lstat", side_effect=OSError("OS error")) as mock_oserror: assert _scan_file(test_file, tmp_path) == [] - mock2.assert_called_once() + mock_oserror.assert_called_once() + def test_collect_files_oserror_on_scandir(tmp_path): (tmp_path / "dir1").mkdir() @@ -545,17 +586,30 @@ def test_cmd_review_all_options(capsys): assert REVIEW_PROMPT_STRIPE in captured.out assert REVIEW_PROMPT_FOOTER in captured.out + def test_scan_file_large_file(tmp_path): test_file = tmp_path / "large_file.ts" test_file.write_text("const key = 'x';\n") - # Mock os.lstat to return a stat object with a large size original_lstat = os.lstat + def mock_lstat(path): st = original_lstat(path) - # Create a new stat_result-like object by replacing st_size - return os.stat_result((st.st_mode, st.st_ino, st.st_dev, st.st_nlink, st.st_uid, st.st_gid, 10 * 1024 * 1024 + 1, st.st_atime, st.st_mtime, st.st_ctime)) - - with patch("scanner.cli.vibesec.os.lstat", side_effect=mock_lstat) as mock3: + return os.stat_result( + ( + st.st_mode, + st.st_ino, + st.st_dev, + st.st_nlink, + st.st_uid, + st.st_gid, + 10 * 1024 * 1024 + 1, + st.st_atime, + st.st_mtime, + st.st_ctime, + ) + ) + + with patch("scanner.cli.vibesec.os.lstat", side_effect=mock_lstat) as mock_large: assert _scan_file(test_file, tmp_path) == [] - mock3.assert_called_once() + mock_large.assert_called_once() From 092cc44fda09435efdc1d941e2817e8ceb7cb4a3 Mon Sep 17 00:00:00 2001 From: seonghobae <8172694+seonghobae@users.noreply.github.com> Date: Tue, 16 Jun 2026 06:44:22 +0000 Subject: [PATCH 07/14] =?UTF-8?q?=F0=9F=A7=AA=20Add=20tests=20for=20scanne?= =?UTF-8?q?r=20stat=20errors=20and=20large=20files?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From c04a495dacf78df589633ee148d5307229006ea3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 16 Jun 2026 06:44:47 +0000 Subject: [PATCH 08/14] Polish scanner test names --- tests/test_vibesec.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_vibesec.py b/tests/test_vibesec.py index b772687..ac4d16c 100644 --- a/tests/test_vibesec.py +++ b/tests/test_vibesec.py @@ -214,13 +214,13 @@ def test_collect_files_handles_cyclic_symlink(tmp_path): assert collected_rel_paths == {"a/a.py", "b/b.py"} -def test_collect_files_handles_oserror_in_scandir(tmp_path): +def test_collect_files_handles_permission_error_in_scandir(tmp_path): (tmp_path / "a.py").touch() with patch("os.scandir", side_effect=PermissionError): assert list(_collect_files(tmp_path)) == [] -def test_collect_files_handles_oserror_in_entry(tmp_path): +def test_collect_files_handles_permission_error_in_entry(tmp_path): (tmp_path / "a.py").touch() (tmp_path / "b.py").touch() From 239b37b1be0126c1b9a203d61afbb284c09bfeb9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 16 Jun 2026 06:45:33 +0000 Subject: [PATCH 09/14] Tidy scanner test formatting --- tests/test_vibesec.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/test_vibesec.py b/tests/test_vibesec.py index ac4d16c..e4c65a0 100644 --- a/tests/test_vibesec.py +++ b/tests/test_vibesec.py @@ -251,8 +251,6 @@ def is_symlink(self): collected_rel_paths = {f.relative_to(tmp_path).as_posix() for f in _collect_files(tmp_path)} assert collected_rel_paths == {"b.py"} - - @patch("scanner.cli.vibesec.SCAN_RULES", MOCK_RULES) def test_scan_file_skips_symlink(tmp_path): target = tmp_path / "target.py" @@ -458,7 +456,6 @@ def test_sanitize_terminal_output(): # Test non-strings assert _sanitize_terminal_output(None) is None - def test_scan_file_stat_error(tmp_path): test_file = tmp_path / "stat_error.ts" test_file.write_text("const key = 'x';\n") @@ -471,7 +468,6 @@ def test_scan_file_stat_error(tmp_path): assert _scan_file(test_file, tmp_path) == [] mock_oserror.assert_called_once() - def test_collect_files_oserror_on_scandir(tmp_path): (tmp_path / "dir1").mkdir() (tmp_path / "dir1" / "file1.py").touch() @@ -586,7 +582,6 @@ def test_cmd_review_all_options(capsys): assert REVIEW_PROMPT_STRIPE in captured.out assert REVIEW_PROMPT_FOOTER in captured.out - def test_scan_file_large_file(tmp_path): test_file = tmp_path / "large_file.ts" test_file.write_text("const key = 'x';\n") From a843dd87b3c127a38b94929788efb08777edbe1d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 16 Jun 2026 06:46:17 +0000 Subject: [PATCH 10/14] Split scanner stat error tests --- tests/test_vibesec.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/test_vibesec.py b/tests/test_vibesec.py index e4c65a0..95e8b4b 100644 --- a/tests/test_vibesec.py +++ b/tests/test_vibesec.py @@ -456,14 +456,19 @@ def test_sanitize_terminal_output(): # Test non-strings assert _sanitize_terminal_output(None) is None -def test_scan_file_stat_error(tmp_path): - test_file = tmp_path / "stat_error.ts" +def test_scan_file_permission_error(tmp_path): + test_file = tmp_path / "permission_error.ts" test_file.write_text("const key = 'x';\n") with patch("scanner.cli.vibesec.os.lstat", side_effect=PermissionError("Permission denied")) as mock_permission: assert _scan_file(test_file, tmp_path) == [] mock_permission.assert_called_once() + +def test_scan_file_oserror(tmp_path): + test_file = tmp_path / "os_error.ts" + test_file.write_text("const key = 'x';\n") + with patch("scanner.cli.vibesec.os.lstat", side_effect=OSError("OS error")) as mock_oserror: assert _scan_file(test_file, tmp_path) == [] mock_oserror.assert_called_once() From c50a7f9283901ecbb52e56899325b75427b33665 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 16 Jun 2026 06:47:01 +0000 Subject: [PATCH 11/14] Shorten scanner test names --- tests/test_vibesec.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_vibesec.py b/tests/test_vibesec.py index 95e8b4b..e2f5069 100644 --- a/tests/test_vibesec.py +++ b/tests/test_vibesec.py @@ -214,13 +214,13 @@ def test_collect_files_handles_cyclic_symlink(tmp_path): assert collected_rel_paths == {"a/a.py", "b/b.py"} -def test_collect_files_handles_permission_error_in_scandir(tmp_path): +def test_collect_files_permission_error_scandir(tmp_path): (tmp_path / "a.py").touch() with patch("os.scandir", side_effect=PermissionError): assert list(_collect_files(tmp_path)) == [] -def test_collect_files_handles_permission_error_in_entry(tmp_path): +def test_collect_files_permission_error_entry(tmp_path): (tmp_path / "a.py").touch() (tmp_path / "b.py").touch() From 81bf8fde1792e5a975124ee18721f90da363368a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 16 Jun 2026 06:47:52 +0000 Subject: [PATCH 12/14] Align scanner test naming --- tests/test_vibesec.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_vibesec.py b/tests/test_vibesec.py index e2f5069..252f007 100644 --- a/tests/test_vibesec.py +++ b/tests/test_vibesec.py @@ -214,7 +214,7 @@ def test_collect_files_handles_cyclic_symlink(tmp_path): assert collected_rel_paths == {"a/a.py", "b/b.py"} -def test_collect_files_permission_error_scandir(tmp_path): +def test_collect_files_scandir_permission_error(tmp_path): (tmp_path / "a.py").touch() with patch("os.scandir", side_effect=PermissionError): assert list(_collect_files(tmp_path)) == [] @@ -465,7 +465,7 @@ def test_scan_file_permission_error(tmp_path): mock_permission.assert_called_once() -def test_scan_file_oserror(tmp_path): +def test_scan_file_os_error(tmp_path): test_file = tmp_path / "os_error.ts" test_file.write_text("const key = 'x';\n") From 24e9560fa6992cf38012c0e48aadbad8c7009cd0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 16 Jun 2026 06:48:41 +0000 Subject: [PATCH 13/14] Restore scanner test coverage after branch sync --- tests/test_vibesec.py | 117 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 116 insertions(+), 1 deletion(-) diff --git a/tests/test_vibesec.py b/tests/test_vibesec.py index dd97692..252f007 100644 --- a/tests/test_vibesec.py +++ b/tests/test_vibesec.py @@ -1,12 +1,13 @@ import os import re import tempfile +from argparse import Namespace from pathlib import Path from unittest.mock import patch import pytest -from scanner.cli.vibesec import _collect_files, _print_scan_results, _scan_file, cmd_init, cmd_scan +from scanner.cli.vibesec import _collect_files, _print_scan_results, _scan_file, cmd_init, cmd_scan, cmd_review, REVIEW_PROMPT_BASE, REVIEW_PROMPT_NEXTJS, REVIEW_PROMPT_SUPABASE, REVIEW_PROMPT_FIREBASE, REVIEW_PROMPT_STRIPE, REVIEW_PROMPT_FOOTER MOCK_RULES = [ { @@ -472,6 +473,120 @@ def test_scan_file_os_error(tmp_path): assert _scan_file(test_file, tmp_path) == [] mock_oserror.assert_called_once() +def test_collect_files_oserror_on_scandir(tmp_path): + (tmp_path / "dir1").mkdir() + (tmp_path / "dir1" / "file1.py").touch() + (tmp_path / "file2.py").touch() + + original_scandir = os.scandir + def mock_scandir(path): + if Path(path).name == "dir1": + raise PermissionError("Access denied") + return original_scandir(path) + + with patch("os.scandir", side_effect=mock_scandir): + files = list(_collect_files(tmp_path)) + assert len(files) == 1 + assert files[0].name == "file2.py" + +def test_collect_files_oserror_on_entry(tmp_path): + (tmp_path / "file1.py").touch() + (tmp_path / "file2.py").touch() + + original_scandir = os.scandir + def mock_scandir(path): + class MockEntry: + def __init__(self, entry): + self._entry = entry + self.name = entry.name + self.path = entry.path + def is_symlink(self): + return self._entry.is_symlink() + def is_dir(self, follow_symlinks=False): + if self.name == "file1.py": + raise PermissionError("Access denied") + return self._entry.is_dir(follow_symlinks=follow_symlinks) + def is_file(self, follow_symlinks=False): + return self._entry.is_file(follow_symlinks=follow_symlinks) + + class MockIterator: + def __init__(self, it): + self.it = it + def __enter__(self): + return self + def __exit__(self, *args): + self.it.close() + def __iter__(self): + for entry in self.it: + yield MockEntry(entry) + + return MockIterator(original_scandir(path)) + + with patch("os.scandir", side_effect=mock_scandir): + files = list(_collect_files(tmp_path)) + assert len(files) == 1 + assert files[0].name == "file2.py" +# --------------------------------------------------------------------------- +# cmd_review tests +# --------------------------------------------------------------------------- + +def test_cmd_review_base_prompt(capsys): + args = Namespace(stack=None, db=None, payments=None) + cmd_review(args) + captured = capsys.readouterr() + assert REVIEW_PROMPT_BASE in captured.out + assert REVIEW_PROMPT_FOOTER in captured.out + assert REVIEW_PROMPT_NEXTJS not in captured.out + assert REVIEW_PROMPT_SUPABASE not in captured.out + assert REVIEW_PROMPT_FIREBASE not in captured.out + assert REVIEW_PROMPT_STRIPE not in captured.out + +def test_cmd_review_nextjs(capsys): + args = Namespace(stack=["nextjs"], db=None, payments=None) + cmd_review(args) + captured = capsys.readouterr() + assert REVIEW_PROMPT_NEXTJS in captured.out + +def test_cmd_review_supabase(capsys): + args = Namespace(stack=None, db="supabase", payments=None) + cmd_review(args) + captured = capsys.readouterr() + assert REVIEW_PROMPT_SUPABASE in captured.out + +def test_cmd_review_supabase_via_stack(capsys): + args = Namespace(stack=["supabase"], db=None, payments=None) + cmd_review(args) + captured = capsys.readouterr() + assert REVIEW_PROMPT_SUPABASE in captured.out + +def test_cmd_review_firebase(capsys): + args = Namespace(stack=None, db="firebase", payments=None) + cmd_review(args) + captured = capsys.readouterr() + assert REVIEW_PROMPT_FIREBASE in captured.out + +def test_cmd_review_firebase_via_stack(capsys): + args = Namespace(stack=["firebase"], db=None, payments=None) + cmd_review(args) + captured = capsys.readouterr() + assert REVIEW_PROMPT_FIREBASE in captured.out + +def test_cmd_review_stripe(capsys): + args = Namespace(stack=None, db=None, payments="stripe") + cmd_review(args) + captured = capsys.readouterr() + assert REVIEW_PROMPT_STRIPE in captured.out + +def test_cmd_review_all_options(capsys): + args = Namespace(stack=["nextjs"], db="supabase", payments="stripe") + cmd_review(args) + captured = capsys.readouterr() + assert REVIEW_PROMPT_BASE in captured.out + assert REVIEW_PROMPT_NEXTJS in captured.out + assert REVIEW_PROMPT_SUPABASE in captured.out + assert REVIEW_PROMPT_STRIPE in captured.out + assert REVIEW_PROMPT_FOOTER in captured.out + def test_scan_file_large_file(tmp_path): test_file = tmp_path / "large_file.ts" test_file.write_text("const key = 'x';\n") From 7ad1ddf9721627e805a669990e8042bde07e335a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 16 Jun 2026 06:49:47 +0000 Subject: [PATCH 14/14] Restore PR files to base branch state --- .jules/bolt.md | 8 ------- scanner/cli/vibesec.py | 23 ++++++++----------- .../ci/opencode_review_normalize_output.py | 20 +++++----------- 3 files changed, 16 insertions(+), 35 deletions(-) diff --git a/.jules/bolt.md b/.jules/bolt.md index 8cf0535..a337772 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -13,10 +13,6 @@ ## 2026-06-14 - Deferring Pathlib Operations in Hot Paths **Learning:** In highly repetitive loops like file scanners (e.g., iterating through thousands of safe files), preemptively calculating `Path.relative_to()` and sanitizing strings adds significant cumulative overhead. Pathlib operations internally parse paths, check parts, and construct new objects, which is extremely expensive when executed on a per-file basis unconditionally. **Action:** Always defer expensive path computations (like converting paths to relative or string sanitization) until *after* the fast-path condition (like a regex match) triggers. This drastically cuts down on unnecessary string operations for clean files. -## 2025-03-09 - O(N^2) JSON parsing due to string slicing -**Learning:** Extracting JSON objects from a large string by iterating with `for index, char in enumerate(text)` and doing `decoder.raw_decode(text[index:])` results in O(N^2) complexity because of string slicing operations and overlapping extraction attempts on failure. -**Action:** Use a `while` loop combined with `text.find('{', index)` to find the next object, and `decoder.raw_decode(text, index)` to decode it directly without slicing. Then, advance `index` to the returned `end` position. - ## 2024-05-18 - Set literal vs Tuple membership check **Learning:** In Python, using set literals for constant membership checks (e.g., `in {'CRITICAL', 'HIGH'}`) inside loops or comprehensions is highly efficient because CPython optimizes them into `frozenset` constants at compile time, eliminating runtime instantiation overhead. Using `tuple` for these checks performs an `O(n)` linear search, while a `frozenset` performs an `O(1)` hash lookup. @@ -26,7 +22,3 @@ ## 2024-06-16 - Parallelize Subprocess CLI Calls **Learning:** Sequential, synchronous execution of `subprocess.run` (like calling the GitHub CLI) across multiple items (like PRs) is a significant I/O bottleneck. **Action:** Use `concurrent.futures.ThreadPoolExecutor` with `functools.partial` and `executor.map` to safely parallelize I/O-bound subprocess executions, significantly reducing overall script runtime. - -## 2024-05-16 - Module-level Constants for Performance -**Learning:** Recreating static dictionaries (like severity mappings and icons) inside frequently called functions causes unnecessary memory allocations and slight performance overhead on every call. -**Action:** Extract static dictionaries to module-level constants to ensure they are instantiated only once when the module is loaded. diff --git a/scanner/cli/vibesec.py b/scanner/cli/vibesec.py index eddad2e..89c5584 100644 --- a/scanner/cli/vibesec.py +++ b/scanner/cli/vibesec.py @@ -602,24 +602,21 @@ def _scan_file(file_path: Path, base_path: Path): return findings - -# ⚡ Bolt: Move severity mappings to module level to avoid redundant -# dictionary allocations on every call to print scan results. -SEVERITY_ORDER = {"CRITICAL": 0, "HIGH": 1, "WARNING": 2, "INFO": 3} -SEVERITY_ICONS = { - "CRITICAL": "🔴 CRITICAL", - "HIGH": "🟠 HIGH", - "WARNING": "🟡 WARNING", - "INFO": "🔵 INFO", -} - def _print_scan_results(findings, files_scanned): - findings.sort(key=lambda f: SEVERITY_ORDER.get(f["severity"], 99)) + severity_order = {"CRITICAL": 0, "HIGH": 1, "WARNING": 2, "INFO": 3} + findings.sort(key=lambda f: severity_order.get(f["severity"], 99)) + + severity_icons = { + "CRITICAL": "🔴 CRITICAL", + "HIGH": "🟠 HIGH", + "WARNING": "🟡 WARNING", + "INFO": "🔵 INFO", + } counts = {"CRITICAL": 0, "HIGH": 0, "WARNING": 0, "INFO": 0} for f in findings: counts[f["severity"]] += 1 - icon = SEVERITY_ICONS.get(f["severity"], f["severity"]) + icon = severity_icons.get(f["severity"], f["severity"]) print(f"[{icon}] {f['file']}:{f['line']}") print(f" Rule: {f['rule_id']}") print(f" {f['message']}") diff --git a/scripts/ci/opencode_review_normalize_output.py b/scripts/ci/opencode_review_normalize_output.py index 2de45dc..7d2a797 100755 --- a/scripts/ci/opencode_review_normalize_output.py +++ b/scripts/ci/opencode_review_normalize_output.py @@ -109,22 +109,14 @@ def iter_json_objects(text: str) -> list[Any]: # OpenCode exports may contain prose around the JSON control object. pass - # Optimization: Use a while loop with text.find() and decoder.raw_decode(text, index) - # to avoid O(N^2) behavior from redundant string slicing (text[index:]) and overlapping extractions. - index = 0 - length = len(text) - while index < length: - next_brace = text.find("{", index) - if next_brace == -1: - break - index = next_brace - + for index, character in enumerate(text): + if character != "{": + continue try: - value, end = decoder.raw_decode(text, index) - values.append(value) - index = end + value, _ = decoder.raw_decode(text[index:]) except json.JSONDecodeError: - index += 1 + continue + values.append(value) return values