diff --git a/.github/workflows/opencode-review.yml b/.github/workflows/opencode-review.yml index 8c61be6..af72e21 100644 --- a/.github/workflows/opencode-review.yml +++ b/.github/workflows/opencode-review.yml @@ -38,6 +38,11 @@ jobs: pull-requests: write issues: write steps: + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.14" + - name: Checkout repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: diff --git a/.github/workflows/pr-review-merge-scheduler.yml b/.github/workflows/pr-review-merge-scheduler.yml index d0d5496..e81af23 100644 --- a/.github/workflows/pr-review-merge-scheduler.yml +++ b/.github/workflows/pr-review-merge-scheduler.yml @@ -44,6 +44,11 @@ jobs: TRIGGER_REVIEWS: ${{ github.event_name != 'workflow_dispatch' || inputs.trigger_reviews == true }} ENABLE_AUTO_MERGE: ${{ github.event_name != 'workflow_dispatch' || inputs.enable_auto_merge == true }} steps: + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.14" + - name: Checkout trusted scheduler uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: diff --git a/.jules/bolt.md b/.jules/bolt.md index 551637a..3708540 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -13,7 +13,3 @@ ## 2026-06-14 - Deferring Pathlib Operations in Hot Paths **Learning:** In highly repetitive loops like file scanners (e.g., iterating through thousands of safe files), preemptively calculating `Path.relative_to()` and sanitizing strings adds significant cumulative overhead. Pathlib operations internally parse paths, check parts, and construct new objects, which is extremely expensive when executed on a per-file basis unconditionally. **Action:** Always defer expensive path computations (like converting paths to relative or string sanitization) until *after* the fast-path condition (like a regex match) triggers. This drastically cuts down on unnecessary string operations for clean files. - -## 2024-05-30 - Optimize regex scanning using re.finditer -**Learning:** For file scanning, reading the file entirely (if within size limits) and using `re.finditer` over the full content uses native C implementations for searching, and calculates matches dramatically faster (over 2x) than reading and looping line-by-line via Python's interpreter. -**Action:** Always favor `re.finditer` or full-string string matching where large text files are involved, provided strict memory and file size limits are verified and enforced. diff --git a/scanner/cli/vibesec.py b/scanner/cli/vibesec.py index 0772791..5fb6e4e 100644 --- a/scanner/cli/vibesec.py +++ b/scanner/cli/vibesec.py @@ -485,8 +485,7 @@ def _get_applicable_rules(ext: str): "id": rule["id"], "severity": rule["severity"], "message": rule["message"], - "search": rule["pattern"].search, - "finditer": rule["pattern"].finditer + "search": rule["pattern"].search } for rule in SCAN_RULES if not rule["extensions"] or ext in rule["extensions"] @@ -564,34 +563,23 @@ def _scan_file(file_path: Path, base_path: Path): try: with file_path.open("r", encoding="utf-8", errors="ignore") as f: - content = f.read() - - for rule in applicable_rules: - for match in rule["finditer"](content): - if rel_path_str is None: - rel_path = file_path.relative_to(base_path) if base_path.is_dir() else file_path - rel_path_str = _sanitize_terminal_output(str(rel_path)) - - start = match.start() - line_num = content.count("\n", 0, start) + 1 - - line_start = content.rfind("\n", 0, start) - line_start = 0 if line_start == -1 else line_start + 1 - - line_end = content.find("\n", start) - line_end = len(content) if line_end == -1 else line_end - - line = content[line_start:line_end] - - findings.append({ - "rule_id": rule["id"], - "severity": rule["severity"], - "message": rule["message"], - # SECURITY: Sanitize output to prevent Terminal Output Injection - "file": rel_path_str, - "line": line_num, - "snippet": _sanitize_terminal_output(line.strip()[:120]), - }) + for line_num, line in enumerate(f, start=1): + for rule in applicable_rules: + match = rule["search"](line) + if match: + if rel_path_str is None: + rel_path = file_path.relative_to(base_path) if base_path.is_dir() else file_path + rel_path_str = _sanitize_terminal_output(str(rel_path)) + + findings.append({ + "rule_id": rule["id"], + "severity": rule["severity"], + "message": rule["message"], + # SECURITY: Sanitize output to prevent Terminal Output Injection + "file": rel_path_str, + "line": line_num, + "snippet": _sanitize_terminal_output(line.strip()[:120]), + }) except (OSError, PermissionError): pass diff --git a/tests/test_pr_review_merge_scheduler.py b/tests/test_pr_review_merge_scheduler.py new file mode 100644 index 0000000..3087b88 --- /dev/null +++ b/tests/test_pr_review_merge_scheduler.py @@ -0,0 +1,115 @@ +from scripts.ci.pr_review_merge_scheduler import opencode_in_progress + +def test_empty_pr_context(): + # Empty PR dict + assert opencode_in_progress({}) is False + + # PR with no context nodes + pr = { + "statusCheckRollup": { + "contexts": { + "nodes": [] + } + } + } + assert opencode_in_progress(pr) is False + +def test_no_opencode_context(): + # PR with irrelevant context nodes + pr = { + "statusCheckRollup": { + "contexts": { + "nodes": [ + {"__typename": "CheckRun", "name": "lint", "status": "IN_PROGRESS"}, + {"__typename": "StatusContext", "context": "ci/build", "state": "PENDING"} + ] + } + } + } + assert opencode_in_progress(pr) is False + +def test_opencode_completed_status(): + pr = { + "statusCheckRollup": { + "contexts": { + "nodes": [ + {"__typename": "CheckRun", "name": "opencode-review", "status": "COMPLETED"}, + {"__typename": "CheckRun", "name": "opencode-review", "status": "SUCCESS"}, + {"__typename": "StatusContext", "context": "opencode-review", "state": "FAILURE"}, + {"__typename": "StatusContext", "context": "opencode-review", "state": "ERROR"} + ] + } + } + } + assert opencode_in_progress(pr) is False + +def test_opencode_in_progress_status(): + pr1 = { + "statusCheckRollup": { + "contexts": { + "nodes": [ + {"__typename": "CheckRun", "name": "opencode-review", "status": "IN_PROGRESS"} + ] + } + } + } + assert opencode_in_progress(pr1) is True + + pr2 = { + "statusCheckRollup": { + "contexts": { + "nodes": [ + {"__typename": "StatusContext", "context": "opencode-review", "state": "PENDING"} + ] + } + } + } + assert opencode_in_progress(pr2) is True + + pr3 = { + "statusCheckRollup": { + "contexts": { + "nodes": [ + {"__typename": "CheckRun", "name": "opencode-review"} + ] + } + } + } + assert opencode_in_progress(pr3) is False + +def test_opencode_workflow_name_in_progress(): + pr = { + "statusCheckRollup": { + "contexts": { + "nodes": [ + { + "__typename": "CheckRun", + "name": "review", + "status": "QUEUED", + "checkSuite": { + "workflowRun": { + "workflow": { + "name": "OpenCode Review" + } + } + } + } + ] + } + } + } + assert opencode_in_progress(pr) is True + +def test_multiple_contexts_one_in_progress(): + pr = { + "statusCheckRollup": { + "contexts": { + "nodes": [ + {"__typename": "CheckRun", "name": "lint", "status": "IN_PROGRESS"}, + {"__typename": "CheckRun", "name": "opencode-review", "status": "COMPLETED"}, + {"__typename": "StatusContext", "context": "opencode-review", "state": "PENDING"} + ] + } + } + } + assert opencode_in_progress(pr) is True diff --git a/tests/test_vibesec.py b/tests/test_vibesec.py index 6968990..a5a41f9 100644 --- a/tests/test_vibesec.py +++ b/tests/test_vibesec.py @@ -1,13 +1,11 @@ -import os import re import tempfile -from argparse import Namespace from pathlib import Path from unittest.mock import patch import pytest -from scanner.cli.vibesec import _collect_files, _print_scan_results, _scan_file, cmd_init, cmd_scan, cmd_review, REVIEW_PROMPT_BASE, REVIEW_PROMPT_NEXTJS, REVIEW_PROMPT_SUPABASE, REVIEW_PROMPT_FIREBASE, REVIEW_PROMPT_STRIPE, REVIEW_PROMPT_FOOTER +from scanner.cli.vibesec import _collect_files, _print_scan_results, _scan_file, cmd_init, cmd_scan MOCK_RULES = [ { @@ -418,118 +416,3 @@ def test_sanitize_terminal_output(): # Test non-strings assert _sanitize_terminal_output(None) is None - - -def test_collect_files_oserror_on_scandir(tmp_path): - (tmp_path / "dir1").mkdir() - (tmp_path / "dir1" / "file1.py").touch() - (tmp_path / "file2.py").touch() - - original_scandir = os.scandir - def mock_scandir(path): - if Path(path).name == "dir1": - raise PermissionError("Access denied") - return original_scandir(path) - - with patch("os.scandir", side_effect=mock_scandir): - files = list(_collect_files(tmp_path)) - assert len(files) == 1 - assert files[0].name == "file2.py" - -def test_collect_files_oserror_on_entry(tmp_path): - (tmp_path / "file1.py").touch() - (tmp_path / "file2.py").touch() - - original_scandir = os.scandir - def mock_scandir(path): - class MockEntry: - def __init__(self, entry): - self._entry = entry - self.name = entry.name - self.path = entry.path - def is_symlink(self): - return self._entry.is_symlink() - def is_dir(self, follow_symlinks=False): - if self.name == "file1.py": - raise PermissionError("Access denied") - return self._entry.is_dir(follow_symlinks=follow_symlinks) - def is_file(self, follow_symlinks=False): - return self._entry.is_file(follow_symlinks=follow_symlinks) - - class MockIterator: - def __init__(self, it): - self.it = it - def __enter__(self): - return self - def __exit__(self, *args): - self.it.close() - def __iter__(self): - for entry in self.it: - yield MockEntry(entry) - - return MockIterator(original_scandir(path)) - - with patch("os.scandir", side_effect=mock_scandir): - files = list(_collect_files(tmp_path)) - assert len(files) == 1 - assert files[0].name == "file2.py" -# --------------------------------------------------------------------------- -# cmd_review tests -# --------------------------------------------------------------------------- - -def test_cmd_review_base_prompt(capsys): - args = Namespace(stack=None, db=None, payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_BASE in captured.out - assert REVIEW_PROMPT_FOOTER in captured.out - assert REVIEW_PROMPT_NEXTJS not in captured.out - assert REVIEW_PROMPT_SUPABASE not in captured.out - assert REVIEW_PROMPT_FIREBASE not in captured.out - assert REVIEW_PROMPT_STRIPE not in captured.out - -def test_cmd_review_nextjs(capsys): - args = Namespace(stack=["nextjs"], db=None, payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_NEXTJS in captured.out - -def test_cmd_review_supabase(capsys): - args = Namespace(stack=None, db="supabase", payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_SUPABASE in captured.out - -def test_cmd_review_supabase_via_stack(capsys): - args = Namespace(stack=["supabase"], db=None, payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_SUPABASE in captured.out - -def test_cmd_review_firebase(capsys): - args = Namespace(stack=None, db="firebase", payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_FIREBASE in captured.out - -def test_cmd_review_firebase_via_stack(capsys): - args = Namespace(stack=["firebase"], db=None, payments=None) - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_FIREBASE in captured.out - -def test_cmd_review_stripe(capsys): - args = Namespace(stack=None, db=None, payments="stripe") - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_STRIPE in captured.out - -def test_cmd_review_all_options(capsys): - args = Namespace(stack=["nextjs"], db="supabase", payments="stripe") - cmd_review(args) - captured = capsys.readouterr() - assert REVIEW_PROMPT_BASE in captured.out - assert REVIEW_PROMPT_NEXTJS in captured.out - assert REVIEW_PROMPT_SUPABASE in captured.out - assert REVIEW_PROMPT_STRIPE in captured.out - assert REVIEW_PROMPT_FOOTER in captured.out