From e0a1e433780a466921b9a48d2c500df7f0aec4e3 Mon Sep 17 00:00:00 2001 From: seonghobae <8172694+seonghobae@users.noreply.github.com> Date: Thu, 11 Jun 2026 18:51:15 +0000 Subject: [PATCH 1/2] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Optimize=20=5Fscan=5Ffi?= =?UTF-8?q?le=20with=20rule=20caching?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cached the applicable rules per extension and pre-extracted the `.search` method reference to eliminate redundant list comprehensions and dictionary/attribute lookups inside the tight per-line file scanning loop. Included cache-busting to prevent failing unit tests when `SCAN_RULES` is mocked. --- .jules/bolt.md | 4 ++++ scanner/cli/vibesec.py | 34 +++++++++++++++++++++++++++++----- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/.jules/bolt.md b/.jules/bolt.md index d6502a9..9210570 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -1,3 +1,7 @@ ## 2024-05-24 - File traversal performance **Learning:** When optimizing os.walk combined with Path objects, replacing them with os.scandir and os.path.splitext reduces stat() calls drastically, but requires careful matching of symlink behavior (os.walk matches directory symlinks depending on arguments, Path.is_file() follows symlinks by default). **Action:** Use entry.is_dir(follow_symlinks=False) to match os.walk and entry.is_file() to match Path.is_file() default. + +## 2024-06-11 - Global state caching in Python tests +**Learning:** When aggressively caching global module state (like pre-extracted regex rules from `SCAN_RULES`), tests using `unittest.mock.patch` on that global state may fail because the cache retains stale references to the unpatched objects. +**Action:** Implement cache-busting logic (e.g., tracking `id(SCAN_RULES)`) to clear the cache when the object identity changes. diff --git a/scanner/cli/vibesec.py b/scanner/cli/vibesec.py index 2931698..bd19ac2 100644 --- a/scanner/cli/vibesec.py +++ b/scanner/cli/vibesec.py @@ -394,6 +394,33 @@ def cmd_scan(args): return 1 if any(f["severity"] in ("CRITICAL", "HIGH") for f in findings) else 0 +# ⚡ Bolt: Cache applicable rules per file extension to avoid redundant list +# comprehensions and pre-extract the search method to avoid dictionary and +# attribute lookups in the tight scanning loop. +_RULES_CACHE = {} +_LAST_SCAN_RULES_ID = None + +def _get_applicable_rules(ext: str): + global _LAST_SCAN_RULES_ID, _RULES_CACHE + current_id = id(SCAN_RULES) + if _LAST_SCAN_RULES_ID != current_id: + _RULES_CACHE.clear() + _LAST_SCAN_RULES_ID = current_id + + if ext not in _RULES_CACHE: + _RULES_CACHE[ext] = [ + { + "id": rule["id"], + "severity": rule["severity"], + "message": rule["message"], + "search": rule["pattern"].search + } + for rule in SCAN_RULES + if not rule["extensions"] or ext in rule["extensions"] + ] + return _RULES_CACHE[ext] + + def _collect_files(base_path: Path): """Collect all scannable files, skipping unwanted directories.""" # ⚡ Bolt: Optimize file traversal using os.scandir and os.path.splitext @@ -442,10 +469,7 @@ def _scan_file(file_path: Path, base_path: Path): ext = file_path.suffix.lower() rel_path = file_path.relative_to(base_path) if base_path.is_dir() else file_path - applicable_rules = [ - rule for rule in SCAN_RULES - if not rule["extensions"] or ext in rule["extensions"] - ] + applicable_rules = _get_applicable_rules(ext) if not applicable_rules: return findings @@ -454,7 +478,7 @@ def _scan_file(file_path: Path, base_path: Path): with file_path.open("r", encoding="utf-8", errors="ignore") as f: for line_num, line in enumerate(f, start=1): for rule in applicable_rules: - match = rule["pattern"].search(line) + match = rule["search"](line) if match: findings.append({ "rule_id": rule["id"], From 67f87c083db93826a9321253a6c0cb45d1cc872e Mon Sep 17 00:00:00 2001 From: Seongho Bae Date: Fri, 12 Jun 2026 23:22:57 +0900 Subject: [PATCH 2/2] Trigger OpenCode review