diff --git a/.jules/bolt.md b/.jules/bolt.md
index d6502a9..9210570 100644
--- a/.jules/bolt.md
+++ b/.jules/bolt.md
@@ -1,3 +1,7 @@
 ## 2024-05-24 - File traversal performance
 **Learning:** When optimizing os.walk combined with Path objects, replacing them with os.scandir and os.path.splitext reduces stat() calls drastically, but requires careful matching of symlink behavior (os.walk matches directory symlinks depending on arguments, Path.is_file() follows symlinks by default).
 **Action:** Use entry.is_dir(follow_symlinks=False) to match os.walk and entry.is_file() to match Path.is_file() default.
+
+## 2024-06-11 - Global state caching in Python tests
+**Learning:** When aggressively caching global module state (like pre-extracted regex rules from `SCAN_RULES`), tests using `unittest.mock.patch` on that global state may fail because the cache retains stale references to the unpatched objects.
+**Action:** Implement cache-busting logic that clears the cache when the object identity changes. Keep a reference to the object the cache was built from and compare it with `is`; a stored `id(SCAN_RULES)` alone is not enough, because an id can be reused once the old object has been garbage-collected.
diff --git a/scanner/cli/vibesec.py b/scanner/cli/vibesec.py
index 2931698..bd19ac2 100644
--- a/scanner/cli/vibesec.py
+++ b/scanner/cli/vibesec.py
@@ -394,6 +394,49 @@ def cmd_scan(args):
     return 1 if any(f["severity"] in ("CRITICAL", "HIGH") for f in findings) else 0
 
 
+# ⚡ Bolt: Cache applicable rules per file extension to avoid redundant list
+# comprehensions and pre-extract the search method to avoid dictionary and
+# attribute lookups in the tight scanning loop.
+_RULES_CACHE = {}
+# Strong reference to the SCAN_RULES object the cache was built from. Holding
+# the object (rather than its id()) keeps it alive, so a replacement object can
+# never be mistaken for it through id reuse.
+_CACHED_SCAN_RULES = None
+
+
+def _get_applicable_rules(ext: str):
+    """Return the pre-processed rules that apply to file extension ``ext``.
+
+    A rule applies when its ``extensions`` value is falsy (e.g. empty) or
+    contains ``ext``. Each returned dict carries the rule's ``id``,
+    ``severity`` and ``message`` plus ``search``, the ``search`` attribute of
+    the rule's ``pattern``.
+
+    Results are memoized per extension. The cache is dropped whenever the
+    module-level ``SCAN_RULES`` name is bound to a different object (for
+    example under ``unittest.mock.patch``); in-place mutation of the same
+    object is not detected.
+    """
+    global _CACHED_SCAN_RULES
+    if _CACHED_SCAN_RULES is not SCAN_RULES:
+        _RULES_CACHE.clear()
+        _CACHED_SCAN_RULES = SCAN_RULES
+
+    rules = _RULES_CACHE.get(ext)
+    if rules is None:
+        rules = _RULES_CACHE[ext] = [
+            {
+                "id": rule["id"],
+                "severity": rule["severity"],
+                "message": rule["message"],
+                "search": rule["pattern"].search,
+            }
+            for rule in SCAN_RULES
+            if not rule["extensions"] or ext in rule["extensions"]
+        ]
+    return rules
+
+
 def _collect_files(base_path: Path):
     """Collect all scannable files, skipping unwanted directories."""
     # ⚡ Bolt: Optimize file traversal using os.scandir and os.path.splitext
@@ -442,10 +485,7 @@ def _scan_file(file_path: Path, base_path: Path):
     ext = file_path.suffix.lower()
     rel_path = file_path.relative_to(base_path) if base_path.is_dir() else file_path
 
-    applicable_rules = [
-        rule for rule in SCAN_RULES
-        if not rule["extensions"] or ext in rule["extensions"]
-    ]
+    applicable_rules = _get_applicable_rules(ext)
     if not applicable_rules:
         return findings
 
@@ -454,7 +494,7 @@ def _scan_file(file_path: Path, base_path: Path):
         with file_path.open("r", encoding="utf-8", errors="ignore") as f:
             for line_num, line in enumerate(f, start=1):
                 for rule in applicable_rules:
-                    match = rule["pattern"].search(line)
+                    match = rule["search"](line)
                     if match:
                         findings.append({
                             "rule_id": rule["id"],