Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 2024-05-24 - File traversal performance
**Learning:** When optimizing os.walk combined with Path objects, replacing them with os.scandir and os.path.splitext reduces stat() calls drastically, but requires careful matching of symlink behavior (os.walk only descends into directory symlinks when called with followlinks=True, which is off by default, whereas Path.is_file() follows symlinks by default).
**Action:** Use entry.is_dir(follow_symlinks=False) to match os.walk and entry.is_file() to match Path.is_file() default.

## 2024-06-11 - Global state caching in Python tests
**Learning:** When aggressively caching global module state (like pre-extracted regex rules from `SCAN_RULES`), tests using `unittest.mock.patch` on that global state may fail because the cache retains stale references to the unpatched objects.
**Action:** Implement cache-busting logic (e.g., tracking `id(SCAN_RULES)`) to clear the cache when the object identity changes.
34 changes: 29 additions & 5 deletions scanner/cli/vibesec.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,33 @@ def cmd_scan(args):
return 1 if any(f["severity"] in ("CRITICAL", "HIGH") for f in findings) else 0


# ⚑ Bolt: Cache applicable rules per file extension to avoid redundant list
# comprehensions and pre-extract the search method to avoid dictionary and
# attribute lookups in the tight scanning loop.
_RULES_CACHE = {}
_LAST_SCAN_RULES_ID = None
# Strong reference to the SCAN_RULES object the cache was built from.  While
# this reference is held the object cannot be garbage-collected, so its id()
# cannot be recycled by a different rules object (which would otherwise make
# the id comparison below report "unchanged" for a brand-new rule set).
_LAST_SCAN_RULES_REF = None


def _get_applicable_rules(ext: str):
    """Return the pre-processed scan rules that apply to extension ``ext``.

    Each returned entry is a dict with the keys ``"id"``, ``"severity"`` and
    ``"message"`` copied from the source rule, plus ``"search"``: the bound
    ``search`` method of the rule's ``"pattern"`` object, extracted once so
    the scanning loop can call it directly.

    A rule applies when its ``"extensions"`` collection is empty/falsy (the
    rule is extension-agnostic) or contains ``ext``.

    The result is cached per extension.  The whole cache is discarded when
    the module-level ``SCAN_RULES`` name is bound to a different object (for
    example when a test patches it).  In-place mutation of the same
    ``SCAN_RULES`` object is NOT detected.

    Args:
        ext: File extension used as the cache key and matched against each
            rule's ``"extensions"``.

    Returns:
        The cached list of applicable rule dicts (possibly empty).  The list
        is shared between calls; callers must treat it as read-only.
    """
    global _LAST_SCAN_RULES_ID, _LAST_SCAN_RULES_REF

    # Snapshot the global once so the identity check and the iteration below
    # are guaranteed to look at the same object.
    rules = SCAN_RULES

    # Compare by identity against a live reference rather than by a bare
    # id() integer: ids are only unique among objects that are alive at the
    # same time.  The id is still recorded (and checked) so that code which
    # resets ``_LAST_SCAN_RULES_ID`` to force a rebuild keeps working.
    if _LAST_SCAN_RULES_REF is not rules or _LAST_SCAN_RULES_ID != id(rules):
        _RULES_CACHE.clear()
        _LAST_SCAN_RULES_REF = rules
        _LAST_SCAN_RULES_ID = id(rules)

    cached = _RULES_CACHE.get(ext)
    if cached is None:
        cached = _RULES_CACHE[ext] = [
            {
                "id": rule["id"],
                "severity": rule["severity"],
                "message": rule["message"],
                "search": rule["pattern"].search,
            }
            for rule in rules
            if not rule["extensions"] or ext in rule["extensions"]
        ]
    return cached


def _collect_files(base_path: Path):
"""Collect all scannable files, skipping unwanted directories."""
# ⚑ Bolt: Optimize file traversal using os.scandir and os.path.splitext
Expand Down Expand Up @@ -442,10 +469,7 @@ def _scan_file(file_path: Path, base_path: Path):
ext = file_path.suffix.lower()
rel_path = file_path.relative_to(base_path) if base_path.is_dir() else file_path

applicable_rules = [
rule for rule in SCAN_RULES
if not rule["extensions"] or ext in rule["extensions"]
]
applicable_rules = _get_applicable_rules(ext)

if not applicable_rules:
return findings
Expand All @@ -454,7 +478,7 @@ def _scan_file(file_path: Path, base_path: Path):
with file_path.open("r", encoding="utf-8", errors="ignore") as f:
for line_num, line in enumerate(f, start=1):
for rule in applicable_rules:
match = rule["pattern"].search(line)
match = rule["search"](line)
if match:
findings.append({
"rule_id": rule["id"],
Expand Down
Loading