From ffb3e30b4c5fb138ae0a5732743969733024a3f7 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Mon, 22 Jun 2026 17:22:54 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Optimize=20AST=20traversal?= =?UTF-8?q?=20generators?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaced `yield from` recursion with explicit stack-based generators in hot-path AST walk functions (`iter_calls_in_function_body`, `_own_statements`, and `own_nodes`; `_walk_own` now delegates to `own_nodes`). This removes significant function call and frame overhead for deep ASTs, while preserving lazy evaluation to allow rules to short-circuit effectively. Adds `.jules/bolt.md`, documenting the performance nuance around eager list creation versus lazy stacks in rules engines. Co-authored-by: tachyon-beep <544926+tachyon-beep@users.noreply.github.com> --- .jules/bolt.md | 3 ++ src/wardline/scanner/ast_primitives.py | 58 ++++++++++++---------- src/wardline/scanner/rules/_ast_helpers.py | 30 ++++++----- 3 files changed, 53 insertions(+), 38 deletions(-) create mode 100644 .jules/bolt.md diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 00000000..54faca37 --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,3 @@ +## 2024-06-22 - Generator Short-circuiting in AST Rules +**Learning:** In a static analysis rules engine, replacing recursive `yield from` with eager list building (like `list.append()`) sacrifices short-circuiting capabilities. While lists are faster to build, many rules only need the first matching node (using `any()` or `next()`). If we eagerly compute the whole AST subtree list, we can cause major performance regressions on large AST blocks when a match is early. 
+**Action:** When optimizing `yield from` recursion in hot-path generators, use an explicit stack combined with `yield` instead of building eager lists, ensuring lazy evaluation is preserved so that short-circuiting works correctly. diff --git a/src/wardline/scanner/ast_primitives.py b/src/wardline/scanner/ast_primitives.py index 70f565b3..86ab2668 100644 --- a/src/wardline/scanner/ast_primitives.py +++ b/src/wardline/scanner/ast_primitives.py @@ -104,39 +104,45 @@ def iter_calls_in_function_body( Header expressions that execute in the enclosing scope (decorators, default values, base classes, metaclass keywords) are still attributed to ``node``. """ + # We maintain stack-based traversal with yield instead of building full + # lists eagerly, to preserve short-circuiting capabilities in rules that + # only need the first call or break early. + stack: list[ast.AST] = list(reversed(node.body)) + while stack: + current = stack.pop() - def walk_node(current: ast.AST) -> Iterator[ast.Call]: if isinstance(current, (ast.FunctionDef, ast.AsyncFunctionDef)): - for decorator in current.decorator_list: - yield from walk_node(decorator) - yield from _walk_argument_defaults(current.args) - return + defaults: list[ast.AST] = [] + for default in current.args.defaults: + defaults.append(default) + for kw_default in current.args.kw_defaults: + if kw_default is not None: + defaults.append(kw_default) + stack.extend(reversed(defaults)) + stack.extend(reversed(current.decorator_list)) + continue + if isinstance(current, ast.ClassDef): - for decorator in current.decorator_list: - yield from walk_node(decorator) - for base in current.bases: - yield from walk_node(base) - for keyword in current.keywords: - yield from walk_node(keyword.value) - return + keywords: list[ast.AST] = [kw.value for kw in current.keywords] + stack.extend(reversed(keywords)) + stack.extend(reversed(current.bases)) + stack.extend(reversed(current.decorator_list)) + continue + if isinstance(current, ast.Lambda): - 
yield from _walk_argument_defaults(current.args) - return + defaults = [] + for default in current.args.defaults: + defaults.append(default) + for kw_default in current.args.kw_defaults: + if kw_default is not None: + defaults.append(kw_default) + stack.extend(reversed(defaults)) + continue + if isinstance(current, ast.Call): yield current - for child in ast.iter_child_nodes(current): - yield from walk_node(child) - - def _walk_argument_defaults(args: ast.arguments) -> Iterator[ast.Call]: - for default in args.defaults: - yield from walk_node(default) - for kw_default in args.kw_defaults: - if kw_default is None: - continue - yield from walk_node(kw_default) - for stmt in node.body: - yield from walk_node(stmt) + stack.extend(reversed(list(ast.iter_child_nodes(current)))) def resolve_self_method_fqn( diff --git a/src/wardline/scanner/rules/_ast_helpers.py b/src/wardline/scanner/rules/_ast_helpers.py index 7c3b52ff..700fe2a5 100644 --- a/src/wardline/scanner/rules/_ast_helpers.py +++ b/src/wardline/scanner/rules/_ast_helpers.py @@ -36,12 +36,14 @@ def _own_statements(node: ast.AST) -> Iterator[ast.stmt]: """Yield every statement in *node*'s own scope, not descending into nested def/class bodies. 
Includes the bodies of if/for/while/try/with at any depth.""" - for child in ast.iter_child_nodes(node): - if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)): + stack = list(reversed(list(ast.iter_child_nodes(node)))) + while stack: + current = stack.pop() + if isinstance(current, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)): continue - if isinstance(child, ast.stmt): - yield child - yield from _own_statements(child) + if isinstance(current, ast.stmt): + yield current + stack.extend(reversed(list(ast.iter_child_nodes(current)))) def _own_reachable_statements( @@ -635,13 +637,17 @@ def handler_substitutes_on_failure(handler: ast.ExceptHandler, returned_names: f def own_nodes(node: ast.AST) -> Iterator[ast.AST]: """Yield *node* itself and all descendant nodes in its own scope (skipping nested scopes).""" yield node - yield from _walk_own(node) + + # We maintain yield-based traversal to enable short circuiting which is critical for rules engine performance. + stack = list(reversed(list(ast.iter_child_nodes(node)))) + while stack: + current = stack.pop() + yield current + if not isinstance(current, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Lambda)): + stack.extend(reversed(list(ast.iter_child_nodes(current)))) def _walk_own(node: ast.AST) -> Iterator[ast.AST]: - for child in ast.iter_child_nodes(node): - if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Lambda)): - yield child - else: - yield child - yield from _walk_own(child) + iterator = own_nodes(node) + next(iterator) + return iterator