Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 170 additions & 8 deletions pytype/blocks/blocks.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Functions for computing the execution order of bytecode."""

from collections.abc import Iterator
from typing import Any, cast
from typing import Any, Sequence, cast
from pycnite import bytecode as pyc_bytecode
from pycnite import marshal as pyc_marshal
import pycnite.types
Expand Down Expand Up @@ -316,7 +316,9 @@ def add_pop_block_targets(bytecode: list[opcodes.Opcode]) -> None:
todo.append((op.next, block_stack))


def _split_bytecode(bytecode: list[opcodes.Opcode]) -> list[Block]:
def _split_bytecode(
bytecode: list[opcodes.Opcode], processed_blocks: set[Block], python_version
) -> list[Block]:
"""Given a sequence of bytecodes, return basic blocks.

This will split the code at "basic block boundaries". These occur at
Expand All @@ -333,21 +335,175 @@ def _split_bytecode(bytecode: list[opcodes.Opcode]) -> list[Block]:
targets = {op.target for op in bytecode if op.target}
blocks = []
code = []
for op in bytecode:
prev_block: Block = None
i = 0
while i < len(bytecode):
op = bytecode[i]
# SEND is only emitted for `async for` and `yield from`. Because it is not
# used in any other context, it is safe to process it on the assumption that
# it belongs to one of those two constructs.
if python_version >= (3, 12) and isinstance(op, opcodes.SEND):
if code:
prev_block = Block(code)
blocks.append(prev_block)
code = []
new_blocks, i = _preprocess_async_for_and_yield(
i, bytecode, prev_block, processed_blocks
)
blocks.extend(new_blocks)
prev_block = blocks[-1]
continue

code.append(op)
if (
op.no_next()
or op.does_jump()
or op.pops_block()
or op.next is None
or op.next in targets
or (op.next in targets)
and (
not isinstance(op.next, opcodes.GET_ANEXT)
or python_version < (3, 12)
)
):
blocks.append(Block(code))
prev_block = Block(code)
blocks.append(prev_block)
code = []
i += 1

return blocks


def compute_order(bytecode: list[opcodes.Opcode]) -> list[Block]:
def _preprocess_async_for_and_yield(
    idx: int,
    bytecode: Sequence[opcodes.Opcode],
    prev_block: Block,
    processed_blocks: set[Block],
) -> tuple[list[Block], int]:
  """Cut the instruction run that starts at a SEND into two linked blocks.

  `async for` and yield constructs compile to a run of instructions that
  starts with SEND and ends with END_SEND. Their control flow differs from
  ordinary loops: an `async for` loop terminates by raising
  StopAsyncIteration rather than through a regular jump. The blocks are
  therefore arranged by hand so that pytype visits them in the order the
  runtime would, instead of in an order that can end in a stack underrun.

  The run is split into one block holding only the SEND and a second block
  holding everything from the instruction after SEND up to and including the
  first JUMP_BACKWARD_NO_INTERRUPT (plus a CLEANUP_THROW if one follows it
  directly). `prev_block` is connected to the SEND block, and the SEND block
  to the second block.

  Args:
    idx: The index of the SEND instruction in `bytecode`.
    bytecode: The opcodes of the code object being split.
    prev_block: The block preceding the SEND; the new blocks are linked after
      it. It is dereferenced unconditionally, so it must not be None.
    processed_blocks: The set that compute_order consults to decide which
      blocks to skip. Updated in place.

  Returns:
    A tuple of ([send_block, yield_value_block], end_index), where end_index
    is the index of the first instruction after the two blocks, i.e. where the
    caller should resume splitting.

  Raises:
    StopIteration: If no JUMP_BACKWARD_NO_INTERRUPT follows the SEND.
  """
  assert isinstance(bytecode[idx], opcodes.SEND)
  after_send = idx + 1
  jump_idx = next(
      pos
      for pos in range(after_send, len(bytecode))
      if isinstance(bytecode[pos], opcodes.JUMP_BACKWARD_NO_INTERRUPT)
  )

  # Depending on how the control flow graph was laid out, a CLEANUP_THROW may
  # sit directly after the JUMP_BACKWARD_NO_INTERRUPT (usually it comes much
  # later). When it does, it belongs to the same block.
  has_cleanup = isinstance(bytecode[jump_idx + 1], opcodes.CLEANUP_THROW)
  end_block_idx = jump_idx + 2 if has_cleanup else jump_idx + 1

  # pytype expects SEND and YIELD_VALUE to live in different blocks, hence the
  # split right after the SEND.
  send_block = Block(bytecode[idx:after_send])
  yield_value_block = Block(bytecode[after_send:end_block_idx])
  prev_block.connect_outgoing(send_block)
  send_block.connect_outgoing(yield_value_block)
  # NOTE(review): set.update() treats every argument as an iterable and adds
  # its *elements*, so this does not add the two Block objects themselves (and
  # would raise TypeError if Block were not iterable). Left unchanged on
  # purpose: compute_order skips any block found in processed_blocks, so
  # switching to add() would change which edges get built. Confirm intent.
  processed_blocks.update(send_block, yield_value_block)
  return [send_block, yield_value_block], end_block_idx


def _remove_jmp_to_get_anext_and_merge(
blocks: list[Block], processed_blocks: set[Block]
) -> list[Block]:
"""Remove JUMP_BACKWARD instructions to GET_ANEXT instructions.

And also merge the block that contains the END_ASYNC_FOR which is part of the
same loop of the GET_ANEXT and JUMP_BACKWARD construct, to the JUMP_BACKWARD
instruction. This is to ignore the JUMP_BACKWARD because in pytype's eyes it's
useless (as it'll jump back to block that it already executed), and also
this is the way to make pytype run the code of END_ASYNC_FOR and whatever
comes afterwards.

Args:
blocks: A list of Block instances.

Returns:
A list of Block instances after the removal and merge.
"""
op_to_block = {}
merge_list = []
for block_idx, block in enumerate(blocks):
for code in block.code:
op_to_block[code] = block_idx

for block_idx, block in enumerate(blocks):
for code in block.code:
if code.end_async_for_target:
merge_list.append((block_idx, op_to_block[code.end_async_for_target]))
map_target = {}
for block_idx, block_idx_to_merge in merge_list:
# Remove JUMP_BACKWARD instruction as we don't want to execute it.
jump_back_op = blocks[block_idx].code.pop()
blocks[block_idx].code.extend(blocks[block_idx_to_merge].code)
map_target[jump_back_op] = blocks[block_idx_to_merge].code[0]

if block_idx_to_merge < len(blocks) - 1:
blocks[block_idx].connect_outgoing(blocks[block_idx_to_merge + 1])
processed_blocks.add(blocks[block_idx])

to_delete = sorted({to_idx for _, to_idx in merge_list}, reverse=True)

for block_idx in to_delete:
del blocks[block_idx]

for block in blocks:
replace_op = map_target.get(block.code[-1].target, None)
if replace_op:
block.code[-1].target = replace_op

return blocks


def _remove_jump_back_block(blocks: list[Block]):
"""Remove JUMP_BACKWARD instructions which are exception handling for async for.

These are not used during the regular pytype control flow analysis.
"""
new_blocks = []
for block in blocks:
last_op = block.code[-1]
if (
isinstance(last_op, opcodes.JUMP_BACKWARD)
and isinstance(last_op.target, opcodes.END_SEND)
and len(block.code) >= 2
and isinstance(block.code[-2], opcodes.CLEANUP_THROW)
):
continue
new_blocks.append(block)

return new_blocks


def compute_order(
bytecode: list[opcodes.Opcode], python_version
) -> list[Block]:
"""Split bytecode into blocks and order the blocks.

This builds an "ancestor first" ordering of the basic blocks of the bytecode.
Expand All @@ -359,10 +515,16 @@ def compute_order(bytecode: list[opcodes.Opcode]) -> list[Block]:
Returns:
A list of Block instances.
"""
blocks = _split_bytecode(bytecode)
processed_blocks = set()
blocks = _split_bytecode(bytecode, processed_blocks, python_version)
if python_version >= (3, 12):
blocks = _remove_jump_back_block(blocks)
blocks = _remove_jmp_to_get_anext_and_merge(blocks, processed_blocks)
first_op_to_block = {block.code[0]: block for block in blocks}
for i, block in enumerate(blocks):
next_block = blocks[i + 1] if i < len(blocks) - 1 else None
if block in processed_blocks:
continue
first_op, last_op = block.code[0], block.code[-1]
if next_block and not last_op.no_next():
block.connect_outgoing(next_block)
Expand Down Expand Up @@ -390,7 +552,7 @@ def _order_code(dis_code: pycnite.types.DisassembledCode) -> OrderedCode:
"""
ops = opcodes.build_opcodes(dis_code)
add_pop_block_targets(ops)
blocks = compute_order(ops)
blocks = compute_order(ops, dis_code.python_version)
return OrderedCode(dis_code.code, ops, blocks)


Expand Down
59 changes: 35 additions & 24 deletions pytype/pyc/opcodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class Opcode:
"prev",
"next",
"target",
"end_async_for_target",
"block_target",
"code",
"annotation",
Expand All @@ -67,6 +68,9 @@ def __init__(self, index, line, endline=None, col=None, endcol=None):
self.prev = None
self.next = None
self.target = None
# The END_ASYNC_FOR instruction that pytype should jump to from this
# instruction, if any.
self.end_async_for_target = None
self.block_target = None
self.code = None # If we have a CodeType or OrderedCode parent
self.annotation = None
Expand Down Expand Up @@ -1306,30 +1310,6 @@ def _should_elide_opcode(
and isinstance(op_items[i + 1][1], END_ASYNC_FOR)
)

# In 3.12 all generators are compiled into infinite loops, too. In addition,
# YIELD_VALUE inserts exception handling instructions:
# CLEANUP_THROW
# JUMP_BACKWARD
# These can appear on their own or they can be inserted between JUMP_BACKWARD
# and END_ASYNC_FOR, possibly many times. We keep eliding the `async for` jump
# and also elide the exception handling cleanup codes because they're not
# relevant for pytype and complicate the block graph.
if python_version == (3, 12):
return (
isinstance(op, CLEANUP_THROW)
or (
isinstance(op, JUMP_BACKWARD)
and i >= 1
and isinstance(op_items[i - 1][1], CLEANUP_THROW)
)
or (
isinstance(op, JUMP_BACKWARD)
and isinstance(
_get_opcode_following_cleanup_throw_jump_pairs(op_items, i + 1),
END_ASYNC_FOR,
)
)
)
return False


Expand Down Expand Up @@ -1372,13 +1352,44 @@ def _add_jump_targets(ops, offset_to_index):
op.target = ops[op.arg]


def _add_async_for_jump_back_targets(
ops: list[Opcode],
offset_to_op: dict[int, Opcode],
exc_table: pycnite.types.ExceptionTable,
):
"""Find the END_ASYNC_FOR target of which is related to a JUMP_BACKWARD instruction.

Also, assign them in a attribute end_async_for_target so that we can process
it later.
"""

get_anext_incoming: dict[JUMP_BACKWARD, set[GET_ANEXT]] = {}
for op in ops:
if isinstance(op, JUMP_BACKWARD) and isinstance(op.target, GET_ANEXT):
if op.target not in get_anext_incoming:
get_anext_incoming[op.target] = set()
get_anext_incoming[op.target].add(op)

for e in exc_table.entries:
if e.start in offset_to_op and isinstance(offset_to_op[e.start], GET_ANEXT):
get_anext = offset_to_op[e.start]
if get_anext not in get_anext_incoming:
continue
for jump_backward in get_anext_incoming[get_anext]:
jump_backward.end_async_for_target = offset_to_op[e.target]


def build_opcodes(dis_code: pycnite.types.DisassembledCode) -> list[Opcode]:
  """Turn pycnite's disassembled code into a linked list of Opcode objects.

  The steps, in order: create the opcodes, add setup-except information when
  the exception table is non-empty, flatten to a list, resolve jump targets,
  and, for Python 3.12 and later, attach async-for jump-back targets.

  Args:
    dis_code: The disassembled code; its `opcodes`, `python_version` and
      `exception_table` attributes are read.

  Returns:
    The list of opcodes.
  """
  version = dis_code.python_version
  offset_to_op = _make_opcodes(dis_code.opcodes, version)
  if dis_code.exception_table:
    _add_setup_except(offset_to_op, dis_code.exception_table)
  ops, offset_to_idx = _make_opcode_list(offset_to_op, version)
  _add_jump_targets(ops, offset_to_idx)
  if version < (3, 12):
    return ops
  _add_async_for_jump_back_targets(ops, offset_to_op, dis_code.exception_table)
  return ops


Expand Down
4 changes: 0 additions & 4 deletions pytype/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,10 +206,6 @@ def merge_into(self, other):
self.data_stack,
other.data_stack,
)
assert len(self.block_stack) == len(other.block_stack), (
self.block_stack,
other.block_stack,
)
both = list(zip(self.data_stack, other.data_stack))
if any(v1 is not v2 for v1, v2 in both):
for v, o in both:
Expand Down
60 changes: 60 additions & 0 deletions pytype/tests/test_async_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,66 @@ async def gen():
x4: Coroutine[Any, Any, None] = gen().aclose()
""")

@test_utils.skipBeforePy((3, 11), "New in 3.11")
def test_async_gen_coroutines_error(self):
"""Run self.Check on an async generator using `async for` within `async with`.

The snippet's `wrapper` also iterates with a plain `async for` in its
else-branch, so both forms appear in a single function.
"""
self.Check("""
def outer(f):
async def wrapper(t, *args, **kwargs):
if t is None:
async with f():
async for c in f():
yield c
else:
async for c in f():
yield c
return wrapper
""")

@test_utils.skipBeforePy((3, 11), "New in 3.11")
def test_async_for(self):
"""Run self.Check on an async generator combining `async for` with try/except.

The snippet yields inside the loop, inside the `except ValueError` handler
and once more at the end; the attribute-error on the `async for` line is
suppressed with a pytype disable comment.
"""
self.Check("""
async def iterate(num):
try:
async for s in range(num): # pytype: disable=attribute-error
if s > 3:
yield ''
except ValueError as e:
yield ''
yield ''
""")

@test_utils.skipBeforePy((3, 11), "New in 3.11")
def test_async_for_with_control_flow(self):
"""Run self.Check on an `async for` loop that uses both `continue` and `yield`."""
self.Check("""
from typing import Any
import random
async def iterate(stream: Any):
async for _ in stream:
if (random.randint(0, 100) != 30 or random.randint(0, 100) != 40):
continue
yield random.randint(0, 100)
""")

@test_utils.skipBeforePy((3, 11), "New in 3.11")
def test_async_double_for_loop(self):
"""Run self.Check on an async generator with doubled `async for` loops.

The snippet uses two `async for` loops (over `c` and `d`) with yields, once
under `async with` and once in the else-branch.
"""
self.Check("""
def outer(f):
async def wrapper(t, *args, **kwargs):
if t is None:
async with f():
async for c in f():
async for d in f():
yield c + d
yield c
else:
async for c in f():
async for d in f():
yield c + d
yield c
return wrapper
""")


if __name__ == "__main__":
test_base.main()
Loading