diff --git a/pytype/blocks/blocks.py b/pytype/blocks/blocks.py index 100b6c5e5..8ebdd79c7 100644 --- a/pytype/blocks/blocks.py +++ b/pytype/blocks/blocks.py @@ -1,7 +1,7 @@ """Functions for computing the execution order of bytecode.""" from collections.abc import Iterator -from typing import Any, cast +from typing import Any, Sequence, cast from pycnite import bytecode as pyc_bytecode from pycnite import marshal as pyc_marshal import pycnite.types @@ -316,7 +316,9 @@ def add_pop_block_targets(bytecode: list[opcodes.Opcode]) -> None: todo.append((op.next, block_stack)) -def _split_bytecode(bytecode: list[opcodes.Opcode]) -> list[Block]: +def _split_bytecode( + bytecode: list[opcodes.Opcode], processed_blocks: set[Block], python_version +) -> list[Block]: """Given a sequence of bytecodes, return basic blocks. This will split the code at "basic block boundaries". These occur at @@ -333,21 +335,175 @@ def _split_bytecode(bytecode: list[opcodes.Opcode]) -> list[Block]: targets = {op.target for op in bytecode if op.target} blocks = [] code = [] - for op in bytecode: + prev_block: Block = None + i = 0 + while i < len(bytecode): + op = bytecode[i] + # SEND is only used in the context of async for and `yield from`. + # These instructions are not used in other context, so it's safe to process + # it assuming that these are the only constructs they're being used. + if python_version >= (3, 12) and isinstance(op, opcodes.SEND): + if code: + prev_block = Block(code) + blocks.append(prev_block) + code = [] + new_blocks, i = _preprocess_async_for_and_yield( + i, bytecode, prev_block, processed_blocks + ) + blocks.extend(new_blocks) + prev_block = blocks[-1] + continue + code.append(op) if ( op.no_next() or op.does_jump() or op.pops_block() or op.next is None - or op.next in targets + or (op.next in targets) + and ( + not isinstance(op.next, opcodes.GET_ANEXT) + or python_version < (3, 12) + ) ): - blocks.append(Block(code)) + prev_block = Block(code) + blocks.append(prev_block) code = [] + i += 1 + return blocks -def compute_order(bytecode: list[opcodes.Opcode]) -> list[Block]: +def _preprocess_async_for_and_yield( + idx: int, + bytecode: Sequence[opcodes.Opcode], + prev_block: Block, + processed_blocks: set[Block], +) -> tuple[list[Block], int]: + """Process bytecode instructions for yield and async for in a way that pytype can iterate correctly. + + 'Async for' and yield statements, contains instructions that starts with SEND + and ends with END_SEND. + + The reason why we need to pre process async for is because the control flow of + async for is drastically different from regular control flows also due to the + fact that the termination of the loop happens by STOP_ASYNC_ITERATION + exception, not a regular control flow. So we need to split (or merge) the + basic blocks in a way that pytype executes in the order that what'd happen in + the runtime, so that it doesn't fail with wrong order of execution, which can + result in a stack underrun. + + Args: + idx: The index of the SEND instruction. + bytecode: A list of instances of opcodes.Opcode + prev_block: The previous block that we want to connect the new blocks to. + processed_blocks: Blocks that has been processed so that it doesn't get + processed again by compute_order. + + Returns: + A tuple of (list[Block], int), where the Block is the block containing the + iteration part of the async for construct, and the int is the index of the + END_SEND instruction. + """ + assert isinstance(bytecode[idx], opcodes.SEND) + i = next( + i + for i in range(idx + 1, len(bytecode)) + if isinstance(bytecode[i], opcodes.JUMP_BACKWARD_NO_INTERRUPT) + ) + + end_block_idx = i + 1 + # In CLEANUP_THROW can be present after JUMP_BACKWARD_NO_INTERRUPT + # depending on how the control flow graph is constructed. + # Usually, CLEANUP_THROW comes way after + if isinstance(bytecode[end_block_idx], opcodes.CLEANUP_THROW): + end_block_idx += 1 + + # Somehow pytype expects the SEND and YIELD_VALUE to be in different + # blocks, so we need to split. + send_block = Block(bytecode[idx : idx + 1]) + yield_value_block = Block(bytecode[idx + 1 : end_block_idx]) + prev_block.connect_outgoing(send_block) + send_block.connect_outgoing(yield_value_block) + processed_blocks.update(send_block, yield_value_block) + return [send_block, yield_value_block], end_block_idx + + +def _remove_jmp_to_get_anext_and_merge( + blocks: list[Block], processed_blocks: set[Block] +) -> list[Block]: + """Remove JUMP_BACKWARD instructions to GET_ANEXT instructions. + + And also merge the block that contains the END_ASYNC_FOR which is part of the + same loop of the GET_ANEXT and JUMP_BACKWARD construct, to the JUMP_BACKWARD + instruction. This is to ignore the JUMP_BACKWARD because in pytype's eyes it's + useless (as it'll jump back to block that it already executed), and also + this is the way to make pytype run the code of END_ASYNC_FOR and whatever + comes afterwards. + + Args: + blocks: A list of Block instances. + + Returns: + A list of Block instances after the removal and merge. + """ + op_to_block = {} + merge_list = [] + for block_idx, block in enumerate(blocks): + for code in block.code: + op_to_block[code] = block_idx + + for block_idx, block in enumerate(blocks): + for code in block.code: + if code.end_async_for_target: + merge_list.append((block_idx, op_to_block[code.end_async_for_target])) + map_target = {} + for block_idx, block_idx_to_merge in merge_list: + # Remove JUMP_BACKWARD instruction as we don't want to execute it. + jump_back_op = blocks[block_idx].code.pop() + blocks[block_idx].code.extend(blocks[block_idx_to_merge].code) + map_target[jump_back_op] = blocks[block_idx_to_merge].code[0] + + if block_idx_to_merge < len(blocks) - 1: + blocks[block_idx].connect_outgoing(blocks[block_idx_to_merge + 1]) + processed_blocks.add(blocks[block_idx]) + + to_delete = sorted({to_idx for _, to_idx in merge_list}, reverse=True) + + for block_idx in to_delete: + del blocks[block_idx] + + for block in blocks: + replace_op = map_target.get(block.code[-1].target, None) + if replace_op: + block.code[-1].target = replace_op + + return blocks + + +def _remove_jump_back_block(blocks: list[Block]): + """Remove JUMP_BACKWARD instructions which are exception handling for async for. + + These are not used during the regular pytype control flow analysis. + """ + new_blocks = [] + for block in blocks: + last_op = block.code[-1] + if ( + isinstance(last_op, opcodes.JUMP_BACKWARD) + and isinstance(last_op.target, opcodes.END_SEND) + and len(block.code) >= 2 + and isinstance(block.code[-2], opcodes.CLEANUP_THROW) + ): + continue + new_blocks.append(block) + + return new_blocks + + +def compute_order( + bytecode: list[opcodes.Opcode], python_version +) -> list[Block]: """Split bytecode into blocks and order the blocks. This builds an "ancestor first" ordering of the basic blocks of the bytecode. @@ -359,10 +515,16 @@ def compute_order(bytecode: list[opcodes.Opcode]) -> list[Block]: Returns: A list of Block instances. """ - blocks = _split_bytecode(bytecode) + processed_blocks = set() + blocks = _split_bytecode(bytecode, processed_blocks, python_version) + if python_version >= (3, 12): + blocks = _remove_jump_back_block(blocks) + blocks = _remove_jmp_to_get_anext_and_merge(blocks, processed_blocks) first_op_to_block = {block.code[0]: block for block in blocks} for i, block in enumerate(blocks): next_block = blocks[i + 1] if i < len(blocks) - 1 else None + if block in processed_blocks: + continue first_op, last_op = block.code[0], block.code[-1] if next_block and not last_op.no_next(): block.connect_outgoing(next_block) @@ -390,7 +552,7 @@ def _order_code(dis_code: pycnite.types.DisassembledCode) -> OrderedCode: """ ops = opcodes.build_opcodes(dis_code) add_pop_block_targets(ops) - blocks = compute_order(ops) + blocks = compute_order(ops, dis_code.python_version) return OrderedCode(dis_code.code, ops, blocks) diff --git a/pytype/pyc/opcodes.py b/pytype/pyc/opcodes.py index 0907d1277..8e9d18c7f 100644 --- a/pytype/pyc/opcodes.py +++ b/pytype/pyc/opcodes.py @@ -48,6 +48,7 @@ class Opcode: "prev", "next", "target", + "end_async_for_target", "block_target", "code", "annotation", @@ -67,6 +68,9 @@ def __init__(self, index, line, endline=None, col=None, endcol=None): self.prev = None self.next = None self.target = None + # The END_ASYNC_FOR instruction of which we want to make pytype jump to for + # this instruction. + self.end_async_for_target = None self.block_target = None self.code = None # If we have a CodeType or OrderedCode parent self.annotation = None @@ -1306,30 +1310,6 @@ def _should_elide_opcode( and isinstance(op_items[i + 1][1], END_ASYNC_FOR) ) - # In 3.12 all generators are compiled into infinite loops, too. In addition, - # YIELD_VALUE inserts exception handling instructions: - # CLEANUP_THROW - # JUMP_BACKWARD - # These can appear on their own or they can be inserted between JUMP_BACKWARD - # and END_ASYNC_FOR, possibly many times. We keep eliding the `async for` jump - # and also elide the exception handling cleanup codes because they're not - # relevant for pytype and complicate the block graph. - if python_version == (3, 12): - return ( - isinstance(op, CLEANUP_THROW) - or ( - isinstance(op, JUMP_BACKWARD) - and i >= 1 - and isinstance(op_items[i - 1][1], CLEANUP_THROW) - ) - or ( - isinstance(op, JUMP_BACKWARD) - and isinstance( - _get_opcode_following_cleanup_throw_jump_pairs(op_items, i + 1), - END_ASYNC_FOR, - ) - ) - ) return False @@ -1372,6 +1352,33 @@ def _add_jump_targets(ops, offset_to_index): op.target = ops[op.arg] +def _add_async_for_jump_back_targets( + ops: list[Opcode], + offset_to_op: dict[int, Opcode], + exc_table: pycnite.types.ExceptionTable, +): + """Find the END_ASYNC_FOR target of which is related to a JUMP_BACKWARD instruction. + + Also, assign them in a attribute end_async_for_target so that we can process + it later. + """ + + get_anext_incoming: dict[JUMP_BACKWARD, set[GET_ANEXT]] = {} + for op in ops: + if isinstance(op, JUMP_BACKWARD) and isinstance(op.target, GET_ANEXT): + if op.target not in get_anext_incoming: + get_anext_incoming[op.target] = set() + get_anext_incoming[op.target].add(op) + + for e in exc_table.entries: + if e.start in offset_to_op and isinstance(offset_to_op[e.start], GET_ANEXT): + get_anext = offset_to_op[e.start] + if get_anext not in get_anext_incoming: + continue + for jump_backward in get_anext_incoming[get_anext]: + jump_backward.end_async_for_target = offset_to_op[e.target] + + def build_opcodes(dis_code: pycnite.types.DisassembledCode) -> list[Opcode]: """Build a list of opcodes from pycnite opcodes.""" offset_to_op = _make_opcodes(dis_code.opcodes, dis_code.python_version) @@ -1379,6 +1386,10 @@ def build_opcodes(dis_code: pycnite.types.DisassembledCode) -> list[Opcode]: _add_setup_except(offset_to_op, dis_code.exception_table) ops, offset_to_idx = _make_opcode_list(offset_to_op, dis_code.python_version) _add_jump_targets(ops, offset_to_idx) + if dis_code.python_version >= (3, 12): + _add_async_for_jump_back_targets( + ops, offset_to_op, dis_code.exception_table + ) return ops diff --git a/pytype/state.py b/pytype/state.py index bd2b9957c..97e45126e 100644 --- a/pytype/state.py +++ b/pytype/state.py @@ -206,10 +206,6 @@ def merge_into(self, other): self.data_stack, other.data_stack, ) - assert len(self.block_stack) == len(other.block_stack), ( - self.block_stack, - other.block_stack, - ) both = list(zip(self.data_stack, other.data_stack)) if any(v1 is not v2 for v1, v2 in both): for v, o in both: diff --git a/pytype/tests/test_async_generators.py b/pytype/tests/test_async_generators.py index 8c035917b..2f8a4a778 100644 --- a/pytype/tests/test_async_generators.py +++ b/pytype/tests/test_async_generators.py @@ -406,6 +406,66 @@ async def gen(): x4: Coroutine[Any, Any, None] = gen().aclose() """) + @test_utils.skipBeforePy((3, 11), "New in 3.11") + def test_async_gen_coroutines_error(self): + """Test whether the async for within async with does not fail at runtime.""" + self.Check(""" + def outer(f): + async def wrapper(t, *args, **kwargs): + if t is None: + async with f(): + async for c in f(): + yield c + else: + async for c in f(): + yield c + return wrapper + """) + + @test_utils.skipBeforePy((3, 11), "New in 3.11") + def test_async_for(self): + self.Check(""" + async def iterate(num): + try: + async for s in range(num): # pytype: disable=attribute-error + if s > 3: + yield '' + except ValueError as e: + yield '' + yield '' + """) + + @test_utils.skipBeforePy((3, 11), "New in 3.11") + def test_async_for_with_control_flow(self): + self.Check(""" + from typing import Any + import random + async def iterate(stream: Any): + async for _ in stream: + if (random.randint(0, 100) != 30 or random.randint(0, 100) != 40): + continue + yield random.randint(0, 100) + """) + + @test_utils.skipBeforePy((3, 11), "New in 3.11") + def test_async_double_for_loop(self): + self.Check(""" + def outer(f): + async def wrapper(t, *args, **kwargs): + if t is None: + async with f(): + async for c in f(): + async for d in f(): + yield c + d + yield c + else: + async for c in f(): + async for d in f(): + yield c + d + yield c + return wrapper + """) + if __name__ == "__main__": test_base.main()