From 2032b6dadaa305f730077623abe7fc9520045054 Mon Sep 17 00:00:00 2001 From: AN Long Date: Mon, 19 Jan 2026 02:04:02 +0900 Subject: [PATCH 1/5] Eliminate redundant refcounting in the JIT for BINARY_OP --- Python/bytecodes.c | 9 ++++++--- Python/optimizer_bytecodes.c | 4 +++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 9058a5210e50f9..f1ff0d9960bccf 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -5113,7 +5113,7 @@ dummy_func( assert(oparg <= NB_OPARG_LAST); } - op(_BINARY_OP, (lhs, rhs -- res)) { + op(_BINARY_OP, (lhs, rhs -- res, l, r)) { PyObject *lhs_o = PyStackRef_AsPyObjectBorrow(lhs); PyObject *rhs_o = PyStackRef_AsPyObjectBorrow(rhs); @@ -5123,10 +5123,13 @@ dummy_func( ERROR_NO_POP(); } res = PyStackRef_FromPyObjectSteal(res_o); - DECREF_INPUTS(); + l = lhs; + r = rhs; + DEAD(lhs); + DEAD(rhs); } - macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + unused/4 + _BINARY_OP; + macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + unused/4 + _BINARY_OP + POP_TOP + POP_TOP; pure replicate(2:4) inst(SWAP, (bottom, unused[oparg-2], top -- bottom, unused[oparg-2], top)) { diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 876ba7c6de7482..773102e79e6774 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -210,8 +210,10 @@ dummy_func(void) { sym_set_type(left, &PyFloat_Type); } - op(_BINARY_OP, (lhs, rhs -- res)) { + op(_BINARY_OP, (lhs, rhs -- res, l, r)) { REPLACE_OPCODE_IF_EVALUATES_PURE(lhs, rhs, res); + l = lhs; + r = rhs; bool lhs_int = sym_matches_type(lhs, &PyLong_Type); bool rhs_int = sym_matches_type(rhs, &PyLong_Type); bool lhs_float = sym_matches_type(lhs, &PyFloat_Type); From 1cfaf8996dd6d1a7da04fc55de4577f72a2c1333 Mon Sep 17 00:00:00 2001 From: AN Long Date: Mon, 19 Jan 2026 02:07:33 +0900 Subject: [PATCH 2/5] make regen-all --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_ids.h | 2 +- Include/internal/pycore_uop_metadata.h | 6 ++--- Modules/_testinternalcapi/test_cases.c.h | 27 ++++++++++++++++------- Python/executor_cases.c.h | 26 ++++++++-------------- Python/generated_cases.c.h | 27 ++++++++++++++++------- Python/optimizer_cases.c.h | 26 +++++++++++++++++----- 7 files changed, 72 insertions(+), 44 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index e3f7f5a6f0bb16..331cee6f84773f 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1343,7 +1343,7 @@ extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256]; #ifdef NEED_OPCODE_METADATA const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = { - [BINARY_OP] = { .nuops = 1, .uops = { { _BINARY_OP, OPARG_SIMPLE, 4 } } }, + [BINARY_OP] = { .nuops = 3, .uops = { { _BINARY_OP, OPARG_SIMPLE, 4 }, { _POP_TOP, OPARG_SIMPLE, 4 }, { _POP_TOP, OPARG_SIMPLE, 4 } } }, [BINARY_OP_ADD_FLOAT] = { .nuops = 5, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 }, { _POP_TOP_FLOAT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_ADD_INT] = { .nuops = 5, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 }, { _POP_TOP_INT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_ADD_UNICODE] = { .nuops = 5, .uops = { { _GUARD_TOS_UNICODE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_ADD_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 }, { _POP_TOP_UNICODE, OPARG_SIMPLE, 5 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 8fd7cef3368e13..534dd565ca84d8 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -369,7 +369,7 @@ extern "C" { #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE #define MAX_UOP_ID 568 -#define _BINARY_OP_r21 569 +#define _BINARY_OP_r23 569 #define _BINARY_OP_ADD_FLOAT_r03 570 #define _BINARY_OP_ADD_FLOAT_r13 571 #define _BINARY_OP_ADD_FLOAT_r23 572 diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 7989c2f33662e4..a6d95f8311d877 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -2919,7 +2919,7 @@ const _PyUopCachingInfo _PyUop_Caching[MAX_UOP_ID+1] = { .entries = { { -1, -1, -1 }, { -1, -1, -1 }, - { 1, 2, _BINARY_OP_r21 }, + { 3, 2, _BINARY_OP_r23 }, { -1, -1, -1 }, }, }, @@ -3929,7 +3929,7 @@ const uint16_t _PyUop_Uncached[MAX_UOP_REGS_ID+1] = { [_COPY_3_r23] = _COPY_3, [_COPY_3_r33] = _COPY_3, [_COPY_r01] = _COPY, - [_BINARY_OP_r21] = _BINARY_OP, + [_BINARY_OP_r23] = _BINARY_OP, [_SWAP_2_r02] = _SWAP_2, [_SWAP_2_r12] = _SWAP_2, [_SWAP_2_r22] = _SWAP_2, @@ -4083,7 +4083,7 @@ const uint16_t _PyUop_SpillsAndReloads[4][4] = { const char *const _PyOpcode_uop_name[MAX_UOP_REGS_ID+1] = { [_BINARY_OP] = "_BINARY_OP", - [_BINARY_OP_r21] = "_BINARY_OP_r21", + [_BINARY_OP_r23] = "_BINARY_OP_r23", [_BINARY_OP_ADD_FLOAT] = "_BINARY_OP_ADD_FLOAT", [_BINARY_OP_ADD_FLOAT_r03] = "_BINARY_OP_ADD_FLOAT_r03", [_BINARY_OP_ADD_FLOAT_r13] = "_BINARY_OP_ADD_FLOAT_r13", diff --git a/Modules/_testinternalcapi/test_cases.c.h b/Modules/_testinternalcapi/test_cases.c.h index fb584314ef40bc..83e9e55ade8698 100644 --- a/Modules/_testinternalcapi/test_cases.c.h +++ b/Modules/_testinternalcapi/test_cases.c.h @@ -32,6 +32,9 @@ _PyStackRef lhs; _PyStackRef rhs; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; + _PyStackRef value; // _SPECIALIZE_BINARY_OP { rhs = stack_pointer[-1]; @@ -65,18 +68,26 @@ JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); + l = lhs; + r = rhs; + } + // _POP_TOP + { + value = r; + stack_pointer[-2] = res; + stack_pointer[-1] = l; _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp = lhs; - lhs = res; - stack_pointer[-2] = lhs; - PyStackRef_CLOSE(tmp); - tmp = rhs; - rhs = PyStackRef_NULL; - stack_pointer[-1] = rhs; - PyStackRef_CLOSE(tmp); + PyStackRef_XCLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _POP_TOP + { + value = l; stack_pointer += -1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); } DISPATCH(); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 6469deb238f5b0..904d546e526eb7 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -16594,12 +16594,14 @@ break; } - case _BINARY_OP_r21: { + case _BINARY_OP_r23: { CHECK_CURRENT_CACHED_VALUES(2); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); _PyStackRef rhs; _PyStackRef lhs; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; _PyStackRef _stack_item_0 = _tos_cache0; _PyStackRef _stack_item_1 = _tos_cache1; oparg = CURRENT_OPARG(); @@ -16620,23 +16622,13 @@ JUMP_TO_ERROR(); } res = PyStackRef_FromPyObjectSteal(res_o); - _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp = lhs; - lhs = res; - stack_pointer[-2] = lhs; - PyStackRef_CLOSE(tmp); - tmp = rhs; - rhs = PyStackRef_NULL; - stack_pointer[-1] = rhs; - PyStackRef_CLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer += -1; - ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + l = lhs; + r = rhs; + _tos_cache2 = r; + _tos_cache1 = l; _tos_cache0 = res; - _tos_cache1 = PyStackRef_ZERO_BITS; - _tos_cache2 = PyStackRef_ZERO_BITS; - SET_CURRENT_CACHED_VALUES(1); - stack_pointer += -1; + SET_CURRENT_CACHED_VALUES(3); + stack_pointer += -2; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); assert(WITHIN_STACK_BOUNDS_IGNORING_CACHE()); break; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index b5ae600c095e67..ad9199902dbff8 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -32,6 +32,9 @@ _PyStackRef lhs; _PyStackRef rhs; _PyStackRef res; + _PyStackRef l; + _PyStackRef r; + _PyStackRef value; // _SPECIALIZE_BINARY_OP { rhs = stack_pointer[-1]; @@ -65,18 +68,26 @@ JUMP_TO_LABEL(error); } res = PyStackRef_FromPyObjectSteal(res_o); + l = lhs; + r = rhs; + } + // _POP_TOP + { + value = r; + stack_pointer[-2] = res; + stack_pointer[-1] = l; _PyFrame_SetStackPointer(frame, stack_pointer); - _PyStackRef tmp = lhs; - lhs = res; - stack_pointer[-2] = lhs; - PyStackRef_CLOSE(tmp); - tmp = rhs; - rhs = PyStackRef_NULL; - stack_pointer[-1] = rhs; - PyStackRef_CLOSE(tmp); + PyStackRef_XCLOSE(value); stack_pointer = _PyFrame_GetStackPointer(frame); + } + // _POP_TOP + { + value = l; stack_pointer += -1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); } DISPATCH(); } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 012fe16bfd9096..4802da1a71b34f 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -3605,6 +3605,8 @@ JitOptRef rhs; JitOptRef lhs; JitOptRef res; + JitOptRef l; + JitOptRef r; rhs = stack_pointer[-1]; lhs = stack_pointer[-2]; if ( @@ -3616,6 +3618,8 @@ _PyStackRef lhs = sym_get_const_as_stackref(ctx, lhs_sym); _PyStackRef rhs = sym_get_const_as_stackref(ctx, rhs_sym); _PyStackRef res_stackref; + _PyStackRef l_stackref; + _PyStackRef r_stackref; /* Start of uop copied from bytecodes for constant evaluation */ PyObject *lhs_o = PyStackRef_AsPyObjectBorrow(lhs); PyObject *rhs_o = PyStackRef_AsPyObjectBorrow(rhs); @@ -3625,21 +3629,29 @@ JUMP_TO_LABEL(error); } res_stackref = PyStackRef_FromPyObjectSteal(res_o); + l_stackref = lhs; + r_stackref = rhs; /* End of uop copied from bytecodes for constant evaluation */ + (void)l_stackref; + (void)r_stackref; res = sym_new_const_steal(ctx, PyStackRef_AsPyObjectSteal(res_stackref)); if (sym_is_const(ctx, res)) { PyObject *result = sym_get_const(ctx, res); if (_Py_IsImmortal(result)) { - // Replace with _POP_TWO_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result - ADD_OP(_POP_TWO_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); + // Replace with _INSERT_2_LOAD_CONST_INLINE_BORROW since we have two inputs and an immortal result + ADD_OP(_INSERT_2_LOAD_CONST_INLINE_BORROW, 0, (uintptr_t)result); } } - CHECK_STACK_BOUNDS(-1); + CHECK_STACK_BOUNDS(1); stack_pointer[-2] = res; - stack_pointer += -1; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } + l = lhs; + r = rhs; bool lhs_int = sym_matches_type(lhs, &PyLong_Type); bool rhs_int = sym_matches_type(rhs, &PyLong_Type); bool lhs_float = sym_matches_type(lhs, &PyFloat_Type); @@ -3673,9 +3685,11 @@ else { res = sym_new_type(ctx, &PyFloat_Type); } - CHECK_STACK_BOUNDS(-1); + CHECK_STACK_BOUNDS(1); stack_pointer[-2] = res; - stack_pointer += -1; + stack_pointer[-1] = l; + stack_pointer[0] = r; + stack_pointer += 1; ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } From 25a6ae49796d13720d9cfc3ad72a9fb6caf33bf4 Mon Sep 17 00:00:00 2001 From: AN Long Date: Mon, 19 Jan 2026 02:12:20 +0900 Subject: [PATCH 3/5] Add test --- Lib/test/test_capi/test_opt.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 79c7f530b8ae89..6fd80459b7aa85 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2880,6 +2880,29 @@ def testfunc(n): self.assertIn("_POP_TOP_NOP", uops) self.assertLessEqual(count_ops(ex, "_POP_TOP"), 2) + def test_binary_op_refcount_elimination(self): + class CustomAdder: + def __init__(self, val): + self.val = val + def __add__(self, other): + return CustomAdder(self.val + other.val) + + def testfunc(n): + a = CustomAdder(1) + b = CustomAdder(2) + res = None + for _ in range(n): + res = a + b + return res.val if res else 0 + + res, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD) + self.assertEqual(res, 3) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_BINARY_OP", uops) + self.assertIn("_POP_TOP_NOP", uops) + self.assertLessEqual(count_ops(ex, "_POP_TOP"), 2) + def test_remove_guard_for_slice_list(self): def f(n): for i in range(n): From 2920f08ab152207b29c5117f011301000dc482a7 Mon Sep 17 00:00:00 2001 From: AN Long Date: Mon, 19 Jan 2026 02:12:35 +0900 Subject: [PATCH 4/5] Blurb this --- .../2026-01-19-01-56-44.gh-issue-144007.1xjdBf.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2026-01-19-01-56-44.gh-issue-144007.1xjdBf.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2026-01-19-01-56-44.gh-issue-144007.1xjdBf.rst b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-19-01-56-44.gh-issue-144007.1xjdBf.rst new file mode 100644 index 00000000000000..26db86fae6bf25 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2026-01-19-01-56-44.gh-issue-144007.1xjdBf.rst @@ -0,0 +1 @@ +Eliminate redundant refcounting in the JIT for ``BINARY_OP``. From 90a42db82d5db22ec6773a36c84337b51ecd3f4d Mon Sep 17 00:00:00 2001 From: AN Long Date: Mon, 19 Jan 2026 02:43:31 +0900 Subject: [PATCH 5/5] Fix crash --- Python/optimizer_bytecodes.c | 2 +- Python/optimizer_cases.c.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 773102e79e6774..7091c575d12c41 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -211,9 +211,9 @@ dummy_func(void) { } op(_BINARY_OP, (lhs, rhs -- res, l, r)) { - REPLACE_OPCODE_IF_EVALUATES_PURE(lhs, rhs, res); l = lhs; r = rhs; + REPLACE_OPCODE_IF_EVALUATES_PURE(lhs, rhs, res); bool lhs_int = sym_matches_type(lhs, &PyLong_Type); bool rhs_int = sym_matches_type(rhs, &PyLong_Type); bool lhs_float = sym_matches_type(lhs, &PyFloat_Type); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 4802da1a71b34f..f0b4801a937f35 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -3609,6 +3609,8 @@ JitOptRef r; rhs = stack_pointer[-1]; lhs = stack_pointer[-2]; + l = lhs; + r = rhs; if ( sym_is_safe_const(ctx, lhs) && sym_is_safe_const(ctx, rhs) @@ -3650,8 +3652,6 @@ ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } - l = lhs; - r = rhs; bool lhs_int = sym_matches_type(lhs, &PyLong_Type); bool rhs_int = sym_matches_type(rhs, &PyLong_Type); bool lhs_float = sym_matches_type(lhs, &PyFloat_Type);