From 4cb1ebfbf88b3443ac10f5b002260d89042c8052 Mon Sep 17 00:00:00 2001
From: Fabio Niephaus <fabio.niephaus@oracle.com>
Date: Tue, 13 Jan 2026 20:28:25 +0100
Subject: [PATCH 1/5] Use `XSum` port for `math.fsum()`.

---
 .../builtins/modules/MathModuleBuiltins.java  | 116 +---
 .../com/oracle/graal/python/util/XSum.java    | 624 ++++++++++++++++++
 mx.graalpython/copyrights/overrides           |   1 +
 mx.graalpython/copyrights/xsum.copyright.star |  22 +
 .../copyrights/xsum.copyright.star.regex      |  23 +
 5 files changed, 680 insertions(+), 106 deletions(-)
 create mode 100644 graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/XSum.java
 create mode 100644 mx.graalpython/copyrights/xsum.copyright.star
 create mode 100644 mx.graalpython/copyrights/xsum.copyright.star.regex

diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java
index 532614470f..53baea66cf 100644
--- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java
+++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2025, Oracle and/or its affiliates.
+ * Copyright (c) 2017, 2026, Oracle and/or its affiliates.
  * Copyright (c) 2014, Regents of the University of California
  *
  * All rights reserved.
@@ -34,7 +34,6 @@
 import java.math.BigDecimal;
 import java.math.BigInteger;
 import java.math.MathContext;
-import java.util.Arrays;
 import java.util.List;
 
 import com.oracle.graal.python.PythonLanguage;
@@ -91,6 +90,7 @@
 import com.oracle.graal.python.runtime.exception.PException;
 import com.oracle.graal.python.runtime.object.PFactory;
 import com.oracle.graal.python.util.OverflowException;
+import com.oracle.graal.python.util.XSum;
 import com.oracle.truffle.api.CompilerDirectives;
 import com.oracle.truffle.api.CompilerDirectives.CompilationFinal;
 import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
@@ -868,7 +868,6 @@ protected ArgumentClinicProvider getArgumentClinic() {
     @Builtin(name = "fsum", minNumOfPositionalArgs = 1)
     @GenerateNodeFactory
     public abstract static class FsumNode extends PythonUnaryBuiltinNode {
-
         @Specialization
         static double doIt(VirtualFrame frame, Object iterable,
                         @Bind Node inliningTarget,
@@ -877,119 +876,43 @@ static double doIt(VirtualFrame frame, Object iterable,
                         @Cached PyFloatAsDoubleNode asDoubleNode,
                         @Cached InlinedLoopConditionProfile loopProfile,
                         @Cached PRaiseNode raiseNode) {
-            /*
-             * This implementation is taken from CPython. The performance is not good. Should be
-             * faster. It can be easily replace with much simpler code based on BigDecimal:
-             *
-             * BigDecimal result = BigDecimal.ZERO;
-             *
-             * in cycle just: result = result.add(BigDecimal.valueof(x); ... The current
-             * implementation is little bit faster. The testFSum in test_math.py takes in different
-             * implementations: CPython ~0.6s CurrentImpl: ~14.3s Using BigDecimal: ~15.1
-             */
             Object iterator = getIter.execute(frame, inliningTarget, iterable);
-            double x, y, t, hi, lo = 0, yr, inf_sum = 0, special_sum = 0, sum;
-            double xsave;
-            int i, j, n = 0, arayLength = 32;
-            double[] p = new double[arayLength];
+
             boolean exhausted = false;
+            var acc = new XSum.SmallAccumulator();
+            // Like CPython, keep inf/nan summands out of the exact sum and track them separately:
+            // infSum only turns NaN if both +inf and -inf occur, specialSum is the value to return.
+            double infSum = 0;
+            double specialSum = 0;
             while (loopProfile.profile(inliningTarget, !exhausted)) {
                 try {
                     Object next = nextNode.execute(frame, inliningTarget, iterator);
-                    x = asDoubleNode.execute(frame, inliningTarget, next);
-                    xsave = x;
-                    for (i = j = 0; j < n; j++) { /* for y in partials */
-                        y = p[j];
-                        if (Math.abs(x) < Math.abs(y)) {
-                            t = x;
-                            x = y;
-                            y = t;
-                        }
-                        hi = x + y;
-                        yr = hi - x;
-                        lo = y - yr;
-                        if (lo != 0.0) {
-                            p[i++] = lo;
-                        }
-                        x = hi;
-                    }
-
-                    n = i;
-                    if (x != 0.0) {
-                        if (!Double.isFinite(x)) {
-                            /*
-                             * a nonfinite x could arise either as a result of intermediate
-                             * overflow, or as a result of a nan or inf in the summands
-                             */
-                            if (Double.isFinite(xsave)) {
-                                throw raiseNode.raise(inliningTarget, OverflowError, ErrorMessages.INTERMEDIATE_OVERFLOW_IN, "fsum");
-                            }
-                            if (Double.isInfinite(xsave)) {
-                                inf_sum += xsave;
-                            }
-                            special_sum += xsave;
-                            /* reset partials */
-                            n = 0;
-                        } else if (n >= arayLength) {
-                            arayLength += arayLength;
-                            p = Arrays.copyOf(p, arayLength);
-                        } else {
-                            p[n++] = x;
-                        }
-                    }
+                    double x = asDoubleNode.execute(frame, inliningTarget, next);
+                    if (Double.isFinite(x)) {
+                        acc.add(x);
+                    } else {
+                        if (Double.isInfinite(x)) {
+                            infSum += x;
+                        }
+                        specialSum += x;
+                    }
                 } catch (IteratorExhausted e) {
                     exhausted = true;
                 }
             }
-
-            if (special_sum != 0.0) {
-                if (Double.isNaN(inf_sum)) {
-                    throw raiseNode.raise(inliningTarget, ValueError, ErrorMessages.NEG_INF_PLUS_INF_IN);
-                } else {
-                    sum = special_sum;
-                    return sum;
-                }
-            }
-
-            hi = 0.0;
-            if (n > 0) {
-                hi = p[--n];
-                /*
-                 * sum_exact(ps, hi) from the top, stop when the sum becomes inexact.
-                 */
-                while (n > 0) {
-                    x = hi;
-                    y = p[--n];
-                    assert (Math.abs(y) < Math.abs(x));
-                    hi = x + y;
-                    yr = hi - x;
-                    lo = y - yr;
-                    if (lo != 0.0) {
-                        break;
-                    }
-                }
-                /*
-                 * Make half-even rounding work across multiple partials. Needed so that sum([1e-16,
-                 * 1, 1e16]) will round-up the last digit to two instead of down to zero (the 1e-16
-                 * makes the 1 slightly closer to two). With a potential 1 ULP rounding error
-                 * fixed-up, math.fsum() can guarantee commutativity.
-                 */
-                if (n > 0 && ((lo < 0.0 && p[n - 1] < 0.0) ||
-                                (lo > 0.0 && p[n - 1] > 0.0))) {
-                    y = lo * 2.0;
-                    x = hi + y;
-                    yr = x - hi;
-                    if (compareAsBigDecimal(y, yr) == 0) {
-                        hi = x;
-                    }
-                }
-            }
-            return hi;
-        }
-
-        @TruffleBoundary
-        private static int compareAsBigDecimal(double y, double yr) {
-            return BigDecimal.valueOf(y).compareTo(BigDecimal.valueOf(yr));
+            double result = acc.round();
+            if (Double.isInfinite(result)) {
+                // Only finite summands were accumulated, so an infinite result means overflow.
+                throw raiseNode.raise(inliningTarget, OverflowError, ErrorMessages.INTERMEDIATE_OVERFLOW_IN, "fsum");
+            }
+            if (specialSum != 0.0) {
+                // NaN != 0.0 holds, so this branch is taken whenever an inf or nan summand was seen.
+                if (Double.isNaN(infSum)) {
+                    throw raiseNode.raise(inliningTarget, ValueError, ErrorMessages.NEG_INF_PLUS_INF_IN);
+                }
+                return specialSum;
+            }
+            return result;
         }
     }
 
diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/XSum.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/XSum.java
new file mode 100644
index 0000000000..5dc7d18648
--- /dev/null
+++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/XSum.java
@@ -0,0 +1,624 @@
+/* Copyright 2024, 2026 Oracle and/or its affiliates.
+   Copyright 2015, 2018, 2021, 2024 Radford M. Neal
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+   LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+   OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+   WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+package com.oracle.graal.python.util;
+
+/**
+ * Implementation of exact summation of double numbers based on
+ * <a href="https://gitlab.com/radfordneal/xsum">xsum</a>.
+ */
+public class XSum {
+
+    // CONSTANTS DEFINING THE FLOATING POINT FORMAT
+
+    /**
+     * Bits in fp mantissa, excludes implicit 1.
+     */
+    private static final int XSUM_MANTISSA_BITS = 52;
+
+    /**
+     * Bits in fp exponent.
+     */
+    private static final int XSUM_EXP_BITS = 11;
+
+    /**
+     * Mask for mantissa bits.
+     */
+    private static final long XSUM_MANTISSA_MASK = (1L << XSUM_MANTISSA_BITS) - 1;
+
+    /**
+     * Mask for exponent.
+     */
+    private static final long XSUM_EXP_MASK = (1 << XSUM_EXP_BITS) - 1;
+
+    /**
+     * Bias added to signed exponent.
+     */
+    private static final int XSUM_EXP_BIAS = (1 << (XSUM_EXP_BITS - 1)) - 1;
+
+    /**
+     * Position of sign bit.
+     */
+    private static final int XSUM_SIGN_BIT = XSUM_MANTISSA_BITS + XSUM_EXP_BITS;
+
+    /**
+     * Mask for sign bit.
+     */
+    private static final long XSUM_SIGN_MASK = 1L << XSUM_SIGN_BIT;
+
+    // CONSTANTS DEFINING THE SMALL ACCUMULATOR FORMAT
+
+    /**
+     * Bits in chunk of the small accumulator.
+     */
+    private static final int XSUM_SCHUNK_BITS = 64;
+
+    /**
+     * Number of low bits of exponent, in one chunk.
+     */
+    private static final int XSUM_LOW_EXP_BITS = 5;
+
+    /**
+     * Mask for low-order exponent bits.
+     */
+    private static final int XSUM_LOW_EXP_MASK = (1 << XSUM_LOW_EXP_BITS) - 1;
+
+    /**
+     * Number of high exponent bits for index.
+     */
+    private static final int XSUM_HIGH_EXP_BITS = XSUM_EXP_BITS - XSUM_LOW_EXP_BITS;
+
+    /**
+     * Number of chunks in the small accumulator.
+     */
+    private static final int XSUM_SCHUNKS = (1 << XSUM_HIGH_EXP_BITS) + 3;
+
+    /**
+     * Bits in low part of mantissa.
+     */
+    private static final int XSUM_LOW_MANTISSA_BITS = 1 << XSUM_LOW_EXP_BITS;
+
+    /**
+     * Mask for low bits.
+     */
+    private static final long XSUM_LOW_MANTISSA_MASK = (1L << XSUM_LOW_MANTISSA_BITS) - 1;
+
+    /**
+     * Bits sums can carry into.
+     */
+    private static final int XSUM_SMALL_CARRY_BITS = (XSUM_SCHUNK_BITS - 1) - XSUM_MANTISSA_BITS;
+
+    /**
+     * Number of terms that can be added before carry propagation is needed.
+     */
+    private static final int XSUM_SMALL_CARRY_TERMS = (1 << XSUM_SMALL_CARRY_BITS) - 1;
+
+    public static class SmallAccumulator {
+        // Chunks making up small accumulator
+        private long[] chunk = new long[XSUM_SCHUNKS];
+        // If non-zero, +Inf, -Inf, or NaN
+        private long inf;
+        // If non-zero, a NaN value with payload
+        private long nan;
+        // Number of remaining adds before carry propagation must be done again
+        private int addsUntilPropagate = XSUM_SMALL_CARRY_TERMS;
+
+        /**
+         * Add an array of floating-point numbers to a small accumulator. Mixes calls of
+         * carryPropagate with calls of add1NoCarry.
+         */
+        public void addArray(double[] vec) {
+            int n = vec.length; // number of elements not added yet
+            int pos = 0;
+            while (n > 0) {
+                if (addsUntilPropagate == 0) {
+                    carryPropagate();
+                }
+                int m = (n <= addsUntilPropagate) ? n : addsUntilPropagate;
+                int nextPos = pos + m;
+                for (int i = pos; i < nextPos; i++) {
+                    add1NoCarry(vec[i]);
+                }
+                addsUntilPropagate -= m;
+                pos = nextPos;
+                n -= m;
+            }
+        }
+
+        /**
+         * Add one double to a small accumulator. This is equivalent to, but somewhat faster than,
+         * calling addArray() with a vector of one value.
+         */
+        public void add(double value) {
+            if (addsUntilPropagate == 0) {
+                carryPropagate();
+            }
+
+            add1NoCarry(value);
+
+            addsUntilPropagate--;
+        }
+
+        /**
+         * Propagate carries to next chunk in a small accumulator. Needs to be called often enough
+         * that accumulated carries don't overflow out the top, as indicated by
+         * {@code addsUntilPropagate}. Returns the index of the uppermost non-zero chunk (0 if
+         * number is zero).
+         *
+         * After carry propagation, the uppermost non-zero chunk will indicate the sign of the
+         * number, and will not be -1 (all 1s). It will be in the range -2^XSUM_LOW_MANTISSA_BITS to
+         * 2^XSUM_LOW_MANTISSA_BITS - 1. Lower chunks will be non-negative, and in the range from 0
+         * up to 2^XSUM_LOW_MANTISSA_BITS - 1.
+         */
+        private int carryPropagate() {
+            int u;
+            int uix;
+
+            done: {
+                // Set u to the index of the uppermost non-zero (for now) chunk, or
+                // return with value 0 if there is none.
+
+                for (u = XSUM_SCHUNKS - 1; chunk[u] == 0; u--) {
+                    if (u == 0) {
+                        uix = 0;
+                        break done;
+                    }
+                }
+
+                // Carry propagate, starting at the low-order chunks. Note that the
+                // loop limit of u may be increased inside the loop.
+
+                uix = -1; // indicates that a non-zero chunk has not been found yet
+
+                int i = 0; // set to the index of the next non-zero chunk, from bottom
+
+                // Quickly skip over unused low-order chunks. Done here at the start
+                // on the theory that there are often many unused low-order chunks,
+                // justifying some overhead to begin, but later stretches of unused
+                // chunks may not be as large.
+
+                int e = u - 3; // go only to 3 before so won't access beyond chunk array
+                do {
+                    if ((chunk[i] | chunk[i + 1] | chunk[i + 2] | chunk[i + 3]) != 0) {
+                        break;
+                    }
+                    i += 4;
+                } while (i <= e);
+
+                do {
+                    long c; // Set to the chunk at index i (next non-zero one)
+
+                    // Find the next non-zero chunk, setting i to its index, or break out
+                    // of loop if there is none. Note that the chunk at index u is not
+                    // necessarily non-zero - it was initially, but u or the chunk at u
+                    // may have changed.
+
+                    do {
+                        c = chunk[i];
+                        if (c != 0) {
+                            break;
+                        }
+                        i += 1;
+                    } while (i <= u);
+
+                    if (c == 0) {
+                        break;
+                    }
+
+                    // Propagate possible carry from this chunk to next chunk up.
+
+                    long chigh = c >> XSUM_LOW_MANTISSA_BITS;
+                    if (chigh == 0) {
+                        uix = i;
+                        i += 1;
+                        continue;  // no need to change this chunk
+                    }
+
+                    if (u == i) {
+                        if (chigh == -1) {
+                            uix = i;
+                            break; // don't propagate -1 into the region of all zeros above
+                        }
+                        u = i + 1; // we will change chunk[i+1] (now chunk[u]), so we'll need to look at it
+                    }
+
+                    long clow = c & XSUM_LOW_MANTISSA_MASK;
+                    if (clow != 0) {
+                        uix = i;
+                    }
+
+                    // We now change chunk[i] and add to chunk[i+1]. Note that i+1 should be
+                    // in range (no bigger than XSUM_SCHUNKS-1) if summing memory, since
+                    // the number of chunks is big enough to hold any sum, and we do not
+                    // store redundant chunks with values 0 or -1 above previously non-zero
+                    // chunks. But other add operations might cause overflow, in which
+                    // case we produce a NaN with all 1s as payload. (We can't reliably produce
+                    // an Inf of the right sign.)
+
+                    chunk[i] = clow;
+                    if (i + 1 >= XSUM_SCHUNKS) {
+                        addInfNan((XSUM_EXP_MASK << XSUM_MANTISSA_BITS) | XSUM_MANTISSA_MASK);
+                        u = i;
+                    } else {
+                        chunk[i + 1] += chigh;
+                    }
+
+                    i += 1;
+
+                } while (i <= u);
+
+                // Check again for the number being zero, since carry propagation might
+                // have created zero from something that initially looked non-zero.
+
+                if (uix < 0) {
+                    uix = 0;
+                    break done;
+                }
+
+                // While the uppermost chunk is negative, with value -1, combine it with
+                // the chunk below (if there is one) to produce the same number but with
+                // one fewer non-zero chunks.
+
+                while (chunk[uix] == -1 && uix > 0) {
+                    // Left shift of a negative number is undefined in C, where this code originates,
+                    // so a multiply is used - it's all presumably constant-folded by the compiler.
+                    chunk[uix - 1] += (-1L) * (1L << XSUM_LOW_MANTISSA_BITS);
+                    chunk[uix] = 0;
+                    uix -= 1;
+                }
+            }
+
+            // We can now add one less than the total allowed terms before the next carry propagate.
+
+            addsUntilPropagate = XSUM_SMALL_CARRY_TERMS - 1;
+
+            // Return index of uppermost non-zero chunk.
+
+            return uix;
+        }
+
+        /**
+         * Add one number to a small accumulator assuming no carry propagation is required.
+         */
+        private void add1NoCarry(double value) {
+            // Extract exponent and mantissa. Split exponent into high and low parts.
+            long ivalue = Double.doubleToRawLongBits(value);
+            int exp = (int) ((ivalue >> XSUM_MANTISSA_BITS) & XSUM_EXP_MASK);
+            long mantissa = ivalue & XSUM_MANTISSA_MASK;
+            int highExp = exp >> XSUM_LOW_EXP_BITS;
+            int lowExp = exp & XSUM_LOW_EXP_MASK;
+
+            // Categorize number as normal, denormalized, or Inf/NaN according to
+            // the value of the exponent field.
+
+            if (exp == 0) { // zero or denormalized
+                // If it's a zero (positive or negative), we do nothing.
+                if (mantissa == 0) {
+                    return;
+                }
+                // Denormalized mantissa has no implicit 1, but exponent is 1 not 0.
+                exp = lowExp = 1;
+            } else if (exp == XSUM_EXP_MASK) { // Inf or NaN
+                // Just update flags in accumulator structure.
+                addInfNan(ivalue);
+                return;
+            } else { // normalized
+                // OR in implicit 1 bit at top of mantissa
+                mantissa |= 1L << XSUM_MANTISSA_BITS;
+            }
+
+            // Use high part of exponent as index of chunk, and low part of
+            // exponent to give position within chunk. Fetch the two chunks
+            // that will be modified.
+
+            // Separate mantissa into two parts, after shifting, and add to (or
+            // subtract from) this chunk and the next higher chunk (which always
+            // exists since there are three extra ones at the top).
+
+            // Note that lowMantissa will have at most XSUM_LOW_MANTISSA_BITS bits,
+            // while highMantissa will have at most XSUM_MANTISSA_BITS bits, since
+            // even though highMantissa includes the extra implicit 1 bit, it will
+            // also be shifted right by at least one bit.
+
+            long lowMantissa = (mantissa << lowExp) & XSUM_LOW_MANTISSA_MASK;
+            long highMantissa = mantissa >> (XSUM_LOW_MANTISSA_BITS - lowExp);
+
+            // Add or subtract to or from the two affected chunks.
+
+            if (ivalue < 0) {
+                chunk[highExp] -= lowMantissa;
+                chunk[highExp + 1] -= highMantissa;
+            } else {
+                chunk[highExp] += lowMantissa;
+                chunk[highExp + 1] += highMantissa;
+            }
+        }
+
+        /**
+         * Add an inf or NaN to a small accumulator. This only changes the flags, not the chunks in
+         * the accumulator, which retains the sum of the finite terms (which is perhaps sometimes
+         * useful to access, though no function to do so is defined at present). A NaN with larger
+         * payload (seen as a 52-bit unsigned integer) takes precedence, with the sign of the NaN
+         * always being positive. This ensures that the order of summing NaN values doesn't matter.
+         */
+        private void addInfNan(long ivalue) {
+            long mantissa = ivalue & XSUM_MANTISSA_MASK;
+
+            if (mantissa == 0) { // Inf
+                if (inf == 0) { // no previous Inf
+                    inf = ivalue;
+                } else if (inf != ivalue) { // previous Inf was opposite sign
+                    inf = Double.doubleToRawLongBits(Double.NaN); // result will be a NaN
+                }
+            } else { // NaN
+                // Choose the NaN with the bigger payload and clear its sign.
+                // Using <= ensures that we will choose the first NaN over the previous zero.
+                if ((nan & XSUM_MANTISSA_MASK) <= mantissa) {
+                    nan = ivalue & ~XSUM_SIGN_MASK;
+                }
+            }
+        }
+
+        /**
+         * Return the result of rounding a small accumulator. The rounding mode is to nearest, with
+         * ties to even. The small accumulator may be modified by this operation (by carry
+         * propagation being done), but the value it represents should not change.
+         */
+        public double round() {
+            long intv;
+
+            // See if we have a NaN from one of the numbers being a NaN, in
+            // which case we return the NaN with largest payload, or an infinite
+            // result (+Inf, -Inf, or a NaN if both +Inf and -Inf occurred).
+            // Note that we do NOT return NaN if we have both an infinite number
+            // and a sum of other numbers that overflows with opposite sign,
+            // since there is no real ambiguity regarding the sign in such a case.
+
+            if (nan != 0) {
+                return Double.longBitsToDouble(nan);
+            }
+
+            if (inf != 0) {
+                return Double.longBitsToDouble(inf);
+            }
+
+            // If none of the numbers summed were infinite or NaN, we proceed to
+            // propagate carries, as a preliminary to finding the magnitude of
+            // the sum. This also ensures that the sign of the result can be
+            // determined from the uppermost non-zero chunk.
+
+            // We also find the index, i, of this uppermost non-zero chunk, as
+            // the value returned by carryPropagate, and set ivalue to
+            // chunk[i]. Note that ivalue will not be 0 or -1, unless
+            // i is 0 (the lowest chunk), in which case it will be handled by
+            // the code for denormalized numbers.
+
+            int i = carryPropagate();
+
+            long ivalue = chunk[i];
+
+            // Handle a possible denormalized number, including zero.
+
+            if (i <= 1) {
+                // Check for zero value, in which case we can return immediately.
+                if (ivalue == 0) {
+                    return 0.0;
+                }
+
+                // Check if it is actually a denormalized number. It always is if only
+                // the lowest chunk is non-zero. If the highest non-zero chunk is the
+                // next-to-lowest, we check the magnitude of the absolute value.
+                // Note that the real exponent is 1 (not 0), so we need to shift right
+                // by 1 here.
+
+                if (i == 0) {
+                    intv = ivalue >= 0 ? ivalue : -ivalue;
+                    intv >>= 1;
+                    if (ivalue < 0) {
+                        intv |= XSUM_SIGN_MASK;
+                    }
+                    return Double.longBitsToDouble(intv);
+                } else {
+                    // Note: Left shift of a negative number is undefined in C (the original language),
+                    // so do a multiply instead, which is probably optimized to a shift.
+                    intv = ivalue * (1L << (XSUM_LOW_MANTISSA_BITS - 1)) + (chunk[0] >> 1);
+                    if (intv < 0) {
+                        if (intv > -(1L << XSUM_MANTISSA_BITS)) {
+                            intv = (-intv) | XSUM_SIGN_MASK;
+                            return Double.longBitsToDouble(intv);
+                        }
+                    } else {
+                        if (intv < 1L << XSUM_MANTISSA_BITS) {
+                            return Double.longBitsToDouble(intv);
+                        }
+                    }
+                    // otherwise, it's not actually denormalized, so fall through to below
+                }
+            }
+
+            // Find the location of the uppermost 1 bit in the absolute value of the
+            // upper chunk by converting it (as a signed integer) to a floating point
+            // value, and looking at the exponent. Then set 'more' to the number of
+            // bits from the lower chunk (and maybe the next lower) that are needed
+            // to fill out the mantissa of the result (including the top implicit 1 bit), plus two
+            // extra bits to help decide on rounding. For negative numbers, it may turn out later
+            // that we need another bit because negating a negative value may carry out of the top
+            // here, but not once more bits are shifted into the bottom later on.
+
+            intv = Double.doubleToRawLongBits(ivalue);
+            int e = (int) ((intv >> XSUM_MANTISSA_BITS) & XSUM_EXP_MASK); // e-bias is in 0..32
+            int more = 2 + XSUM_MANTISSA_BITS + XSUM_EXP_BIAS - e;
+
+            // Change 'ivalue' to put in 'more' bits from lower chunks into the bottom.
+            // Also set 'j' to the index of the lowest chunk from which these bits came,
+            // and 'lower' to the remaining bits of that chunk not now in 'ivalue'.
+            // Note that 'lower' initially has at least one bit in it, which we can
+            // later move into 'ivalue' if it turns out that one more bit is needed.
+
+            ivalue *= 1L << more;  // multiply, since << of negative undefined
+            int j = i - 1;
+            long lower = chunk[j];  // must exist, since denormalized if i==0
+            if (more >= XSUM_LOW_MANTISSA_BITS) {
+                more -= XSUM_LOW_MANTISSA_BITS;
+                ivalue += lower << more;
+                j -= 1;
+                lower = j < 0 ? 0 : chunk[j];
+            }
+            ivalue += lower >> (XSUM_LOW_MANTISSA_BITS - more);
+            lower &= (1L << (XSUM_LOW_MANTISSA_BITS - more)) - 1;
+
+            // Decide on rounding, with separate code for positive and negative values.
+
+            // At this point, 'ivalue' has the signed mantissa bits, plus two extra
+            // bits, with 'e' recording the exponent position for these within their
+            // top chunk. For positive 'ivalue', the bits in 'lower' and chunks
+            // below 'j' add to the absolute value; for negative 'ivalue' they
+            // subtract.
+
+            // After setting 'ivalue' to the tentative unsigned mantissa
+            // (shifted left 2), and 'intv' to have the correct sign, this
+            // code goes to done_rounding if it finds that just discarding lower
+            // order bits is correct, and to round_away_from_zero if instead the
+            // magnitude should be increased by one in the lowest mantissa bit.
+
+            done_rounding: {
+                round_away_from_zero: {
+
+                    if (ivalue >= 0) { // number is positive, lower bits are added to magnitude
+                        intv = 0; // positive sign
+
+                        if ((ivalue & 2) == 0) { // extra bits are 0x
+                            break done_rounding;
+                        }
+
+                        if ((ivalue & 1) != 0) { // extra bits are 11
+                            break round_away_from_zero;
+                        }
+
+                        if ((ivalue & 4) != 0) { // low bit is 1 (odd), extra bits are 10
+                            break round_away_from_zero;
+                        }
+
+                        if (lower == 0) { // see if any lower bits are non-zero
+                            while (j > 0) {
+                                j -= 1;
+                                if (chunk[j] != 0) {
+                                    lower = 1;
+                                    break;
+                                }
+                            }
+                        }
+
+                        if (lower != 0) { // low bit 0 (even), extra bits 10, non-zero lower bits
+                            break round_away_from_zero;
+                        } else { // low bit 0 (even), extra bits 10, all lower bits 0
+                            break done_rounding;
+                        }
+                    } else { // number is negative, lower bits are subtracted from magnitude
+
+                        // Check for a negative 'ivalue' that when negated doesn't contain a full
+                        // mantissa's worth of bits, plus one to help rounding. If so, move one
+                        // more bit into 'ivalue' from 'lower' (and remove it from 'lower').
+                        // This happens when the negation of the upper part of 'ivalue' has the
+                        // form 10000... but the negation of the full 'ivalue' is not 10000...
+
+                        if (((-ivalue) & (1L << (XSUM_MANTISSA_BITS + 2))) == 0) {
+                            long pos = 1L << (XSUM_LOW_MANTISSA_BITS - 1 - more);
+                            ivalue *= 2; // note that left shift undefined if ivalue is negative
+                            if ((lower & pos) != 0) {
+                                ivalue += 1;
+                                lower &= ~pos;
+                            }
+                            e -= 1;
+                        }
+
+                        intv = XSUM_SIGN_MASK; // negative sign
+                        ivalue = -ivalue; // ivalue now contains the absolute value
+
+                        if ((ivalue & 3) == 3) { // extra bits are 11
+                            break round_away_from_zero;
+                        }
+
+                        if ((ivalue & 3) <= 1) { // extra bits are 00 or 01
+                            break done_rounding;
+                        }
+
+                        if ((ivalue & 4) == 0) { // low bit is 0 (even), extra bits are 10
+                            break done_rounding;
+                        }
+
+                        if (lower == 0) { // see if any lower bits are non-zero
+                            while (j > 0) {
+                                j -= 1;
+                                if (chunk[j] != 0) {
+                                    lower = 1;
+                                    break;
+                                }
+                            }
+                        }
+
+                        if (lower != 0) { // low bit 1 (odd), extra bits 10, non-zero lower bits
+                            break done_rounding;
+                        } else { // low bit 1 (odd), extra bits are 10, lower bits are all 0
+                            break round_away_from_zero;
+                        }
+
+                    }
+                } // round_away_from_zero:
+
+                // Round away from zero, then check for carry having propagated out the
+                // top, and shift if so.
+
+                ivalue += 4; // add 1 to low-order mantissa bit
+                if ((ivalue & (1L << (XSUM_MANTISSA_BITS + 3))) != 0) {
+                    ivalue >>= 1;
+                    e += 1;
+                }
+            } // done_rounding:
+
+            // Get rid of the bottom 2 bits that were used to decide on rounding.
+
+            ivalue >>= 2;
+
+            // Adjust to the true exponent, accounting for where this chunk is.
+
+            e += (i << XSUM_LOW_EXP_BITS) - XSUM_EXP_BIAS - XSUM_MANTISSA_BITS;
+
+            // If exponent has overflowed, change to plus or minus Inf and return.
+
+            if (e >= XSUM_EXP_MASK) {
+                intv |= XSUM_EXP_MASK << XSUM_MANTISSA_BITS;
+                return Double.longBitsToDouble(intv);
+            }
+
+            // Put exponent and mantissa into intv, which already has the sign,
+            // then return it as a double.
+
+            intv += ((long) e) << XSUM_MANTISSA_BITS;
+            intv += ivalue & XSUM_MANTISSA_MASK; /* mask out the implicit 1 bit */
+
+            return Double.longBitsToDouble(intv);
+        }
+
+    }
+
+}
diff --git a/mx.graalpython/copyrights/overrides b/mx.graalpython/copyrights/overrides
index 23486ad702..d58d981571 100644
--- a/mx.graalpython/copyrights/overrides
+++ b/mx.graalpython/copyrights/overrides
@@ -696,6 +696,7 @@ graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence
 graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/SequenceStorage.java,zippy.copyright
 graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/SequenceStorageFactory.java,zippy.copyright
 graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/SequenceStoreException.java,zippy.copyright
+graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/XSum.java,xsum.copyright
 graalpython/python-libposix/src/fork_exec.c,python.copyright
 graalpython/python-venvlauncher/src/venvlauncher.c,python.copyright
 graalpython/lib-graalpython/modules/graalpy-config.py,python.copyright
diff --git a/mx.graalpython/copyrights/xsum.copyright.star b/mx.graalpython/copyrights/xsum.copyright.star
new file mode 100644
index 0000000000..9f0e45e18d
--- /dev/null
+++ b/mx.graalpython/copyrights/xsum.copyright.star
@@ -0,0 +1,22 @@
+/* Copyright 2024, 2026 Oracle and/or its affiliates. All rights reserved.
+   Copyright 2015, 2018, 2021, 2024 Radford M. Neal
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+   LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+   OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+   WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
diff --git a/mx.graalpython/copyrights/xsum.copyright.star.regex b/mx.graalpython/copyrights/xsum.copyright.star.regex
new file mode 100644
index 0000000000..8af4cd2a3b
--- /dev/null
+++ b/mx.graalpython/copyrights/xsum.copyright.star.regex
@@ -0,0 +1,23 @@
+/\* Copyright (20[0-9][0-9], )*(20[0-9][0-9]) Oracle and/or its affiliates\.(?: All rights reserved\.)?
+   Copyright 2015, 2018, 2021, 2024 Radford M. Neal
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files \(the
+   "Software"\), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+   LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+   OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+   WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+\*/
+.*

From af9fa663c6c1c93adec7906f2b479fac41446e32 Mon Sep 17 00:00:00 2001
From: Fabio Niephaus <fabio.niephaus@oracle.com>
Date: Thu, 15 Jan 2026 10:48:42 +0100
Subject: [PATCH 2/5] Adapt `XSum` to CPython semantics.

---
 .../builtins/modules/MathModuleBuiltins.java  | 20 +++++++++--
 .../com/oracle/graal/python/util/XSum.java    | 36 +++++++++++++++----
 2 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java
index 53baea66cf..ba3a5e7e55 100644
--- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java
+++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java
@@ -888,9 +888,25 @@ static double doIt(VirtualFrame frame, Object iterable,
                     exhausted = true;
                 }
             }
+
+            if (acc.isNaNResult()) {
+                return Double.NaN;
+            }
+
+            if (acc.isInfiniteResult()) {
+                double result = acc.getInfiniteResult();
+                if (Double.isNaN(result)) {
+                    throw raiseNode.raise(inliningTarget, ValueError, ErrorMessages.NEG_INF_PLUS_INF_IN);
+                } else {
+                    assert Double.isInfinite(result);
+                    return result;
+                }
+            }
+
             double result = acc.round();
-            if (Double.isNaN(result)) {
-                throw raiseNode.raise(inliningTarget, ValueError, ErrorMessages.NEG_INF_PLUS_INF_IN);
+            // round() returns +Inf or -Inf if the exponent has overflowed
+            if (Double.isInfinite(result)) {
+                throw raiseNode.raise(inliningTarget, OverflowError, ErrorMessages.INTERMEDIATE_OVERFLOW_IN, "fsum");
             } else {
                 return result;
             }
diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/XSum.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/XSum.java
index 5dc7d18648..535ec7c1db 100644
--- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/XSum.java
+++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/XSum.java
@@ -378,6 +378,28 @@ private void addInfNan(long ivalue) {
             }
         }
 
+        /**
+         * Return true if any of the added numbers was a NaN.
+         */
+        public boolean isNaNResult() {
+            return nan != 0;
+        }
+
+        /**
+         * Return true if +Inf, -Inf, or both occurred.
+         */
+        public boolean isInfiniteResult() {
+            return inf != 0;
+        }
+
+        /**
+         * Return +Inf, -Inf, or a NaN if both +Inf and -Inf occurred.
+         */
+        public double getInfiniteResult() {
+            assert isInfiniteResult();
+            return Double.longBitsToDouble(inf);
+        }
+
         /**
          * Return the result of rounding a small accumulator. The rounding mode is to nearest, with
          * ties to even. The small accumulator may be modified by this operation (by carry
@@ -393,13 +415,15 @@ public double round() {
             // and a sum of other numbers that overflows with opposite sign,
             // since there is no real ambiguity regarding the sign in such a case.
 
-            if (nan != 0) {
-                return Double.longBitsToDouble(nan);
-            }
+            assert !isNaNResult() : "isNaNResult() must be handled before calling round()";
+            // if (nan != 0) {
+            // return Double.longBitsToDouble(nan);
+            // }
 
-            if (inf != 0) {
-                return Double.longBitsToDouble(inf);
-            }
+            assert !isInfiniteResult() : "isInfiniteResult() must be handled before calling round()";
+            // if (inf != 0) {
+            // return Double.longBitsToDouble(inf);
+            // }
 
             // If none of the numbers summed were infinite or NaN, we proceed to
             // propagate carries, as a preliminary to finding the magnitude of

From e38a8540d278371358b8d93bbf8f873739fd9ef1 Mon Sep 17 00:00:00 2001
From: Fabio Niephaus <fabio.niephaus@oracle.com>
Date: Wed, 14 Jan 2026 09:33:14 +0100
Subject: [PATCH 3/5] Replace profiling with loop count reporting.

---
 .../python/builtins/modules/MathModuleBuiltins.java   | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java
index ba3a5e7e55..b14c03e7b2 100644
--- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java
+++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java
@@ -108,6 +108,7 @@
 import com.oracle.truffle.api.dsl.Specialization;
 import com.oracle.truffle.api.dsl.TypeSystemReference;
 import com.oracle.truffle.api.frame.VirtualFrame;
+import com.oracle.truffle.api.nodes.LoopNode;
 import com.oracle.truffle.api.nodes.Node;
 import com.oracle.truffle.api.profiles.InlinedConditionProfile;
 import com.oracle.truffle.api.profiles.InlinedLoopConditionProfile;
@@ -874,18 +875,20 @@ static double doIt(VirtualFrame frame, Object iterable,
                         @Cached PyObjectGetIter getIter,
                         @Cached PyIterNextNode nextNode,
                         @Cached PyFloatAsDoubleNode asDoubleNode,
-                        @Cached InlinedLoopConditionProfile loopProfile,
                         @Cached PRaiseNode raiseNode) {
             Object iterator = getIter.execute(frame, inliningTarget, iterable);
 
-            boolean exhausted = false;
             var acc = new XSum.SmallAccumulator();
-            while (loopProfile.profile(inliningTarget, !exhausted)) {
+            int nbrIter = 0;
+            while (true) {
                 try {
                     Object next = nextNode.execute(frame, inliningTarget, iterator);
+                    nbrIter++;
                     acc.add(asDoubleNode.execute(frame, inliningTarget, next));
                 } catch (IteratorExhausted e) {
-                    exhausted = true;
+                    break;
+                } finally {
+                    LoopNode.reportLoopCount(inliningTarget, nbrIter);
                 }
             }
 

From 1c65a1134035cbef096ec4e724df89e9cb971c25 Mon Sep 17 00:00:00 2001
From: Fabio Niephaus <fabio.niephaus@oracle.com>
Date: Wed, 14 Jan 2026 11:15:44 +0100
Subject: [PATCH 4/5] Inline `PyIterNextNode` for interp performance.

---
 .../builtins/modules/MathModuleBuiltins.java  | 21 +++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java
index b14c03e7b2..d2bbb89d03 100644
--- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java
+++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java
@@ -49,6 +49,9 @@
 import com.oracle.graal.python.builtins.objects.ints.IntBuiltins;
 import com.oracle.graal.python.builtins.objects.ints.PInt;
 import com.oracle.graal.python.builtins.objects.tuple.PTuple;
+import com.oracle.graal.python.builtins.objects.type.TpSlots;
+import com.oracle.graal.python.builtins.objects.type.slots.TpSlot;
+import com.oracle.graal.python.builtins.objects.type.slots.TpSlotIterNext;
 import com.oracle.graal.python.lib.IteratorExhausted;
 import com.oracle.graal.python.lib.PyBoolCheckNode;
 import com.oracle.graal.python.lib.PyFloatAsDoubleNode;
@@ -869,24 +872,38 @@ protected ArgumentClinicProvider getArgumentClinic() {
     @Builtin(name = "fsum", minNumOfPositionalArgs = 1)
     @GenerateNodeFactory
     public abstract static class FsumNode extends PythonUnaryBuiltinNode {
+
+        /**
+         * Note: this specialization uses an inlined version of {@link PyIterNextNode} with the
+         * tp_iternext slot lookup hoisted out of the loop.
+         */
         @Specialization
         static double doIt(VirtualFrame frame, Object iterable,
                         @Bind Node inliningTarget,
                         @Cached PyObjectGetIter getIter,
-                        @Cached PyIterNextNode nextNode,
+                        @Cached GetClassNode nextNodeGetClassNode,
+                        @Cached TpSlots.GetCachedTpSlotsNode nextNodeGetSlots,
+                        @Cached TpSlotIterNext.CallSlotTpIterNextNode nextNodeCallNext,
+                        @Cached IsBuiltinObjectProfile nextNodeStopIterationProfile,
                         @Cached PyFloatAsDoubleNode asDoubleNode,
                         @Cached PRaiseNode raiseNode) {
             Object iterator = getIter.execute(frame, inliningTarget, iterable);
 
+            TpSlot tpIternext = nextNodeGetSlots.execute(inliningTarget, nextNodeGetClassNode.execute(inliningTarget, iterator)).tp_iternext();
+            assert tpIternext != null;
+
             var acc = new XSum.SmallAccumulator();
             int nbrIter = 0;
             while (true) {
                 try {
-                    Object next = nextNode.execute(frame, inliningTarget, iterator);
+                    Object next = nextNodeCallNext.execute(frame, inliningTarget, tpIternext, iterator);
                     nbrIter++;
                     acc.add(asDoubleNode.execute(frame, inliningTarget, next));
                 } catch (IteratorExhausted e) {
                     break;
+                } catch (PException e) {
+                    e.expectStopIteration(inliningTarget, nextNodeStopIterationProfile);
+                    break;
                 } finally {
                     LoopNode.reportLoopCount(inliningTarget, nbrIter);
                 }

From f2ee1781df7b638f339cb3ae24b6c7ad3596c0f1 Mon Sep 17 00:00:00 2001
From: Fabio Niephaus <fabio.niephaus@oracle.com>
Date: Wed, 14 Jan 2026 11:48:47 +0100
Subject: [PATCH 5/5] Ensure loopCounts never overflow.

---
 .../builtins/modules/BuiltinFunctions.java    | 24 ++++++++++++-------
 .../builtins/modules/MathModuleBuiltins.java  |  8 ++++---
 .../functools/FunctoolsModuleBuiltins.java    | 12 +++++-----
 .../objects/common/HashingStorageNodes.java   | 22 ++++++++---------
 .../objects/dict/DictReprBuiltin.java         | 12 +++++-----
 .../objects/dict/DictViewBuiltins.java        | 15 +++++++-----
 .../oracle/graal/python/util/PythonUtils.java |  2 +-
 7 files changed, 53 insertions(+), 42 deletions(-)

diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BuiltinFunctions.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BuiltinFunctions.java
index 1900f38741..4afa9c565f 100644
--- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BuiltinFunctions.java
+++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BuiltinFunctions.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, 2025, Oracle and/or its affiliates.
+ * Copyright (c) 2017, 2026, Oracle and/or its affiliates.
  * Copyright (c) 2013, Regents of the University of California
  *
  * All rights reserved.
@@ -498,19 +498,21 @@ static boolean doObject(VirtualFrame frame, Object object,
                         @Cached PyIterNextNode nextNode,
                         @Cached PyObjectIsTrueNode isTrueNode) {
             Object iterator = getIter.execute(frame, inliningTarget, object);
-            int nbrIter = 0;
+            int loopCount = 0;
 
             while (true) {
                 try {
                     Object next = nextNode.execute(frame, inliningTarget, iterator);
-                    nbrIter++;
+                    if (CompilerDirectives.hasNextTier() && loopCount < Integer.MAX_VALUE) {
+                        loopCount++;
+                    }
                     if (!isTrueNode.execute(frame, next)) {
                         return false;
                     }
                 } catch (IteratorExhausted e) {
                     break;
                 } finally {
-                    LoopNode.reportLoopCount(inliningTarget, nbrIter);
+                    LoopNode.reportLoopCount(inliningTarget, loopCount);
                 }
             }
 
@@ -549,19 +551,21 @@ static boolean doObject(VirtualFrame frame, Object object,
                         @Cached PyIterNextNode nextNode,
                         @Cached PyObjectIsTrueNode isTrueNode) {
             Object iterator = getIter.execute(frame, inliningTarget, object);
-            int nbrIter = 0;
+            int loopCount = 0;
 
             while (true) {
                 try {
                     Object next = nextNode.execute(frame, inliningTarget, iterator);
-                    nbrIter++;
+                    if (CompilerDirectives.hasNextTier() && loopCount < Integer.MAX_VALUE) {
+                        loopCount++;
+                    }
                     if (isTrueNode.execute(frame, next)) {
                         return true;
                     }
                 } catch (IteratorExhausted e) {
                     break;
                 } finally {
-                    LoopNode.reportLoopCount(inliningTarget, nbrIter);
+                    LoopNode.reportLoopCount(inliningTarget, loopCount);
                 }
             }
 
@@ -1563,11 +1567,13 @@ static Object minmaxSequenceWithKey(VirtualFrame frame, Node inliningTarget, Obj
                         currentKey = nextKey;
                         currentValue = nextValue;
                     }
-                    loopCount++;
+                    if (CompilerDirectives.hasNextTier() && loopCount < Integer.MAX_VALUE) {
+                        loopCount++;
+                    }
                 } catch (IteratorExhausted e) {
                     break;
                 } finally {
-                    LoopNode.reportLoopCount(inliningTarget, loopCount < 0 ? Integer.MAX_VALUE : loopCount);
+                    LoopNode.reportLoopCount(inliningTarget, loopCount);
                 }
             }
 
diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java
index d2bbb89d03..f65249f7a2 100644
--- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java
+++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java
@@ -893,11 +893,13 @@ static double doIt(VirtualFrame frame, Object iterable,
             assert tpIternext != null;
 
             var acc = new XSum.SmallAccumulator();
-            int nbrIter = 0;
+            int loopCount = 0;
             while (true) {
                 try {
                     Object next = nextNodeCallNext.execute(frame, inliningTarget, tpIternext, iterator);
-                    nbrIter++;
+                    if (CompilerDirectives.hasNextTier() && loopCount < Integer.MAX_VALUE) {
+                        loopCount++;
+                    }
                     acc.add(asDoubleNode.execute(frame, inliningTarget, next));
                 } catch (IteratorExhausted e) {
                     break;
@@ -905,7 +907,7 @@ static double doIt(VirtualFrame frame, Object iterable,
                     e.expectStopIteration(inliningTarget, nextNodeStopIterationProfile);
                     break;
                 } finally {
-                    LoopNode.reportLoopCount(inliningTarget, nbrIter);
+                    LoopNode.reportLoopCount(inliningTarget, loopCount);
                 }
             }
 
diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/functools/FunctoolsModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/functools/FunctoolsModuleBuiltins.java
index 77671ee999..14147e88fd 100644
--- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/functools/FunctoolsModuleBuiltins.java
+++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/functools/FunctoolsModuleBuiltins.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * The Universal Permissive License (UPL), Version 1.0
@@ -45,7 +45,6 @@
 import static com.oracle.graal.python.nodes.BuiltinNames.T_FUNCTOOLS;
 import static com.oracle.graal.python.nodes.ErrorMessages.REDUCE_EMPTY_SEQ;
 import static com.oracle.graal.python.nodes.ErrorMessages.S_ARG_N_MUST_SUPPORT_ITERATION;
-import static com.oracle.truffle.api.nodes.LoopNode.reportLoopCount;
 
 import java.util.List;
 
@@ -75,6 +74,7 @@
 import com.oracle.truffle.api.dsl.NodeFactory;
 import com.oracle.truffle.api.dsl.Specialization;
 import com.oracle.truffle.api.frame.VirtualFrame;
+import com.oracle.truffle.api.nodes.LoopNode;
 import com.oracle.truffle.api.nodes.Node;
 import com.oracle.truffle.api.profiles.InlinedConditionProfile;
 
@@ -136,7 +136,7 @@ Object doReduce(VirtualFrame frame, Object function, Object sequence, Object ini
 
             Object[] args = new Object[2];
 
-            int count = 0;
+            int loopCount = 0;
             while (true) {
                 Object op2;
                 try {
@@ -152,11 +152,11 @@ Object doReduce(VirtualFrame frame, Object function, Object sequence, Object ini
                     args[1] = op2;
                     result = callNode.execute(frame, function, args);
                 }
-                if (CompilerDirectives.hasNextTier()) {
-                    count++;
+                if (CompilerDirectives.hasNextTier() && loopCount < Integer.MAX_VALUE) {
+                    loopCount++;
                 }
             }
-            reportLoopCount(this, count >= 0 ? count : Integer.MAX_VALUE);
+            LoopNode.reportLoopCount(this, loopCount);
 
             if (result == null) {
                 throw raiseNode.raise(inliningTarget, PythonBuiltinClassType.TypeError, REDUCE_EMPTY_SEQ);
diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/HashingStorageNodes.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/HashingStorageNodes.java
index f1e0013aeb..cfb38025d2 100644
--- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/HashingStorageNodes.java
+++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/HashingStorageNodes.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * The Universal Permissive License (UPL), Version 1.0
@@ -1216,12 +1216,12 @@ static boolean doIt(Frame frame, Node inliningTarget, HashingStorage aStorage, H
             if (lenANode.execute(inliningTarget, aStorage) != lenBNode.execute(inliningTarget, bStorage)) {
                 return false;
             }
-            int index = 0;
+            int loopCount = 0;
             try {
                 HashingStorageIterator aIter = getAIter.execute(inliningTarget, aStorage);
                 while (loopProfile.profile(inliningTarget, aIterNext.execute(inliningTarget, aStorage, aIter))) {
-                    if (CompilerDirectives.hasNextTier()) {
-                        index++;
+                    if (CompilerDirectives.hasNextTier() && loopCount < Integer.MAX_VALUE) {
+                        loopCount++;
                     }
 
                     Object aKey = aIterKey.execute(inliningTarget, aStorage, aIter);
@@ -1236,8 +1236,8 @@ static boolean doIt(Frame frame, Node inliningTarget, HashingStorage aStorage, H
                     return false;
                 }
             } finally {
-                if (index != 0) {
-                    LoopNode.reportLoopCount(inliningTarget, index);
+                if (loopCount != 0) {
+                    LoopNode.reportLoopCount(inliningTarget, loopCount);
                 }
             }
             return true;
@@ -1287,19 +1287,19 @@ static Object doIt(Frame frame, Node callbackInliningTarget, HashingStorage stor
                         @Cached HashingStorageGetIterator getIter,
                         @Cached HashingStorageIteratorNext iterNext,
                         @Cached InlinedLoopConditionProfile loopProfile) {
-            int index = 0;
+            int loopCount = 0;
             Object accumulator = accumulatorIn;
             try {
                 HashingStorageIterator aIter = getIter.execute(inliningTarget, storage);
                 while (loopProfile.profile(inliningTarget, iterNext.execute(inliningTarget, storage, aIter))) {
-                    if (CompilerDirectives.hasNextTier()) {
-                        index++;
+                    if (CompilerDirectives.hasNextTier() && loopCount < Integer.MAX_VALUE) {
+                        loopCount++;
                     }
                     accumulator = callback.execute(frame, callbackInliningTarget, storage, aIter, accumulator);
                 }
             } finally {
-                if (index != 0) {
-                    LoopNode.reportLoopCount(getIter, index);
+                if (loopCount != 0) {
+                    LoopNode.reportLoopCount(getIter, loopCount);
                 }
             }
             return accumulator;
diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictReprBuiltin.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictReprBuiltin.java
index 6828f40789..d3c50a45c9 100644
--- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictReprBuiltin.java
+++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictReprBuiltin.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * The Universal Permissive License (UPL), Version 1.0
@@ -407,7 +407,7 @@ static void repr(VirtualFrame frame, POrderedDict dict, TruffleStringBuilder sb,
                         @Cached FormatKeyValueDictRepr formatKeyValueDictRepr) {
             Object oditems = callMethod.execute(frame, inliningTarget, dict, T_ITEMS);
             ReprState s = new ReprState(dict, sb, false);
-            int count = 0;
+            int loopCount = 0;
             try {
                 Object iter = getIter.execute(frame, inliningTarget, oditems);
                 while (true) {
@@ -417,8 +417,8 @@ static void repr(VirtualFrame frame, POrderedDict dict, TruffleStringBuilder sb,
                     } catch (IteratorExhausted e) {
                         break;
                     }
-                    if (CompilerDirectives.hasNextTier()) {
-                        count++;
+                    if (CompilerDirectives.hasNextTier() && loopCount < Integer.MAX_VALUE) {
+                        loopCount++;
                     }
                     assert PGuards.isPTuple(next);
                     ObjectSequenceStorage item = (ObjectSequenceStorage) ((PTuple) next).getSequenceStorage();
@@ -427,8 +427,8 @@ static void repr(VirtualFrame frame, POrderedDict dict, TruffleStringBuilder sb,
                     formatKeyValueDictRepr.execute(key, value, s);
                 }
             } finally {
-                if (count != 0) {
-                    LoopNode.reportLoopCount(inliningTarget, count);
+                if (loopCount != 0) {
+                    LoopNode.reportLoopCount(inliningTarget, loopCount);
                 }
             }
         }
diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictViewBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictViewBuiltins.java
index 92dae6972f..a82ffa0a3a 100644
--- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictViewBuiltins.java
+++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictViewBuiltins.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * The Universal Permissive License (UPL), Version 1.0
@@ -46,9 +46,9 @@
 import java.util.List;
 
 import com.oracle.graal.python.PythonLanguage;
+import com.oracle.graal.python.annotations.Builtin;
 import com.oracle.graal.python.annotations.Slot;
 import com.oracle.graal.python.annotations.Slot.SlotKind;
-import com.oracle.graal.python.annotations.Builtin;
 import com.oracle.graal.python.builtins.CoreFunctions;
 import com.oracle.graal.python.builtins.PythonBuiltinClassType;
 import com.oracle.graal.python.builtins.PythonBuiltins;
@@ -75,7 +75,6 @@
 import com.oracle.graal.python.builtins.objects.type.slots.TpSlotBinaryOp.BinaryOpBuiltinNode;
 import com.oracle.graal.python.builtins.objects.type.slots.TpSlotLen.LenBuiltinNode;
 import com.oracle.graal.python.builtins.objects.type.slots.TpSlotRichCompare;
-import com.oracle.graal.python.lib.RichCmpOp;
 import com.oracle.graal.python.builtins.objects.type.slots.TpSlotSqContains.SqContainsBuiltinNode;
 import com.oracle.graal.python.lib.IteratorExhausted;
 import com.oracle.graal.python.lib.PyIterNextNode;
@@ -84,12 +83,14 @@
 import com.oracle.graal.python.lib.PyObjectRichCompareBool;
 import com.oracle.graal.python.lib.PyObjectSizeNode;
 import com.oracle.graal.python.lib.PySequenceContainsNode;
+import com.oracle.graal.python.lib.RichCmpOp;
 import com.oracle.graal.python.nodes.PNodeWithContext;
 import com.oracle.graal.python.nodes.function.PythonBuiltinBaseNode;
 import com.oracle.graal.python.nodes.function.builtins.PythonBinaryBuiltinNode;
 import com.oracle.graal.python.nodes.function.builtins.PythonUnaryBuiltinNode;
 import com.oracle.graal.python.runtime.object.PFactory;
 import com.oracle.graal.python.runtime.sequence.storage.SequenceStorage;
+import com.oracle.truffle.api.CompilerDirectives;
 import com.oracle.truffle.api.dsl.Bind;
 import com.oracle.truffle.api.dsl.Cached;
 import com.oracle.truffle.api.dsl.Cached.Exclusive;
@@ -308,7 +309,7 @@ public boolean doIt(VirtualFrame frame, Object self, Object other,
                         @Cached PyObjectIsTrueNode isTrueNode) {
             Object iterator = getIterNode.execute(frame, inliningTarget, self);
             boolean ok = checkAll;
-            int i = 0;
+            int loopCount = 0;
             try {
                 while (loopConditionProfile.profile(inliningTarget, checkAll && ok || !checkAll && !ok)) {
                     Object item;
@@ -318,10 +319,12 @@ public boolean doIt(VirtualFrame frame, Object self, Object other,
                         break;
                     }
                     ok = isTrueNode.execute(frame, containsNode.execute(frame, inliningTarget, other, item));
-                    i++;
+                    if (CompilerDirectives.hasNextTier() && loopCount < Integer.MAX_VALUE) {
+                        loopCount++;
+                    }
                 }
             } finally {
-                LoopNode.reportLoopCount(this, i < 0 ? Integer.MAX_VALUE : i);
+                LoopNode.reportLoopCount(this, loopCount);
             }
             return ok;
         }
diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/PythonUtils.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/PythonUtils.java
index 5dafef691c..ecf1d7ab58 100644
--- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/PythonUtils.java
+++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/PythonUtils.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * The Universal Permissive License (UPL), Version 1.0