From 4cb1ebfbf88b3443ac10f5b002260d89042c8052 Mon Sep 17 00:00:00 2001 From: Fabio Niephaus Date: Tue, 13 Jan 2026 20:28:25 +0100 Subject: [PATCH 1/5] Use `XSum` port for `math.fsum()`. --- .../builtins/modules/MathModuleBuiltins.java | 116 +--- .../com/oracle/graal/python/util/XSum.java | 624 ++++++++++++++++++ mx.graalpython/copyrights/overrides | 1 + mx.graalpython/copyrights/xsum.copyright.star | 22 + .../copyrights/xsum.copyright.star.regex | 23 + 5 files changed, 680 insertions(+), 106 deletions(-) create mode 100644 graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/XSum.java create mode 100644 mx.graalpython/copyrights/xsum.copyright.star create mode 100644 mx.graalpython/copyrights/xsum.copyright.star.regex diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java index 532614470f..53baea66cf 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2025, Oracle and/or its affiliates. + * Copyright (c) 2017, 2026, Oracle and/or its affiliates. * Copyright (c) 2014, Regents of the University of California * * All rights reserved. 
@@ -34,7 +34,6 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.math.MathContext; -import java.util.Arrays; import java.util.List; import com.oracle.graal.python.PythonLanguage; @@ -91,6 +90,7 @@ import com.oracle.graal.python.runtime.exception.PException; import com.oracle.graal.python.runtime.object.PFactory; import com.oracle.graal.python.util.OverflowException; +import com.oracle.graal.python.util.XSum; import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.CompilationFinal; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; @@ -868,7 +868,6 @@ protected ArgumentClinicProvider getArgumentClinic() { @Builtin(name = "fsum", minNumOfPositionalArgs = 1) @GenerateNodeFactory public abstract static class FsumNode extends PythonUnaryBuiltinNode { - @Specialization static double doIt(VirtualFrame frame, Object iterable, @Bind Node inliningTarget, @@ -877,119 +876,24 @@ static double doIt(VirtualFrame frame, Object iterable, @Cached PyFloatAsDoubleNode asDoubleNode, @Cached InlinedLoopConditionProfile loopProfile, @Cached PRaiseNode raiseNode) { - /* - * This implementation is taken from CPython. The performance is not good. Should be - * faster. It can be easily replace with much simpler code based on BigDecimal: - * - * BigDecimal result = BigDecimal.ZERO; - * - * in cycle just: result = result.add(BigDecimal.valueof(x); ... The current - * implementation is little bit faster. 
The testFSum in test_math.py takes in different - * implementations: CPython ~0.6s CurrentImpl: ~14.3s Using BigDecimal: ~15.1 - */ Object iterator = getIter.execute(frame, inliningTarget, iterable); - double x, y, t, hi, lo = 0, yr, inf_sum = 0, special_sum = 0, sum; - double xsave; - int i, j, n = 0, arayLength = 32; - double[] p = new double[arayLength]; + boolean exhausted = false; + var acc = new XSum.SmallAccumulator(); while (loopProfile.profile(inliningTarget, !exhausted)) { try { Object next = nextNode.execute(frame, inliningTarget, iterator); - x = asDoubleNode.execute(frame, inliningTarget, next); - xsave = x; - for (i = j = 0; j < n; j++) { /* for y in partials */ - y = p[j]; - if (Math.abs(x) < Math.abs(y)) { - t = x; - x = y; - y = t; - } - hi = x + y; - yr = hi - x; - lo = y - yr; - if (lo != 0.0) { - p[i++] = lo; - } - x = hi; - } - - n = i; - if (x != 0.0) { - if (!Double.isFinite(x)) { - /* - * a nonfinite x could arise either as a result of intermediate - * overflow, or as a result of a nan or inf in the summands - */ - if (Double.isFinite(xsave)) { - throw raiseNode.raise(inliningTarget, OverflowError, ErrorMessages.INTERMEDIATE_OVERFLOW_IN, "fsum"); - } - if (Double.isInfinite(xsave)) { - inf_sum += xsave; - } - special_sum += xsave; - /* reset partials */ - n = 0; - } else if (n >= arayLength) { - arayLength += arayLength; - p = Arrays.copyOf(p, arayLength); - } else { - p[n++] = x; - } - } + acc.add(asDoubleNode.execute(frame, inliningTarget, next)); } catch (IteratorExhausted e) { exhausted = true; } } - - if (special_sum != 0.0) { - if (Double.isNaN(inf_sum)) { - throw raiseNode.raise(inliningTarget, ValueError, ErrorMessages.NEG_INF_PLUS_INF_IN); - } else { - sum = special_sum; - return sum; - } - } - - hi = 0.0; - if (n > 0) { - hi = p[--n]; - /* - * sum_exact(ps, hi) from the top, stop when the sum becomes inexact. 
- */ - while (n > 0) { - x = hi; - y = p[--n]; - assert (Math.abs(y) < Math.abs(x)); - hi = x + y; - yr = hi - x; - lo = y - yr; - if (lo != 0.0) { - break; - } - } - /* - * Make half-even rounding work across multiple partials. Needed so that sum([1e-16, - * 1, 1e16]) will round-up the last digit to two instead of down to zero (the 1e-16 - * makes the 1 slightly closer to two). With a potential 1 ULP rounding error - * fixed-up, math.fsum() can guarantee commutativity. - */ - if (n > 0 && ((lo < 0.0 && p[n - 1] < 0.0) || - (lo > 0.0 && p[n - 1] > 0.0))) { - y = lo * 2.0; - x = hi + y; - yr = x - hi; - if (compareAsBigDecimal(y, yr) == 0) { - hi = x; - } - } + double result = acc.round(); + if (Double.isNaN(result)) { + throw raiseNode.raise(inliningTarget, ValueError, ErrorMessages.NEG_INF_PLUS_INF_IN); + } else { + return result; } - return hi; - } - - @TruffleBoundary - private static int compareAsBigDecimal(double y, double yr) { - return BigDecimal.valueOf(y).compareTo(BigDecimal.valueOf(yr)); } } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/XSum.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/XSum.java new file mode 100644 index 0000000000..5dc7d18648 --- /dev/null +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/XSum.java @@ -0,0 +1,624 @@ +/* Copyright 2024, 2026 Oracle and/or its affiliates. + Copyright 2015, 2018, 2021, 2024 Radford M. 
Neal + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ +package com.oracle.graal.python.util; + +/** + * Implementation of exact summation of double numbers based on + * xsum. + */ +public class XSum { + + // CONSTANTS DEFINING THE FLOATING POINT FORMAT + + /** + * Bits in fp mantissa, excludes implicit 1. + */ + private static final int XSUM_MANTISSA_BITS = 52; + + /** + * Bits in fp exponent. + */ + private static final int XSUM_EXP_BITS = 11; + + /** + * Mask for mantissa bits. + */ + private static final long XSUM_MANTISSA_MASK = (1L << XSUM_MANTISSA_BITS) - 1; + + /** + * Mask for exponent. + */ + private static final long XSUM_EXP_MASK = (1 << XSUM_EXP_BITS) - 1; + + /** + * Bias added to signed exponent. + */ + private static final int XSUM_EXP_BIAS = (1 << (XSUM_EXP_BITS - 1)) - 1; + + /** + * Position of sign bit. + */ + private static final int XSUM_SIGN_BIT = XSUM_MANTISSA_BITS + XSUM_EXP_BITS; + + /** + * Mask for sign bit. 
+ */ + private static final long XSUM_SIGN_MASK = 1L << XSUM_SIGN_BIT; + + // CONSTANTS DEFINING THE SMALL ACCUMULATOR FORMAT + + /** + * Bits in chunk of the small accumulator. + */ + private static final int XSUM_SCHUNK_BITS = 64; + + /** + * Number of low bits of exponent, in one chunk. + */ + private static final int XSUM_LOW_EXP_BITS = 5; + + /** + * Mask for low-order exponent bits. + */ + private static final int XSUM_LOW_EXP_MASK = (1 << XSUM_LOW_EXP_BITS) - 1; + + /** + * Number of high exponent bits for index. + */ + private static final int XSUM_HIGH_EXP_BITS = XSUM_EXP_BITS - XSUM_LOW_EXP_BITS; + + /** + * Number of chunks in the small accumulator. + */ + private static final int XSUM_SCHUNKS = (1 << XSUM_HIGH_EXP_BITS) + 3; + + /** + * Bits in low part of mantissa. + */ + private static final int XSUM_LOW_MANTISSA_BITS = 1 << XSUM_LOW_EXP_BITS; + + /** + * Mask for low bits. + */ + private static final long XSUM_LOW_MANTISSA_MASK = (1L << XSUM_LOW_MANTISSA_BITS) - 1; + + /** + * Bits sums can carry into. + */ + private static final int XSUM_SMALL_CARRY_BITS = (XSUM_SCHUNK_BITS - 1) - XSUM_MANTISSA_BITS; + + /** + * Number of terms that can be added before the propagation is needed. + */ + private static final int XSUM_SMALL_CARRY_TERMS = (1 << XSUM_SMALL_CARRY_BITS) - 1; + + public static class SmallAccumulator { + // Chunks making up small accumulator + private long[] chunk = new long[XSUM_SCHUNKS]; + // If non-zero, +Inf, -Inf, or NaN + private long inf; + // If non-zero, a NaN value with payload + private long nan; + // Number of remaining adds before carry propagation must be done again + private int addsUntilPropagate = XSUM_SMALL_CARRY_TERMS; + + /** + * Add an array of floating-point numbers to a small accumulator. Mixes calls of + * carryPropagate with calls of add1NoCarry. 
+ */ + public void addArray(double[] vec) { + int n = vec.length; // number of elements not added yet + int pos = 0; + while (n > 0) { + if (addsUntilPropagate == 0) { + carryPropagate(); + } + int m = (n <= addsUntilPropagate) ? n : addsUntilPropagate; + int nextPos = pos + m; + for (int i = pos; i < nextPos; i++) { + add1NoCarry(vec[i]); + } + addsUntilPropagate -= m; + pos = nextPos; + n -= m; + } + } + + /** + * Add one double to a small accumulator. This is equivalent to, but somewhat faster than, + * calling addArray() with a vector of one value. + */ + public void add(double value) { + if (addsUntilPropagate == 0) { + carryPropagate(); + } + + add1NoCarry(value); + + addsUntilPropagate--; + } + + /** + * Propagate carries to next chunk in a small accumulator. Needs to be called often enough + * that accumulated carries don't overflow out the top, as indicated by + * {@code addsUntilPropagate}. Returns the index of the uppermost non-zero chunk (0 if + * number is zero). + * + * After carry propagation, the uppermost non-zero chunk will indicate the sign of the + * number, and will not be -1 (all 1s). It will be in the range -2^XSUM_LOW_MANTISSA_BITS to + * 2^XSUM_LOW_MANTISSA_BITS - 1. Lower chunks will be non-negative, and in the range from 0 + * up to 2^XSUM_LOW_MANTISSA_BITS - 1. + */ + private int carryPropagate() { + int u; + int uix; + + done: { + // Set u to the index of the uppermost non-zero (for now) chunk, or + // return with value 0 if there is none. + + for (u = XSUM_SCHUNKS - 1; chunk[u] == 0; u--) { + if (u == 0) { + uix = 0; + break done; + } + } + + // Carry propagate, starting at the low-order chunks. Note that the + // loop limit of u may be increased inside the loop. + + uix = -1; // indicates that a non-zero chunk has not been found yet + + int i = 0; // set to the index of the next non-zero chunk, from bottom + + // Quickly skip over unused low-order chunks. 
Done here at the start + // on the theory that there are often many unused low-order chunks, + // justifying some overhead to begin, but later stretches of unused + // chunks may not be as large. + + int e = u - 3; // go only to 3 before so won't access beyond chunk array + do { + if ((chunk[i] | chunk[i + 1] | chunk[i + 2] | chunk[i + 3]) != 0) { + break; + } + i += 4; + } while (i <= e); + + do { + long c; // Set to the chunk at index i (next non-zero one) + + // Find the next non-zero chunk, setting i to its index, or break out + // of loop if there is none. Note that the chunk at index u is not + // necessarily non-zero - it was initially, but u or the chunk at u + // may have changed. + + do { + c = chunk[i]; + if (c != 0) { + break; + } + i += 1; + } while (i <= u); + + if (c == 0) { + break; + } + + // Propagate possible carry from this chunk to next chunk up. + + long chigh = c >> XSUM_LOW_MANTISSA_BITS; + if (chigh == 0) { + uix = i; + i += 1; + continue; // no need to change this chunk + } + + if (u == i) { + if (chigh == -1) { + uix = i; + break; // don't propagate -1 into the region of all zeros above + } + u = i + 1; // we will change chunk[u+1], so we'll need to look at it + } + + long clow = c & XSUM_LOW_MANTISSA_MASK; + if (clow != 0) { + uix = i; + } + + // We now change chunk[i] and add to chunk[i+1]. Note that i+1 should be + // in range (no bigger than XSUM_SCHUNKS-1) if summing memory, since + // the number of chunks is big enough to hold any sum, and we do not + // store redundant chunks with values 0 or -1 above previously non-zero + // chunks. But other add operations might cause overflow, in which + // case we produce a NaN with all 1s as payload. (We can't reliably produce + // an Inf of the right sign.) 
+ + chunk[i] = clow; + if (i + 1 >= XSUM_SCHUNKS) { + addInfNan((XSUM_EXP_MASK << XSUM_MANTISSA_BITS) | XSUM_MANTISSA_MASK); + u = i; + } else { + chunk[i + 1] += chigh; + } + + i += 1; + + } while (i <= u); + + // Check again for the number being zero, since carry propagation might + // have created zero from something that initially looked non-zero. */ + + if (uix < 0) { + uix = 0; + break done; + } + + // While the uppermost chunk is negative, with value -1, combine it with + // the chunk below (if there is one) to produce the same number but with + // one fewer non-zero chunks. + + while (chunk[uix] == -1 && uix > 0) { + // Left shift of a negative number is undefined according to the standard, + // so do a multiply - it's all presumably constant-folded by the compiler. + chunk[uix - 1] += (-1L) * (1L << XSUM_LOW_MANTISSA_BITS); + chunk[uix] = 0; + uix -= 1; + } + } + + // We can now add one less than the total allowed terms before the next carry propagate. + + addsUntilPropagate = XSUM_SMALL_CARRY_TERMS - 1; + + // Return index of uppermost non-zero chunk. + + return uix; + } + + /** + * Add one number to a small accumulator assuming no carry propagation is required. + */ + private void add1NoCarry(double value) { + // Extract exponent and mantissa. Split exponent into high and low parts. + long ivalue = Double.doubleToRawLongBits(value); + int exp = (int) ((ivalue >> XSUM_MANTISSA_BITS) & XSUM_EXP_MASK); + long mantissa = ivalue & XSUM_MANTISSA_MASK; + int highExp = exp >> XSUM_LOW_EXP_BITS; + int lowExp = exp & XSUM_LOW_EXP_MASK; + + // Categorize number as normal, denormalized, or Inf/NaN according to + // the value of the exponent field. + + if (exp == 0) { // zero or denormalized + // If it's a zero (positive or negative), we do nothing. + if (mantissa == 0) { + return; + } + // Denormalized mantissa has no implicit 1, but exponent is 1 not 0. 
+ exp = lowExp = 1; + } else if (exp == XSUM_EXP_MASK) { // Inf or NaN + // Just update flags in accumulator structure. + addInfNan(ivalue); + return; + } else { // normalized + // OR in implicit 1 bit at top of mantissa + mantissa |= 1L << XSUM_MANTISSA_BITS; + } + + // Use high part of exponent as index of chunk, and low part of + // exponent to give position within chunk. Fetch the two chunks + // that will be modified. + + // Separate mantissa into two parts, after shifting, and add to (or + // subtract from) this chunk and the next higher chunk (which always + // exists since there are three extra ones at the top). + + // Note that lowMantissa will have at most XSUM_LOW_MANTISSA_BITS bits, + // while highMantissa will have at most XSUM_MANTISSA_BITS bits, since + // even though highMantissa includes the extra implicit 1 bit, it will + // also be shifted right by at least one bit. + + long lowMantissa = (mantissa << lowExp) & XSUM_LOW_MANTISSA_MASK; + long highMantissa = mantissa >> (XSUM_LOW_MANTISSA_BITS - lowExp); + + // Add or subtract to or from the two affected chunks. + + if (ivalue < 0) { + chunk[highExp] -= lowMantissa; + chunk[highExp + 1] -= highMantissa; + } else { + chunk[highExp] += lowMantissa; + chunk[highExp + 1] += highMantissa; + } + } + + /** + * Add an inf or NaN to a small accumulator. This only changes the flags, not the chunks in + * the accumulator, which retains the sum of the finite terms (which is perhaps sometimes + * useful to access, though no function to do so is defined at present). A NaN with larger + * payload (seen as a 52-bit unsigned integer) takes precedence, with the sign of the NaN + * always being positive. This ensures that the order of summing NaN values doesn't matter. 
+ */ + private void addInfNan(long ivalue) { + long mantissa = ivalue & XSUM_MANTISSA_MASK; + + if (mantissa == 0) { // Inf + if (inf == 0) { // no previous Inf + inf = ivalue; + } else if (inf != ivalue) { // previous Inf was opposite sign + inf = Double.doubleToRawLongBits(Double.NaN); // result will be a NaN + } + } else { // NaN + // Choose the NaN with the bigger payload and clear its sign. + // Using <= ensures that we will choose the first NaN over the previous zero. + if ((nan & XSUM_MANTISSA_MASK) <= mantissa) { + nan = ivalue & ~XSUM_SIGN_MASK; + } + } + } + + /** + * Return the result of rounding a small accumulator. The rounding mode is to nearest, with + * ties to even. The small accumulator may be modified by this operation (by carry + * propagation being done), but the value it represents should not change. + */ + public double round() { + long intv; + + // See if we have a NaN from one of the numbers being a NaN, in + // which case we return the NaN with largest payload, or an infinite + // result (+Inf, -Inf, or a NaN if both +Inf and -Inf occurred). + // Note that we do NOT return NaN if we have both an infinite number + // and a sum of other numbers that overflows with opposite sign, + // since there is no real ambiguity regarding the sign in such a case. + + if (nan != 0) { + return Double.longBitsToDouble(nan); + } + + if (inf != 0) { + return Double.longBitsToDouble(inf); + } + + // If none of the numbers summed were infinite or NaN, we proceed to + // propagate carries, as a preliminary to finding the magnitude of + // the sum. This also ensures that the sign of the result can be + // determined from the uppermost non-zero chunk. + + // We also find the index, i, of this uppermost non-zero chunk, as + // the value returned by carryPropagate, and set ivalue to + // chunk[i]. Note that ivalue will not be 0 or -1, unless + // i is 0 (the lowest chunk), in which case it will be handled by + // the code for denormalized numbers. 
+ + int i = carryPropagate(); + + long ivalue = chunk[i]; + + // Handle a possible denormalized number, including zero. + + if (i <= 1) { + // Check for zero value, in which case we can return immediately. + if (ivalue == 0) { + return 0.0; + } + + // Check if it is actually a denormalized number. It always is if only + // the lowest chunk is non-zero. If the highest non-zero chunk is the + // next-to-lowest, we check the magnitude of the absolute value. + // Note that the real exponent is 1 (not 0), so we need to shift right + // by 1 here. + + if (i == 0) { + intv = ivalue >= 0 ? ivalue : -ivalue; + intv >>= 1; + if (ivalue < 0) { + intv |= XSUM_SIGN_MASK; + } + return Double.longBitsToDouble(intv); + } else { + // Note: Left shift of -ve number is undefined, so do a multiply instead, + // which is probably optimized to a shift. + intv = ivalue * (1L << (XSUM_LOW_MANTISSA_BITS - 1)) + (chunk[0] >> 1); + if (intv < 0) { + if (intv > -(1L << XSUM_MANTISSA_BITS)) { + intv = (-intv) | XSUM_SIGN_MASK; + return Double.longBitsToDouble(intv); + } + } else { + if (intv < 1L << XSUM_MANTISSA_BITS) { + return Double.longBitsToDouble(intv); + } + } + // otherwise, it's not actually denormalized, so fall through to below + } + } + + // Find the location of the uppermost 1 bit in the absolute value of the + // upper chunk by converting it (as a signed integer) to a floating point + // value, and looking at the exponent. Then set 'more' to the number of + // bits from the lower chunk (and maybe the next lower) that are needed + // to fill out the mantissa of the result (including the top implicit 1 bit), plus two + // extra bits to help decide on rounding. For negative numbers, it may turn out later + // that we need another bit because negating a negative value may carry out of the top + // here, but not once more bits are shifted into the bottom later on. 
+ + intv = Double.doubleToRawLongBits(ivalue); + int e = (int) ((intv >> XSUM_MANTISSA_BITS) & XSUM_EXP_MASK); // e-bias is in 0..32 + int more = 2 + XSUM_MANTISSA_BITS + XSUM_EXP_BIAS - e; + + // Change 'ivalue' to put in 'more' bits from lower chunks into the bottom. + // Also set 'j' to the index of the lowest chunk from which these bits came, + // and 'lower' to the remaining bits of that chunk not now in 'ivalue'. + // Note that 'lower' initially has at least one bit in it, which we can + // later move into 'ivalue' if it turns out that one more bit is needed. + + ivalue *= 1L << more; // multiply, since << of negative undefined + int j = i - 1; + long lower = chunk[j]; // must exist, since denormalized if i==0 + if (more >= XSUM_LOW_MANTISSA_BITS) { + more -= XSUM_LOW_MANTISSA_BITS; + ivalue += lower << more; + j -= 1; + lower = j < 0 ? 0 : chunk[j]; + } + ivalue += lower >> (XSUM_LOW_MANTISSA_BITS - more); + lower &= (1L << (XSUM_LOW_MANTISSA_BITS - more)) - 1; + + // Decide on rounding, with separate code for positive and negative values. + + // At this point, 'ivalue' has the signed mantissa bits, plus two extra + // bits, with 'e' recording the exponent position for these within their + // top chunk. For positive 'ivalue', the bits in 'lower' and chunks + // below 'j' add to the absolute value; for negative 'ivalue' they + // subtract. + + // After setting 'ivalue' to the tentative unsigned mantissa + // (shifted left 2), and 'intv' to have the correct sign, this + // code goes to done_rounding if it finds that just discarding lower + // order bits is correct, and to round_away_from_zero if instead the + // magnitude should be increased by one in the lowest mantissa bit. 
*/ + + done_rounding: { + round_away_from_zero: { + + if (ivalue >= 0) { // number is positive, lower bits are added to magnitude + intv = 0; // positive sign + + if ((ivalue & 2) == 0) { // extra bits are 0x + break done_rounding; + } + + if ((ivalue & 1) != 0) { // extra bits are 11 + break round_away_from_zero; + } + + if ((ivalue & 4) != 0) { // low bit is 1 (odd), extra bits are 10 + break round_away_from_zero; + } + + if (lower == 0) { // see if any lower bits are non-zero + while (j > 0) { + j -= 1; + if (chunk[j] != 0) { + lower = 1; + break; + } + } + } + + if (lower != 0) { // low bit 0 (even), extra bits 10, non-zero lower bits + break round_away_from_zero; + } else { // low bit 0 (even), extra bits 10, all lower bits 0 + break done_rounding; + } + } else { // number is negative, lower bits are subtracted from magnitude + + // Check for a negative 'ivalue' that when negated doesn't contain a full + // mantissa's worth of bits, plus one to help rounding. If so, move one + // more bit into 'ivalue' from 'lower' (and remove it from 'lower'). + // This happens when the negation of the upper part of 'ivalue' has the + // form 10000... but the negation of the full 'ivalue' is not 10000... 
+ + if (((-ivalue) & (1L << (XSUM_MANTISSA_BITS + 2))) == 0) { + long pos = 1L << (XSUM_LOW_MANTISSA_BITS - 1 - more); + ivalue *= 2; // note that left shift undefined if ivalue is negative + if ((lower & pos) != 0) { + ivalue += 1; + lower &= ~pos; + } + e -= 1; + } + + intv = XSUM_SIGN_MASK; // negative sign + ivalue = -ivalue; // ivalue now contains the absolute value + + if ((ivalue & 3) == 3) { // extra bits are 11 + break round_away_from_zero; + } + + if ((ivalue & 3) <= 1) { // extra bits are 00 or 01 + break done_rounding; + } + + if ((ivalue & 4) == 0) { // low bit is 0 (even), extra bits are 10 + break done_rounding; + } + + if (lower == 0) { // see if any lower bits are non-zero + while (j > 0) { + j -= 1; + if (chunk[j] != 0) { + lower = 1; + break; + } + } + } + + if (lower != 0) { // low bit 1 (odd), extra bits 10, non-zero lower bits + break done_rounding; + } else { // low bit 1 (odd), extra bits are 10, lower bits are all 0 + break round_away_from_zero; + } + + } + } // round_away_from_zero: + + // Round away from zero, then check for carry having propagated out the + // top, and shift if so. + + ivalue += 4; // add 1 to low-order mantissa bit + if ((ivalue & (1L << (XSUM_MANTISSA_BITS + 3))) != 0) { + ivalue >>= 1; + e += 1; + } + } // done_rounding: + + // Get rid of the bottom 2 bits that were used to decide on rounding. + + ivalue >>= 2; + + // Adjust to the true exponent, accounting for where this chunk is. + + e += (i << XSUM_LOW_EXP_BITS) - XSUM_EXP_BIAS - XSUM_MANTISSA_BITS; + + // If exponent has overflowed, change to plus or minus Inf and return. + + if (e >= XSUM_EXP_MASK) { + intv |= XSUM_EXP_MASK << XSUM_MANTISSA_BITS; + return Double.longBitsToDouble(intv); + } + + // Put exponent and mantissa into intv, which already has the sign, + // then return fltv. 
+ + intv += ((long) e) << XSUM_MANTISSA_BITS; + intv += ivalue & XSUM_MANTISSA_MASK; /* mask out the implicit 1 bit */ + + return Double.longBitsToDouble(intv); + } + + } + +} diff --git a/mx.graalpython/copyrights/overrides b/mx.graalpython/copyrights/overrides index 23486ad702..d58d981571 100644 --- a/mx.graalpython/copyrights/overrides +++ b/mx.graalpython/copyrights/overrides @@ -696,6 +696,7 @@ graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/SequenceStorage.java,zippy.copyright graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/SequenceStorageFactory.java,zippy.copyright graalpython/com.oracle.graal.python/src/com/oracle/graal/python/runtime/sequence/storage/SequenceStoreException.java,zippy.copyright +graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/XSum.java,xsum.copyright graalpython/python-libposix/src/fork_exec.c,python.copyright graalpython/python-venvlauncher/src/venvlauncher.c,python.copyright graalpython/lib-graalpython/modules/graalpy-config.py,python.copyright diff --git a/mx.graalpython/copyrights/xsum.copyright.star b/mx.graalpython/copyrights/xsum.copyright.star new file mode 100644 index 0000000000..9f0e45e18d --- /dev/null +++ b/mx.graalpython/copyrights/xsum.copyright.star @@ -0,0 +1,22 @@ +/* Copyright 2024, 2026 Oracle and/or its affiliates. All rights reserved. + Copyright 2015, 2018, 2021, 2024 Radford M. 
Neal + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ diff --git a/mx.graalpython/copyrights/xsum.copyright.star.regex b/mx.graalpython/copyrights/xsum.copyright.star.regex new file mode 100644 index 0000000000..8af4cd2a3b --- /dev/null +++ b/mx.graalpython/copyrights/xsum.copyright.star.regex @@ -0,0 +1,23 @@ +/\* Copyright (20[0-9][0-9], )*(20[0-9][0-9]) Oracle and/or its affiliates. + Copyright 2015, 2018, 2021, 2024 Radford M. 
Neal + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files \(the + "Software"\), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +\*/ +.* From af9fa663c6c1c93adec7906f2b479fac41446e32 Mon Sep 17 00:00:00 2001 From: Fabio Niephaus Date: Thu, 15 Jan 2026 10:48:42 +0100 Subject: [PATCH 2/5] Adapt `XSum` to CPython semantics. 
--- .../builtins/modules/MathModuleBuiltins.java | 20 +++++++++-- .../com/oracle/graal/python/util/XSum.java | 36 +++++++++++++++---- 2 files changed, 48 insertions(+), 8 deletions(-) diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java index 53baea66cf..ba3a5e7e55 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java @@ -888,9 +888,25 @@ static double doIt(VirtualFrame frame, Object iterable, exhausted = true; } } + + if (acc.isNaNResult()) { + return Double.NaN; + } + + if (acc.isInfiniteResult()) { + double result = acc.getInfiniteResult(); + if (Double.isNaN(result)) { + throw raiseNode.raise(inliningTarget, ValueError, ErrorMessages.NEG_INF_PLUS_INF_IN); + } else { + assert Double.isInfinite(result); + return result; + } + } + double result = acc.round(); - if (Double.isNaN(result)) { - throw raiseNode.raise(inliningTarget, ValueError, ErrorMessages.NEG_INF_PLUS_INF_IN); + // +Inf or -Inf if exponent has overflowed + if (Double.isInfinite(result)) { + throw raiseNode.raise(inliningTarget, OverflowError, ErrorMessages.INTERMEDIATE_OVERFLOW_IN, "fsum"); } else { return result; } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/XSum.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/XSum.java index 5dc7d18648..535ec7c1db 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/XSum.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/XSum.java @@ -378,6 +378,28 @@ private void addInfNan(long ivalue) { } } + /** + * Return true if one of the numbers were a NaN. 
+ */ + public boolean isNaNResult() { + return nan != 0; + } + + /** + * Return true if +Inf, -Inf, or both occurred. + */ + public boolean isInfiniteResult() { + return inf != 0; + } + + /** + * Return +Inf, -Inf, or a NaN if both +Inf and -Inf occurred. + */ + public double getInfiniteResult() { + assert isInfiniteResult(); + return Double.longBitsToDouble(inf); + } + /** * Return the result of rounding a small accumulator. The rounding mode is to nearest, with * ties to even. The small accumulator may be modified by this operation (by carry @@ -393,13 +415,15 @@ public double round() { // and a sum of other numbers that overflows with opposite sign, // since there is no real ambiguity regarding the sign in such a case. - if (nan != 0) { - return Double.longBitsToDouble(nan); - } + assert !isNaNResult() : "isNaNResult() must be handled before calling round()"; + // if (nan != 0) { + // return Double.longBitsToDouble(nan); + // } - if (inf != 0) { - return Double.longBitsToDouble(inf); - } + assert !isInfiniteResult() : "isInfiniteResult() must be handled before calling round()"; + // if (inf != 0) { + // return Double.longBitsToDouble(inf); + // } // If none of the numbers summed were infinite or NaN, we proceed to // propagate carries, as a preliminary to finding the magnitude of From e38a8540d278371358b8d93bbf8f873739fd9ef1 Mon Sep 17 00:00:00 2001 From: Fabio Niephaus Date: Wed, 14 Jan 2026 09:33:14 +0100 Subject: [PATCH 3/5] Replace profiling with loop count reporting. 
--- .../python/builtins/modules/MathModuleBuiltins.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java index ba3a5e7e55..b14c03e7b2 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java @@ -108,6 +108,7 @@ import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.dsl.TypeSystemReference; import com.oracle.truffle.api.frame.VirtualFrame; +import com.oracle.truffle.api.nodes.LoopNode; import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.profiles.InlinedConditionProfile; import com.oracle.truffle.api.profiles.InlinedLoopConditionProfile; @@ -874,18 +875,20 @@ static double doIt(VirtualFrame frame, Object iterable, @Cached PyObjectGetIter getIter, @Cached PyIterNextNode nextNode, @Cached PyFloatAsDoubleNode asDoubleNode, - @Cached InlinedLoopConditionProfile loopProfile, @Cached PRaiseNode raiseNode) { Object iterator = getIter.execute(frame, inliningTarget, iterable); - boolean exhausted = false; var acc = new XSum.SmallAccumulator(); - while (loopProfile.profile(inliningTarget, !exhausted)) { + int nbrIter = 0; + while (true) { try { Object next = nextNode.execute(frame, inliningTarget, iterator); + nbrIter++; acc.add(asDoubleNode.execute(frame, inliningTarget, next)); } catch (IteratorExhausted e) { - exhausted = true; + break; + } finally { + LoopNode.reportLoopCount(inliningTarget, nbrIter); } } From 1c65a1134035cbef096ec4e724df89e9cb971c25 Mon Sep 17 00:00:00 2001 From: Fabio Niephaus Date: Wed, 14 Jan 2026 11:15:44 +0100 Subject: [PATCH 4/5] Inline `PyIterNextNode` for interp performance. 
--- .../builtins/modules/MathModuleBuiltins.java | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java index b14c03e7b2..d2bbb89d03 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java @@ -49,6 +49,9 @@ import com.oracle.graal.python.builtins.objects.ints.IntBuiltins; import com.oracle.graal.python.builtins.objects.ints.PInt; import com.oracle.graal.python.builtins.objects.tuple.PTuple; +import com.oracle.graal.python.builtins.objects.type.TpSlots; +import com.oracle.graal.python.builtins.objects.type.slots.TpSlot; +import com.oracle.graal.python.builtins.objects.type.slots.TpSlotIterNext; import com.oracle.graal.python.lib.IteratorExhausted; import com.oracle.graal.python.lib.PyBoolCheckNode; import com.oracle.graal.python.lib.PyFloatAsDoubleNode; @@ -869,24 +872,38 @@ protected ArgumentClinicProvider getArgumentClinic() { @Builtin(name = "fsum", minNumOfPositionalArgs = 1) @GenerateNodeFactory public abstract static class FsumNode extends PythonUnaryBuiltinNode { + + /** + * Note: this specialization uses an inlined version of {@link PyIterNextNode} with the + * tp_iternext slot moved out of the loop. 
+ */ @Specialization static double doIt(VirtualFrame frame, Object iterable, @Bind Node inliningTarget, @Cached PyObjectGetIter getIter, - @Cached PyIterNextNode nextNode, + @Cached GetClassNode nextNodeGetClassNode, + @Cached TpSlots.GetCachedTpSlotsNode nextNodeGetSlots, + @Cached TpSlotIterNext.CallSlotTpIterNextNode nextNodeCallNext, + @Cached IsBuiltinObjectProfile nextNodeStopIterationProfile, @Cached PyFloatAsDoubleNode asDoubleNode, @Cached PRaiseNode raiseNode) { Object iterator = getIter.execute(frame, inliningTarget, iterable); + TpSlot tpIternext = nextNodeGetSlots.execute(inliningTarget, nextNodeGetClassNode.execute(inliningTarget, iterator)).tp_iternext(); + assert tpIternext != null; + var acc = new XSum.SmallAccumulator(); int nbrIter = 0; while (true) { try { - Object next = nextNode.execute(frame, inliningTarget, iterator); + Object next = nextNodeCallNext.execute(frame, inliningTarget, tpIternext, iterator); nbrIter++; acc.add(asDoubleNode.execute(frame, inliningTarget, next)); } catch (IteratorExhausted e) { break; + } catch (PException e) { + e.expectStopIteration(inliningTarget, nextNodeStopIterationProfile); + break; } finally { LoopNode.reportLoopCount(inliningTarget, nbrIter); } From f2ee1781df7b638f339cb3ae24b6c7ad3596c0f1 Mon Sep 17 00:00:00 2001 From: Fabio Niephaus Date: Wed, 14 Jan 2026 11:48:47 +0100 Subject: [PATCH 5/5] Ensure loopCounts never overflow. 
--- .../builtins/modules/BuiltinFunctions.java | 24 ++++++++++++------- .../builtins/modules/MathModuleBuiltins.java | 8 ++++--- .../functools/FunctoolsModuleBuiltins.java | 12 +++++----- .../objects/common/HashingStorageNodes.java | 22 ++++++++--------- .../objects/dict/DictReprBuiltin.java | 12 +++++----- .../objects/dict/DictViewBuiltins.java | 15 +++++++----- .../oracle/graal/python/util/PythonUtils.java | 2 +- 7 files changed, 53 insertions(+), 42 deletions(-) diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BuiltinFunctions.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BuiltinFunctions.java index 1900f38741..4afa9c565f 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BuiltinFunctions.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/BuiltinFunctions.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2025, Oracle and/or its affiliates. + * Copyright (c) 2017, 2026, Oracle and/or its affiliates. * Copyright (c) 2013, Regents of the University of California * * All rights reserved. 
@@ -498,19 +498,21 @@ static boolean doObject(VirtualFrame frame, Object object, @Cached PyIterNextNode nextNode, @Cached PyObjectIsTrueNode isTrueNode) { Object iterator = getIter.execute(frame, inliningTarget, object); - int nbrIter = 0; + int loopCount = 0; while (true) { try { Object next = nextNode.execute(frame, inliningTarget, iterator); - nbrIter++; + if (CompilerDirectives.hasNextTier() && loopCount < Integer.MAX_VALUE) { + loopCount++; + } if (!isTrueNode.execute(frame, next)) { return false; } } catch (IteratorExhausted e) { break; } finally { - LoopNode.reportLoopCount(inliningTarget, nbrIter); + LoopNode.reportLoopCount(inliningTarget, loopCount); } } @@ -549,19 +551,21 @@ static boolean doObject(VirtualFrame frame, Object object, @Cached PyIterNextNode nextNode, @Cached PyObjectIsTrueNode isTrueNode) { Object iterator = getIter.execute(frame, inliningTarget, object); - int nbrIter = 0; + int loopCount = 0; while (true) { try { Object next = nextNode.execute(frame, inliningTarget, iterator); - nbrIter++; + if (CompilerDirectives.hasNextTier() && loopCount < Integer.MAX_VALUE) { + loopCount++; + } if (isTrueNode.execute(frame, next)) { return true; } } catch (IteratorExhausted e) { break; } finally { - LoopNode.reportLoopCount(inliningTarget, nbrIter); + LoopNode.reportLoopCount(inliningTarget, loopCount); } } @@ -1563,11 +1567,13 @@ static Object minmaxSequenceWithKey(VirtualFrame frame, Node inliningTarget, Obj currentKey = nextKey; currentValue = nextValue; } - loopCount++; + if (CompilerDirectives.hasNextTier() && loopCount < Integer.MAX_VALUE) { + loopCount++; + } } catch (IteratorExhausted e) { break; } finally { - LoopNode.reportLoopCount(inliningTarget, loopCount < 0 ? 
Integer.MAX_VALUE : loopCount); + LoopNode.reportLoopCount(inliningTarget, loopCount); } } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java index d2bbb89d03..f65249f7a2 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/MathModuleBuiltins.java @@ -893,11 +893,13 @@ static double doIt(VirtualFrame frame, Object iterable, assert tpIternext != null; var acc = new XSum.SmallAccumulator(); - int nbrIter = 0; + int loopCount = 0; while (true) { try { Object next = nextNodeCallNext.execute(frame, inliningTarget, tpIternext, iterator); - nbrIter++; + if (CompilerDirectives.hasNextTier() && loopCount < Integer.MAX_VALUE) { + loopCount++; + } acc.add(asDoubleNode.execute(frame, inliningTarget, next)); } catch (IteratorExhausted e) { break; @@ -905,7 +907,7 @@ static double doIt(VirtualFrame frame, Object iterable, e.expectStopIteration(inliningTarget, nextNodeStopIterationProfile); break; } finally { - LoopNode.reportLoopCount(inliningTarget, nbrIter); + LoopNode.reportLoopCount(inliningTarget, loopCount); } } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/functools/FunctoolsModuleBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/functools/FunctoolsModuleBuiltins.java index 77671ee999..14147e88fd 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/functools/FunctoolsModuleBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/functools/FunctoolsModuleBuiltins.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -45,7 +45,6 @@ import static com.oracle.graal.python.nodes.BuiltinNames.T_FUNCTOOLS; import static com.oracle.graal.python.nodes.ErrorMessages.REDUCE_EMPTY_SEQ; import static com.oracle.graal.python.nodes.ErrorMessages.S_ARG_N_MUST_SUPPORT_ITERATION; -import static com.oracle.truffle.api.nodes.LoopNode.reportLoopCount; import java.util.List; @@ -75,6 +74,7 @@ import com.oracle.truffle.api.dsl.NodeFactory; import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.frame.VirtualFrame; +import com.oracle.truffle.api.nodes.LoopNode; import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.profiles.InlinedConditionProfile; @@ -136,7 +136,7 @@ Object doReduce(VirtualFrame frame, Object function, Object sequence, Object ini Object[] args = new Object[2]; - int count = 0; + int loopCount = 0; while (true) { Object op2; try { @@ -152,11 +152,11 @@ Object doReduce(VirtualFrame frame, Object function, Object sequence, Object ini args[1] = op2; result = callNode.execute(frame, function, args); } - if (CompilerDirectives.hasNextTier()) { - count++; + if (CompilerDirectives.hasNextTier() && loopCount < Integer.MAX_VALUE) { + loopCount++; } } - reportLoopCount(this, count >= 0 ? 
count : Integer.MAX_VALUE); + LoopNode.reportLoopCount(this, loopCount); if (result == null) { throw raiseNode.raise(inliningTarget, PythonBuiltinClassType.TypeError, REDUCE_EMPTY_SEQ); diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/HashingStorageNodes.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/HashingStorageNodes.java index f1e0013aeb..cfb38025d2 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/HashingStorageNodes.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/common/HashingStorageNodes.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -1216,12 +1216,12 @@ static boolean doIt(Frame frame, Node inliningTarget, HashingStorage aStorage, H if (lenANode.execute(inliningTarget, aStorage) != lenBNode.execute(inliningTarget, bStorage)) { return false; } - int index = 0; + int loopCount = 0; try { HashingStorageIterator aIter = getAIter.execute(inliningTarget, aStorage); while (loopProfile.profile(inliningTarget, aIterNext.execute(inliningTarget, aStorage, aIter))) { - if (CompilerDirectives.hasNextTier()) { - index++; + if (CompilerDirectives.hasNextTier() && loopCount < Integer.MAX_VALUE) { + loopCount++; } Object aKey = aIterKey.execute(inliningTarget, aStorage, aIter); @@ -1236,8 +1236,8 @@ static boolean doIt(Frame frame, Node inliningTarget, HashingStorage aStorage, H return false; } } finally { - if (index != 0) { - LoopNode.reportLoopCount(inliningTarget, index); + if (loopCount != 0) { + LoopNode.reportLoopCount(inliningTarget, loopCount); } } return true; @@ -1287,19 +1287,19 @@ static Object doIt(Frame frame, 
Node callbackInliningTarget, HashingStorage stor @Cached HashingStorageGetIterator getIter, @Cached HashingStorageIteratorNext iterNext, @Cached InlinedLoopConditionProfile loopProfile) { - int index = 0; + int loopCount = 0; Object accumulator = accumulatorIn; try { HashingStorageIterator aIter = getIter.execute(inliningTarget, storage); while (loopProfile.profile(inliningTarget, iterNext.execute(inliningTarget, storage, aIter))) { - if (CompilerDirectives.hasNextTier()) { - index++; + if (CompilerDirectives.hasNextTier() && loopCount < Integer.MAX_VALUE) { + loopCount++; } accumulator = callback.execute(frame, callbackInliningTarget, storage, aIter, accumulator); } } finally { - if (index != 0) { - LoopNode.reportLoopCount(getIter, index); + if (loopCount != 0) { + LoopNode.reportLoopCount(getIter, loopCount); } } return accumulator; diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictReprBuiltin.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictReprBuiltin.java index 6828f40789..d3c50a45c9 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictReprBuiltin.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictReprBuiltin.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * The Universal Permissive License (UPL), Version 1.0 @@ -407,7 +407,7 @@ static void repr(VirtualFrame frame, POrderedDict dict, TruffleStringBuilder sb, @Cached FormatKeyValueDictRepr formatKeyValueDictRepr) { Object oditems = callMethod.execute(frame, inliningTarget, dict, T_ITEMS); ReprState s = new ReprState(dict, sb, false); - int count = 0; + int loopCount = 0; try { Object iter = getIter.execute(frame, inliningTarget, oditems); while (true) { @@ -417,8 +417,8 @@ static void repr(VirtualFrame frame, POrderedDict dict, TruffleStringBuilder sb, } catch (IteratorExhausted e) { break; } - if (CompilerDirectives.hasNextTier()) { - count++; + if (CompilerDirectives.hasNextTier() && loopCount < Integer.MAX_VALUE) { + loopCount++; } assert PGuards.isPTuple(next); ObjectSequenceStorage item = (ObjectSequenceStorage) ((PTuple) next).getSequenceStorage(); @@ -427,8 +427,8 @@ static void repr(VirtualFrame frame, POrderedDict dict, TruffleStringBuilder sb, formatKeyValueDictRepr.execute(key, value, s); } } finally { - if (count != 0) { - LoopNode.reportLoopCount(inliningTarget, count); + if (loopCount != 0) { + LoopNode.reportLoopCount(inliningTarget, loopCount); } } } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictViewBuiltins.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictViewBuiltins.java index 92dae6972f..a82ffa0a3a 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictViewBuiltins.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/dict/DictViewBuiltins.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * The Universal Permissive License (UPL), Version 1.0 @@ -46,9 +46,9 @@ import java.util.List; import com.oracle.graal.python.PythonLanguage; +import com.oracle.graal.python.annotations.Builtin; import com.oracle.graal.python.annotations.Slot; import com.oracle.graal.python.annotations.Slot.SlotKind; -import com.oracle.graal.python.annotations.Builtin; import com.oracle.graal.python.builtins.CoreFunctions; import com.oracle.graal.python.builtins.PythonBuiltinClassType; import com.oracle.graal.python.builtins.PythonBuiltins; @@ -75,7 +75,6 @@ import com.oracle.graal.python.builtins.objects.type.slots.TpSlotBinaryOp.BinaryOpBuiltinNode; import com.oracle.graal.python.builtins.objects.type.slots.TpSlotLen.LenBuiltinNode; import com.oracle.graal.python.builtins.objects.type.slots.TpSlotRichCompare; -import com.oracle.graal.python.lib.RichCmpOp; import com.oracle.graal.python.builtins.objects.type.slots.TpSlotSqContains.SqContainsBuiltinNode; import com.oracle.graal.python.lib.IteratorExhausted; import com.oracle.graal.python.lib.PyIterNextNode; @@ -84,12 +83,14 @@ import com.oracle.graal.python.lib.PyObjectRichCompareBool; import com.oracle.graal.python.lib.PyObjectSizeNode; import com.oracle.graal.python.lib.PySequenceContainsNode; +import com.oracle.graal.python.lib.RichCmpOp; import com.oracle.graal.python.nodes.PNodeWithContext; import com.oracle.graal.python.nodes.function.PythonBuiltinBaseNode; import com.oracle.graal.python.nodes.function.builtins.PythonBinaryBuiltinNode; import com.oracle.graal.python.nodes.function.builtins.PythonUnaryBuiltinNode; import com.oracle.graal.python.runtime.object.PFactory; import com.oracle.graal.python.runtime.sequence.storage.SequenceStorage; +import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.dsl.Bind; import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.Cached.Exclusive; @@ -308,7 +309,7 @@ public boolean doIt(VirtualFrame frame, Object self, Object other, @Cached 
PyObjectIsTrueNode isTrueNode) { Object iterator = getIterNode.execute(frame, inliningTarget, self); boolean ok = checkAll; - int i = 0; + int loopCount = 0; try { while (loopConditionProfile.profile(inliningTarget, checkAll && ok || !checkAll && !ok)) { Object item; @@ -318,10 +319,12 @@ public boolean doIt(VirtualFrame frame, Object self, Object other, break; } ok = isTrueNode.execute(frame, containsNode.execute(frame, inliningTarget, other, item)); - i++; + if (CompilerDirectives.hasNextTier() && loopCount < Integer.MAX_VALUE) { + loopCount++; + } } } finally { - LoopNode.reportLoopCount(this, i < 0 ? Integer.MAX_VALUE : i); + LoopNode.reportLoopCount(this, loopCount); } return ok; } diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/PythonUtils.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/PythonUtils.java index 5dafef691c..ecf1d7ab58 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/PythonUtils.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/util/PythonUtils.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0