From 63cae89d24572981de0a6c1eb22a01d4b592e2f3 Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Mon, 18 May 2026 12:35:18 +0000 Subject: [PATCH 1/2] feat: u32 -> integer64, rename scan_na to check, catch unsigned wraps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related changes: 1. Materialize `ui32` buffers as `bit64::integer64` instead of base `integer`. R's signed int32 has no headroom for `ui32` values `>= 2^31` — they previously wrapped to negative integers silently (except for the single `INT_MIN` bit pattern, which became `NA` and was caught by `scan_na`). `integer64` has 53 bits of headroom over `ui32`'s range, so every value round-trips losslessly. 2. Rename `scan_na` to `check`, broaden it to also detect unsigned wrap. `check = TRUE` now errors when device -> host materialization yields: * An `NA` for `i32` / `i64` (the `INT_MIN` / `INT64_MIN` NA collision), * A negative `integer64` for `ui32` / `ui64` (which can only happen for `ui64 >= 2^63`; `ui32` is now lossless and never produces one). The C++ `raw_to_array_impl` template's "integer64 storage" path now matches `uint32_t` alongside `int64_t` / `uint64_t`, and dispatch in `impl_raw_to_array` routes `ui32` through REALSXP. The R-side `value.PJRTArrayPromise` extends its integer64 classing list to include `ui32`. Co-Authored-By: Claude Opus 4.7 (1M context) --- NEWS.md | 17 ++++---- R/async.R | 2 +- R/buffer.R | 84 ++++++++++++++++++++++-------------- man/as_array.PJRTBuffer.Rd | 28 ++++++++---- man/pjrt_buffer.Rd | 6 +-- src/pjrt.cpp | 15 ++++--- tests/testthat/test-buffer.R | 75 +++++++++++++++++--------------- 7 files changed, 136 insertions(+), 91 deletions(-) diff --git a/NEWS.md b/NEWS.md index db2757c5..08e13983 100644 --- a/NEWS.md +++ b/NEWS.md @@ -8,14 +8,15 @@ registration mechanisms with coverage of both CUDA and CPU-specific aspects. * Added support for the `bit64` package to better support long integers. -* `pjrt_buffer()`, `pjrt_scalar()`, and `as_array()` gain a `scan_na` - argument (default `FALSE`). When `TRUE`, host → device transfers error if - the input contains any `NA` values; device → host transfers error if a - materialized `i32`, `ui32`, `i64`, or `ui64` buffer surfaces a value that - R cannot distinguish from `NA` (`INT_MIN` for the 32-bit dtypes, - `INT64_MIN` for the 64-bit dtypes — the latter via `bit64::integer64`). - Opt-in safety check for callers that want to fail loudly on - silent NA collisions. +* `pjrt_buffer()`, `pjrt_scalar()`, and `as_array()` gain a `check` + argument (default `FALSE`). When `TRUE`, the call errors instead of + silently losing information: on input if `data` contains `NA`s, on + output if the materialized R vector contains a value that's + indistinguishable from `NA` or that has wrapped through the integer + container. +* `as_array()` on a `ui32` buffer now returns a `bit64::integer64` + instead of a base `integer`, so values `>= 2^31` round-trip losslessly + rather than wrapping to negative. # pjrt 0.3.0 diff --git a/R/async.R b/R/async.R index 01495519..9cbb14bd 100644 --- a/R/async.R +++ b/R/async.R @@ -77,7 +77,7 @@ value.PJRTArrayPromise <- function(x, ...) { if (is.null(x$materialized)) { impl_host_data_await(x$data) out <- impl_raw_to_array(x$data, x$dtype, x$shape) - if (x$dtype %in% c("i64", "ui64")) { + if (x$dtype %in% c("i64", "ui64", "ui32")) { class(out) <- "integer64" } x$materialized <- out diff --git a/R/buffer.R b/R/buffer.R index bc8d8de6..f3de87cc 100644 --- a/R/buffer.R +++ b/R/buffer.R @@ -61,7 +61,7 @@ is_buffer <- function(x) { #' case the first device for that platform is used. #' The default is to use the CPU platform, but this can be configured via the `PJRT_PLATFORM` #' environment variable. -#' @param scan_na (`logical(1)`)\cr +#' @param check (`logical(1)`)\cr #' If `TRUE`, scan `data` for `NA` values before transferring to the device and #' raise an error if any are present. R's `NA` markers have no representation #' at the XLA level (e.g. `NA_integer_` is just the bit pattern `-2147483648`, @@ -95,19 +95,19 @@ pjrt_buffer <- function( dtype = NULL, device = NULL, shape = NULL, - scan_na = FALSE, + check = FALSE, ... ) { UseMethod("pjrt_buffer") } -check_scan_na <- function(data, scan_na) { - assert_flag(scan_na) - if (scan_na && anyNA(data)) { +check_input_na <- function(data, check) { + assert_flag(check) + if (check && anyNA(data)) { n_na <- sum(is.na(data)) cli_abort(c( "Input {.arg data} contains {n_na} {.val NA} value{?s}, which {?has/have} no representation at the XLA level.", - i = "Replace or drop missing values before transferring, or set {.code scan_na = FALSE} to skip this check." + i = "Replace or drop missing values before transferring, or set {.code check = FALSE} to skip this check." )) } invisible(NULL) @@ -144,7 +144,7 @@ pjrt_buffer.PJRTBuffer <- buffer_identity #' scalar <- pjrt_scalar(42, dtype = "f32") #' scalar #' @export -pjrt_scalar <- function(data, dtype = NULL, device = NULL, scan_na = FALSE, ...) { +pjrt_scalar <- function(data, dtype = NULL, device = NULL, check = FALSE, ...) { UseMethod("pjrt_scalar") } @@ -244,10 +244,10 @@ pjrt_buffer.logical <- function( dtype = NULL, device = NULL, shape = NULL, - scan_na = FALSE, + check = FALSE, ... ) { - check_scan_na(data, scan_na) + check_input_na(data, check) args <- convert_buffer_args(data, dtype, device, shape, "pred", ...) buffer <- do.call(impl_client_buffer_from_logical, args) buffer @@ -259,10 +259,10 @@ pjrt_buffer.integer <- function( dtype = NULL, device = NULL, shape = NULL, - scan_na = FALSE, + check = FALSE, ... ) { - check_scan_na(data, scan_na) + check_input_na(data, check) args <- convert_buffer_args(data, dtype, device, shape, "i32", ...) buffer <- do.call(impl_client_buffer_from_integer, args) buffer @@ -274,10 +274,10 @@ pjrt_buffer.numeric <- function( dtype = NULL, device = NULL, shape = NULL, - scan_na = FALSE, + check = FALSE, ... ) { - check_scan_na(data, scan_na) + check_input_na(data, check) args <- convert_buffer_args(data, dtype, device, shape, "f32", ...) buffer <- do.call(impl_client_buffer_from_double, args) buffer @@ -344,13 +344,13 @@ pjrt_scalar.logical <- function( data, dtype = NULL, device = NULL, - scan_na = FALSE, + check = FALSE, ... ) { if (length(data) != 1) { cli_abort("data must have length 1") } - check_scan_na(data, scan_na) + check_input_na(data, check) args <- convert_buffer_args(data, dtype, device, integer(), "pred", ...) buffer <- do.call(impl_client_buffer_from_logical, args) buffer @@ -361,13 +361,13 @@ pjrt_scalar.integer <- function( data, dtype = NULL, device = NULL, - scan_na = FALSE, + check = FALSE, ... ) { if (length(data) != 1) { cli_abort("data must have length 1") } - check_scan_na(data, scan_na) + check_input_na(data, check) args <- convert_buffer_args(data, dtype, device, integer(), "i32", ...) buffer <- do.call(impl_client_buffer_from_integer, args) buffer @@ -378,13 +378,13 @@ pjrt_scalar.numeric <- function( data, dtype = NULL, device = NULL, - scan_na = FALSE, + check = FALSE, ... ) { if (length(data) != 1) { cli_abort("data must have length 1") } - check_scan_na(data, scan_na) + check_input_na(data, check) args <- convert_buffer_args(data, dtype, device, integer(), "f32", ...) buffer <- do.call(impl_client_buffer_from_double, args) buffer @@ -440,26 +440,46 @@ elt_type <- function(x) { #' #' @param x ([`PJRTBuffer`][pjrt_buffer])\cr #' Buffer to convert. -#' @param scan_na (`logical(1)`)\cr -#' If `TRUE` and the buffer dtype is one of the four integer dtypes that -#' round-trip through a signed R container (`i32` / `ui32` via `integer`, -#' `i64` / `ui64` via `bit64::integer64`), scan the materialized vector -#' for the reserved NA bit pattern (`INT_MIN` or `INT64_MIN`) and raise an -#' error if any are present. No-op for float, boolean, and small-integer -#' dtypes (which have no NA-collision risk). +#' @param check (`logical(1)`)\cr +#' If `TRUE`, sanity-check the materialized R vector against losing +#' information across the device-to-host boundary, and abort if any +#' problematic value is detected: +#' * **`i32` / `i64`**: any `NA` in the result. R's `NA_integer_` shares +#' the bit pattern `INT_MIN`; `bit64`'s `NA_integer64_` shares +#' `INT64_MIN`. A legitimate device value at those bit patterns is +#' indistinguishable from `NA` once materialized in R. +#' * **`ui32` / `ui64`**: any negative value in the result. Both unsigned +#' dtypes are stored as `bit64::integer64` (signed 64-bit), which has +#' full headroom for `ui32` but wraps `ui64` values `>= 2^63` to +#' negative — any negative value therefore signals either an NA +#' collision (`INT64_MIN`) or a u64 wrap. +#' +#' No-op for float, boolean, and small-integer (`i8` / `i16` / `ui8` / +#' `ui16`) dtypes — none of those have a representable-value collision +#' with R's NA scheme. #' @param ... Additional arguments (unused). #' @return An R `array` (or `vector` for shape `integer()`). #' @export -as_array.PJRTBuffer <- function(x, scan_na = FALSE, ...) { +as_array.PJRTBuffer <- function(x, check = FALSE, ...) { result <- value(as_array_async(x)) - assert_flag(scan_na) - if (scan_na) { + assert_flag(check) + if (check) { dt <- as.character(elt_type(x)) - if (dt %in% c("i32", "ui32", "i64", "ui64") && anyNA(result)) { + if (dt %in% c("i32", "i64") && anyNA(result)) { cli_abort(c( "Materialized {.cls {dt}} buffer contains a value that R cannot distinguish from {.val NA}.", - i = "{.val i32}/{.val ui32} reserve the bit pattern {.val -2147483648} ({.code INT_MIN}); {.val i64}/{.val ui64} reserve {.val -9223372036854775808} ({.code INT64_MIN}).", - i = "This collision is irrecoverable: the device value and {.val NA} are indistinguishable in R. Set {.code scan_na = FALSE} to skip this check." + i = "{.val i32} reserves the bit pattern {.val -2147483648} ({.code INT_MIN}); {.val i64} reserves {.val -9223372036854775808} ({.code INT64_MIN}).", + i = "Set {.code check = FALSE} to skip this check." + )) + } else if (dt %in% c("ui32", "ui64") && (anyNA(result) || any(result < 0, na.rm = TRUE))) { + # A negative or NA value in an unsigned dtype's integer64 storage means + # the source value wrapped (ui64 >= 2^63) — exactly 2^63 becomes + # NA_integer64_ (INT64_MIN); 2^63 + k becomes a non-NA negative int64. + # Either way, the unsigned magnitude was lost. + cli_abort(c( + "Materialized {.cls {dt}} buffer contains a value that fell outside the positive range of R's {.cls integer64}.", + i = "{.val ui64} values {.code >= 2^63} wrap (exactly {.code 2^63} becomes {.code NA_integer64_}; values above it become negative {.cls integer64}).", + i = "Set {.code check = FALSE} to skip this check." )) } } diff --git a/man/as_array.PJRTBuffer.Rd b/man/as_array.PJRTBuffer.Rd index 9f6aacb5..9ff252dd 100644 --- a/man/as_array.PJRTBuffer.Rd +++ b/man/as_array.PJRTBuffer.Rd @@ -4,19 +4,31 @@ \alias{as_array.PJRTBuffer} \title{Convert a PJRTBuffer to an R Array} \usage{ -\method{as_array}{PJRTBuffer}(x, scan_na = FALSE, ...) +\method{as_array}{PJRTBuffer}(x, check = FALSE, ...) } \arguments{ \item{x}{(\code{\link[=pjrt_buffer]{PJRTBuffer}})\cr Buffer to convert.} -\item{scan_na}{(\code{logical(1)})\cr -If \code{TRUE} and the buffer dtype is one of the four integer dtypes that -round-trip through a signed R container (\code{i32} / \code{ui32} via \code{integer}, -\code{i64} / \code{ui64} via \code{bit64::integer64}), scan the materialized vector -for the reserved NA bit pattern (\code{INT_MIN} or \code{INT64_MIN}) and raise an -error if any are present. No-op for float, boolean, and small-integer -dtypes (which have no NA-collision risk).} +\item{check}{(\code{logical(1)})\cr +If \code{TRUE}, sanity-check the materialized R vector against losing +information across the device-to-host boundary, and abort if any +problematic value is detected: +\itemize{ +\item \strong{\code{i32} / \code{i64}}: any \code{NA} in the result. R's \code{NA_integer_} shares +the bit pattern \code{INT_MIN}; \code{bit64}'s \code{NA_integer64_} shares +\code{INT64_MIN}. A legitimate device value at those bit patterns is +indistinguishable from \code{NA} once materialized in R. +\item \strong{\code{ui32} / \code{ui64}}: any negative value in the result. Both unsigned +dtypes are stored as \code{bit64::integer64} (signed 64-bit), which has +full headroom for \code{ui32} but wraps \code{ui64} values \verb{>= 2^63} to +negative — any negative value therefore signals either an NA +collision (\code{INT64_MIN}) or a u64 wrap. +} + +No-op for float, boolean, and small-integer (\code{i8} / \code{i16} / \code{ui8} / +\code{ui16}) dtypes — none of those have a representable-value collision +with R's NA scheme.} \item{...}{Additional arguments (unused).} } diff --git a/man/pjrt_buffer.Rd b/man/pjrt_buffer.Rd index ba116563..bec58c0e 100644 --- a/man/pjrt_buffer.Rd +++ b/man/pjrt_buffer.Rd @@ -11,11 +11,11 @@ pjrt_buffer( dtype = NULL, device = NULL, shape = NULL, - scan_na = FALSE, + check = FALSE, ... ) -pjrt_scalar(data, dtype = NULL, device = NULL, scan_na = FALSE, ...) +pjrt_scalar(data, dtype = NULL, device = NULL, check = FALSE, ...) pjrt_empty(dtype, shape, device = NULL) } @@ -48,7 +48,7 @@ The dimensions of the buffer. The default (\code{NULL}) is to infer them from the data if possible. The default (\code{NULL}) depends on the method.} -\item{scan_na}{(\code{logical(1)})\cr +\item{check}{(\code{logical(1)})\cr If \code{TRUE}, scan \code{data} for \code{NA} values before transferring to the device and raise an error if any are present. R's \code{NA} markers have no representation at the XLA level (e.g. \code{NA_integer_} is just the bit pattern \code{-2147483648}, diff --git a/src/pjrt.cpp b/src/pjrt.cpp index 302b4249..7f050a65 100644 --- a/src/pjrt.cpp +++ b/src/pjrt.cpp @@ -315,10 +315,12 @@ SEXP raw_to_array_impl(const uint8_t *raw_data, void *out_data; if (r_type == REALSXP) { - if constexpr (std::is_same_v || std::is_same_v) { - // 64-bit integer -> "pseudo-double": REALSXP slots carry the int64 bit - // pattern (the storage layout used by bit64::integer64). The R caller - // attaches the integer64 class. + if constexpr (std::is_same_v || std::is_same_v || + std::is_same_v) { + // Integer dtype that can't fit in R's signed int32 -> "pseudo-double": + // REALSXP slots carry the int64 bit pattern (the storage layout used by + // bit64::integer64). The R caller attaches the integer64 class. Widening + // uint32_t to int64_t is value-preserving (u32 max < 2^53). static_assert(sizeof(double) == sizeof(int64_t), "bit64::integer64 layout requires sizeof(double) == " "sizeof(int64_t)"); @@ -689,7 +691,10 @@ SEXP impl_raw_to_array(Rcpp::XPtr host_data, } else if (dtype == "ui16") { return raw_to_array_impl(raw_data, dimensions, INTSXP); } else if (dtype == "ui32") { - return raw_to_array_impl(raw_data, dimensions, INTSXP); + // u32 -> integer64 storage: signed int32 has no headroom for ui32 values + // >= 2^31; widen to integer64 (53 bits of headroom) so the R caller gets + // the full unsigned magnitude. R-side classes the result as integer64. + return raw_to_array_impl(raw_data, dimensions, REALSXP); } else if (dtype == "ui64") { return raw_to_array_impl(raw_data, dimensions, REALSXP); } else if (dtype == "pred") { diff --git a/tests/testthat/test-buffer.R b/tests/testthat/test-buffer.R index 6e5414b0..4c61a6a9 100644 --- a/tests/testthat/test-buffer.R +++ b/tests/testthat/test-buffer.R @@ -200,7 +200,7 @@ test_that("pjrt_buffer handles edge cases", { expect_error(pjrt_buffer(numeric(0), shape = c(1, 4)), "but specified shape is") }) -test_that("pjrt_buffer scan_na = FALSE silently transfers NA", { +test_that("pjrt_buffer check = FALSE silently transfers NA", { # default behaviour: NAs flow through and become dtype-specific bit patterns. expect_no_error(pjrt_buffer(c(1, NA, 3))) expect_no_error(pjrt_buffer(c(1L, NA_integer_, 3L))) @@ -208,78 +208,85 @@ test_that("pjrt_buffer scan_na = FALSE silently transfers NA", { expect_no_error(pjrt_scalar(NA_integer_)) }) -test_that("pjrt_buffer scan_na = TRUE errors on NA input", { +test_that("pjrt_buffer check = TRUE errors on NA input", { expect_error( - pjrt_buffer(c(1, NA, 3), scan_na = TRUE), + pjrt_buffer(c(1, NA, 3), check = TRUE), "no representation at the XLA level" ) expect_error( - pjrt_buffer(c(1L, NA_integer_, 3L), scan_na = TRUE), + pjrt_buffer(c(1L, NA_integer_, 3L), check = TRUE), "no representation at the XLA level" ) expect_error( - pjrt_buffer(c(TRUE, NA, FALSE), scan_na = TRUE), + pjrt_buffer(c(TRUE, NA, FALSE), check = TRUE), "no representation at the XLA level" ) expect_error( - pjrt_scalar(NA_integer_, scan_na = TRUE), + pjrt_scalar(NA_integer_, check = TRUE), "no representation at the XLA level" ) expect_error( - pjrt_scalar(NA_real_, scan_na = TRUE), + pjrt_scalar(NA_real_, check = TRUE), "no representation at the XLA level" ) expect_error( - pjrt_scalar(NA, scan_na = TRUE), + pjrt_scalar(NA, check = TRUE), "no representation at the XLA level" ) # Clean inputs pass through unaffected. - expect_no_error(pjrt_buffer(c(1, 2, 3), scan_na = TRUE)) - expect_no_error(pjrt_buffer(c(1L, 2L, 3L), scan_na = TRUE)) - expect_no_error(pjrt_buffer(c(TRUE, FALSE), scan_na = TRUE)) + expect_no_error(pjrt_buffer(c(1, 2, 3), check = TRUE)) + expect_no_error(pjrt_buffer(c(1L, 2L, 3L), check = TRUE)) + expect_no_error(pjrt_buffer(c(TRUE, FALSE), check = TRUE)) }) -test_that("as_array scan_na = TRUE errors on NA collision for i32 / ui32 / i64 / ui64", { +test_that("as_array check = TRUE catches i32 / i64 NA collisions", { + client <- pjrt_client("cpu") + # i32: NA_integer_ bit pattern is INT_MIN (-2147483648). buf_i32 <- pjrt_buffer(NA_integer_, dtype = "i32") expect_true(anyNA(as_array(buf_i32))) - expect_error(as_array(buf_i32, scan_na = TRUE), "indistinguishable") - - # ui32: planting 2^31 (= 0x80000000) wraps to INT_MIN as signed int32. - bytes_u32 <- as.raw(c(0x00, 0x00, 0x00, 0x80)) - client <- pjrt_client("cpu") - buf_u32 <- impl_client_buffer_from_raw(client, devices(client)[[1L]], bytes_u32, 1L, "ui32") - expect_true(anyNA(as_array(buf_u32))) - expect_error(as_array(buf_u32, scan_na = TRUE), "indistinguishable") + expect_error(as_array(buf_i32, check = TRUE), "distinguish from") # i64: planting INT64_MIN. bytes_i64 <- as.raw(c(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80)) buf_i64 <- impl_client_buffer_from_raw(client, devices(client)[[1L]], bytes_i64, 1L, "i64") expect_true(anyNA(as_array(buf_i64))) - expect_error(as_array(buf_i64, scan_na = TRUE), "indistinguishable") + expect_error(as_array(buf_i64, check = TRUE), "distinguish from") - # ui64: planting 2^63 wraps to INT64_MIN. - buf_u64 <- impl_client_buffer_from_raw(client, devices(client)[[1L]], bytes_i64, 1L, "ui64") - expect_true(anyNA(as_array(buf_u64))) - expect_error(as_array(buf_u64, scan_na = TRUE), "indistinguishable") + # Clean buffers — no error. + expect_no_error(as_array(pjrt_buffer(1:3, dtype = "i32"), check = TRUE)) + expect_no_error(as_array(pjrt_buffer(1L, dtype = "i64"), check = TRUE)) +}) - # Clean buffers — no collision, no error. - expect_no_error(as_array(pjrt_buffer(1:3, dtype = "i32"), scan_na = TRUE)) - expect_no_error(as_array(pjrt_buffer(1L, dtype = "i64"), scan_na = TRUE)) +test_that("as_array check = TRUE catches ui64 wrap (>= 2^63)", { + client <- pjrt_client("cpu") + # 2^63 wraps to INT64_MIN (negative integer64). + bytes <- as.raw(c(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80)) + buf <- impl_client_buffer_from_raw(client, devices(client)[[1L]], bytes, 1L, "ui64") + expect_error(as_array(buf, check = TRUE), "outside the positive range") +}) + +test_that("as_array preserves the full ui32 range losslessly (no wrap)", { + client <- pjrt_client("cpu") + # Bit pattern 0x80000000 used to wrap to INT_MIN / NA_integer_ via signed + # int32. ui32 now materializes as integer64 (53 bits of headroom over u32). + bytes <- as.raw(c(0x00, 0x00, 0x00, 0x80)) + buf <- impl_client_buffer_from_raw(client, devices(client)[[1L]], bytes, 1L, "ui32") + result <- as_array(buf, check = TRUE) + expect_true(bit64::is.integer64(result)) + expect_equal(as.character(result), "2147483648") }) -test_that("as_array scan_na is a no-op for float / bool / small-integer dtypes", { - # Float NaN is not flagged — it isnt the R-NA bit pattern in the same way. +test_that("as_array check is a no-op for float / bool / small-integer dtypes", { buf_f32 <- pjrt_buffer(c(1, NaN, 3), dtype = "f32") - expect_no_error(as_array(buf_f32, scan_na = TRUE)) + expect_no_error(as_array(buf_f32, check = TRUE)) buf_pred <- pjrt_buffer(c(TRUE, FALSE), dtype = "pred") - expect_no_error(as_array(buf_pred, scan_na = TRUE)) + expect_no_error(as_array(buf_pred, check = TRUE)) - # i8/i16/ui8/ui16 fit fully inside R's int32 with headroom — no collision. buf_i8 <- pjrt_buffer(c(-128L, 0L, 127L), dtype = "i8") - expect_no_error(as_array(buf_i8, scan_na = TRUE)) + expect_no_error(as_array(buf_i8, check = TRUE)) }) test_that("pjrt_buffer preserves 3d dimensions", { From 62926ec530d6d6b097671f9d9f42b6e72748792c Mon Sep 17 00:00:00 2001 From: Sebastian Fischer Date: Mon, 18 May 2026 12:40:57 +0000 Subject: [PATCH 2/2] refactor: drop ui32 from the check (now lossless via integer64) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ui32 is materialized as integer64 with 53 bits of headroom over the unsigned-32 range, so it can never produce a wrapped or NA value — the check for ui32 was dead. Limit the negativity check to ui64 only and update the docstring. Co-Authored-By: Claude Opus 4.7 (1M context) --- R/buffer.R | 30 +++++++++++++++--------------- man/as_array.PJRTBuffer.Rd | 15 +++++++-------- tests/testthat/test-buffer.R | 2 +- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/R/buffer.R b/R/buffer.R index f3de87cc..d22d6419 100644 --- a/R/buffer.R +++ b/R/buffer.R @@ -448,15 +448,14 @@ elt_type <- function(x) { #' the bit pattern `INT_MIN`; `bit64`'s `NA_integer64_` shares #' `INT64_MIN`. A legitimate device value at those bit patterns is #' indistinguishable from `NA` once materialized in R. -#' * **`ui32` / `ui64`**: any negative value in the result. Both unsigned -#' dtypes are stored as `bit64::integer64` (signed 64-bit), which has -#' full headroom for `ui32` but wraps `ui64` values `>= 2^63` to -#' negative — any negative value therefore signals either an NA -#' collision (`INT64_MIN`) or a u64 wrap. +#' * **`ui64`**: any negative value in the result. `ui64` is stored as +#' `bit64::integer64` (signed 64-bit), which wraps values `>= 2^63` +#' to negative — exactly `2^63` becomes `NA_integer64_`, anything +#' above becomes a non-NA negative integer64. #' -#' No-op for float, boolean, and small-integer (`i8` / `i16` / `ui8` / -#' `ui16`) dtypes — none of those have a representable-value collision -#' with R's NA scheme. +#' No-op for float, boolean, and small/unsigned-32 integer dtypes — +#' `ui32` is now stored as `integer64` and has full headroom, so it +#' cannot produce a wrapped or NA value. #' @param ... Additional arguments (unused). #' @return An R `array` (or `vector` for shape `integer()`). #' @export @@ -471,14 +470,15 @@ as_array.PJRTBuffer <- function(x, check = FALSE, ...) { i = "{.val i32} reserves the bit pattern {.val -2147483648} ({.code INT_MIN}); {.val i64} reserves {.val -9223372036854775808} ({.code INT64_MIN}).", i = "Set {.code check = FALSE} to skip this check." )) - } else if (dt %in% c("ui32", "ui64") && (anyNA(result) || any(result < 0, na.rm = TRUE))) { - # A negative or NA value in an unsigned dtype's integer64 storage means - # the source value wrapped (ui64 >= 2^63) — exactly 2^63 becomes - # NA_integer64_ (INT64_MIN); 2^63 + k becomes a non-NA negative int64. - # Either way, the unsigned magnitude was lost. + } else if (identical(dt, "ui64") && (anyNA(result) || any(result < 0, na.rm = TRUE))) { + # ui64 values >= 2^63 wrap when stored as signed int64 — exactly 2^63 + # becomes NA_integer64_ (INT64_MIN); 2^63 + k becomes a non-NA negative + # int64. Either way, the unsigned magnitude was lost. + # (ui32 is now materialized as integer64 and has full headroom, so it + # cannot produce a negative value; no check needed.) cli_abort(c( - "Materialized {.cls {dt}} buffer contains a value that fell outside the positive range of R's {.cls integer64}.", - i = "{.val ui64} values {.code >= 2^63} wrap (exactly {.code 2^63} becomes {.code NA_integer64_}; values above it become negative {.cls integer64}).", + "Materialized {.cls ui64} buffer contains a value `>= 2^63` that wrapped through R's signed {.cls integer64}.", + i = "Exactly {.code 2^63} becomes {.code NA_integer64_}; larger values become negative {.cls integer64}.", i = "Set {.code check = FALSE} to skip this check." )) } diff --git a/man/as_array.PJRTBuffer.Rd b/man/as_array.PJRTBuffer.Rd index 9ff252dd..04d943d5 100644 --- a/man/as_array.PJRTBuffer.Rd +++ b/man/as_array.PJRTBuffer.Rd @@ -19,16 +19,15 @@ problematic value is detected: the bit pattern \code{INT_MIN}; \code{bit64}'s \code{NA_integer64_} shares \code{INT64_MIN}. A legitimate device value at those bit patterns is indistinguishable from \code{NA} once materialized in R. -\item \strong{\code{ui32} / \code{ui64}}: any negative value in the result. Both unsigned -dtypes are stored as \code{bit64::integer64} (signed 64-bit), which has -full headroom for \code{ui32} but wraps \code{ui64} values \verb{>= 2^63} to -negative — any negative value therefore signals either an NA -collision (\code{INT64_MIN}) or a u64 wrap. +\item \strong{\code{ui64}}: any negative value in the result. \code{ui64} is stored as +\code{bit64::integer64} (signed 64-bit), which wraps values \verb{>= 2^63} +to negative — exactly \code{2^63} becomes \code{NA_integer64_}, anything +above becomes a non-NA negative integer64. } -No-op for float, boolean, and small-integer (\code{i8} / \code{i16} / \code{ui8} / -\code{ui16}) dtypes — none of those have a representable-value collision -with R's NA scheme.} +No-op for float, boolean, and small/unsigned-32 integer dtypes — +\code{ui32} is now stored as \code{integer64} and has full headroom, so it +cannot produce a wrapped or NA value.} \item{...}{Additional arguments (unused).} } diff --git a/tests/testthat/test-buffer.R b/tests/testthat/test-buffer.R index 4c61a6a9..a83f8742 100644 --- a/tests/testthat/test-buffer.R +++ b/tests/testthat/test-buffer.R @@ -264,7 +264,7 @@ test_that("as_array check = TRUE catches ui64 wrap (>= 2^63)", { # 2^63 wraps to INT64_MIN (negative integer64). bytes <- as.raw(c(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80)) buf <- impl_client_buffer_from_raw(client, devices(client)[[1L]], bytes, 1L, "ui64") - expect_error(as_array(buf, check = TRUE), "outside the positive range") + expect_error(as_array(buf, check = TRUE), "wrapped through") }) test_that("as_array preserves the full ui32 range losslessly (no wrap)", {