pbower · pbower · Mar 15, 2026 · Mar 15, 2026
diff --git a/Cargo.toml b/Cargo.toml
@@ -76,7 +76,7 @@ c_ffi_tests = ['cc']
 # Highly recommend keeping these off unless required
 # E.g., constrained or embedded environments, as they add combinatorial
 # weight to the binary and enum match arms
-extended_categorical = ["extended_numeric_types"]
+extended_categorical = []
 
 # Adds UInt8, UInt16, Int8, Int16 types.
 #

diff --git a/pyo3/Cargo.toml b/pyo3/Cargo.toml
@@ -22,7 +22,7 @@ name = "minarrow_pyo3"
 crate-type = ["cdylib", "rlib"]
 
 [dependencies]
-minarrow = { version = "0.8.1", features = ["large_string"] }
+minarrow = { version = "0.9.1", features = ["large_string"] }
 pyo3 = { version = "0.23" }
 thiserror = "2"
 

diff --git a/src/enums/array.rs b/src/enums/array.rs
diff --git a/src/enums/operators.rs b/src/enums/operators.rs
@@ -27,6 +27,13 @@ pub enum ArithmeticOperator {
     /// For integers, uses repeated multiplication. For floating-point, uses `pow()` function.
     /// Negative exponents on integers may yield zero due to truncation.
     Power,
+    /// Floor division (`lhs // rhs`), rounding the quotient towards negative infinity.
+    ///
+    /// Unsigned integers: identical to truncating division. Signed integers: one less than
+    /// truncating division when the remainder is non-zero and the operand signs differ.
+    /// Floating-point: `(lhs / rhs).floor()`. Integer division by zero panics in the dense
+    /// kernels and produces a null (zero value, validity bit cleared) in the masked kernels.
+    FloorDiv,
 }
 
 /// Comparison operators for binary predicates.

diff --git a/src/enums/value/conversions.rs b/src/enums/value/conversions.rs
@@ -874,6 +874,20 @@ impl TryFrom<Value> for NumericArrayV {
                     }),
                 }
             }
+            Value::Array(inner) => {
+                let arr = Arc::try_unwrap(inner).unwrap_or_else(|arc| (*arc).clone());
+                match arr {
+                    Array::NumericArray(num_arr) => {
+                        let len = num_arr.len();
+                        Ok(NumericArrayV::new(num_arr, 0, len))
+                    }
+                    _ => Err(MinarrowError::TypeError {
+                        from: "Value",
+                        to: "NumericArrayV",
+                        message: Some("Array is not a NumericArray".to_owned()),
+                    }),
+                }
+            }
             _ => Err(MinarrowError::TypeError {
                 from: "Value",
                 to: "NumericArrayV",

diff --git a/src/ffi/arrow_c_ffi.rs b/src/ffi/arrow_c_ffi.rs
@@ -553,9 +553,9 @@ fn export_categorical_array_to_c(
 
     let mut field = schema.fields[0].clone();
     field.dtype = match index_bits {
-        #[cfg(all(feature = "extended_categorical", feature = "extended_numeric_types"))]
+        #[cfg(feature = "extended_categorical")]
         8 => ArrowType::Dictionary(crate::ffi::arrow_dtype::CategoricalIndexType::UInt8),
-        #[cfg(all(feature = "extended_categorical", feature = "extended_numeric_types"))]
+        #[cfg(feature = "extended_categorical")]
         16 => ArrowType::Dictionary(crate::ffi::arrow_dtype::CategoricalIndexType::UInt16),
         32 => ArrowType::Dictionary(crate::ffi::arrow_dtype::CategoricalIndexType::UInt32),
         #[cfg(feature = "extended_categorical")]
@@ -1483,14 +1483,12 @@ unsafe fn import_categorical(
 
     // Build codes & wrap
     match index_type {
-        #[cfg(feature = "extended_numeric_types")]
         #[cfg(feature = "extended_categorical")]
         CategoricalIndexType::UInt8 => {
             let codes_buf = unsafe { build_codes::<u8>(codes_ptr, len, ownership) };
             let arr = CategoricalArray::<u8>::new(codes_buf, dict_strings, null_mask);
             Arc::new(Array::TextArray(TextArray::Categorical8(Arc::new(arr))))
         }
-        #[cfg(feature = "extended_numeric_types")]
         #[cfg(feature = "extended_categorical")]
         CategoricalIndexType::UInt16 => {
             let codes_buf = unsafe { build_codes::<u16>(codes_ptr, len, ownership) };
@@ -1502,7 +1500,6 @@ unsafe fn import_categorical(
             let arr = CategoricalArray::<u32>::new(codes_buf, dict_strings, null_mask);
             Arc::new(Array::TextArray(TextArray::Categorical32(Arc::new(arr))))
         }
-        #[cfg(feature = "extended_numeric_types")]
         #[cfg(feature = "extended_categorical")]
         CategoricalIndexType::UInt64 => {
             let codes_buf = unsafe { build_codes::<u64>(codes_ptr, len, ownership) };

diff --git a/src/ffi/arrow_dtype.rs b/src/ffi/arrow_dtype.rs
@@ -117,8 +117,7 @@ pub enum ArrowType {
 /// - Smaller widths reduce memory footprint for low-cardinality data.
 /// - Larger widths enable more distinct categories without overflow.
 /// - Variant availability depends on feature flags:
-///   - `UInt8` and `UInt16` require both `extended_categorical` and `extended_numeric_types`.
-///   - `UInt64` requires `extended_categorical`.
+///   - `UInt8`, `UInt16`, and `UInt64` require `extended_categorical`.
 ///   - `UInt32` is always available.
 ///
 /// ## Interoperability
@@ -127,12 +126,12 @@ pub enum ArrowType {
 
 #[derive(PartialEq, Clone, Debug)]
 pub enum CategoricalIndexType {
-    #[cfg(all(feature = "extended_categorical", feature = "extended_numeric_types"))]
+    #[cfg(feature = "extended_categorical")]
     UInt8,
-    #[cfg(all(feature = "extended_categorical", feature = "extended_numeric_types"))]
+    #[cfg(feature = "extended_categorical")]
     UInt16,
     UInt32,
-    #[cfg(all(feature = "extended_categorical"))]
+    #[cfg(feature = "extended_categorical")]
     UInt64,
 }
 

diff --git a/src/kernels/arithmetic/simd.rs b/src/kernels/arithmetic/simd.rs
@@ -63,7 +63,7 @@ pub fn int_dense_body_simd<T, const LANES: usize>(
             ArithmeticOperator::Multiply => a * b,
             ArithmeticOperator::Divide => a / b, // Panics if divisor is zero
             ArithmeticOperator::Remainder => a % b, // Panics if divisor is zero
-            ArithmeticOperator::Power => {
+            ArithmeticOperator::Power | ArithmeticOperator::FloorDiv => {
                 vectorisable = 0;
                 break;
             }
@@ -88,6 +88,15 @@ pub fn int_dense_body_simd<T, const LANES: usize>(
                 }
                 acc
             }
+            ArithmeticOperator::FloorDiv => {
+                if rhs[idx] == T::zero() {
+                    panic!("Floor division by zero")
+                } else {
+                    let d = lhs[idx] / rhs[idx];
+                    let r = lhs[idx] % rhs[idx];
+                    if r != T::zero() && (lhs[idx] ^ rhs[idx]) < T::zero() { d - T::one() } else { d }
+                }
+            }
         };
     }
 }
@@ -159,6 +168,22 @@ pub fn int_masked_body_simd<T, const LANES: usize>(
                     let r = div_zero.select(Simd::splat(T::zero()), r);
                     (r, valid)
                 }
+                ArithmeticOperator::FloorDiv => {
+                    let div_zero = b.simd_eq(Simd::splat(T::zero()));
+                    let valid = !div_zero;
+                    // Per-lane floor division with sign correction
+                    let mut tmp = [T::zero(); LANES];
+                    for l in 0..LANES {
+                        if b[l] == T::zero() {
+                            tmp[l] = T::zero();
+                        } else {
+                            let d = a[l] / b[l];
+                            let r = a[l] % b[l];
+                            tmp[l] = if r != T::zero() && (a[l] ^ b[l]) < T::zero() { d - T::one() } else { d };
+                        }
+                    }
+                    (Simd::<T, LANES>::from_array(tmp), valid)
+                }
             };
             r.copy_to_slice(&mut out[i..i + LANES]);
             // Write the out_mask based on the op
@@ -217,6 +242,21 @@ pub fn int_masked_body_simd<T, const LANES: usize>(
                         }
                     }
                 }
+                ArithmeticOperator::FloorDiv => {
+                    if rhs[idx] == T::zero() {
+                        out[idx] = T::zero();
+                        unsafe {
+                            out_mask.set_unchecked(idx, false);
+                        }
+                    } else {
+                        let d = lhs[idx] / rhs[idx];
+                        let r = lhs[idx] % rhs[idx];
+                        out[idx] = if r != T::zero() && (lhs[idx] ^ rhs[idx]) < T::zero() { d - T::one() } else { d };
+                        unsafe {
+                            out_mask.set_unchecked(idx, true);
+                        }
+                    }
+                }
             }
         }
         return;
@@ -254,6 +294,18 @@ pub fn int_masked_body_simd<T, const LANES: usize>(
                 }
                 Simd::<T, LANES>::from_array(tmp)
             }
+            ArithmeticOperator::FloorDiv => {
+                // Per-lane floor division with sign correction
+                let mut tmp = [T::zero(); LANES];
+                for l in 0..LANES {
+                    if b[l] != T::zero() {
+                        let d = a[l] / b[l];
+                        let r = a[l] % b[l];
+                        tmp[l] = if r != T::zero() && (a[l] ^ b[l]) < T::zero() { d - T::one() } else { d };
+                    }
+                }
+                Simd::<T, LANES>::from_array(tmp)
+            }
         };
 
         // apply source validity mask, write results
@@ -262,8 +314,8 @@ pub fn int_masked_body_simd<T, const LANES: usize>(
 
         // write out-mask bits: combine source mask with div-by-zero validity
         let final_mask = match op {
-            ArithmeticOperator::Divide | ArithmeticOperator::Remainder => {
-                // For div/rem: valid iff source is valid AND not dividing by zero
+            ArithmeticOperator::Divide | ArithmeticOperator::Remainder | ArithmeticOperator::FloorDiv => {
+                // Valid iff source is valid and not dividing by zero
                 m_src & !div_zero
             }
             _ => m_src,
@@ -301,6 +353,15 @@ pub fn int_masked_body_simd<T, const LANES: usize>(
                     }
                 }
                 ArithmeticOperator::Power => (lhs[j].pow(rhs[j].to_u32().unwrap_or(0)), true),
+                ArithmeticOperator::FloorDiv => {
+                    if rhs[j] == T::zero() {
+                        (T::zero(), false)
+                    } else {
+                        let d = lhs[j] / rhs[j];
+                        let r = lhs[j] % rhs[j];
+                        if r != T::zero() && (lhs[j] ^ rhs[j]) < T::zero() { (d - T::one(), true) } else { (d, true) }
+                    }
+                }
             };
             out[j] = result;
             unsafe { out_mask.set_unchecked(j, final_valid) };
@@ -345,6 +406,7 @@ pub fn float_masked_body_f32_simd<const LANES: usize>(
             ArithmeticOperator::Divide => a / b,
             ArithmeticOperator::Remainder => a % b,
             ArithmeticOperator::Power => (b * a.ln()).exp(),
+            ArithmeticOperator::FloorDiv => (a / b).floor(),
         };
 
         let selected = m.select(res, Simd::<f32, LANES>::splat(0.0));
@@ -371,6 +433,7 @@ pub fn float_masked_body_f32_simd<const LANES: usize>(
                 ArithmeticOperator::Divide => lhs[j] / rhs[j],
                 ArithmeticOperator::Remainder => lhs[j] % rhs[j],
                 ArithmeticOperator::Power => (rhs[j] * lhs[j].ln()).exp(),
+                ArithmeticOperator::FloorDiv => (lhs[j] / rhs[j]).floor(),
             };
             unsafe { out_mask.set_unchecked(j, true) };
         } else {
@@ -417,6 +480,7 @@ pub fn float_masked_body_f64_simd<const LANES: usize>(
             ArithmeticOperator::Divide => a / b,
             ArithmeticOperator::Remainder => a % b,
             ArithmeticOperator::Power => (b * a.ln()).exp(),
+            ArithmeticOperator::FloorDiv => (a / b).floor(),
         };
 
         let selected = m.select(res, Simd::<f64, LANES>::splat(0.0));
@@ -443,6 +507,7 @@ pub fn float_masked_body_f64_simd<const LANES: usize>(
                 ArithmeticOperator::Divide => lhs[j] / rhs[j],
                 ArithmeticOperator::Remainder => lhs[j] % rhs[j],
                 ArithmeticOperator::Power => (rhs[j] * lhs[j].ln()).exp(),
+                ArithmeticOperator::FloorDiv => (lhs[j] / rhs[j]).floor(),
             };
             unsafe { out_mask.set_unchecked(j, true) };
         } else {
@@ -474,6 +539,7 @@ pub fn float_dense_body_f32_simd<const LANES: usize>(
             ArithmeticOperator::Divide => a / b,
             ArithmeticOperator::Remainder => a % b,
             ArithmeticOperator::Power => (b * a.ln()).exp(),
+            ArithmeticOperator::FloorDiv => (a / b).floor(),
         };
         res.copy_to_slice(&mut out[i..i + LANES]);
         i += LANES;
@@ -488,6 +554,7 @@ pub fn float_dense_body_f32_simd<const LANES: usize>(
             ArithmeticOperator::Divide => lhs[j] / rhs[j],
             ArithmeticOperator::Remainder => lhs[j] % rhs[j],
             ArithmeticOperator::Power => (rhs[j] * lhs[j].ln()).exp(),
+            ArithmeticOperator::FloorDiv => (lhs[j] / rhs[j]).floor(),
         };
     }
 }
@@ -514,6 +581,7 @@ pub fn float_dense_body_f64_simd<const LANES: usize>(
             ArithmeticOperator::Divide => a / b,
             ArithmeticOperator::Remainder => a % b,
             ArithmeticOperator::Power => (b * a.ln()).exp(),
+            ArithmeticOperator::FloorDiv => (a / b).floor(),
         };
         res.copy_to_slice(&mut out[i..i + LANES]);
         i += LANES;
@@ -528,6 +596,7 @@ pub fn float_dense_body_f64_simd<const LANES: usize>(
             ArithmeticOperator::Divide => lhs[j] / rhs[j],
             ArithmeticOperator::Remainder => lhs[j] % rhs[j],
             ArithmeticOperator::Power => (rhs[j] * lhs[j].ln()).exp(),
+            ArithmeticOperator::FloorDiv => (lhs[j] / rhs[j]).floor(),
         };
     }
 }

diff --git a/src/kernels/arithmetic/std.rs b/src/kernels/arithmetic/std.rs
@@ -54,6 +54,16 @@ pub fn int_dense_body_std<T: PrimInt + ToPrimitive + WrappingAdd + WrappingSub +
                 }
             }
             ArithmeticOperator::Power => lhs[i].pow(rhs[i].to_u32().unwrap_or(0)),
+            ArithmeticOperator::FloorDiv => {
+                if rhs[i] == T::zero() {
+                    panic!("Floor division by zero")
+                } else {
+                    let d = lhs[i] / rhs[i];
+                    let r = lhs[i] % rhs[i];
+                    // If remainder is non-zero and signs differ, floor toward -inf
+                    if r != T::zero() && (lhs[i] ^ rhs[i]) < T::zero() { d - T::one() } else { d }
+                }
+            }
         };
     }
 }
@@ -93,6 +103,15 @@ pub fn int_masked_body_std<T: PrimInt + ToPrimitive + WrappingAdd + WrappingSub
                     }
                 }
                 ArithmeticOperator::Power => (lhs[i].pow(rhs[i].to_u32().unwrap_or(0)), true),
+                ArithmeticOperator::FloorDiv => {
+                    if rhs[i] == T::zero() {
+                        (T::zero(), false)
+                    } else {
+                        let d = lhs[i] / rhs[i];
+                        let r = lhs[i] % rhs[i];
+                        if r != T::zero() && (lhs[i] ^ rhs[i]) < T::zero() { (d - T::one(), true) } else { (d, true) }
+                    }
+                }
             };
             out[i] = result;
             unsafe {
@@ -121,6 +140,7 @@ pub fn float_dense_body_std<T: Float>(op: ArithmeticOperator, lhs: &[T], rhs: &[
             ArithmeticOperator::Divide => lhs[i] / rhs[i],
             ArithmeticOperator::Remainder => lhs[i] % rhs[i],
             ArithmeticOperator::Power => (rhs[i] * lhs[i].ln()).exp(),
+            ArithmeticOperator::FloorDiv => (lhs[i] / rhs[i]).floor(),
         };
     }
 }
@@ -148,6 +168,7 @@ pub fn float_masked_body_std<T: Float>(
                 ArithmeticOperator::Divide => lhs[i] / rhs[i],
                 ArithmeticOperator::Remainder => lhs[i] % rhs[i],
                 ArithmeticOperator::Power => (rhs[i] * lhs[i].ln()).exp(),
+                ArithmeticOperator::FloorDiv => (lhs[i] / rhs[i]).floor(),
             };
             unsafe {
                 out_mask.set_unchecked(i, true);

diff --git a/src/kernels/arithmetic/string.rs b/src/kernels/arithmetic/string.rs
@@ -40,9 +40,6 @@ use crate::{Bitmask, Vec64};
 use num_traits::ToPrimitive;
 
 use crate::enums::operators::ArithmeticOperator::{self};
-#[cfg(feature = "str_arithmetic")]
-use crate::kernels::string::string_predicate_masks;
-
 #[cfg(feature = "str_arithmetic")]
 use crate::utils::{
     confirm_mask_capacity, estimate_categorical_cardinality, estimate_string_cardinality,
@@ -673,8 +670,10 @@ where
     let lmask_ref = lmask_slice.as_ref();
     let rmask_ref = rmask_slice.as_ref();
 
-    // build per‐position validity
-    let (lmask, rmask, mut out_mask) = string_predicate_masks(lmask_ref, rmask_ref, llen);
+    // validity: use the input masks as-is; create the output mask via `new_set_all(llen, false)`
+    let lmask = lmask_ref;
+    let rmask = rmask_ref;
+    let mut out_mask = Bitmask::new_set_all(llen, false);
     let _ = confirm_mask_capacity(llen, lmask)?;
     let _ = confirm_mask_capacity(llen, rmask)?;