Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ c_ffi_tests = ['cc']
# Highly recommend keeping these off unless they are required (e.g., in
# constrained or embedded environments), as they add combinatorial
# weight to the binary and to enum match arms.
extended_categorical = ["extended_numeric_types"]
extended_categorical = []

# Adds UInt8, UInt16, Int8, Int16 types.
#
Expand Down
2 changes: 1 addition & 1 deletion pyo3/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ name = "minarrow_pyo3"
crate-type = ["cdylib", "rlib"]

[dependencies]
minarrow = { version = "0.8.1", features = ["large_string"] }
minarrow = { version = "0.9.1", features = ["large_string"] }
pyo3 = { version = "0.23" }
thiserror = "2"

Expand Down
393 changes: 391 additions & 2 deletions src/enums/array.rs

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions src/enums/operators.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@ pub enum ArithmeticOperator {
/// For integers, uses repeated multiplication. For floating-point, uses `pow()` function.
/// Negative exponents on integers may yield zero due to truncation.
Power,
/// Floor division (`lhs // rhs`)
///
/// Rounds the quotient towards negative infinity. For unsigned integers this is
/// identical to truncation division. For signed integers, when the remainder is
/// non-zero and the operands have different signs, the result is one less than
/// truncation division. For floating-point, equivalent to `(lhs / rhs).floor()`.
FloorDiv,
}

/// Comparison operators for binary predicates.
Expand Down
14 changes: 14 additions & 0 deletions src/enums/value/conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -874,6 +874,20 @@ impl TryFrom<Value> for NumericArrayV {
}),
}
}
Value::Array(inner) => {
let arr = Arc::try_unwrap(inner).unwrap_or_else(|arc| (*arc).clone());
match arr {
Array::NumericArray(num_arr) => {
let len = num_arr.len();
Ok(NumericArrayV::new(num_arr, 0, len))
}
_ => Err(MinarrowError::TypeError {
from: "Value",
to: "NumericArrayV",
message: Some("Array is not a NumericArray".to_owned()),
}),
}
}
_ => Err(MinarrowError::TypeError {
from: "Value",
to: "NumericArrayV",
Expand Down
7 changes: 2 additions & 5 deletions src/ffi/arrow_c_ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -553,9 +553,9 @@ fn export_categorical_array_to_c(

let mut field = schema.fields[0].clone();
field.dtype = match index_bits {
#[cfg(all(feature = "extended_categorical", feature = "extended_numeric_types"))]
#[cfg(feature = "extended_categorical")]
8 => ArrowType::Dictionary(crate::ffi::arrow_dtype::CategoricalIndexType::UInt8),
#[cfg(all(feature = "extended_categorical", feature = "extended_numeric_types"))]
#[cfg(feature = "extended_categorical")]
16 => ArrowType::Dictionary(crate::ffi::arrow_dtype::CategoricalIndexType::UInt16),
32 => ArrowType::Dictionary(crate::ffi::arrow_dtype::CategoricalIndexType::UInt32),
#[cfg(feature = "extended_categorical")]
Expand Down Expand Up @@ -1483,14 +1483,12 @@ unsafe fn import_categorical(

// Build codes & wrap
match index_type {
#[cfg(feature = "extended_numeric_types")]
#[cfg(feature = "extended_categorical")]
CategoricalIndexType::UInt8 => {
let codes_buf = unsafe { build_codes::<u8>(codes_ptr, len, ownership) };
let arr = CategoricalArray::<u8>::new(codes_buf, dict_strings, null_mask);
Arc::new(Array::TextArray(TextArray::Categorical8(Arc::new(arr))))
}
#[cfg(feature = "extended_numeric_types")]
#[cfg(feature = "extended_categorical")]
CategoricalIndexType::UInt16 => {
let codes_buf = unsafe { build_codes::<u16>(codes_ptr, len, ownership) };
Expand All @@ -1502,7 +1500,6 @@ unsafe fn import_categorical(
let arr = CategoricalArray::<u32>::new(codes_buf, dict_strings, null_mask);
Arc::new(Array::TextArray(TextArray::Categorical32(Arc::new(arr))))
}
#[cfg(feature = "extended_numeric_types")]
#[cfg(feature = "extended_categorical")]
CategoricalIndexType::UInt64 => {
let codes_buf = unsafe { build_codes::<u64>(codes_ptr, len, ownership) };
Expand Down
9 changes: 4 additions & 5 deletions src/ffi/arrow_dtype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,7 @@ pub enum ArrowType {
/// - Smaller widths reduce memory footprint for low-cardinality data.
/// - Larger widths enable more distinct categories without overflow.
/// - Variant availability depends on feature flags:
/// - `UInt8` and `UInt16` require both `extended_categorical` and `extended_numeric_types`.
/// - `UInt64` requires `extended_categorical`.
/// - `UInt8`, `UInt16`, and `UInt64` require `extended_categorical`.
/// - `UInt32` is always available.
///
/// ## Interoperability
Expand All @@ -127,12 +126,12 @@ pub enum ArrowType {

#[derive(PartialEq, Clone, Debug)]
pub enum CategoricalIndexType {
#[cfg(all(feature = "extended_categorical", feature = "extended_numeric_types"))]
#[cfg(feature = "extended_categorical")]
UInt8,
#[cfg(all(feature = "extended_categorical", feature = "extended_numeric_types"))]
#[cfg(feature = "extended_categorical")]
UInt16,
UInt32,
#[cfg(all(feature = "extended_categorical"))]
#[cfg(feature = "extended_categorical")]
UInt64,
}

Expand Down
75 changes: 72 additions & 3 deletions src/kernels/arithmetic/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ pub fn int_dense_body_simd<T, const LANES: usize>(
ArithmeticOperator::Multiply => a * b,
ArithmeticOperator::Divide => a / b, // Panics if divisor is zero
ArithmeticOperator::Remainder => a % b, // Panics if divisor is zero
ArithmeticOperator::Power => {
ArithmeticOperator::Power | ArithmeticOperator::FloorDiv => {
vectorisable = 0;
break;
}
Expand All @@ -88,6 +88,15 @@ pub fn int_dense_body_simd<T, const LANES: usize>(
}
acc
}
ArithmeticOperator::FloorDiv => {
if rhs[idx] == T::zero() {
panic!("Floor division by zero")
} else {
let d = lhs[idx] / rhs[idx];
let r = lhs[idx] % rhs[idx];
if r != T::zero() && (lhs[idx] ^ rhs[idx]) < T::zero() { d - T::one() } else { d }
}
}
};
}
}
Expand Down Expand Up @@ -159,6 +168,22 @@ pub fn int_masked_body_simd<T, const LANES: usize>(
let r = div_zero.select(Simd::splat(T::zero()), r);
(r, valid)
}
ArithmeticOperator::FloorDiv => {
let div_zero = b.simd_eq(Simd::splat(T::zero()));
let valid = !div_zero;
// Per-lane floor division with sign correction
let mut tmp = [T::zero(); LANES];
for l in 0..LANES {
if b[l] == T::zero() {
tmp[l] = T::zero();
} else {
let d = a[l] / b[l];
let r = a[l] % b[l];
tmp[l] = if r != T::zero() && (a[l] ^ b[l]) < T::zero() { d - T::one() } else { d };
}
}
(Simd::<T, LANES>::from_array(tmp), valid)
}
};
r.copy_to_slice(&mut out[i..i + LANES]);
// Write the out_mask based on the op
Expand Down Expand Up @@ -217,6 +242,21 @@ pub fn int_masked_body_simd<T, const LANES: usize>(
}
}
}
ArithmeticOperator::FloorDiv => {
if rhs[idx] == T::zero() {
out[idx] = T::zero();
unsafe {
out_mask.set_unchecked(idx, false);
}
} else {
let d = lhs[idx] / rhs[idx];
let r = lhs[idx] % rhs[idx];
out[idx] = if r != T::zero() && (lhs[idx] ^ rhs[idx]) < T::zero() { d - T::one() } else { d };
unsafe {
out_mask.set_unchecked(idx, true);
}
}
}
}
}
return;
Expand Down Expand Up @@ -254,6 +294,18 @@ pub fn int_masked_body_simd<T, const LANES: usize>(
}
Simd::<T, LANES>::from_array(tmp)
}
ArithmeticOperator::FloorDiv => {
// Per-lane floor division with sign correction
let mut tmp = [T::zero(); LANES];
for l in 0..LANES {
if b[l] != T::zero() {
let d = a[l] / b[l];
let r = a[l] % b[l];
tmp[l] = if r != T::zero() && (a[l] ^ b[l]) < T::zero() { d - T::one() } else { d };
}
}
Simd::<T, LANES>::from_array(tmp)
}
};

// apply source validity mask, write results
Expand All @@ -262,8 +314,8 @@ pub fn int_masked_body_simd<T, const LANES: usize>(

// write out-mask bits: combine source mask with div-by-zero validity
let final_mask = match op {
ArithmeticOperator::Divide | ArithmeticOperator::Remainder => {
// For div/rem: valid iff source is valid AND not dividing by zero
ArithmeticOperator::Divide | ArithmeticOperator::Remainder | ArithmeticOperator::FloorDiv => {
// Valid iff source is valid and not dividing by zero
m_src & !div_zero
}
_ => m_src,
Expand Down Expand Up @@ -301,6 +353,15 @@ pub fn int_masked_body_simd<T, const LANES: usize>(
}
}
ArithmeticOperator::Power => (lhs[j].pow(rhs[j].to_u32().unwrap_or(0)), true),
ArithmeticOperator::FloorDiv => {
if rhs[j] == T::zero() {
(T::zero(), false)
} else {
let d = lhs[j] / rhs[j];
let r = lhs[j] % rhs[j];
if r != T::zero() && (lhs[j] ^ rhs[j]) < T::zero() { (d - T::one(), true) } else { (d, true) }
}
}
};
out[j] = result;
unsafe { out_mask.set_unchecked(j, final_valid) };
Expand Down Expand Up @@ -345,6 +406,7 @@ pub fn float_masked_body_f32_simd<const LANES: usize>(
ArithmeticOperator::Divide => a / b,
ArithmeticOperator::Remainder => a % b,
ArithmeticOperator::Power => (b * a.ln()).exp(),
ArithmeticOperator::FloorDiv => (a / b).floor(),
};

let selected = m.select(res, Simd::<f32, LANES>::splat(0.0));
Expand All @@ -371,6 +433,7 @@ pub fn float_masked_body_f32_simd<const LANES: usize>(
ArithmeticOperator::Divide => lhs[j] / rhs[j],
ArithmeticOperator::Remainder => lhs[j] % rhs[j],
ArithmeticOperator::Power => (rhs[j] * lhs[j].ln()).exp(),
ArithmeticOperator::FloorDiv => (lhs[j] / rhs[j]).floor(),
};
unsafe { out_mask.set_unchecked(j, true) };
} else {
Expand Down Expand Up @@ -417,6 +480,7 @@ pub fn float_masked_body_f64_simd<const LANES: usize>(
ArithmeticOperator::Divide => a / b,
ArithmeticOperator::Remainder => a % b,
ArithmeticOperator::Power => (b * a.ln()).exp(),
ArithmeticOperator::FloorDiv => (a / b).floor(),
};

let selected = m.select(res, Simd::<f64, LANES>::splat(0.0));
Expand All @@ -443,6 +507,7 @@ pub fn float_masked_body_f64_simd<const LANES: usize>(
ArithmeticOperator::Divide => lhs[j] / rhs[j],
ArithmeticOperator::Remainder => lhs[j] % rhs[j],
ArithmeticOperator::Power => (rhs[j] * lhs[j].ln()).exp(),
ArithmeticOperator::FloorDiv => (lhs[j] / rhs[j]).floor(),
};
unsafe { out_mask.set_unchecked(j, true) };
} else {
Expand Down Expand Up @@ -474,6 +539,7 @@ pub fn float_dense_body_f32_simd<const LANES: usize>(
ArithmeticOperator::Divide => a / b,
ArithmeticOperator::Remainder => a % b,
ArithmeticOperator::Power => (b * a.ln()).exp(),
ArithmeticOperator::FloorDiv => (a / b).floor(),
};
res.copy_to_slice(&mut out[i..i + LANES]);
i += LANES;
Expand All @@ -488,6 +554,7 @@ pub fn float_dense_body_f32_simd<const LANES: usize>(
ArithmeticOperator::Divide => lhs[j] / rhs[j],
ArithmeticOperator::Remainder => lhs[j] % rhs[j],
ArithmeticOperator::Power => (rhs[j] * lhs[j].ln()).exp(),
ArithmeticOperator::FloorDiv => (lhs[j] / rhs[j]).floor(),
};
}
}
Expand All @@ -514,6 +581,7 @@ pub fn float_dense_body_f64_simd<const LANES: usize>(
ArithmeticOperator::Divide => a / b,
ArithmeticOperator::Remainder => a % b,
ArithmeticOperator::Power => (b * a.ln()).exp(),
ArithmeticOperator::FloorDiv => (a / b).floor(),
};
res.copy_to_slice(&mut out[i..i + LANES]);
i += LANES;
Expand All @@ -528,6 +596,7 @@ pub fn float_dense_body_f64_simd<const LANES: usize>(
ArithmeticOperator::Divide => lhs[j] / rhs[j],
ArithmeticOperator::Remainder => lhs[j] % rhs[j],
ArithmeticOperator::Power => (rhs[j] * lhs[j].ln()).exp(),
ArithmeticOperator::FloorDiv => (lhs[j] / rhs[j]).floor(),
};
}
}
Expand Down
21 changes: 21 additions & 0 deletions src/kernels/arithmetic/std.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,16 @@ pub fn int_dense_body_std<T: PrimInt + ToPrimitive + WrappingAdd + WrappingSub +
}
}
ArithmeticOperator::Power => lhs[i].pow(rhs[i].to_u32().unwrap_or(0)),
ArithmeticOperator::FloorDiv => {
if rhs[i] == T::zero() {
panic!("Floor division by zero")
} else {
let d = lhs[i] / rhs[i];
let r = lhs[i] % rhs[i];
// If remainder is non-zero and signs differ, floor toward -inf
if r != T::zero() && (lhs[i] ^ rhs[i]) < T::zero() { d - T::one() } else { d }
}
}
};
}
}
Expand Down Expand Up @@ -93,6 +103,15 @@ pub fn int_masked_body_std<T: PrimInt + ToPrimitive + WrappingAdd + WrappingSub
}
}
ArithmeticOperator::Power => (lhs[i].pow(rhs[i].to_u32().unwrap_or(0)), true),
ArithmeticOperator::FloorDiv => {
if rhs[i] == T::zero() {
(T::zero(), false)
} else {
let d = lhs[i] / rhs[i];
let r = lhs[i] % rhs[i];
if r != T::zero() && (lhs[i] ^ rhs[i]) < T::zero() { (d - T::one(), true) } else { (d, true) }
}
}
};
out[i] = result;
unsafe {
Expand Down Expand Up @@ -121,6 +140,7 @@ pub fn float_dense_body_std<T: Float>(op: ArithmeticOperator, lhs: &[T], rhs: &[
ArithmeticOperator::Divide => lhs[i] / rhs[i],
ArithmeticOperator::Remainder => lhs[i] % rhs[i],
ArithmeticOperator::Power => (rhs[i] * lhs[i].ln()).exp(),
ArithmeticOperator::FloorDiv => (lhs[i] / rhs[i]).floor(),
};
}
}
Expand Down Expand Up @@ -148,6 +168,7 @@ pub fn float_masked_body_std<T: Float>(
ArithmeticOperator::Divide => lhs[i] / rhs[i],
ArithmeticOperator::Remainder => lhs[i] % rhs[i],
ArithmeticOperator::Power => (rhs[i] * lhs[i].ln()).exp(),
ArithmeticOperator::FloorDiv => (lhs[i] / rhs[i]).floor(),
};
unsafe {
out_mask.set_unchecked(i, true);
Expand Down
9 changes: 4 additions & 5 deletions src/kernels/arithmetic/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,6 @@ use crate::{Bitmask, Vec64};
use num_traits::ToPrimitive;

use crate::enums::operators::ArithmeticOperator::{self};
#[cfg(feature = "str_arithmetic")]
use crate::kernels::string::string_predicate_masks;

#[cfg(feature = "str_arithmetic")]
use crate::utils::{
confirm_mask_capacity, estimate_categorical_cardinality, estimate_string_cardinality,
Expand Down Expand Up @@ -673,8 +670,10 @@ where
let lmask_ref = lmask_slice.as_ref();
let rmask_ref = rmask_slice.as_ref();

// build per‐position validity
let (lmask, rmask, mut out_mask) = string_predicate_masks(lmask_ref, rmask_ref, llen);
// build per-position validity
let lmask = lmask_ref;
let rmask = rmask_ref;
let mut out_mask = Bitmask::new_set_all(llen, false);
let _ = confirm_mask_capacity(llen, lmask)?;
let _ = confirm_mask_capacity(llen, rmask)?;

Expand Down
Loading
Loading