Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions benches/simd_math_remaining_baseline/inverse_hyperbolic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,60 @@ use simdeez::{prelude::*, simd_unsafe_generate_all};

use crate::shared::{self, INPUT_LEN};

#[inline(never)]
fn scalar_asinh_sum(input: &[f32]) -> f32 {
input.iter().copied().map(f32::asinh).sum()
}

#[inline(never)]
fn scalar_acosh_sum(input: &[f32]) -> f32 {
input.iter().copied().map(f32::acosh).sum()
}

#[inline(never)]
fn scalar_atanh_sum(input: &[f32]) -> f32 {
input.iter().copied().map(f32::atanh).sum()
}

simd_unsafe_generate_all!(
fn simdeez_asinh_sum(input: &[f32]) -> f32 {
shared::simdeez_unary_sum_impl::<S>(input, |v| v.asinh_u35())
}
);

simd_unsafe_generate_all!(
fn simdeez_acosh_sum(input: &[f32]) -> f32 {
shared::simdeez_unary_sum_impl::<S>(input, |v| v.acosh_u35())
}
);

simd_unsafe_generate_all!(
fn simdeez_atanh_sum(input: &[f32]) -> f32 {
shared::simdeez_unary_sum_impl::<S>(input, |v| v.atanh_u35())
}
);

pub fn register(c: &mut Criterion) {
let asinh_inputs = shared::make_unary_inputs(INPUT_LEN, 0xDEADB001, -16_384.0..16_384.0);
let acosh_inputs = shared::make_positive_inputs(INPUT_LEN, 0xDEADB002, 1.0, 16_384.0);
let atanh_inputs = shared::make_unary_inputs(INPUT_LEN, 0xDEADB003, -0.999_999..0.999_999);

shared::bench_unary(
c,
"simd_math_baseline/f32/asinh_u35",
&asinh_inputs,
scalar_asinh_sum,
simdeez_asinh_sum,
);

shared::bench_unary(
c,
"simd_math_baseline/f32/acosh_u35",
&acosh_inputs,
scalar_acosh_sum,
simdeez_acosh_sum,
);

shared::bench_unary(
c,
"simd_math_baseline/f32/atanh_u35",
Expand Down
7 changes: 4 additions & 3 deletions src/math/contracts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@ pub const ATAN2_U35_F32_MAX_ULP: u32 = 1;
pub const SINH_U35_F32_MAX_ULP: u32 = 35;
pub const COSH_U35_F32_MAX_ULP: u32 = 35;
pub const TANH_U35_F32_MAX_ULP: u32 = 35;
pub const ASINH_U35_F32_MAX_ULP: u32 = 1;
pub const ACOSH_U35_F32_MAX_ULP: u32 = 1;
pub const ATANH_U35_F32_MAX_ULP: u32 = 1;
// Portable f32 inverse-hyperbolic kernels target u35 accuracy.
pub const ASINH_U35_F32_MAX_ULP: u32 = 35;
pub const ACOSH_U35_F32_MAX_ULP: u32 = 35;
pub const ATANH_U35_F32_MAX_ULP: u32 = 35;
pub const HYPOT_U35_F32_MAX_ULP: u32 = 1;
// f32 log10 now rides the portable log2_u35 kernel and inherits its relaxed envelope.
pub const LOG10_U35_F32_MAX_ULP: u32 = 35;
Expand Down
27 changes: 27 additions & 0 deletions src/math/f32/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,33 @@ where
hyperbolic::tanh_u35(input)
}

#[inline(always)]
pub(crate) fn asinh_u35<V>(input: V) -> V
where
V: SimdFloat32,
V::Engine: Simd<Vf32 = V>,
{
portable::asinh_u35(input)
}

#[inline(always)]
pub(crate) fn acosh_u35<V>(input: V) -> V
where
V: SimdFloat32,
V::Engine: Simd<Vf32 = V>,
{
portable::acosh_u35(input)
}

#[inline(always)]
pub(crate) fn atanh_u35<V>(input: V) -> V
where
V: SimdFloat32,
V::Engine: Simd<Vf32 = V>,
{
portable::atanh_u35(input)
}

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[inline(always)]
fn is_avx2_engine<E: Simd>() -> bool {
Expand Down
62 changes: 62 additions & 0 deletions src/math/f32/portable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,3 +253,65 @@ where
let fast = sin_fast / cos_fast;
patch_exceptional_lanes(input, fast, exceptional_mask, scalar::tan_u35_f32)
}

#[inline(always)]
pub(super) fn asinh_u35<V>(input: V) -> V
where
V: SimdFloat32,
V::Engine: Simd<Vf32 = V>,
{
let finite_mask = input.cmp_eq(input).bitcast_i32();
let abs_x = input.abs();
let tiny_mask = abs_x.cmp_lt(V::set1(0.05)).bitcast_i32();
let large_mask = abs_x.cmp_gt(V::set1(1.0e19)).bitcast_i32();
let zero_mask = input.cmp_eq(V::zeroes()).bitcast_i32();
let exceptional_mask =
finite_mask.cmp_eq(SimdI32::<V>::zeroes()) | tiny_mask | large_mask | zero_mask;

let radicand = (abs_x * abs_x) + V::set1(1.0);
let magnitude = log2_u35(abs_x + radicand.sqrt()) * V::set1(core::f32::consts::LN_2);
let negative_mask = input.cmp_lt(V::zeroes());
let fast = negative_mask.blendv(magnitude, -magnitude);

patch_exceptional_lanes(input, fast, exceptional_mask, scalar::asinh_u35_f32)
}

#[inline(always)]
pub(super) fn acosh_u35<V>(input: V) -> V
where
V: SimdFloat32,
V::Engine: Simd<Vf32 = V>,
{
let finite_mask = input.cmp_eq(input).bitcast_i32();
let in_domain_mask = input.cmp_gte(V::set1(1.0)).bitcast_i32();
let fast_mask = finite_mask & in_domain_mask;
let exceptional_mask = fast_mask.cmp_eq(SimdI32::<V>::zeroes());

let root_term = ((input - V::set1(1.0)).sqrt()) * ((input + V::set1(1.0)).sqrt());
let fast = log2_u35(input + root_term) * V::set1(core::f32::consts::LN_2);

patch_exceptional_lanes(input, fast, exceptional_mask, scalar::acosh_u35_f32)
}

#[inline(always)]
pub(super) fn atanh_u35<V>(input: V) -> V
where
V: SimdFloat32,
V::Engine: Simd<Vf32 = V>,
{
let finite_mask = input.cmp_eq(input).bitcast_i32();
let abs_x = input.abs();
let strict_domain_mask = abs_x.cmp_lt(V::set1(1.0)).bitcast_i32();
let non_zero_mask = input.cmp_neq(V::zeroes()).bitcast_i32();
let stable_range_mask = abs_x.cmp_lte(V::set1(0.75)).bitcast_i32();
let away_from_zero_mask = abs_x.cmp_gte(V::set1(0.05)).bitcast_i32();
let fast_mask =
finite_mask & strict_domain_mask & non_zero_mask & stable_range_mask & away_from_zero_mask;
let exceptional_mask = fast_mask.cmp_eq(SimdI32::<V>::zeroes());

let one = V::set1(1.0);
let ratio = (one + input) / (one - input);
let fast = log2_u35(ratio) * V::set1(0.5 * core::f32::consts::LN_2);

patch_exceptional_lanes(input, fast, exceptional_mask, scalar::atanh_u35_f32)
}
25 changes: 17 additions & 8 deletions src/math/families/inverse_hyperbolic.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,29 @@
use crate::math::{map, scalar};
use crate::{SimdFloat32, SimdFloat64};
use crate::math::{f32, map, scalar};
use crate::{Simd, SimdFloat32, SimdFloat64};

pub trait SimdMathF32InverseHyperbolic: SimdFloat32 {
#[inline(always)]
fn asinh_u35(self) -> Self {
map::unary_f32(self, scalar::asinh_u35_f32)
fn asinh_u35(self) -> Self
where
Self::Engine: Simd<Vf32 = Self>,
{
f32::asinh_u35(self)
}

#[inline(always)]
fn acosh_u35(self) -> Self {
map::unary_f32(self, scalar::acosh_u35_f32)
fn acosh_u35(self) -> Self
where
Self::Engine: Simd<Vf32 = Self>,
{
f32::acosh_u35(self)
}

#[inline(always)]
fn atanh_u35(self) -> Self {
map::unary_f32(self, scalar::atanh_u35_f32)
fn atanh_u35(self) -> Self
where
Self::Engine: Simd<Vf32 = Self>,
{
f32::atanh_u35(self)
}
}

Expand Down
137 changes: 137 additions & 0 deletions src/tests/simd_math_targeted_edges/inverse_hyperbolic.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
use super::*;
use crate::math::SimdMathF32InverseHyperbolic;

fn run_f32_inverse_hyperbolic_domain_edges<S: Simd>() {
let acosh_inputs = [
0.0f32,
0.5,
1.0 - f32::EPSILON,
1.0,
1.0 + f32::EPSILON,
1.000_001,
2.0,
10.0,
f32::INFINITY,
f32::NAN,
];
check_targeted_unary_f32::<S>(
"acosh_u35",
&acosh_inputs,
contracts::ACOSH_U35_F32_MAX_ULP,
|v| v.acosh_u35(),
f32::acosh,
);

let atanh_inputs = [
-1.0f32,
-1.0 + f32::EPSILON,
-0.999_999,
-0.5,
-0.0,
0.0,
0.5,
0.999_999,
1.0 - f32::EPSILON,
1.0,
1.0 + f32::EPSILON,
f32::NAN,
];
check_targeted_unary_f32::<S>(
"atanh_u35",
&atanh_inputs,
contracts::ATANH_U35_F32_MAX_ULP,
|v| v.atanh_u35(),
f32::atanh,
);

let asinh_inputs = [
f32::NEG_INFINITY,
-1.0e20,
-1000.0,
-1.0,
-f32::MIN_POSITIVE,
-0.0,
0.0,
f32::MIN_POSITIVE,
1.0,
1000.0,
1.0e20,
f32::INFINITY,
f32::NAN,
];
check_targeted_unary_f32::<S>(
"asinh_u35",
&asinh_inputs,
contracts::ASINH_U35_F32_MAX_ULP,
|v| v.asinh_u35(),
f32::asinh,
);
}

fn run_f32_inverse_hyperbolic_mixed_lanes<S: Simd>() {
let mut lanes = vec![0.0f32; S::Vf32::WIDTH];
let pattern = [
-0.75f32,
-0.0,
0.0,
0.75,
0.999_999,
-0.999_999,
1.0,
-1.0,
1.0 + f32::EPSILON,
f32::NAN,
f32::INFINITY,
f32::NEG_INFINITY,
1.0,
2.0,
10.0,
0.5,
];
for (i, lane) in lanes.iter_mut().enumerate() {
*lane = pattern[i % pattern.len()];
}

let input = S::Vf32::load_from_slice(&lanes);
let asinh = input.asinh_u35();
let acosh = input.acosh_u35();
let atanh = input.atanh_u35();

for (lane, &x) in lanes.iter().enumerate() {
assert_f32_contract(
"asinh_u35",
x,
asinh[lane],
x.asinh(),
contracts::ASINH_U35_F32_MAX_ULP,
)
.unwrap_or_else(|e| panic!("lane {lane}: {e}"));

assert_f32_contract(
"acosh_u35",
x,
acosh[lane],
x.acosh(),
contracts::ACOSH_U35_F32_MAX_ULP,
)
.unwrap_or_else(|e| panic!("lane {lane}: {e}"));

assert_f32_contract(
"atanh_u35",
x,
atanh[lane],
x.atanh(),
contracts::ATANH_U35_F32_MAX_ULP,
)
.unwrap_or_else(|e| panic!("lane {lane}: {e}"));
}
}

simd_math_targeted_all_backends!(
f32_inverse_hyperbolic_domain_edges,
run_f32_inverse_hyperbolic_domain_edges
);
simd_math_targeted_all_backends!(
f32_inverse_hyperbolic_mixed_lanes,
run_f32_inverse_hyperbolic_mixed_lanes
);
1 change: 1 addition & 0 deletions src/tests/simd_math_targeted_edges/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,4 +140,5 @@ macro_rules! simd_math_targeted_all_backends {
mod binary_misc;
mod core;
mod hyperbolic;
mod inverse_hyperbolic;
mod inverse_trig;
Loading