Skip to content

Commit f8052b8

Browse files
committed
Merge branch 'feat/f64-hyperbolic-audit' into feat/f64-wave-1
2 parents 95b9b4c + 5b3cad1 commit f8052b8

4 files changed

Lines changed: 59 additions & 192 deletions

File tree

src/math/f64/hyperbolic.rs

Lines changed: 5 additions & 167 deletions
Original file line numberDiff line numberDiff line change
@@ -1,188 +1,26 @@
1-
use crate::math::scalar;
2-
use crate::{Simd, SimdBaseIo, SimdBaseOps, SimdConsts, SimdFloat64};
3-
4-
type SimdI64<V> = <<V as SimdConsts>::Engine as Simd>::Vi64;
5-
6-
const SINH_COSH_SMALL_ABS: f64 = 0.125;
7-
const SINH_COSH_FAST_ABS_MAX: f64 = 0.125;
8-
const TANH_SMALL_ABS: f64 = 0.0;
9-
const TANH_FAST_ABS_MAX: f64 = 0.0;
10-
11-
#[inline(always)]
12-
fn any_lane_nonzero<V>(mask: SimdI64<V>) -> bool
13-
where
14-
V: SimdFloat64,
15-
{
16-
unsafe {
17-
let lanes = mask.as_array();
18-
for lane in 0..V::WIDTH {
19-
if lanes[lane] != 0 {
20-
return true;
21-
}
22-
}
23-
}
24-
25-
false
26-
}
27-
28-
#[inline(always)]
29-
fn patch_exceptional_lanes<V>(
30-
input: V,
31-
output: V,
32-
exceptional_mask: SimdI64<V>,
33-
scalar_fallback: fn(f64) -> f64,
34-
) -> V
35-
where
36-
V: SimdFloat64,
37-
{
38-
if !any_lane_nonzero::<V>(exceptional_mask) {
39-
return output;
40-
}
41-
42-
unsafe {
43-
let input_lanes = input.as_array();
44-
let mask_lanes = exceptional_mask.as_array();
45-
let mut output_lanes = output.as_array();
46-
47-
for lane in 0..V::WIDTH {
48-
if mask_lanes[lane] != 0 {
49-
output_lanes[lane] = scalar_fallback(input_lanes[lane]);
50-
}
51-
}
52-
53-
V::load_from_ptr_unaligned(&output_lanes as *const V::ArrayRepresentation as *const f64)
54-
}
55-
}
56-
57-
#[inline(always)]
58-
fn exp_u35<V>(input: V) -> V
59-
where
60-
V: SimdFloat64,
61-
{
62-
// Temporary family-local bridge: use scalar exp lane mapping here while
63-
// avoiding scalar lane mapping for the final hyperbolic functions.
64-
unsafe {
65-
let mut lanes = input.as_array();
66-
for lane in 0..V::WIDTH {
67-
lanes[lane] = scalar::exp_u35_f64(lanes[lane]);
68-
}
69-
V::load_from_ptr_unaligned(&lanes as *const V::ArrayRepresentation as *const f64)
70-
}
71-
}
72-
73-
#[inline(always)]
74-
fn sinh_small<V>(input: V, input_sq: V) -> V
75-
where
76-
V: SimdFloat64,
77-
{
78-
let poly = ((((V::set1(1.0 / 39916800.0) * input_sq) + V::set1(1.0 / 362880.0)) * input_sq
79-
+ V::set1(1.0 / 5040.0))
80-
* input_sq
81-
+ V::set1(1.0 / 120.0))
82-
* input_sq
83-
+ V::set1(1.0 / 6.0);
84-
85-
input + (input * input_sq * poly)
86-
}
87-
88-
#[inline(always)]
89-
fn cosh_small<V>(input_sq: V) -> V
90-
where
91-
V: SimdFloat64,
92-
{
93-
let poly = (((V::set1(1.0 / 40320.0) * input_sq) + V::set1(1.0 / 720.0)) * input_sq
94-
+ V::set1(1.0 / 24.0))
95-
* input_sq
96-
+ V::set1(0.5);
97-
98-
V::set1(1.0) + (input_sq * poly)
99-
}
100-
101-
#[inline(always)]
102-
fn sinh_cosh_medium<V>(abs_input: V) -> (V, V)
103-
where
104-
V: SimdFloat64,
105-
{
106-
let exp_abs = exp_u35(abs_input);
107-
let exp_neg_abs = V::set1(1.0) / exp_abs;
108-
let half = V::set1(0.5);
109-
110-
(
111-
(exp_abs - exp_neg_abs) * half,
112-
(exp_abs + exp_neg_abs) * half,
113-
)
114-
}
115-
116-
#[inline(always)]
117-
fn sinh_cosh_masks<V>(input: V) -> (SimdI64<V>, V, V)
118-
where
119-
V: SimdFloat64,
120-
{
121-
let abs_input = input.abs();
122-
let finite_mask = input.cmp_eq(input).bitcast_i64();
123-
let within_fast_range = abs_input
124-
.cmp_lte(V::set1(SINH_COSH_FAST_ABS_MAX))
125-
.bitcast_i64();
126-
127-
(finite_mask & within_fast_range, abs_input, input * input)
128-
}
1+
use crate::math::{map, scalar};
2+
use crate::SimdFloat64;
1293

1304
#[inline(always)]
1315
pub(crate) fn sinh_u35<V>(input: V) -> V
1326
where
1337
V: SimdFloat64,
1348
{
135-
let (fast_mask, abs_input, input_sq) = sinh_cosh_masks(input);
136-
let exceptional_mask = fast_mask.cmp_eq(SimdI64::<V>::zeroes());
137-
let small_mask = abs_input.cmp_lt(V::set1(SINH_COSH_SMALL_ABS));
138-
139-
let fast_small = sinh_small(input, input_sq);
140-
let exp_input = exp_u35(input);
141-
let exp_neg_input = V::set1(1.0) / exp_input;
142-
let sinh_medium = (exp_input - exp_neg_input) * V::set1(0.5);
143-
let fast = small_mask.blendv(sinh_medium, fast_small);
144-
let zero_mask = input.cmp_eq(V::set1(0.0));
145-
let fast = zero_mask.blendv(fast, input);
146-
147-
patch_exceptional_lanes(input, fast, exceptional_mask, scalar::sinh_u35_f64)
9+
map::unary_f64(input, scalar::sinh_u35_f64)
14810
}
14911

15012
#[inline(always)]
15113
pub(crate) fn cosh_u35<V>(input: V) -> V
15214
where
15315
V: SimdFloat64,
15416
{
155-
let (fast_mask, abs_input, input_sq) = sinh_cosh_masks(input);
156-
let exceptional_mask = fast_mask.cmp_eq(SimdI64::<V>::zeroes());
157-
let small_mask = abs_input.cmp_lt(V::set1(SINH_COSH_SMALL_ABS));
158-
159-
let fast_small = cosh_small(input_sq);
160-
let (_, cosh_medium) = sinh_cosh_medium(abs_input);
161-
let fast = small_mask.blendv(cosh_medium, fast_small);
162-
163-
patch_exceptional_lanes(input, fast, exceptional_mask, scalar::cosh_u35_f64)
17+
map::unary_f64(input, scalar::cosh_u35_f64)
16418
}
16519

16620
#[inline(always)]
16721
pub(crate) fn tanh_u35<V>(input: V) -> V
16822
where
16923
V: SimdFloat64,
17024
{
171-
let abs_input = input.abs();
172-
let finite_mask = input.cmp_eq(input).bitcast_i64();
173-
let within_fast_range = abs_input.cmp_lte(V::set1(TANH_FAST_ABS_MAX)).bitcast_i64();
174-
let exceptional_mask = (finite_mask & within_fast_range).cmp_eq(SimdI64::<V>::zeroes());
175-
let small_mask = abs_input.cmp_lt(V::set1(TANH_SMALL_ABS));
176-
177-
let input_sq = input * input;
178-
let fast_small = sinh_small(input, input_sq) / cosh_small(input_sq);
179-
180-
let exp_input = exp_u35(input);
181-
let exp_neg_input = V::set1(1.0) / exp_input;
182-
let tanh_medium = (exp_input - exp_neg_input) / (exp_input + exp_neg_input);
183-
let fast = small_mask.blendv(tanh_medium, fast_small);
184-
let zero_mask = input.cmp_eq(V::set1(0.0));
185-
let fast = zero_mask.blendv(fast, input);
186-
187-
patch_exceptional_lanes(input, fast, exceptional_mask, scalar::tanh_u35_f64)
25+
map::unary_f64(input, scalar::tanh_u35_f64)
18826
}

src/math/f64/inverse_hyperbolic.rs

Lines changed: 3 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use crate::math::{f64, scalar};
1+
use crate::math::{f64, map, scalar};
22
use crate::{Simd, SimdBaseIo, SimdBaseOps, SimdConsts, SimdFloat64};
33

44
type SimdI64<V> = <<V as SimdConsts>::Engine as Simd>::Vi64;
@@ -79,15 +79,7 @@ where
7979
V: SimdFloat64,
8080
V::Engine: Simd<Vf64 = V>,
8181
{
82-
let finite_mask = input.cmp_eq(input).bitcast_i64();
83-
let in_domain_mask = input.cmp_gte(V::set1(1.0)).bitcast_i64();
84-
let fast_mask = finite_mask & in_domain_mask;
85-
let exceptional_mask = fast_mask.cmp_eq(SimdI64::<V>::zeroes());
86-
87-
let root_term = ((input - V::set1(1.0)).sqrt()) * ((input + V::set1(1.0)).sqrt());
88-
let fast = f64::ln_u35(input + root_term);
89-
90-
patch_exceptional_lanes(input, fast, exceptional_mask, scalar::acosh_u35_f64)
82+
map::unary_f64(input, scalar::acosh_u35_f64)
9183
}
9284

9385
#[inline(always)]
@@ -96,19 +88,5 @@ where
9688
V: SimdFloat64,
9789
V::Engine: Simd<Vf64 = V>,
9890
{
99-
let finite_mask = input.cmp_eq(input).bitcast_i64();
100-
let abs_x = input.abs();
101-
let strict_domain_mask = abs_x.cmp_lt(V::set1(1.0)).bitcast_i64();
102-
let non_zero_mask = input.cmp_neq(V::zeroes()).bitcast_i64();
103-
let stable_range_mask = abs_x.cmp_lte(V::set1(0.99)).bitcast_i64();
104-
let away_from_zero_mask = abs_x.cmp_gte(V::set1(0.9)).bitcast_i64();
105-
let fast_mask =
106-
finite_mask & strict_domain_mask & non_zero_mask & stable_range_mask & away_from_zero_mask;
107-
let exceptional_mask = fast_mask.cmp_eq(SimdI64::<V>::zeroes());
108-
109-
let one = V::set1(1.0);
110-
let ratio = (one + input) / (one - input);
111-
let fast = f64::ln_u35(ratio) * V::set1(0.5);
112-
113-
patch_exceptional_lanes(input, fast, exceptional_mask, scalar::atanh_u35_f64)
91+
map::unary_f64(input, scalar::atanh_u35_f64)
11492
}

src/tests/simd_math_targeted_edges/hyperbolic.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,3 +300,28 @@ simd_math_targeted_all_backends!(
300300
f64_hyperbolic_special_values_and_mixed_lanes,
301301
run_f64_hyperbolic_special_values_and_mixed_lanes
302302
);
303+
304+
fn run_f64_hyperbolic_signed_zero_semantics<S: Simd>() {
305+
let mut lanes = vec![0.0f64; S::Vf64::WIDTH];
306+
lanes[0] = -0.0;
307+
308+
let input = S::Vf64::load_from_slice(&lanes);
309+
let sinh = input.sinh_u35();
310+
let tanh = input.tanh_u35();
311+
312+
assert_eq!(sinh[0].to_bits(), (-0.0f64).sinh().to_bits());
313+
assert_eq!(tanh[0].to_bits(), (-0.0f64).tanh().to_bits());
314+
315+
if S::Vf64::WIDTH > 1 {
316+
assert_eq!(sinh[1].to_bits(), 0.0f64.sinh().to_bits());
317+
assert_eq!(tanh[1].to_bits(), 0.0f64.tanh().to_bits());
318+
}
319+
320+
let cosh = input.cosh_u35();
321+
assert_eq!(cosh[0].to_bits(), (-0.0f64).cosh().to_bits());
322+
}
323+
324+
simd_math_targeted_all_backends!(
325+
f64_hyperbolic_signed_zero_semantics,
326+
run_f64_hyperbolic_signed_zero_semantics
327+
);

src/tests/simd_math_targeted_edges/inverse_hyperbolic.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,3 +270,29 @@ simd_math_targeted_all_backends!(
270270
f64_inverse_hyperbolic_mixed_lanes,
271271
run_f64_inverse_hyperbolic_mixed_lanes
272272
);
273+
274+
fn run_f64_inverse_hyperbolic_signed_zero_semantics<S: Simd>() {
275+
let mut lanes = vec![0.0f64; S::Vf64::WIDTH];
276+
lanes[0] = -0.0;
277+
278+
let input = S::Vf64::load_from_slice(&lanes);
279+
let asinh = input.asinh_u35();
280+
let atanh = input.atanh_u35();
281+
282+
assert_eq!(asinh[0].to_bits(), (-0.0f64).asinh().to_bits());
283+
assert_eq!(atanh[0].to_bits(), (-0.0f64).atanh().to_bits());
284+
285+
if S::Vf64::WIDTH > 1 {
286+
assert_eq!(asinh[1].to_bits(), 0.0f64.asinh().to_bits());
287+
assert_eq!(atanh[1].to_bits(), 0.0f64.atanh().to_bits());
288+
}
289+
290+
let ones = S::Vf64::set1(1.0);
291+
let acosh = ones.acosh_u35();
292+
assert_eq!(acosh[0].to_bits(), 1.0f64.acosh().to_bits());
293+
}
294+
295+
simd_math_targeted_all_backends!(
296+
f64_inverse_hyperbolic_signed_zero_semantics,
297+
run_f64_inverse_hyperbolic_signed_zero_semantics
298+
);

0 commit comments

Comments
 (0)