From 36b49d94fc87e26d34a1b08a4e6723e79687eee2 Mon Sep 17 00:00:00 2001 From: viiccwen Date: Wed, 8 Apr 2026 10:47:28 +0000 Subject: [PATCH 1/6] feat(kernel): add angle encoding kernel (f32) --- qdp/qdp-kernels/src/angle.cu | 55 ++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/qdp/qdp-kernels/src/angle.cu b/qdp/qdp-kernels/src/angle.cu index a3fd5668f4..2a2d129689 100644 --- a/qdp/qdp-kernels/src/angle.cu +++ b/qdp/qdp-kernels/src/angle.cu @@ -42,6 +42,24 @@ __global__ void angle_encode_kernel( state[idx] = make_cuDoubleComplex(amplitude, 0.0); } +__global__ void angle_encode_kernel_f32( + const float* __restrict__ angles, + cuComplex* __restrict__ state, + size_t state_len, + unsigned int num_qubits +) { + size_t idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= state_len) return; + + float amplitude = 1.0f; + for (unsigned int bit = 0; bit < num_qubits; ++bit) { + float angle = angles[bit]; + amplitude *= ((idx >> bit) & 1U) ? sinf(angle) : cosf(angle); + } + + state[idx] = make_cuComplex(amplitude, 0.0f); +} + __global__ void angle_encode_batch_kernel( const double* __restrict__ angles_batch, cuDoubleComplex* __restrict__ state_batch, @@ -109,6 +127,43 @@ int launch_angle_encode( return (int)cudaGetLastError(); } +/// Launch angle encoding kernel for float32 input +/// +/// # Arguments +/// * angles_d - Device pointer to per-qubit angles +/// * state_d - Device pointer to output state vector +/// * state_len - Target state vector size (2^num_qubits) +/// * num_qubits - Number of qubits (angles length) +/// * stream - CUDA stream for async execution (nullptr = default stream) +/// +/// # Returns +/// CUDA error code (0 = cudaSuccess) +int launch_angle_encode_f32( + const float* angles_d, + void* state_d, + size_t state_len, + unsigned int num_qubits, + cudaStream_t stream +) { + if (state_len == 0 || num_qubits == 0) { + return cudaErrorInvalidValue; + } + + cuComplex* state_complex_d = static_cast(state_d); + + const int blockSize = DEFAULT_BLOCK_SIZE; + const int gridSize = (state_len + blockSize - 1) / blockSize; + + angle_encode_kernel_f32<<>>( + angles_d, + state_complex_d, + state_len, + num_qubits + ); + + return (int)cudaGetLastError(); +} + /// Launch batch angle encoding kernel /// /// # Arguments From 1f1d8d0aca8920f40f41982847da1a8c50597ef0 Mon Sep 17 00:00:00 2001 From: viiccwen Date: Wed, 8 Apr 2026 10:48:41 +0000 Subject: [PATCH 2/6] feat: add angle encoding from GPU pointer (f32) --- qdp/qdp-core/src/gpu/encodings/angle.rs | 59 +++++++++++++++++++++++++ qdp/qdp-core/src/lib.rs | 57 ++++++++++++++++++++++++ qdp/qdp-kernels/src/lib.rs | 25 +++++++++++ 3 files changed, 141 insertions(+) diff --git a/qdp/qdp-core/src/gpu/encodings/angle.rs b/qdp/qdp-core/src/gpu/encodings/angle.rs index 24ed50a6dc..34de8e31f9 100644 --- a/qdp/qdp-core/src/gpu/encodings/angle.rs +++ b/qdp/qdp-core/src/gpu/encodings/angle.rs @@ -401,6 +401,65 @@ impl QuantumEncoder for AngleEncoder { } impl AngleEncoder { + #[cfg(target_os = "linux")] + pub unsafe fn encode_from_gpu_ptr_f32_with_stream( + device: &Arc, + input_d: *const f32, + input_len: usize, + num_qubits: usize, + stream: *mut c_void, + ) -> Result { + if input_len == 0 { + return Err(MahoutError::InvalidInput( + "Input data cannot be empty".into(), + )); + } + if input_len != num_qubits { + return Err(MahoutError::InvalidInput(format!( + "Angle encoding expects {} values (one per qubit), got {}", + num_qubits, input_len + ))); + } + + let state_len = 1 << num_qubits; + let state_vector = { + crate::profile_scope!("GPU::Alloc"); + GpuStateVector::new(device, num_qubits, Precision::Float32)? + }; + let state_ptr = state_vector.ptr_f32().ok_or_else(|| { + MahoutError::InvalidInput( + "State vector precision mismatch (expected float32 buffer)".to_string(), + ) + })?; + + { + crate::profile_scope!("GPU::KernelLaunch"); + let ret = unsafe { + qdp_kernels::launch_angle_encode_f32( + input_d, + state_ptr as *mut c_void, + state_len, + num_qubits as u32, + stream, + ) + }; + if ret != 0 { + return Err(MahoutError::KernelLaunch(format!( + "Angle encoding kernel (f32) failed with CUDA error code: {} ({})", + ret, + cuda_error_to_string(ret) + ))); + } + } + + { + crate::profile_scope!("GPU::Synchronize"); + crate::gpu::cuda_sync::sync_cuda_stream(stream, "CUDA stream synchronize failed")?; + } + + Ok(state_vector) + } + #[cfg(target_os = "linux")] fn encode_batch_async_pipeline( device: &Arc, diff --git a/qdp/qdp-core/src/lib.rs b/qdp/qdp-core/src/lib.rs index 0153f8719f..3db9accdc1 100644 --- a/qdp/qdp-core/src/lib.rs +++ b/qdp/qdp-core/src/lib.rs @@ -639,6 +639,63 @@ impl QdpEngine { Ok(state_vector.to_dlpack()) } + /// Encode angle from existing GPU pointer (float32 input only). + /// + /// Zero-copy encoding from CUDA float32 tensors. Uses the default CUDA stream. + /// For stream interop use `encode_angle_from_gpu_ptr_f32_with_stream`. + /// + /// # Safety + /// The input pointer must: + /// - Point to valid GPU memory on the same device as the engine + /// - Contain at least `input_len` f32 elements + /// - Remain valid for the duration of this call + #[cfg(target_os = "linux")] + pub unsafe fn encode_angle_from_gpu_ptr_f32( + &self, + input_d: *const f32, + input_len: usize, + num_qubits: usize, + ) -> Result<*mut DLManagedTensor> { + unsafe { + self.encode_angle_from_gpu_ptr_f32_with_stream( + input_d, + input_len, + num_qubits, + std::ptr::null_mut(), + ) + } + } + + /// Encode angle from existing GPU pointer (float32) on a specified CUDA stream. + /// + /// # Safety + /// In addition to the `encode_angle_from_gpu_ptr_f32` requirements, the stream pointer + /// must remain valid for the duration of this call. + #[cfg(target_os = "linux")] + pub unsafe fn encode_angle_from_gpu_ptr_f32_with_stream( + &self, + input_d: *const f32, + input_len: usize, + num_qubits: usize, + stream: *mut c_void, + ) -> Result<*mut DLManagedTensor> { + crate::profile_scope!("Mahout::EncodeAngleFromGpuPtrF32"); + + validate_cuda_input_ptr(&self.device, input_d as *const c_void)?; + + let state_vector = unsafe { + gpu::AngleEncoder::encode_from_gpu_ptr_f32_with_stream( + &self.device, + input_d, + input_len, + num_qubits, + stream, + ) + }?; + let state_vector = state_vector.to_precision(&self.device, self.precision)?; + Ok(state_vector.to_dlpack()) + } + /// Encode a batch from an existing GPU pointer (float32 input, amplitude encoding only). /// /// Zero-copy batch encoding from PyTorch CUDA float32 tensors. Uses the default CUDA stream. diff --git a/qdp/qdp-kernels/src/lib.rs b/qdp/qdp-kernels/src/lib.rs index e5ad2b69fd..4649092863 100644 --- a/qdp/qdp-kernels/src/lib.rs +++ b/qdp/qdp-kernels/src/lib.rs @@ -227,6 +227,19 @@ unsafe extern "C" { stream: *mut c_void, ) -> i32; + /// Launch angle encoding kernel for float32 inputs. + /// Returns CUDA error code (0 = success) + /// + /// # Safety + /// Requires valid GPU pointers, must sync before freeing + pub fn launch_angle_encode_f32( + angles_d: *const f32, + state_d: *mut c_void, + state_len: usize, + num_qubits: u32, + stream: *mut c_void, + ) -> i32; + /// Launch batch angle encoding kernel /// Returns CUDA error code (0 = success) /// @@ -458,6 +471,18 @@ pub extern "C" fn launch_angle_encode( 999 } +#[cfg(any(not(target_os = "linux"), qdp_no_cuda))] +#[unsafe(no_mangle)] +pub extern "C" fn launch_angle_encode_f32( + _angles_d: *const f32, + _state_d: *mut c_void, + _state_len: usize, + _num_qubits: u32, + _stream: *mut c_void, +) -> i32 { + 999 +} + #[cfg(any(not(target_os = "linux"), qdp_no_cuda))] #[unsafe(no_mangle)] pub extern "C" fn launch_angle_encode_batch( From e8d2596e2ac361da7382fa5dffa6ec83934b6c91 Mon Sep 17 00:00:00 2001 From: viiccwen Date: Wed, 8 Apr 2026 10:49:02 +0000 Subject: [PATCH 3/6] test: add unit tests for angle encoding --- qdp/qdp-core/tests/gpu_ptr_encoding.rs | 148 ++++++++++++++++++++++++ qdp/qdp-kernels/tests/angle_encode.rs | 149 +++++++++++++++++++++++++ 2 files changed, 297 insertions(+) create mode 100644 qdp/qdp-kernels/tests/angle_encode.rs diff --git a/qdp/qdp-core/tests/gpu_ptr_encoding.rs b/qdp/qdp-core/tests/gpu_ptr_encoding.rs index 470402727d..ecb1ec6462 100644 --- a/qdp/qdp-core/tests/gpu_ptr_encoding.rs +++ b/qdp/qdp-core/tests/gpu_ptr_encoding.rs @@ -892,6 +892,154 @@ fn test_encode_from_gpu_ptr_f32_input_exceeds_state_len() { } } +#[test] +fn test_encode_angle_from_gpu_ptr_f32_success() { + let engine = match engine_f32() { + Some(e) => e, + None => { + println!("SKIP: No GPU"); + return; + } + }; + let (_device, input_d) = match common::copy_f32_to_device(&[0.0, std::f32::consts::FRAC_PI_2]) { + Some(t) => t, + None => { + println!("SKIP: No CUDA device"); + return; + } + }; + let ptr = *input_d.device_ptr() as *const f32; + let dlpack_ptr = unsafe { + engine + .encode_angle_from_gpu_ptr_f32(ptr, input_d.len(), 2) + .expect("encode_angle_from_gpu_ptr_f32") + }; + unsafe { common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, 1, 4) }; +} + +#[test] +fn test_encode_angle_from_gpu_ptr_f32_with_stream_success() { + let engine = match engine_f32() { + Some(e) => e, + None => { + println!("SKIP: No GPU"); + return; + } + }; + let (device, input_d) = match common::copy_f32_to_device(&[0.0, std::f32::consts::FRAC_PI_2]) { + Some(t) => t, + None => { + println!("SKIP: No CUDA device"); + return; + } + }; + let stream = device.fork_default_stream().expect("fork_default_stream"); + let dlpack_ptr = unsafe { + engine + .encode_angle_from_gpu_ptr_f32_with_stream( + *input_d.device_ptr() as *const f32, + input_d.len(), + 2, + stream.stream as *mut c_void, + ) + .expect("encode_angle_from_gpu_ptr_f32_with_stream") + }; + unsafe { common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, 1, 4) }; +} + +#[test] +fn test_encode_angle_from_gpu_ptr_f32_success_f64_engine() { + let Some(engine) = common::qdp_engine_with_precision(Precision::Float64) else { + println!("SKIP: No GPU"); + return; + }; + let (_device, input_d) = match common::copy_f32_to_device(&[0.0, std::f32::consts::FRAC_PI_2]) { + Some(t) => t, + None => { + println!("SKIP: No CUDA device"); + return; + } + }; + let ptr = *input_d.device_ptr() as *const f32; + let dlpack_ptr = unsafe { + engine + .encode_angle_from_gpu_ptr_f32(ptr, input_d.len(), 2) + .expect("encode_angle_from_gpu_ptr_f32 (Float64 engine)") + }; + unsafe { common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, 1, 4) }; +} + +#[test] +fn test_encode_angle_from_gpu_ptr_f32_empty_input() { + let engine = match engine_f32() { + Some(e) => e, + None => { + println!("SKIP: No GPU"); + return; + } + }; + let (_device, input_d) = match common::copy_f32_to_device(&[0.0]) { + Some(t) => t, + None => { + println!("SKIP: No CUDA device"); + return; + } + }; + let ptr = *input_d.device_ptr() as *const f32; + let result = unsafe { engine.encode_angle_from_gpu_ptr_f32(ptr, 0, 1) }; + assert!(result.is_err()); + match &result.unwrap_err() { + MahoutError::InvalidInput(msg) => { + assert!(msg.contains("empty") || msg.contains("null")); + } + e => panic!("Expected InvalidInput, got {:?}", e), + } +} + +#[test] +fn test_encode_angle_from_gpu_ptr_f32_null_pointer() { + let engine = match engine_f32() { + Some(e) => e, + None => { + println!("SKIP: No GPU"); + return; + } + }; + let result = unsafe { engine.encode_angle_from_gpu_ptr_f32(std::ptr::null(), 2, 2) }; + assert!(result.is_err()); + match &result.unwrap_err() { + MahoutError::InvalidInput(msg) => assert!(msg.contains("null")), + e => panic!("Expected InvalidInput, got {:?}", e), + } +} + +#[test] +fn test_encode_angle_from_gpu_ptr_f32_qubit_mismatch() { + let engine = match engine_f32() { + Some(e) => e, + None => { + println!("SKIP: No GPU"); + return; + } + }; + let (_device, input_d) = match common::copy_f32_to_device(&[0.0, std::f32::consts::FRAC_PI_2]) { + Some(t) => t, + None => { + println!("SKIP: No CUDA device"); + return; + } + }; + let ptr = *input_d.device_ptr() as *const f32; + let result = unsafe { engine.encode_angle_from_gpu_ptr_f32(ptr, input_d.len(), 1) }; + assert!(result.is_err()); + match &result.unwrap_err() { + MahoutError::InvalidInput(msg) => { + assert!(msg.contains("expects 1 values") || msg.contains("got 2")); + } + e => panic!("Expected InvalidInput, got {:?}", e), + } +} + #[test] fn test_encode_batch_from_gpu_ptr_f32_success() { let engine = match engine_f32() { diff --git a/qdp/qdp-kernels/tests/angle_encode.rs b/qdp/qdp-kernels/tests/angle_encode.rs new file mode 100644 index 0000000000..f42b3cd50a --- /dev/null +++ b/qdp/qdp-kernels/tests/angle_encode.rs @@ -0,0 +1,149 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Tests for angle encoding CUDA kernels. + +#![allow(unused_unsafe)] + +#[cfg(target_os = "linux")] +use cudarc::driver::{CudaDevice, DevicePtr, DevicePtrMut}; +#[cfg(target_os = "linux")] +use qdp_kernels::{CuComplex, launch_angle_encode_f32}; + +const EPSILON_F32: f32 = 1e-5; + +#[test] +#[cfg(target_os = "linux")] +fn test_angle_encode_basic_f32() { + let device = match CudaDevice::new(0) { + Ok(d) => d, + Err(_) => { + println!("SKIP: No CUDA device available"); + return; + } + }; + + let input = vec![std::f32::consts::FRAC_PI_2, 0.0_f32]; + let state_len = 4usize; + + let input_d = device.htod_copy(input).unwrap(); + let mut state_d = device.alloc_zeros::(state_len).unwrap(); + + let result = unsafe { + launch_angle_encode_f32( + *input_d.device_ptr() as *const f32, + *state_d.device_ptr_mut() as *mut std::ffi::c_void, + state_len, + 2, + std::ptr::null_mut(), + ) + }; + assert_eq!(result, 0, "Kernel launch should succeed"); + + let state_h = device.dtoh_sync_copy(&state_d).unwrap(); + let expected = [0.0_f32, 1.0_f32, 0.0_f32, 0.0_f32]; + + for (idx, (actual, expected)) in state_h.iter().zip(expected.iter()).enumerate() { + assert!( + (actual.x - expected).abs() < EPSILON_F32, + "state[{idx}].x expected {expected}, got {}", + actual.x + ); + assert!( + actual.y.abs() < EPSILON_F32, + "state[{idx}].y expected 0, got {}", + actual.y + ); + } +} + +#[test] +#[cfg(target_os = "linux")] +fn test_angle_encode_matches_expected_product_state_f32() { + let device = match CudaDevice::new(0) { + Ok(d) => d, + Err(_) => { + println!("SKIP: No CUDA device available"); + return; + } + }; + + let angles = vec![0.3_f32, 0.7_f32]; + let state_len = 4usize; + + let input_d = device.htod_copy(angles.clone()).unwrap(); + let mut state_d = device.alloc_zeros::(state_len).unwrap(); + + let result = unsafe { + launch_angle_encode_f32( + *input_d.device_ptr() as *const f32, + *state_d.device_ptr_mut() as *mut std::ffi::c_void, + state_len, + 2, + std::ptr::null_mut(), + ) + }; + assert_eq!(result, 0, "Kernel launch should succeed"); + + let state_h = device.dtoh_sync_copy(&state_d).unwrap(); + let expected = [ + angles[0].cos() * angles[1].cos(), + angles[0].sin() * angles[1].cos(), + angles[0].cos() * angles[1].sin(), + angles[0].sin() * angles[1].sin(), + ]; + + for (idx, (actual, expected)) in state_h.iter().zip(expected.iter()).enumerate() { + assert!( + (actual.x - expected).abs() < EPSILON_F32, + "state[{idx}].x expected {expected}, got {}", + actual.x + ); + assert!( + actual.y.abs() < EPSILON_F32, + "state[{idx}].y expected 0, got {}", + actual.y + ); + } +} + +#[test] +#[cfg(target_os = "linux")] +fn test_angle_encode_f32_rejects_zero_qubits() { + let device = match CudaDevice::new(0) { + Ok(d) => d, + Err(_) => { + println!("SKIP: No CUDA device available"); + return; + } + }; + + let input = vec![0.0_f32]; + let input_d = device.htod_copy(input).unwrap(); + let mut state_d = device.alloc_zeros::(1).unwrap(); + + let result = unsafe { + launch_angle_encode_f32( + *input_d.device_ptr() as *const f32, + *state_d.device_ptr_mut() as *mut std::ffi::c_void, + 1, + 0, + std::ptr::null_mut(), + ) + }; + + assert_ne!(result, 0, "Zero-qubit launch should fail"); +} From bddfbff6aeb20d8565cd06fc0804e40421e95760 Mon Sep 17 00:00:00 2001 From: viiccwen Date: Wed, 8 Apr 2026 14:23:55 +0000 Subject: [PATCH 4/6] fix: pre-commit error --- qdp/qdp-core/src/gpu/encodings/angle.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/qdp/qdp-core/src/gpu/encodings/angle.rs b/qdp/qdp-core/src/gpu/encodings/angle.rs index 34de8e31f9..9bc81914b6 100644 --- a/qdp/qdp-core/src/gpu/encodings/angle.rs +++ b/qdp/qdp-core/src/gpu/encodings/angle.rs @@ -402,6 +402,15 @@ impl QuantumEncoder for AngleEncoder { impl AngleEncoder { #[cfg(target_os = "linux")] + /// Encodes `input_len` angle values from a device-resident `f32` buffer into a GPU state + /// vector, using the provided CUDA stream for all launched work. + /// + /// # Safety + /// The caller must ensure that `input_d` points to at least `input_len` contiguous `f32` + /// values in GPU-accessible memory and remains valid for the duration of this call. + /// The caller must also ensure that `stream` is either null or a valid CUDA stream handle + /// associated with `device`, and that no concurrent use of these raw pointers violates Rust's + /// aliasing or lifetime rules. pub unsafe fn encode_from_gpu_ptr_f32_with_stream( device: &Arc, input_d: *const f32, From 2c8ac872c3e2623c4ccb71adc163c4a18fc32479 Mon Sep 17 00:00:00 2001 From: viiccwen Date: Sun, 12 Apr 2026 05:45:07 +0000 Subject: [PATCH 5/6] fix: add validation for qubit count in AngleEncoder methods --- qdp/qdp-core/src/gpu/encodings/angle.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/qdp/qdp-core/src/gpu/encodings/angle.rs b/qdp/qdp-core/src/gpu/encodings/angle.rs index 9bc81914b6..974a7b6c7a 100644 --- a/qdp/qdp-core/src/gpu/encodings/angle.rs +++ b/qdp/qdp-core/src/gpu/encodings/angle.rs @@ -232,6 +232,7 @@ impl QuantumEncoder for AngleEncoder { num_qubits, input_len ))); } + validate_qubit_count(num_qubits)?; let state_len = 1 << num_qubits; let angles_d = input_d as *const f64; let state_vector = { @@ -290,6 +291,7 @@ impl QuantumEncoder for AngleEncoder { num_qubits, sample_size ))); } + validate_qubit_count(num_qubits)?; let state_len = 1 << num_qubits; let input_batch_d = input_batch_d as *const f64; let angle_validation_buffer = { @@ -401,7 +403,6 @@ impl QuantumEncoder for AngleEncoder { } impl AngleEncoder { - #[cfg(target_os = "linux")] /// Encodes `input_len` angle values from a device-resident `f32` buffer into a GPU state /// vector, using the provided CUDA stream for all launched work. /// @@ -411,6 +412,7 @@ impl AngleEncoder { /// The caller must also ensure that `stream` is either null or a valid CUDA stream handle /// associated with `device`, and that no concurrent use of these raw pointers violates Rust's /// aliasing or lifetime rules. + #[cfg(target_os = "linux")] pub unsafe fn encode_from_gpu_ptr_f32_with_stream( device: &Arc, input_d: *const f32, @@ -430,6 +432,7 @@ impl AngleEncoder { ))); } + validate_qubit_count(num_qubits)?; let state_len = 1 << num_qubits; let state_vector = { crate::profile_scope!("GPU::Alloc"); From 7cd9dbe536e428c1139b7062e2128aec5fe838e5 Mon Sep 17 00:00:00 2001 From: viiccwen Date: Sun, 12 Apr 2026 05:45:24 +0000 Subject: [PATCH 6/6] test: add additional unit tests for angle encoding error handling --- qdp/qdp-core/tests/gpu_ptr_encoding.rs | 62 ++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/qdp/qdp-core/tests/gpu_ptr_encoding.rs b/qdp/qdp-core/tests/gpu_ptr_encoding.rs index ecb1ec6462..80a054a693 100644 --- a/qdp/qdp-core/tests/gpu_ptr_encoding.rs +++ b/qdp/qdp-core/tests/gpu_ptr_encoding.rs @@ -1040,6 +1040,68 @@ fn test_encode_angle_from_gpu_ptr_f32_qubit_mismatch() { } } +#[test] +fn test_encode_angle_from_gpu_ptr_f32_too_many_qubits() { + let engine = match engine_f32() { + Some(e) => e, + None => { + println!("SKIP: No GPU"); + return; + } + }; + let input = vec![0.0_f32; 31]; + let (_device, input_d) = match common::copy_f32_to_device(&input) { + Some(t) => t, + None => { + println!("SKIP: No CUDA device"); + return; + } + }; + let ptr = *input_d.device_ptr() as *const f32; + let result = unsafe { engine.encode_angle_from_gpu_ptr_f32(ptr, input_d.len(), 31) }; + assert!(result.is_err()); + match &result.unwrap_err() { + MahoutError::InvalidInput(msg) => { + assert!(msg.contains("exceeds practical limit"), "got: {msg}"); + } + e => panic!("Expected InvalidInput, got {:?}", e), + } +} + +#[test] +fn test_encode_angle_from_gpu_ptr_f32_with_stream_too_many_qubits() { + let engine = match engine_f32() { + Some(e) => e, + None => { + println!("SKIP: No GPU"); + return; + } + }; + let (device, input_d) = match common::copy_f32_to_device(&[0.0_f32; 31]) { + Some(t) => t, + None => { + println!("SKIP: No CUDA device"); + return; + } + }; + let stream = device.fork_default_stream().expect("fork_default_stream"); + let result = unsafe { + engine.encode_angle_from_gpu_ptr_f32_with_stream( + *input_d.device_ptr() as *const f32, + input_d.len(), + 31, + stream.stream as *mut c_void, + ) + }; + assert!(result.is_err()); + match &result.unwrap_err() { + MahoutError::InvalidInput(msg) => { + assert!(msg.contains("exceeds practical limit"), "got: {msg}"); + } + e => panic!("Expected InvalidInput, got {:?}", e), + } +} + #[test] fn test_encode_batch_from_gpu_ptr_f32_success() { let engine = match engine_f32() {