Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions qdp/qdp-core/src/gpu/encodings/angle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ impl QuantumEncoder for AngleEncoder {
num_qubits, input_len
)));
}
validate_qubit_count(num_qubits)?;
let state_len = 1 << num_qubits;
let angles_d = input_d as *const f64;
let state_vector = {
Expand Down Expand Up @@ -290,6 +291,7 @@ impl QuantumEncoder for AngleEncoder {
num_qubits, sample_size
)));
}
validate_qubit_count(num_qubits)?;
let state_len = 1 << num_qubits;
let input_batch_d = input_batch_d as *const f64;
let angle_validation_buffer = {
Expand Down Expand Up @@ -401,6 +403,75 @@ impl QuantumEncoder for AngleEncoder {
}

impl AngleEncoder {
/// Encodes `input_len` angle values from a device-resident `f32` buffer into a GPU state
/// vector, using the provided CUDA stream for all launched work.
///
/// The state vector is allocated with `Precision::Float32`, the f32 angle-encoding kernel is
/// launched on `stream`, and `stream` is synchronized before the state vector is returned.
///
/// # Errors
/// Returns `MahoutError::InvalidInput` when `input_d` is null, when `input_len` is zero, when
/// `input_len != num_qubits`, when `validate_qubit_count` rejects `num_qubits`, or when the
/// allocated state vector does not expose an f32 buffer. Returns `MahoutError::KernelLaunch`
/// when the kernel launcher reports a non-zero CUDA error code. Errors from state-vector
/// allocation and from stream synchronization are propagated unchanged.
///
/// # Safety
/// The caller must ensure that `input_d` points to at least `input_len` contiguous `f32`
/// values in GPU-accessible memory and remains valid for the duration of this call.
/// The caller must also ensure that `stream` is either null or a valid CUDA stream handle
/// associated with `device`, and that no concurrent use of these raw pointers violates Rust's
/// aliasing or lifetime rules.
#[cfg(target_os = "linux")]
pub unsafe fn encode_from_gpu_ptr_f32_with_stream(
    device: &Arc<CudaDevice>,
    input_d: *const f32,
    input_len: usize,
    num_qubits: usize,
    stream: *mut c_void,
) -> Result<GpuStateVector> {
    // Reject a null device pointer up front rather than handing it to the kernel launcher;
    // callers that bypass the engine-level pointer validation would otherwise launch on it.
    if input_d.is_null() {
        return Err(MahoutError::InvalidInput(
            "Input GPU pointer is null".into(),
        ));
    }
    if input_len == 0 {
        return Err(MahoutError::InvalidInput(
            "Input data cannot be empty".into(),
        ));
    }
    // Angle encoding consumes exactly one angle per qubit.
    if input_len != num_qubits {
        return Err(MahoutError::InvalidInput(format!(
            "Angle encoding expects {} values (one per qubit), got {}",
            num_qubits, input_len
        )));
    }

    // Validate before shifting: an oversized `num_qubits` would overflow `1 << num_qubits`.
    validate_qubit_count(num_qubits)?;
    let state_len = 1 << num_qubits;
    let state_vector = {
        crate::profile_scope!("GPU::Alloc");
        GpuStateVector::new(device, num_qubits, Precision::Float32)?
    };
    let state_ptr = state_vector.ptr_f32().ok_or_else(|| {
        MahoutError::InvalidInput(
            "State vector precision mismatch (expected float32 buffer)".to_string(),
        )
    })?;

    {
        crate::profile_scope!("GPU::KernelLaunch");
        // SAFETY: `input_d` is non-null (checked above) and the caller guarantees it addresses
        // `input_len == num_qubits` f32 values; `state_ptr` is the f32 buffer of the state
        // vector allocated above for `num_qubits` qubits; `stream` is null or valid per this
        // function's contract.
        // NOTE(review): `num_qubits as u32` is presumed lossless because `validate_qubit_count`
        // has already accepted the value — confirm its upper bound.
        let ret = unsafe {
            qdp_kernels::launch_angle_encode_f32(
                input_d,
                state_ptr as *mut c_void,
                state_len,
                num_qubits as u32,
                stream,
            )
        };
        if ret != 0 {
            return Err(MahoutError::KernelLaunch(format!(
                "Angle encoding kernel (f32) failed with CUDA error code: {} ({})",
                ret,
                cuda_error_to_string(ret)
            )));
        }
    }

    {
        // Wait for the work queued on `stream` before handing the state vector back.
        crate::profile_scope!("GPU::Synchronize");
        crate::gpu::cuda_sync::sync_cuda_stream(stream, "CUDA stream synchronize failed")?;
    }

    Ok(state_vector)
}

#[cfg(target_os = "linux")]
fn encode_batch_async_pipeline(
device: &Arc<CudaDevice>,
Expand Down
57 changes: 57 additions & 0 deletions qdp/qdp-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,63 @@ impl QdpEngine {
Ok(state_vector.to_dlpack())
}

/// Angle-encodes `f32` data that already resides on the GPU (float32 input only).
///
/// Zero-copy encoding from CUDA float32 tensors. This is a convenience wrapper that forwards
/// to `encode_angle_from_gpu_ptr_f32_with_stream` with a null stream handle, i.e. the default
/// CUDA stream. Use the `_with_stream` variant directly for stream interop.
///
/// # Safety
/// `input_d` must:
/// - Refer to valid GPU memory on the same device as the engine
/// - Hold at least `input_len` f32 elements
/// - Stay valid for the duration of this call
#[cfg(target_os = "linux")]
pub unsafe fn encode_angle_from_gpu_ptr_f32(
    &self,
    input_d: *const f32,
    input_len: usize,
    num_qubits: usize,
) -> Result<*mut DLManagedTensor> {
    // A null stream handle stands for the default CUDA stream.
    let default_stream = std::ptr::null_mut();
    // SAFETY: the pointer requirements are forwarded unchanged from this function's own
    // contract; the stream argument is null.
    unsafe {
        self.encode_angle_from_gpu_ptr_f32_with_stream(
            input_d,
            input_len,
            num_qubits,
            default_stream,
        )
    }
}

/// Angle-encodes `f32` data that already resides on the GPU, on a caller-specified CUDA
/// stream.
///
/// The input pointer is checked with `validate_cuda_input_ptr`, the data is encoded by
/// `AngleEncoder::encode_from_gpu_ptr_f32_with_stream`, the resulting state vector is
/// converted to the engine's configured precision, and the result is exported as a DLPack
/// tensor.
///
/// # Safety
/// `input_d` must point to valid GPU memory on the same device as the engine, contain at
/// least `input_len` f32 elements, and remain valid for the duration of this call. The
/// `stream` pointer must likewise remain valid for the duration of this call.
#[cfg(target_os = "linux")]
pub unsafe fn encode_angle_from_gpu_ptr_f32_with_stream(
    &self,
    input_d: *const f32,
    input_len: usize,
    num_qubits: usize,
    stream: *mut c_void,
) -> Result<*mut DLManagedTensor> {
    crate::profile_scope!("Mahout::EncodeAngleFromGpuPtrF32");

    // Pointer validation happens before any encoding work is attempted.
    validate_cuda_input_ptr(&self.device, input_d as *const c_void)?;

    // SAFETY: pointer and stream requirements are forwarded from this function's contract.
    let encoded = unsafe {
        gpu::AngleEncoder::encode_from_gpu_ptr_f32_with_stream(
            &self.device,
            input_d,
            input_len,
            num_qubits,
            stream,
        )
    }?;

    // Bring the encoder output to the precision this engine was configured with.
    let converted = encoded.to_precision(&self.device, self.precision)?;
    Ok(converted.to_dlpack())
}

/// Encode a batch from an existing GPU pointer (float32 input, amplitude encoding only).
///
/// Zero-copy batch encoding from PyTorch CUDA float32 tensors. Uses the default CUDA stream.
Expand Down
210 changes: 210 additions & 0 deletions qdp/qdp-core/tests/gpu_ptr_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -892,6 +892,216 @@ fn test_encode_from_gpu_ptr_f32_input_exceeds_state_len() {
}
}

/// Happy path: two f32 angles on the device, two qubits, default stream.
#[test]
fn test_encode_angle_from_gpu_ptr_f32_success() {
    // Bail out quietly when no engine / device is available.
    let Some(engine) = engine_f32() else {
        println!("SKIP: No GPU");
        return;
    };
    let angles = [0.0, std::f32::consts::FRAC_PI_2];
    let Some((_device, input_d)) = common::copy_f32_to_device(&angles) else {
        println!("SKIP: No CUDA device");
        return;
    };

    let device_ptr = *input_d.device_ptr() as *const f32;
    let encoded = unsafe { engine.encode_angle_from_gpu_ptr_f32(device_ptr, input_d.len(), 2) };
    let dlpack_ptr = encoded.expect("encode_angle_from_gpu_ptr_f32");

    // The helper receives the expected 2-D shape (1, 4): 2 qubits give 1 << 2 amplitudes.
    unsafe { common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, 1, 4) };
}

/// Happy path on an explicit stream forked from the device's default stream.
#[test]
fn test_encode_angle_from_gpu_ptr_f32_with_stream_success() {
    // Bail out quietly when no engine / device is available.
    let Some(engine) = engine_f32() else {
        println!("SKIP: No GPU");
        return;
    };
    let angles = [0.0, std::f32::consts::FRAC_PI_2];
    let Some((device, input_d)) = common::copy_f32_to_device(&angles) else {
        println!("SKIP: No CUDA device");
        return;
    };

    let stream = device.fork_default_stream().expect("fork_default_stream");
    let raw_stream = stream.stream as *mut c_void;
    let device_ptr = *input_d.device_ptr() as *const f32;

    let encoded = unsafe {
        engine.encode_angle_from_gpu_ptr_f32_with_stream(device_ptr, input_d.len(), 2, raw_stream)
    };
    let dlpack_ptr = encoded.expect("encode_angle_from_gpu_ptr_f32_with_stream");

    // The helper receives the expected 2-D shape (1, 4): 2 qubits give 1 << 2 amplitudes.
    unsafe { common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, 1, 4) };
}

/// f32 device input fed to an engine constructed with `Precision::Float64`.
#[test]
fn test_encode_angle_from_gpu_ptr_f32_success_f64_engine() {
    // Bail out quietly when no engine / device is available.
    let engine = match common::qdp_engine_with_precision(Precision::Float64) {
        Some(engine) => engine,
        None => {
            println!("SKIP: No GPU");
            return;
        }
    };
    let angles = [0.0, std::f32::consts::FRAC_PI_2];
    let Some((_device, input_d)) = common::copy_f32_to_device(&angles) else {
        println!("SKIP: No CUDA device");
        return;
    };

    let device_ptr = *input_d.device_ptr() as *const f32;
    let encoded = unsafe { engine.encode_angle_from_gpu_ptr_f32(device_ptr, input_d.len(), 2) };
    let dlpack_ptr = encoded.expect("encode_angle_from_gpu_ptr_f32 (Float64 engine)");

    // The helper receives the expected 2-D shape (1, 4): 2 qubits give 1 << 2 amplitudes.
    unsafe { common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, 1, 4) };
}

/// A zero `input_len` must be rejected with `InvalidInput`.
#[test]
fn test_encode_angle_from_gpu_ptr_f32_empty_input() {
    // Bail out quietly when no engine / device is available.
    let Some(engine) = engine_f32() else {
        println!("SKIP: No GPU");
        return;
    };
    let Some((_device, input_d)) = common::copy_f32_to_device(&[0.0]) else {
        println!("SKIP: No CUDA device");
        return;
    };

    // The pointer itself is valid; only the reported length (0) is wrong.
    let device_ptr = *input_d.device_ptr() as *const f32;
    let outcome = unsafe { engine.encode_angle_from_gpu_ptr_f32(device_ptr, 0, 1) };
    assert!(outcome.is_err());

    let err = outcome.unwrap_err();
    let MahoutError::InvalidInput(msg) = &err else {
        panic!("Expected InvalidInput, got {:?}", err);
    };
    assert!(msg.contains("empty") || msg.contains("null"));
}

/// A null input pointer must be rejected with an `InvalidInput` error mentioning "null".
#[test]
fn test_encode_angle_from_gpu_ptr_f32_null_pointer() {
    // Bail out quietly when no engine is available.
    let Some(engine) = engine_f32() else {
        println!("SKIP: No GPU");
        return;
    };

    let null_input = std::ptr::null();
    let outcome = unsafe { engine.encode_angle_from_gpu_ptr_f32(null_input, 2, 2) };
    assert!(outcome.is_err());

    let err = outcome.unwrap_err();
    let MahoutError::InvalidInput(msg) = &err else {
        panic!("Expected InvalidInput, got {:?}", err);
    };
    assert!(msg.contains("null"));
}

/// Two input values with `num_qubits == 1` must be rejected with `InvalidInput`.
#[test]
fn test_encode_angle_from_gpu_ptr_f32_qubit_mismatch() {
    // Bail out quietly when no engine / device is available.
    let Some(engine) = engine_f32() else {
        println!("SKIP: No GPU");
        return;
    };
    let angles = [0.0, std::f32::consts::FRAC_PI_2];
    let Some((_device, input_d)) = common::copy_f32_to_device(&angles) else {
        println!("SKIP: No CUDA device");
        return;
    };

    let device_ptr = *input_d.device_ptr() as *const f32;
    let outcome = unsafe { engine.encode_angle_from_gpu_ptr_f32(device_ptr, input_d.len(), 1) };
    assert!(outcome.is_err());

    let err = outcome.unwrap_err();
    let MahoutError::InvalidInput(msg) = &err else {
        panic!("Expected InvalidInput, got {:?}", err);
    };
    assert!(msg.contains("expects 1 values") || msg.contains("got 2"));
}

/// Requesting 31 qubits must be rejected with the "exceeds practical limit" error.
#[test]
fn test_encode_angle_from_gpu_ptr_f32_too_many_qubits() {
    // Bail out quietly when no engine / device is available.
    let Some(engine) = engine_f32() else {
        println!("SKIP: No GPU");
        return;
    };
    // One angle per requested qubit, so the length check passes and the qubit limit is hit.
    let angles = vec![0.0_f32; 31];
    let Some((_device, input_d)) = common::copy_f32_to_device(&angles) else {
        println!("SKIP: No CUDA device");
        return;
    };

    let device_ptr = *input_d.device_ptr() as *const f32;
    let outcome = unsafe { engine.encode_angle_from_gpu_ptr_f32(device_ptr, input_d.len(), 31) };
    assert!(outcome.is_err());

    let err = outcome.unwrap_err();
    let MahoutError::InvalidInput(msg) = &err else {
        panic!("Expected InvalidInput, got {:?}", err);
    };
    assert!(msg.contains("exceeds practical limit"), "got: {msg}");
}

/// Same 31-qubit rejection as the default-stream test, but through the explicit-stream entry
/// point.
#[test]
fn test_encode_angle_from_gpu_ptr_f32_with_stream_too_many_qubits() {
    // Bail out quietly when no engine / device is available.
    let Some(engine) = engine_f32() else {
        println!("SKIP: No GPU");
        return;
    };
    // One angle per requested qubit, so the length check passes and the qubit limit is hit.
    let Some((device, input_d)) = common::copy_f32_to_device(&[0.0_f32; 31]) else {
        println!("SKIP: No CUDA device");
        return;
    };

    let stream = device.fork_default_stream().expect("fork_default_stream");
    let raw_stream = stream.stream as *mut c_void;
    let device_ptr = *input_d.device_ptr() as *const f32;

    let outcome = unsafe {
        engine.encode_angle_from_gpu_ptr_f32_with_stream(device_ptr, input_d.len(), 31, raw_stream)
    };
    assert!(outcome.is_err());

    let err = outcome.unwrap_err();
    let MahoutError::InvalidInput(msg) = &err else {
        panic!("Expected InvalidInput, got {:?}", err);
    };
    assert!(msg.contains("exceeds practical limit"), "got: {msg}");
}

#[test]
fn test_encode_batch_from_gpu_ptr_f32_success() {
let engine = match engine_f32() {
Expand Down
Loading
Loading