Skip to content

Commit 0c84ab9

Browse files
committed
[QDP] feat: add credit card fraud benchmark + amplitude encoding optimizations
1 parent 2dc9a02 commit 0c84ab9

10 files changed

Lines changed: 1804 additions & 40 deletions

File tree

qdp/qdp-core/src/gpu/encodings/amplitude.rs

Lines changed: 34 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -245,21 +245,9 @@ impl QuantumEncoder for AmplitudeEncoder {
245245
buffer
246246
};
247247

248-
// Validate norms on host to catch zero or NaN samples early
249-
{
250-
crate::profile_scope!("GPU::NormValidation");
251-
let host_inv_norms = device
252-
.dtoh_sync_copy(&inv_norms_gpu)
253-
.map_err(|e| MahoutError::Cuda(format!("Failed to copy norms to host: {:?}", e)))?;
254-
255-
if host_inv_norms.iter().any(|v| !v.is_finite() || *v == 0.0) {
256-
return Err(MahoutError::InvalidInput(
257-
"One or more samples have zero or invalid norm".to_string(),
258-
));
259-
}
260-
}
261-
262-
// Launch batch kernel
248+
// Launch batch encode kernel — takes GPU norm buffer directly, no D2H needed yet.
249+
// We defer the norm validation D2H copy until AFTER the encode kernel + sync so that
250+
// the norm kernel → encode kernel sequence runs without an intermediate GPU-CPU roundtrip.
263251
{
264252
crate::profile_scope!("GPU::BatchKernelLaunch");
265253
let state_ptr = batch_state_vector.ptr_f64().ok_or_else(|| {
@@ -288,14 +276,30 @@ impl QuantumEncoder for AmplitudeEncoder {
288276
}
289277
}
290278

291-
// Synchronize
279+
// Synchronize — all GPU work (norm + encode) complete after this point.
292280
{
293281
crate::profile_scope!("GPU::Synchronize");
294282
device
295283
.synchronize()
296284
.map_err(|e| MahoutError::Cuda(format!("Sync failed: {:?}", e)))?;
297285
}
298286

287+
// Validate norms on host AFTER sync: D2H copy no longer blocks the encode kernel.
288+
// This preserves error detection for zero/NaN samples without adding a mid-pipeline
289+
// GPU-CPU roundtrip between the norm and encode kernels.
290+
{
291+
crate::profile_scope!("GPU::NormValidation");
292+
let host_inv_norms = device
293+
.dtoh_sync_copy(&inv_norms_gpu)
294+
.map_err(|e| MahoutError::Cuda(format!("Failed to copy norms to host: {:?}", e)))?;
295+
296+
if host_inv_norms.iter().any(|v| !v.is_finite() || *v == 0.0) {
297+
return Err(MahoutError::InvalidInput(
298+
"One or more samples have zero or invalid norm".to_string(),
299+
));
300+
}
301+
}
302+
299303
Ok(batch_state_vector)
300304
}
301305

@@ -412,17 +416,8 @@ impl QuantumEncoder for AmplitudeEncoder {
412416
}
413417
buffer
414418
};
415-
{
416-
crate::profile_scope!("GPU::NormValidation");
417-
let host_inv_norms = device
418-
.dtoh_sync_copy(&inv_norms_gpu)
419-
.map_err(|e| MahoutError::Cuda(format!("Failed to copy norms to host: {:?}", e)))?;
420-
if host_inv_norms.iter().any(|v| !v.is_finite() || *v == 0.0) {
421-
return Err(MahoutError::InvalidInput(
422-
"One or more samples have zero or invalid norm".to_string(),
423-
));
424-
}
425-
}
419+
// Launch encode kernel before D2H norm validation: GPU norm buffer is passed directly,
420+
// so the encode kernel can run immediately after the norm kernel without a CPU roundtrip.
426421
{
427422
crate::profile_scope!("GPU::BatchKernelLaunch");
428423
use cudarc::driver::DevicePtr;
@@ -450,10 +445,22 @@ impl QuantumEncoder for AmplitudeEncoder {
450445
)));
451446
}
452447
}
448+
// Synchronize first; then validate norms on host (D2H after all GPU work is done).
453449
{
454450
crate::profile_scope!("GPU::Synchronize");
455451
sync_cuda_stream(stream, "CUDA stream synchronize failed")?;
456452
}
453+
{
454+
crate::profile_scope!("GPU::NormValidation");
455+
let host_inv_norms = device
456+
.dtoh_sync_copy(&inv_norms_gpu)
457+
.map_err(|e| MahoutError::Cuda(format!("Failed to copy norms to host: {:?}", e)))?;
458+
if host_inv_norms.iter().any(|v| !v.is_finite() || *v == 0.0) {
459+
return Err(MahoutError::InvalidInput(
460+
"One or more samples have zero or invalid norm".to_string(),
461+
));
462+
}
463+
}
457464
Ok(batch_state_vector)
458465
}
459466

qdp/qdp-core/src/pipeline_runner.rs

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,47 @@ impl PipelineIterator {
246246
})
247247
}
248248

249+
/// Create a pipeline iterator from an in-memory array (e.g. from Python numpy).
250+
/// Data is owned by the iterator; the full encode loop runs in Rust (take_batch + encode_batch).
251+
pub fn new_from_array(
252+
engine: QdpEngine,
253+
data: Vec<f64>,
254+
num_samples: usize,
255+
sample_size: usize,
256+
config: PipelineConfig,
257+
batch_limit: usize,
258+
) -> Result<Self> {
259+
let vector_len = vector_len(config.num_qubits, &config.encoding_method);
260+
if sample_size != vector_len {
261+
return Err(MahoutError::InvalidInput(format!(
262+
"Array sample_size {} does not match vector_len {} for num_qubits={}, encoding={}",
263+
sample_size, vector_len, config.num_qubits, config.encoding_method
264+
)));
265+
}
266+
if data.len() != num_samples * sample_size {
267+
return Err(MahoutError::InvalidInput(format!(
268+
"Array length {} is not num_samples ({}) * sample_size ({})",
269+
data.len(),
270+
num_samples,
271+
sample_size
272+
)));
273+
}
274+
let source = DataSource::InMemory {
275+
data,
276+
cursor: 0,
277+
num_samples,
278+
sample_size,
279+
batches_yielded: 0,
280+
batch_limit,
281+
};
282+
Ok(Self {
283+
engine,
284+
config,
285+
source,
286+
vector_len,
287+
})
288+
}
289+
249290
/// Create a pipeline iterator from a Parquet file using streaming read (Phase 2b).
250291
/// Only `.parquet` is supported; reduces memory for large files by reading in chunks.
251292
/// Validates sample_size == vector_len after the first chunk.
@@ -411,7 +452,61 @@ impl PipelineIterator {
411452
}
412453

413454
/// Returns the next batch as a DLPack pointer; `Ok(None)` when exhausted.
455+
/// For InMemory source, passes a slice reference to encode_batch (no per-batch copy).
414456
pub fn next_batch(&mut self) -> Result<Option<*mut DLManagedTensor>> {
457+
// InMemory: update cursor, then encode from &data[start..end] to avoid to_vec().
458+
let in_memory_range: Option<(usize, usize, usize, usize)> = match &mut self.source {
459+
DataSource::InMemory {
460+
data,
461+
cursor,
462+
sample_size,
463+
batches_yielded,
464+
batch_limit,
465+
..
466+
} => {
467+
if *batches_yielded >= *batch_limit {
468+
None
469+
} else {
470+
let remaining = (data.len() - *cursor) / *sample_size;
471+
if remaining == 0 {
472+
None
473+
} else {
474+
let batch_n = remaining.min(self.config.batch_size);
475+
let start = *cursor;
476+
let end = start + batch_n * *sample_size;
477+
*cursor = end;
478+
*batches_yielded += 1;
479+
Some((
480+
start,
481+
batch_n,
482+
*sample_size,
483+
self.config.num_qubits as usize,
484+
))
485+
}
486+
}
487+
}
488+
_ => None,
489+
};
490+
491+
if let Some((start, batch_n, sample_size, num_qubits)) = in_memory_range {
492+
let slice = match &self.source {
493+
DataSource::InMemory { data, .. } => {
494+
let len = batch_n * sample_size;
495+
&data[start..start + len]
496+
}
497+
_ => unreachable!(),
498+
};
499+
let ptr = self.engine.encode_batch(
500+
slice,
501+
batch_n,
502+
sample_size,
503+
num_qubits,
504+
&self.config.encoding_method,
505+
)?;
506+
return Ok(Some(ptr));
507+
}
508+
509+
// Synthetic / Streaming: take_batch_from_source (may copy) then encode.
415510
let Some((batch_data, batch_n, sample_size, num_qubits)) = self.take_batch_from_source()?
416511
else {
417512
return Ok(None);

qdp/qdp-python/benchmark/encoding_benchmarks/README.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,25 @@ To see the full list of options and defaults, append `--help`:
7575
uv run python benchmark/encoding_benchmarks/pennylane_baseline/iris_amplitude.py --help
7676
uv run python benchmark/encoding_benchmarks/qdp_pipeline/iris_amplitude.py --help
7777
```
78+
79+
## Credit Card Fraud amplitude baseline (PennyLane)
80+
81+
Minimal, reproducible steps (run from `qdp/qdp-python`):
82+
83+
1. **Download dataset (once)** — Kaggle `creditcard.csv` mirror:
84+
85+
```bash
86+
mkdir -p benchmark/encoding_benchmarks/pennylane_baseline/data
87+
curl -L -o benchmark/encoding_benchmarks/pennylane_baseline/data/creditcard.csv \
88+
https://raw.githubusercontent.com/nsethi31/Kaggle-Data-Credit-Card-Fraud-Detection/master/creditcard.csv
89+
```
90+
91+
2. **Run the PennyLane baseline** — StandardScaler → PCA(16) → L2 normalization → 4‑qubit amplitude-encoded VQC:
92+
93+
```bash
94+
uv run python benchmark/encoding_benchmarks/pennylane_baseline/creditcardfraud_amplitude.py \
95+
--data-file benchmark/encoding_benchmarks/pennylane_baseline/data/creditcard.csv \
96+
--max-samples 300000 --iters 200 --batch-size 512 --trials 1
97+
```
98+
99+
This prints compile time, train time / throughput, and task metrics (AUPRC, F1, precision, recall) on the test set.

0 commit comments

Comments (0)