diff --git a/.gitignore b/.gitignore index bdb326f..83f036b 100644 --- a/.gitignore +++ b/.gitignore @@ -46,6 +46,10 @@ docs/package-lock.json # the .bin into docs/notebooks/data/xcat/ (default path, symlink-friendly). docs/notebooks/data/ +# Pluto cell-output caches written by PlutoSpace / pluto-collab — large +# (tens of MB), regenerated on every run, never tracked. +*.pluto-cache.toml + # File generated by Pkg, the package manager, based on a corresponding Project.toml # It records a fixed state of all packages used by the project. As such, it should not be # committed for packages, but should be committed for applications that require a static diff --git a/docs/assets/dual_kvp_vmi_projection_grid.png b/docs/assets/dual_kvp_vmi_projection_grid.png index bdb8228..62cc65a 100644 Binary files a/docs/assets/dual_kvp_vmi_projection_grid.png and b/docs/assets/dual_kvp_vmi_projection_grid.png differ diff --git a/docs/assets/dual_kvp_vmi_projection_monoplus.png b/docs/assets/dual_kvp_vmi_projection_monoplus.png index d618b36..7cafdc6 100644 Binary files a/docs/assets/dual_kvp_vmi_projection_monoplus.png and b/docs/assets/dual_kvp_vmi_projection_monoplus.png differ diff --git a/docs/assets/dual_kvp_vmi_water_noise_vs_energy.png b/docs/assets/dual_kvp_vmi_water_noise_vs_energy.png index 69a9bb5..1b70eef 100644 Binary files a/docs/assets/dual_kvp_vmi_water_noise_vs_energy.png and b/docs/assets/dual_kvp_vmi_water_noise_vs_energy.png differ diff --git a/docs/assets/pcct_vmi_projection_grid.png b/docs/assets/pcct_vmi_projection_grid.png index 60a4fb6..c96932c 100644 Binary files a/docs/assets/pcct_vmi_projection_grid.png and b/docs/assets/pcct_vmi_projection_grid.png differ diff --git a/docs/assets/pcct_vmi_projection_monoplus.png b/docs/assets/pcct_vmi_projection_monoplus.png index c8efd6f..9b9e79d 100644 Binary files a/docs/assets/pcct_vmi_projection_monoplus.png and b/docs/assets/pcct_vmi_projection_monoplus.png differ diff --git a/docs/assets/pcct_vmi_regression.png b/docs/assets/pcct_vmi_regression.png index 5bd40a6..d24068c 100644 Binary files a/docs/assets/pcct_vmi_regression.png and b/docs/assets/pcct_vmi_regression.png differ diff --git a/docs/assets/pcct_vmi_vs_theoretical.png b/docs/assets/pcct_vmi_vs_theoretical.png index fc581aa..2ade936 100644 Binary files a/docs/assets/pcct_vmi_vs_theoretical.png and b/docs/assets/pcct_vmi_vs_theoretical.png differ diff --git a/docs/assets/pcct_vmi_water_noise_vs_energy.png b/docs/assets/pcct_vmi_water_noise_vs_energy.png index fd81119..1e385af 100644 Binary files a/docs/assets/pcct_vmi_water_noise_vs_energy.png and b/docs/assets/pcct_vmi_water_noise_vs_energy.png differ diff --git a/docs/assets/pcct_vmi_water_roi_check.png b/docs/assets/pcct_vmi_water_roi_check.png index e0fba4d..3f95b20 100644 Binary files a/docs/assets/pcct_vmi_water_roi_check.png and b/docs/assets/pcct_vmi_water_roi_check.png differ diff --git a/docs/assets/qrm_thorax_vmi_noise_roi.png b/docs/assets/qrm_thorax_vmi_noise_roi.png index d2b630b..4bafd90 100644 Binary files a/docs/assets/qrm_thorax_vmi_noise_roi.png and b/docs/assets/qrm_thorax_vmi_noise_roi.png differ diff --git a/docs/assets/qrm_thorax_vmi_vs_theoretical.png b/docs/assets/qrm_thorax_vmi_vs_theoretical.png index d85621e..dbe4122 100644 Binary files a/docs/assets/qrm_thorax_vmi_vs_theoretical.png and b/docs/assets/qrm_thorax_vmi_vs_theoretical.png differ diff --git a/docs/assets/qrm_thorax_vmi_water_roi_check.png b/docs/assets/qrm_thorax_vmi_water_roi_check.png index b1ef2b7..d01ccf3 100644 Binary files a/docs/assets/qrm_thorax_vmi_water_roi_check.png and b/docs/assets/qrm_thorax_vmi_water_roi_check.png differ diff --git a/docs/assets/recon_compare_4panel.png b/docs/assets/recon_compare_4panel.png index 5dfe96f..42b630b 100644 Binary files a/docs/assets/recon_compare_4panel.png and b/docs/assets/recon_compare_4panel.png differ diff --git a/docs/assets/sinogram_standard.png b/docs/assets/sinogram_standard.png index e6c1651..c49b028 100644 Binary files a/docs/assets/sinogram_standard.png and b/docs/assets/sinogram_standard.png differ diff --git a/docs/assets/vmi_regression.png b/docs/assets/vmi_regression.png index c36d036..e9d608c 100644 Binary files a/docs/assets/vmi_regression.png and b/docs/assets/vmi_regression.png differ diff --git a/docs/assets/vmi_vs_theoretical.png b/docs/assets/vmi_vs_theoretical.png index 71e3660..c782a5b 100644 Binary files a/docs/assets/vmi_vs_theoretical.png and b/docs/assets/vmi_vs_theoretical.png differ diff --git a/docs/assets/vmi_water_roi_check.png b/docs/assets/vmi_water_roi_check.png index a90ca81..26f4c15 100644 Binary files a/docs/assets/vmi_water_roi_check.png and b/docs/assets/vmi_water_roi_check.png differ diff --git a/docs/assets/xcat_fbp_vs_hir.png b/docs/assets/xcat_fbp_vs_hir.png index 1f836b4..681b89c 100644 Binary files a/docs/assets/xcat_fbp_vs_hir.png and b/docs/assets/xcat_fbp_vs_hir.png differ diff --git a/docs/notebooks/01_five_struct_api.jl b/docs/notebooks/01_five_struct_api.jl index cb1a8ed..72d3a50 100644 --- a/docs/notebooks/01_five_struct_api.jl +++ b/docs/notebooks/01_five_struct_api.jl @@ -1,5 +1,8 @@ ### A Pluto.jl notebook ### -# v0.19.0 +# v0.1.0 + +using Markdown +using InteractiveUtils # ╔═╡ 01000003-0000-4000-8000-000000000001 begin @@ -47,6 +50,7 @@ detect a GPU backend. import BasisSimulator as BS # ╔═╡ 01000003-0000-4000-8000-000000000003 +# ╠═╡ show_logs = false import CairoMakie as CM # ╔═╡ 01000004-0000-4000-8000-000000000001 diff --git a/docs/notebooks/02_xcat_custom_materials.jl b/docs/notebooks/02_xcat_custom_materials.jl index 7097d53..0a901ab 100644 --- a/docs/notebooks/02_xcat_custom_materials.jl +++ b/docs/notebooks/02_xcat_custom_materials.jl @@ -1,5 +1,5 @@ ### A Pluto.jl notebook ### -# v0.20.24 +# v0.1.0 using Markdown using InteractiveUtils diff --git a/docs/notebooks/03_dual_kvp_switching_vmi.jl b/docs/notebooks/03_dual_kvp_switching_vmi.jl index 68d5ace..7d8f78f 100644 --- a/docs/notebooks/03_dual_kvp_switching_vmi.jl +++ b/docs/notebooks/03_dual_kvp_switching_vmi.jl @@ -1,5 +1,8 @@ ### A Pluto.jl notebook ### -# v0.19.0 +# v0.1.0 + +using Markdown +using InteractiveUtils # ╔═╡ 05000001-0000-4000-8000-000000000001 begin @@ -307,7 +310,7 @@ subspace `U[:,2]` with a small separable Gaussian. One knob: """ # ╔═╡ 05000007-0000-4000-8000-000000000005 -SVD_SIGMA_PX = 1.0; # Gaussian σ (px) for U[:,2]; 0 = passthrough (no denoising) +SVD_SIGMA_PX = 0.0; # Gaussian σ (px) for U[:,2]; 0 = passthrough (no denoising) # ╔═╡ 05000007-0000-4000-8000-000000000010 # 2-channel joint projection-domain SVD denoise (BS.apply_sino_svd_denoise). diff --git a/docs/notebooks/04_pcct_vmi.jl b/docs/notebooks/04_pcct_vmi.jl index 72fad8c..a5efc54 100644 --- a/docs/notebooks/04_pcct_vmi.jl +++ b/docs/notebooks/04_pcct_vmi.jl @@ -1,5 +1,8 @@ ### A Pluto.jl notebook ### -# v0.19.0 +# v0.1.0 + +using Markdown +using InteractiveUtils # ╔═╡ 06000001-0000-4000-8000-000000000001 begin diff --git a/docs/notebooks/07_qrm_thorax_pure_material_vmi.jl b/docs/notebooks/07_qrm_thorax_pure_material_vmi.jl index cbd9a29..c92194b 100644 --- a/docs/notebooks/07_qrm_thorax_pure_material_vmi.jl +++ b/docs/notebooks/07_qrm_thorax_pure_material_vmi.jl @@ -1,5 +1,8 @@ ### A Pluto.jl notebook ### -# v0.19.0 +# v0.1.0 + +using Markdown +using InteractiveUtils # ╔═╡ 07010003-0000-4000-8000-000000000001 begin diff --git a/src/BasisSimulator.jl b/src/BasisSimulator.jl index a04a635..47cb35d 100644 --- a/src/BasisSimulator.jl +++ b/src/BasisSimulator.jl @@ -79,6 +79,11 @@ include("geometry/affine.jl") # Reference: CERN/TIGRE/Common/CUDA/Siddon_projection.cu include("projection/siddon.jl") +# Distance-driven forward projection (CatSim/XCIST DD3, gather form) +# Reference: De Man & Basu 2004; gecatsim DD3Proj_roi_notrans_mm.cpp +# Drop-in replacement for Siddon (same signatures, cm units, Σμ·l output). +include("projection/dd.jl") + # ============================================================================= # Detector (ALL detector effects + noise) # ============================================================================= diff --git a/src/api/driver.jl b/src/api/driver.jl index 5c7a68b..c35db8b 100644 --- a/src/api/driver.jl +++ b/src/api/driver.jl @@ -1321,7 +1321,7 @@ function reconstruct!( # Forward project with subset geometry → subset_Ax_buf ax_view = view(ws.subset_Ax_buf, :, :, 1:n_sub) fill!(ax_view, zero(T)) - siddon_forward_project!( + dd_forward_project!( ax_view, ws.volume, geom_s; ws_source_positions = ws.subset_geom_source_positions[s], ws_detector_centers = ws.subset_geom_detector_centers[s], diff --git a/src/correction/bhc_image_domain.jl b/src/correction/bhc_image_domain.jl index 4fdb69b..f87cee1 100644 --- a/src/correction/bhc_image_domain.jl +++ b/src/correction/bhc_image_domain.jl @@ -107,7 +107,7 @@ function apply_bhc_image_domain( # Use geom's recon FOV (no volume_extent override) so this matches Step 3's # FDK back-projection grid exactly — same physical box on both halves of # the round-trip. - ξ_gpu = siddon_forward_project(high_atten_μ, geom) + ξ_gpu = dd_forward_project(high_atten_μ, geom) # Step 3: FDK-reconstruct the error sinogram → error image. error_image = fdk_reconstruct(ξ_gpu, geom, matrix_size) diff --git a/src/correction/bhc_sinogram.jl b/src/correction/bhc_sinogram.jl index 8abfb08..cb988b3 100644 --- a/src/correction/bhc_sinogram.jl +++ b/src/correction/bhc_sinogram.jl @@ -603,7 +603,7 @@ function apply_bhc_two_material( end end - p_b_gpu = siddon_forward_project(bone_μ_gpu, geom) + p_b_gpu = dd_forward_project(bone_μ_gpu, geom) p_s_gpu = similar(sino_water) copyto!(p_s_gpu, sino_water) diff --git a/src/detector/photon_counting.jl b/src/detector/photon_counting.jl index c2e6ec7..1207b6e 100644 --- a/src/detector/photon_counting.jl +++ b/src/detector/photon_counting.jl @@ -495,7 +495,7 @@ function pcct_forward_project( if has_src copyto!(bt_sub, @view ws_source_spectral[:, :, ts:te]) - siddon_fused_spectral_project!( + dd_fused_spectral_project!( _pilot, _outputs_flat, Int32(n_bins), mask, _proj_geom, μ_sub, W_sub, Val(TILE_K), Int32(1); volume_extent=volume_extent, @@ -503,7 +503,7 @@ function pcct_forward_project( ws_detector_u=_ws_u, ws_detector_v=_ws_v, ws_bowtie_spectral=bt_sub) else - siddon_fused_spectral_project!( + dd_fused_spectral_project!( _pilot, _outputs_flat, Int32(n_bins), mask, _proj_geom, μ_sub, W_sub, Val(TILE_K), Int32(1); volume_extent=volume_extent, @@ -636,7 +636,7 @@ function pcct_forward_project( # Forward project at this energy (native or binned resolution) fill!(sino_buf, zero(T)) - siddon_forward_project!(sino_buf, μ_volume, proj_geom; + dd_forward_project!(sino_buf, μ_volume, proj_geom; ws_source_positions=_ws_src, ws_detector_centers=_ws_det, ws_detector_u=_ws_u, diff --git a/src/projection/dd.jl b/src/projection/dd.jl new file mode 100644 index 0000000..fe967d4 --- /dev/null +++ b/src/projection/dd.jl @@ -0,0 +1,564 @@ +# ============================================================================= +# Distance-Driven (DD) Forward Projection — AcceleratedKernels.jl +# ============================================================================= +# +# Julia / AcceleratedKernels.jl port of the **distance-driven** 3D forward +# projector from CatSim / XCIST (gecatsim), specifically +# `clib_build/src/DD3Proj_roi_notrans_mm.cpp` (the physical-mm, no-transpose +# variant). Upstream is GE Precision HealthCare, Apache-2.0 / XCIST license +# (https://github.com/xcist/main). Modifications relative to the original +# C++ are Copyright (c) 2025 MolloiLab and contributors (MIT, see ../../LICENSE). +# +# Reference: De Man B, Basu S. "Distance-driven projection and backprojection +# in three dimensions." Phys Med Biol. 2004;49(11):2463-2475. +# doi:10.1088/0031-9155/49/11/024 +# +# Why a second projector +# ---------------------- +# `siddon.jl` traces each ray independently (ray-driven). Distance-driven +# instead maps image-voxel boundaries and detector-cell boundaries onto a +# common iso-plane (perpendicular to the dominant ray axis) and integrates +# their *overlap*. This is the algorithm CatSim/XCIST uses and gives cleaner +# adjoint / lower high-frequency artifact behaviour than Siddon. +# +# Drop-in contract +# ---------------- +# The four public entry points mirror the Siddon signatures and semantics +# EXACTLY, so every downstream consumer (BHC, IR, polychromatic EI, +# photon-counting PCCT, FDK) works unchanged: +# +# dd_forward_project! / dd_forward_project ←→ siddon_forward_project! / … +# dd_fused_poly_project! ←→ siddon_fused_poly_project! +# dd_fused_spectral_project! ←→ siddon_fused_spectral_project! +# +# Output-stationary (gather) reformulation +# ---------------------------------------- +# The upstream C is image-stationary *scatter* (loops image rows, accumulates +# into a shared per-view detector buffer — a reduction). We reformulate it +# output-stationary: ONE thread per output sinogram cell gathers the +# distance-driven overlap of its own back-projected footprint. This is +# faithful DD (only the floating-point summation order differs) and maps onto +# `AK.foreachindex(sinogram)` exactly like Siddon — no atomics, no races, +# portable to Metal / CUDA / ROCm / CPU. +# +# Per-view axis choice (DD3 `vertical`): integrate along whichever world axis +# the source most aligns with (|s_y| ≥ |s_x| → integrate along y, else along x), +# keeping the boundary→iso-plane mapping well conditioned. Each image row's +# contributing voxel range is bounded by inverse projection so per-cell work is +# O(n_rows × footprint), comparable to a Siddon ray walk. +# +# The fused poly/spectral kernels reuse Siddon's `@generated` NTuple helpers +# (`_fused_accum_energies`, `_fused_beer_lambert[_bt]`, `_tiled_*`) verbatim — +# only the per-voxel weight changes from Siddon's chord `path_length` to DD's +# overlap `ox·oz·norm`. +# +# Assumptions (match the DD3_mm variant) +# -------------------------------------- +# - Flat detector, detector_v = ẑ (BasisSimulator's CTGeometry construction). +# - Isotropic in-plane voxels (vox_x ≈ vox_y); the DD3_mm obliquity scaling +# uses a single in-plane voxel size. Enforced with a warning. +# - Volume centred at isocentre with physical extent `volume_extent` (else +# `geom.fov`), identical to Siddon. +# +# ============================================================================= + +import AcceleratedKernels as AK + +export dd_forward_project!, dd_forward_project, + dd_fused_poly_project!, dd_fused_spectral_project! + +# NOTE (dd-raytracing branch): distance-driven is THE projector here. The +# internal pipeline (`_forward_project_poly!`, `pcct_forward_project`, BHC, IR, +# IR-recon) calls the `dd_*` functions below directly, exactly as it used to +# call `siddon_*` — same signatures, so no downstream/notebook code changed. +# `siddon.jl` is retained only for reference/benchmarking; on merge it is +# expected to be deleted and these become the sole projector. + +# ----------------------------------------------------------------------------- +# Overlap length of two 1-D intervals [a_lo, a_hi] ∩ [b_lo, b_hi]. +# ----------------------------------------------------------------------------- +@inline function _dd_overlap(a_lo::T, a_hi::T, b_lo::T, b_hi::T) where {T} + lo = max(a_lo, b_lo) + hi = min(a_hi, b_hi) + return hi > lo ? hi - lo : zero(T) +end + +# ----------------------------------------------------------------------------- +# Contributing voxel-index range [i_start, i_end] along one axis: the voxels +# whose projected boundaries straddle [lo, hi] on the iso-plane. Inverse of +# pos(b) = s0 + (vmin + b*vstep - s0) * mag_fac (boundary index b, 0-based) +# Widened by ±1 for float safety; non-overlapping extras are filtered by the +# `_dd_overlap > 0` test in the loop. +# ----------------------------------------------------------------------------- +@inline function _dd_bounds( + lo::T, hi::T, s0::T, vmin::T, vstep::T, inv_mf::T, n::Int32, + ) where {T} + c_lo = ((lo - s0) * inv_mf - (vmin - s0)) / vstep + c_hi = ((hi - s0) * inv_mf - (vmin - s0)) / vstep + c_lo = clamp(c_lo, zero(T), T(n) + one(T)) + c_hi = clamp(c_hi, zero(T), T(n) + one(T)) + i_start = max(Int32(1), unsafe_trunc(Int32, floor(c_lo))) + i_end = min(n, unsafe_trunc(Int32, ceil(c_hi)) + Int32(1)) + return (i_start, i_end) +end + +# ----------------------------------------------------------------------------- +# Per-cell distance-driven geometry: maps the detector cell (col,row) onto the +# iso-plane and returns the footprint + normalisation shared by every DD kernel. +# Returns a flat tuple (compiler-inlined): the value-tuple plus a `valid` flag. +# ----------------------------------------------------------------------------- +@inline function _dd_cell_setup( + col::Int32, row::Int32, + sx::T, sy::T, sz::T, + dcx::T, dcy::T, dcz::T, ux::T, uy::T, vvz::T, + mag::T, ps::T, prs::T, col_center::T, row_center::T, + nx::Int32, ny::Int32, + vmin_x::T, vmin_y::T, vx::T, vy::T, + ) where {T} + + eps = T(1.0e-12) + half = T(0.5) + + # detector COLUMN boundaries (transaxial), world (x,y) at detector mid-row + uL = (T(col) - half - col_center) * ps * mag + uU = (T(col) + half - col_center) * ps * mag + bxL = dcx + uL * ux; byL = dcy + uL * uy + bxU = dcx + uU * ux; byU = dcy + uU * uy + + # integration axis = dominant source component + vertical = abs(sy) >= abs(sx) + if vertical + s_long = sy; s_tran = sx + dtL = bxL; dlL = byL; dtU = bxU; dlU = byU + n_t = nx; v_t = vx; vmin_t = vmin_x + n_long = ny; v_long = vy; vmin_long = vmin_y + else + s_long = sx; s_tran = sy + dtL = byL; dlL = bxL; dtU = byU; dlU = bxU + n_t = ny; v_t = vy; vmin_t = vmin_y + n_long = nx; v_long = vx; vmin_long = vmin_x + end + + # project the two column boundaries onto the iso-plane (long = 0) + scaleL = s_long / (s_long - dlL) + scaleU = s_long / (s_long - dlU) + dXa = s_tran + (dtL - s_tran) * scaleL + dXb = s_tran + (dtU - s_tran) * scaleU + dXlo = min(dXa, dXb); dXhi = max(dXa, dXb) + detXstep = dXhi - dXlo + deltaT = half * (dXa + dXb) - s_tran + scale_col = half * (scaleL + scaleU) + + # detector ROW boundaries (axial), world z, projected to iso-plane + wL = (T(row) - half - row_center) * prs * mag + wU = (T(row) + half - row_center) * prs * mag + zdL = dcz + wL * vvz + zdU = dcz + wU * vvz + dZa = sz + scale_col * (zdL - sz) + dZb = sz + scale_col * (zdU - sz) + dZlo = min(dZa, dZb); dZhi = max(dZa, dZb) + detZstep = dZhi - dZlo + deltaZ = half * (dZa + dZb) - sz + + valid = detXstep > eps && detZstep > eps + invCos = sqrt(s_long * s_long + deltaT * deltaT + deltaZ * deltaZ) / + abs(s_long) * v_long + norm = valid ? invCos / (detXstep * detZstep) : zero(T) + + return (valid, vertical, s_long, s_tran, dXlo, dXhi, dZlo, dZhi, norm, + n_t, v_t, vmin_t, n_long, v_long, vmin_long) +end + +# ----------------------------------------------------------------------------- +# Single detector-cell mono line integral (Σμ·l), gather form. +# ----------------------------------------------------------------------------- +@inline function _dd_trace_cell( + volume::AbstractArray{T, 3}, + col::Int32, row::Int32, + sx::T, sy::T, sz::T, + dcx::T, dcy::T, dcz::T, ux::T, uy::T, vvz::T, + mag::T, ps::T, prs::T, col_center::T, row_center::T, + nx::Int32, ny::Int32, nz::Int32, + vmin_x::T, vmin_y::T, vmin_z::T, vx::T, vy::T, vz::T, + ) where {T} + + (valid, vertical, s_long, s_tran, dXlo, dXhi, dZlo, dZhi, norm, + n_t, v_t, vmin_t, n_long, v_long, vmin_long) = _dd_cell_setup( + col, row, sx, sy, sz, dcx, dcy, dcz, ux, uy, vvz, + mag, ps, prs, col_center, row_center, nx, ny, vmin_x, vmin_y, vx, vy) + + valid || return zero(T) + + accum = zero(T) + il = Int32(1) + while il <= n_long + lp = vmin_long + (T(il) - T(0.5)) * v_long + mag_fac = s_long / (s_long - lp) + inv_mf = one(T) / mag_fac + + (it_s, it_e) = _dd_bounds(dXlo, dXhi, s_tran, vmin_t, v_t, inv_mf, n_t) + (ip_s, ip_e) = _dd_bounds(dZlo, dZhi, sz, vmin_z, vz, inv_mf, nz) + + it = it_s + while it <= it_e + t0 = s_tran + (vmin_t + T(it - Int32(1)) * v_t - s_tran) * mag_fac + t1 = s_tran + (vmin_t + T(it) * v_t - s_tran) * mag_fac + ox = _dd_overlap(dXlo, dXhi, min(t0, t1), max(t0, t1)) + if ox > zero(T) + ixv = vertical ? it : il + iyv = vertical ? il : it + ip = ip_s + while ip <= ip_e + z0 = sz + (vmin_z + T(ip - Int32(1)) * vz - sz) * mag_fac + z1 = sz + (vmin_z + T(ip) * vz - sz) * mag_fac + oz = _dd_overlap(dZlo, dZhi, min(z0, z1), max(z0, z1)) + if oz > zero(T) + accum += ox * oz * volume[ixv, iyv, ip] + end + ip += Int32(1) + end + end + it += Int32(1) + end + il += Int32(1) + end + + return norm * accum +end + +# ----------------------------------------------------------------------------- +# Helper: build a typed geometry array on the same backend as `ref`. +# ----------------------------------------------------------------------------- +@inline function _dd_geom_array(ws, ref, src, ::Type{T}) where {T} + ws !== nothing && return ws + a = similar(ref, T, size(src)...) + copyto!(a, T.(src)) + return a +end + +# ============================================================================= +# Mono: in-place + allocating — mirror siddon_forward_project! / …project +# ============================================================================= +""" + dd_forward_project!(sinogram, volume, geom; volume_extent=nothing, ...) -> sinogram + +Distance-driven forward projection (CatSim/XCIST DD3, gather form). Drop-in +replacement for [`siddon_forward_project!`](@ref): same arguments, same +`[n_cols, n_rows, n_angles]` sinogram, same cm units and Σμ·l output. +""" +function dd_forward_project!( + sinogram::AbstractArray{T, 3}, + volume::AbstractArray{T, 3}, + geom::CTGeometry; + ws_source_positions = nothing, + ws_detector_centers = nothing, + ws_detector_u = nothing, + ws_detector_v = nothing, + volume_extent::Union{Nothing, NTuple{3, Float64}} = nothing, + ) where {T <: AbstractFloat} + + nx = Int32(size(volume, 1)); ny = Int32(size(volume, 2)); nz = Int32(size(volume, 3)) + n_cols = Int32(size(sinogram, 1)); n_rows = Int32(size(sinogram, 2)) + + vol_bounds = volume_extent !== nothing ? volume_extent : geom.fov + vmin_x = T(-vol_bounds[1] / 2); vmin_y = T(-vol_bounds[2] / 2); vmin_z = T(-vol_bounds[3] / 2) + vx = T(vol_bounds[1]) / T(nx); vy = T(vol_bounds[2]) / T(ny); vz = T(vol_bounds[3]) / T(nz) + _dd_check_isotropy(vx, vy) + + mag = T(geom.SDD / geom.SAD) + ps = T(geom.pixel_size); prs = T(geom.pixel_row_size) + col_center = (T(n_cols) + one(T)) / T(2) + row_center = (T(n_rows) + one(T)) / T(2) + + sp = _dd_geom_array(ws_source_positions, volume, geom.source_positions, T) + dc = _dd_geom_array(ws_detector_centers, volume, geom.detector_centers, T) + du = _dd_geom_array(ws_detector_u, volume, geom.detector_u, T) + dv = _dd_geom_array(ws_detector_v, volume, geom.detector_v, T) + + AK.foreachindex(sinogram) do idx + idx_0 = Int32(idx - 1) + col = (idx_0 % n_cols) + Int32(1) + idx_0 = idx_0 ÷ n_cols + row = (idx_0 % n_rows) + Int32(1) + angle = (idx_0 ÷ n_rows) + Int32(1) + + sinogram[idx] = _dd_trace_cell( + volume, col, row, + sp[1, angle], sp[2, angle], sp[3, angle], + dc[1, angle], dc[2, angle], dc[3, angle], du[1, angle], du[2, angle], dv[3, angle], + mag, ps, prs, col_center, row_center, + nx, ny, nz, vmin_x, vmin_y, vmin_z, vx, vy, vz, + ) + end + return sinogram +end + +""" + dd_forward_project(volume, geom; volume_extent=nothing) -> sinogram + +Allocating distance-driven forward projection. Drop-in for +[`siddon_forward_project`](@ref). +""" +function dd_forward_project( + volume::AbstractArray{T, 3}, + geom::CTGeometry; + volume_extent::Union{Nothing, NTuple{3, Float64}} = nothing, + ) where {T <: AbstractFloat} + sinogram = similar(volume, T, geom.n_cols, geom.n_rows, geom.n_angles) + fill!(sinogram, zero(T)) + return dd_forward_project!(sinogram, volume, geom; volume_extent = volume_extent) +end + +@inline function _dd_check_isotropy(vx::T, vy::T) where {T} + if abs(vx - vy) > T(1.0e-4) * max(vx, vy) + @warn "dd projector: in-plane voxels not isotropic (vx=$vx, vy=$vy); \ + DD3 obliquity scaling assumes vox_x ≈ vox_y — results may be biased." maxlog = 1 + end + return nothing +end + +# ============================================================================= +# Fused polychromatic — mirror siddon_fused_poly_project! +# ============================================================================= +""" + dd_fused_poly_project!(sinogram, mask, geom, μ_table_gpu, wη_gpu, Val(N_E); kwargs...) + +Distance-driven analog of [`siddon_fused_poly_project!`](@ref): one gather pass +per detector cell through the material `mask`, accumulating line integrals for +all `N_E` energy bins, then Beer-Lambert `-log(Σ_e wη[e]·exp(-L_e))`. Same +arguments, output, and bowtie-spectral handling as the Siddon version. +""" +function dd_fused_poly_project!( + sinogram::AbstractArray{T, 3}, + mask::AbstractArray{<:Unsigned, 3}, + geom::CTGeometry, + μ_table_gpu::AbstractArray{T, 2}, + wη_gpu::AbstractArray{T, 1}, + ::Val{N_E}; + volume_extent::Union{Nothing, NTuple{3, Float64}} = nothing, + ws_source_positions = nothing, + ws_detector_centers = nothing, + ws_detector_u = nothing, + ws_detector_v = nothing, + ws_bowtie_spectral = nothing, + ) where {T <: AbstractFloat, N_E} + + nx = Int32(size(mask, 1)); ny = Int32(size(mask, 2)); nz = Int32(size(mask, 3)) + n_cols = Int32(size(sinogram, 1)); n_rows = Int32(size(sinogram, 2)) + + vol_bounds = volume_extent !== nothing ? volume_extent : geom.fov + vmin_x = T(-vol_bounds[1] / 2); vmin_y = T(-vol_bounds[2] / 2); vmin_z = T(-vol_bounds[3] / 2) + vx = T(vol_bounds[1]) / T(nx); vy = T(vol_bounds[2]) / T(ny); vz = T(vol_bounds[3]) / T(nz) + _dd_check_isotropy(vx, vy) + + mag = T(geom.SDD / geom.SAD) + ps = T(geom.pixel_size); prs = T(geom.pixel_row_size) + col_center = (T(n_cols) + one(T)) / T(2) + row_center = (T(n_rows) + one(T)) / T(2) + + sp = _dd_geom_array(ws_source_positions, sinogram, geom.source_positions, T) + dc = _dd_geom_array(ws_detector_centers, sinogram, geom.detector_centers, T) + du = _dd_geom_array(ws_detector_u, sinogram, geom.detector_u, T) + dv = _dd_geom_array(ws_detector_v, sinogram, geom.detector_v, T) + + nc_nr = n_cols * n_rows + has_bowtie = ws_bowtie_spectral !== nothing + _bt = has_bowtie ? ws_bowtie_spectral : similar(μ_table_gpu, T, 1, 1, 1) + + let mask = mask, μ_tbl = μ_table_gpu, wη = wη_gpu, + sp = sp, dc = dc, du = du, dv = dv, bt = _bt, + vmx = vmin_x, vmy = vmin_y, vmz = vmin_z, vsx = vx, vsy = vy, vsz = vz, + nx = nx, ny = ny, nz = nz, nc = n_cols, nr = n_rows, + mag = mag, ps = ps, prs = prs, cc = col_center, rc = row_center, + ncnr = nc_nr, hbt = has_bowtie + + AK.foreachindex(sinogram) do idx + idx_0 = Int32(idx - 1) + col = (idx_0 % nc) + Int32(1) + idx_0 = idx_0 ÷ nc + row = (idx_0 % nr) + Int32(1) + angle = (idx_0 ÷ nr) + Int32(1) + + sx = sp[1, angle]; sy = sp[2, angle]; sz = sp[3, angle] + dcx = dc[1, angle]; dcy = dc[2, angle]; dcz = dc[3, angle] + ux = du[1, angle]; uy = du[2, angle]; vvz = dv[3, angle] + + (valid, vertical, s_long, s_tran, dXlo, dXhi, dZlo, dZhi, norm, + n_t, v_t, vmin_t, n_long, v_long, vmin_long) = _dd_cell_setup( + col, row, sx, sy, sz, dcx, dcy, dcz, ux, uy, vvz, + mag, ps, prs, cc, rc, nx, ny, vmx, vmy, vsx, vsy) + + accums = ntuple(_ -> zero(T), Val(N_E)) + if valid + il = Int32(1) + while il <= n_long + lp = vmin_long + (T(il) - T(0.5)) * v_long + mag_fac = s_long / (s_long - lp) + inv_mf = one(T) / mag_fac + (it_s, it_e) = _dd_bounds(dXlo, dXhi, s_tran, vmin_t, v_t, inv_mf, n_t) + (ip_s, ip_e) = _dd_bounds(dZlo, dZhi, sz, vmz, vsz, inv_mf, nz) + it = it_s + while it <= it_e + t0 = s_tran + (vmin_t + T(it - Int32(1)) * v_t - s_tran) * mag_fac + t1 = s_tran + (vmin_t + T(it) * v_t - s_tran) * mag_fac + ox = _dd_overlap(dXlo, dXhi, min(t0, t1), max(t0, t1)) + if ox > zero(T) + ixv = vertical ? it : il + iyv = vertical ? il : it + ip = ip_s + while ip <= ip_e + z0 = sz + (vmz + T(ip - Int32(1)) * vsz - sz) * mag_fac + z1 = sz + (vmz + T(ip) * vsz - sz) * mag_fac + oz = _dd_overlap(dZlo, dZhi, min(z0, z1), max(z0, z1)) + if oz > zero(T) + mat = Int32(mask[ixv, iyv, ip]) + Int32(1) + accums = _fused_accum_energies(accums, μ_tbl, mat, ox * oz * norm) + end + ip += Int32(1) + end + end + it += Int32(1) + end + il += Int32(1) + end + end + + I_total = if hbt + bt_base = Int32(col) + (Int32(row) - Int32(1)) * nc + _fused_beer_lambert_bt(accums, wη, bt, bt_base, ncnr) + else + _fused_beer_lambert(accums, wη) + end + sinogram[idx] = -log(max(I_total, T(1.0e-10))) + end + end + return sinogram +end + +# ============================================================================= +# Fused spectral (PCCT, tiled) — mirror siddon_fused_spectral_project! +# ============================================================================= +""" + dd_fused_spectral_project!(pilot, outputs_flat, n_bins, mask, geom, + μ_table_gpu, W_gpu, Val(K), tile_start; kwargs...) + +Distance-driven analog of [`siddon_fused_spectral_project!`](@ref): one gather +pass per detector cell accumulating `K` energy bins (tile starting at +`tile_start`), then ADDS per-bin partial Beer-Lambert sums to `outputs_flat`. +Same tiled-accumulation contract as the Siddon version (zero-init before the +first tile; tiles accumulate). +""" +function dd_fused_spectral_project!( + pilot::AbstractArray{T, 3}, + outputs_flat::AbstractArray{T, 1}, + n_bins::Int32, + mask::AbstractArray{<:Unsigned, 3}, + geom::CTGeometry, + μ_table_gpu::AbstractArray{T, 2}, + W_gpu::AbstractArray{T, 2}, + ::Val{K}, + tile_start::Int32; + volume_extent::Union{Nothing, NTuple{3, Float64}} = nothing, + ws_source_positions = nothing, + ws_detector_centers = nothing, + ws_detector_u = nothing, + ws_detector_v = nothing, + ws_bowtie_spectral = nothing, + ) where {T <: AbstractFloat, K} + + nx = Int32(size(mask, 1)); ny = Int32(size(mask, 2)); nz = Int32(size(mask, 3)) + n_cols = Int32(size(pilot, 1)); n_rows = Int32(size(pilot, 2)); n_angles = Int32(size(pilot, 3)) + n_elem = n_cols * n_rows * n_angles + + vol_bounds = volume_extent !== nothing ? volume_extent : geom.fov + vmin_x = T(-vol_bounds[1] / 2); vmin_y = T(-vol_bounds[2] / 2); vmin_z = T(-vol_bounds[3] / 2) + vx = T(vol_bounds[1]) / T(nx); vy = T(vol_bounds[2]) / T(ny); vz = T(vol_bounds[3]) / T(nz) + _dd_check_isotropy(vx, vy) + + mag = T(geom.SDD / geom.SAD) + ps = T(geom.pixel_size); prs = T(geom.pixel_row_size) + col_center = (T(n_cols) + one(T)) / T(2) + row_center = (T(n_rows) + one(T)) / T(2) + + sp = _dd_geom_array(ws_source_positions, mask, geom.source_positions, T) + dc = _dd_geom_array(ws_detector_centers, mask, geom.detector_centers, T) + du = _dd_geom_array(ws_detector_u, mask, geom.detector_u, T) + dv = _dd_geom_array(ws_detector_v, mask, geom.detector_v, T) + + nc_nr = n_cols * n_rows + has_src_spectral = ws_bowtie_spectral !== nothing + _bt = has_src_spectral ? ws_bowtie_spectral : similar(μ_table_gpu, T, 1, 1, 1) + + let mask = mask, μ_tbl = μ_table_gpu, W = W_gpu, ts = tile_start, + sp = sp, dc = dc, du = du, dv = dv, bt = _bt, + vmx = vmin_x, vmy = vmin_y, vmz = vmin_z, vsx = vx, vsy = vy, vsz = vz, + nx = nx, ny = ny, nz = nz, nc = n_cols, nr = n_rows, + ne = n_elem, nb = n_bins, oflat = outputs_flat, + mag = mag, ps = ps, prs = prs, cc = col_center, rc = row_center, + ncnr = nc_nr, hbt = has_src_spectral + + AK.foreachindex(pilot) do idx + idx_0 = Int32(idx - 1) + col = (idx_0 % nc) + Int32(1) + idx_0 = idx_0 ÷ nc + row = (idx_0 % nr) + Int32(1) + angle = (idx_0 ÷ nr) + Int32(1) + + sx = sp[1, angle]; sy = sp[2, angle]; sz = sp[3, angle] + dcx = dc[1, angle]; dcy = dc[2, angle]; dcz = dc[3, angle] + ux = du[1, angle]; uy = du[2, angle]; vvz = dv[3, angle] + + (valid, vertical, s_long, s_tran, dXlo, dXhi, dZlo, dZhi, norm, + n_t, v_t, vmin_t, n_long, v_long, vmin_long) = _dd_cell_setup( + col, row, sx, sy, sz, dcx, dcy, dcz, ux, uy, vvz, + mag, ps, prs, cc, rc, nx, ny, vmx, vmy, vsx, vsy) + + accums = ntuple(_ -> zero(T), Val(K)) + if valid + il = Int32(1) + while il <= n_long + lp = vmin_long + (T(il) - T(0.5)) * v_long + mag_fac = s_long / (s_long - lp) + inv_mf = one(T) / mag_fac + (it_s, it_e) = _dd_bounds(dXlo, dXhi, s_tran, vmin_t, v_t, inv_mf, n_t) + (ip_s, ip_e) = _dd_bounds(dZlo, dZhi, sz, vmz, vsz, inv_mf, nz) + it = it_s + while it <= it_e + t0 = s_tran + (vmin_t + T(it - Int32(1)) * v_t - s_tran) * mag_fac + t1 = s_tran + (vmin_t + T(it) * v_t - s_tran) * mag_fac + ox = _dd_overlap(dXlo, dXhi, min(t0, t1), max(t0, t1)) + if ox > zero(T) + ixv = vertical ? it : il + iyv = vertical ? il : it + ip = ip_s + while ip <= ip_e + z0 = sz + (vmz + T(ip - Int32(1)) * vsz - sz) * mag_fac + z1 = sz + (vmz + T(ip) * vsz - sz) * mag_fac + oz = _dd_overlap(dZlo, dZhi, min(z0, z1), max(z0, z1)) + if oz > zero(T) + mat = Int32(mask[ixv, iyv, ip]) + Int32(1) + accums = _tiled_accum_energies(accums, μ_tbl, mat, ox * oz * norm, ts) + end + ip += Int32(1) + end + end + it += Int32(1) + end + il += Int32(1) + end + end + + if hbt + bt_base = Int32(col) + (Int32(row) - Int32(1)) * nc + for b in Int32(1):nb + oflat[idx + (b - Int32(1)) * ne] += _tiled_beer_lambert_col_bt(accums, W, b, ts, bt, bt_base, ncnr) + end + else + for b in Int32(1):nb + oflat[idx + (b - Int32(1)) * ne] += _tiled_beer_lambert_col(accums, W, b, ts) + end + end + end + end + return outputs_flat +end diff --git a/src/projection/polychromatic.jl b/src/projection/polychromatic.jl index bf37c72..73c3db6 100644 --- a/src/projection/polychromatic.jl +++ b/src/projection/polychromatic.jl @@ -474,7 +474,7 @@ function _forward_project_poly!( _buf end - siddon_fused_poly_project!( + dd_fused_poly_project!( sinogram, mask, geom, ws_μ_table_gpu, wη_dev, Val(n_energies); volume_extent = volume_extent, ws_source_positions = ws_source_positions, @@ -524,7 +524,7 @@ function _forward_project_poly!( end # Use proven-fast fused kernel with Val(16) — 95ms/tile on Metal - siddon_fused_poly_project!( + dd_fused_poly_project!( sinogram, mask, geom, μ_sub, wη_sub, Val(TILE_K); volume_extent = volume_extent, @@ -580,7 +580,7 @@ function _forward_project_poly!( # Forward project at this energy fill!(sino_mono, zero(T)) - siddon_forward_project!( + dd_forward_project!( sino_mono, μ_volume, geom; ws_source_positions = ws_source_positions, ws_detector_centers = ws_detector_centers, diff --git a/src/reconstruction/ir/utils.jl b/src/reconstruction/ir/utils.jl index f2dd37e..3aa8fa5 100644 --- a/src/reconstruction/ir/utils.jl +++ b/src/reconstruction/ir/utils.jl @@ -183,7 +183,7 @@ function compute_projection_weights( ::Type{T} ) where T <: AbstractFloat ones_volume = ones(T, volume_size...) - ray_sums = siddon_forward_project(ones_volume, geom) + ray_sums = dd_forward_project(ones_volume, geom) eps = T(1e-8) AK.foreachindex(ray_sums) do idx val = ray_sums[idx] diff --git a/test/projection.jl b/test/projection.jl index b3036f7..26cd4e2 100644 --- a/test/projection.jl +++ b/test/projection.jl @@ -81,6 +81,122 @@ end @test maximum(abs.(sino_alloc .- sino_inplace)) < 1.0e-4 end +# ----------------------------------------------------------------------------- +# dd_forward_project — distance-driven projector (drop-in for Siddon). +# Must agree with Siddon on line integrals (both discretise ∫μ·dl) and be a +# linear operator on μ. Small edge differences (DD footprint vs Siddon chord) +# are expected; we bound the mean relative deviation on rays that hit the object. +# ----------------------------------------------------------------------------- +@testset "dd_forward_project (distance-driven)" begin + # Larger toy grid so the object spans many detector rays (fair DD-vs-Siddon stats). + geom = _toy_proj_geom(n_cols = 64, n_rows = 8, n_angles = 8, fov_cm = 20.0) + nx = ny = 64 + nz = 8 + + @testset "shape + zero volume → zero sinogram" begin + vol = zeros(Float32, nx, ny, nz) + sino = BS.dd_forward_project(vol, geom) + @test size(sino) == (geom.n_cols, geom.n_rows, geom.n_angles) + @test eltype(sino) == Float32 + @test all(sino .== 0) + end + + # Centred water cylinder, μ = 0.2 cm⁻¹. + vol = zeros(Float32, nx, ny, nz) + cx = (nx + 1) / 2 + cy = (ny + 1) / 2 + R = 0.6 * (nx / 2) + for k in 1:nz, j in 1:ny, i in 1:nx + if (i - cx)^2 + (j - cy)^2 <= R^2 + vol[i, j, k] = 0.2f0 + end + end + + sino_siddon = BS.siddon_forward_project(vol, geom) + sino_dd = BS.dd_forward_project(vol, geom) + + @testset "agrees with Siddon on object rays" begin + mask = sino_siddon .> 1.0f-4 + relΔ = abs.(sino_dd[mask] .- sino_siddon[mask]) ./ sino_siddon[mask] + @test count(mask) > 1000 # object actually illuminated + @test sum(relΔ) / length(relΔ) < 0.01 # mean relative deviation < 1% + @test maximum(relΔ) < 0.05 # worst-ray deviation < 5% + end + + @testset "in-place == allocating" begin + sino_ip = zeros(Float32, geom.n_cols, geom.n_rows, geom.n_angles) + BS.dd_forward_project!(sino_ip, vol, geom) + @test maximum(abs.(sino_ip .- sino_dd)) < 1.0e-5 + end + + @testset "linearity in μ — 2× scale doubles the sinogram" begin + sino_2x = BS.dd_forward_project(2 .* vol, geom) + @test maximum(abs.(sino_2x .- 2 .* sino_dd)) < 1.0e-4 + end +end + +# ----------------------------------------------------------------------------- +# dd_fused_poly_project! / dd_fused_spectral_project! — must agree with the +# Siddon fused kernels (same mask / μ-table / weights) within the DD-vs-Siddon +# discretisation tolerance. These are the kernels the EI (polychromatic) and +# PCCT (photon-counting) pipelines call. +# ----------------------------------------------------------------------------- +@testset "dd fused projectors vs Siddon fused" begin + geom = _toy_proj_geom(n_cols = 64, n_rows = 8, n_angles = 8, fov_cm = 20.0) + nx = ny = 64 + nz = 8 + N_E = 8 + + # material mask: 0 = air, 1 = water cylinder + mask = zeros(UInt16, nx, ny, nz) + for k in 1:nz, j in 1:ny, i in 1:nx + if (i - 32.5)^2 + (j - 32.5)^2 <= (0.6 * 32)^2 + mask[i, j, k] = UInt16(1) + end + end + μ_table = zeros(Float32, 2, N_E) # row 1 = air (0), row 2 = water + for e in 1:N_E + μ_table[2, e] = 0.30f0 - 0.12f0 * (e - 1) / (N_E - 1) + end + wη = fill(Float32(1 / N_E), N_E) + + @testset "fused poly" begin + sino_s = zeros(Float32, geom.n_cols, geom.n_rows, geom.n_angles) + sino_d = zeros(Float32, geom.n_cols, geom.n_rows, geom.n_angles) + BS.siddon_fused_poly_project!(sino_s, mask, geom, μ_table, wη, Val(N_E)) + BS.dd_fused_poly_project!(sino_d, mask, geom, μ_table, wη, Val(N_E)) + m = sino_s .> 1.0f-4 + relΔ = abs.(sino_d[m] .- sino_s[m]) ./ sino_s[m] + @test count(m) > 1000 + @test sum(relΔ) / length(relΔ) < 0.01 # mean within 1% + @test maximum(relΔ) < 0.05 + end + + @testset "fused spectral (PCCT, single tile)" begin + n_bins = Int32(2) + K = N_E + W = zeros(Float32, N_E, 2) + for e in 1:N_E + W[e, 1] = wη[e] * (1.0f0 - (e - 1) / (N_E - 1)) + W[e, 2] = wη[e] * ((e - 1) / (N_E - 1)) + end + pilot = zeros(Float32, geom.n_cols, geom.n_rows, geom.n_angles) + ne = length(pilot) + out_s = zeros(Float32, ne * 2) + out_d = zeros(Float32, ne * 2) + BS.siddon_fused_spectral_project!(pilot, out_s, n_bins, mask, geom, μ_table, W, Val(K), Int32(1)) + BS.dd_fused_spectral_project!(pilot, out_d, n_bins, mask, geom, μ_table, W, Val(K), Int32(1)) + for b in 1:2 + seg = ((b - 1) * ne + 1):(b * ne) + bs = out_s[seg] + bd = out_d[seg] + mm = bs .> 1.0f-6 + relb = abs.(bd[mm] .- bs[mm]) ./ bs[mm] + @test sum(relb) / length(relb) < 0.02 # bin mean within 2% + end + end +end + # ----------------------------------------------------------------------------- # create_μ_volume! — label → material → μ at energy mapping. # -----------------------------------------------------------------------------