From f40fedbffe7f5b6200b79e7bb7fa9851210caba4 Mon Sep 17 00:00:00 2001 From: Alonso Martinez Cisneros Date: Wed, 1 Apr 2026 15:43:44 +0200 Subject: [PATCH 01/10] Initial (correct) implmentation of getindex for device-side CSC, CSR and SparseVector --- src/GPUArrays.jl | 1 + src/device/indexing.jl | 72 ++++++++++++++++++++++++++++++++++++++++++ src/device/sparse.jl | 1 + test/runtests.jl | 1 + 4 files changed, 75 insertions(+) create mode 100644 src/device/indexing.jl diff --git a/src/GPUArrays.jl b/src/GPUArrays.jl index 17a4d897..8a3fed24 100644 --- a/src/GPUArrays.jl +++ b/src/GPUArrays.jl @@ -20,6 +20,7 @@ using KernelAbstractions # device functionality include("device/abstractarray.jl") include("device/sparse.jl") +include("device/indexing.jl") # host abstractions include("host/abstractarray.jl") diff --git a/src/device/indexing.jl b/src/device/indexing.jl new file mode 100644 index 00000000..16a06404 --- /dev/null +++ b/src/device/indexing.jl @@ -0,0 +1,72 @@ +# device-level indexing +using SparseArrays: nonzeroinds, nonzeros, nnz, getcolptr +using Base: @propagate_inbounds + +Base.IndexStyle(::Type{GPUSparseDeviceVector}) = Base.IndexLinear() + +# Scalar indexing +## Adapted from SparseArrays.AbstractSparseVector + +@propagate_inbounds function Base.getindex(v::GPUSparseDeviceVector{Tv,Ti}, i::Integer) where {Tv,Ti} + @boundscheck checkbounds(v, i) + m = nnz(v) + nzind = nonzeroinds(v) + nzval = nonzeros(v) + + ii = searchsortedfirst(nzind, convert(Ti, i)) + (ii <= m && nzind[ii] == i) ? nzval[ii] : zero(Tv) +end + +# TODO: Logical indexing + +# Indexing by colon not implemented. Non-scalar indexing would allocate in device code +@propagate_inbounds Base.getindex(A::AbstractGPUSparseDeviceMatrix, I::Tuple{Integer,Integer}) = getindex(A, I[1], I[2]) + +## Adapted logic from SparseArrays.AbstractSparseMatrixCSC +@propagate_inbounds function Base.getindex(A::GPUSparseDeviceMatrixCSC{Tv,Ti}, i::Integer, j::Integer) where {Tv,Ti} + @boundscheck checkbounds(A, i, j) + colPtr, rowVal, nzVal = getcolptr(A), rowvals(A), nonzeros(A) + + # Range of possible row indices + rl = convert(Ti, @inbounds colPtr[j]) + rr = convert(Ti, @inbounds colPtr[j+1] - 1) + (rl > rr) && return zero(Tv) + + # possible_row = @view rowVal[rl:rr] + ii = searchsortedfirst(rowVal, convert(Ti, i), rl, rr, Base.Order.Forward) + (ii <= nnz(A) && rowVal[ii] == i) ? nzVal[ii] : zero(Tv) +end + +@propagate_inbounds function Base.getindex(A::GPUSparseDeviceMatrixCSR{Tv,Ti}, i::Integer, j::Integer) where {Tv,Ti} + @boundscheck checkbounds(A, i, j) + rowPtr, colVal, nzVal = A.rowPtr, A.colVal, A.nzVal + + # Range of possible col indices + rt = convert(Ti, @inbounds rowPtr[i]) + rb = convert(Ti, @inbounds rowPtr[i+1] - 1) + (rt > rb) && return zero(Tv) + + # possible_col = @view colVal[rt:rb] + jj = searchsortedfirst(colVal, convert(Ti, j), rt, rb, Base.Order.Forward) + (jj <= nnz(A) && colVal[jj] == j) ? nzVal[jj] : zero(Tv) +end + +## Adapted from CUDA.jl/blob/lib/cusparse/src/array.jl#L490 +# FIXME: Currently not correct +@propagate_inbounds function Base.getindex(A::GPUSparseDeviceMatrixCOO{Tv,Ti}, i::Integer, j::Integer) where {Tv,Ti} + # COO in CUDA is assumed to be sorted by row: https://docs.nvidia.com/cuda/cusparse/storage-formats.html?highlight=coo#coordinate-coo + # @boundscheck checkbounds(A, i, j) + rowInd, colInd, nzVal = A.rowInd, A.colInd, A.nzVal + + # Looking for the range s.t. rowInd[r1:r2] .== i + rl = searchsortedfirst(rowInd, i) + (rl > nnz(A) || rowInd[rl] > i) && return 42 + rr = min(searchsortedfirst(rowInd, i+1, Base.Order.Forward), nnz(A)) # searchsortedlast didn't behave as expected + # FIXME: colInd isn't sorted + jj = searchsortedfirst(colInd, j, rl, rr, Base.Order.Forward) + (jj > rr || jj == nnz(A) + 1 || colInd[jj] > j) && return zero(Tv) + + return jj +end + +# TODO: Support BSR format diff --git a/src/device/sparse.jl b/src/device/sparse.jl index b8346eaf..d315ccf2 100644 --- a/src/device/sparse.jl +++ b/src/device/sparse.jl @@ -106,6 +106,7 @@ Base.length(g::AbstractGPUSparseDeviceMatrix) = prod(g.dims) Base.size(g::AbstractGPUSparseDeviceMatrix) = g.dims SparseArrays.nnz(g::AbstractGPUSparseDeviceMatrix) = g.nnz SparseArrays.getnzval(g::AbstractGPUSparseDeviceMatrix) = g.nzVal +# FIXME: Implement `rowvals` to explicitly say why it's not available for CSR format? struct GPUSparseDeviceArrayCSR{Tv, Ti, Vi, Vv, N, M, A} <: AbstractSparseArray{Tv, Ti, N} rowPtr::Vi diff --git a/test/runtests.jl b/test/runtests.jl index a91715b9..ba5e40be 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,6 +10,7 @@ const init_worker_code = quote TestSuite.sparse_types(::Type{<:JLArray}) = (JLSparseVector, JLSparseMatrixCSC, JLSparseMatrixCSR) TestSuite.sparse_types(::Type{<:Array}) = (SparseVector, SparseMatrixCSC) + TestSuite.sparse_device_types(::Type{<:Array}) = (GPUSparseDeviceVector, GPUSparseDeviceMatrixCSC, GPUSparseDeviceMatrixCSR, GPUSparseDeviceMatrixCOO) # Disable Float16-related tests until JuliaGPU/KernelAbstractions#600 is resolved if isdefined(JLArrays.KernelAbstractions, :POCL) From 0878ad62d548b70012d2a62ceaa3b1e6716c951a Mon Sep 17 00:00:00 2001 From: Alonso Martinez Cisneros Date: Sun, 5 Apr 2026 06:33:34 +0200 Subject: [PATCH 02/10] Fixes for COO indices. It works now --- src/device/indexing.jl | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/src/device/indexing.jl b/src/device/indexing.jl index 16a06404..6878b066 100644 --- a/src/device/indexing.jl +++ b/src/device/indexing.jl @@ -7,7 +7,10 @@ Base.IndexStyle(::Type{GPUSparseDeviceVector}) = Base.IndexLinear() # Scalar indexing ## Adapted from SparseArrays.AbstractSparseVector -@propagate_inbounds function Base.getindex(v::GPUSparseDeviceVector{Tv,Ti}, i::Integer) where {Tv,Ti} +@propagate_inbounds function Base.getindex( + v::GPUSparseDeviceVector{Tv,Ti}, + i::Integer, +) where {Tv,Ti} @boundscheck checkbounds(v, i) m = nnz(v) nzind = nonzeroinds(v) @@ -20,10 +23,17 @@ end # TODO: Logical indexing # Indexing by colon not implemented. Non-scalar indexing would allocate in device code -@propagate_inbounds Base.getindex(A::AbstractGPUSparseDeviceMatrix, I::Tuple{Integer,Integer}) = getindex(A, I[1], I[2]) +@propagate_inbounds Base.getindex( + A::AbstractGPUSparseDeviceMatrix, + I::Tuple{Integer,Integer}, +) = getindex(A, I[1], I[2]) ## Adapted logic from SparseArrays.AbstractSparseMatrixCSC -@propagate_inbounds function Base.getindex(A::GPUSparseDeviceMatrixCSC{Tv,Ti}, i::Integer, j::Integer) where {Tv,Ti} +@propagate_inbounds function Base.getindex( + A::GPUSparseDeviceMatrixCSC{Tv,Ti}, + i::Integer, + j::Integer, +) where {Tv,Ti} @boundscheck checkbounds(A, i, j) colPtr, rowVal, nzVal = getcolptr(A), rowvals(A), nonzeros(A) @@ -32,12 +42,15 @@ end rr = convert(Ti, @inbounds colPtr[j+1] - 1) (rl > rr) && return zero(Tv) - # possible_row = @view rowVal[rl:rr] ii = searchsortedfirst(rowVal, convert(Ti, i), rl, rr, Base.Order.Forward) (ii <= nnz(A) && rowVal[ii] == i) ? nzVal[ii] : zero(Tv) end -@propagate_inbounds function Base.getindex(A::GPUSparseDeviceMatrixCSR{Tv,Ti}, i::Integer, j::Integer) where {Tv,Ti} +@propagate_inbounds function Base.getindex( + A::GPUSparseDeviceMatrixCSR{Tv,Ti}, + i::Integer, + j::Integer, +) where {Tv,Ti} @boundscheck checkbounds(A, i, j) rowPtr, colVal, nzVal = A.rowPtr, A.colVal, A.nzVal @@ -46,27 +59,28 @@ end rb = convert(Ti, @inbounds rowPtr[i+1] - 1) (rt > rb) && return zero(Tv) - # possible_col = @view colVal[rt:rb] jj = searchsortedfirst(colVal, convert(Ti, j), rt, rb, Base.Order.Forward) (jj <= nnz(A) && colVal[jj] == j) ? nzVal[jj] : zero(Tv) end ## Adapted from CUDA.jl/blob/lib/cusparse/src/array.jl#L490 -# FIXME: Currently not correct -@propagate_inbounds function Base.getindex(A::GPUSparseDeviceMatrixCOO{Tv,Ti}, i::Integer, j::Integer) where {Tv,Ti} +@propagate_inbounds function Base.getindex( + A::GPUSparseDeviceMatrixCOO{Tv,Ti}, + i::Integer, + j::Integer, +) where {Tv,Ti} # COO in CUDA is assumed to be sorted by row: https://docs.nvidia.com/cuda/cusparse/storage-formats.html?highlight=coo#coordinate-coo - # @boundscheck checkbounds(A, i, j) + @boundscheck checkbounds(A, i, j) rowInd, colInd, nzVal = A.rowInd, A.colInd, A.nzVal # Looking for the range s.t. rowInd[r1:r2] .== i rl = searchsortedfirst(rowInd, i) (rl > nnz(A) || rowInd[rl] > i) && return 42 rr = min(searchsortedfirst(rowInd, i+1, Base.Order.Forward), nnz(A)) # searchsortedlast didn't behave as expected - # FIXME: colInd isn't sorted jj = searchsortedfirst(colInd, j, rl, rr, Base.Order.Forward) (jj > rr || jj == nnz(A) + 1 || colInd[jj] > j) && return zero(Tv) - return jj + return nzVal[jj] end # TODO: Support BSR format From f7f210bd53533301665f9f06399974ccb8ea38af Mon Sep 17 00:00:00 2001 From: Alonso Martinez Cisneros Date: Tue, 7 Apr 2026 19:40:06 +0200 Subject: [PATCH 03/10] Implementing scalar indexing for BSR formatted sparse device arrays --- src/device/indexing.jl | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/device/indexing.jl b/src/device/indexing.jl index 6878b066..05f5d943 100644 --- a/src/device/indexing.jl +++ b/src/device/indexing.jl @@ -83,4 +83,22 @@ end return nzVal[jj] end -# TODO: Support BSR format +## Adapted from CUDA.jl/blob/lib/cusparse/src/array.jl#L500 +@propagate_inbounds function Base.getindex( + A::GPUSparseDeviceMatrixBSR{Tv,Ti}, + i::Integer, + j::Integer, +) where {Tv,Ti} + @boundscheck checkbounds(A, i, j) + rowPtr, colVal, nzVal = A.rowPtr, A.colVal, A.nzVal + + i_block, i_idx = fldmod1(i, A.blockDim) + j_block, j_idx = fldmod1(j, A.blockDim) + block_idx = (i_idx-1) * A.blockDim + j_idx - 1 + c1 = convert(Ti, rowPtr[i_block]) + c1 = convert(Ti, rowPtr[i_block+1]-1) + (c1 > c2) && return zero(Tv) + c1 = searchsortedfirst(colVal, j_block, c1, c2, Base.Order.Forward) + (c1 > c2 || colVal[c1] != j_block) && return zero(Tv) + nzVal[c1+block_idx] +end From 0f2213c9548dac2122623c04fb40d24f34ab5896 Mon Sep 17 00:00:00 2001 From: Alonso Martinez Cisneros Date: Wed, 8 Apr 2026 20:37:03 +0200 Subject: [PATCH 04/10] Implementing logical (scalar) indexing --- src/device/indexing.jl | 54 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/src/device/indexing.jl b/src/device/indexing.jl index 05f5d943..13b05e8f 100644 --- a/src/device/indexing.jl +++ b/src/device/indexing.jl @@ -4,9 +4,9 @@ using Base: @propagate_inbounds Base.IndexStyle(::Type{GPUSparseDeviceVector}) = Base.IndexLinear() -# Scalar indexing -## Adapted from SparseArrays.AbstractSparseVector +# Implementing only scalar indexing. Non-scalar indexing would allocate in device code +## Adapted from SparseArrays.AbstractSparseVector @propagate_inbounds function Base.getindex( v::GPUSparseDeviceVector{Tv,Ti}, i::Integer, @@ -20,9 +20,55 @@ Base.IndexStyle(::Type{GPUSparseDeviceVector}) = Base.IndexLinear() (ii <= m && nzind[ii] == i) ? nzval[ii] : zero(Tv) end -# TODO: Logical indexing +# Logical getindex +@propagate_inbounds function Base.getindex( + v::GPUSparseDeviceVector, + I::AbstractVector{Bool}, +) + isone(count(I)) ? v[findfirst(I)] : + error( + "Logical index contains more than one true value. Device arrays only support scalar indexing.", + ) +end + +@propagate_inbounds function Base.getindex( + A::AbstractGPUSparseDeviceMatrix, + i::Integer, + J::AbstractVector{Bool}, +) + isone(count(J)) ? A[i, findfirst(J)] : + error( + "Logical index contains more than one true value. Device arrays only support scalar indexing.", + ) +end + +@propagate_inbounds function Base.getindex( + A::AbstractGPUSparseDeviceMatrix, + I::AbstractVector{Bool}, + j::Integer, +) + isone(count(I)) ? A[findfirst(I), j] : + error( + "Logical index contains more than one true value. Device arrays only support scalar indexing.", + ) +end + +@propagate_inbounds function Base.getindex( + A::AbstractGPUSparseDeviceMatrix, + I::AbstractVector{Bool}, + J::AbstractVector{Bool}, +) + if (isone(count(I)) && isone(count(J))) + A[findfirst(I), findfirst(J)] + else + failing_len, failing_idx = findmax((count(I), count(J))) + error( + "Logical index $(failing_idx == 1 ? "I" : "J") contains $(failing_len) true " * + "values. Device arrays only support scalar indexing.", + ) + end +end -# Indexing by colon not implemented. Non-scalar indexing would allocate in device code @propagate_inbounds Base.getindex( A::AbstractGPUSparseDeviceMatrix, I::Tuple{Integer,Integer}, From 2bfb0127c063c76f496db420db0c169c45d375ad Mon Sep 17 00:00:00 2001 From: Alonso Martinez Cisneros Date: Tue, 14 Apr 2026 20:31:25 +0200 Subject: [PATCH 05/10] Fixing incorrect indexing in BSR and COO matrices --- src/device/indexing.jl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/device/indexing.jl b/src/device/indexing.jl index 13b05e8f..c43b5f63 100644 --- a/src/device/indexing.jl +++ b/src/device/indexing.jl @@ -115,15 +115,17 @@ end i::Integer, j::Integer, ) where {Tv,Ti} - # COO in CUDA is assumed to be sorted by row: https://docs.nvidia.com/cuda/cusparse/storage-formats.html?highlight=coo#coordinate-coo + # COO in CUDA is assumed to be sorted by row: + #https://docs.nvidia.com/cuda/cusparse/storage-formats.html?highlight=coo#coordinate-coo @boundscheck checkbounds(A, i, j) rowInd, colInd, nzVal = A.rowInd, A.colInd, A.nzVal # Looking for the range s.t. rowInd[r1:r2] .== i - rl = searchsortedfirst(rowInd, i) - (rl > nnz(A) || rowInd[rl] > i) && return 42 - rr = min(searchsortedfirst(rowInd, i+1, Base.Order.Forward), nnz(A)) # searchsortedlast didn't behave as expected - jj = searchsortedfirst(colInd, j, rl, rr, Base.Order.Forward) + rl = searchsortedfirst(rowInd, i, Base.Order.Forward) + (rl > nnz(A) || rowInd[rl] > i) && return zero(Tv) + rr = min(searchsortedfirst(rowInd, i+1, Base.Order.Forward), nnz(A)) + # Important to exclude rr, as including it un-sorts colInd[rl:rr] + jj = searchsortedfirst(colInd, j, rl, rr-1, Base.Order.Forward) (jj > rr || jj == nnz(A) + 1 || colInd[jj] > j) && return zero(Tv) return nzVal[jj] @@ -142,7 +144,7 @@ end j_block, j_idx = fldmod1(j, A.blockDim) block_idx = (i_idx-1) * A.blockDim + j_idx - 1 c1 = convert(Ti, rowPtr[i_block]) - c1 = convert(Ti, rowPtr[i_block+1]-1) + c2 = convert(Ti, rowPtr[i_block+1]-1) (c1 > c2) && return zero(Tv) c1 = searchsortedfirst(colVal, j_block, c1, c2, Base.Order.Forward) (c1 > c2 || colVal[c1] != j_block) && return zero(Tv) From bbde7937f8008f77891afe3da99dad4f35ebe41f Mon Sep 17 00:00:00 2001 From: Alonso Martinez Cisneros Date: Mon, 27 Apr 2026 14:04:20 +0200 Subject: [PATCH 06/10] Fixing assumption in COO index that colInd is sorted --- src/device/indexing.jl | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/device/indexing.jl b/src/device/indexing.jl index c43b5f63..482297f8 100644 --- a/src/device/indexing.jl +++ b/src/device/indexing.jl @@ -74,6 +74,16 @@ end I::Tuple{Integer,Integer}, ) = getindex(A, I[1], I[2]) +# Scalar getindex methods linear-scan the minor axis rather than binary-searching +# and sum across matching entries. cuSPARSE formats don't guarantee sorted indices +# within a major-axis slice (e.g. SpGEMM output may leave CSR columns unsorted +# within a row, and COO is only guaranteed row-sorted), nor uniqueness — duplicate +# (i, j) entries are permitted and their values sum, matching the convention of +# Julia's `sparse()` constructor and SciPy/CuPy. For Bool we OR instead of sum, +# also matching `sparse()`, since Bool + Bool doesn't stay Bool. +sum_duplicate(a, b) = a + b +sum_duplicate(a::Bool, b::Bool) = a | b + ## Adapted logic from SparseArrays.AbstractSparseMatrixCSC @propagate_inbounds function Base.getindex( A::GPUSparseDeviceMatrixCSC{Tv,Ti}, @@ -115,7 +125,7 @@ end i::Integer, j::Integer, ) where {Tv,Ti} - # COO in CUDA is assumed to be sorted by row: + # COO in CUDA is assumed to be sorted by row (not col): #https://docs.nvidia.com/cuda/cusparse/storage-formats.html?highlight=coo#coordinate-coo @boundscheck checkbounds(A, i, j) rowInd, colInd, nzVal = A.rowInd, A.colInd, A.nzVal @@ -125,10 +135,12 @@ end (rl > nnz(A) || rowInd[rl] > i) && return zero(Tv) rr = min(searchsortedfirst(rowInd, i+1, Base.Order.Forward), nnz(A)) # Important to exclude rr, as including it un-sorts colInd[rl:rr] - jj = searchsortedfirst(colInd, j, rl, rr-1, Base.Order.Forward) - (jj > rr || jj == nnz(A) + 1 || colInd[jj] > j) && return zero(Tv) - - return nzVal[jj] + # Column is not guaranteed to be sorted. We linear scan + result = zero(Tv) + for k in rl:rl + A.colInd[k] == i && (result = sum_duplicate(result, nonzeros(A)[k])) + end + return result end ## Adapted from CUDA.jl/blob/lib/cusparse/src/array.jl#L500 From 9f76958d0c384516918e9887d4d4edd669406673 Mon Sep 17 00:00:00 2001 From: Alonso Martinez Cisneros Date: Mon, 27 Apr 2026 14:12:21 +0200 Subject: [PATCH 07/10] Fixing indexing assumption for BSR format --- src/device/indexing.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/device/indexing.jl b/src/device/indexing.jl index 482297f8..359d9fd4 100644 --- a/src/device/indexing.jl +++ b/src/device/indexing.jl @@ -150,15 +150,15 @@ end j::Integer, ) where {Tv,Ti} @boundscheck checkbounds(A, i, j) - rowPtr, colVal, nzVal = A.rowPtr, A.colVal, A.nzVal i_block, i_idx = fldmod1(i, A.blockDim) j_block, j_idx = fldmod1(j, A.blockDim) block_idx = (i_idx-1) * A.blockDim + j_idx - 1 - c1 = convert(Ti, rowPtr[i_block]) - c2 = convert(Ti, rowPtr[i_block+1]-1) - (c1 > c2) && return zero(Tv) - c1 = searchsortedfirst(colVal, j_block, c1, c2, Base.Order.Forward) - (c1 > c2 || colVal[c1] != j_block) && return zero(Tv) - nzVal[c1+block_idx] + c1 = convert(Ti, A.rowPtr[i_block]) + c2 = convert(Ti, A.rowPtr[i_block+1]-1) + result = zero(T) + for k in c1:c2 + A.colVal[k] == i_block && (result == sum_duplicate(result, nonzeros(a)[k+block_idx])) + end + return result end From 8298dbfc0f34081d507c9a48a9f8415f09b5bee3 Mon Sep 17 00:00:00 2001 From: Alonso Martinez Cisneros Date: Tue, 28 Apr 2026 11:47:27 +0200 Subject: [PATCH 08/10] Initial testing for device-side sparse array indexing --- src/device/indexing.jl | 23 +++++++++++++++ test/runtests.jl | 3 +- test/testsuite.jl | 26 ++++++++--------- test/testsuite/sparse.jl | 63 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 99 insertions(+), 16 deletions(-) diff --git a/src/device/indexing.jl b/src/device/indexing.jl index 359d9fd4..4c2893b4 100644 --- a/src/device/indexing.jl +++ b/src/device/indexing.jl @@ -20,6 +20,29 @@ Base.IndexStyle(::Type{GPUSparseDeviceVector}) = Base.IndexLinear() (ii <= m && nzind[ii] == i) ? nzval[ii] : zero(Tv) end +# @propagate_inbounds function Base.setindex!( +# x::GPUSparseDeviceVector{Tv,Ti}, +# v::Tv, +# i::Integer, +# ) where {Tv,Ti} +# @boundscheck checkbounds(x, i) +# m = nnz(x) +# nzind = nonzeroinds(x) +# nzval = nonzeros(x) + +# ii = searchsortedfirst(nzind, convert(Ti, i)) +# # (ii <= m && nzind[ii] == i) ? nzval[ii] : zero(Tv) +# if 1 <= ii <= m && nzind[ii] == i +# nzval[ii] = v +# else +# # nzval[ii] = convert(Tv, 43) +# if v !== zero(Tv) +# # insert!(nzind, ii, convert(Tv, i)) +# insert!(nzind, ii, v) +# end +# end +# end + # Logical getindex @propagate_inbounds function Base.getindex( v::GPUSparseDeviceVector, diff --git a/test/runtests.jl b/test/runtests.jl index ba5e40be..e9161675 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,8 +9,7 @@ const init_worker_code = quote include("testsuite.jl") TestSuite.sparse_types(::Type{<:JLArray}) = (JLSparseVector, JLSparseMatrixCSC, JLSparseMatrixCSR) - TestSuite.sparse_types(::Type{<:Array}) = (SparseVector, SparseMatrixCSC) - TestSuite.sparse_device_types(::Type{<:Array}) = (GPUSparseDeviceVector, GPUSparseDeviceMatrixCSC, GPUSparseDeviceMatrixCSR, GPUSparseDeviceMatrixCOO) + TestSuite.sparse_types(::Type{<:Array}) = (SparseVector, SparseMatrixCSC) # TODO: Add CSR, COO, etc formats # Disable Float16-related tests until JuliaGPU/KernelAbstractions#600 is resolved if isdefined(JLArrays.KernelAbstractions, :POCL) diff --git a/test/testsuite.jl b/test/testsuite.jl index e4aea4f7..ade189a7 100644 --- a/test/testsuite.jl +++ b/test/testsuite.jl @@ -100,20 +100,20 @@ macro testsuite(name, ex) end end -include("testsuite/construction.jl") -include("testsuite/indexing.jl") -include("testsuite/base.jl") -include("testsuite/vector.jl") -include("testsuite/reductions.jl") -include("testsuite/broadcasting.jl") -include("testsuite/linalg.jl") -include("testsuite/math.jl") -include("testsuite/random.jl") -include("testsuite/uniformscaling.jl") -include("testsuite/statistics.jl") +# include("testsuite/construction.jl") +# include("testsuite/indexing.jl") +# include("testsuite/base.jl") +# include("testsuite/vector.jl") +# include("testsuite/reductions.jl") +# include("testsuite/broadcasting.jl") +# include("testsuite/linalg.jl") +# include("testsuite/math.jl") +# include("testsuite/random.jl") +# include("testsuite/uniformscaling.jl") +# include("testsuite/statistics.jl") include("testsuite/sparse.jl") -include("testsuite/alloc_cache.jl") -include("testsuite/jld2ext.jl") +# include("testsuite/alloc_cache.jl") +# include("testsuite/jld2ext.jl") """ Runs the entire GPUArrays test suite on array type `AT` diff --git a/test/testsuite/sparse.jl b/test/testsuite/sparse.jl index 3e0d92fe..753dc497 100644 --- a/test/testsuite/sparse.jl +++ b/test/testsuite/sparse.jl @@ -20,6 +20,48 @@ end using SparseArrays using SparseArrays: nonzeroinds, nonzeros, rowvals +@kernel function linearize_kernel!(dst, @Const(src)) + I = @index(Global, Linear) + @inbounds dst[I] = src[I] +end + +function linearize!(dst, src) + backend = get_backend(src) + @assert length(dst) == length(src) + + kernel = linearize_kernel!(backend) + kernel(dst, src, ndrange=length(dst)) + return +end + +@kernel function singleindex_kernel!(dst, @Const(src), i) + I = @index(Global, Linear) + @inbounds dst[1] = src[i] +end + +function singleindex!(dst, src::AbstractSparseVector, i::Integer) + backend = get_backend(src) + @assert length(dst) == 1 + checkbounds(src, i) + kernel = singleindex_kernel!(backend) + kernel(dst, src, i, ndrange=1) + return +end + +@kernel function singleindex_kernel!(dst, @Const(src), i, j) + I = @index(Global, Linear) + @inbounds dst[1] = src[i, j] +end + +function singleindex!(dst, src::AbstractSparseMatrix, i::Integer, j::Integer) + backend = get_backend(src) + @assert length(dst) == 1 + checkbounds(src, i) + kernel = singleindex_kernel!(backend) + kernel(dst, src, i, j, ndrange=1) + return +end + function vector(AT, eltypes) dense_AT = GPUArrays.dense_array_type(AT) for ET in eltypes @@ -38,6 +80,7 @@ function vector(AT, eltypes) @test ndims(d_x) == 1 dense_d_x = dense_AT(x) dense_d_x2 = dense_AT(d_x) + # Indexing @allowscalar begin @test Array(d_x[:]) == x[:] @test d_x[firstindex(d_x)] == x[firstindex(x)] @@ -54,6 +97,13 @@ function vector(AT, eltypes) @test Array(nonzeroinds(d_x)) == nonzeroinds(x) @test Array(rowvals(d_x)) == nonzeroinds(x) @test nnz(d_x) == length(nonzeros(d_x)) + # Device-side (scalar) indexing + dst = zeros(ET, size(d_x)) + linearize!(dst, d_x) + @test x == dst + dst_single = zeros(ET, 1) + singleindex!(dst_single, d_x, 17) + @test only(dst_single) == x[17] end end end @@ -77,7 +127,7 @@ end function matrix(AT, eltypes) dense_AT = GPUArrays.dense_array_type(AT) for ET in eltypes - @testset "Sparse matrix properties($ET)" begin + @testset "$(AT) properties($ET)" begin m = 25 n = 35 k = 10 @@ -98,6 +148,7 @@ function matrix(AT, eltypes) @test size(d_x, 2) == n @test size(d_x, 3) == 1 @test ndims(d_x) == 2 + # Indexing @allowscalar begin @test d_x[:, :] == x[:, :] @test d_tx[:, :] == transpose(x)[:, :] @@ -127,6 +178,16 @@ function matrix(AT, eltypes) @test nnz(d_x) == length(nonzeros(d_x)) @test !issymmetric(d_x) @test !ishermitian(d_x) + # Device-side (scalar) indexing + x = sprand(ET, m, n, 0.2) + d_x = AT(x) + dst = zeros(ET, length(d_x)) + linearize!(dst, d_x) + @test Array(x[:]) == dst + dst_single = zeros(ET, 1) + i, j = floor(Int, m * 0.5), floor(Int, n * 0.5) + singleindex!(dst_single, d_x, i, j) + @test only(dst_single) == x[i, j] end end end From 6269c9d999114f29c83623f567c1c472f66d8fd5 Mon Sep 17 00:00:00 2001 From: Alonso Martinez Cisneros Date: Tue, 28 Apr 2026 11:48:30 +0200 Subject: [PATCH 09/10] Removing comment --- src/device/indexing.jl | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/device/indexing.jl b/src/device/indexing.jl index 4c2893b4..359d9fd4 100644 --- a/src/device/indexing.jl +++ b/src/device/indexing.jl @@ -20,29 +20,6 @@ Base.IndexStyle(::Type{GPUSparseDeviceVector}) = Base.IndexLinear() (ii <= m && nzind[ii] == i) ? nzval[ii] : zero(Tv) end -# @propagate_inbounds function Base.setindex!( -# x::GPUSparseDeviceVector{Tv,Ti}, -# v::Tv, -# i::Integer, -# ) where {Tv,Ti} -# @boundscheck checkbounds(x, i) -# m = nnz(x) -# nzind = nonzeroinds(x) -# nzval = nonzeros(x) - -# ii = searchsortedfirst(nzind, convert(Ti, i)) -# # (ii <= m && nzind[ii] == i) ? nzval[ii] : zero(Tv) -# if 1 <= ii <= m && nzind[ii] == i -# nzval[ii] = v -# else -# # nzval[ii] = convert(Tv, 43) -# if v !== zero(Tv) -# # insert!(nzind, ii, convert(Tv, i)) -# insert!(nzind, ii, v) -# end -# end -# end - # Logical getindex @propagate_inbounds function Base.getindex( v::GPUSparseDeviceVector, From 3aac29c028a56ea2255a43d51b043019f59b6554 Mon Sep 17 00:00:00 2001 From: Alonso Martinez Cisneros Date: Tue, 28 Apr 2026 12:01:46 +0200 Subject: [PATCH 10/10] Reintroducing erroneously skipped tests --- test/testsuite.jl | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/test/testsuite.jl b/test/testsuite.jl index ade189a7..e4aea4f7 100644 --- a/test/testsuite.jl +++ b/test/testsuite.jl @@ -100,20 +100,20 @@ macro testsuite(name, ex) end end -# include("testsuite/construction.jl") -# include("testsuite/indexing.jl") -# include("testsuite/base.jl") -# include("testsuite/vector.jl") -# include("testsuite/reductions.jl") -# include("testsuite/broadcasting.jl") -# include("testsuite/linalg.jl") -# include("testsuite/math.jl") -# include("testsuite/random.jl") -# include("testsuite/uniformscaling.jl") -# include("testsuite/statistics.jl") +include("testsuite/construction.jl") +include("testsuite/indexing.jl") +include("testsuite/base.jl") +include("testsuite/vector.jl") +include("testsuite/reductions.jl") +include("testsuite/broadcasting.jl") +include("testsuite/linalg.jl") +include("testsuite/math.jl") +include("testsuite/random.jl") +include("testsuite/uniformscaling.jl") +include("testsuite/statistics.jl") include("testsuite/sparse.jl") -# include("testsuite/alloc_cache.jl") -# include("testsuite/jld2ext.jl") +include("testsuite/alloc_cache.jl") +include("testsuite/jld2ext.jl") """ Runs the entire GPUArrays test suite on array type `AT`