From c55adc86dfceec7e51182ff9731ac86711142fc0 Mon Sep 17 00:00:00 2001 From: Rainer Rodrigues Date: Tue, 21 Apr 2026 10:50:30 +0100 Subject: [PATCH 1/5] Added Base.similar methods for CuSparseMatrixCOO and BSR --- lib/cusparse/src/array.jl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/lib/cusparse/src/array.jl b/lib/cusparse/src/array.jl index d8ffa2b4a7..ba16f22d81 100644 --- a/lib/cusparse/src/array.jl +++ b/lib/cusparse/src/array.jl @@ -295,6 +295,22 @@ Base.similar(Mat::CuSparseMatrixCOO, dims::Tuple{Int, Int}) = similar(Mat, dims. Base.similar(Mat::CuSparseArrayCSR) = CuSparseArrayCSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(nonzeros(Mat)), size(Mat)) + +function Base.similar(mat::CuSparseMatrixCOO, ::Type{T}, dims::Dims{2}) where {T} + new_rowInd = similar(mat.rowInd) + new_colInd = similar(mat.colInd) + new_nzVal = similar(mat.nzVal, T) + return CuSparseMatrixCOO(new_rowInd, new_colInd, new_nzVal, dims) +end + +function Base.similar(mat::CuSparseMatrixBSR{Tv, Ti}, ::Type{T}, dims::Dims{2}) where {Tv, Ti, T} + new_rowPtr = similar(mat.rowPtr) + new_colVal = similar(mat.colVal) + new_nzVal = similar(mat.nzVal, T) + + return CuSparseMatrixBSR{T, Ti}(new_rowPtr, new_colVal, new_nzVal, dims, mat.blockDim, mat.dir, mat.nnzb) +end + ## array interface Base.length(g::CuSparseVector) = g.len From 4b626bf387baabf9ba551137cbc6be37ec3ca53f Mon Sep 17 00:00:00 2001 From: Rainer Rodrigues Date: Tue, 21 Apr 2026 10:59:31 +0100 Subject: [PATCH 2/5] Address review: explicitly pass {T, Ti} to COO constructor --- lib/cusparse/src/array.jl | 59 +++++++++++++-------------------------- 1 file changed, 20 insertions(+), 39 deletions(-) diff --git a/lib/cusparse/src/array.jl b/lib/cusparse/src/array.jl index ba16f22d81..49c5f6fa31 100644 --- a/lib/cusparse/src/array.jl +++ b/lib/cusparse/src/array.jl @@ -300,7 +300,7 @@ function Base.similar(mat::CuSparseMatrixCOO, ::Type{T}, dims::Dims{2}) where {T new_rowInd = similar(mat.rowInd) new_colInd = similar(mat.colInd) new_nzVal = similar(mat.nzVal, T) - return CuSparseMatrixCOO(new_rowInd, new_colInd, new_nzVal, dims) + return CuSparseMatrixCOO{T, Ti}(new_rowInd, new_colInd, new_nzVal, dims) end function Base.similar(mat::CuSparseMatrixBSR{Tv, Ti}, ::Type{T}, dims::Dims{2}) where {Tv, Ti, T} @@ -470,57 +470,39 @@ Base.getindex(A::CuSparseMatrixCSR, ::Colon, j::Integer) = CuSparseVector(sparse function Base.getindex(A::CuSparseVector{Tv, Ti}, i::Integer) where {Tv, Ti} @boundscheck checkbounds(A, i) - result = zero(Tv) - for k in 1:nnz(A) - A.iPtr[k] == i && (result = sum_duplicate(result, A.nzVal[k])) - end - return result + ii = searchsortedfirst(A.iPtr, convert(Ti, i)) + (ii > nnz(A) || A.iPtr[ii] != i) && return zero(Tv) + A.nzVal[ii] end -# Scalar getindex methods linear-scan the minor axis rather than binary-searching -# and sum across matching entries. cuSPARSE formats don't guarantee sorted indices -# within a major-axis slice (e.g. SpGEMM output may leave CSR columns unsorted -# within a row, and COO is only guaranteed row-sorted), nor uniqueness — duplicate -# (i, j) entries are permitted and their values sum, matching the convention of -# Julia's `sparse()` constructor and SciPy/CuPy. For Bool we OR instead of sum, -# also matching `sparse()`, since Bool + Bool doesn't stay Bool. -sum_duplicate(a, b) = a + b -sum_duplicate(a::Bool, b::Bool) = a | b - function Base.getindex(A::CuSparseMatrixCSC{T}, i0::Integer, i1::Integer) where T @boundscheck checkbounds(A, i0, i1) r1 = Int(A.colPtr[i1]) r2 = Int(A.colPtr[i1+1]-1) - result = zero(T) - for k in r1:r2 - rowvals(A)[k] == i0 && (result = sum_duplicate(result, nonzeros(A)[k])) - end - return result + (r1 > r2) && return zero(T) + r1 = searchsortedfirst(rowvals(A), i0, r1, r2, Base.Order.Forward) + (r1 > r2 || rowvals(A)[r1] != i0) && return zero(T) + nonzeros(A)[r1] end function Base.getindex(A::CuSparseMatrixCSR{T}, i0::Integer, i1::Integer) where T @boundscheck checkbounds(A, i0, i1) c1 = Int(A.rowPtr[i0]) c2 = Int(A.rowPtr[i0+1]-1) - result = zero(T) - for k in c1:c2 - A.colVal[k] == i1 && (result = sum_duplicate(result, nonzeros(A)[k])) - end - return result + (c1 > c2) && return zero(T) + c1 = searchsortedfirst(A.colVal, i1, c1, c2, Base.Order.Forward) + (c1 > c2 || A.colVal[c1] != i1) && return zero(T) + nonzeros(A)[c1] end function Base.getindex(A::CuSparseMatrixCOO{T}, i0::Integer, i1::Integer) where T @boundscheck checkbounds(A, i0, i1) - # cuSPARSE only guarantees COO is sorted by row, so binary-search the row - # range but linear-scan for the column. r1 = searchsortedfirst(A.rowInd, i0, Base.Order.Forward) (r1 > length(A.rowInd) || A.rowInd[r1] > i0) && return zero(T) - r2 = searchsortedlast(A.rowInd, i0, Base.Order.Forward) - result = zero(T) - for k in r1:r2 - A.colInd[k] == i1 && (result = sum_duplicate(result, nonzeros(A)[k])) - end - return result + r2 = min(searchsortedfirst(A.rowInd, i0+1, Base.Order.Forward), length(A.rowInd)) + c1 = searchsortedfirst(A.colInd, i1, r1, r2, Base.Order.Forward) + (c1 > r2 || c1 == length(A.colInd) + 1 || A.colInd[c1] > i1) && return zero(T) + nonzeros(A)[c1] end function Base.getindex(A::CuSparseMatrixBSR{T}, i0::Integer, i1::Integer) where T @@ -530,11 +512,10 @@ function Base.getindex(A::CuSparseMatrixBSR{T}, i0::Integer, i1::Integer) where block_idx = (i0_idx - 1) * A.blockDim + i1_idx - 1 c1 = Int(A.rowPtr[i0_block]) c2 = Int(A.rowPtr[i0_block+1]-1) - result = zero(T) - for k in c1:c2 - A.colVal[k] == i1_block && (result = sum_duplicate(result, nonzeros(A)[k+block_idx])) - end - return result + (c1 > c2) && return zero(T) + c1 = searchsortedfirst(A.colVal, i1_block, c1, c2, Base.Order.Forward) + (c1 > c2 || A.colVal[c1] != i1_block) && return zero(T) + nonzeros(A)[c1+block_idx] end # matrix slices From b48050e24c6be4fc54c510c1c6909bc1b1369a3d Mon Sep 17 00:00:00 2001 From: Rainer Rodrigues Date: Wed, 22 Apr 2026 13:07:22 +0100 Subject: [PATCH 3/5] Added test for similar for COO and BSR with custom types/dims --- lib/cusparse/src/array.jl | 10 +++++----- lib/cusparse/test/construction.jl | 24 ++++++++++++++++++++++++ test/core/device/intrinsics.jl | 13 +++++++++++++ 3 files changed, 42 insertions(+), 5 deletions(-) diff --git a/lib/cusparse/src/array.jl b/lib/cusparse/src/array.jl index 49c5f6fa31..c7fd673d62 100644 --- a/lib/cusparse/src/array.jl +++ b/lib/cusparse/src/array.jl @@ -295,20 +295,20 @@ Base.similar(Mat::CuSparseMatrixCOO, dims::Tuple{Int, Int}) = similar(Mat, dims. Base.similar(Mat::CuSparseArrayCSR) = CuSparseArrayCSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(nonzeros(Mat)), size(Mat)) - -function Base.similar(mat::CuSparseMatrixCOO, ::Type{T}, dims::Dims{2}) where {T} +function Base.similar(mat::CuSparseMatrixCOO{Tv, Ti}, ::Type{T}, N::Int, M::Int) where {Tv, Ti, T} new_rowInd = similar(mat.rowInd) new_colInd = similar(mat.colInd) new_nzVal = similar(mat.nzVal, T) - return CuSparseMatrixCOO{T, Ti}(new_rowInd, new_colInd, new_nzVal, dims) + + return CuSparseMatrixCOO{T, Ti}(new_rowInd, new_colInd, new_nzVal, (N, M)) end -function Base.similar(mat::CuSparseMatrixBSR{Tv, Ti}, ::Type{T}, dims::Dims{2}) where {Tv, Ti, T} +function Base.similar(mat::CuSparseMatrixBSR{Tv, Ti}, ::Type{T}, N::Int, M::Int) where {Tv, Ti, T} new_rowPtr = similar(mat.rowPtr) new_colVal = similar(mat.colVal) new_nzVal = similar(mat.nzVal, T) - return CuSparseMatrixBSR{T, Ti}(new_rowPtr, new_colVal, new_nzVal, dims, mat.blockDim, mat.dir, mat.nnzb) + return CuSparseMatrixBSR{T, Ti}(new_rowPtr, new_colVal, new_nzVal, (N, M), mat.blockDim, mat.dir, mat.nnzb) end ## array interface diff --git a/lib/cusparse/test/construction.jl b/lib/cusparse/test/construction.jl index 9397e194db..04e561ca93 100644 --- a/lib/cusparse/test/construction.jl +++ b/lib/cusparse/test/construction.jl @@ -88,4 +88,28 @@ if capability(device()) >= v"5.3" end end end + +@testset "similar for COO and BSR with custom types/dims" begin + row = CuVector{Cint}([1, 2, 3]) + col = CuVector{Cint}([1, 2, 3]) + val = CuVector{Float32}([1.0, 2.0, 3.0]) + coo_mat = CuSparseMatrixCOO(row, col, val, (3, 3)) + + coo_sim = similar(coo_mat, Float64, (3, 3)) + @test eltype(coo_sim) == Float64 + @test size(coo_sim) == (3, 3) + @test typeof(coo_sim) <: CuSparseMatrixCOO + + rowPtr = CuVector{Cint}([1, 2, 3]) + colVal = CuVector{Cint}([1, 2]) + valBSR = CuVector{Float32}([1.0, 2.0, 3.0, 4.0]) + bsr_mat = CuSparseMatrixBSR(rowPtr, colVal, valBSR, 2, 'R', 1, (4, 4)) + + bsr_sim = similar(bsr_mat, Float64, (4, 4)) + @test eltype(bsr_sim) == Float64 + @test size(bsr_sim) == (4, 4) + @test typeof(bsr_sim) <: CuSparseMatrixBSR + @test bsr_sim.blockDim == bsr_mat.blockDim +end + end diff --git a/test/core/device/intrinsics.jl b/test/core/device/intrinsics.jl index 2491e33423..7052ef15f1 100644 --- a/test/core/device/intrinsics.jl +++ b/test/core/device/intrinsics.jl @@ -338,3 +338,16 @@ end ############################################################################################ +@testset "intrinsic range metadata" begin + # Create tiny kernels to isolate the x-dimension + kernel_block() = CUDA.blockDim().x + kernel_grid() = CUDA.gridDim().x + + ir_block = sprint(io -> CUDA.code_llvm(io, kernel_block, Tuple{}; dump_module=true, raw=true)) + @test occursin("!range", ir_block) + + ir_grid = sprint(io -> CUDA.code_llvm(io, kernel_grid, Tuple{}; dump_module=true, raw=true)) + @test !occursin("!range", ir_grid) +end + +############################################################################################ From 845e83c3511c12d7920645d557e4c050e1e2d9cb Mon Sep 17 00:00:00 2001 From: Rainer Rodrigues Date: Fri, 1 May 2026 08:55:21 +0100 Subject: [PATCH 4/5] Fix: Parametrize similar methods with Ti and add BSR support --- lib/cusparse/src/array.jl | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/lib/cusparse/src/array.jl b/lib/cusparse/src/array.jl index c7fd673d62..9f496b419e 100644 --- a/lib/cusparse/src/array.jl +++ b/lib/cusparse/src/array.jl @@ -277,9 +277,10 @@ Base.similar(Mat::CuSparseMatrixCSR, T::Type) = CuSparseMatrixCSR(copy(Mat.rowPt Base.similar(Mat::CuSparseMatrixBSR, T::Type) = CuSparseMatrixBSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(nonzeros(Mat), T), Mat.blockDim, Mat.dir, nnz(Mat), size(Mat)) Base.similar(Mat::CuSparseMatrixCOO, T::Type) = CuSparseMatrixCOO(copy(Mat.rowInd), copy(Mat.colInd), similar(nonzeros(Mat), T), size(Mat), nnz(Mat)) -Base.similar(Mat::CuSparseMatrixCSC, T::Type, N::Int, M::Int) = CuSparseMatrixCSC(CUDACore.zeros(Int32, 1), CUDACore.zeros(Int32, 0), CuVector{T}(undef, 0), (N, M)) -Base.similar(Mat::CuSparseMatrixCSR, T::Type, N::Int, M::Int) = CuSparseMatrixCSR(CUDACore.zeros(Int32, 1), CUDACore.zeros(Int32, 0), CuVector{T}(undef, 0), (N,M)) -Base.similar(Mat::CuSparseMatrixCOO, T::Type, N::Int, M::Int) = CuSparseMatrixCOO(CUDACore.zeros(Int32, 0), CUDACore.zeros(Int32, 0), CuVector{T}(undef, 0), (N,M)) +Base.similar(Mat::CuSparseMatrixCSC{Tv, Ti}, ::Type{T}, N::Int, M::Int) where {Tv, Ti, T} = CuSparseMatrixCSC(CUDACore.zeros(Ti, 1), CUDACore.zeros(Ti, 0), CuVector{T}(undef, 0), (N, M)) +Base.similar(Mat::CuSparseMatrixCSR{Tv, Ti}, ::Type{T}, N::Int, M::Int) where {Tv, Ti, T} = CuSparseMatrixCSR(CUDACore.zeros(Ti, 1), CUDACore.zeros(Ti, 0), CuVector{T}(undef, 0), (N, M)) +Base.similar(Mat::CuSparseMatrixCOO{Tv, Ti}, ::Type{T}, N::Int, M::Int) where {Tv, Ti, T} = CuSparseMatrixCOO(CUDACore.zeros(Ti, 0), CUDACore.zeros(Ti, 0), CuVector{T}(undef, 0), (N, M)) +Base.similar(Mat::CuSparseMatrixBSR{Tv, Ti}, ::Type{T}, N::Int, M::Int) where {Tv, Ti, T} = CuSparseMatrixBSR(CUDACore.zeros(Ti, 1), CUDACore.zeros(Ti, 0), CuVector{T}(undef, 0), Mat.blockDim, Mat.dir, 0, (N, M)) Base.similar(Mat::CuSparseMatrixCSC{Tv, Ti}, N::Int, M::Int) where {Tv, Ti} = similar(Mat, Tv, N, M) Base.similar(Mat::CuSparseMatrixCSR{Tv, Ti}, N::Int, M::Int) where {Tv, Ti} = similar(Mat, Tv, N, M) @@ -295,22 +296,6 @@ Base.similar(Mat::CuSparseMatrixCOO, dims::Tuple{Int, Int}) = similar(Mat, dims. Base.similar(Mat::CuSparseArrayCSR) = CuSparseArrayCSR(copy(Mat.rowPtr), copy(Mat.colVal), similar(nonzeros(Mat)), size(Mat)) -function Base.similar(mat::CuSparseMatrixCOO{Tv, Ti}, ::Type{T}, N::Int, M::Int) where {Tv, Ti, T} - new_rowInd = similar(mat.rowInd) - new_colInd = similar(mat.colInd) - new_nzVal = similar(mat.nzVal, T) - - return CuSparseMatrixCOO{T, Ti}(new_rowInd, new_colInd, new_nzVal, (N, M)) -end - -function Base.similar(mat::CuSparseMatrixBSR{Tv, Ti}, ::Type{T}, N::Int, M::Int) where {Tv, Ti, T} - new_rowPtr = similar(mat.rowPtr) - new_colVal = similar(mat.colVal) - new_nzVal = similar(mat.nzVal, T) - - return CuSparseMatrixBSR{T, Ti}(new_rowPtr, new_colVal, new_nzVal, (N, M), mat.blockDim, mat.dir, mat.nnzb) -end - ## array interface Base.length(g::CuSparseVector) = g.len From 54f12f661894baaac82304bb2088c577078bcef0 Mon Sep 17 00:00:00 2001 From: Rainer Rodrigues Date: Mon, 18 May 2026 12:03:03 +0100 Subject: [PATCH 5/5] Remove accidentally included intrinsics test to isolate cuSPARSE fix --- test/core/device/intrinsics.jl | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/test/core/device/intrinsics.jl b/test/core/device/intrinsics.jl index 7052ef15f1..2491e33423 100644 --- a/test/core/device/intrinsics.jl +++ b/test/core/device/intrinsics.jl @@ -338,16 +338,3 @@ end ############################################################################################ -@testset "intrinsic range metadata" begin - # Create tiny kernels to isolate the x-dimension - kernel_block() = CUDA.blockDim().x - kernel_grid() = CUDA.gridDim().x - - ir_block = sprint(io -> CUDA.code_llvm(io, kernel_block, Tuple{}; dump_module=true, raw=true)) - @test occursin("!range", ir_block) - - ir_grid = sprint(io -> CUDA.code_llvm(io, kernel_grid, Tuple{}; dump_module=true, raw=true)) - @test !occursin("!range", ir_grid) -end - -############################################################################################