The Minimal Working Example (MWE) for this bug:
using CUDA
using LinearAlgebra
using LinearAlgebra: checksquare
# Adapted from uniformscaling.jl:287 as of Julia 1.10.9
# TODO: Discuss this change upstream:
function LinearAlgebra.mul!(
    out::SubArray{T, 2, <: CuMatrix{T}},
    a::Number,
    B::UniformScaling,
    α::Number,
    β::Number,
) where {T}
    # In-place update of a square matrix view: scale the existing contents of `out`
    # by β, then add the scalar `a * B.λ * α` to every diagonal entry. Returns `out`.
    checksquare(out)  # verifies that `out` is square

    # Apply β to whatever `out` currently holds.
    if iszero(β)
        # The old contents contribute nothing, so overwrite them with zeros.
        fill!(out, zero(T))
    elseif !isone(β)
        rmul!(out, β)
    end
    # When β is one, `out` is left untouched at this point.

    # Combined scalar for the diagonal, converted to the element type of `out`.
    shift = convert(T, a * B.λ * α)
    iszero(shift) && return out  # nothing to add to the diagonal

    diag_idx = diagind(out)
    out[diag_idx] .+= shift # allocates :-(
    return out
end
# Reproduction: create an n-by-n matrix with CUDA.randn and take a view covering
# all of its rows and columns.
n = 100
X = CUDA.randn(n, n)
Xv = @view X[1:n, 1:n]
# Sanity check: the view must be a SubArray, the wrapper type that the `mul!`
# method defined above accepts as `out`.
@assert Xv isa SubArray
# Three-argument call with the identity `I`. Presumably this forwards to the
# five-argument method defined above with default α and β — TODO confirm.
# This is the call that fails.
mul!(Xv, 42, I) # boom
Everything else is identical to #3041.
This first came up in a comment on #3041.
The Minimal Working Example (MWE) for this bug:
All the rest is identical to #3041.
First appeared in #3041 (comment).