Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion lib/JLArrays/src/JLArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,10 @@ end

function (obj::Kernel{JLBackend})(args...; ndrange=nothing, workgroupsize=nothing)
ndrange, workgroupsize, _, _ = launch_config(obj, ndrange, workgroupsize)
device_args = jlconvert.(args)
# Use `map` rather than `jlconvert.(args)` to skip the broadcast
# machinery (broadcasted/materialize/ntuple) that would otherwise
# be specialized per unique arg-tuple type.
device_args = map(jlconvert, args)
new_obj = convert_to_cpu(obj)
new_obj(device_args...; ndrange, workgroupsize)
end
Expand Down
19 changes: 10 additions & 9 deletions src/host/abstractarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ unsafe_free!(x::AbstractGPUArray) = unsafe_free!(storage(x))

using Serialization: AbstractSerializer, serialize_type

function Serialization.serialize(s::AbstractSerializer, t::T) where T <: AbstractGPUArray
serialize_type(s, T)
function Serialization.serialize(s::AbstractSerializer, @nospecialize(t::AbstractGPUArray))
serialize_type(s, typeof(t))
serialize(s, Array(t))
end

Expand All @@ -136,16 +136,17 @@ end
struct ToArray end
Adapt.adapt_storage(::ToArray, xs::AbstractGPUArray) = convert(Array, xs)

# display
Base.print_array(io::IO, X::AnyGPUArray) =
# display: show is called on the materialised CPU copy, so no need to
# specialize the forwarders per element type / wrapper.
Base.print_array(io::IO, @nospecialize(X::AnyGPUArray)) =
Base.print_array(io, adapt(ToArray(), X))

# show
Base._show_nonempty(io::IO, X::AnyGPUArray, prefix::String) =
Base._show_nonempty(io::IO, @nospecialize(X::AnyGPUArray), prefix::String) =
Base._show_nonempty(io, adapt(ToArray(), X), prefix)
Base._show_empty(io::IO, X::AnyGPUArray) =
Base._show_empty(io::IO, @nospecialize(X::AnyGPUArray)) =
Base._show_empty(io, adapt(ToArray(), X))
Base.show_vector(io::IO, v::AnyGPUArray, args...) =
Base.show_vector(io::IO, @nospecialize(v::AnyGPUArray), args...) =
Base.show_vector(io, adapt(ToArray(), v), args...)

## collect to CPU (discarding wrapper type)
Expand Down Expand Up @@ -324,7 +325,7 @@ end

Base.copy(x::AbstractGPUArray) = error("Not implemented") # COV_EXCL_LINE

Base.deepcopy_internal(x::AbstractGPUArray, ::IdDict) = copy(x)
Base.deepcopy_internal(@nospecialize(x::AbstractGPUArray), ::IdDict) = copy(x)


# filtering
Expand All @@ -345,7 +346,7 @@ end

# this is needed because copyto! of most GPU arrays
# doesn't currently support Tuple sources
function Base.append!(a::AbstractGPUVector, items::Tuple)
function Base.append!(a::AbstractGPUVector, @nospecialize(items::Tuple))
append!(a, collect(items))
return a
end
2 changes: 1 addition & 1 deletion src/host/construction.jl
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ function hasfieldcount(@nospecialize(dt))
end

# for finding specific element types, e.g., when Float64 is unsupported
function contains_eltype(T, typ)
function contains_eltype(@nospecialize(T), @nospecialize(typ))
if T === typ
return true
elseif T isa Union
Expand Down
32 changes: 21 additions & 11 deletions test/testsuite.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,36 @@ using Test

using Adapt

test_result(a, b; kwargs...) = a == b
test_result(@nospecialize(a), @nospecialize(b); kwargs...) = a == b
test_result(a::Number, b::Number; kwargs...) = ≈(a, b; kwargs...)
test_result(a::Missing, b::Missing; kwargs...) = true
test_result(a::Number, b::Missing; kwargs...) = false
test_result(a::Missing, b::Number; kwargs...) = false
function test_result(a::AbstractArray{T}, b::AbstractArray{T}; kwargs...) where {T<:Number}
≈(collect(a), collect(b); kwargs...)
end
function test_result(a::AbstractArray{T}, b::AbstractArray{T};
kwargs...) where {T<:NTuple{N,<:Number} where {N}}
ET = eltype(T)
≈(reinterpret(ET, collect(a)), reinterpret(ET, collect(b)); kwargs...)
# Branch on eltype at runtime so one compiled method body handles every
# (T, ndims) combination — the `where T` version would still instantiate
# per element type even under @nospecialize.
function test_result(@nospecialize(a::AbstractArray), @nospecialize(b::AbstractArray); kwargs...)
T = eltype(a)
# The original `where T<:…` methods required matching eltypes; preserve
# that by falling through to `a == b` when they diverge.
if eltype(b) === T
if T <: Number
return ≈(collect(a), collect(b); kwargs...)
elseif T <: NTuple{N,<:Number} where {N}
ET = eltype(T)
return ≈(reinterpret(ET, collect(a)), reinterpret(ET, collect(b)); kwargs...)
end
end
a == b
end
function test_result(as::NTuple{N,Any}, bs::NTuple{N,Any}; kwargs...) where {N}
function test_result(@nospecialize(as::Tuple), @nospecialize(bs::Tuple); kwargs...)
length(as) == length(bs) || return false
all(zip(as, bs)) do (a, b)
test_result(a, b; kwargs...)
end
end

function compare(f, AT::Type{<:AbstractGPUArray}, xs...; kwargs...)
function compare(@nospecialize(f), AT::Type{<:AbstractGPUArray}, @nospecialize(xs...); kwargs...)
# copy on the CPU, adapt on the GPU, but keep Ref's
cpu_in = map(x -> isa(x, Base.RefValue) ? x[] : deepcopy(x), xs)
gpu_in = map(x -> isa(x, Base.RefValue) ? x[] : adapt(AT, x), xs)
Expand All @@ -46,7 +56,7 @@ function compare(f, AT::Type{<:AbstractGPUArray}, xs...; kwargs...)
test_result(cpu_out, gpu_out; kwargs...)
end

function compare(f, AT::Type{<:Array}, xs...; kwargs...)
function compare(@nospecialize(f), AT::Type{<:Array}, @nospecialize(xs...); kwargs...)
# no need to actually run these tests: we have nothing to compare against,
# and we'll run it on a CPU array anyhow when comparing to a GPU array.
#
Expand Down
4 changes: 2 additions & 2 deletions test/testsuite/random.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
end

@testset "rand" begin # uniform
@testset "$T $d" for T in eltypes, d in (2, (2,2), (2,2,2), 3, (3,3))
@testset "$T $d" for T in eltypes, d in (2, (2,2), (2,2,2))
A = AT{T}(undef, d)
B = copy(A)
rand!(rng, A)
Expand All @@ -31,7 +31,7 @@

@testset "randn" begin # normally-distributed
@testset "$T $d" for T in filter(isrealfloattype, eltypes),
d in (2, (2,2), (2,2,2), 3, (3,3))
d in (2, (2,2), (2,2,2))
A = AT{T}(undef, d)
B = copy(A)
randn!(rng, A)
Expand Down
79 changes: 41 additions & 38 deletions test/testsuite/reductions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,11 @@ end
end
end
# Test more corner cases. Tests from AcceleratedKernels.jl
for dims in [1,2,3,4,[1,2],[1,3],[1,4],[2,3],[2,4],[3,4],[1,2,3],[1,2,4],[1,3,4],[2,3,4],[1,2,3,4]]
for isize in 0:3
for jsize in 0:3
for ksize in 0:3
@test compare(A->mapreduce(x->x+x, +, A; init=zero(Int32), dims), AT, rand(Int32(1):Int32(10), isize, jsize, ksize))
end
end
end
# Cover empty (size 0) and non-singleton (size 3) axes; the size-10 loop above
# already covers the common non-edge shape.
for dims in [1,2,3,4,[1,2],[1,3],[1,4],[2,3],[2,4],[3,4],[1,2,3],[1,2,4],[1,3,4],[2,3,4],[1,2,3,4]],
isize in (0, 3), jsize in (0, 3), ksize in (0, 3)
@test compare(A->mapreduce(x->x+x, +, A; init=zero(Int32), dims), AT, rand(Int32(1):Int32(10), isize, jsize, ksize))
end
end

Expand All @@ -84,35 +81,38 @@ end
end
end
# Test more corner cases. Tests from AcceleratedKernels.jl
for dims in [1,2,3,4,[1,2],[1,3],[1,4],[2,3],[2,4],[3,4],[1,2,3],[1,2,4],[1,3,4],[2,3,4],[1,2,3,4]]
for isize in 0:3
for jsize in 0:3
for ksize in 0:3
@test compare(A->reduce(+, A; init=zero(Int32), dims), AT, rand(Int32(1):Int32(10), isize, jsize, ksize))
end
end
end
# Cover empty (size 0) and non-singleton (size 3) axes; the size-10 loop above
# already covers the common non-edge shape.
for dims in [1,2,3,4,[1,2],[1,3],[1,4],[2,3],[2,4],[3,4],[1,2,3],[1,2,4],[1,3,4],[2,3,4],[1,2,3,4]],
isize in (0, 3), jsize in (0, 3), ksize in (0, 3)
@test compare(A->reduce(+, A; init=zero(Int32), dims), AT, rand(Int32(1):Int32(10), isize, jsize, ksize))
end
end

@testsuite "reductions/sum prod" (AT, eltypes)->begin
@testset "$ET" for ET in eltypes
range = ET <: Real ? (ET(1):ET(10)) : ET
for (sz,dims) in [(10,)=>[1], (10,10)=>[1,2], (10,10,10)=>[1,2,3], (10,10,10)=>[],
(10,)=>:, (10,10)=>:, (10,10,10)=>:,
(10,10,10)=>[1], (10,10,10)=>[2], (10,10,10)=>[3],
(0,)=>[1]]

# whole-array reductions: exercise each unique shape only once
for sz in ((10,), (10,10), (10,10,10), (0,))
@test compare(A->sum(A), AT, rand(range, sz))
@test compare(A->sum(A; dims=dims), AT, rand(range, sz))
@test compare(A->prod(A), AT, rand(range, sz))
@test compare(A->prod(A; dims=dims), AT, rand(range, sz))
if typeof(abs(rand(range))) in eltypes
# abs(::Complex{Int}) promotes to Float64
@test compare(A->sum(abs, A), AT, rand(range, sz))
@test compare(A->prod(abs, A), AT, rand(range, sz))
end
end

# reductions along specific dims
for (sz,dims) in [(10,)=>[1], (10,10)=>[1,2], (10,10,10)=>[1,2,3], (10,10,10)=>[],
(10,)=>:, (10,10)=>:, (10,10,10)=>:,
(10,10,10)=>[1], (10,10,10)=>[2], (10,10,10)=>[3],
(0,)=>[1]]
@test compare(A->sum(A; dims=dims), AT, rand(range, sz))
@test compare(A->prod(A; dims=dims), AT, rand(range, sz))
end

if ET in (Float32, Float64, Int64, ComplexF32, ComplexF64)
# smaller-scale test to avoid very large values and roundoff issues
for (sz,red) in [(2,)=>(1,), (2,2)=>(1,1), (2,2,2)=>(1,1,1), (2,2,2)=>(2,2,2),
Expand All @@ -126,30 +126,33 @@ end

@testsuite "reductions/minimum maximum extrema" (AT, eltypes)->begin
@testset "$ET" for ET in eltypes
ET <: Complex && continue
range = ET <: Real ? (ET(1):ET(10)) : ET

# whole-array reductions: exercise each unique shape only once
for sz in ((10,), (10,10), (10,10,10))
@test compare(A->minimum(A), AT, rand(range, sz))
@test compare(A->minimum(x->x*x, A), AT, rand(range, sz))
@test compare(A->maximum(A), AT, rand(range, sz))
@test compare(A->maximum(x->x*x, A), AT, rand(range, sz))
@test compare(A->extrema(A), AT, rand(range, sz))
@test compare(A->extrema(x->x*x, A), AT, rand(range, sz))
end

# reductions along specific dims
for (sz,dims) in [(10,)=>[1], (10,10)=>[1,2], (10,10,10)=>[1,2,3], (10,10,10)=>[],
(10,)=>:, (10,10)=>:, (10,10,10)=>:,
(10,10,10)=>[1], (10,10,10)=>[2], (10,10,10)=>[3]]
if !(ET <: Complex)
@test compare(A->minimum(A), AT, rand(range, sz))
@test compare(A->minimum(x->x*x, A), AT, rand(range, sz))
@test compare(A->minimum(A; dims=dims), AT, rand(range, sz))
@test compare(A->maximum(A), AT, rand(range, sz))
@test compare(A->maximum(x->x*x, A), AT, rand(range, sz))
@test compare(A->maximum(A; dims=dims), AT, rand(range, sz))
@test compare(A->extrema(A), AT, rand(range, sz))
@test compare(A->extrema(x->x*x, A), AT, rand(range, sz))
@test compare(A->extrema(A; dims=dims), AT, rand(range, sz))
end
@test compare(A->minimum(A; dims=dims), AT, rand(range, sz))
@test compare(A->maximum(A; dims=dims), AT, rand(range, sz))
@test compare(A->extrema(A; dims=dims), AT, rand(range, sz))
end

for (sz,red) in [(10,)=>(1,), (10,10)=>(1,1), (10,10,10)=>(1,1,1), (10,10,10)=>(10,10,10),
(10,10,10)=>(1,10,10), (10,10,10)=>(10,1,10), (10,10,10)=>(10,10,1)]
if !(ET <: Complex)
@test compare((A,R)->minimum!(R, A), AT, rand(range, sz), fill(typemax(ET), red))
@test compare((A,R)->maximum!(R, A), AT, rand(range, sz), fill(typemin(ET), red))
@test compare((A,R)->extrema!(R, A), AT, rand(range, sz), fill((typemax(ET),typemin(ET)), red))
end
@test compare((A,R)->minimum!(R, A), AT, rand(range, sz), fill(typemax(ET), red))
@test compare((A,R)->maximum!(R, A), AT, rand(range, sz), fill(typemin(ET), red))
@test compare((A,R)->extrema!(R, A), AT, rand(range, sz), fill((typemax(ET),typemin(ET)), red))
end
end
end
Expand Down
Loading