From 810464cbb84856d479588ea13beea618a174cad1 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Sat, 18 Oct 2025 10:05:20 +0200 Subject: [PATCH 1/2] Remove superfluous testsets. --- test/array.jl | 200 ++++++++++++++++----------------- test/atomics.jl | 36 +++--- test/cmdqueue.jl | 46 ++++---- test/context.jl | 148 ++++++++++++------------ test/device.jl | 170 ++++++++++++++-------------- test/event.jl | 94 ++++++++-------- test/execution.jl | 4 - test/intrinsics.jl | 4 - test/kernel.jl | 274 ++++++++++++++++++++++----------------------- test/memory.jl | 46 ++++---- test/platform.jl | 38 +++---- test/program.jl | 110 +++++++++--------- 12 files changed, 572 insertions(+), 598 deletions(-) diff --git a/test/array.jl b/test/array.jl index d2dfcf68..b89eae31 100644 --- a/test/array.jl +++ b/test/array.jl @@ -1,116 +1,114 @@ using LinearAlgebra import Adapt -@testset "CLArray" begin - @testset "constructors" begin - xs = CLArray{Int, 2, cl.Buffer}(undef, 2, 3) - @test collect(CLArray([1 2; 3 4])) == [1 2; 3 4] - @test testf(vec, rand(Float32, 5, 3)) - @test Base.elsize(xs) == sizeof(Int) - @test CLArray{Int, 2}(xs) === xs - - @test device_accessible(xs) - @test !host_accessible(xs) - @test_throws ArgumentError Base.unsafe_convert(Ptr{Int}, xs) - @test_throws ArgumentError Base.unsafe_convert(Ptr{Float32}, xs) - - @test collect(OpenCL.zeros(Float32, 2, 2)) == zeros(Float32, 2, 2) - @test collect(OpenCL.ones(Float32, 2, 2)) == ones(Float32, 2, 2) - - @test collect(OpenCL.fill(0, 2, 2)) == zeros(Int, 2, 2) - @test collect(OpenCL.fill(1, 2, 2)) == ones(Int, 2, 2) - end +@testset "constructors" begin + xs = CLArray{Int, 2, cl.Buffer}(undef, 2, 3) + @test collect(CLArray([1 2; 3 4])) == [1 2; 3 4] + @test testf(vec, rand(Float32, 5, 3)) + @test Base.elsize(xs) == sizeof(Int) + @test CLArray{Int, 2}(xs) === xs + + @test device_accessible(xs) + @test !host_accessible(xs) + @test_throws ArgumentError Base.unsafe_convert(Ptr{Int}, xs) + @test_throws ArgumentError Base.unsafe_convert(Ptr{Float32}, xs) + + @test collect(OpenCL.zeros(Float32, 2, 2)) == zeros(Float32, 2, 2) + @test collect(OpenCL.ones(Float32, 2, 2)) == ones(Float32, 2, 2) + + @test collect(OpenCL.fill(0, 2, 2)) == zeros(Int, 2, 2) + @test collect(OpenCL.fill(1, 2, 2)) == ones(Int, 2, 2) +end - @testset "adapt" begin - A = rand(Float32, 3, 3) - dA = CLArray(A) - @test Adapt.adapt(Array, dA) == A - @test Adapt.adapt(CLArray, A) isa CLArray - @test Array(Adapt.adapt(CLArray, A)) == A - end +@testset "adapt" begin + A = rand(Float32, 3, 3) + dA = CLArray(A) + @test Adapt.adapt(Array, dA) == A + @test Adapt.adapt(CLArray, A) isa CLArray + @test Array(Adapt.adapt(CLArray, A)) == A +end - @testset "reshape" begin - A = [ - 1 2 3 4 - 5 6 7 8 - ] - gA = reshape(CLArray(A), 1, 8) - _A = reshape(A, 1, 8) - _gA = Array(gA) - @test all(_A .== _gA) - A = [1, 2, 3, 4] - gA = reshape(CLArray(A), 4) - end +@testset "reshape" begin + A = [ + 1 2 3 4 + 5 6 7 8 + ] + gA = reshape(CLArray(A), 1, 8) + _A = reshape(A, 1, 8) + _gA = Array(gA) + @test all(_A .== _gA) + A = [1, 2, 3, 4] + gA = reshape(CLArray(A), 4) +end - @testset "fill(::SubArray)" begin - xs = OpenCL.zeros(Float32, 3) - fill!(view(xs, 2:2), 1) - @test Array(xs) == [0, 1, 0] - end +@testset "fill(::SubArray)" begin + xs = OpenCL.zeros(Float32, 3) + fill!(view(xs, 2:2), 1) + @test Array(xs) == [0, 1, 0] +end - @testset "reinterpret of view with non-aligned offset" begin - # reinterpreting a view to a larger element type where the byte offset - # is not a multiple of the new element size - a = CLArray(Int32[1,2,3,4,5,6,7,8,9]) - v = view(a, 2:7) # offset of 1 Int32 = 4 bytes - r = reinterpret(Int64, v) # Int64 = 8 bytes; 4 is not a multiple of 8 - @test Array(r) == reinterpret(Int64, @view Array(a)[2:7]) +@testset "reinterpret of view with non-aligned offset" begin + # reinterpreting a view to a larger element type where the byte offset + # is not a multiple of the new element size + a = CLArray(Int32[1,2,3,4,5,6,7,8,9]) + v = view(a, 2:7) # offset of 1 Int32 = 4 bytes + r = reinterpret(Int64, v) # Int64 = 8 bytes; 4 is not a multiple of 8 + @test Array(r) == reinterpret(Int64, @view Array(a)[2:7]) +end +# TODO: Look into how to port the @sync + +if cl.USMBackend() in cl.supported_memory_backends(cl.device()) + @testset "shared buffers & unsafe_wrap" begin + a = CLVector{Int, cl.UnifiedSharedMemory}(undef, 2) + + # check that basic operations work on arrays backed by shared memory + fill!(a, 40) + a .+= 2 + @test Array(a) == [42, 42] + + # derive an Array object and test that the memory keeps in sync + b = unsafe_wrap(Array, a) + b[1] = 100 + @test Array(a) == [100, 42] + copyto!(a, 2, [200], 1, 1) + cl.finish(cl.queue()) + @test b == [100, 200] end - # TODO: Look into how to port the @sync - - if cl.USMBackend() in cl.supported_memory_backends(cl.device()) - @testset "shared buffers & unsafe_wrap" begin - a = CLVector{Int, cl.UnifiedSharedMemory}(undef, 2) - - # check that basic operations work on arrays backed by shared memory - fill!(a, 40) - a .+= 2 - @test Array(a) == [42, 42] - - # derive an Array object and test that the memory keeps in sync - b = unsafe_wrap(Array, a) - b[1] = 100 - @test Array(a) == [100, 42] - copyto!(a, 2, [200], 1, 1) - cl.finish(cl.queue()) - @test b == [100, 200] - end - - # https://github.com/JuliaGPU/CUDA.jl/issues/2191 - @testset "preserving memory types" begin - a = CLVector{Int, cl.UnifiedSharedMemory}([1]) - @test OpenCL.memtype(a) == cl.UnifiedSharedMemory - - # unified-ness should be preserved - b = a .+ 1 - @test OpenCL.memtype(b) == cl.UnifiedSharedMemory - - # when there's a conflict, we should defer to unified memory - c = CLVector{Int, cl.UnifiedSharedMemory}([1]) - d = CLVector{Int, cl.UnifiedDeviceMemory}([1]) - e = c .+ d - @test OpenCL.memtype(e) == cl.UnifiedSharedMemory - end + + # https://github.com/JuliaGPU/CUDA.jl/issues/2191 + @testset "preserving memory types" begin + a = CLVector{Int, cl.UnifiedSharedMemory}([1]) + @test OpenCL.memtype(a) == cl.UnifiedSharedMemory + + # unified-ness should be preserved + b = a .+ 1 + @test OpenCL.memtype(b) == cl.UnifiedSharedMemory + + # when there's a conflict, we should defer to unified memory + c = CLVector{Int, cl.UnifiedSharedMemory}([1]) + d = CLVector{Int, cl.UnifiedDeviceMemory}([1]) + e = c .+ d + @test OpenCL.memtype(e) == cl.UnifiedSharedMemory end +end - @testset "resizing" begin - a = CLArray([1, 2, 3]) +@testset "resizing" begin + a = CLArray([1, 2, 3]) - resize!(a, 3) - @test length(a) == 3 - @test Array(a) == [1, 2, 3] + resize!(a, 3) + @test length(a) == 3 + @test Array(a) == [1, 2, 3] - resize!(a, 5) - @test length(a) == 5 - @test Array(a)[1:3] == [1, 2, 3] + resize!(a, 5) + @test length(a) == 5 + @test Array(a)[1:3] == [1, 2, 3] - resize!(a, 2) - @test length(a) == 2 - @test Array(a)[1:2] == [1, 2] + resize!(a, 2) + @test length(a) == 2 + @test Array(a)[1:2] == [1, 2] - b = CLArray{Int}(undef, 0) - @test length(b) == 0 - resize!(b, 1) - @test length(b) == 1 - end + b = CLArray{Int}(undef, 0) + @test length(b) == 0 + resize!(b, 1) + @test length(b) == 1 end diff --git a/test/atomics.jl b/test/atomics.jl index 71fefa8b..9b70912d 100644 --- a/test/atomics.jl +++ b/test/atomics.jl @@ -101,28 +101,26 @@ end if T == Float64 && !("cl_khr_fp64" in cl.device().extensions) continue end - if "cl_ext_float_atomics" in cl.device().extensions - @eval function atomic_float_add(counter, val::$T) - @builtin_ccall( - "atomic_add", $T, - (LLVMPtr{$T, AS.CrossWorkgroup}, $T), - pointer(counter), val, - ) - return - end +if "cl_ext_float_atomics" in cl.device().extensions + @eval function atomic_float_add(counter, val::$T) + @builtin_ccall( + "atomic_add", $T, + (LLVMPtr{$T, AS.CrossWorkgroup}, $T), + pointer(counter), val, + ) + return + end - @testset "SPV_EXT_shader_atomic_float_add extension" begin - a = OpenCL.zeros(T) - @opencl global_size = 1000 extensions = ["SPV_EXT_shader_atomic_float_add"] atomic_float_add(a, one(T)) - @test OpenCL.@allowscalar a[] == T(1000.0) + @testset "SPV_EXT_shader_atomic_float_add extension" begin + a = OpenCL.zeros(T) + @opencl global_size = 1000 extensions = ["SPV_EXT_shader_atomic_float_add"] atomic_float_add(a, one(T)) + @test OpenCL.@allowscalar a[] == T(1000.0) - spv = sprint() do io - OpenCL.code_native(io, atomic_float_add, Tuple{CLDeviceArray{T, 0, 1}, T}; extensions = ["SPV_EXT_shader_atomic_float_add"]) - end - @test occursin("OpExtension \"SPV_EXT_shader_atomic_float_add\"", spv) - @test occursin("OpAtomicFAddEXT", spv) + spv = sprint() do io + OpenCL.code_native(io, atomic_float_add, Tuple{CLDeviceArray{T, 0, 1}, T}; extensions = ["SPV_EXT_shader_atomic_float_add"]) end + @test occursin("OpExtension \"SPV_EXT_shader_atomic_float_add\"", spv) + @test occursin("OpAtomicFAddEXT", spv) end - end end diff --git a/test/cmdqueue.jl b/test/cmdqueue.jl index f31ed9b4..f2ff64b1 100644 --- a/test/cmdqueue.jl +++ b/test/cmdqueue.jl @@ -1,27 +1,25 @@ -@testset "CmdQueue" begin - @testset "constructor" begin - @test cl.CmdQueue() != nothing - @test cl.CmdQueue(:profile) != nothing - try - cl.CmdQueue(:out_of_order) - cl.CmdQueue((:profile, :out_of_order)) - catch err - @warn("Platform $(cl.device().platform.name) does not seem to " * - "suport out of order queues: \n$err",maxlog=1, - exception=(err, catch_backtrace())) - end - @test_throws ArgumentError cl.CmdQueue(:unrecognized_flag) - for flag in [:profile, :out_of_order] - @test_throws ArgumentError cl.CmdQueue((flag, :unrecognized_flag)) - @test_throws ArgumentError cl.CmdQueue((flag, flag)) - end +@testset "constructor" begin + @test cl.CmdQueue() != nothing + @test cl.CmdQueue(:profile) != nothing + try + cl.CmdQueue(:out_of_order) + cl.CmdQueue((:profile, :out_of_order)) + catch err + @warn("Platform $(cl.device().platform.name) does not seem to " * + "suport out of order queues: \n$err",maxlog=1, + exception=(err, catch_backtrace())) end - - @testset "info" begin - q = cl.CmdQueue() - @test q.context == cl.context() - @test q.device == cl.device() - @test q.reference_count > 0 - @test typeof(q.properties) == cl.cl_command_queue_properties + @test_throws ArgumentError cl.CmdQueue(:unrecognized_flag) + for flag in [:profile, :out_of_order] + @test_throws ArgumentError cl.CmdQueue((flag, :unrecognized_flag)) + @test_throws ArgumentError cl.CmdQueue((flag, flag)) end end + +@testset "info" begin + q = cl.CmdQueue() + @test q.context == cl.context() + @test q.device == cl.device() + @test q.reference_count > 0 + @test typeof(q.properties) == cl.cl_command_queue_properties +end diff --git a/test/context.jl b/test/context.jl index bc5529cc..8d6bc620 100644 --- a/test/context.jl +++ b/test/context.jl @@ -1,87 +1,85 @@ -@testset "Context" begin - @testset "constructor" begin - @test_throws MethodError (cl.Context([])) +@testset "constructor" begin + @test_throws MethodError (cl.Context([])) - ctx = cl.Context(cl.device()) - @test ctx != nothing - @test ctx.reference_count == 1 - ctx_id = pointer(ctx) + ctx = cl.Context(cl.device()) + @test ctx != nothing + @test ctx.reference_count == 1 + ctx_id = pointer(ctx) - ctx2 = cl.Context(ctx_id; retain=true) - @test ctx.reference_count == 2 - finalize(ctx2) - @test ctx.reference_count == 1 + ctx2 = cl.Context(ctx_id; retain=true) + @test ctx.reference_count == 2 + finalize(ctx2) + @test ctx.reference_count == 1 - # TODO: support switching contexts - #@testset "Context callback" begin - # function context_test_callback(arg1, arg2, arg3) - # # We're not really testing it because, nvidia doesn't seem to care about this functionality: - # # https://devtalk.nvidia.com/default/topic/497433/context-callback-never-called/ - # OpenCL.cl.log_error("Callback works") - # return - # end - # - # function create_context_error() - # empty_kernel = " - # __kernel void test() { - # int c = 1 + 1; - # };" - # try - # p = cl.Program(source = empty_kernel) |> cl.build! - # k = cl.Kernel(p, "test") - # cl.call(k; global_size=1, local_size=10000000) - # catch - # end - # end - # - # ctx = cl.Context(cl.device(), callback = context_test_callback) - # context!(ctx) do - # create_context_error() - # end - #end - end + # TODO: support switching contexts + #@testset "Context callback" begin + # function context_test_callback(arg1, arg2, arg3) + # # We're not really testing it because, nvidia doesn't seem to care about this functionality: + # # https://devtalk.nvidia.com/default/topic/497433/context-callback-never-called/ + # OpenCL.cl.log_error("Callback works") + # return + # end + # + # function create_context_error() + # empty_kernel = " + # __kernel void test() { + # int c = 1 + 1; + # };" + # try + # p = cl.Program(source = empty_kernel) |> cl.build! + # k = cl.Kernel(p, "test") + # cl.call(k; global_size=1, local_size=10000000) + # catch + # end + # end + # + # ctx = cl.Context(cl.device(), callback = context_test_callback) + # context!(ctx) do + # create_context_error() + # end + #end +end - @testset "platform properties" begin - try - cl.Context(cl.CL_DEVICE_TYPE_CPU) - catch err - @test typeof(err) == cl.CLError - # CL_DEVICE_NOT_FOUND could be throw for GPU only drivers - @test err.desc in (:CL_INVALID_PLATFORM, - :CL_DEVICE_NOT_FOUND) - end +@testset "platform properties" begin + try + cl.Context(cl.CL_DEVICE_TYPE_CPU) + catch err + @test typeof(err) == cl.CLError + # CL_DEVICE_NOT_FOUND could be throw for GPU only drivers + @test err.desc in (:CL_INVALID_PLATFORM, + :CL_DEVICE_NOT_FOUND) + end - properties = [(cl.CL_CONTEXT_PLATFORM, cl.platform())] - for (cl_dev_type, sym_dev_type) in [(cl.CL_DEVICE_TYPE_CPU, :cpu), - (cl.CL_DEVICE_TYPE_GPU, :gpu)] - if !cl.has_device_type(cl.platform(), sym_dev_type) - continue - end - @test cl.Context(sym_dev_type; properties) != nothing - @test cl.Context(cl_dev_type; properties) != nothing - ctx = cl.Context(cl_dev_type; properties) - @test !isempty(ctx.properties) - test_properties = ctx.properties + properties = [(cl.CL_CONTEXT_PLATFORM, cl.platform())] + for (cl_dev_type, sym_dev_type) in [(cl.CL_DEVICE_TYPE_CPU, :cpu), + (cl.CL_DEVICE_TYPE_GPU, :gpu)] + if !cl.has_device_type(cl.platform(), sym_dev_type) + continue + end + @test cl.Context(sym_dev_type; properties) != nothing + @test cl.Context(cl_dev_type; properties) != nothing + ctx = cl.Context(cl_dev_type; properties) + @test !isempty(ctx.properties) + test_properties = ctx.properties - @test test_properties == properties + @test test_properties == properties - platform_in_properties = false - for (t, v) in test_properties - if t == cl.CL_CONTEXT_PLATFORM - @test v.name == cl.platform().name - @test v == cl.platform() - platform_in_properties = true - break - end + platform_in_properties = false + for (t, v) in test_properties + if t == cl.CL_CONTEXT_PLATFORM + @test v.name == cl.platform().name + @test v == cl.platform() + platform_in_properties = true + break end - @test platform_in_properties - end - try - ctx2 = cl.Context(cl.CL_DEVICE_TYPE_ACCELERATOR; properties) - catch err - @test typeof(err) == cl.CLError - @test err.desc == :CL_DEVICE_NOT_FOUND end + @test platform_in_properties + end + try + ctx2 = cl.Context(cl.CL_DEVICE_TYPE_ACCELERATOR; properties) + catch err + @test typeof(err) == cl.CLError + @test err.desc == :CL_DEVICE_NOT_FOUND end end diff --git a/test/device.jl b/test/device.jl index ed5b2066..c24bbee6 100644 --- a/test/device.jl +++ b/test/device.jl @@ -1,99 +1,97 @@ -@testset "Device" begin - @testset "Type" begin - for (t, k) in zip((cl.CL_DEVICE_TYPE_GPU, cl.CL_DEVICE_TYPE_CPU, - cl.CL_DEVICE_TYPE_ACCELERATOR, cl.CL_DEVICE_TYPE_ALL), - (:gpu, :cpu, :accelerator, :all)) +@testset "Type" begin + for (t, k) in zip((cl.CL_DEVICE_TYPE_GPU, cl.CL_DEVICE_TYPE_CPU, + cl.CL_DEVICE_TYPE_ACCELERATOR, cl.CL_DEVICE_TYPE_ALL), + (:gpu, :cpu, :accelerator, :all)) - #for (dk, dt) in zip(cl.devices(cl.platform(), k), cl.devices(cl.platform(), t)) - # @fact dk == dt --> true - #end - #devices = cl.devices(cl.platform(), k) - #for device in devices - # @fact device.device_type == t --> true - #end - end + #for (dk, dt) in zip(cl.devices(cl.platform(), k), cl.devices(cl.platform(), t)) + # @fact dk == dt --> true + #end + #devices = cl.devices(cl.platform(), k) + #for device in devices + # @fact device.device_type == t --> true + #end end +end - @testset "Equality" begin - devices = cl.devices(cl.platform()) +@testset "Equality" begin + devices = cl.devices(cl.platform()) - if length(devices) > 1 - d1 = devices[1] - for d2 in devices[2:end] - @test pointer(d2) != pointer(d1) - @test hash(d2) != hash(d1) - @test isequal(d2, d1) == false - end + if length(devices) > 1 + d1 = devices[1] + for d2 in devices[2:end] + @test pointer(d2) != pointer(d1) + @test hash(d2) != hash(d1) + @test isequal(d2, d1) == false end - end + end +end - @testset "Info" begin - device_info_keys = Symbol[ - :driver_version, - :version, - :extensions, - :platform, - :name, - :device_type, - :has_image_support, - :vendor_id, - :max_compute_units, - :max_work_item_size, - :max_clock_frequency, - :address_bits, - :max_read_image_args, - :max_write_image_args, - :global_mem_size, - :max_mem_alloc_size, - :max_const_buffer_size, - :local_mem_size, - :has_local_mem, - :host_unified_memory, - :available, - :compiler_available, - :max_work_group_size, - :max_parameter_size, - :profiling_timer_resolution, - :max_image2d_shape, - :max_image3d_shape, - ] - @test isa(cl.platform(), cl.Platform) - if isdefined(Core, :FieldError) # VERSION > v"1.12.0-" - @test_throws FieldError cl.platform().zjdlkf - else - @test_throws ErrorException cl.platform().zjdlkf - end +@testset "Info" begin + device_info_keys = Symbol[ + :driver_version, + :version, + :extensions, + :platform, + :name, + :device_type, + :has_image_support, + :vendor_id, + :max_compute_units, + :max_work_item_size, + :max_clock_frequency, + :address_bits, + :max_read_image_args, + :max_write_image_args, + :global_mem_size, + :max_mem_alloc_size, + :max_const_buffer_size, + :local_mem_size, + :has_local_mem, + :host_unified_memory, + :available, + :compiler_available, + :max_work_group_size, + :max_parameter_size, + :profiling_timer_resolution, + :max_image2d_shape, + :max_image3d_shape, + ] + @test isa(cl.platform(), cl.Platform) + if isdefined(Core, :FieldError) # VERSION > v"1.12.0-" + @test_throws FieldError cl.platform().zjdlkf + else + @test_throws ErrorException cl.platform().zjdlkf + end - device = cl.device() - @test isa(device, cl.Device) - if isdefined(Core, :FieldError) # VERSION > v"1.12.0-" - @test_throws FieldError device.zjdlkf - else - @test_throws ErrorException device.zjdlkf - end - for k in device_info_keys - v = getproperty(device, k) - if k == :extensions - @test isa(v, Array) - if length(v) > 0 - @test isa(v, Array{String, 1}) - end - elseif k == :platform - @test v == cl.platform() - elseif k == :max_work_item_sizes - @test length(v) == 3 - elseif k == :max_image2d_shape - @test length(v) == 2 - elseif k == :max_image3d_shape - @test length(v) == 3 + device = cl.device() + @test isa(device, cl.Device) + if isdefined(Core, :FieldError) # VERSION > v"1.12.0-" + @test_throws FieldError device.zjdlkf + else + @test_throws ErrorException device.zjdlkf + end + for k in device_info_keys + v = getproperty(device, k) + if k == :extensions + @test isa(v, Array) + if length(v) > 0 + @test isa(v, Array{String, 1}) end + elseif k == :platform + @test v == cl.platform() + elseif k == :max_work_item_sizes + @test length(v) == 3 + elseif k == :max_image2d_shape + @test length(v) == 2 + elseif k == :max_image3d_shape + @test length(v) == 3 end + end - @test cl.queue_properties(cl.device()).profiling isa Bool - @test cl.queue_properties(cl.device()).out_of_order_exec isa Bool + @test cl.queue_properties(cl.device()).profiling isa Bool + @test cl.queue_properties(cl.device()).out_of_order_exec isa Bool - @test cl.exec_capabilities(cl.device()).native_kernel isa Bool + @test cl.exec_capabilities(cl.device()).native_kernel isa Bool - @test cl.svm_capabilities(cl.device()).fine_grain_buffer isa Bool - end + @test cl.svm_capabilities(cl.device()).fine_grain_buffer isa Bool end diff --git a/test/event.jl b/test/event.jl index d3c437cb..55796fac 100644 --- a/test/event.jl +++ b/test/event.jl @@ -3,68 +3,68 @@ if contains(cl.platform().vendor, "Intel") || contains(cl.platform().vendor, "po # hangs on Intel @warn "Skipping event tests on $(cl.platform().name)" else -@testset "Event" begin - @testset "status" begin - evt = cl.UserEvent() - evt.status - @test evt.status == :submitted - cl.complete(evt) - @test evt.status == :complete - finalize(evt) - end - @testset "wait" begin - # create user event - usr_evt = cl.UserEvent() - cl.enqueue_wait_for_events(usr_evt) +@testset "status" begin + evt = cl.UserEvent() + evt.status + @test evt.status == :submitted + cl.complete(evt) + @test evt.status == :complete + finalize(evt) +end + +@testset "wait" begin + # create user event + usr_evt = cl.UserEvent() + cl.enqueue_wait_for_events(usr_evt) - # create marker event - mkr_evt = cl.enqueue_marker() + # create marker event + mkr_evt = cl.enqueue_marker() - @test usr_evt.status == :submitted - @test mkr_evt.status in (:queued, :submitted) + @test usr_evt.status == :submitted + @test mkr_evt.status in (:queued, :submitted) - cl.complete(usr_evt) - @test usr_evt.status == :complete + cl.complete(usr_evt) + @test usr_evt.status == :complete - wait(mkr_evt) - @test mkr_evt.status == :complete + wait(mkr_evt) + @test mkr_evt.status == :complete - @test cl.cl_event_status(:running) == cl.CL_RUNNING - @test cl.cl_event_status(:submitted) == cl.CL_SUBMITTED - @test cl.cl_event_status(:queued) == cl.CL_QUEUED - @test cl.cl_event_status(:complete) == cl.CL_COMPLETE - end + @test cl.cl_event_status(:running) == cl.CL_RUNNING + @test cl.cl_event_status(:submitted) == cl.CL_SUBMITTED + @test cl.cl_event_status(:queued) == cl.CL_QUEUED + @test cl.cl_event_status(:complete) == cl.CL_COMPLETE +end - @testset "callback" begin - global callback_called = Ref(false) +@testset "callback" begin + global callback_called = Ref(false) - function test_callback(evt, status) - callback_called[] = true - end + function test_callback(evt, status) + callback_called[] = true + end - usr_evt = cl.UserEvent() + usr_evt = cl.UserEvent() - cl.enqueue_wait_for_events(usr_evt) + cl.enqueue_wait_for_events(usr_evt) - mkr_evt = cl.enqueue_marker() - cl.add_callback(mkr_evt, test_callback) + mkr_evt = cl.enqueue_marker() + cl.add_callback(mkr_evt, test_callback) - @test usr_evt.status == :submitted - @test mkr_evt.status in (:queued, :submitted) - @test !callback_called[] + @test usr_evt.status == :submitted + @test mkr_evt.status in (:queued, :submitted) + @test !callback_called[] - cl.complete(usr_evt) - @test usr_evt.status == :complete + cl.complete(usr_evt) + @test usr_evt.status == :complete - wait(mkr_evt) + wait(mkr_evt) - # Give callback some time to finish - yield() - sleep(0.5) + # Give callback some time to finish + yield() + sleep(0.5) - @test mkr_evt.status == :complete - @test callback_called[] - end + @test mkr_evt.status == :complete + @test callback_called[] end + end diff --git a/test/execution.jl b/test/execution.jl index e15a4349..2900c04f 100644 --- a/test/execution.jl +++ b/test/execution.jl @@ -1,7 +1,5 @@ using SPIRV_LLVM_Translator_jll -@testset "execution" begin - @testset "@opencl" begin dummy() = nothing @@ -150,5 +148,3 @@ end @test occursin("target triple = \"spir64-unknown-unknown\"", llvm_backend_khronos) end end - -end diff --git a/test/intrinsics.jl b/test/intrinsics.jl index f5d9eff7..107ec0a3 100644 --- a/test/intrinsics.jl +++ b/test/intrinsics.jl @@ -16,8 +16,6 @@ const ispocl = cl.platform().name == "Portable Computing Language" # XXX: Why does pocl on windows not support vectors of size 2, 8, 16? const simd_ns = (Sys.iswindows() && ispocl) ? [3, 4] : [2, 3, 4, 8, 16] -@testset "intrinsics" begin - @testset "barrier" begin # work-group @@ -337,5 +335,3 @@ end # if cl.sub_groups_supported(cl.device()) end end - -end diff --git a/test/kernel.jl b/test/kernel.jl index e7033c8b..a938305b 100644 --- a/test/kernel.jl +++ b/test/kernel.jl @@ -1,163 +1,161 @@ -@testset "Kernel" begin - test_source = " - __kernel void sum(__global const float *a, - __global const float *b, - __global float *c, - const unsigned int count) - { - unsigned int gid = get_global_id(0); - if (gid < count) { - c[gid] = a[gid] + b[gid]; - } - } - " - - #TODO: tests for invalid kernel build error && logs... - - @testset "constructor" begin - prg = cl.Program(source=test_source) - @test_throws ArgumentError cl.Kernel(prg, "sum") - cl.build!(prg) - @test cl.Kernel(prg, "sum") != nothing - end +test_source = " +__kernel void sum(__global const float *a, + __global const float *b, + __global float *c, + const unsigned int count) +{ + unsigned int gid = get_global_id(0); + if (gid < count) { + c[gid] = a[gid] + b[gid]; + } +} +" + +#TODO: tests for invalid kernel build error && logs... + +@testset "constructor" begin + prg = cl.Program(source=test_source) + @test_throws ArgumentError cl.Kernel(prg, "sum") + cl.build!(prg) + @test cl.Kernel(prg, "sum") != nothing +end - @testset "info" begin - prg = cl.Program(source=test_source) - cl.build!(prg) - k = cl.Kernel(prg, "sum") - @test k.function_name == "sum" - @test k.num_args == 4 - @test k.reference_count > 0 - @test k.program == prg - @test typeof(k.attributes) == String - end +@testset "info" begin + prg = cl.Program(source=test_source) + cl.build!(prg) + k = cl.Kernel(prg, "sum") + @test k.function_name == "sum" + @test k.num_args == 4 + @test k.reference_count > 0 + @test k.program == prg + @test typeof(k.attributes) == String +end - @testset "mem/workgroup size" begin - prg = cl.Program(source=test_source) - cl.build!(prg) - k = cl.Kernel(prg, "sum") - wginfo = cl.work_group_info(k, cl.device()) - for sf in [:size, :compile_size, :local_mem_size, :private_mem_size, :prefered_size_multiple] - @test getproperty(wginfo, sf) != nothing - end +@testset "mem/workgroup size" begin + prg = cl.Program(source=test_source) + cl.build!(prg) + k = cl.Kernel(prg, "sum") + wginfo = cl.work_group_info(k, cl.device()) + for sf in [:size, :compile_size, :local_mem_size, :private_mem_size, :prefered_size_multiple] + @test getproperty(wginfo, sf) != nothing end +end - @testset "set_arg!/set_args!" begin - prg = cl.Program(source=test_source) |> cl.build! - k = cl.Kernel(prg, "sum") +@testset "set_arg!/set_args!" begin + prg = cl.Program(source=test_source) |> cl.build! + k = cl.Kernel(prg, "sum") - count = 1024 - nbytes = count * sizeof(Float32) + count = 1024 + nbytes = count * sizeof(Float32) - h_ones = ones(Float32, count) + h_ones = ones(Float32, count) - A = CLArray(h_ones) - B = CLArray(h_ones) - C = CLArray{Float32}(undef, count) + A = CLArray(h_ones) + B = CLArray(h_ones) + C = CLArray{Float32}(undef, count) - # we use julia's index by one convention - cl.set_arg!(k, 1, A.data[].mem) - cl.set_arg!(k, 2, B.data[].mem) - cl.set_arg!(k, 3, C.data[].mem) - cl.set_arg!(k, 4, UInt32(count)) + # we use julia's index by one convention + cl.set_arg!(k, 1, A.data[].mem) + cl.set_arg!(k, 2, B.data[].mem) + cl.set_arg!(k, 3, C.data[].mem) + cl.set_arg!(k, 4, UInt32(count)) - cl.enqueue_kernel(k, count) |> wait - r = Array(C) + cl.enqueue_kernel(k, count) |> wait + r = Array(C) - @test all(x -> x == 2.0, r) - cl.flush(cl.queue()) + @test all(x -> x == 2.0, r) + cl.flush(cl.queue()) - # test set_args with new kernel - k2 = cl.Kernel(prg, "sum") - cl.set_args!(k2, A.data[].mem, B.data[].mem, C.data[].mem, UInt32(count)) + # test set_args with new kernel + k2 = cl.Kernel(prg, "sum") + cl.set_args!(k2, A.data[].mem, B.data[].mem, C.data[].mem, UInt32(count)) - h_twos = fill(2f0, count) - copyto!(A, h_twos) - copyto!(B, h_twos) + h_twos = fill(2f0, count) + copyto!(A, h_twos) + copyto!(B, h_twos) - #TODO: check for ocl version, fill is opencl v1.2 - #cl.enqueue_fill(A, 2f0) - #cl.enqueue_fill(B, 2f0) + #TODO: check for ocl version, fill is opencl v1.2 + #cl.enqueue_fill(A, 2f0) + #cl.enqueue_fill(B, 2f0) - cl.enqueue_kernel(k, count) + cl.enqueue_kernel(k, count) - @test all(x -> x == 4.0, Array(C)) - end + @test all(x -> x == 4.0, Array(C)) +end - @testset "clcall" begin - simple_kernel = " - __kernel void test(__global float *i) { - *i += 1; - };" +@testset "clcall" begin + simple_kernel = " + __kernel void test(__global float *i) { + *i += 1; + };" - h_buff = Float32[1,] - d_arr = CLArray(h_buff) + h_buff = Float32[1,] + d_arr = CLArray(h_buff) - p = cl.Program(source=simple_kernel) |> cl.build! - k = cl.Kernel(p, "test") + p = cl.Program(source=simple_kernel) |> cl.build! + k = cl.Kernel(p, "test") - # dimensions must be the same size - @test_throws ArgumentError clcall(k, Tuple{CLPtr{Float32}}, d_arr; - global_size=(1,), local_size=(1,1)) - @test_throws ArgumentError clcall(k, Tuple{CLPtr{Float32}}, d_arr; - global_size=(1,1), local_size=(1,)) + # dimensions must be the same size + @test_throws ArgumentError clcall(k, Tuple{CLPtr{Float32}}, d_arr; + global_size=(1,), local_size=(1,1)) + @test_throws ArgumentError clcall(k, Tuple{CLPtr{Float32}}, d_arr; + global_size=(1,1), local_size=(1,)) - # dimensions are bounded - max_work_dim = cl.device().max_work_item_dims - bad = tuple([1 for _ in 1:(max_work_dim + 1)]) + # dimensions are bounded + max_work_dim = cl.device().max_work_item_dims + bad = tuple([1 for _ in 1:(max_work_dim + 1)]) - # calls are asynchronous, but cl.read blocks - clcall(k, Tuple{CLPtr{Float32}}, d_arr) - @test Array(d_arr) == [2f0] + # calls are asynchronous, but cl.read blocks + clcall(k, Tuple{CLPtr{Float32}}, d_arr) + @test Array(d_arr) == [2f0] - # enqueue task is an alias for calling - # a kernel with a global/local size of 1 - evt = cl.enqueue_task(k) - @test Array(d_arr) == [3f0] - end + # enqueue task is an alias for calling + # a kernel with a global/local size of 1 + evt = cl.enqueue_task(k) + @test Array(d_arr) == [3f0] +end - @testset "packed structures" begin - test_source = " - struct __attribute__((packed)) Test2{ - long f1; - int __attribute__((aligned (8))) f2; - }; - __kernel void structest(__global float *out, struct Test2 b){ - out[0] = b.f1; - out[1] = b.f2; - } - " - prg = cl.Program(source = test_source) - cl.build!(prg) - structkernel = cl.Kernel(prg, "structest") - out = CLArray{Float32}(undef, 2) - bstruct = (1, Int32(4)) - clcall(structkernel, Tuple{CLPtr{Float32}, Tuple{Int64, Cint}}, out, bstruct) - @test Array(out) == [1f0, 4f0] - end +@testset "packed structures" begin + test_source = " + struct __attribute__((packed)) Test2{ + long f1; + int __attribute__((aligned (8))) f2; + }; + __kernel void structest(__global float *out, struct Test2 b){ + out[0] = b.f1; + out[1] = b.f2; + } + " + prg = cl.Program(source = test_source) + cl.build!(prg) + structkernel = cl.Kernel(prg, "structest") + out = CLArray{Float32}(undef, 2) + bstruct = (1, Int32(4)) + clcall(structkernel, Tuple{CLPtr{Float32}, Tuple{Int64, Cint}}, out, bstruct) + @test Array(out) == [1f0, 4f0] +end - @testset "vector arguments" begin - test_source = " - __kernel void vec3_unpack(__global float *out, float3 a, float3 b) { - out[0] = a.x; - out[1] = a.y; - out[2] = a.z; - out[3] = b.x; - out[4] = b.y; - out[5] = b.z; - } - " - prg = cl.Program(source = test_source) - cl.build!(prg) - vec3kernel = cl.Kernel(prg, "vec3_unpack") - out = CLArray{Float32}(undef, 6) - # NOTE: the user is responsible for padding the vector to 4 elements - # (only on some platforms) - vec3_a = (1f0, 2f0, 3f0, 0f0) - vec3_b = (4f0, 5f0, 6f0, 0f0) - clcall( - vec3kernel, Tuple{CLPtr{Float32}, NTuple{4, Float32}, NTuple{4, Float32}}, - out, vec3_a, vec3_b) - @test Array(out) == [1f0, 2f0, 3f0, 4f0, 5f0, 6f0] - end +@testset "vector arguments" begin + test_source = " + __kernel void vec3_unpack(__global float *out, float3 a, float3 b) { + out[0] = a.x; + out[1] = a.y; + out[2] = a.z; + out[3] = b.x; + out[4] = b.y; + out[5] = b.z; + } + " + prg = cl.Program(source = test_source) + cl.build!(prg) + vec3kernel = cl.Kernel(prg, "vec3_unpack") + out = CLArray{Float32}(undef, 6) + # NOTE: the user is responsible for padding the vector to 4 elements + # (only on some platforms) + vec3_a = (1f0, 2f0, 3f0, 0f0) + vec3_b = (4f0, 5f0, 6f0, 0f0) + clcall( + vec3kernel, Tuple{CLPtr{Float32}, NTuple{4, Float32}, NTuple{4, Float32}}, + out, vec3_a, vec3_b) + @test Array(out) == [1f0, 2f0, 3f0, 4f0, 5f0, 6f0] end diff --git a/test/memory.jl b/test/memory.jl index 6adc081c..f352fd73 100644 --- a/test/memory.jl +++ b/test/memory.jl @@ -1,32 +1,30 @@ -@testset "Memory" begin - function create_test_buffer() - testarray = zeros(Float32, 1000) - cl.Buffer(testarray) - end +function create_test_buffer() + testarray = zeros(Float32, 1000) + cl.Buffer(testarray) +end - @testset "context" begin - buf = create_test_buffer() +@testset "context" begin + buf = create_test_buffer() - ctx = cl.context(buf) + ctx = cl.context(buf) - @test ctx != nothing - @test isequal(ctx, cl.context()) != nothing - end + @test ctx != nothing + @test isequal(ctx, cl.context()) != nothing +end - @testset "properties" begin - buf = create_test_buffer() +@testset "properties" begin + buf = create_test_buffer() - expectations = [ - (:type, cl.CL_MEM_OBJECT_BUFFER), - (:flags, (:rw, :copy)), - (:size, sizeof(buf)), - (:reference_count, 1), - (:map_count, 0) - ] + expectations = [ + (:type, cl.CL_MEM_OBJECT_BUFFER), + (:flags, (:rw, :copy)), + (:size, sizeof(buf)), + (:reference_count, 1), + (:map_count, 0) + ] - for expectation in expectations - prop, value = expectation - @test getproperty(buf, prop) == value - end + for expectation in expectations + prop, value = expectation + @test getproperty(buf, prop) == value end end diff --git a/test/platform.jl b/test/platform.jl index 5e1c95f4..b6f410d6 100644 --- a/test/platform.jl +++ b/test/platform.jl @@ -1,27 +1,25 @@ -@testset "Platform" begin - @testset "Info" begin - @test length(cl.platforms()) == cl.num_platforms() +@testset "Info" begin + @test length(cl.platforms()) == cl.num_platforms() - @test cl.platform() != nothing - @test pointer(cl.platform()) != C_NULL - @test cl.platform().opencl_version isa VersionNumber - end + @test cl.platform() != nothing + @test pointer(cl.platform()) != C_NULL + @test cl.platform().opencl_version isa VersionNumber +end - @testset "Equality" begin - platform = cl.platforms()[1] - platform_copy = cl.platforms()[1] +@testset "Equality" begin + platform = cl.platforms()[1] + platform_copy = cl.platforms()[1] - @test pointer(platform) == pointer(platform_copy) - @test hash(platform) == hash(platform_copy) - @test isequal(platform, platform) + @test pointer(platform) == pointer(platform_copy) + @test hash(platform) == hash(platform_copy) + @test isequal(platform, platform) - if length(cl.platforms()) > 1 - p1 = cl.platforms()[1] - for p2 in cl.platforms()[2:end] - @test pointer(p2) != pointer(p1) - @test hash(p2) != hash(p1) - @test !isequal(p2, p1) - end + if length(cl.platforms()) > 1 + p1 = cl.platforms()[1] + for p2 in cl.platforms()[2:end] + @test pointer(p2) != pointer(p1) + @test hash(p2) != hash(p1) + @test !isequal(p2, p1) end end end diff --git a/test/program.jl b/test/program.jl index 01c62839..b7d433b4 100644 --- a/test/program.jl +++ b/test/program.jl @@ -1,70 +1,68 @@ -@testset "Program" begin - let - @test_throws ArgumentError cl.Program() - @test_throws ArgumentError cl.Program(source="", il="") - end +let + @test_throws ArgumentError cl.Program() + @test_throws ArgumentError cl.Program(source="", il="") +end - test_source = " - __kernel void sum(__global const float *a, - __global const float *b, - __global float *c) - { - uint gid = get_global_id(0); - c[gid] = a[gid] + b[gid]; - } - " +test_source = " +__kernel void sum(__global const float *a, + __global const float *b, + __global float *c) +{ + uint gid = get_global_id(0); + c[gid] = a[gid] + b[gid]; +} +" - function create_test_program() - cl.Program(source=test_source) - end +function create_test_program() + cl.Program(source=test_source) +end - @testset "source constructor" begin - prg = cl.Program(source=test_source) - @test prg != nothing - end - @testset "info" begin - prg = cl.Program(source=test_source) +@testset "source constructor" begin + prg = cl.Program(source=test_source) + @test prg != nothing +end +@testset "info" begin + prg = cl.Program(source=test_source) - @test prg.context == cl.context() + @test prg.context == cl.context() - @test typeof(prg.devices) == Vector{cl.Device} - @test length(prg.devices) > 0 - @test cl.device() in prg.devices + @test typeof(prg.devices) == Vector{cl.Device} + @test length(prg.devices) > 0 + @test cl.device() in prg.devices - @test typeof(prg.source) == String - @test prg.source == test_source + @test typeof(prg.source) == String + @test prg.source == test_source - @test prg.reference_count > 0 - @test isempty(strip(prg.build_log[cl.device()])) - end + @test prg.reference_count > 0 + @test isempty(strip(prg.build_log[cl.device()])) +end - @testset "build" begin - prg = cl.Program(source=test_source) - @test cl.build!(prg) != nothing +@testset "build" begin + prg = cl.Program(source=test_source) + @test cl.build!(prg) != nothing - @test prg.build_status[cl.device()] == cl.CL_BUILD_SUCCESS - @test prg.build_log[cl.device()] isa String - end + @test prg.build_status[cl.device()] == cl.CL_BUILD_SUCCESS + @test prg.build_log[cl.device()] isa String +end - @testset "source code" begin - prg = cl.Program(source=test_source) - @test prg.source == test_source - end +@testset "source code" begin + prg = cl.Program(source=test_source) + @test prg.source == test_source +end - if contains(cl.platform().vendor, "pocl") - @warn "Skipping binary program tests on $(cl.platform().name)" - else - @testset "binaries" begin - prg = cl.Program(source=test_source) |> cl.build! +if contains(cl.platform().vendor, "pocl") + @warn "Skipping binary program tests on $(cl.platform().name)" +else + @testset "binaries" begin + prg = cl.Program(source=test_source) |> cl.build! - @test cl.device() in collect(keys(prg.binaries)) - binaries = prg.binaries - @test cl.device() in collect(keys(binaries)) - @test binaries[cl.device()] != nothing - @test length(binaries[cl.device()]) > 0 - prg2 = cl.Program(binaries=binaries) - @test prg2.binaries == binaries - @test prg2.source === nothing - end + @test cl.device() in collect(keys(prg.binaries)) + binaries = prg.binaries + @test cl.device() in collect(keys(binaries)) + @test binaries[cl.device()] != nothing + @test length(binaries[cl.device()]) > 0 + prg2 = cl.Program(binaries=binaries) + @test prg2.binaries == binaries + @test prg2.source === nothing end end From 3cf447127f66c3fd4b9b0f5fbcd65a681cb639c2 Mon Sep 17 00:00:00 2001 From: Christian Guinard <28689358+christiangnrd@users.noreply.github.com> Date: Sat, 31 Jan 2026 19:59:19 -0400 Subject: [PATCH 2/2] Apply suggestion from @christiangnrd --- test/atomics.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/atomics.jl b/test/atomics.jl index 9b70912d..954ea09c 100644 --- a/test/atomics.jl +++ b/test/atomics.jl @@ -123,4 +123,6 @@ if "cl_ext_float_atomics" in cl.device().extensions @test occursin("OpAtomicFAddEXT", spv) end end + +end end