From 1900e8b173ca6eae25e8fdb5f195a748545f4363 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Fri, 31 Oct 2025 12:04:58 -0500 Subject: [PATCH 1/9] Add atomic float support --- lib/intrinsics/src/atomic.jl | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/lib/intrinsics/src/atomic.jl b/lib/intrinsics/src/atomic.jl index 9bbbdbe6..a93f926e 100644 --- a/lib/intrinsics/src/atomic.jl +++ b/lib/intrinsics/src/atomic.jl @@ -3,13 +3,15 @@ # provides atomic functions that rely on the OpenCL base atomics, as well as the # cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics extensions. +const atomic_float_types = [Float32, Float64] const atomic_integer_types = [UInt32, Int32, UInt64, Int64] const atomic_memory_types = [AS.Workgroup, AS.CrossWorkgroup] +const atomic_types = vcat(atomic_float_types, atomic_integer_types) # generically typed -for gentype in atomic_integer_types, as in atomic_memory_types +for gentype in atomic_types, as in atomic_memory_types @eval begin @device_function atomic_add!(p::LLVMPtr{$gentype,$as}, val::$gentype) = @@ -45,15 +47,17 @@ for gentype in atomic_integer_types, as in atomic_memory_types @device_function atomic_xor!(p::LLVMPtr{$gentype,$as}, val::$gentype) = @builtin_ccall("atomic_xor", $gentype, (LLVMPtr{$gentype,$as}, $gentype), p, val) - -@device_function atomic_xchg!(p::LLVMPtr{$gentype,$as}, val::$gentype) = - @builtin_ccall("atomic_xchg", $gentype, - (LLVMPtr{$gentype,$as}, $gentype), p, val) - -@device_function atomic_cmpxchg!(p::LLVMPtr{$gentype,$as}, cmp::$gentype, val::$gentype) = - @builtin_ccall("atomic_cmpxchg", $gentype, - (LLVMPtr{$gentype,$as}, $gentype, $gentype), p, cmp, val) - +end +if gentype in atomic_integer_types + @eval begin + @device_function atomic_xchg!(p::LLVMPtr{$gentype,$as}, val::$gentype) = + @builtin_ccall("atomic_xchg", $gentype, + (LLVMPtr{$gentype,$as}, $gentype), p, val) + + @device_function atomic_cmpxchg!(p::LLVMPtr{$gentype,$as}, cmp::$gentype, val::$gentype) = + @builtin_ccall("atomic_cmpxchg", $gentype, + (LLVMPtr{$gentype,$as}, $gentype, $gentype), p, cmp, val) + end end end From 5a8199fddf274959c0b7a147458d27210df0651e Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Tue, 18 Nov 2025 08:29:22 -0600 Subject: [PATCH 2/9] Add tests --- test/atomics.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/atomics.jl b/test/atomics.jl index f46a535b..175e87a4 100644 --- a/test/atomics.jl +++ b/test/atomics.jl @@ -15,6 +15,11 @@ end end end +@testset "atomic_add! ($T)" for T in [Float32, Float64] + # Float64 requires cl_khr_fp64 extension + if T == Float64 && !("cl_khr_fp64" in cl.device().extensions) + continue + end if "cl_ext_float_atomics" in cl.device().extensions function atomic_float_add(counter, val) @builtin_ccall( @@ -39,3 +44,4 @@ end end end +end From c1b2f5fcfc11fdb46166f81b12b703fe7c1c2a90 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Wed, 19 Nov 2025 13:01:52 -0600 Subject: [PATCH 3/9] More tests --- test/atomics.jl | 49 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/test/atomics.jl b/test/atomics.jl index 175e87a4..c6bd9a22 100644 --- a/test/atomics.jl +++ b/test/atomics.jl @@ -1,18 +1,39 @@ using SPIRVIntrinsics: @builtin_ccall, @typed_ccall, LLVMPtr, known_intrinsics +skip_int64(T) = sizeof(T) == 8 && T <: Integer && !("cl_khr_int64_extended_atomics" in cl.device().extensions) +skip_float64(T) = T == Float64 && !("cl_khr_fp64" in cl.device().extensions) +integer_types = [Int32, UInt32, Int64, UInt64] +float_types = [Float32, Float64] @testset "atomics" begin -function atomic_count(counter) - OpenCL.@atomic counter[] += 1 +function atomic_add_(counter, ::Val{T}) where T + OpenCL.@atomic counter[] += one(T) return end -@testset "atomic_add! ($T)" for T in [Int32, UInt32, Int64, UInt64] - if sizeof(T) == 4 || "cl_khr_int64_extended_atomics" in cl.device().extensions - a = OpenCL.zeros(T) - @opencl global_size=1000 atomic_count(a) - @test OpenCL.@allowscalar a[] == 1000 +@testset "atomic_add! ($T)" for T in vcat(integer_types, float_types) + if skip_int64(T) || skip_float64(T) + continue + end + @show T + a = OpenCL.zeros(T) + @opencl global_size=1000 atomic_add_(a, Val(T)) + @test OpenCL.@allowscalar a[] == T(1000) +end + +function atomic_sub_(counter, ::Val{T}) where T + OpenCL.@atomic counter[] -= one(T) + return +end + +@testset "atomic_sub! ($T)" for T in vcat(integer_types, float_types) + if skip_int64(T) || skip_float64(T) + continue end + @show T + a = T(1000.0) + @opencl global_size=1000 atomic_sub_(a, Val(T)) + @test OpenCL.@allowscalar a[] == zero(T) end @testset "atomic_add! ($T)" for T in [Float32, Float64] @@ -21,22 +42,22 @@ end continue end if "cl_ext_float_atomics" in cl.device().extensions - function atomic_float_add(counter, val) + @eval function atomic_float_add(counter, val::$T) @builtin_ccall( - "atomic_add", Float32, - (LLVMPtr{Float32, AS.CrossWorkgroup}, Float32), + "atomic_add", $T, + (LLVMPtr{$T, AS.CrossWorkgroup}, $T), pointer(counter), val, ) return end @testset "SPV_EXT_shader_atomic_float_add extension" begin - a = OpenCL.zeros(Float32) - @opencl global_size = 1000 extensions = ["SPV_EXT_shader_atomic_float_add"] atomic_float_add(a, 1.0f0) - @test OpenCL.@allowscalar a[] == 1000.0f0 + a = OpenCL.zeros(T) + @opencl global_size = 1000 extensions = ["SPV_EXT_shader_atomic_float_add"] atomic_float_add(a, one(T)) + @test OpenCL.@allowscalar a[] == T(1000.0) spv = sprint() do io - OpenCL.code_native(io, atomic_float_add, Tuple{CLDeviceArray{Float32, 0, 1}, Float32}; extensions = ["SPV_EXT_shader_atomic_float_add"]) + OpenCL.code_native(io, atomic_float_add, Tuple{CLDeviceArray{T, 0, 1}, T}; extensions = ["SPV_EXT_shader_atomic_float_add"]) end @test occursin("OpExtension \"SPV_EXT_shader_atomic_float_add\"", spv) @test occursin("OpAtomicFAddEXT", spv) From a9a6373e28f05c1d1bc3677be0b6294917a1e029 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Thu, 20 Nov 2025 09:51:43 -0600 Subject: [PATCH 4/9] Compilation works --- test/atomics.jl | 132 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 106 insertions(+), 26 deletions(-) diff --git a/test/atomics.jl b/test/atomics.jl index c6bd9a22..d9a284e8 100644 --- a/test/atomics.jl +++ b/test/atomics.jl @@ -1,41 +1,121 @@ using SPIRVIntrinsics: @builtin_ccall, @typed_ccall, LLVMPtr, known_intrinsics -skip_int64(T) = sizeof(T) == 8 && T <: Integer && !("cl_khr_int64_extended_atomics" in cl.device().extensions) -skip_float64(T) = T == Float64 && !("cl_khr_fp64" in cl.device().extensions) +# Define the types to test integer_types = [Int32, UInt32, Int64, UInt64] float_types = [Float32, Float64] +all_types = vcat(integer_types, float_types) + +dev = OpenCL.cl.device() +# Define atomic operations to test +atomic_operations = [ + :atomic_add!, + :atomic_sub!, + :atomic_and!, + :atomic_or!, + :atomic_xor!, + :atomic_max!, + :atomic_min!, + :atomic_xchg!, + :atomic_cas!, +] @testset "atomics" begin +for op in atomic_operations + for T in all_types + # Skip Int64/UInt64 if not supported + if sizeof(T) == 8 && T <: Integer && !("cl_khr_int64_extended_atomics" in dev.extensions) + continue + end -function atomic_add_(counter, ::Val{T}) where T - OpenCL.@atomic counter[] += one(T) - return -end + # Skip Float64 if not supported + if T == Float64 && !("cl_khr_fp64" in dev.extensions) + continue + end -@testset "atomic_add! ($T)" for T in vcat(integer_types, float_types) - if skip_int64(T) || skip_float64(T) - continue - end - @show T - a = OpenCL.zeros(T) - @opencl global_size=1000 atomic_add_(a, Val(T)) - @test OpenCL.@allowscalar a[] == T(1000) -end + # Bitwise operations (only valid for integers) + if op in [:atomic_and!, :atomic_or!, :atomic_xor!] && T <: AbstractFloat + continue + end -function atomic_sub_(counter, ::Val{T}) where T - OpenCL.@atomic counter[] -= one(T) - return -end + # Min/max operations (only supported for 32-bit integers in OpenCL) + if op in [:atomic_min!, :atomic_max!] && !(T in [Int32, UInt32]) + continue + end -@testset "atomic_sub! ($T)" for T in vcat(integer_types, float_types) - if skip_int64(T) || skip_float64(T) - continue + test_name = Symbol("test_", op, "_", T) + + if op in [:atomic_add!, :atomic_sub!] + # Arithmetic operations + if op == :atomic_add! + @eval function $test_name(counter) + OpenCL.@atomic counter[] += one($T) + return + end + else + @eval function $test_name(counter) + OpenCL.@atomic counter[] -= one($T) + return + end + end + elseif op in [:atomic_and!, :atomic_or!, :atomic_xor!] + # Bitwise operations + if op == :atomic_and! + @eval function $test_name(counter) + OpenCL.@atomic counter[] &= one($T) + return + end + elseif op == :atomic_or! + @eval function $test_name(counter) + OpenCL.@atomic counter[] |= one($T) + return + end + else # xor + @eval function $test_name(counter) + OpenCL.@atomic counter[] ⊻= one($T) + return + end + end + elseif op in [:atomic_max!, :atomic_min!] + # Min/max operations - use low-level API directly + if op == :atomic_max! + @eval function $test_name(counter) + ptr = OpenCL.pointer(counter, 1) + OpenCL.atomic_max!(ptr, one($T)) + return + end + else + @eval function $test_name(counter) + ptr = OpenCL.pointer(counter, 1) + OpenCL.atomic_min!(ptr, one($T)) + return + end + end + elseif op == :atomic_xchg! + # Exchange operation - use low-level API directly + @eval function $test_name(counter) + ptr = OpenCL.pointer(counter, 1) + OpenCL.atomic_xchg!(ptr, one($T)) + return + end + elseif op == :atomic_cas! + # CAS operation - use low-level API directly (it's called atomic_cmpxchg!) + @eval function $test_name(counter) + ptr = OpenCL.pointer(counter, 1) + OpenCL.atomic_cmpxchg!(ptr, $T(0), one($T)) + return + end + else + error("Unknown operation: $op") + end + + + # Try to compile the kernel - this is the key test + a = OpenCL.zeros(T) + kernel_func = @eval $test_name + OpenCL.@opencl kernel_func(a) end - @show T - a = T(1000.0) - @opencl global_size=1000 atomic_sub_(a, Val(T)) - @test OpenCL.@allowscalar a[] == zero(T) end + @testset "atomic_add! ($T)" for T in [Float32, Float64] # Float64 requires cl_khr_fp64 extension if T == Float64 && !("cl_khr_fp64" in cl.device().extensions) From 1fff39742050878669aa26f3e4d6ba0db1d51cf8 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Thu, 20 Nov 2025 10:08:50 -0600 Subject: [PATCH 5/9] Check results --- test/atomics.jl | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/test/atomics.jl b/test/atomics.jl index d9a284e8..33929beb 100644 --- a/test/atomics.jl +++ b/test/atomics.jl @@ -6,20 +6,20 @@ float_types = [Float32, Float64] all_types = vcat(integer_types, float_types) dev = OpenCL.cl.device() -# Define atomic operations to test +# Define atomic operations to test, with init value and expected value atomic_operations = [ - :atomic_add!, - :atomic_sub!, - :atomic_and!, - :atomic_or!, - :atomic_xor!, - :atomic_max!, - :atomic_min!, - :atomic_xchg!, - :atomic_cas!, + (:atomic_add!, 0, 1), + (:atomic_sub!, 1, 0), + (:atomic_and!, 3, 1), + (:atomic_or!, 2, 3), + (:atomic_xor!, 3, 2), + (:atomic_max!, 0, 1), + (:atomic_min!, 2, 1), + (:atomic_xchg!, 0, 1), + (:atomic_cas!, 0, 1), ] @testset "atomics" begin -for op in atomic_operations +for (op, init_val, expected_val) in atomic_operations for T in all_types # Skip Int64/UInt64 if not supported if sizeof(T) == 8 && T <: Integer && !("cl_khr_int64_extended_atomics" in dev.extensions) @@ -110,8 +110,11 @@ for op in atomic_operations # Try to compile the kernel - this is the key test a = OpenCL.zeros(T) + OpenCL.fill!(a, init_val) kernel_func = @eval $test_name OpenCL.@opencl kernel_func(a) + result_val = Array(a)[1] + @test result_val == expected_val end end From 3c58e28ccdc4a57920a9d9fece1902fad0dfcb9f Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Thu, 20 Nov 2025 10:23:51 -0600 Subject: [PATCH 6/9] Using 1000 items and checking type --- test/atomics.jl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/test/atomics.jl b/test/atomics.jl index 33929beb..92d3fbbf 100644 --- a/test/atomics.jl +++ b/test/atomics.jl @@ -8,11 +8,11 @@ all_types = vcat(integer_types, float_types) dev = OpenCL.cl.device() # Define atomic operations to test, with init value and expected value atomic_operations = [ - (:atomic_add!, 0, 1), - (:atomic_sub!, 1, 0), + (:atomic_add!, 0, 1000), + (:atomic_sub!, 1000, 0), (:atomic_and!, 3, 1), - (:atomic_or!, 2, 3), - (:atomic_xor!, 3, 2), + (:atomic_or!, 3, 3), + (:atomic_xor!, 3, 3), (:atomic_max!, 0, 1), (:atomic_min!, 2, 1), (:atomic_xchg!, 0, 1), @@ -112,9 +112,10 @@ for (op, init_val, expected_val) in atomic_operations a = OpenCL.zeros(T) OpenCL.fill!(a, init_val) kernel_func = @eval $test_name - OpenCL.@opencl kernel_func(a) + OpenCL.@opencl global_size=1000 kernel_func(a) result_val = Array(a)[1] - @test result_val == expected_val + @test typeof(result_val) == T + @test result_val == T(expected_val) end end From 58f3b276cce734e99c293ff5d8182bd6791692d0 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Thu, 20 Nov 2025 10:29:39 -0600 Subject: [PATCH 7/9] Fix --- test/atomics.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/atomics.jl b/test/atomics.jl index 92d3fbbf..bcfaa145 100644 --- a/test/atomics.jl +++ b/test/atomics.jl @@ -6,8 +6,9 @@ float_types = [Float32, Float64] all_types = vcat(integer_types, float_types) dev = OpenCL.cl.device() -# Define atomic operations to test, with init value and expected value +# Define atomic operations to test atomic_operations = [ + # op, init_val, expected_val (:atomic_add!, 0, 1000), (:atomic_sub!, 1000, 0), (:atomic_and!, 3, 1), From 5a1a66bb98339f1d0e1a8072037bdb7f0952b46a Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Wed, 26 Nov 2025 14:03:12 +0100 Subject: [PATCH 8/9] avoid use of eval, improve some of the tests --- test/atomics.jl | 179 +++++++++++++++++++++--------------------------- 1 file changed, 77 insertions(+), 102 deletions(-) diff --git a/test/atomics.jl b/test/atomics.jl index bcfaa145..71fefa8b 100644 --- a/test/atomics.jl +++ b/test/atomics.jl @@ -6,118 +6,93 @@ float_types = [Float32, Float64] all_types = vcat(integer_types, float_types) dev = OpenCL.cl.device() + +# Arithmetic operations +function test_atomic_add(counter::AbstractArray{T}) where T + OpenCL.@atomic counter[] += one(T) + return +end +function test_atomic_sub(counter::AbstractArray{T}) where T + OpenCL.@atomic counter[] -= one(T) + return +end +# Bitwise operations +function test_atomic_and(counter::AbstractArray{T}) where T + OpenCL.@atomic counter[] &= ~(one(T) << (get_global_id() - 1)) + return +end +function test_atomic_or(counter::AbstractArray{T}) where T + OpenCL.@atomic counter[] |= one(T) << (get_global_id() - 1) + return +end +function test_atomic_xor(counter::AbstractArray{T}) where T + OpenCL.@atomic counter[] ⊻= one(T) << ((get_global_id() - 1) % 32) + return +end +# Min/max operations - use low-level API directly +function test_atomic_max(counter::AbstractArray{T}) where T + OpenCL.atomic_max!(pointer(counter), T(get_global_id())) + return +end +function test_atomic_min(counter::AbstractArray{T}) where T + OpenCL.atomic_min!(pointer(counter), T(get_global_id())) + return +end +# Exchange operation - use low-level API directly +function test_atomic_xchg(counter::AbstractArray{T}) where T + OpenCL.atomic_xchg!(pointer(counter), one(T)) + return +end +# Compare-and-swap operation - use low-level API directly +function test_atomic_cas(counter::AbstractArray{T}) where T + OpenCL.atomic_cmpxchg!(pointer(counter), zero(T), one(T)) + return +end + # Define atomic operations to test atomic_operations = [ # op, init_val, expected_val - (:atomic_add!, 0, 1000), - (:atomic_sub!, 1000, 0), - (:atomic_and!, 3, 1), - (:atomic_or!, 3, 3), - (:atomic_xor!, 3, 3), - (:atomic_max!, 0, 1), - (:atomic_min!, 2, 1), - (:atomic_xchg!, 0, 1), - (:atomic_cas!, 0, 1), + (test_atomic_add, 0, 1000), + (test_atomic_sub, 1000, 0), + (test_atomic_and, typemax(UInt64), 0), + (test_atomic_or, 0, typemax(UInt64)), + (test_atomic_xor, 0, typemax(UInt32) << 8), + (test_atomic_max, 0, 1000), + (test_atomic_min, 1000, 1), + (test_atomic_xchg, 0, 1), + (test_atomic_cas, 0, 1), ] @testset "atomics" begin -for (op, init_val, expected_val) in atomic_operations - for T in all_types - # Skip Int64/UInt64 if not supported - if sizeof(T) == 8 && T <: Integer && !("cl_khr_int64_extended_atomics" in dev.extensions) - continue - end - - # Skip Float64 if not supported - if T == Float64 && !("cl_khr_fp64" in dev.extensions) - continue - end - - # Bitwise operations (only valid for integers) - if op in [:atomic_and!, :atomic_or!, :atomic_xor!] && T <: AbstractFloat - continue - end - - # Min/max operations (only supported for 32-bit integers in OpenCL) - if op in [:atomic_min!, :atomic_max!] && !(T in [Int32, UInt32]) - continue - end +@testset "$kernel_func - $T" for (kernel_func, init_val, expected_val) in atomic_operations, T in all_types + # Skip Int64/UInt64 if not supported + if sizeof(T) == 8 && T <: Integer && !("cl_khr_int64_extended_atomics" in dev.extensions) + continue + end - test_name = Symbol("test_", op, "_", T) + # Skip Float64 if not supported + if T == Float64 && !("cl_khr_fp64" in dev.extensions) + continue + end - if op in [:atomic_add!, :atomic_sub!] - # Arithmetic operations - if op == :atomic_add! - @eval function $test_name(counter) - OpenCL.@atomic counter[] += one($T) - return - end - else - @eval function $test_name(counter) - OpenCL.@atomic counter[] -= one($T) - return - end - end - elseif op in [:atomic_and!, :atomic_or!, :atomic_xor!] - # Bitwise operations - if op == :atomic_and! - @eval function $test_name(counter) - OpenCL.@atomic counter[] &= one($T) - return - end - elseif op == :atomic_or! - @eval function $test_name(counter) - OpenCL.@atomic counter[] |= one($T) - return - end - else # xor - @eval function $test_name(counter) - OpenCL.@atomic counter[] ⊻= one($T) - return - end - end - elseif op in [:atomic_max!, :atomic_min!] - # Min/max operations - use low-level API directly - if op == :atomic_max! - @eval function $test_name(counter) - ptr = OpenCL.pointer(counter, 1) - OpenCL.atomic_max!(ptr, one($T)) - return - end - else - @eval function $test_name(counter) - ptr = OpenCL.pointer(counter, 1) - OpenCL.atomic_min!(ptr, one($T)) - return - end - end - elseif op == :atomic_xchg! - # Exchange operation - use low-level API directly - @eval function $test_name(counter) - ptr = OpenCL.pointer(counter, 1) - OpenCL.atomic_xchg!(ptr, one($T)) - return - end - elseif op == :atomic_cas! - # CAS operation - use low-level API directly (it's called atomic_cmpxchg!) - @eval function $test_name(counter) - ptr = OpenCL.pointer(counter, 1) - OpenCL.atomic_cmpxchg!(ptr, $T(0), one($T)) - return - end - else - error("Unknown operation: $op") - end + # Bitwise operations (only valid for integers) + if kernel_func in [test_atomic_and, test_atomic_or, test_atomic_xor] && T <: AbstractFloat + continue + end + # Min/max operations (only supported for 32-bit integers in OpenCL) + if kernel_func in [test_atomic_min, test_atomic_max] && !(T in [Int32, UInt32]) + continue + end - # Try to compile the kernel - this is the key test - a = OpenCL.zeros(T) - OpenCL.fill!(a, init_val) - kernel_func = @eval $test_name - OpenCL.@opencl global_size=1000 kernel_func(a) - result_val = Array(a)[1] - @test typeof(result_val) == T - @test result_val == T(expected_val) + if T <: Integer + init_val %= T + expected_val %= T end + + a = OpenCL.fill(T(init_val)) + @opencl global_size=1000 kernel_func(a) + result_val = OpenCL.@allowscalar a[] + @test result_val === T(expected_val) end From e12f093219ce39b06e5b5b7ca4107cadc2bd66ac Mon Sep 17 00:00:00 2001 From: Simeon David Schaub Date: Thu, 27 Nov 2025 13:43:23 +0100 Subject: [PATCH 9/9] bump SPIRVIntrinsics version --- lib/intrinsics/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/intrinsics/Project.toml b/lib/intrinsics/Project.toml index ead792b9..338bb329 100644 --- a/lib/intrinsics/Project.toml +++ b/lib/intrinsics/Project.toml @@ -1,7 +1,7 @@ name = "SPIRVIntrinsics" uuid = "71d1d633-e7e8-4a92-83a1-de8814b09ba8" authors = ["Tim Besard "] -version = "0.5.5" +version = "0.5.6" [deps] ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04"