From 1900e8b173ca6eae25e8fdb5f195a748545f4363 Mon Sep 17 00:00:00 2001
From: Michel Schanen <michel.schanen@gmail.com>
Date: Fri, 31 Oct 2025 12:04:58 -0500
Subject: [PATCH 1/9] Add atomic float support

---
 lib/intrinsics/src/atomic.jl | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/lib/intrinsics/src/atomic.jl b/lib/intrinsics/src/atomic.jl
index 9bbbdbe6..a93f926e 100644
--- a/lib/intrinsics/src/atomic.jl
+++ b/lib/intrinsics/src/atomic.jl
@@ -3,13 +3,15 @@
 # provides atomic functions that rely on the OpenCL base atomics, as well as the
 # cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics extensions.
 
+const atomic_float_types = [Float32, Float64]
 const atomic_integer_types = [UInt32, Int32, UInt64, Int64]
 const atomic_memory_types = [AS.Workgroup, AS.CrossWorkgroup]
+const atomic_types = vcat(atomic_float_types, atomic_integer_types)
 
 
 # generically typed
 
-for gentype in atomic_integer_types, as in atomic_memory_types
+for gentype in atomic_types, as in atomic_memory_types
 @eval begin
 
 @device_function atomic_add!(p::LLVMPtr{$gentype,$as}, val::$gentype) =
@@ -45,15 +47,17 @@ for gentype in atomic_integer_types, as in atomic_memory_types
 @device_function atomic_xor!(p::LLVMPtr{$gentype,$as}, val::$gentype) =
     @builtin_ccall("atomic_xor", $gentype,
                    (LLVMPtr{$gentype,$as}, $gentype), p, val)
-
-@device_function atomic_xchg!(p::LLVMPtr{$gentype,$as}, val::$gentype) =
-    @builtin_ccall("atomic_xchg", $gentype,
-                   (LLVMPtr{$gentype,$as}, $gentype), p, val)
-
-@device_function atomic_cmpxchg!(p::LLVMPtr{$gentype,$as}, cmp::$gentype, val::$gentype) =
-    @builtin_ccall("atomic_cmpxchg", $gentype,
-                   (LLVMPtr{$gentype,$as}, $gentype, $gentype), p, cmp, val)
-
+end
+if gentype in atomic_integer_types
+    @eval begin
+    @device_function atomic_xchg!(p::LLVMPtr{$gentype,$as}, val::$gentype) =
+        @builtin_ccall("atomic_xchg", $gentype,
+                    (LLVMPtr{$gentype,$as}, $gentype), p, val)
+
+    @device_function atomic_cmpxchg!(p::LLVMPtr{$gentype,$as}, cmp::$gentype, val::$gentype) =
+        @builtin_ccall("atomic_cmpxchg", $gentype,
+                    (LLVMPtr{$gentype,$as}, $gentype, $gentype), p, cmp, val)
+    end
 end
 end
 

From 5a8199fddf274959c0b7a147458d27210df0651e Mon Sep 17 00:00:00 2001
From: Michel Schanen <Michel.Schanen@gmail.com>
Date: Tue, 18 Nov 2025 08:29:22 -0600
Subject: [PATCH 2/9] Add tests

---
 test/atomics.jl | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/test/atomics.jl b/test/atomics.jl
index f46a535b..175e87a4 100644
--- a/test/atomics.jl
+++ b/test/atomics.jl
@@ -15,6 +15,11 @@ end
     end
 end
 
+@testset "atomic_add! ($T)" for T in [Float32, Float64]
+    # Float64 requires cl_khr_fp64 extension
+    if T == Float64 && !("cl_khr_fp64" in cl.device().extensions)
+        continue
+    end
     if "cl_ext_float_atomics" in cl.device().extensions
         function atomic_float_add(counter, val)
             @builtin_ccall(
@@ -39,3 +44,4 @@ end
     end
 
 end
+end

From c1b2f5fcfc11fdb46166f81b12b703fe7c1c2a90 Mon Sep 17 00:00:00 2001
From: Michel Schanen <Michel.Schanen@gmail.com>
Date: Wed, 19 Nov 2025 13:01:52 -0600
Subject: [PATCH 3/9] More tests

---
 test/atomics.jl | 49 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 35 insertions(+), 14 deletions(-)

diff --git a/test/atomics.jl b/test/atomics.jl
index 175e87a4..c6bd9a22 100644
--- a/test/atomics.jl
+++ b/test/atomics.jl
@@ -1,18 +1,39 @@
 using SPIRVIntrinsics: @builtin_ccall, @typed_ccall, LLVMPtr, known_intrinsics
 
+skip_int64(T) = sizeof(T) == 8 && T <: Integer && !("cl_khr_int64_extended_atomics" in cl.device().extensions)
+skip_float64(T) = T == Float64 && !("cl_khr_fp64" in cl.device().extensions)
+integer_types = [Int32, UInt32, Int64, UInt64]
+float_types = [Float32, Float64]
 @testset "atomics" begin
 
-function atomic_count(counter)
-    OpenCL.@atomic counter[] += 1
+function atomic_add_(counter, ::Val{T}) where T
+    OpenCL.@atomic counter[] += one(T)
     return
 end
 
-@testset "atomic_add! ($T)" for T in [Int32, UInt32, Int64, UInt64]
-    if sizeof(T) == 4 || "cl_khr_int64_extended_atomics" in cl.device().extensions
-        a = OpenCL.zeros(T)
-        @opencl global_size=1000 atomic_count(a)
-        @test OpenCL.@allowscalar a[] == 1000
+@testset "atomic_add! ($T)" for T in vcat(integer_types, float_types)
+    if skip_int64(T) || skip_float64(T)
+        continue
+    end
+    @show T
+    a = OpenCL.zeros(T)
+    @opencl global_size=1000 atomic_add_(a, Val(T))
+    @test OpenCL.@allowscalar a[] == T(1000)
+end
+
+function atomic_sub_(counter, ::Val{T}) where T
+    OpenCL.@atomic counter[] -= one(T)
+    return
+end
+
+@testset "atomic_sub! ($T)" for T in vcat(integer_types, float_types)
+    if skip_int64(T) || skip_float64(T)
+        continue
     end
+    @show T
+    a = T(1000.0)
+    @opencl global_size=1000 atomic_sub_(a, Val(T))
+    @test OpenCL.@allowscalar a[] == zero(T)
 end
 
 @testset "atomic_add! ($T)" for T in [Float32, Float64]
@@ -21,22 +42,22 @@ end
         continue
     end
     if "cl_ext_float_atomics" in cl.device().extensions
-        function atomic_float_add(counter, val)
+        @eval function atomic_float_add(counter, val::$T)
             @builtin_ccall(
-                "atomic_add", Float32,
-                (LLVMPtr{Float32, AS.CrossWorkgroup}, Float32),
+                "atomic_add", $T,
+                (LLVMPtr{$T, AS.CrossWorkgroup}, $T),
                 pointer(counter), val,
             )
             return
         end
 
         @testset "SPV_EXT_shader_atomic_float_add extension" begin
-            a = OpenCL.zeros(Float32)
-            @opencl global_size = 1000 extensions = ["SPV_EXT_shader_atomic_float_add"] atomic_float_add(a, 1.0f0)
-            @test OpenCL.@allowscalar a[] == 1000.0f0
+            a = OpenCL.zeros(T)
+            @opencl global_size = 1000 extensions = ["SPV_EXT_shader_atomic_float_add"] atomic_float_add(a, one(T))
+            @test OpenCL.@allowscalar a[] == T(1000.0)
 
             spv = sprint() do io
-                OpenCL.code_native(io, atomic_float_add, Tuple{CLDeviceArray{Float32, 0, 1}, Float32}; extensions = ["SPV_EXT_shader_atomic_float_add"])
+                OpenCL.code_native(io, atomic_float_add, Tuple{CLDeviceArray{T, 0, 1}, T}; extensions = ["SPV_EXT_shader_atomic_float_add"])
             end
             @test occursin("OpExtension \"SPV_EXT_shader_atomic_float_add\"", spv)
             @test occursin("OpAtomicFAddEXT", spv)

From a9a6373e28f05c1d1bc3677be0b6294917a1e029 Mon Sep 17 00:00:00 2001
From: Michel Schanen <Michel.Schanen@gmail.com>
Date: Thu, 20 Nov 2025 09:51:43 -0600
Subject: [PATCH 4/9] Compilation works

---
 test/atomics.jl | 132 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 106 insertions(+), 26 deletions(-)

diff --git a/test/atomics.jl b/test/atomics.jl
index c6bd9a22..d9a284e8 100644
--- a/test/atomics.jl
+++ b/test/atomics.jl
@@ -1,41 +1,121 @@
 using SPIRVIntrinsics: @builtin_ccall, @typed_ccall, LLVMPtr, known_intrinsics
 
-skip_int64(T) = sizeof(T) == 8 && T <: Integer && !("cl_khr_int64_extended_atomics" in cl.device().extensions)
-skip_float64(T) = T == Float64 && !("cl_khr_fp64" in cl.device().extensions)
+# Define the types to test
 integer_types = [Int32, UInt32, Int64, UInt64]
 float_types = [Float32, Float64]
+all_types = vcat(integer_types, float_types)
+
+dev = OpenCL.cl.device()
+# Define atomic operations to test
+atomic_operations = [
+    :atomic_add!,
+    :atomic_sub!,
+    :atomic_and!,
+    :atomic_or!,
+    :atomic_xor!,
+    :atomic_max!,
+    :atomic_min!,
+    :atomic_xchg!,
+    :atomic_cas!,
+]
 @testset "atomics" begin
+for op in atomic_operations
+    for T in all_types
+        # Skip Int64/UInt64 if not supported
+        if sizeof(T) == 8 && T <: Integer && !("cl_khr_int64_extended_atomics" in dev.extensions)
+            continue
+        end
 
-function atomic_add_(counter, ::Val{T}) where T
-    OpenCL.@atomic counter[] += one(T)
-    return
-end
+        # Skip Float64 if not supported
+        if T == Float64 && !("cl_khr_fp64" in dev.extensions)
+            continue
+        end
 
-@testset "atomic_add! ($T)" for T in vcat(integer_types, float_types)
-    if skip_int64(T) || skip_float64(T)
-        continue
-    end
-    @show T
-    a = OpenCL.zeros(T)
-    @opencl global_size=1000 atomic_add_(a, Val(T))
-    @test OpenCL.@allowscalar a[] == T(1000)
-end
+        # Bitwise operations (only valid for integers)
+        if op in [:atomic_and!, :atomic_or!, :atomic_xor!] && T <: AbstractFloat
+            continue
+        end
 
-function atomic_sub_(counter, ::Val{T}) where T
-    OpenCL.@atomic counter[] -= one(T)
-    return
-end
+        # Min/max operations (only supported for 32-bit integers in OpenCL)
+        if op in [:atomic_min!, :atomic_max!] && !(T in [Int32, UInt32])
+            continue
+        end
 
-@testset "atomic_sub! ($T)" for T in vcat(integer_types, float_types)
-    if skip_int64(T) || skip_float64(T)
-        continue
+        test_name = Symbol("test_", op, "_", T)
+
+        if op in [:atomic_add!, :atomic_sub!]
+            # Arithmetic operations
+            if op == :atomic_add!
+                @eval function $test_name(counter)
+                    OpenCL.@atomic counter[] += one($T)
+                    return
+                end
+            else
+                @eval function $test_name(counter)
+                    OpenCL.@atomic counter[] -= one($T)
+                    return
+                end
+            end
+        elseif op in [:atomic_and!, :atomic_or!, :atomic_xor!]
+            # Bitwise operations
+            if op == :atomic_and!
+                @eval function $test_name(counter)
+                    OpenCL.@atomic counter[] &= one($T)
+                    return
+                end
+            elseif op == :atomic_or!
+                @eval function $test_name(counter)
+                    OpenCL.@atomic counter[] |= one($T)
+                    return
+                end
+            else # xor
+                @eval function $test_name(counter)
+                    OpenCL.@atomic counter[] ⊻= one($T)
+                    return
+                end
+            end
+        elseif op in [:atomic_max!, :atomic_min!]
+            # Min/max operations - use low-level API directly
+            if op == :atomic_max!
+                @eval function $test_name(counter)
+                    ptr = OpenCL.pointer(counter, 1)
+                    OpenCL.atomic_max!(ptr, one($T))
+                    return
+                end
+            else
+                @eval function $test_name(counter)
+                    ptr = OpenCL.pointer(counter, 1)
+                    OpenCL.atomic_min!(ptr, one($T))
+                    return
+                end
+            end
+        elseif op == :atomic_xchg!
+            # Exchange operation - use low-level API directly
+            @eval function $test_name(counter)
+                ptr = OpenCL.pointer(counter, 1)
+                OpenCL.atomic_xchg!(ptr, one($T))
+                return
+            end
+        elseif op == :atomic_cas!
+            # CAS operation - use low-level API directly (it's called atomic_cmpxchg!)
+            @eval function $test_name(counter)
+                ptr = OpenCL.pointer(counter, 1)
+                OpenCL.atomic_cmpxchg!(ptr, $T(0), one($T))
+                return
+            end
+        else
+            error("Unknown operation: $op")
+        end
+
+
+        # Try to compile the kernel - this is the key test
+        a = OpenCL.zeros(T)
+        kernel_func = @eval $test_name
+        OpenCL.@opencl kernel_func(a)
     end
-    @show T
-    a = T(1000.0)
-    @opencl global_size=1000 atomic_sub_(a, Val(T))
-    @test OpenCL.@allowscalar a[] == zero(T)
 end
 
+
 @testset "atomic_add! ($T)" for T in [Float32, Float64]
     # Float64 requires cl_khr_fp64 extension
     if T == Float64 && !("cl_khr_fp64" in cl.device().extensions)

From 1fff39742050878669aa26f3e4d6ba0db1d51cf8 Mon Sep 17 00:00:00 2001
From: Michel Schanen <Michel.Schanen@gmail.com>
Date: Thu, 20 Nov 2025 10:08:50 -0600
Subject: [PATCH 5/9] Check results

---
 test/atomics.jl | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/test/atomics.jl b/test/atomics.jl
index d9a284e8..33929beb 100644
--- a/test/atomics.jl
+++ b/test/atomics.jl
@@ -6,20 +6,20 @@ float_types = [Float32, Float64]
 all_types = vcat(integer_types, float_types)
 
 dev = OpenCL.cl.device()
-# Define atomic operations to test
+# Define atomic operations to test, with init value and expected value
 atomic_operations = [
-    :atomic_add!,
-    :atomic_sub!,
-    :atomic_and!,
-    :atomic_or!,
-    :atomic_xor!,
-    :atomic_max!,
-    :atomic_min!,
-    :atomic_xchg!,
-    :atomic_cas!,
+    (:atomic_add!, 0, 1),
+    (:atomic_sub!, 1, 0),
+    (:atomic_and!, 3, 1),
+    (:atomic_or!, 2, 3),
+    (:atomic_xor!, 3, 2),
+    (:atomic_max!, 0, 1),
+    (:atomic_min!, 2, 1),
+    (:atomic_xchg!, 0, 1),
+    (:atomic_cas!, 0, 1),
 ]
 @testset "atomics" begin
-for op in atomic_operations
+for (op, init_val, expected_val) in atomic_operations
     for T in all_types
         # Skip Int64/UInt64 if not supported
         if sizeof(T) == 8 && T <: Integer && !("cl_khr_int64_extended_atomics" in dev.extensions)
@@ -110,8 +110,11 @@ for op in atomic_operations
 
         # Try to compile the kernel - this is the key test
         a = OpenCL.zeros(T)
+        OpenCL.fill!(a, init_val)
         kernel_func = @eval $test_name
         OpenCL.@opencl kernel_func(a)
+        result_val = Array(a)[1]
+        @test result_val == expected_val
     end
 end
 

From 3c58e28ccdc4a57920a9d9fece1902fad0dfcb9f Mon Sep 17 00:00:00 2001
From: Michel Schanen <Michel.Schanen@gmail.com>
Date: Thu, 20 Nov 2025 10:23:51 -0600
Subject: [PATCH 6/9] Using 1000 items and checking type

---
 test/atomics.jl | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/test/atomics.jl b/test/atomics.jl
index 33929beb..92d3fbbf 100644
--- a/test/atomics.jl
+++ b/test/atomics.jl
@@ -8,11 +8,11 @@ all_types = vcat(integer_types, float_types)
 dev = OpenCL.cl.device()
 # Define atomic operations to test, with init value and expected value
 atomic_operations = [
-    (:atomic_add!, 0, 1),
-    (:atomic_sub!, 1, 0),
+    (:atomic_add!, 0, 1000),
+    (:atomic_sub!, 1000, 0),
     (:atomic_and!, 3, 1),
-    (:atomic_or!, 2, 3),
-    (:atomic_xor!, 3, 2),
+    (:atomic_or!, 3, 3),
+    (:atomic_xor!, 3, 3),
     (:atomic_max!, 0, 1),
     (:atomic_min!, 2, 1),
     (:atomic_xchg!, 0, 1),
@@ -112,9 +112,10 @@ for (op, init_val, expected_val) in atomic_operations
         a = OpenCL.zeros(T)
         OpenCL.fill!(a, init_val)
         kernel_func = @eval $test_name
-        OpenCL.@opencl kernel_func(a)
+        OpenCL.@opencl global_size=1000 kernel_func(a)
         result_val = Array(a)[1]
-        @test result_val == expected_val
+        @test typeof(result_val) == T
+        @test result_val == T(expected_val)
     end
 end
 

From 58f3b276cce734e99c293ff5d8182bd6791692d0 Mon Sep 17 00:00:00 2001
From: Michel Schanen <Michel.Schanen@gmail.com>
Date: Thu, 20 Nov 2025 10:29:39 -0600
Subject: [PATCH 7/9] Fix

---
 test/atomics.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/atomics.jl b/test/atomics.jl
index 92d3fbbf..bcfaa145 100644
--- a/test/atomics.jl
+++ b/test/atomics.jl
@@ -6,8 +6,9 @@ float_types = [Float32, Float64]
 all_types = vcat(integer_types, float_types)
 
 dev = OpenCL.cl.device()
-# Define atomic operations to test, with init value and expected value
+# Define atomic operations to test
 atomic_operations = [
+    # op, init_val, expected_val
     (:atomic_add!, 0, 1000),
     (:atomic_sub!, 1000, 0),
     (:atomic_and!, 3, 1),

From 5a1a66bb98339f1d0e1a8072037bdb7f0952b46a Mon Sep 17 00:00:00 2001
From: Simeon David Schaub <simeon@schaub.rocks>
Date: Wed, 26 Nov 2025 14:03:12 +0100
Subject: [PATCH 8/9] avoid use of eval, improve some of the tests

---
 test/atomics.jl | 179 +++++++++++++++++++++---------------------------
 1 file changed, 77 insertions(+), 102 deletions(-)

diff --git a/test/atomics.jl b/test/atomics.jl
index bcfaa145..71fefa8b 100644
--- a/test/atomics.jl
+++ b/test/atomics.jl
@@ -6,118 +6,93 @@ float_types = [Float32, Float64]
 all_types = vcat(integer_types, float_types)
 
 dev = OpenCL.cl.device()
+
+# Arithmetic operations
+function test_atomic_add(counter::AbstractArray{T}) where T
+    OpenCL.@atomic counter[] += one(T)
+    return
+end
+function test_atomic_sub(counter::AbstractArray{T}) where T
+    OpenCL.@atomic counter[] -= one(T)
+    return
+end
+# Bitwise operations
+function test_atomic_and(counter::AbstractArray{T}) where T
+    OpenCL.@atomic counter[] &= ~(one(T) << (get_global_id() - 1))
+    return
+end
+function test_atomic_or(counter::AbstractArray{T}) where T
+    OpenCL.@atomic counter[] |= one(T) << (get_global_id() - 1)
+    return
+end
+function test_atomic_xor(counter::AbstractArray{T}) where T
+    OpenCL.@atomic counter[] ⊻= one(T) << ((get_global_id() - 1) % 32)
+    return
+end
+# Min/max operations - use low-level API directly
+function test_atomic_max(counter::AbstractArray{T}) where T
+    OpenCL.atomic_max!(pointer(counter), T(get_global_id()))
+    return
+end
+function test_atomic_min(counter::AbstractArray{T}) where T
+    OpenCL.atomic_min!(pointer(counter), T(get_global_id()))
+    return
+end
+# Exchange operation - use low-level API directly
+function test_atomic_xchg(counter::AbstractArray{T}) where T
+    OpenCL.atomic_xchg!(pointer(counter), one(T))
+    return
+end
+# Compare-and-swap operation - use low-level API directly
+function test_atomic_cas(counter::AbstractArray{T}) where T
+    OpenCL.atomic_cmpxchg!(pointer(counter), zero(T), one(T))
+    return
+end
+
 # Define atomic operations to test
 atomic_operations = [
     # op, init_val, expected_val
-    (:atomic_add!, 0, 1000),
-    (:atomic_sub!, 1000, 0),
-    (:atomic_and!, 3, 1),
-    (:atomic_or!, 3, 3),
-    (:atomic_xor!, 3, 3),
-    (:atomic_max!, 0, 1),
-    (:atomic_min!, 2, 1),
-    (:atomic_xchg!, 0, 1),
-    (:atomic_cas!, 0, 1),
+    (test_atomic_add, 0, 1000),
+    (test_atomic_sub, 1000, 0),
+    (test_atomic_and, typemax(UInt64), 0),
+    (test_atomic_or, 0, typemax(UInt64)),
+    (test_atomic_xor, 0, typemax(UInt32) << 8),
+    (test_atomic_max, 0, 1000),
+    (test_atomic_min, 1000, 1),
+    (test_atomic_xchg, 0, 1),
+    (test_atomic_cas, 0, 1),
 ]
 @testset "atomics" begin
-for (op, init_val, expected_val) in atomic_operations
-    for T in all_types
-        # Skip Int64/UInt64 if not supported
-        if sizeof(T) == 8 && T <: Integer && !("cl_khr_int64_extended_atomics" in dev.extensions)
-            continue
-        end
-
-        # Skip Float64 if not supported
-        if T == Float64 && !("cl_khr_fp64" in dev.extensions)
-            continue
-        end
-
-        # Bitwise operations (only valid for integers)
-        if op in [:atomic_and!, :atomic_or!, :atomic_xor!] && T <: AbstractFloat
-            continue
-        end
-
-        # Min/max operations (only supported for 32-bit integers in OpenCL)
-        if op in [:atomic_min!, :atomic_max!] && !(T in [Int32, UInt32])
-            continue
-        end
+@testset "$kernel_func - $T" for (kernel_func, init_val, expected_val) in atomic_operations, T in all_types
+    # Skip Int64/UInt64 if not supported
+    if sizeof(T) == 8 && T <: Integer && !("cl_khr_int64_extended_atomics" in dev.extensions)
+        continue
+    end
 
-        test_name = Symbol("test_", op, "_", T)
+    # Skip Float64 if not supported
+    if T == Float64 && !("cl_khr_fp64" in dev.extensions)
+        continue
+    end
 
-        if op in [:atomic_add!, :atomic_sub!]
-            # Arithmetic operations
-            if op == :atomic_add!
-                @eval function $test_name(counter)
-                    OpenCL.@atomic counter[] += one($T)
-                    return
-                end
-            else
-                @eval function $test_name(counter)
-                    OpenCL.@atomic counter[] -= one($T)
-                    return
-                end
-            end
-        elseif op in [:atomic_and!, :atomic_or!, :atomic_xor!]
-            # Bitwise operations
-            if op == :atomic_and!
-                @eval function $test_name(counter)
-                    OpenCL.@atomic counter[] &= one($T)
-                    return
-                end
-            elseif op == :atomic_or!
-                @eval function $test_name(counter)
-                    OpenCL.@atomic counter[] |= one($T)
-                    return
-                end
-            else # xor
-                @eval function $test_name(counter)
-                    OpenCL.@atomic counter[] ⊻= one($T)
-                    return
-                end
-            end
-        elseif op in [:atomic_max!, :atomic_min!]
-            # Min/max operations - use low-level API directly
-            if op == :atomic_max!
-                @eval function $test_name(counter)
-                    ptr = OpenCL.pointer(counter, 1)
-                    OpenCL.atomic_max!(ptr, one($T))
-                    return
-                end
-            else
-                @eval function $test_name(counter)
-                    ptr = OpenCL.pointer(counter, 1)
-                    OpenCL.atomic_min!(ptr, one($T))
-                    return
-                end
-            end
-        elseif op == :atomic_xchg!
-            # Exchange operation - use low-level API directly
-            @eval function $test_name(counter)
-                ptr = OpenCL.pointer(counter, 1)
-                OpenCL.atomic_xchg!(ptr, one($T))
-                return
-            end
-        elseif op == :atomic_cas!
-            # CAS operation - use low-level API directly (it's called atomic_cmpxchg!)
-            @eval function $test_name(counter)
-                ptr = OpenCL.pointer(counter, 1)
-                OpenCL.atomic_cmpxchg!(ptr, $T(0), one($T))
-                return
-            end
-        else
-            error("Unknown operation: $op")
-        end
+    # Bitwise operations (only valid for integers)
+    if kernel_func in [test_atomic_and, test_atomic_or, test_atomic_xor] && T <: AbstractFloat
+        continue
+    end
 
+    # Min/max operations (only supported for 32-bit integers in OpenCL)
+    if kernel_func in [test_atomic_min, test_atomic_max] && !(T in [Int32, UInt32])
+        continue
+    end
 
-        # Try to compile the kernel - this is the key test
-        a = OpenCL.zeros(T)
-        OpenCL.fill!(a, init_val)
-        kernel_func = @eval $test_name
-        OpenCL.@opencl global_size=1000 kernel_func(a)
-        result_val = Array(a)[1]
-        @test typeof(result_val) == T
-        @test result_val == T(expected_val)
+    if T <: Integer
+        init_val %= T
+        expected_val %= T
     end
+
+    a = OpenCL.fill(T(init_val))
+    @opencl global_size=1000 kernel_func(a)
+    result_val = OpenCL.@allowscalar a[]
+    @test result_val === T(expected_val)
 end
 
 

From e12f093219ce39b06e5b5b7ca4107cadc2bd66ac Mon Sep 17 00:00:00 2001
From: Simeon David Schaub <simeon@schaub.rocks>
Date: Thu, 27 Nov 2025 13:43:23 +0100
Subject: [PATCH 9/9] bump SPIRVIntrinsics version

---
 lib/intrinsics/Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/intrinsics/Project.toml b/lib/intrinsics/Project.toml
index ead792b9..338bb329 100644
--- a/lib/intrinsics/Project.toml
+++ b/lib/intrinsics/Project.toml
@@ -1,7 +1,7 @@
 name = "SPIRVIntrinsics"
 uuid = "71d1d633-e7e8-4a92-83a1-de8814b09ba8"
 authors = ["Tim Besard <tim.besard@gmail.com>"]
-version = "0.5.5"
+version = "0.5.6"
 
 [deps]
 ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04"