From 6144837de85725b0c58ba5a22b6cc4005ca55c99 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 19 May 2026 20:58:07 +0200 Subject: [PATCH 1/3] PTX: Adjust child-call test for single-line emission on LLVM 21. --- test/ptx.jl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/ptx.jl b/test/ptx.jl index 1bda96ef..dd89d61a 100644 --- a/test/ptx.jl +++ b/test/ptx.jl @@ -188,10 +188,13 @@ if :NVPTX in LLVM.backends() end end + # the assembler emits `call.uni` and the callee name on the same line in + # LLVM 21+, but on separate lines on older releases. @test @filecheck begin @check_label ".visible .func {{(julia|j)_parent[0-9_]*}}" @check "call.uni" - @check_next "{{(julia|j)_child_}}" + @check_same cond=(LLVM.version() >= v"21") "{{(julia|j)_child_}}" + @check_next cond=(LLVM.version() < v"21") "{{(julia|j)_child_}}" PTX.code_native(mod.parent, Tuple{Int64}) end end From d29798016bed977e67cf119f5b0d36ea884239bd Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 19 May 2026 20:58:16 +0200 Subject: [PATCH 2/3] PTX: Emit kernel property annotations as function attributes on LLVM 21+. The NVPTX back-end on LLVM 21 dropped its dependence on the legacy nvvm.annotations metadata for maxntid/reqntid/minctasm/maxnreg; the asm printer now reads function-level attributes that LLVM auto-upgrades the annotations into at IR parse time. Modules built in-memory don't go through that auto-upgrade, so emit the attributes ourselves on LLVM 21+. Also move the metadata emission ahead of optimization so the AnnotationCache lookups done by NVVMIntrRangePass on older releases don't latch onto a stale empty entry. --- src/ptx.jl | 98 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 59 insertions(+), 39 deletions(-) diff --git a/src/ptx.jl b/src/ptx.jl index bd824642..aac856b7 100644 --- a/src/ptx.jl +++ b/src/ptx.jl @@ -164,6 +164,65 @@ function finish_module!(@nospecialize(job::CompilerJob{PTXCompilerTarget}), if job.config.kernel # work around bad byval codegen (JuliaGPU/GPUCompiler.jl#92) entry = lower_byval(job, mod, entry) + + # emit kernel property annotations into the module. these have to be in + # place before optimization runs: LLVM's NVPTX target machine registers a + # PipelineStart EP callback that schedules NVVMIntrRangePass, which calls + # `getMaxNTID` on every function. That populates a module-keyed + # `AnnotationCache` entry (empty, because `nvvm.annotations` isn't there + # yet), and subsequent lookups by the asm printer hit the stale empty + # entry instead of re-reading the metadata. + annotations = Metadata[entry] + + ## kernel metadata + append!(annotations, [MDString("kernel"), + ConstantInt(Int32(1))]) + + ## expected CTA sizes + if job.config.target.minthreads !== nothing + bounds = ntuple(i -> i <= length(job.config.target.minthreads) ? + job.config.target.minthreads[i] : 1, 3) + for (bound, name) in zip(bounds, (:x, :y, :z)) + append!(annotations, [MDString("reqntid$name"), + ConstantInt(Int32(bound))]) + end + if LLVM.version() >= v"21" + push!(function_attributes(entry), + StringAttribute("nvvm.reqntid", join(bounds, ","))) + end + end + if job.config.target.maxthreads !== nothing + bounds = ntuple(i -> i <= length(job.config.target.maxthreads) ? + job.config.target.maxthreads[i] : 1, 3) + for (bound, name) in zip(bounds, (:x, :y, :z)) + append!(annotations, [MDString("maxntid$name"), + ConstantInt(Int32(bound))]) + end + if LLVM.version() >= v"21" + push!(function_attributes(entry), + StringAttribute("nvvm.maxntid", join(bounds, ","))) + end + end + + if job.config.target.blocks_per_sm !== nothing + append!(annotations, [MDString("minctasm"), + ConstantInt(Int32(job.config.target.blocks_per_sm))]) + if LLVM.version() >= v"21" + push!(function_attributes(entry), + StringAttribute("nvvm.minctasm", string(job.config.target.blocks_per_sm))) + end + end + + if job.config.target.maxregs !== nothing + append!(annotations, [MDString("maxnreg"), + ConstantInt(Int32(job.config.target.maxregs))]) + if LLVM.version() >= v"21" + push!(function_attributes(entry), + StringAttribute("nvvm.maxnreg", string(job.config.target.maxregs))) + end + end + + push!(metadata(mod)["nvvm.annotations"], MDNode(annotations)) end # we emit properties (of the device and ptx isa) as private global constants, @@ -227,45 +286,6 @@ function finish_ir!(@nospecialize(job::CompilerJob{PTXCompilerTarget}), end end - if job.config.kernel - # add metadata annotations for the assembler to the module - - # property annotations - annotations = Metadata[entry] - - ## kernel metadata - append!(annotations, [MDString("kernel"), - ConstantInt(Int32(1))]) - - ## expected CTA sizes - if job.config.target.minthreads !== nothing - for (dim, name) in enumerate([:x, :y, :z]) - bound = dim <= length(job.config.target.minthreads) ? job.config.target.minthreads[dim] : 1 - append!(annotations, [MDString("reqntid$name"), - ConstantInt(Int32(bound))]) - end - end - if job.config.target.maxthreads !== nothing - for (dim, name) in enumerate([:x, :y, :z]) - bound = dim <= length(job.config.target.maxthreads) ? job.config.target.maxthreads[dim] : 1 - append!(annotations, [MDString("maxntid$name"), - ConstantInt(Int32(bound))]) - end - end - - if job.config.target.blocks_per_sm !== nothing - append!(annotations, [MDString("minctasm"), - ConstantInt(Int32(job.config.target.blocks_per_sm))]) - end - - if job.config.target.maxregs !== nothing - append!(annotations, [MDString("maxnreg"), - ConstantInt(Int32(job.config.target.maxregs))]) - end - - push!(metadata(mod)["nvvm.annotations"], MDNode(annotations)) - end - return entry end From 8d5e172a1806129431af300c088c56164e90b15d Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Wed, 20 May 2026 06:41:41 +0200 Subject: [PATCH 3/3] Use captures(none) on LLVM 21+ in place of the nocapture attribute. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LLVM 21 (commit 29441e4f5fa5, llvmorg-21.1.0) converted the `nocapture` enum attribute into an integer-valued `captures(N)` IntAttr, and the `nocapture` name is no longer registered. The C API's LLVMGetEnumAttributeKindForName("nocapture") therefore returns 0 (Attribute::None) on LLVM 21, and `EnumAttribute("nocapture", 0)` silently produces a None-kinded attribute. When that attribute reaches the LLVM IR verifier — in this repo via emit_llvm()'s `optimize!` call on a Metal kernel with a by-reference scalar argument — `Verifier::verifyParameterAttrs()` calls `Attribute::canUseAsParamAttr(0)`, which does `Index = Kind - 1` and dereferences `AttrPropTable[(unsigned)-1]`. That OOB read manifested on nightly CI as a silent metal worker SIGSEGV. Emit `captures(none)` (value 0 == `CaptureInfo::none()`) on LLVM 21+, keep `nocapture` on older releases. Same change in irgen.jl's kernel-state parameter setup prevents the equivalent crash for backends that take a state pointer. --- src/irgen.jl | 7 +++++-- src/metal.jl | 8 ++++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/irgen.jl b/src/irgen.jl index 744904e5..8282becd 100644 --- a/src/irgen.jl +++ b/src/irgen.jl @@ -901,8 +901,11 @@ function kernel_state_to_reference!(@nospecialize(job::CompilerJob), mod::LLVM.M # set the attributes for the state pointer parameter attrs = parameter_attributes(new_f, 1) - # the pointer itself cannot be captured since we immediately load from it - push!(attrs, EnumAttribute("nocapture", 0)) + # the pointer itself cannot be captured since we immediately load from it. + # `nocapture` was replaced by `captures(none)` (an integer-valued IntAttr, + # value 0 == CaptureInfo::none()) in LLVM 21. + push!(attrs, LLVM.version() >= v"21" ? EnumAttribute("captures", 0) + : EnumAttribute("nocapture", 0)) # each kernel state is separate push!(attrs, EnumAttribute("noalias", 0)) # the state is read-only diff --git a/src/metal.jl b/src/metal.jl index bd4f8e66..cc192294 100644 --- a/src/metal.jl +++ b/src/metal.jl @@ -540,8 +540,12 @@ function pass_by_reference!(@nospecialize(job::CompilerJob), mod::LLVM.Module, f if bits_as_reference[i] # add appropriate attributes # TODO: other attributes (nonnull, readonly, align, dereferenceable)? - ## we've just emitted a load, so the pointer itself cannot be captured - push!(parameter_attributes(new_f, i), EnumAttribute("nocapture", 0)) + ## we've just emitted a load, so the pointer itself cannot be captured. + ## `nocapture` was replaced by `captures(none)` in LLVM 21 (an + ## integer-valued IntAttr, value 0 == CaptureInfo::none()). + push!(parameter_attributes(new_f, i), + LLVM.version() >= v"21" ? EnumAttribute("captures", 0) + : EnumAttribute("nocapture", 0)) ## Metal.jl emits separate buffers for each scalar argument push!(parameter_attributes(new_f, i), EnumAttribute("noalias", 0)) end