diff --git a/src/irgen.jl b/src/irgen.jl index 744904e5..8282becd 100644 --- a/src/irgen.jl +++ b/src/irgen.jl @@ -901,8 +901,11 @@ function kernel_state_to_reference!(@nospecialize(job::CompilerJob), mod::LLVM.M # set the attributes for the state pointer parameter attrs = parameter_attributes(new_f, 1) - # the pointer itself cannot be captured since we immediately load from it - push!(attrs, EnumAttribute("nocapture", 0)) + # the pointer itself cannot be captured since we immediately load from it. + # `nocapture` was replaced by `captures(none)` (an integer-valued IntAttr, + # value 0 == CaptureInfo::none()) in LLVM 21. + push!(attrs, LLVM.version() >= v"21" ? EnumAttribute("captures", 0) + : EnumAttribute("nocapture", 0)) # each kernel state is separate push!(attrs, EnumAttribute("noalias", 0)) # the state is read-only diff --git a/src/metal.jl b/src/metal.jl index bd4f8e66..cc192294 100644 --- a/src/metal.jl +++ b/src/metal.jl @@ -540,8 +540,12 @@ function pass_by_reference!(@nospecialize(job::CompilerJob), mod::LLVM.Module, f if bits_as_reference[i] # add appropriate attributes # TODO: other attributes (nonnull, readonly, align, dereferenceable)? - ## we've just emitted a load, so the pointer itself cannot be captured - push!(parameter_attributes(new_f, i), EnumAttribute("nocapture", 0)) + ## we've just emitted a load, so the pointer itself cannot be captured. + ## `nocapture` was replaced by `captures(none)` in LLVM 21 (an + ## integer-valued IntAttr, value 0 == CaptureInfo::none()). + push!(parameter_attributes(new_f, i), + LLVM.version() >= v"21" ? EnumAttribute("captures", 0) + : EnumAttribute("nocapture", 0)) ## Metal.jl emits separate buffers for each scalar argument push!(parameter_attributes(new_f, i), EnumAttribute("noalias", 0)) end diff --git a/src/ptx.jl b/src/ptx.jl index bd824642..aac856b7 100644 --- a/src/ptx.jl +++ b/src/ptx.jl @@ -164,6 +164,65 @@ function finish_module!(@nospecialize(job::CompilerJob{PTXCompilerTarget}), if job.config.kernel # work around bad byval codegen (JuliaGPU/GPUCompiler.jl#92) entry = lower_byval(job, mod, entry) + + # emit kernel property annotations into the module. these have to be in + # place before optimization runs: LLVM's NVPTX target machine registers a + # PipelineStart EP callback that schedules NVVMIntrRangePass, which calls + # `getMaxNTID` on every function. That populates a module-keyed + # `AnnotationCache` entry (empty, because `nvvm.annotations` isn't there + # yet), and subsequent lookups by the asm printer hit the stale empty + # entry instead of re-reading the metadata. + annotations = Metadata[entry] + + ## kernel metadata + append!(annotations, [MDString("kernel"), + ConstantInt(Int32(1))]) + + ## expected CTA sizes + if job.config.target.minthreads !== nothing + bounds = ntuple(i -> i <= length(job.config.target.minthreads) ? + job.config.target.minthreads[i] : 1, 3) + for (bound, name) in zip(bounds, (:x, :y, :z)) + append!(annotations, [MDString("reqntid$name"), + ConstantInt(Int32(bound))]) + end + if LLVM.version() >= v"21" + push!(function_attributes(entry), + StringAttribute("nvvm.reqntid", join(bounds, ","))) + end + end + if job.config.target.maxthreads !== nothing + bounds = ntuple(i -> i <= length(job.config.target.maxthreads) ? + job.config.target.maxthreads[i] : 1, 3) + for (bound, name) in zip(bounds, (:x, :y, :z)) + append!(annotations, [MDString("maxntid$name"), + ConstantInt(Int32(bound))]) + end + if LLVM.version() >= v"21" + push!(function_attributes(entry), + StringAttribute("nvvm.maxntid", join(bounds, ","))) + end + end + + if job.config.target.blocks_per_sm !== nothing + append!(annotations, [MDString("minctasm"), + ConstantInt(Int32(job.config.target.blocks_per_sm))]) + if LLVM.version() >= v"21" + push!(function_attributes(entry), + StringAttribute("nvvm.minctasm", string(job.config.target.blocks_per_sm))) + end + end + + if job.config.target.maxregs !== nothing + append!(annotations, [MDString("maxnreg"), + ConstantInt(Int32(job.config.target.maxregs))]) + if LLVM.version() >= v"21" + push!(function_attributes(entry), + StringAttribute("nvvm.maxnreg", string(job.config.target.maxregs))) + end + end + + push!(metadata(mod)["nvvm.annotations"], MDNode(annotations)) end # we emit properties (of the device and ptx isa) as private global constants, @@ -227,45 +286,6 @@ function finish_ir!(@nospecialize(job::CompilerJob{PTXCompilerTarget}), end end - if job.config.kernel - # add metadata annotations for the assembler to the module - - # property annotations - annotations = Metadata[entry] - - ## kernel metadata - append!(annotations, [MDString("kernel"), - ConstantInt(Int32(1))]) - - ## expected CTA sizes - if job.config.target.minthreads !== nothing - for (dim, name) in enumerate([:x, :y, :z]) - bound = dim <= length(job.config.target.minthreads) ? job.config.target.minthreads[dim] : 1 - append!(annotations, [MDString("reqntid$name"), - ConstantInt(Int32(bound))]) - end - end - if job.config.target.maxthreads !== nothing - for (dim, name) in enumerate([:x, :y, :z]) - bound = dim <= length(job.config.target.maxthreads) ? job.config.target.maxthreads[dim] : 1 - append!(annotations, [MDString("maxntid$name"), - ConstantInt(Int32(bound))]) - end - end - - if job.config.target.blocks_per_sm !== nothing - append!(annotations, [MDString("minctasm"), - ConstantInt(Int32(job.config.target.blocks_per_sm))]) - end - - if job.config.target.maxregs !== nothing - append!(annotations, [MDString("maxnreg"), - ConstantInt(Int32(job.config.target.maxregs))]) - end - - push!(metadata(mod)["nvvm.annotations"], MDNode(annotations)) - end - return entry end diff --git a/test/ptx.jl b/test/ptx.jl index 1bda96ef..dd89d61a 100644 --- a/test/ptx.jl +++ b/test/ptx.jl @@ -188,10 +188,13 @@ if :NVPTX in LLVM.backends() end end + # the assembler emits `call.uni` and the callee name on the same line in + # LLVM 21+, but on separate lines on older releases. @test @filecheck begin @check_label ".visible .func {{(julia|j)_parent[0-9_]*}}" @check "call.uni" - @check_next "{{(julia|j)_child_}}" + @check_same cond=(LLVM.version() >= v"21") "{{(julia|j)_child_}}" + @check_next cond=(LLVM.version() < v"21") "{{(julia|j)_child_}}" PTX.code_native(mod.parent, Tuple{Int64}) end end