From 4559355399cd8da14de09965378a27b3ddf89e86 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Thu, 23 Apr 2026 10:59:22 +0200 Subject: [PATCH 1/7] Deprecate 3-arg library linking interface. Instead parse and link things lazily. --- src/GPUCompiler.jl | 2 ++ src/deprecated.jl | 23 ++++++++++++++++++++ src/driver.jl | 52 ++++++++++++++++++++++++++++++++++++---------- src/interface.jl | 5 +++-- src/rtlib.jl | 11 ---------- src/utils.jl | 4 ---- 6 files changed, 69 insertions(+), 28 deletions(-) create mode 100644 src/deprecated.jl diff --git a/src/GPUCompiler.jl b/src/GPUCompiler.jl index 72bf0855..2e646267 100644 --- a/src/GPUCompiler.jl +++ b/src/GPUCompiler.jl @@ -67,6 +67,8 @@ include("driver.jl") include("execution.jl") include("reflection.jl") +include("deprecated.jl") + include("precompile.jl") function __init__() diff --git a/src/deprecated.jl b/src/deprecated.jl new file mode 100644 index 00000000..a86b27d6 --- /dev/null +++ b/src/deprecated.jl @@ -0,0 +1,23 @@ +# Deprecations scheduled for removal in the next major release. + +function defs(mod::LLVM.Module) + Base.depwarn("`GPUCompiler.defs(mod)` is deprecated; inline `filter(f -> !isdeclaration(f), collect(functions(mod)))`.", + :defs) + filter(f -> !isdeclaration(f), collect(functions(mod))) +end + +function decls(mod::LLVM.Module) + Base.depwarn("`GPUCompiler.decls(mod)` is deprecated; inline `filter(f -> isdeclaration(f) && !LLVM.isintrinsic(f), collect(functions(mod)))`.", + :decls) + filter(f -> isdeclaration(f) && !LLVM.isintrinsic(f), collect(functions(mod))) +end + +link_library!(mod::LLVM.Module, lib::LLVM.Module) = link_library!(mod, [lib]) +function link_library!(mod::LLVM.Module, libs::Vector{LLVM.Module}) + Base.depwarn("`GPUCompiler.link_library!` is deprecated; call `LLVM.link!(mod, copy(lib))` directly, or `LLVM.link!(mod, lib; only_needed=true)` with a freshly-parsed library.", + :link_library!) + libs = [copy(lib) for lib in libs] + for lib in libs + link!(mod, lib) + end +end diff --git a/src/driver.jl b/src/driver.jl index 94179b56..73d1b35f 100644 --- a/src/driver.jl +++ b/src/driver.jl @@ -303,20 +303,50 @@ const __llvm_initialized = Ref(false) # load the runtime outside of a timing block (because it recurses into the compiler) if !uses_julia_runtime(job) runtime = load_runtime(job) - runtime_fns = LLVM.name.(defs(runtime)) - runtime_intrinsics = ["julia.gc_alloc_obj"] end @tracepoint "Library linking" begin - # target-specific libraries - undefined_fns = LLVM.name.(decls(ir)) - @tracepoint "target libraries" link_libraries!(job, ir, undefined_fns) - - # GPU run-time library - if !uses_julia_runtime(job) && any(fn -> fn in runtime_fns || - fn in runtime_intrinsics, - undefined_fns) - @tracepoint "runtime library" link_library!(ir, runtime) + # target-specific libraries. the legacy 3-arg override + # `link_libraries!(job, mod, undefined_fns)` is still honored with a + # depwarn; new overrides should target the 2-arg form. + @tracepoint "target libraries" begin + if hasmethod(link_libraries!, + Tuple{typeof(job), LLVM.Module, Vector{String}}) + Base.depwarn( + "3-arg `link_libraries!(job, mod, undefined_fns)` is deprecated; " * + "migrate your override to the 2-arg form `link_libraries!(job, mod)`. " * + "Instead of inspecting `undefined_fns` to decide what to link, " * + "parse the library lazily with `parse(LLVM.Module, bytes; lazy=true)` " * + "and link it with `LLVM.link!(mod, lib; only_needed=true)` — " * + "the linker will then materialize only the referenced symbols.", + :link_libraries!) + undefined_fns = [LLVM.name(f) for f in functions(ir) + if isdeclaration(f) && !LLVM.isintrinsic(f)] + link_libraries!(job, ir, undefined_fns) + else + link_libraries!(job, ir) + end + end + + # GPU run-time library: link if any of `ir`'s undefined functions are + # defined in the runtime, or if `julia.gc_alloc_obj` is present (which + # lower_gc_frame! rewrites into a `gc_pool_alloc` call later on) + if !uses_julia_runtime(job) + runtime_fns = Set{String}() + for f in functions(runtime) + isdeclaration(f) || push!(runtime_fns, LLVM.name(f)) + end + need_runtime = haskey(functions(ir), "julia.gc_alloc_obj") || + any(f -> isdeclaration(f) && !LLVM.isintrinsic(f) && + LLVM.name(f) in runtime_fns, + functions(ir)) + if need_runtime + # `load_runtime` returns a freshly-parsed module, so linking is + # destructive but safe — no defensive copy needed + @tracepoint "runtime library" link!(ir, runtime) + else + dispose(runtime) + end end end end diff --git a/src/interface.jl b/src/interface.jl index d7eb3406..1bace4b7 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -360,8 +360,9 @@ prepare_job!(@nospecialize(job::CompilerJob)) = return # early extension point used to link-in external bitcode files. # this is typically used by downstream packages to link vendor libraries. -link_libraries!(@nospecialize(job::CompilerJob), mod::LLVM.Module, - undefined_fns::Vector{String}) = return +# the legacy 3-arg form (taking an `undefined_fns::Vector{String}`) is still +# detected and called by the driver for back-compat, with a depwarn. +link_libraries!(@nospecialize(job::CompilerJob), mod::LLVM.Module) = return # finalization of the module, before deferred codegen and optimization finish_module!(@nospecialize(job::CompilerJob), mod::LLVM.Module, entry::LLVM.Function) = diff --git a/src/rtlib.jl b/src/rtlib.jl index 0ec50df1..e52af8c0 100644 --- a/src/rtlib.jl +++ b/src/rtlib.jl @@ -1,16 +1,5 @@ # compiler support for working with run-time libraries -link_library!(mod::LLVM.Module, lib::LLVM.Module) = link_library!(mod, [lib]) -function link_library!(mod::LLVM.Module, libs::Vector{LLVM.Module}) - # linking is destructive, so copy the libraries - libs = [copy(lib) for lib in libs] - - for lib in libs - link!(mod, lib) - end -end - - # # GPU run-time library # diff --git a/src/utils.jl b/src/utils.jl index 8be6a4b4..84aac2f1 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,7 +1,3 @@ -defs(mod::LLVM.Module) = filter(f -> !isdeclaration(f), collect(functions(mod))) -decls(mod::LLVM.Module) = filter(f -> isdeclaration(f) && !LLVM.isintrinsic(f), - collect(functions(mod))) - ## debug verification should_verify() = ccall(:jl_is_debugbuild, Cint, ()) == 1 || From 509dd3a35f46b67b42c2934b7a724a74e78f34dd Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Thu, 23 Apr 2026 11:18:42 +0200 Subject: [PATCH 2/7] Lazily parse and link the run-time library. Use `parse(LLVM.Module, bytes; lazy=true)` when reading the cached runtime bitcode, and link it with `only_needed=true` so the LLVM linker materializes just the functions `ir` references. This replaces the pre-scan over runtime function names and the conditional `link!`/`dispose` branch with a single unconditional call. `lower_gc_frame!` introduces `gc_pool_alloc` calls _after_ linking, so pre-declare it in `ir` when `julia.gc_alloc_obj` is present, ensuring the linker still picks the definition up. Co-Authored-By: Claude Opus 4.7 (1M context) --- Project.toml | 2 +- src/driver.jl | 27 ++++++++++----------------- src/rtlib.jl | 8 +++++--- 3 files changed, 16 insertions(+), 21 deletions(-) diff --git a/Project.toml b/Project.toml index c42cf446..267997d5 100644 --- a/Project.toml +++ b/Project.toml @@ -23,7 +23,7 @@ UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [compat] ExprTools = "0.1" InteractiveUtils = "1" -LLVM = "9.5" +LLVM = "9.6" Libdl = "1" Logging = "1" PrecompileTools = "1" diff --git a/src/driver.jl b/src/driver.jl index 73d1b35f..c8b235eb 100644 --- a/src/driver.jl +++ b/src/driver.jl @@ -328,25 +328,18 @@ const __llvm_initialized = Ref(false) end end - # GPU run-time library: link if any of `ir`'s undefined functions are - # defined in the runtime, or if `julia.gc_alloc_obj` is present (which - # lower_gc_frame! rewrites into a `gc_pool_alloc` call later on) + # GPU run-time library. `load_runtime` returns a lazily-parsed module, so + # `only_needed=true` linking only materializes functions that `ir` references. + # `lower_gc_frame!` rewrites `julia.gc_alloc_obj` into `gc_pool_alloc` _after_ + # linking, so pre-declare `gc_pool_alloc` in `ir` to make the linker pick it up. if !uses_julia_runtime(job) - runtime_fns = Set{String}() - for f in functions(runtime) - isdeclaration(f) || push!(runtime_fns, LLVM.name(f)) - end - need_runtime = haskey(functions(ir), "julia.gc_alloc_obj") || - any(f -> isdeclaration(f) && !LLVM.isintrinsic(f) && - LLVM.name(f) in runtime_fns, - functions(ir)) - if need_runtime - # `load_runtime` returns a freshly-parsed module, so linking is - # destructive but safe — no defensive copy needed - @tracepoint "runtime library" link!(ir, runtime) - else - dispose(runtime) + if haskey(functions(ir), "julia.gc_alloc_obj") + rt = Runtime.get(:gc_pool_alloc) + if !haskey(functions(ir), rt.llvm_name) + LLVM.Function(ir, rt.llvm_name, convert(LLVM.FunctionType, rt)) + end end + @tracepoint "runtime library" link!(ir, runtime; only_needed=true) end end end diff --git a/src/rtlib.jl b/src/rtlib.jl index e52af8c0..dee5f352 100644 --- a/src/rtlib.jl +++ b/src/rtlib.jl @@ -133,14 +133,16 @@ const runtime_cache = Dict{String, Vector{UInt8}}() name = "runtime_$(slug).bc" path = joinpath(compile_cache, name) - # cache the runtime library on disk and in memory + # cache the runtime library on disk and in memory. the bytes are kept alive by + # `runtime_cache`, so we can parse them lazily and let the linker materialize only + # the functions that end up being referenced. lib = if haskey(runtime_cache, slug) - parse(LLVM.Module, runtime_cache[slug]) + parse(LLVM.Module, runtime_cache[slug]; lazy=true) elseif ispath(path) runtime_cache[slug] = open(path) do io read(io) end - parse(LLVM.Module, runtime_cache[slug]) + parse(LLVM.Module, runtime_cache[slug]; lazy=true) end if lib === nothing From 5dab6d06601fcb13c8adf468b0d91d4755353706 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Thu, 23 Apr 2026 13:43:51 +0200 Subject: [PATCH 3/7] Improve comments. --- src/driver.jl | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/driver.jl b/src/driver.jl index c8b235eb..f2499e8d 100644 --- a/src/driver.jl +++ b/src/driver.jl @@ -257,7 +257,7 @@ const __llvm_initialized = Ref(false) merge!(compiled, dyn_meta.compiled) if haskey(dyn_meta, :gv_to_value) merge!(gv_to_value, dyn_meta.gv_to_value) - end + end @assert context(dyn_ir) == context(ir) link!(ir, dyn_ir) changed = true @@ -306,10 +306,9 @@ const __llvm_initialized = Ref(false) end @tracepoint "Library linking" begin - # target-specific libraries. the legacy 3-arg override - # `link_libraries!(job, mod, undefined_fns)` is still honored with a - # depwarn; new overrides should target the 2-arg form. + # target-specific libraries @tracepoint "target libraries" begin + # 3-arg version has been deprecated if hasmethod(link_libraries!, Tuple{typeof(job), LLVM.Module, Vector{String}}) Base.depwarn( @@ -328,17 +327,17 @@ const __llvm_initialized = Ref(false) end end - # GPU run-time library. `load_runtime` returns a lazily-parsed module, so - # `only_needed=true` linking only materializes functions that `ir` references. - # `lower_gc_frame!` rewrites `julia.gc_alloc_obj` into `gc_pool_alloc` _after_ - # linking, so pre-declare `gc_pool_alloc` in `ir` to make the linker pick it up. + # GPU run-time library if !uses_julia_runtime(job) + # Calls to `gc_pool_alloc` are inserted after linking, so spoof + # it here so that lazy linking pulls it in, if needed. if haskey(functions(ir), "julia.gc_alloc_obj") rt = Runtime.get(:gc_pool_alloc) if !haskey(functions(ir), rt.llvm_name) LLVM.Function(ir, rt.llvm_name, convert(LLVM.FunctionType, rt)) end end + @tracepoint "runtime library" link!(ir, runtime; only_needed=true) end end From dfd30c6d2ff0925199a8d1a4e497864404c3eb57 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Fri, 24 Apr 2026 08:16:43 +0200 Subject: [PATCH 4/7] Restore 3-arg fallback. --- src/deprecated.jl | 14 ++++++++++++++ src/driver.jl | 4 +--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/deprecated.jl b/src/deprecated.jl index a86b27d6..0209c49c 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -21,3 +21,17 @@ function link_library!(mod::LLVM.Module, libs::Vector{LLVM.Module}) link!(mod, lib) end end + +# no-op 3-arg fallback so downstream overrides that chain via +# `invoke(GPUCompiler.link_libraries!, Tuple{CompilerJob, Module, +# Vector{String}}, ...)` still resolve. +link_libraries!(@nospecialize(job::CompilerJob), mod::LLVM.Module, + undefined_fns::Vector{String}) = return + +# `true` when a downstream package has defined a 3-arg `link_libraries!` +# override for `job`, i.e. the dispatched method isn't our fallback above. +function has_legacy_link_libraries(@nospecialize(job::CompilerJob)) + m = which(link_libraries!, + Tuple{typeof(job), LLVM.Module, Vector{String}}) + return m.module !== @__MODULE__ +end diff --git a/src/driver.jl b/src/driver.jl index f2499e8d..089adae2 100644 --- a/src/driver.jl +++ b/src/driver.jl @@ -308,9 +308,7 @@ const __llvm_initialized = Ref(false) @tracepoint "Library linking" begin # target-specific libraries @tracepoint "target libraries" begin - # 3-arg version has been deprecated - if hasmethod(link_libraries!, - Tuple{typeof(job), LLVM.Module, Vector{String}}) + if has_legacy_link_libraries(job) Base.depwarn( "3-arg `link_libraries!(job, mod, undefined_fns)` is deprecated; " * "migrate your override to the 2-arg form `link_libraries!(job, mod)`. " * From c0cc414432c7221a9618cb907aaf583a92f03663 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Fri, 24 Apr 2026 08:45:20 +0200 Subject: [PATCH 5/7] More hacks for Enzyme. --- src/deprecated.jl | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/deprecated.jl b/src/deprecated.jl index 0209c49c..e1d2af7d 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -30,8 +30,16 @@ link_libraries!(@nospecialize(job::CompilerJob), mod::LLVM.Module, # `true` when a downstream package has defined a 3-arg `link_libraries!` # override for `job`, i.e. the dispatched method isn't our fallback above. +# +# Uses the same `jl_gf_invoke_lookup` path as `Core._hasmethod` rather than +# `which`, so it's safe to call from generated-function-adjacent contexts +# where `Base.get_world_counter()` returns `typemax(UInt)` and reflection +# queries like `which` / `methods` fail (see JuliaLang/julia#48611). +# All this because Enzyme.jl calls GPUCompiler.jl from a generated function. function has_legacy_link_libraries(@nospecialize(job::CompilerJob)) - m = which(link_libraries!, - Tuple{typeof(job), LLVM.Module, Vector{String}}) - return m.module !== @__MODULE__ + tt = Tuple{typeof(link_libraries!), typeof(job), + LLVM.Module, Vector{String}} + world = ccall(:jl_get_tls_world_age, UInt, ()) + m = ccall(:jl_gf_invoke_lookup, Any, (Any, Any, UInt), tt, nothing, world) + return m !== nothing && (m::Method).module !== @__MODULE__ end From fd74bd5a8a7e71e01743c88e2bba19858dedf872 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Fri, 24 Apr 2026 08:48:26 +0200 Subject: [PATCH 6/7] Tweak comment. [ci skip] --- src/interface.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/interface.jl b/src/interface.jl index 1bace4b7..46389621 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -360,8 +360,6 @@ prepare_job!(@nospecialize(job::CompilerJob)) = return # early extension point used to link-in external bitcode files. # this is typically used by downstream packages to link vendor libraries. -# the legacy 3-arg form (taking an `undefined_fns::Vector{String}`) is still -# detected and called by the driver for back-compat, with a depwarn. link_libraries!(@nospecialize(job::CompilerJob), mod::LLVM.Module) = return # finalization of the module, before deferred codegen and optimization From 5e9195e00746268ba58addad44f2a6e9bc0bb4c0 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Fri, 24 Apr 2026 09:04:49 +0200 Subject: [PATCH 7/7] Get rid of the runtime cache. --- src/rtlib.jl | 70 +++++++++++++++++----------------------------------- 1 file changed, 23 insertions(+), 47 deletions(-) diff --git a/src/rtlib.jl b/src/rtlib.jl index dee5f352..042d76db 100644 --- a/src/rtlib.jl +++ b/src/rtlib.jl @@ -115,62 +115,38 @@ function build_runtime(@nospecialize(job::CompilerJob)) mod end -const runtime_lock = ReentrantLock() - -const runtime_cache = Dict{String, Vector{UInt8}}() - @locked function load_runtime(@nospecialize(job::CompilerJob)) global compile_cache if compile_cache === nothing # during precompilation return build_runtime(job) end - lock(runtime_lock) do - slug = runtime_slug(job) - if !supports_typed_pointers(context()) - slug *= "-opaque" - end - name = "runtime_$(slug).bc" - path = joinpath(compile_cache, name) - - # cache the runtime library on disk and in memory. the bytes are kept alive by - # `runtime_cache`, so we can parse them lazily and let the linker materialize only - # the functions that end up being referenced. - lib = if haskey(runtime_cache, slug) - parse(LLVM.Module, runtime_cache[slug]; lazy=true) - elseif ispath(path) - runtime_cache[slug] = open(path) do io - read(io) - end - parse(LLVM.Module, runtime_cache[slug]; lazy=true) - end - - if lib === nothing - @debug "Building the GPU runtime library at $path" - mkpath(compile_cache) - lib = build_runtime(job) - - # atomic write to disk - temp_path, io = mktemp(dirname(path); cleanup=false) - write(io, lib) - close(io) - @static if VERSION >= v"1.12.0-DEV.1023" - mv(temp_path, path; force=true) - else - Base.rename(temp_path, path, force=true) - end + slug = runtime_slug(job) + if !supports_typed_pointers(context()) + slug *= "-opaque" + end + name = "runtime_$(slug).bc" + path = joinpath(compile_cache, name) + + if !ispath(path) + @debug "Building the GPU runtime library at $path" + mkpath(compile_cache) + lib = build_runtime(job) + + # atomic write to disk + temp_path, io = mktemp(dirname(path); cleanup=false) + write(io, lib) + close(io) + @static if VERSION >= v"1.12.0-DEV.1023" + mv(temp_path, path; force=true) + else + Base.rename(temp_path, path, force=true) end - - return lib end + + return parse(LLVM.Module, MemoryBufferFile(path); lazy=true) end # remove the existing cache # NOTE: call this function from global scope, so any change triggers recompilation. -function reset_runtime() - lock(runtime_lock) do - rm(compile_cache; recursive=true, force=true) - end - - return -end +reset_runtime() = rm(compile_cache; recursive=true, force=true)