From 1dca9203e03929868c601f6855fcd231e6aa087b Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Tue, 5 May 2026 20:46:48 +0200 Subject: [PATCH] Revert "Add @device_function macro for AOT compilation (#749)" This reverts commit 1976f0dd8880003c4407666f348d48003a3e27fc. --- src/driver.jl | 1 + src/jlgen.jl | 9 +++++---- src/rtlib.jl | 1 - src/runtime.jl | 7 ++----- src/utils.jl | 26 -------------------------- test/utils.jl | 37 ------------------------------------- 6 files changed, 8 insertions(+), 73 deletions(-) diff --git a/src/driver.jl b/src/driver.jl index 9ebd5e3d..4f94b0bb 100644 --- a/src/driver.jl +++ b/src/driver.jl @@ -93,6 +93,7 @@ function compile_unhooked(output::Symbol, @nospecialize(job::CompilerJob); kwarg ## LLVM IR ir, ir_meta = emit_llvm(job) + if output == :llvm if job.config.strip @tracepoint "strip debug info" strip_debuginfo!(ir) diff --git a/src/jlgen.jl b/src/jlgen.jl index db3ab6e7..21216d79 100644 --- a/src/jlgen.jl +++ b/src/jlgen.jl @@ -293,6 +293,10 @@ end end # !HAS_INTEGRATED_CACHE +## method overrides + +Base.Experimental.@MethodTable(GLOBAL_METHOD_TABLE) + # Implements a priority lookup for method tables, where the first match in the stack get's returned. # An alternative to this would be to use a "Union" where we would query the parent method table and # do a most-specific match. @@ -489,10 +493,7 @@ CC.lock_mi_inference(interp::GPUInterpreter, mi::MethodInstance) = nothing CC.unlock_mi_inference(interp::GPUInterpreter, mi::MethodInstance) = nothing function CC.add_remark!(interp::GPUInterpreter, sv::CC.InferenceState, msg) - # NOTE: @safe_debug is disabled here because including logging/warning code causes - # CPU runtime functions (ccalls to Julia internals) to leak into the GPU IR, - # breaking AOT compilation. See PR #749 for details. - return nothing + @safe_debug "Inference remark during GPU compilation of $(sv.linfo): $msg" end CC.may_optimize(interp::GPUInterpreter) = true diff --git a/src/rtlib.jl b/src/rtlib.jl index 042d76db..12fc321b 100644 --- a/src/rtlib.jl +++ b/src/rtlib.jl @@ -66,7 +66,6 @@ function emit_function!(mod, config::CompilerConfig, f, method) new_mod, meta = compile_unhooked(:llvm, CompilerJob(source, config)) ft = function_type(meta.entry) expected_ft = convert(LLVM.FunctionType, method) - if return_type(ft) != return_type(expected_ft) error("Invalid return type for runtime function '$(method.name)': expected $(return_type(expected_ft)), got $(return_type(ft))") end diff --git a/src/runtime.jl b/src/runtime.jl index 2f7312e1..2b11d915 100644 --- a/src/runtime.jl +++ b/src/runtime.jl @@ -71,7 +71,6 @@ function compile(def, return_type, types, llvm_return_type=nothing, llvm_types=n meth = RuntimeMethodInstance(def, return_type, types, name, llvm_return_type, llvm_types, llvm_name) - if haskey(methods, name) error("Runtime function $name has already been registered!") end @@ -83,10 +82,8 @@ function compile(def, return_type, types, llvm_return_type=nothing, llvm_types=n # using the new nonrecursive codegen to handle function lookup ourselves? if def isa Symbol args = [gensym() for typ in types] - @eval GPUCompiler.@device_function($return_type, - @inline $def($(args...)) = - ccall($("extern $llvm_name"), llvmcall, $return_type, ($(types...),), $(args...)) - ) + @eval @inline $def($(args...)) = + ccall($("extern $llvm_name"), llvmcall, $return_type, ($(types...),), $(args...)) end return diff --git a/src/utils.jl b/src/utils.jl index 84aac2f1..a403408f 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -234,29 +234,3 @@ end return inits end end -## method overrides - -Base.Experimental.@MethodTable(GLOBAL_METHOD_TABLE) -using ExprTools: splitdef, combinedef -macro device_function(rt, ex) - ex = macroexpand(__module__, ex) - def = splitdef(ex) - - # generate a function that warns and returns the expected type - # FIXME: The type may not have a default constructor, what do we do then? - # Currently we are using the constructor with an Int64(1) as an argument. - # NOTE: using Int64(1) is a bit odd. This is because Ptr(Int64(0)) == C_NULL, and julia code lowering - # seems to get rid of this automatically. - def[:body] = quote - $rt(1) - end - - esc(quote - $(combinedef(def)) - - # NOTE: no use of `@consistent_overlay` here because the regular function errors - Base.Experimental.@overlay($(GPUCompiler).GLOBAL_METHOD_TABLE, $ex) - end) -end - - diff --git a/test/utils.jl b/test/utils.jl index 0b9ff992..4ce2258c 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -171,40 +171,3 @@ end # Check that we can call this function from the CPU, to support deferred codegen for Enzyme. @test ccall("extern deferred_codegen", llvmcall, UInt, (UInt,), 3) == 3 end - -@testset "@device_function macro" begin - # Test that @device_function creates both CPU stub and overlay - # The macro should: - # 1. Define a CPU-visible function that returns the expected type - # 2. Register an overlay in GLOBAL_METHOD_TABLE for GPU compilation - - # Create a test module to contain the device functions - test_mod = @eval module $(gensym("DeviceFunctionTest")) - using GPUCompiler - - # Test with Ptr return type (common for runtime functions) - GPUCompiler.@device_function(Ptr{Nothing}, - @inline test_device_ptr() = ccall("extern gpu_test", llvmcall, Ptr{Nothing}, ()) - ) - - # Test with primitive return type - GPUCompiler.@device_function(Nothing, - @inline test_device_nothing() = ccall("extern gpu_test2", llvmcall, Nothing, ()) - ) - end - - # Verify the functions are defined in the test module - @test isdefined(test_mod, :test_device_ptr) - @test isdefined(test_mod, :test_device_nothing) - - # Verify the overlay exists in the global method table - mt_view = GPUCompiler.get_method_table_view(Base.get_world_counter(), GPUCompiler.GLOBAL_METHOD_TABLE) - sig_ptr = Tuple{typeof(test_mod.test_device_ptr)} - sig_nothing = Tuple{typeof(test_mod.test_device_nothing)} - - # The overlay should be findable in the method table - result_ptr = findsup(sig_ptr, mt_view) - result_nothing = findsup(sig_nothing, mt_view) - @test result_ptr !== nothing - @test result_nothing !== nothing -end