From 5b3f0ac4642b39d6f848f8eae3be59b0f38dd212 Mon Sep 17 00:00:00 2001 From: Billy Moses Date: Wed, 6 May 2026 13:38:38 -0500 Subject: [PATCH 1/3] Turn instcombine into a flag (and use instsimplify otherwise) --- src/driver.jl | 12 ++++++++++-- src/gcn.jl | 6 +++++- src/interface.jl | 3 +++ src/metal.jl | 28 ++++++++++++++++++++++------ src/optim.jl | 30 +++++++++++++++++++++++++----- src/ptx.jl | 11 ++++++++++- test/helpers/ptx.jl | 3 ++- test/ptx.jl | 8 ++++++++ 8 files changed, 85 insertions(+), 16 deletions(-) diff --git a/src/driver.jl b/src/driver.jl index 4f94b0bb..b656ebdd 100644 --- a/src/driver.jl +++ b/src/driver.jl @@ -295,7 +295,11 @@ const __llvm_initialized = Ref(false) # minimal optimization to convert the inttoptr/call into a direct call @dispose pb=NewPMPassBuilder() begin add!(pb, NewPMFunctionPassManager()) do fpm - add!(fpm, InstCombinePass()) + if use_instcombine(job) + add!(fpm, InstCombinePass()) + else + add!(fpm, InstSimplifyPass()) + end end run!(pb, ir, llvm_machine(job.config.target)) end @@ -386,7 +390,11 @@ const __llvm_initialized = Ref(false) if has_deferred_jobs @dispose pb=NewPMPassBuilder() begin add!(pb, NewPMFunctionPassManager()) do fpm - add!(fpm, InstCombinePass()) + if use_instcombine(job) + add!(fpm, InstCombinePass()) + else + add!(fpm, InstSimplifyPass()) + end end add!(pb, AlwaysInlinerPass()) add!(pb, NewPMFunctionPassManager()) do fpm diff --git a/src/gcn.jl b/src/gcn.jl index e310b5c5..d186c3cf 100644 --- a/src/gcn.jl +++ b/src/gcn.jl @@ -68,7 +68,11 @@ function finish_ir!( add!(pb, NewPMFunctionPassManager()) do fpm add!(fpm, InferAddressSpacesPass()) add!(fpm, SROAPass()) - add!(fpm, InstCombinePass()) + if use_instcombine(job) + add!(fpm, InstCombinePass()) + else + add!(fpm, InstSimplifyPass()) + end add!(fpm, EarlyCSEPass()) add!(fpm, SimplifyCFGPass()) end diff --git a/src/interface.jl b/src/interface.jl index 46389621..4e2576ef 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -226,6 +226,9 @@ end # Has the runtime available and does not require special handling uses_julia_runtime(@nospecialize(job::CompilerJob)) = false +# whether to use instcombine or instsimplify +use_instcombine(@nospecialize(job::CompilerJob)) = true + # Is it legal to run vectorization passes on this target can_vectorize(@nospecialize(job::CompilerJob)) = false diff --git a/src/metal.jl b/src/metal.jl index 77c9ff89..b7a0c40d 100644 --- a/src/metal.jl +++ b/src/metal.jl @@ -162,7 +162,7 @@ end # note that it isn't enough to remove the function attribute, because the Metal LLVM # compiler re-optimizes and will rediscover the property. to avoid this, we inline # all functions that are marked noreturn, i.e., until LLVM cannot rediscover it. -function hide_noreturn!(mod::LLVM.Module) +function hide_noreturn!(job::CompilerJob, mod::LLVM.Module) noreturn_attr = EnumAttribute("noreturn", 0) noinline_attr = EnumAttribute("noinline", 0) alwaysinline_attr = EnumAttribute("alwaysinline", 0) @@ -184,7 +184,11 @@ function hide_noreturn!(mod::LLVM.Module) add!(pb, AlwaysInlinerPass()) add!(pb, NewPMFunctionPassManager()) do fpm add!(fpm, SimplifyCFGPass()) - add!(fpm, InstCombinePass()) + if use_instcombine(job) + add!(fpm, InstCombinePass()) + else + add!(fpm, InstSimplifyPass()) + end end run!(pb, mod) end @@ -215,7 +219,11 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L add!(pb, NewPMFunctionPassManager()) do fpm add!(fpm, InferAddressSpacesPass()) add!(fpm, SROAPass()) - add!(fpm, InstCombinePass()) + if use_instcombine(job) + add!(fpm, InstCombinePass()) + else + add!(fpm, InstSimplifyPass()) + end add!(fpm, EarlyCSEPass()) add!(fpm, SimplifyCFGPass()) end @@ -228,7 +236,7 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L end # JuliaGPU/Metal.jl#113 - hide_noreturn!(mod) + hide_noreturn!(job, mod) # get rid of unreachable control flow (JuliaGPU/Metal.jl#370). # note that this currently works in tandem with the `hide_noreturn!` pass above, @@ -250,7 +258,11 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L add!(pb, AlwaysInlinerPass()) add!(pb, NewPMFunctionPassManager()) do fpm add!(fpm, SimplifyCFGPass()) - add!(fpm, InstCombinePass()) + if use_instcombine(job) + add!(fpm, InstCombinePass()) + else + add!(fpm, InstSimplifyPass()) + end end run!(pb, mod) end @@ -386,7 +398,11 @@ function add_parameter_address_spaces!(@nospecialize(job::CompilerJob), mod::LLV add!(pb, SimplifyCFGPass()) add!(pb, SROAPass()) add!(pb, EarlyCSEPass()) - add!(pb, InstCombinePass()) + if use_instcombine(job) + add!(pb, InstCombinePass()) + else + add!(pb, InstSimplifyPass()) + end run!(pb, mod) end diff --git a/src/optim.jl b/src/optim.jl index 95834f0b..a200a353 100644 --- a/src/optim.jl +++ b/src/optim.jl @@ -99,14 +99,22 @@ function buildEarlyOptimizerPipeline(mpm, @nospecialize(job::CompilerJob), opt_l add!(mpm, NewPMFunctionPassManager()) do fpm if opt_level >= 2 add!(fpm, SROAPass()) - add!(fpm, InstCombinePass()) + if use_instcombine(job) + add!(fpm, InstCombinePass()) + else + add!(fpm, InstSimplifyPass()) + end add!(fpm, JumpThreadingPass()) add!(fpm, CorrelatedValuePropagationPass()) add!(fpm, ReassociatePass()) add!(fpm, EarlyCSEPass()) add!(fpm, AllocOptPass()) else - add!(fpm, InstCombinePass()) + if use_instcombine(job) + add!(fpm, InstCombinePass()) + else + add!(fpm, InstSimplifyPass()) + end add!(fpm, EarlyCSEPass()) end end @@ -157,7 +165,11 @@ function buildScalarOptimizerPipeline(fpm, @nospecialize(job::CompilerJob), opt_ add!(fpm, CorrelatedValuePropagationPass()) add!(fpm, DCEPass()) add!(fpm, IRCEPass()) - add!(fpm, InstCombinePass()) + if use_instcombine(job) + add!(fpm, InstCombinePass()) + else + add!(fpm, InstSimplifyPass()) + end add!(fpm, JumpThreadingPass()) end if opt_level >= 3 @@ -181,7 +193,11 @@ function buildVectorPipeline(fpm, @nospecialize(job::CompilerJob), opt_level) add!(fpm, InjectTLIMappings()) add!(fpm, LoopVectorizePass()) add!(fpm, LoopLoadEliminationPass()) - add!(fpm, InstCombinePass()) + if use_instcombine(job) + add!(fpm, InstCombinePass()) + else + add!(fpm, InstSimplifyPass()) + end add!(fpm, SimplifyCFGPass(; AggressiveSimplifyCFGOptions...)) add!(fpm, SLPVectorizerPass()) add!(fpm, VectorCombinePass()) @@ -250,7 +266,11 @@ function buildIntrinsicLoweringPipeline(mpm, @nospecialize(job::CompilerJob), op if opt_level >= 1 add!(mpm, NewPMFunctionPassManager()) do fpm - add!(fpm, InstCombinePass()) + if use_instcombine(job) + add!(fpm, InstCombinePass()) + else + add!(fpm, InstSimplifyPass()) + end add!(fpm, SimplifyCFGPass(; AggressiveSimplifyCFGOptions...)) end end diff --git a/src/ptx.jl b/src/ptx.jl index 3bdc3d3e..43afdb5a 100644 --- a/src/ptx.jl +++ b/src/ptx.jl @@ -19,12 +19,15 @@ Base.@kwdef struct PTXCompilerTarget <: AbstractCompilerTarget maxregs::Union{Nothing,Int} = nothing fastmath::Bool = Base.JLOptions().fast_math == 1 + instcombine::Bool = true # deprecated; remove with next major version exitable::Union{Nothing,Bool} = nothing unreachable::Union{Nothing,Bool} = nothing end +use_instcombine(@nospecialize(job::CompilerJob{PTXCompilerTarget})) = job.config.target.instcombine + function Base.hash(target::PTXCompilerTarget, h::UInt) h = hash(target.cap, h) h = hash(target.ptx, h) @@ -36,6 +39,7 @@ function Base.hash(target::PTXCompilerTarget, h::UInt) h = hash(target.blocks_per_sm, h) h = hash(target.maxregs, h) h = hash(target.fastmath, h) + h = hash(target.instcombine, h) h end @@ -91,6 +95,7 @@ function Base.show(io::IO, @nospecialize(job::CompilerJob{PTXCompilerTarget})) job.config.target.blocks_per_sm !== nothing && print(io, ", blocks_per_sm=$(job.config.target.blocks_per_sm)") job.config.target.maxregs !== nothing && print(io, ", maxregs=$(job.config.target.maxregs)") job.config.target.fastmath && print(io, ", fast math enabled") + !job.config.target.instcombine && print(io, ", instcombine disabled") end const ptx_intrinsics = ("vprintf", "__assertfail", "malloc", "free") @@ -164,7 +169,11 @@ function optimize_module!(@nospecialize(job::CompilerJob{PTXCompilerTarget}), # NVPTX's target machine info enables runtime unrolling, # but Julia's pass sequence only invokes the simple unroller. add!(fpm, LoopUnrollPass(; job.config.opt_level)) - add!(fpm, InstCombinePass()) # clean-up redundancy + if use_instcombine(job) + add!(fpm, InstCombinePass()) # clean-up redundancy + else + add!(fpm, InstSimplifyPass()) + end add!(fpm, NewPMLoopPassManager(; use_memory_ssa=true)) do lpm add!(lpm, LICMPass()) # the inner runtime check might be # outer loop invariant diff --git a/test/helpers/ptx.jl b/test/helpers/ptx.jl index e82416bc..e670ed0b 100644 --- a/test/helpers/ptx.jl +++ b/test/helpers/ptx.jl @@ -38,10 +38,11 @@ GPUCompiler.runtime_module(::PTXCompilerJob) = PTXTestRuntime function create_job(@nospecialize(func), @nospecialize(types); minthreads=nothing, maxthreads=nothing, blocks_per_sm=nothing, maxregs=nothing, + instcombine=true, kwargs...) config_kwargs, kwargs = split_kwargs(kwargs, GPUCompiler.CONFIG_KWARGS) source = methodinstance(typeof(func), Base.to_tuple_type(types), Base.get_world_counter()) - target = PTXCompilerTarget(; cap=v"7.0", minthreads, maxthreads, blocks_per_sm, maxregs) + target = PTXCompilerTarget(; cap=v"7.0", minthreads, maxthreads, blocks_per_sm, maxregs, instcombine) params = CompilerParams() config = CompilerConfig(target, params; kernel=false, config_kwargs...) CompilerJob(source, config), kwargs diff --git a/test/ptx.jl b/test/ptx.jl index 7010917a..7a19336b 100644 --- a/test/ptx.jl +++ b/test/ptx.jl @@ -169,6 +169,14 @@ end @test occursin("call void @julia_", ir) end +@testset "instcombine" begin + mod = @eval module $(gensym()) + foobar(x) = x + 1 + end + PTX.code_llvm(mod.foobar, Tuple{Int}; instcombine=false) + @test true +end + end ############################################################################################ From c133dce5e8e71aed8ab5d8629b814362c26c3404 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Thu, 7 May 2026 17:13:54 +0200 Subject: [PATCH 2/3] Switch to extensible interface. --- src/driver.jl | 12 ++---------- src/gcn.jl | 6 +----- src/interface.jl | 13 +++++++++++-- src/metal.jl | 24 ++++-------------------- src/optim.jl | 41 ++++++++++++++++------------------------- src/ptx.jl | 11 +---------- test/helpers/ptx.jl | 11 ++++++++--- 7 files changed, 43 insertions(+), 75 deletions(-) diff --git a/src/driver.jl b/src/driver.jl index b656ebdd..53accfc9 100644 --- a/src/driver.jl +++ b/src/driver.jl @@ -295,11 +295,7 @@ const __llvm_initialized = Ref(false) # minimal optimization to convert the inttoptr/call into a direct call @dispose pb=NewPMPassBuilder() begin add!(pb, NewPMFunctionPassManager()) do fpm - if use_instcombine(job) - add!(fpm, InstCombinePass()) - else - add!(fpm, InstSimplifyPass()) - end + add!(fpm, instcombine_pass(job)) end run!(pb, ir, llvm_machine(job.config.target)) end @@ -390,11 +386,7 @@ const __llvm_initialized = Ref(false) if has_deferred_jobs @dispose pb=NewPMPassBuilder() begin add!(pb, NewPMFunctionPassManager()) do fpm - if use_instcombine(job) - add!(fpm, InstCombinePass()) - else - add!(fpm, InstSimplifyPass()) - end + add!(fpm, instcombine_pass(job)) end add!(pb, AlwaysInlinerPass()) add!(pb, NewPMFunctionPassManager()) do fpm diff --git a/src/gcn.jl b/src/gcn.jl index d186c3cf..8cc0ef56 100644 --- a/src/gcn.jl +++ b/src/gcn.jl @@ -68,11 +68,7 @@ function finish_ir!( add!(pb, NewPMFunctionPassManager()) do fpm add!(fpm, InferAddressSpacesPass()) add!(fpm, SROAPass()) - if use_instcombine(job) - add!(fpm, InstCombinePass()) - else - add!(fpm, InstSimplifyPass()) - end + add!(fpm, instcombine_pass(job)) add!(fpm, EarlyCSEPass()) add!(fpm, SimplifyCFGPass()) end diff --git a/src/interface.jl b/src/interface.jl index 4e2576ef..dd82630b 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -226,8 +226,17 @@ end # Has the runtime available and does not require special handling uses_julia_runtime(@nospecialize(job::CompilerJob)) = false -# whether to use instcombine or instsimplify -use_instcombine(@nospecialize(job::CompilerJob)) = true +# Optional toggles consulted by the optimization pipeline. Override this method to return +# a `NamedTuple` with any of the following keys (defaults shown): +# +# - `instcombine::Bool = true`: when `false`, the pipeline substitutes `InstSimplifyPass` +# for `InstCombinePass`, retaining only the simplification subset of the peephole +# transforms (useful e.g. for downstream rewriters like Enzyme that get confused by +# InstCombine's more aggressive rewrites). +# +# Returning a `NamedTuple` keeps this single extension point lightweight: downstream +# users add new keys without GPUCompiler having to grow an interface method per option. +optimization_options(@nospecialize(job::CompilerJob)) = (;) # Is it legal to run vectorization passes on this target can_vectorize(@nospecialize(job::CompilerJob)) = false diff --git a/src/metal.jl b/src/metal.jl index b7a0c40d..bd4f8e66 100644 --- a/src/metal.jl +++ b/src/metal.jl @@ -184,11 +184,7 @@ function hide_noreturn!(job::CompilerJob, mod::LLVM.Module) add!(pb, AlwaysInlinerPass()) add!(pb, NewPMFunctionPassManager()) do fpm add!(fpm, SimplifyCFGPass()) - if use_instcombine(job) - add!(fpm, InstCombinePass()) - else - add!(fpm, InstSimplifyPass()) - end + add!(fpm, instcombine_pass(job)) end run!(pb, mod) end @@ -219,11 +215,7 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L add!(pb, NewPMFunctionPassManager()) do fpm add!(fpm, InferAddressSpacesPass()) add!(fpm, SROAPass()) - if use_instcombine(job) - add!(fpm, InstCombinePass()) - else - add!(fpm, InstSimplifyPass()) - end + add!(fpm, instcombine_pass(job)) add!(fpm, EarlyCSEPass()) add!(fpm, SimplifyCFGPass()) end @@ -258,11 +250,7 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L add!(pb, AlwaysInlinerPass()) add!(pb, NewPMFunctionPassManager()) do fpm add!(fpm, SimplifyCFGPass()) - if use_instcombine(job) - add!(fpm, InstCombinePass()) - else - add!(fpm, InstSimplifyPass()) - end + add!(fpm, instcombine_pass(job)) end run!(pb, mod) end @@ -398,11 +386,7 @@ function add_parameter_address_spaces!(@nospecialize(job::CompilerJob), mod::LLV add!(pb, SimplifyCFGPass()) add!(pb, SROAPass()) add!(pb, EarlyCSEPass()) - if use_instcombine(job) - add!(pb, InstCombinePass()) - else - add!(pb, InstSimplifyPass()) - end + add!(pb, instcombine_pass(job)) run!(pb, mod) end diff --git a/src/optim.jl b/src/optim.jl index a200a353..8bdb110e 100644 --- a/src/optim.jl +++ b/src/optim.jl @@ -1,5 +1,16 @@ # LLVM IR optimization +# Pick the peephole pass according to `optimization_options(job).instcombine`. Defaults to +# `InstCombinePass` to match LLVM's standard pipeline; `InstSimplifyPass` is the fallback +# for back-ends that need only the simplification subset. +function instcombine_pass(@nospecialize(job::CompilerJob)) + if get(optimization_options(job), :instcombine, true) + InstCombinePass() + else + InstSimplifyPass() + end +end + function optimize!(@nospecialize(job::CompilerJob), mod::LLVM.Module; opt_level=2) tm = llvm_machine(job.config.target) tti = llvm_targetinfo(job.config.target) @@ -99,22 +110,14 @@ function buildEarlyOptimizerPipeline(mpm, @nospecialize(job::CompilerJob), opt_l add!(mpm, NewPMFunctionPassManager()) do fpm if opt_level >= 2 add!(fpm, SROAPass()) - if use_instcombine(job) - add!(fpm, InstCombinePass()) - else - add!(fpm, InstSimplifyPass()) - end + add!(fpm, instcombine_pass(job)) add!(fpm, JumpThreadingPass()) add!(fpm, CorrelatedValuePropagationPass()) add!(fpm, ReassociatePass()) add!(fpm, EarlyCSEPass()) add!(fpm, AllocOptPass()) else - if use_instcombine(job) - add!(fpm, InstCombinePass()) - else - add!(fpm, InstSimplifyPass()) - end + add!(fpm, instcombine_pass(job)) add!(fpm, EarlyCSEPass()) end end @@ -165,11 +168,7 @@ function buildScalarOptimizerPipeline(fpm, @nospecialize(job::CompilerJob), opt_ add!(fpm, CorrelatedValuePropagationPass()) add!(fpm, DCEPass()) add!(fpm, IRCEPass()) - if use_instcombine(job) - add!(fpm, InstCombinePass()) - else - add!(fpm, InstSimplifyPass()) - end + add!(fpm, instcombine_pass(job)) add!(fpm, JumpThreadingPass()) end if opt_level >= 3 @@ -193,11 +192,7 @@ function buildVectorPipeline(fpm, @nospecialize(job::CompilerJob), opt_level) add!(fpm, InjectTLIMappings()) add!(fpm, LoopVectorizePass()) add!(fpm, LoopLoadEliminationPass()) - if use_instcombine(job) - add!(fpm, InstCombinePass()) - else - add!(fpm, InstSimplifyPass()) - end + add!(fpm, instcombine_pass(job)) add!(fpm, SimplifyCFGPass(; AggressiveSimplifyCFGOptions...)) add!(fpm, SLPVectorizerPass()) add!(fpm, VectorCombinePass()) @@ -266,11 +261,7 @@ function buildIntrinsicLoweringPipeline(mpm, @nospecialize(job::CompilerJob), op if opt_level >= 1 add!(mpm, NewPMFunctionPassManager()) do fpm - if use_instcombine(job) - add!(fpm, InstCombinePass()) - else - add!(fpm, InstSimplifyPass()) - end + add!(fpm, instcombine_pass(job)) add!(fpm, SimplifyCFGPass(; AggressiveSimplifyCFGOptions...)) end end diff --git a/src/ptx.jl b/src/ptx.jl index 43afdb5a..66880850 100644 --- a/src/ptx.jl +++ b/src/ptx.jl @@ -19,15 +19,12 @@ Base.@kwdef struct PTXCompilerTarget <: AbstractCompilerTarget maxregs::Union{Nothing,Int} = nothing fastmath::Bool = Base.JLOptions().fast_math == 1 - instcombine::Bool = true # deprecated; remove with next major version exitable::Union{Nothing,Bool} = nothing unreachable::Union{Nothing,Bool} = nothing end -use_instcombine(@nospecialize(job::CompilerJob{PTXCompilerTarget})) = job.config.target.instcombine - function Base.hash(target::PTXCompilerTarget, h::UInt) h = hash(target.cap, h) h = hash(target.ptx, h) @@ -39,7 +36,6 @@ function Base.hash(target::PTXCompilerTarget, h::UInt) h = hash(target.blocks_per_sm, h) h = hash(target.maxregs, h) h = hash(target.fastmath, h) - h = hash(target.instcombine, h) h end @@ -95,7 +91,6 @@ function Base.show(io::IO, @nospecialize(job::CompilerJob{PTXCompilerTarget})) job.config.target.blocks_per_sm !== nothing && print(io, ", blocks_per_sm=$(job.config.target.blocks_per_sm)") job.config.target.maxregs !== nothing && print(io, ", maxregs=$(job.config.target.maxregs)") job.config.target.fastmath && print(io, ", fast math enabled") - !job.config.target.instcombine && print(io, ", instcombine disabled") end const ptx_intrinsics = ("vprintf", "__assertfail", "malloc", "free") @@ -169,11 +164,7 @@ function optimize_module!(@nospecialize(job::CompilerJob{PTXCompilerTarget}), # NVPTX's target machine info enables runtime unrolling, # but Julia's pass sequence only invokes the simple unroller. add!(fpm, LoopUnrollPass(; job.config.opt_level)) - if use_instcombine(job) - add!(fpm, InstCombinePass()) # clean-up redundancy - else - add!(fpm, InstSimplifyPass()) - end + add!(fpm, instcombine_pass(job)) # clean-up redundancy add!(fpm, NewPMLoopPassManager(; use_memory_ssa=true)) do lpm add!(lpm, LICMPass()) # the inner runtime check might be # outer loop invariant diff --git a/test/helpers/ptx.jl b/test/helpers/ptx.jl index e670ed0b..2e973b2a 100644 --- a/test/helpers/ptx.jl +++ b/test/helpers/ptx.jl @@ -3,10 +3,15 @@ module PTX using ..GPUCompiler import ..TestRuntime -struct CompilerParams <: AbstractCompilerParams end +Base.@kwdef struct CompilerParams <: AbstractCompilerParams + instcombine::Bool = true +end PTXCompilerJob = CompilerJob{PTXCompilerTarget,CompilerParams} +GPUCompiler.optimization_options(@nospecialize(job::PTXCompilerJob)) = + (; instcombine = job.config.params.instcombine) + struct PTXKernelState data::Int64 end @@ -42,8 +47,8 @@ function create_job(@nospecialize(func), @nospecialize(types); kwargs...) config_kwargs, kwargs = split_kwargs(kwargs, GPUCompiler.CONFIG_KWARGS) source = methodinstance(typeof(func), Base.to_tuple_type(types), Base.get_world_counter()) - target = PTXCompilerTarget(; cap=v"7.0", minthreads, maxthreads, blocks_per_sm, maxregs, instcombine) - params = CompilerParams() + target = PTXCompilerTarget(; cap=v"7.0", minthreads, maxthreads, blocks_per_sm, maxregs) + params = CompilerParams(; instcombine) config = CompilerConfig(target, params; kernel=false, config_kwargs...) CompilerJob(source, config), kwargs end From 7889824ace689a7633746a8bf9472495f86f8cf7 Mon Sep 17 00:00:00 2001 From: Tim Besard Date: Thu, 7 May 2026 17:15:37 +0200 Subject: [PATCH 3/3] Remove test. --- test/helpers/ptx.jl | 10 ++-------- test/ptx.jl | 8 -------- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/test/helpers/ptx.jl b/test/helpers/ptx.jl index 2e973b2a..e82416bc 100644 --- a/test/helpers/ptx.jl +++ b/test/helpers/ptx.jl @@ -3,15 +3,10 @@ module PTX using ..GPUCompiler import ..TestRuntime -Base.@kwdef struct CompilerParams <: AbstractCompilerParams - instcombine::Bool = true -end +struct CompilerParams <: AbstractCompilerParams end PTXCompilerJob = CompilerJob{PTXCompilerTarget,CompilerParams} -GPUCompiler.optimization_options(@nospecialize(job::PTXCompilerJob)) = - (; instcombine = job.config.params.instcombine) - struct PTXKernelState data::Int64 end @@ -43,12 +38,11 @@ GPUCompiler.runtime_module(::PTXCompilerJob) = PTXTestRuntime function create_job(@nospecialize(func), @nospecialize(types); minthreads=nothing, maxthreads=nothing, blocks_per_sm=nothing, maxregs=nothing, - instcombine=true, kwargs...) config_kwargs, kwargs = split_kwargs(kwargs, GPUCompiler.CONFIG_KWARGS) source = methodinstance(typeof(func), Base.to_tuple_type(types), Base.get_world_counter()) target = PTXCompilerTarget(; cap=v"7.0", minthreads, maxthreads, blocks_per_sm, maxregs) - params = CompilerParams(; instcombine) + params = CompilerParams() config = CompilerConfig(target, params; kernel=false, config_kwargs...) CompilerJob(source, config), kwargs end diff --git a/test/ptx.jl b/test/ptx.jl index 7a19336b..7010917a 100644 --- a/test/ptx.jl +++ b/test/ptx.jl @@ -169,14 +169,6 @@ end @test occursin("call void @julia_", ir) end -@testset "instcombine" begin - mod = @eval module $(gensym()) - foobar(x) = x + 1 - end - PTX.code_llvm(mod.foobar, Tuple{Int}; instcombine=false) - @test true -end - end ############################################################################################