Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/driver.jl
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ const __llvm_initialized = Ref(false)
# minimal optimization to convert the inttoptr/call into a direct call
@dispose pb=NewPMPassBuilder() begin
add!(pb, NewPMFunctionPassManager()) do fpm
add!(fpm, InstCombinePass())
add!(fpm, instcombine_pass(job))
end
run!(pb, ir, llvm_machine(job.config.target))
end
Expand Down Expand Up @@ -386,7 +386,7 @@ const __llvm_initialized = Ref(false)
if has_deferred_jobs
@dispose pb=NewPMPassBuilder() begin
add!(pb, NewPMFunctionPassManager()) do fpm
add!(fpm, InstCombinePass())
add!(fpm, instcombine_pass(job))
end
add!(pb, AlwaysInlinerPass())
add!(pb, NewPMFunctionPassManager()) do fpm
Expand Down
2 changes: 1 addition & 1 deletion src/gcn.jl
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ function finish_ir!(
add!(pb, NewPMFunctionPassManager()) do fpm
add!(fpm, InferAddressSpacesPass())
add!(fpm, SROAPass())
add!(fpm, InstCombinePass())
add!(fpm, instcombine_pass(job))
add!(fpm, EarlyCSEPass())
add!(fpm, SimplifyCFGPass())
end
Expand Down
12 changes: 12 additions & 0 deletions src/interface.jl
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,18 @@ end
# Has the runtime available and does not require special handling
function uses_julia_runtime(@nospecialize(job::CompilerJob))
    # generic fallback for any `CompilerJob`: answer `false`
    return false
end

# Optimization-pipeline switches for `job`. Methods overriding this one return a
# `NamedTuple`; the recognized keys and their defaults are:
#
# - `instcombine::Bool = true`: set to `false` to have the pipeline run `InstSimplifyPass`
#   in place of `InstCombinePass`, so that only the simplification subset of the peephole
#   transforms is applied (e.g. for downstream rewriters such as Enzyme that get confused
#   by InstCombine's more aggressive rewrites).
#
# A `NamedTuple` keeps this a single, lightweight extension point: new keys can be added
# without GPUCompiler growing one interface method per option.
function optimization_options(@nospecialize(job::CompilerJob))
    return NamedTuple()  # no overrides: every option keeps its default
end

# Whether vectorization passes may legally be run for this target.
function can_vectorize(@nospecialize(job::CompilerJob))
    # generic fallback for any `CompilerJob`: answer `false`
    return false
end

Expand Down
12 changes: 6 additions & 6 deletions src/metal.jl
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ end
# note that it isn't enough to remove the function attribute, because the Metal LLVM
# compiler re-optimizes and will rediscover the property. to avoid this, we inline
# all functions that are marked noreturn, i.e., until LLVM cannot rediscover it.
function hide_noreturn!(mod::LLVM.Module)
function hide_noreturn!(job::CompilerJob, mod::LLVM.Module)
noreturn_attr = EnumAttribute("noreturn", 0)
noinline_attr = EnumAttribute("noinline", 0)
alwaysinline_attr = EnumAttribute("alwaysinline", 0)
Expand All @@ -184,7 +184,7 @@ function hide_noreturn!(mod::LLVM.Module)
add!(pb, AlwaysInlinerPass())
add!(pb, NewPMFunctionPassManager()) do fpm
add!(fpm, SimplifyCFGPass())
add!(fpm, InstCombinePass())
add!(fpm, instcombine_pass(job))
end
run!(pb, mod)
end
Expand Down Expand Up @@ -215,7 +215,7 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L
add!(pb, NewPMFunctionPassManager()) do fpm
add!(fpm, InferAddressSpacesPass())
add!(fpm, SROAPass())
add!(fpm, InstCombinePass())
add!(fpm, instcombine_pass(job))
add!(fpm, EarlyCSEPass())
add!(fpm, SimplifyCFGPass())
end
Expand All @@ -228,7 +228,7 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L
end

# JuliaGPU/Metal.jl#113
hide_noreturn!(mod)
hide_noreturn!(job, mod)

# get rid of unreachable control flow (JuliaGPU/Metal.jl#370).
# note that this currently works in tandem with the `hide_noreturn!` pass above,
Expand All @@ -250,7 +250,7 @@ function finish_ir!(@nospecialize(job::CompilerJob{MetalCompilerTarget}), mod::L
add!(pb, AlwaysInlinerPass())
add!(pb, NewPMFunctionPassManager()) do fpm
add!(fpm, SimplifyCFGPass())
add!(fpm, InstCombinePass())
add!(fpm, instcombine_pass(job))
end
run!(pb, mod)
end
Expand Down Expand Up @@ -386,7 +386,7 @@ function add_parameter_address_spaces!(@nospecialize(job::CompilerJob), mod::LLV
add!(pb, SimplifyCFGPass())
add!(pb, SROAPass())
add!(pb, EarlyCSEPass())
add!(pb, InstCombinePass())
add!(pb, instcombine_pass(job))

run!(pb, mod)
end
Expand Down
21 changes: 16 additions & 5 deletions src/optim.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# LLVM IR optimization

# Select the peephole pass for `job` from the `instcombine` entry of
# `optimization_options(job)`. A missing entry counts as `true` and yields
# `InstCombinePass`, matching LLVM's standard pipeline; a `false` entry yields
# `InstSimplifyPass`, for back-ends that need only the simplification subset.
function instcombine_pass(@nospecialize(job::CompilerJob))
    opts = optimization_options(job)
    use_instcombine = get(opts, :instcombine, true)
    return use_instcombine ? InstCombinePass() : InstSimplifyPass()
end

function optimize!(@nospecialize(job::CompilerJob), mod::LLVM.Module; opt_level=2)
tm = llvm_machine(job.config.target)
tti = llvm_targetinfo(job.config.target)
Expand Down Expand Up @@ -99,14 +110,14 @@ function buildEarlyOptimizerPipeline(mpm, @nospecialize(job::CompilerJob), opt_l
add!(mpm, NewPMFunctionPassManager()) do fpm
if opt_level >= 2
add!(fpm, SROAPass())
add!(fpm, InstCombinePass())
add!(fpm, instcombine_pass(job))
add!(fpm, JumpThreadingPass())
add!(fpm, CorrelatedValuePropagationPass())
add!(fpm, ReassociatePass())
add!(fpm, EarlyCSEPass())
add!(fpm, AllocOptPass())
else
add!(fpm, InstCombinePass())
add!(fpm, instcombine_pass(job))
add!(fpm, EarlyCSEPass())
end
end
Expand Down Expand Up @@ -157,7 +168,7 @@ function buildScalarOptimizerPipeline(fpm, @nospecialize(job::CompilerJob), opt_
add!(fpm, CorrelatedValuePropagationPass())
add!(fpm, DCEPass())
add!(fpm, IRCEPass())
add!(fpm, InstCombinePass())
add!(fpm, instcombine_pass(job))
add!(fpm, JumpThreadingPass())
end
if opt_level >= 3
Expand All @@ -181,7 +192,7 @@ function buildVectorPipeline(fpm, @nospecialize(job::CompilerJob), opt_level)
add!(fpm, InjectTLIMappings())
add!(fpm, LoopVectorizePass())
add!(fpm, LoopLoadEliminationPass())
add!(fpm, InstCombinePass())
add!(fpm, instcombine_pass(job))
add!(fpm, SimplifyCFGPass(; AggressiveSimplifyCFGOptions...))
add!(fpm, SLPVectorizerPass())
add!(fpm, VectorCombinePass())
Expand Down Expand Up @@ -250,7 +261,7 @@ function buildIntrinsicLoweringPipeline(mpm, @nospecialize(job::CompilerJob), op

if opt_level >= 1
add!(mpm, NewPMFunctionPassManager()) do fpm
add!(fpm, InstCombinePass())
add!(fpm, instcombine_pass(job))
add!(fpm, SimplifyCFGPass(; AggressiveSimplifyCFGOptions...))
end
end
Expand Down
2 changes: 1 addition & 1 deletion src/ptx.jl
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ function optimize_module!(@nospecialize(job::CompilerJob{PTXCompilerTarget}),
# NVPTX's target machine info enables runtime unrolling,
# but Julia's pass sequence only invokes the simple unroller.
add!(fpm, LoopUnrollPass(; job.config.opt_level))
add!(fpm, InstCombinePass()) # clean-up redundancy
add!(fpm, instcombine_pass(job)) # clean-up redundancy
add!(fpm, NewPMLoopPassManager(; use_memory_ssa=true)) do lpm
add!(lpm, LICMPass()) # the inner runtime check might be
# outer loop invariant
Expand Down
Loading