diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index a8457ae6..5cf7750e 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -100,7 +100,7 @@ steps:
   - label: "OpenCL.jl"
     plugins:
       - JuliaCI/julia#v1:
-          version: "1.10"
+          version: "1.11"
       - JuliaCI/julia-coverage#v1:
           codecov: true
     command: |
diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml
index c8fe4f08..1bef508b 100644
--- a/.github/workflows/Test.yml
+++ b/.github/workflows/Test.yml
@@ -19,7 +19,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        version: ['1.10', '1.11', '1.12', '1.13-nightly', 'nightly']
+        version: ['1.11', '1.12', '1.13-nightly', 'nightly']
         os: [ubuntu-24.04, ubuntu-24.04-arm, macOS-15, macOS-15-intel, windows-2025]
         arch: [x64, arm64]
         llvm_args: ['']
@@ -38,26 +38,6 @@ jobs:
         include:
           # starting with Julia 1.10, we can enable opaque pointers
           # from Julia 1.12 on, this is the default.
-          - version: '1.10'
-            os: 'ubuntu-24.04'
-            arch: 'x64'
-            llvm_args: '--opaque-pointers'
-          - version: '1.10'
-            os: 'ubuntu-24.04-arm'
-            arch: 'arm64'
-            llvm_args: '--opaque-pointers'
-          - version: '1.10'
-            os: 'macOS-15'
-            arch: 'arm64'
-            llvm_args: '--opaque-pointers'
-          - version: '1.10'
-            os: 'macOS-15-intel'
-            arch: 'x64'
-            llvm_args: '--opaque-pointers'
-          - version: '1.10'
-            os: 'windows-2025'
-            arch: 'x64'
-            llvm_args: '--opaque-pointers'
           - version: '1.11'
             os: 'ubuntu-24.04'
             arch: 'x64'
diff --git a/Project.toml b/Project.toml
index a90f67b8..62fbbe6c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -7,6 +7,7 @@ authors = ["Tim Besard <tim.besard@gmail.com>"]
 projects = ["test"]
 
 [deps]
+CompilerCaching = "9db33cc3-5358-4881-8759-fa4194144afd"
 ExprTools = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
@@ -14,13 +15,12 @@ Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
 Preferences = "21216c6a-2e73-6563-6e65-726566657250"
-Scratch = "6c6a2e73-6563-6170-7368-637461726353"
-Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
 Tracy = "e689c965-62c8-4b79-b2c5-8359227902fd"
 UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
 [compat]
+CompilerCaching = "0.2.4"
 ExprTools = "0.1"
 InteractiveUtils = "1"
 LLVM = "9.6"
@@ -28,9 +28,7 @@ Libdl = "1"
 Logging = "1"
 PrecompileTools = "1"
 Preferences = "1"
-Scratch = "1"
-Serialization = "1"
 TOML = "1"
 Tracy = "0.1.4"
 UUIDs = "1"
-julia = "1.10"
+julia = "1.11"
diff --git a/src/GPUCompiler.jl b/src/GPUCompiler.jl
index 2e646267..098f1617 100644
--- a/src/GPUCompiler.jl
+++ b/src/GPUCompiler.jl
@@ -8,8 +8,6 @@ using ExprTools: splitdef, combinedef
 
 using Libdl
 
-using Serialization
-using Scratch: @get_scratch!
 using Preferences
 
 const ENABLE_TRACY = parse(Bool, @load_preference("tracy", "false"))
@@ -35,9 +33,6 @@ end
 const CC = Core.Compiler
 using Core: MethodInstance, CodeInstance, CodeInfo
 
-compile_cache = nothing # set during __init__()
-const pkgver = Base.pkgversion(GPUCompiler)
-
 include("utils.jl")
 include("mangling.jl")
 
@@ -67,25 +62,11 @@ include("driver.jl")
 include("execution.jl")
 include("reflection.jl")
 
-include("deprecated.jl")
-
 include("precompile.jl")
 
 function __init__()
     STDERR_HAS_COLOR[] = get(stderr, :color, false)
 
-    dir = @get_scratch!("compiled")
-    ## add the Julia version
-    dir = joinpath(dir, "v$(VERSION.major).$(VERSION.minor)")
-    ## also add the package version
-    if pkgver !== nothing
-        # XXX: `Base.pkgversion` is buggy and sometimes returns `nothing`, see e.g.
-        #       JuliaLang/PackageCompiler.jl#896 and JuliaGPU/GPUCompiler.jl#593
-        dir = joinpath(dir, "v$(pkgver.major).$(pkgver.minor)")
-    end
-    mkpath(dir)
-    global compile_cache = dir
-
     @static if ENABLE_TRACY
         Tracy.@register_tracepoints()
     end
diff --git a/src/bpf.jl b/src/bpf.jl
index 80070d6d..ff75ea23 100644
--- a/src/bpf.jl
+++ b/src/bpf.jl
@@ -26,8 +26,6 @@ end
 
 ## job
 
-runtime_slug(job::CompilerJob{BPFCompilerTarget}) = "bpf"
-
 const bpf_intrinsics = () # TODO
 isintrinsic(::CompilerJob{BPFCompilerTarget}, fn::String) = in(fn, bpf_intrinsics)
 
diff --git a/src/deprecated.jl b/src/deprecated.jl
deleted file mode 100644
index e1d2af7d..00000000
--- a/src/deprecated.jl
+++ /dev/null
@@ -1,45 +0,0 @@
-# Deprecations scheduled for removal in the next major release.
-
-function defs(mod::LLVM.Module)
-    Base.depwarn("`GPUCompiler.defs(mod)` is deprecated; inline `filter(f -> !isdeclaration(f), collect(functions(mod)))`.",
-                 :defs)
-    filter(f -> !isdeclaration(f), collect(functions(mod)))
-end
-
-function decls(mod::LLVM.Module)
-    Base.depwarn("`GPUCompiler.decls(mod)` is deprecated; inline `filter(f -> isdeclaration(f) && !LLVM.isintrinsic(f), collect(functions(mod)))`.",
-                 :decls)
-    filter(f -> isdeclaration(f) && !LLVM.isintrinsic(f), collect(functions(mod)))
-end
-
-link_library!(mod::LLVM.Module, lib::LLVM.Module) = link_library!(mod, [lib])
-function link_library!(mod::LLVM.Module, libs::Vector{LLVM.Module})
-    Base.depwarn("`GPUCompiler.link_library!` is deprecated; call `LLVM.link!(mod, copy(lib))` directly, or `LLVM.link!(mod, lib; only_needed=true)` with a freshly-parsed library.",
-                 :link_library!)
-    libs = [copy(lib) for lib in libs]
-    for lib in libs
-        link!(mod, lib)
-    end
-end
-
-# no-op 3-arg fallback so downstream overrides that chain via
-# `invoke(GPUCompiler.link_libraries!, Tuple{CompilerJob, Module,
-# Vector{String}}, ...)` still resolve.
-link_libraries!(@nospecialize(job::CompilerJob), mod::LLVM.Module,
-                undefined_fns::Vector{String}) = return
-
-# `true` when a downstream package has defined a 3-arg `link_libraries!`
-# override for `job`, i.e. the dispatched method isn't our fallback above.
-#
-# Uses the same `jl_gf_invoke_lookup` path as `Core._hasmethod` rather than
-# `which`, so it's safe to call from generated-function-adjacent contexts
-# where `Base.get_world_counter()` returns `typemax(UInt)` and reflection
-# queries like `which` / `methods` fail (see JuliaLang/julia#48611).
-# All this because Enzyme.jl calls GPUCompiler.jl from a generated function.
-function has_legacy_link_libraries(@nospecialize(job::CompilerJob))
-    tt = Tuple{typeof(link_libraries!), typeof(job),
-               LLVM.Module, Vector{String}}
-    world = ccall(:jl_get_tls_world_age, UInt, ())
-    m = ccall(:jl_gf_invoke_lookup, Any, (Any, Any, UInt), tt, nothing, world)
-    return m !== nothing && (m::Method).module !== @__MODULE__
-end
diff --git a/src/driver.jl b/src/driver.jl
index 53accfc9..83e1d237 100644
--- a/src/driver.jl
+++ b/src/driver.jl
@@ -50,34 +50,16 @@ const compile_hook = Ref{Union{Nothing,Function}}(nothing)
     compile(target::Symbol, job::CompilerJob)
 
 Compile a `job` to one of the following formats as specified by the `target` argument:
-`:julia` for Julia IR, `:llvm` for LLVM IR and `:asm` for machine code.
+`:llvm` for LLVM IR, `:asm` for assembly, or `:obj` for object code.
 """
-function compile(target::Symbol, @nospecialize(job::CompilerJob); kwargs...)
-    # XXX: remove on next major version
-    if !isempty(kwargs)
-        Base.depwarn("The GPUCompiler `compile` API does not take keyword arguments anymore. Use CompilerConfig instead.", :compile)
-        config = CompilerConfig(job.config; kwargs...)
-        job = CompilerJob(job.source, config)
-    end
-
+function compile(target::Symbol, @nospecialize(job::CompilerJob))
     if compile_hook[] !== nothing
         Base.invokelatest(compile_hook[], job)
     end
-
     return compile_unhooked(target, job)
 end
 
-# XXX: remove on next major version
-function codegen(output::Symbol, @nospecialize(job::CompilerJob); kwargs...)
-    if !isempty(kwargs)
-        Base.depwarn("The GPUCompiler `codegen` function is an internal API. Use `GPUCompiler.compile` (with any kwargs passed to `CompilerConfig`) instead.", :codegen)
-        config = CompilerConfig(job.config; kwargs...)
-        job = CompilerJob(job.source, config)
-    end
-    compile_unhooked(output, job)
-end
-
-function compile_unhooked(output::Symbol, @nospecialize(job::CompilerJob); kwargs...)
+function compile_unhooked(output::Symbol, @nospecialize(job::CompilerJob))
     if context(; throw_error=false) === nothing
         error("No active LLVM context. Use `JuliaContext()` do-block syntax to create one.")
     end
@@ -179,14 +161,7 @@ end
 
 const __llvm_initialized = Ref(false)
 
-@locked function emit_llvm(@nospecialize(job::CompilerJob); kwargs...)
-    # XXX: remove on next major version
-    if !isempty(kwargs)
-        Base.depwarn("The GPUCompiler `emit_llvm` function is an internal API. Use `GPUCompiler.compile` (with any kwargs passed to `CompilerConfig`) instead.", :emit_llvm)
-        config = CompilerConfig(job.config; kwargs...)
-        job = CompilerJob(job.source, config)
-    end
-
+@locked function emit_llvm(@nospecialize(job::CompilerJob))
     if !__llvm_initialized[]
         InitializeAllTargets()
         InitializeAllTargetInfos()
@@ -250,7 +225,7 @@ const __llvm_initialized = Ref(false)
                 target = nest_target(dyn_job.config.target, job.config.target)
                 params = nest_params(dyn_job.config.params, job.config.params)
                 config = CompilerConfig(dyn_job.config; toplevel=false, target, params)
-                return codegen(:llvm, CompilerJob(dyn_job; config))
+                return compile_unhooked(:llvm, CompilerJob(dyn_job; config))
             end
 
             # compile and link
@@ -314,23 +289,7 @@ const __llvm_initialized = Ref(false)
 
         @tracepoint "Library linking" begin
             # target-specific libraries
-            @tracepoint "target libraries" begin
-                if has_legacy_link_libraries(job)
-                    Base.depwarn(
-                        "3-arg `link_libraries!(job, mod, undefined_fns)` is deprecated; " *
-                        "migrate your override to the 2-arg form `link_libraries!(job, mod)`. " *
-                        "Instead of inspecting `undefined_fns` to decide what to link, " *
-                        "parse the library lazily with `parse(LLVM.Module, bytes; lazy=true)` " *
-                        "and link it with `LLVM.link!(mod, lib; only_needed=true)` — " *
-                        "the linker will then materialize only the referenced symbols.",
-                        :link_libraries!)
-                    undefined_fns = [LLVM.name(f) for f in functions(ir)
-                                     if isdeclaration(f) && !LLVM.isintrinsic(f)]
-                    link_libraries!(job, ir, undefined_fns)
-                else
-                    link_libraries!(job, ir)
-                end
-            end
+            @tracepoint "target libraries" link_libraries!(job, ir)
 
             # GPU run-time library
             if !uses_julia_runtime(job)
diff --git a/src/execution.jl b/src/execution.jl
index 9b4940a7..dcdd4236 100644
--- a/src/execution.jl
+++ b/src/execution.jl
@@ -66,224 +66,3 @@ function assign_args!(code, _args)
 
     return vars, var_exprs
 end
-
-
-## cached compilation
-
-### Notes on interactions with package images and disk cache.
-# Julia uses package images (pkgimg) to cache both the result of inference,
-# and the result of native code emissions. Up until Julia v1.11 neither the
-# inferred nor the nativce code of foreign abstract interpreters was cached
-# across sessions. Julia v1.11 allows for caching of inference results across
-# sessions as long as those inference results are created during precompilation.
-#
-# Julia cache hierarchy is roughly as follows:
-# Function (name of a thing)
-# -> Method (particular piece of code to dispatch to with a signature)
-#  -> MethodInstance (A particular Method + particular signature)
-#    -> CodeInstance (A MethodInstance compiled for a world)
-#
-# In order to cache code across sessions we need to insert CodeInstance(owner=GPUCompilerCacheToken)
-# into the internal cache. Once we have done so we know that a particular CodeInstance is unique in
-# the system. (During pkgimg loading conflicts will be resolved).
-#
-# When a pkgimg is loaded we check it's validity, this means checking that all depdencies are the same,
-# the pkgimg was created for the right set of compiler flags, and that all source code that was used
-# to create this pkgimg is the same. When a CodeInstance is inside a pkgimg we can extend the chain of
-# validity even for GPU code, we cannot verify a "runtime" CodeInstance in the same way.
-#
-# Therefore when we see a compilation request for a CodeInstance that is originating from a pkgimg
-# we can use it as part of the hash for the on-disk cache. (see `cache_file`)
-
-"""
-    disk_cache_enabled()
-
-Query if caching to disk is enabled.
-"""
-disk_cache_enabled() = parse(Bool, @load_preference("disk_cache", "false"))
-
-"""
-    enable_disk_cache!(state::Bool=true)
-
-Activate the GPUCompiler disk cache in the current environment.
-You will need to restart your Julia environment for it to take effect.
-
-!!! note
-    The cache functionality requires Julia 1.11
-"""
-function enable_disk_cache!(state::Bool=true)
-    @set_preferences!("disk_cache"=>string(state))
-end
-
-disk_cache_path() = @get_scratch!("disk_cache")
-clear_disk_cache!() = rm(disk_cache_path(); recursive=true, force=true)
-
-const cache_lock = ReentrantLock()
-
-"""
-    cached_compilation(cache::Dict{Any}, src::MethodInstance, cfg::CompilerConfig,
-                       compiler, linker)
-
-Compile a method instance `src` with configuration `cfg`, by invoking `compiler` and
-`linker` and storing the result in `cache`.
-
-The `cache` argument should be a dictionary that can be indexed using any value and store
-whatever the `linker` function returns. The `compiler` function should take a `CompilerJob`
-and return data that can be cached across sessions (e.g., LLVM IR). This data is then
-forwarded, along with the `CompilerJob`, to the `linker` function which is allowed to create
-session-dependent objects (e.g., a `CuModule`).
-"""
-function cached_compilation(cache::AbstractDict{<:Any,V},
-                            src::MethodInstance, cfg::CompilerConfig,
-                            compiler::Function, linker::Function) where {V}
-    # NOTE: we index the cach both using (mi, world, cfg) keys, for the fast look-up,
-    #       and using CodeInfo keys for the slow look-up. we need to cache both for
-    #       performance, but cannot use a separate private cache for the ci->obj lookup
-    #       (e.g. putting it next to the CodeInfo's in the CodeCache) because some clients
-    #       expect to be able to wipe the cache (e.g. CUDA.jl's `device_reset!`)
-
-    # fast path: index the cache directly for the *current* world + compiler config
-
-    world = tls_world_age()
-    key = (objectid(src), world, cfg)
-    # NOTE: we store the MethodInstance's objectid to avoid an expensive allocation.
-    #       Base does this with a multi-level lookup, first keyed on the mi,
-    #       then a linear scan over the (typically few) entries.
-
-    # NOTE: no use of lock(::Function)/@lock/get! to avoid try/catch and closure overhead
-    lock(cache_lock)
-    obj = get(cache, key, nothing)
-    unlock(cache_lock)
-
-    if obj === nothing || compile_hook[] !== nothing
-        obj = actual_compilation(cache, src, world, cfg, compiler, linker)::V
-        lock(cache_lock)
-        cache[key] = obj
-        unlock(cache_lock)
-    end
-    return obj::V
-end
-
-@noinline function cache_file(ci::CodeInstance, cfg::CompilerConfig)
-    h = hash(Base.objectid(ci))
-    @static if isdefined(Base, :object_build_id)
-        bid = Base.object_build_id(ci)
-        if bid === nothing # CI is from a runtime compilation, not worth caching on disk
-            return nothing
-        else
-            bid = bid % UInt64 # The upper 64bit are a checksum, unavailable during precompilation
-        end
-        h = hash(bid, h)
-    end
-    h = hash(cfg, h)
-
-    gpucompiler_buildid = Base.module_build_id(@__MODULE__)
-    if (gpucompiler_buildid >> 64) % UInt64 == 0xffffffffffffffff
-        return nothing # Don't cache during precompilation of GPUCompiler
-    end
-
-    return joinpath(
-        disk_cache_path(),
-        # bifurcate the cache by build id of GPUCompiler
-        string(gpucompiler_buildid),
-        string(h, ".jls"))
-end
-
-struct DiskCacheEntry
-    src::Type # Originally MethodInstance, but upon deserialize they were not uniqued...
-    cfg::CompilerConfig
-    asm
-end
-
-@noinline function actual_compilation(cache::AbstractDict, src::MethodInstance, world::UInt,
-                                      cfg::CompilerConfig, compiler::Function, linker::Function)
-    job = CompilerJob(src, cfg, world)
-    obj = nothing
-
-    # fast path: find an applicable CodeInstance and see if we have compiled it before
-    ci = ci_cache_lookup(ci_cache(job), src, world, world)::Union{Nothing,CodeInstance}
-    if ci !== nothing
-        key = (ci, cfg)
-        obj = get(cache, key, nothing)
-    end
-
-    # slow path: compile and link
-    if obj === nothing || compile_hook[] !== nothing
-        asm = nothing
-        path = nothing
-        ondisk_hit = false
-        @static if VERSION >= v"1.11.0-"
-            # Don't try to hit the disk cache if we are for a *compile* hook
-            # TODO:
-            #  - Sould we hit disk cache if Base.generating_output()
-            #  - Should we allow backend to opt out?
-            if ci !== nothing && obj === nothing && disk_cache_enabled()
-                path = cache_file(ci, cfg)
-                @debug "Looking for on-disk cache" job path
-                if path !== nothing && isfile(path)
-                    ondisk_hit = true
-                    try
-                        @debug "Loading compiled kernel" job path
-                        # The MI we deserialize here didn't get uniqued...
-                        entry = deserialize(path)::DiskCacheEntry
-                        if entry.src == src.specTypes && entry.cfg == cfg
-                            asm = entry.asm
-                        else
-                            @show entry.src == src.specTypes
-                            @show entry.cfg == cfg
-                            @warn "Cache missmatch" src.specTypes cfg entry.src entry.cfg
-                        end
-                    catch ex
-                        @warn "Failed to load compiled kernel" job path exception=(ex, catch_backtrace())
-                    end
-                end
-            end
-        end
-
-        if asm === nothing || compile_hook[] !== nothing
-            # Run the compiler in-case we need to hook it.
-            asm = compiler(job)
-        end
-        if obj !== nothing
-            # we got here because of a *compile* hook; don't bother linking
-            return obj
-        end
-
-        @static if VERSION >= v"1.11.0-"
-            if !ondisk_hit && path !== nothing && disk_cache_enabled()
-                @debug "Writing out on-disk cache" job path
-                mkpath(dirname(path))
-                entry = DiskCacheEntry(src.specTypes, cfg, asm)
-
-                # atomic write to disk
-                tmppath, io = mktemp(dirname(path); cleanup=false)
-                serialize(io, entry)
-                close(io)
-                @static if VERSION >= v"1.12.0-DEV.1023"
-                    mv(tmppath, path; force=true)
-                else
-                    Base.rename(tmppath, path, force=true)
-                end
-            end
-        end
-
-        obj = linker(job, asm)
-
-        if ci === nothing
-            ci = ci_cache_lookup(ci_cache(job), src, world, world)
-            if ci === nothing
-                error("""Did not find CodeInstance for $job.
-
-                         Pleaase make sure that the `compiler` function passed to `cached_compilation`
-                         invokes GPUCompiler with exactly the same configuration as passed to the API.
-
-                         Note that you should do this by calling `GPUCompiler.compile`, and not by
-                         using reflection functions (which alter the compiler configuration).""")
-            end
-            key = (ci, cfg)
-        end
-        cache[key] = obj
-    end
-
-    return obj
-end
diff --git a/src/gcn.jl b/src/gcn.jl
index 8cc0ef56..3cbe91c6 100644
--- a/src/gcn.jl
+++ b/src/gcn.jl
@@ -33,10 +33,6 @@ end
 
 ## job
 
-# TODO: encode debug build or not in the compiler job
-#       https://github.com/JuliaGPU/CUDAnative.jl/issues/368
-runtime_slug(job::CompilerJob{GCNCompilerTarget}) = "gcn-$(job.config.target.dev_isa)$(job.config.target.features)"
-
 const gcn_intrinsics = () # TODO: ("vprintf", "__assertfail", "malloc", "free")
 isintrinsic(::CompilerJob{GCNCompilerTarget}, fn::String) = in(fn, gcn_intrinsics)
 
diff --git a/src/interface.jl b/src/interface.jl
index dd82630b..80021af6 100644
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -253,15 +253,11 @@ runtime_module(@nospecialize(job::CompilerJob)) = error("Not implemented")
 isintrinsic(@nospecialize(job::CompilerJob), fn::String) = false
 
 # provide a specific interpreter to use.
-if VERSION >= v"1.11.0-DEV.1552"
-get_interpreter(@nospecialize(job::CompilerJob)) =
-    GPUInterpreter(job.world; method_table_view=maybe_cached(method_table_view(job)),
-                   token=ci_cache_token(job), inf_params=inference_params(job),
-                   opt_params=optimization_params(job))
-else
-get_interpreter(@nospecialize(job::CompilerJob)) =
-    GPUInterpreter(job.world; method_table_view=maybe_cached(method_table_view(job)),
-                   code_cache=ci_cache(job), inf_params=inference_params(job),
+function get_interpreter(@nospecialize(job::CompilerJob))
+    mtv = maybe_cached(method_table_view(job))
+    cache = cache_view(job)  # built from cache_owner(job) / results_type(job)
+    inf_params = inference_params(job)
+    GPUInterpreter(job.world; method_table_view=mtv, cache, inf_params,
                    opt_params=optimization_params(job))
 end
 
@@ -273,11 +269,6 @@ can_throw(@nospecialize(job::CompilerJob)) = uses_julia_runtime(job)
 # if not, safepoints at function entry will not be emitted
 can_safepoint(@nospecialize(job::CompilerJob)) = uses_julia_runtime(job)
 
-# generate a string that represents the type of compilation, for selecting a compiled
-# instance of the runtime library. this slug should encode everything that affects
-# the generated code of this compiler job (with exception of the function source)
-runtime_slug(@nospecialize(job::CompilerJob)) = error("Not implemented")
-
 # the type of the kernel state object, or Nothing if this back-end doesn't need one.
 #
 # the generated code will be rewritten to include an object of this type as the first
@@ -297,36 +288,62 @@ pass_by_ref(@nospecialize(job::CompilerJob)) = false
 # whether pointer is a valid call target
 valid_function_pointer(@nospecialize(job::CompilerJob), ptr::Ptr{Cvoid}) = false
 
+# Cache partitioning. The owner is stored on every CodeInstance and compared via `jl_egal`,
+# so it (and every field) must be immutable for cross-session matches (e.g. via package
+# precompilation); custom `target` / `params` types must be `struct`s, not `mutable struct`s.
 # Care is required for anything that impacts:
 #   - method_table
 #   - inference_params
 #   - optimization_params
-# By default that is just always_inline
-# the cache token is compared with jl_egal
-struct GPUCompilerCacheToken
-    target_type::Type
+# The default covers the full target+params instances (so backends with version- or
+# arch-specific knobs partition cleanly), `always_inline` (which feeds optimization_params),
+# and the method table.
+struct GPUCompilerCacheToken{T<:AbstractCompilerTarget, P<:AbstractCompilerParams}
+    target::T
+    params::P
     always_inline::Bool
     method_table::Core.MethodTable
 end
 
-ci_cache_token(@nospecialize(job::CompilerJob)) =
-    GPUCompilerCacheToken(typeof(job.config.target), job.config.always_inline, method_table(job))
-
-# the codeinstance cache to use -- should only be used for the constructor
-if VERSION >= v"1.11.0-DEV.1552"
-    # Soft deprecated user should use `CC.code_cache(get_interpreter(job))`
-    ci_cache(@nospecialize(job::CompilerJob)) = CC.code_cache(get_interpreter(job))
-else
-function ci_cache(@nospecialize(job::CompilerJob))
-    lock(GLOBAL_CI_CACHES_LOCK) do
-        cache = get!(GLOBAL_CI_CACHES, job.config) do
-            CodeCache()
-        end
-        return cache
-    end
-end
+cache_owner(@nospecialize(job::CompilerJob)) = let cfg = job.config
+    GPUCompilerCacheToken(cfg.target, cfg.params, cfg.always_inline, method_table(job))
+end
+
+"""
+    GPUCompiler.NoResults()
+
+Default results type carried on each cached `CodeInstance` when the consumer hasn't
+overridden [`results_type`](@ref). Carries no fields; useful for compiler jobs that
+don't need to memoize compiled artifacts (e.g., reflection, precompile workloads).
+"""
+mutable struct NoResults end
+
+# The consumer's results struct type, stored on each CodeInstance via `CompilerCaching`.
+# Override to attach session-portable artifacts (IR, object bytes) and session-local handles
+# (e.g., `CuModule`, `MTLComputePipelineState`) to compiled CIs. The struct must be a
+# `mutable struct` with a zero-arg constructor.
+results_type(@nospecialize(job::CompilerJob)) = NoResults
+
+# Construct a `CompilerCaching.CacheView` partitioned by `cache_owner(job)` (evaluated once, so
+# the key type and key value always agree) and parametrized by `results_type(job)`.
+function cache_view(@nospecialize(job::CompilerJob))
+    owner = cache_owner(job)
+    V = results_type(job)
+    CompilerCaching.CacheView{typeof(owner), V}(owner, job.world)
 end
 
+# Optional consumer hooks for caching post-codegen LLVM bitcode on the results struct.
+# Override these on your `results_type(job)` to opt in to cross-session bitcode caching
+# (most relevant for runtime library functions, which GPUCompiler compiles per-target and
+# would otherwise rebuild every session). The `opaque_pointers` flag tracks the LLVM
+# context's pointer mode at compile time — `bitcode` should reject mismatches by
+# returning `nothing`, since opaque- and typed-pointer IR aren't interchangeable.
+bitcode(@nospecialize(results), opaque_pointers::Bool) = nothing
+bitcode!(@nospecialize(results), bytes::Vector{UInt8}, opaque_pointers::Bool) = nothing
+
+public GPUCompilerCacheToken, cache_owner, NoResults, results_type, cache_view,
+       bitcode, bitcode!
+
 # the method table to use
 # deprecate method_table on next-breaking release
 method_table(@nospecialize(job::CompilerJob)) = GLOBAL_METHOD_TABLE
@@ -390,9 +407,3 @@ finish_ir!(@nospecialize(job::CompilerJob), mod::LLVM.Module, entry::LLVM.Functi
 
 # whether an LLVM function is valid for this back-end
 validate_ir(@nospecialize(job::CompilerJob), mod::LLVM.Module) = IRError[]
-
-# deprecated
-struct DeprecationMarker end
-process_module!(@nospecialize(job::CompilerJob), mod::LLVM.Module) = DeprecationMarker()
-process_entry!(@nospecialize(job::CompilerJob), mod::LLVM.Module, entry::LLVM.Function) =
-    DeprecationMarker()
diff --git a/src/irgen.jl b/src/irgen.jl
index 744904e5..2ce2e416 100644
--- a/src/irgen.jl
+++ b/src/irgen.jl
@@ -43,11 +43,6 @@ function irgen(@nospecialize(job::CompilerJob))
         end
     end
 
-    deprecation_marker = process_module!(job, mod)
-    if deprecation_marker != DeprecationMarker()
-        Base.depwarn("GPUCompiler.process_module! is deprecated; implement GPUCompiler.finish_module! instead", :process_module)
-    end
-
     # sanitize global values (Julia doesn't when using the external codegen policy)
     for val in [collect(globals(mod)); collect(functions(mod))]
         isdeclaration(val) && continue
@@ -69,11 +64,6 @@ function irgen(@nospecialize(job::CompilerJob))
     elseif job.config.kernel
         LLVM.name!(entry, mangle_sig(job.source.specTypes))
     end
-    deprecation_marker = process_entry!(job, mod, entry)
-    if deprecation_marker != DeprecationMarker()
-        Base.depwarn("GPUCompiler.process_entry! is deprecated; implement GPUCompiler.finish_module! instead", :process_entry)
-        entry = deprecation_marker
-    end
     if job.config.entry_abi === :specfunc
         func = compiled[job.source].func
         specfunc = LLVM.name(entry)
diff --git a/src/jlgen.jl b/src/jlgen.jl
index 21216d79..d7b823a0 100644
--- a/src/jlgen.jl
+++ b/src/jlgen.jl
@@ -1,16 +1,13 @@
 # Julia compiler integration
 
+import CompilerCaching
+using CompilerCaching: CacheView, @setup_caching
+
 
 ## world age lookups
 
-# `tls_world_age` should be used to look up the current world age. in most cases, this is
-# what you should use to invoke the compiler with.
+import Base: tls_world_age
 
-if isdefined(Base, :tls_world_age)
-    import Base: tls_world_age
-else
-    tls_world_age() = ccall(:jl_get_tls_world_age, UInt, ())
-end
 
 ## looking up method instances
 
@@ -38,11 +35,6 @@ function MethodError(ft::Type{<:Function}, tt::Type, world::Integer=typemax(UInt
 end
 MethodError(ft, tt, world=typemax(UInt)) = Base.MethodError(ft, tt, world)
 
-# generate a LineInfoNode for the current source code location
-macro LineInfoNode(method)
-    Core.LineInfoNode(__module__, method, __source__.file, Int32(__source__.line), Int32(0))
-end
-
 """
     methodinstance(ft::Type, tt::Type, [world::UInt])
 
@@ -56,31 +48,11 @@ This function is highly optimized, and results do not need to be cached addition
 
 Only use this function with concrete signatures, i.e., using the types of values you would
 pass at run time. For non-concrete signatures, use `generic_methodinstance` instead.
-
 """
-methodinstance
-
-function generic_methodinstance(@nospecialize(ft::Type), @nospecialize(tt::Type),
-                                world::Integer=tls_world_age())
-    sig = signature_type_by_tt(ft, tt)
-
-    match, _ = CC._findsup(sig, nothing, world)
-    match === nothing && throw(MethodError(ft, tt, world))
-
-    mi = CC.specialize_method(match)
-
-    return mi::MethodInstance
-end
-
-# on 1.11 (JuliaLang/julia#52572, merged as part of JuliaLang/julia#52233) we can use
-# Julia's cached method lookup to simply look up method instances at run time.
-@static if VERSION >= v"1.11.0-DEV.1552"
-
-# XXX: version of Base.method_instance that uses a function type
-@inline function methodinstance(@nospecialize(ft::Type), @nospecialize(tt::Type),
-                                world::Integer=tls_world_age())
+function methodinstance(@nospecialize(ft::Type), @nospecialize(tt::Type),
+                        world::Integer=tls_world_age())
     sig = signature_type_by_tt(ft, tt)
-    @assert Base.isdispatchtuple(sig)   # JuliaLang/julia#52233
+    @assert Base.isdispatchtuple(sig)
 
     mi = ccall(:jl_method_lookup_by_tt, Any,
                (Any, Csize_t, Any),
@@ -96,201 +68,17 @@ end
     return mi
 end
 
-# on older versions of Julia, we always need to use the generic lookup
-else
-
-const methodinstance = generic_methodinstance
-
-function methodinstance_generator(world::UInt, source, self, ft::Type, tt::Type)
-    @nospecialize
-    @assert CC.isType(ft) && CC.isType(tt)
-    ft = ft.parameters[1]
-    tt = tt.parameters[1]
-
-    stub = Core.GeneratedFunctionStub(identity, Core.svec(:methodinstance, :ft, :tt), Core.svec())
-
-    # look up the method match
-    method_error = :(throw(MethodError(ft, tt, $world)))
-    sig = Tuple{ft, tt.parameters...}
-    min_world = Ref{UInt}(typemin(UInt))
-    max_world = Ref{UInt}(typemax(UInt))
-    match = ccall(:jl_gf_invoke_lookup_worlds, Any,
-                  (Any, Any, Csize_t, Ref{Csize_t}, Ref{Csize_t}),
-                  sig, #=mt=# nothing, world, min_world, max_world)
-    match === nothing && return stub(world, source, method_error)
-
-    # look up the method and code instance
-    mi = ccall(:jl_specializations_get_linfo, Ref{MethodInstance},
-               (Any, Any, Any), match.method, match.spec_types, match.sparams)
-    ci = CC.retrieve_code_info(mi, world)
-
-    # prepare a new code info
-    new_ci = copy(ci)
-    empty!(new_ci.code)
-    empty!(new_ci.codelocs)
-    empty!(new_ci.linetable)
-    empty!(new_ci.ssaflags)
-    new_ci.ssavaluetypes = 0
-
-    # propagate edge metadata
-    new_ci.min_world = min_world[]
-    new_ci.max_world = max_world[]
-    new_ci.edges = Any[mi]
-
-    # prepare the slots
-    new_ci.slotnames = Symbol[Symbol("#self#"), :ft, :tt]
-    new_ci.slotflags = UInt8[0x00 for i = 1:3]
-
-    # return the method instance
-    push!(new_ci.code, CC.ReturnNode(mi))
-    push!(new_ci.ssaflags, 0x00)
-    push!(new_ci.linetable, @LineInfoNode(methodinstance))
-    push!(new_ci.codelocs, 1)
-    new_ci.ssavaluetypes += 1
-
-    return new_ci
-end
-
-@eval function methodinstance(ft, tt)
-    $(Expr(:meta, :generated_only))
-    $(Expr(:meta, :generated, methodinstance_generator))
-end
-
-end
-
-
-## code instance cache
-const HAS_INTEGRATED_CACHE = VERSION >= v"1.11.0-DEV.1552"
-
-if !HAS_INTEGRATED_CACHE
-struct CodeCache
-    dict::IdDict{MethodInstance,Vector{CodeInstance}}
-
-    CodeCache() = new(IdDict{MethodInstance,Vector{CodeInstance}}())
-end
-
-function Base.show(io::IO, ::MIME"text/plain", cc::CodeCache)
-    print(io, "CodeCache with $(mapreduce(length, +, values(cc.dict); init=0)) entries")
-    if !isempty(cc.dict)
-        print(io, ": ")
-        for (mi, cis) in cc.dict
-            println(io)
-            print(io, "  ")
-            show(io, mi)
-
-            function worldstr(min_world, max_world)
-                if min_world == typemax(UInt)
-                    "empty world range"
-                elseif max_world == typemax(UInt)
-                    "worlds $(Int(min_world))+"
-                else
-                    "worlds $(Int(min_world)) to $(Int(max_world))"
-                end
-            end
-
-            for (i,ci) in enumerate(cis)
-                println(io)
-                print(io, "    CodeInstance for ", worldstr(ci.min_world, ci.max_world))
-            end
-        end
-    end
-end
-
-Base.empty!(cc::CodeCache) = empty!(cc.dict)
-
-const GLOBAL_CI_CACHES = Dict{CompilerConfig, CodeCache}()
-const GLOBAL_CI_CACHES_LOCK = ReentrantLock()
-
-
-## method invalidations
-
-function CC.setindex!(cache::CodeCache, ci::CodeInstance, mi::MethodInstance)
-    # make sure the invalidation callback is attached to the method instance
-    add_codecache_callback!(cache, mi)
-    cis = get!(cache.dict, mi, CodeInstance[])
-    push!(cis, ci)
-end
-
-# invalidation (like invalidate_method_instance, but for our cache)
-struct CodeCacheCallback
-    cache::CodeCache
-end
-
-@static if VERSION ≥ v"1.11.0-DEV.798"
-
-function add_codecache_callback!(cache::CodeCache, mi::MethodInstance)
-    callback = CodeCacheCallback(cache)
-    CC.add_invalidation_callback!(callback, mi)
-end
-function (callback::CodeCacheCallback)(replaced::MethodInstance, max_world::UInt32)
-    cis = get(callback.cache.dict, replaced, nothing)
-    if cis === nothing
-        return
-    end
-    for ci in cis
-        if ci.max_world == ~0 % Csize_t
-            @assert ci.min_world - 1 <= max_world "attempting to set illogical constraints"
-@static if VERSION >= v"1.11.0-DEV.1390"
-            @atomic ci.max_world = max_world
-else
-            ci.max_world = max_world
-end
-        end
-        @assert ci.max_world <= max_world
-    end
-end
-
-else
-
-function add_codecache_callback!(cache::CodeCache, mi::MethodInstance)
-    callback = CodeCacheCallback(cache)
-    if !isdefined(mi, :callbacks)
-        mi.callbacks = Any[callback]
-    elseif !in(callback, mi.callbacks)
-        push!(mi.callbacks, callback)
-    end
-end
-function (callback::CodeCacheCallback)(replaced::MethodInstance, max_world::UInt32,
-                                       seen::Set{MethodInstance}=Set{MethodInstance}())
-    push!(seen, replaced)
-
-    cis = get(callback.cache.dict, replaced, nothing)
-    if cis === nothing
-        return
-    end
-    for ci in cis
-        if ci.max_world == ~0 % Csize_t
-            @assert ci.min_world - 1 <= max_world "attempting to set illogical constraints"
-            ci.max_world = max_world
-        end
-        @assert ci.max_world <= max_world
-    end
-
-    # recurse to all backedges to update their valid range also
-    if isdefined(replaced, :backedges)
-        backedges = filter(replaced.backedges) do @nospecialize(mi)
-            if mi isa MethodInstance
-                mi ∉ seen
-            elseif mi isa Type
-                # an `invoke` call, which is a `(sig, MethodInstance)` pair.
-                # let's ignore the `sig` and process the `MethodInstance` next.
-                false
-            else
-                error("invalid backedge")
-            end
-        end
+function generic_methodinstance(@nospecialize(ft::Type), @nospecialize(tt::Type),
+                                world::Integer=tls_world_age())
+    sig = signature_type_by_tt(ft, tt)
 
-        # Don't touch/empty backedges `invalidate_method_instance` in C will do that later
-        # replaced.backedges = Any[]
+    match, _ = CC._findsup(sig, nothing, world)
+    match === nothing && throw(MethodError(ft, tt, world))
 
-        for mi in backedges
-            callback(mi::MethodInstance, max_world, seen)
-        end
-    end
-end
+    mi = CC.specialize_method(match)
 
+    return mi::MethodInstance
 end
-end # !HAS_INTEGRATED_CACHE
 
 
 ## method overrides
@@ -298,8 +86,6 @@ end # !HAS_INTEGRATED_CACHE
 Base.Experimental.@MethodTable(GLOBAL_METHOD_TABLE)
 
 # Implements a priority lookup for method tables, where the first match in the stack get's returned.
-# An alternative to this would be to use a "Union" where we would query the parent method table and
-# do a most-specific match.
 struct StackedMethodTable{MTV<:CC.MethodTableView} <: CC.MethodTableView
     world::UInt
     mt::Core.MethodTable
@@ -310,91 +96,45 @@ StackedMethodTable(world::UInt, mt::Core.MethodTable, parent::Core.MethodTable)
 
 CC.isoverlayed(::StackedMethodTable) = true
 
-@static if VERSION >= v"1.11.0-DEV.363"
-    # https://github.com/JuliaLang/julia/pull/51078
-    # same API as before but without returning isoverlayed flag
-    function CC.findall(@nospecialize(sig::Type), table::StackedMethodTable; limit::Int=-1)
-        result = CC._findall(sig, table.mt, table.world, limit)
-        result === nothing && return nothing # to many matches
-        nr = CC.length(result)
-        if nr ≥ 1 && CC.getindex(result, nr).fully_covers
-            # no need to fall back to the parent method view
-            return result
-        end
-
-        parent_result = CC.findall(sig, table.parent; limit)::Union{Nothing, CC.MethodLookupResult}
-        parent_result === nothing && return nothing #too many matches
-
-        # merge the parent match results with the internal method table
-        return CC.MethodLookupResult(
-            CC.vcat(result.matches, parent_result.matches),
-            CC.WorldRange(
-                CC.max(result.valid_worlds.min_world, parent_result.valid_worlds.min_world),
-                CC.min(result.valid_worlds.max_world, parent_result.valid_worlds.max_world)),
-            result.ambig | parent_result.ambig)
+function CC.findall(@nospecialize(sig::Type), table::StackedMethodTable; limit::Int=-1)
+    result = CC._findall(sig, table.mt, table.world, limit)
+    result === nothing && return nothing # too many matches
+    nr = CC.length(result)
+    if nr ≥ 1 && CC.getindex(result, nr).fully_covers
+        # no need to fall back to the parent method view
+        return result
     end
 
-    function CC.findsup(@nospecialize(sig::Type), table::StackedMethodTable)
-        match, valid_worlds = CC._findsup(sig, table.mt, table.world)
-        match !== nothing && return match, valid_worlds
-        parent_match, parent_valid_worlds = CC.findsup(sig, table.parent)
-        return (
-            parent_match,
-            CC.WorldRange(
-                max(valid_worlds.min_world, parent_valid_worlds.min_world),
-                min(valid_worlds.max_world, parent_valid_worlds.max_world))
-            )
-    end
-else
-    function CC.findall(@nospecialize(sig::Type), table::StackedMethodTable; limit::Int=-1)
-        result = CC._findall(sig, table.mt, table.world, limit)
-        result === nothing && return nothing # to many matches
-        nr = CC.length(result)
-        if nr ≥ 1 && CC.getindex(result, nr).fully_covers
-            # no need to fall back to the parent method view
-            return CC.MethodMatchResult(result, true)
-        end
+    parent_result = CC.findall(sig, table.parent; limit)::Union{Nothing, CC.MethodLookupResult}
+    parent_result === nothing && return nothing # too many matches
 
-        parent_result = CC.findall(sig, table.parent; limit)::Union{Nothing, CC.MethodMatchResult}
-        parent_result === nothing && return nothing #too many matches
-
-        overlayed = parent_result.overlayed | !CC.isempty(result)
-        parent_result = parent_result.matches::CC.MethodLookupResult
-
-        # merge the parent match results with the internal method table
-        return CC.MethodMatchResult(
-        CC.MethodLookupResult(
-            CC.vcat(result.matches, parent_result.matches),
-            CC.WorldRange(
-                CC.max(result.valid_worlds.min_world, parent_result.valid_worlds.min_world),
-                CC.min(result.valid_worlds.max_world, parent_result.valid_worlds.max_world)),
-            result.ambig | parent_result.ambig),
-        overlayed)
-    end
+    # merge the parent match results with the internal method table
+    return CC.MethodLookupResult(
+        CC.vcat(result.matches, parent_result.matches),
+        CC.WorldRange(
+            CC.max(result.valid_worlds.min_world, parent_result.valid_worlds.min_world),
+            CC.min(result.valid_worlds.max_world, parent_result.valid_worlds.max_world)),
+        result.ambig | parent_result.ambig)
+end
 
-    function CC.findsup(@nospecialize(sig::Type), table::StackedMethodTable)
-        match, valid_worlds = CC._findsup(sig, table.mt, table.world)
-        match !== nothing && return match, valid_worlds, true
-        parent_match, parent_valid_worlds, overlayed = CC.findsup(sig, table.parent)
-        return (
-            parent_match,
-            CC.WorldRange(
-                max(valid_worlds.min_world, parent_valid_worlds.min_world),
-                min(valid_worlds.max_world, parent_valid_worlds.max_world)),
-            overlayed)
-    end
+function CC.findsup(@nospecialize(sig::Type), table::StackedMethodTable)
+    match, valid_worlds = CC._findsup(sig, table.mt, table.world)
+    match !== nothing && return match, valid_worlds
+    parent_match, parent_valid_worlds = CC.findsup(sig, table.parent)
+    return (
+        parent_match,
+        CC.WorldRange(
+            max(valid_worlds.min_world, parent_valid_worlds.min_world),
+            min(valid_worlds.max_world, parent_valid_worlds.max_world))
+        )
 end
 
+
 ## interpreter
 
-@static if VERSION >= v"1.11.0-DEV.1498"
-    import Core.Compiler: get_inference_world
-    using Base: get_world_counter
-else
-    import Core.Compiler: get_world_counter, get_world_counter as get_inference_world
-end
+import Core.Compiler: get_inference_world
+using Base: get_world_counter
 
-const MTType = Core.MethodTable
 if isdefined(Core.Compiler, :CachedMethodTable)
     using Core.Compiler: CachedMethodTable
     maybe_cached(mtv::CC.MethodTableView) = CachedMethodTable(mtv)
@@ -407,86 +147,49 @@ get_method_table_view(world::UInt, mt::CC.MethodTable) = CC.OverlayMethodTable(w
 # VERSION >= v"1.14.0-DEV.1691"
 const INFERENCE_CACHE_TYPE = isdefined(CC, :InferenceCache) ? CC.InferenceCache : Vector{CC.InferenceResult}
 
-struct GPUInterpreter{MTV<:CC.MethodTableView} <: CC.AbstractInterpreter
+"""
+    GPUInterpreter{MTV, K, V}
+
+Foreign abstract interpreter that drives Julia inference for GPU compilation. Parametric
+on the method-table view (`MTV`), the cache owner type (`K`), and the consumer's results
+struct type (`V`). The cache is a `CompilerCaching.CacheView{K, V}` — `@setup_caching`
+wires `cache_owner` and `finish!` so each `CodeInstance` carries a `V()` results struct
+on its `analysis_results` chain, partitioned in the global CI cache by the owner token.
+"""
+struct GPUInterpreter{MTV<:CC.MethodTableView, K, V} <: CC.AbstractInterpreter
     world::UInt
     method_table_view::MTV
-
-@static if HAS_INTEGRATED_CACHE
-    token::Any
-else
-    code_cache::CodeCache
-end
+    cache::CacheView{K, V}
     inf_cache::INFERENCE_CACHE_TYPE
-
     inf_params::CC.InferenceParams
     opt_params::CC.OptimizationParams
 end
 
-@static if HAS_INTEGRATED_CACHE
-function GPUInterpreter(world::UInt=Base.get_world_counter();
-                        method_table_view::CC.MethodTableView,
-                        token::Any,
+function GPUInterpreter(world::UInt; method_table_view::CC.MethodTableView,
+                        cache::CacheView,
                         inf_params::CC.InferenceParams,
                         opt_params::CC.OptimizationParams)
     @assert world <= Base.get_world_counter()
-
-    inf_cache = INFERENCE_CACHE_TYPE()
-
-    return GPUInterpreter(world, method_table_view,
-                          token, inf_cache,
-                          inf_params, opt_params)
+    GPUInterpreter(world, method_table_view, cache, INFERENCE_CACHE_TYPE(),
+                   inf_params, opt_params)
 end
 
 function GPUInterpreter(interp::GPUInterpreter;
                         world::UInt=interp.world,
                         method_table_view::CC.MethodTableView=interp.method_table_view,
-                        token::Any=interp.token,
+                        cache::CacheView=interp.cache,
                         inf_cache::INFERENCE_CACHE_TYPE=interp.inf_cache,
                         inf_params::CC.InferenceParams=interp.inf_params,
                         opt_params::CC.OptimizationParams=interp.opt_params)
-    return GPUInterpreter(world, method_table_view,
-                          token, inf_cache,
-                          inf_params, opt_params)
+    GPUInterpreter(world, method_table_view, cache, inf_cache, inf_params, opt_params)
 end
 
-else
-
-function GPUInterpreter(world::UInt=Base.get_world_counter();
-                        method_table_view::CC.MethodTableView,
-                        code_cache::CodeCache,
-                        inf_params::CC.InferenceParams,
-                        opt_params::CC.OptimizationParams)
-    @assert world <= Base.get_world_counter()
-
-    inf_cache = Vector{CC.InferenceResult}()
-
-    return GPUInterpreter(world, method_table_view,
-                          code_cache, inf_cache,
-                          inf_params, opt_params)
-end
-
-function GPUInterpreter(interp::GPUInterpreter;
-                        world::UInt=interp.world,
-                        method_table_view::CC.MethodTableView=interp.method_table_view,
-                        code_cache::CodeCache=interp.code_cache,
-                        inf_cache::Vector{CC.InferenceResult}=interp.inf_cache,
-                        inf_params::CC.InferenceParams=interp.inf_params,
-                        opt_params::CC.OptimizationParams=interp.opt_params)
-    return GPUInterpreter(world, method_table_view,
-                          code_cache, inf_cache,
-                          inf_params, opt_params)
-end
-end # HAS_INTEGRATED_CACHE
-
 CC.InferenceParams(interp::GPUInterpreter) = interp.inf_params
 CC.OptimizationParams(interp::GPUInterpreter) = interp.opt_params
-#=CC.=#get_inference_world(interp::GPUInterpreter) = interp.world
+get_inference_world(interp::GPUInterpreter) = interp.world
 CC.get_inference_cache(interp::GPUInterpreter) = interp.inf_cache
-@static if HAS_INTEGRATED_CACHE
-    CC.cache_owner(interp::GPUInterpreter) = interp.token
-else
-    CC.code_cache(interp::GPUInterpreter) = WorldView(interp.code_cache, interp.world)
-end
+
+@setup_caching GPUInterpreter.cache
 
 # No need to do any locking since we're not putting our results into the runtime cache
 CC.lock_mi_inference(interp::GPUInterpreter, mi::MethodInstance) = nothing
@@ -507,8 +210,6 @@ CC.method_table(interp::GPUInterpreter) = interp.method_table_view
 # semi-concrete interepretation is broken with overlays (JuliaLang/julia#47349)
 function CC.concrete_eval_eligible(interp::GPUInterpreter,
     @nospecialize(f), result::CC.MethodCallResult, arginfo::CC.ArgInfo, sv::CC.InferenceState)
-    # NOTE it's fine to skip overloading with `sv::IRInterpretationState` since we disables
-    #      semi-concrete interpretation anyway.
     ret = @invoke CC.concrete_eval_eligible(interp::CC.AbstractInterpreter,
         f::Any, result::CC.MethodCallResult, arginfo::CC.ArgInfo, sv::CC.InferenceState)
     if ret === :semi_concrete_eval
@@ -516,172 +217,17 @@ function CC.concrete_eval_eligible(interp::GPUInterpreter,
     end
     return ret
 end
-function CC.concrete_eval_eligible(interp::GPUInterpreter,
-    @nospecialize(f), result::CC.MethodCallResult, arginfo::CC.ArgInfo)
-    ret = @invoke CC.concrete_eval_eligible(interp::CC.AbstractInterpreter,
-        f::Any, result::CC.MethodCallResult, arginfo::CC.ArgInfo)
-    ret === false && return nothing
-    return ret
-end
-
-
-## world view of the cache
-@static if VERSION < v"1.14-"
-    using Core.Compiler: WorldView
-end
-
-if !HAS_INTEGRATED_CACHE
-
-function CC.haskey(wvc::WorldView{CodeCache}, mi::MethodInstance)
-    CC.get(wvc, mi, nothing) !== nothing
-end
 
-function CC.get(wvc::WorldView{CodeCache}, mi::MethodInstance, default)
-    # check the cache
-    for ci in get!(wvc.cache.dict, mi, CodeInstance[])
-        if ci.min_world <= wvc.worlds.min_world && wvc.worlds.max_world <= ci.max_world
-            # TODO: if (code && (code == jl_nothing || jl_ir_flag_inferred((jl_array_t*)code)))
-            src = if ci.inferred isa Vector{UInt8}
-                ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any),
-                       mi.def, C_NULL, ci.inferred)
-            else
-                ci.inferred
-            end
-            return ci
-        end
-    end
-
-    return default
-end
-
-function CC.getindex(wvc::WorldView{CodeCache}, mi::MethodInstance)
-    r = CC.get(wvc, mi, nothing)
-    r === nothing && throw(KeyError(mi))
-    return r::CodeInstance
-end
-
-function CC.setindex!(wvc::WorldView{CodeCache}, ci::CodeInstance, mi::MethodInstance)
-    CC.setindex!(wvc.cache, ci, mi)
-end
-
-end # HAS_INTEGRATED_CACHE
 
 ## codegen/inference integration
 
-function ci_cache_populate(interp, cache, mi, min_world, max_world)
-    codeinfos = Pair{CodeInstance, CodeInfo}[]
-    @static if VERSION >= v"1.12.0-DEV.1434"
-        # see typeinfer.jl: typeinf_ext_toplevel
-        has_compilequeue = VERSION >= v"1.13.0-DEV.499" || v"1.12-beta3" <= VERSION < v"1.13-"
-        ci = CC.typeinf_ext(interp, mi, CC.SOURCE_MODE_NOT_REQUIRED)
-        if has_compilequeue
-            workqueue = CC.CompilationQueue(; interp)
-            push!(workqueue, ci)
-        else
-            workqueue = CodeInstance[ci]
-            inspected = IdSet{CodeInstance}()
-        end
-        while !isempty(workqueue)
-            callee = pop!(workqueue)
-            if has_compilequeue
-                CC.isinspected(workqueue, callee) && continue
-                CC.markinspected!(workqueue, callee)
-            else
-                callee in inspected && continue
-                push!(inspected, callee)
-            end
-
-            # now make sure everything has source code, if desired
-            mi = CC.get_ci_mi(callee)
-            if CC.use_const_api(callee)
-                if VERSION >= v"1.13.0-DEV.1121"
-                    src = CC.codeinfo_for_const(interp, mi, CC.WorldRange(callee.min_world, callee.max_world), callee.edges, callee.rettype_const)
-                else
-                    src = CC.codeinfo_for_const(interp, mi, callee.rettype_const)
-                end
-            else
-                # TODO: typeinf_code could return something with different edges/ages/owner/abi (needing an update to callee), which we don't handle here
-                src = CC.typeinf_code(interp, mi, true)
-            end
-            if src isa CodeInfo
-                if has_compilequeue
-                    sptypes = CC.sptypes_from_meth_instance(mi)
-                    CC.collectinvokes!(workqueue, src, sptypes)
-                else
-                    CC.collectinvokes!(workqueue, src)
-                end
-                push!(codeinfos, callee => src)
-            end
-        end
-    elseif VERSION >= v"1.12.0-DEV.15"
-        inferred_ci = CC.typeinf_ext_toplevel(interp, mi, CC.SOURCE_MODE_FORCE_SOURCE)
-        @assert inferred_ci !== nothing "Inference of $mi failed"
-
-        # inference should have populated our cache
-        wvc = WorldView(cache, min_world, max_world)
-        @assert CC.haskey(wvc, mi) "GPUCompiler: Failed to compile method for $mi, between worlds $min_world and $max_world"
-        ci = CC.getindex(wvc, mi)
-
-        # if ci is rettype_const, the inference result won't have been cached
-        # (because it is normally not supposed to be used ever again).
-        # to avoid the need to re-infer, set that field here.
-        if ci.inferred === nothing
-            CC.setindex!(wvc, inferred_ci, mi)
-            ci = CC.getindex(wvc, mi)
-        end
-    else
-        src = CC.typeinf_ext_toplevel(interp, mi)
-
-        # inference should have populated our cache
-        wvc = WorldView(cache, min_world, max_world)
-
-        @assert CC.haskey(wvc, mi) "GPUCompiler: Failed to compile method for $mi, between worlds $min_world and $max_world"
-        ci = CC.getindex(wvc, mi)
-
-        # if ci is rettype_const, the inference result won't have been cached
-        # (because it is normally not supposed to be used ever again).
-        # to avoid the need to re-infer, set that field here.
-        if ci.inferred === nothing
-            @atomic ci.inferred = src
-        end
-    end
-
-    return codeinfos
-end
-
-@static if VERSION >= v"1.14-"
-function ci_cache_lookup(cache, mi, min_world, max_world)
-    # In Julia 1.14+, WorldView was replaced by InternalCodeCache with WorldRange
-    # cache is OverlayCodeCache{InternalCodeCache}, extract owner from globalcache
-    owner = cache.globalcache.owner
-    wvc = CC.InternalCodeCache(owner, CC.WorldRange(min_world, max_world))
-    ci = CC.get(wvc, mi, nothing)
-    return ci
-end
-else
-function ci_cache_lookup(cache, mi, min_world, max_world)
-    wvc = WorldView(cache, min_world, max_world)
-    ci = CC.get(wvc, mi, nothing)
-    if VERSION < v"1.12.0-DEV.1434" && ci !== nothing && ci.inferred === nothing
-        # if for some reason we did end up with a codeinfo without inferred source, e.g.,
-        # because of calling `Base.return_types` which only sets rettyp, pretend we didn't
-        # run inference so that we re-infer now and not during codegen (which is disallowed)
-        return nothing
-    end
-    return ci
-end
-end # @static if
-
-
-## interface
-
-# for platforms without @cfunction-with-closure support
-const _method_instances = Ref{Any}()
-const _cache = Ref{Any}()
-function _lookup_fun(mi, min_world, max_world)
-    push!(_method_instances[], mi)
-    ci_cache_lookup(_cache[], mi, min_world, max_world)
-end
+const HAS_LLVM_GET_CIS = (
+    VERSION >= v"1.13.0-DEV.1120" || (
+        Libdl.dlsym(
+            unsafe_load(cglobal(:jl_libjulia_handle, Ptr{Cvoid})), :jl_get_llvm_cis, throw_error = false
+        ) !== nothing
+    )
+)
 
 @enum CompilationPolicy::Cint begin
     CompilationPolicyDefault = 0
@@ -698,32 +244,37 @@ function Base.precompile(@nospecialize(job::CompilerJob))
     if job.source.def.primary_world > job.world
         error("Cannot compile $(job.source) for world $(job.world); method is only valid from world $(job.source.def.primary_world) onwards")
     end
-
-    # populate the cache
     interp = get_interpreter(job)
-    cache = CC.code_cache(interp)
-    ci_cache_populate(interp, cache, job.source, job.world, job.world)
+    CompilerCaching.typeinf!(interp.cache, interp, job.source)
     return true
 end
 
-
-const HAS_LLVM_GET_CIS = (
-    VERSION >= v"1.13.0-DEV.1120" || (
-        Libdl.dlsym(
-            unsafe_load(cglobal(:jl_libjulia_handle, Ptr{Cvoid})), :jl_get_llvm_cis, throw_error = false
-        ) !== nothing
-    )
-)
+# for platforms without @cfunction-with-closure support
+const _method_instances = Ref{Any}()
+const _cache = Ref{Any}()
+function _lookup_fun(mi, min_world, max_world)
+    push!(_method_instances[], mi)
+    cache = _cache[]::CacheView
+    get(cache, mi, nothing)
+end
 
 function compile_method_instance(@nospecialize(job::CompilerJob))
     if job.source.def.primary_world > job.world
         error("Cannot compile $(job.source) for world $(job.world); method is only valid from world $(job.source.def.primary_world) onwards")
     end
 
-    # populate the cache
+    # populate the cache (inference)
     interp = get_interpreter(job)
-    cache = CC.code_cache(interp)
-    populated = ci_cache_populate(interp, cache, job.source, job.world, job.world)
+    cache = interp.cache
+    CompilerCaching.typeinf!(cache, interp, job.source)
+    ci = get(cache, job.source, nothing)::CodeInstance
+
+    # collect (CI, CodeInfo) pairs for jl_emit_native
+    @static if VERSION >= v"1.12.0-DEV.1823"
+        codeinfo_pairs = CompilerCaching.get_codeinfos(ci)
+    else
+        codeinfo_pairs = nothing
+    end
 
     # create a callback to look-up function in our cache,
     # and keep track of the method instances we needed.
@@ -731,7 +282,7 @@ function compile_method_instance(@nospecialize(job::CompilerJob))
     if Sys.ARCH == :x86 || Sys.ARCH == :x86_64
         function lookup_fun(mi, min_world, max_world)
             push!(method_instances, mi)
-            ci_cache_lookup(cache, mi, min_world, max_world)
+            get(cache, mi, nothing)
         end
         lookup_cb = @cfunction($lookup_fun, Any, (Any, UInt, UInt))
     else
@@ -775,10 +326,8 @@ function compile_method_instance(@nospecialize(job::CompilerJob))
 
         native_code = if VERSION >= v"1.12.0-DEV.1823"
             codeinfos = Any[]
-            for (ci, src) in populated
-                # each item in the list should be a CodeInstance followed by a CodeInfo
-                # indicating something to compile
-                push!(codeinfos, ci::CodeInstance)
+            for (ci′, src) in codeinfo_pairs
+                push!(codeinfos, ci′::CodeInstance)
                 push!(codeinfos, src::CodeInfo)
             end
             @ccall jl_emit_native(codeinfos::Vector{Any}, ts_mod::LLVM.API.LLVMOrcThreadSafeModuleRef, Ref(params)::Ptr{Base.CodegenParams}, #=extern linkage=# false::Cint)::Ptr{Cvoid}
@@ -800,22 +349,15 @@ function compile_method_instance(@nospecialize(job::CompilerJob))
 
         # XXX: this is wrong; we can't expose the underlying LLVM module, but should
         #      instead always go through the callback in order to unlock it properly.
-        #      rework this once we depend on Julia 1.9 or later.
         llvm_ts_mod = LLVM.ThreadSafeModule(llvm_mod_ref)
         llvm_mod = nothing
         llvm_ts_mod() do mod
             llvm_mod = mod
         end
     end
-    if !(Sys.ARCH == :x86 || Sys.ARCH == :x86_64)
-        cache_gbl = nothing
-    end
 
     # Since Julia 1.13, the caller is responsible for initializing global variables that
     # point to global values or bindings with their address in memory.
-    # Similarly on previous versions when imaging=true, it is also the caller's responsibility
-    # (see https://github.com/JuliaGPU/GPUCompiler.jl/issues/753), but we can support this on versions
-    # that have HAS_LLVM_GVS_GLOBALS.
     gvs = nothing
     inits = nothing
     @static if VERSION >= v"1.13.0-DEV.623"
@@ -853,17 +395,9 @@ function compile_method_instance(@nospecialize(job::CompilerJob))
 
     # Maintain a map from global variables to their initialized Julia values.
     # The objects pointed to are perma-rooted, during codegen.
-    # It is legal to call `Base.unsafe_pointer_to_objref` on `values(gv_to_value)`,
-    # but x->pointer_from_objref(Base.unsafe_pointer_to_objref(x)) is not idempotent,
-    # thus we store raw pointers here.
-    # Currently GVs are privatized, so users may have to handle embedded pointers,
-    # but this dictionary provides a clear indication that the embedded pointer is
-    # indeed avalid Julia object.
     gv_to_value = Dict{String, Ptr{Cvoid}}()
 
-    # On certain version of Julia we have no reliable way to match the `gvs` to their initializers `inits`.
     if gvs === nothing
-        # global variables here properly.
         for gv in globals(llvm_mod)
             if !haskey(metadata(gv), "julia.constgv")
                 continue
@@ -889,8 +423,6 @@ function compile_method_instance(@nospecialize(job::CompilerJob))
         for (gv_ref, init) in zip(gvs, inits)
             gv = GlobalVariable(gv_ref)
             gv_to_value[LLVM.name(gv)] = init
-            # set the initializer
-            # TODO(vc): To enable full relocation we should actually strip out the initializers here.
             if LLVM.isnull(initializer(gv))
                 val = const_inttoptr(ConstantInt(Int64(init)), value_type(initializer(gv)))
                 initializer!(gv, val)
@@ -902,9 +434,6 @@ function compile_method_instance(@nospecialize(job::CompilerJob))
 
     if HAS_LLVM_GET_CIS
         # on sufficiently recent versions of Julia, we can query the CIs compiled.
-        # this is required after the move to `invoke(::CodeInstance)`, because our
-        # lookup function (used to populate method_instances) isn't always called then.
-
         num_cis = Ref{Csize_t}(0)
         @ccall jl_get_llvm_cis(native_code::Ptr{Cvoid}, num_cis::Ptr{Csize_t},
                                C_NULL::Ptr{Cvoid})::Nothing
@@ -914,7 +443,6 @@ function compile_method_instance(@nospecialize(job::CompilerJob))
         )::Nothing
     elseif VERSION >= v"1.12.0-DEV.1703"
         # slightly older versions of Julia used MIs directly
-
         num_mis = Ref{Csize_t}(0)
         @ccall jl_get_llvm_mis(native_code::Ptr{Cvoid}, num_mis::Ptr{Csize_t},
                                C_NULL::Ptr{Cvoid})::Nothing
@@ -925,58 +453,51 @@ function compile_method_instance(@nospecialize(job::CompilerJob))
 
     if !HAS_LLVM_GET_CIS
         for mi in method_instances
-            ci = ci_cache_lookup(cache, mi, job.world, job.world)
-            ci === nothing && continue
+            ci′ = get(cache, mi, nothing)
+            ci′ === nothing && continue
 
             llvm_func_idx = Ref{Int32}(-1)
             llvm_specfunc_idx = Ref{Int32}(-1)
             ccall(
                 :jl_get_function_id, Nothing,
                 (Ptr{Cvoid}, Any, Ptr{Int32}, Ptr{Int32}),
-                native_code, ci, llvm_func_idx, llvm_specfunc_idx
+                native_code, ci′, llvm_func_idx, llvm_specfunc_idx
             )
-            # Suppose we have two nested interpreters in use at the same time.
-            # Looking up a ci from the cache is not unique for a given mi.
-            # Consequently its possible we may not have compiled the ci found
-            # by the cache (instead having compiled the ci from the other interp).
             if llvm_func_idx[] == -1
                 continue
             end
-            push!(code_instances, ci)
+            push!(code_instances, ci′)
         end
     else
-        # To avoid a clash in the compiled cache containing both with an interpreter token (like GPUCompiler.GPUCompilerCacheToken) and native,
+        # To avoid a clash in the compiled cache between code instances owned by an interpreter token and native ones,
         # prefer the non-native code-instance.
-        # TODO: in the future we should migrate compiled to have the ci as the key, not the mi.
         native_mis = Set{MethodInstance}()
-        for ci in code_instances
-            if ci.owner !== nothing
-                push!(native_mis, ci.def::MethodInstance)
+        for ci′ in code_instances
+            if ci′.owner !== nothing
+                push!(native_mis, ci′.def::MethodInstance)
             end
         end
-        filter!(code_instances) do ci
-            return ci.owner !== nothing || in(ci.def, native_mis)
+        filter!(code_instances) do ci′
+            return ci′.owner !== nothing || in(ci′.def, native_mis)
         end
     end
 
-    # Avoid redundant code_instances. This is necessary to avoid false positives trying to add the same key'd mi to the compiled Dict.
     unique!(code_instances)
 
     resize!(method_instances, length(code_instances))
-    for (i, ci) in enumerate(code_instances)
-        method_instances[i] = ci.def::MethodInstance
+    for (i, ci′) in enumerate(code_instances)
+        method_instances[i] = ci′.def::MethodInstance
     end
 
     # process all compiled method instances
     compiled = Dict()
-    for (ci, mi) in zip(code_instances, method_instances)
-
+    for (ci′, mi) in zip(code_instances, method_instances)
         # get the function index
         llvm_func_idx = Ref{Int32}(-1)
         llvm_specfunc_idx = Ref{Int32}(-1)
         ccall(:jl_get_function_id, Nothing,
               (Ptr{Cvoid}, Any, Ptr{Int32}, Ptr{Int32}),
-              native_code, ci, llvm_func_idx, llvm_specfunc_idx)
+              native_code, ci′, llvm_func_idx, llvm_specfunc_idx)
         @assert llvm_func_idx[] != -1 || llvm_specfunc_idx[] != -1 "Static compilation failed"
 
         # get the function
@@ -1002,7 +523,7 @@ function compile_method_instance(@nospecialize(job::CompilerJob))
 
         # NOTE: it's not safe to store raw LLVM functions here, since those may get
         #       removed or renamed during optimization, so we store their name instead.
-        compiled[mi] = (; ci, func=llvm_func, specfunc=llvm_specfunc)
+        compiled[mi] = (; ci=ci′, func=llvm_func, specfunc=llvm_specfunc)
     end
 
     # ensure that the requested method instance was compiled
@@ -1010,18 +531,3 @@ function compile_method_instance(@nospecialize(job::CompilerJob))
 
     return llvm_mod, compiled, gv_to_value
 end
-
-# partially revert JuliaLangjulia#49391
-@static if v"1.11.0-DEV.1603" <= VERSION < v"1.12.0-DEV.347" && # reverted on master
-           !(v"1.11-beta2" <= VERSION < v"1.12")                # reverted on 1.11-beta2
-function CC.typeinf(interp::GPUInterpreter, frame::CC.InferenceState)
-    if CC.__measure_typeinf__[]
-        CC.Timings.enter_new_timer(frame)
-        v = CC._typeinf(interp, frame)
-        CC.Timings.exit_current_timer(frame)
-        return v
-    else
-        return CC._typeinf(interp, frame)
-    end
-end
-end
diff --git a/src/metal.jl b/src/metal.jl
index bd4f8e66..90edb87b 100644
--- a/src/metal.jl
+++ b/src/metal.jl
@@ -78,10 +78,6 @@ pass_by_value(job::CompilerJob{MetalCompilerTarget}) = false
 
 ## job
 
-# TODO: encode debug build or not in the compiler job
-#       https://github.com/JuliaGPU/CUDAnative.jl/issues/368
-runtime_slug(job::CompilerJob{MetalCompilerTarget}) = "metal-macos$(job.config.target.macos)"
-
 isintrinsic(@nospecialize(job::CompilerJob{MetalCompilerTarget}), fn::String) =
     return startswith(fn, "air.")
 
diff --git a/src/native.jl b/src/native.jl
index fdd880ec..536c31d4 100644
--- a/src/native.jl
+++ b/src/native.jl
@@ -33,6 +33,5 @@ end
 
 ## job
 
-runtime_slug(job::CompilerJob{NativeCompilerTarget}) = "native_$(job.config.target.cpu)-$(hash(job.config.target.features))$(job.config.target.jlruntime ? "-jlrt" : "")"
 uses_julia_runtime(job::CompilerJob{NativeCompilerTarget}) = job.config.target.jlruntime
 can_vectorize(job::CompilerJob{NativeCompilerTarget}) = true
diff --git a/src/optim.jl b/src/optim.jl
index 8bdb110e..594ecf29 100644
--- a/src/optim.jl
+++ b/src/optim.jl
@@ -238,13 +238,10 @@ function buildIntrinsicLoweringPipeline(mpm, @nospecialize(job::CompilerJob), op
         end
         add!(fpm, GCInvariantVerifierPass())
         add!(fpm, LateLowerGCPass())
-        if uses_julia_runtime(job) && VERSION >= v"1.11.0-DEV.208"
+        if uses_julia_runtime(job)
             add!(fpm, FinalLowerGCPass())
         end
     end
-    if uses_julia_runtime(job) && VERSION < v"1.11.0-DEV.208"
-        add!(mpm, FinalLowerGCPass())
-    end
 
     if opt_level >= 2
         add!(mpm, NewPMFunctionPassManager()) do fpm
diff --git a/src/ptx.jl b/src/ptx.jl
index 66880850..c21dd6b8 100644
--- a/src/ptx.jl
+++ b/src/ptx.jl
@@ -97,12 +97,6 @@ const ptx_intrinsics = ("vprintf", "__assertfail", "malloc", "free")
 isintrinsic(@nospecialize(job::CompilerJob{PTXCompilerTarget}), fn::String) =
     in(fn, ptx_intrinsics)
 
-# XXX: the debuginfo part should be handled by GPUCompiler as it applies to all back-ends.
-runtime_slug(@nospecialize(job::CompilerJob{PTXCompilerTarget})) =
-    "ptx$(job.config.target.ptx.major)$(job.config.target.ptx.minor)" *
-    "-sm_$(job.config.target.cap.major)$(job.config.target.cap.minor)" *
-    "-debuginfo=$(Int(llvm_debug_info(job)))"
-
 function finish_module!(@nospecialize(job::CompilerJob{PTXCompilerTarget}),
                         mod::LLVM.Module, entry::LLVM.Function)
     # emit the device capability and ptx isa version as constants in the module. this makes
diff --git a/src/rtlib.jl b/src/rtlib.jl
index 12fc321b..d5488944 100644
--- a/src/rtlib.jl
+++ b/src/rtlib.jl
@@ -60,9 +60,30 @@ end
 
 ## functionality to build the runtime library
 
-function emit_function!(mod, config::CompilerConfig, f, method)
+# Compile a single runtime function and link it into `mod`. If `cache` is provided and the
+# consumer's results type opts in via `GPUCompiler.bitcode`/`bitcode!`, per-function bitcode
+# is read from / written to the cached `CodeInstance` to avoid recompilation across sessions.
+function emit_function!(mod, config::CompilerConfig, f, method,
+                        cache::Union{Nothing, CacheView}=nothing)
     tt = Base.to_tuple_type(method.types)
     source = generic_methodinstance(f, tt)
+    name = method.llvm_name
+    opaque_pointers = !supports_typed_pointers(context())
+
+    # fast path: pull renamed bitcode straight from the cached CI
+    if cache !== nothing
+        hit = CompilerCaching.lookup(cache, source)
+        if hit !== nothing
+            cached = bitcode(hit[2], opaque_pointers)
+            if cached !== nothing
+                func_mod = parse(LLVM.Module, MemoryBuffer(cached))
+                link!(mod, func_mod)
+                return
+            end
+        end
+    end
+
+    # slow path: compile, rename, optionally cache, link
     new_mod, meta = compile_unhooked(:llvm, CompilerJob(source, config))
     ft = function_type(meta.entry)
     expected_ft = convert(LLVM.FunctionType, method)
@@ -73,20 +94,26 @@ function emit_function!(mod, config::CompilerConfig, f, method)
     # recent Julia versions include prototypes for all runtime functions, even if unused
     run!(StripDeadPrototypesPass(), new_mod, llvm_machine(config.target))
 
-    temp_name = LLVM.name(meta.entry)
-    link!(mod, new_mod)
-    entry = functions(mod)[temp_name]
-
-    # if a declaration already existed, replace it with the function to avoid aliasing
-    # (and getting function names like gpu_signal_exception1)
-    name = method.llvm_name
-    if haskey(functions(mod), name)
-        decl = functions(mod)[name]
-        @assert value_type(decl) == value_type(entry)
-        replace_uses!(decl, entry)
+    # rename to the final `gpu_*` name on the per-function module, so the cached bitcode is
+    # immediately link-ready (no per-session rename pass).
+    if haskey(functions(new_mod), name) && functions(new_mod)[name] !== meta.entry
+        decl = functions(new_mod)[name]
+        @assert value_type(decl) == value_type(meta.entry)
+        replace_uses!(decl, meta.entry)
         erase!(decl)
     end
-    LLVM.name!(entry, name)
+    LLVM.name!(meta.entry, name)
+
+    if cache !== nothing
+        hit = CompilerCaching.lookup(cache, source)
+        if hit !== nothing
+            io = IOBuffer()
+            write(io, new_mod)
+            bitcode!(hit[2], take!(io), opaque_pointers)
+        end
+    end
+
+    link!(mod, new_mod)
 end
 
 function build_runtime(@nospecialize(job::CompilerJob))
@@ -96,6 +123,11 @@ function build_runtime(@nospecialize(job::CompilerJob))
     # derive a job that represents the runtime itself (notably with kernel=false).
     config = CompilerConfig(job.config; kernel=false, toplevel=false, only_entry=false, strip=false)
 
+    # cache view shared with the runtime functions' compile_unhooked calls — owner is
+    # determined by target+params+always_inline+method_table, all preserved on `config`.
+    proto = CompilerJob(job.source, config, job.world)
+    cache = cache_view(proto)
+
     for method in values(Runtime.methods)
         def = if isa(method.def, Symbol)
             isdefined(runtime_module(job), method.def) || continue
@@ -103,7 +135,7 @@ function build_runtime(@nospecialize(job::CompilerJob))
         else
             method.def
         end
-        emit_function!(mod, config, typeof(def), method)
+        emit_function!(mod, config, typeof(def), method, cache)
     end
 
     # we cannot optimize the runtime library, because the code would then be optimized again
@@ -114,38 +146,25 @@ function build_runtime(@nospecialize(job::CompilerJob))
     mod
 end
 
-@locked function load_runtime(@nospecialize(job::CompilerJob))
-    global compile_cache
-    if compile_cache === nothing    # during precompilation
-        return build_runtime(job)
-    end
+# session-local cache of assembled runtime libraries, keyed by
+# `(cache_owner, opaque_pointers)`. Avoids re-running `build_runtime` (which re-parses and
+# re-links per-function bitcode) on every kernel compile within a session. Cross-session
+# persistence happens at the per-function level via the `bitcode`/`bitcode!` hooks.
+const _runtime_libs = Dict{Tuple{Any, Bool}, Vector{UInt8}}()
+const _runtime_libs_lock = ReentrantLock()
 
-    slug = runtime_slug(job)
-    if !supports_typed_pointers(context())
-        slug *= "-opaque"
-    end
-    name = "runtime_$(slug).bc"
-    path = joinpath(compile_cache, name)
+@locked function load_runtime(@nospecialize(job::CompilerJob))
+    key = (cache_owner(job), !supports_typed_pointers(context()))
 
-    if !ispath(path)
-        @debug "Building the GPU runtime library at $path"
-        mkpath(compile_cache)
+    bytes = Base.@lock _runtime_libs_lock get!(_runtime_libs, key) do
         lib = build_runtime(job)
-
-        # atomic write to disk
-        temp_path, io = mktemp(dirname(path); cleanup=false)
+        io = IOBuffer()
         write(io, lib)
-        close(io)
-        @static if VERSION >= v"1.12.0-DEV.1023"
-            mv(temp_path, path; force=true)
-        else
-            Base.rename(temp_path, path, force=true)
-        end
+        take!(io)
     end
 
-    return parse(LLVM.Module, MemoryBufferFile(path); lazy=true)
+    return parse(LLVM.Module, MemoryBuffer(bytes); lazy=true)
 end
 
-# remove the existing cache
-# NOTE: call this function from global scope, so any change triggers recompilation.
-reset_runtime() = rm(compile_cache; recursive=true, force=true)
+# clear the session-local runtime library cache
+reset_runtime() = Base.@lock _runtime_libs_lock empty!(_runtime_libs)
diff --git a/src/spirv.jl b/src/spirv.jl
index ef96f4c4..c23f971b 100644
--- a/src/spirv.jl
+++ b/src/spirv.jl
@@ -58,11 +58,6 @@ llvm_datalayout(::SPIRVCompilerTarget) = Int===Int64 ?
 
 ## job
 
-# TODO: encode debug build or not in the compiler job
-#       https://github.com/JuliaGPU/CUDAnative.jl/issues/368
-runtime_slug(job::CompilerJob{SPIRVCompilerTarget}) =
-    "spirv-" * String(job.config.target.backend)
-
 function finish_module!(job::CompilerJob{SPIRVCompilerTarget}, mod::LLVM.Module,
                         entry::LLVM.Function)
     # update calling convention
diff --git a/src/utils.jl b/src/utils.jl
index a403408f..76b07679 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -60,20 +60,9 @@ for level in [:debug, :info, :warn, :error]
                 # using with_logger would create a closure, which is incompatible with
                 # generated functions, so instead we reproduce its implementation here
                 safe_logstate = Base.CoreLogging.LogState(safe_logger)
-                @static if VERSION < v"1.11-"
-                    t = current_task()
-                    old_logstate = t.logstate
-                    try
-                        t.logstate = safe_logstate
-                        $(esc(macrocall))
-                    finally
-                        t.logstate = old_logstate
-                    end
-                else
-                    Base.ScopedValues.@with(
-                        Base.CoreLogging.CURRENT_LOGSTATE => safe_logstate, $(esc(macrocall))
-                    )
-                end
+                Base.ScopedValues.@with(
+                    Base.CoreLogging.CURRENT_LOGSTATE => safe_logstate, $(esc(macrocall))
+                )
             end
         end
     end
diff --git a/src/validation.jl b/src/validation.jl
index 0190d1c9..2a7172a2 100644
--- a/src/validation.jl
+++ b/src/validation.jl
@@ -14,18 +14,7 @@ function method_matches(@nospecialize(tt::Type{<:Tuple}); world::Integer)
 end
 
 function typeinf_type(mi::MethodInstance; interp::CC.AbstractInterpreter)
-    @static if VERSION < v"1.11.0"
-        code = Core.Compiler.get(Core.Compiler.code_cache(interp), mi, nothing)
-        if code isa Core.Compiler.CodeInstance
-            return code.rettype
-        end
-        result = Core.Compiler.InferenceResult(mi, Core.Compiler.typeinf_lattice(interp))
-        Core.Compiler.typeinf(interp, result, :global)
-        Core.Compiler.is_inferred(result) || return Any
-        Core.Compiler.widenconst(Core.Compiler.ignorelimited(result.result))
-    else
-        something(Core.Compiler.typeinf_type(interp, mi), Any)
-    end
+    something(Core.Compiler.typeinf_type(interp, mi), Any)
 end
 
 function check_method(@nospecialize(job::CompilerJob))
diff --git a/test/helpers/native.jl b/test/helpers/native.jl
index 656028f4..63d8a0f3 100644
--- a/test/helpers/native.jl
+++ b/test/helpers/native.jl
@@ -69,22 +69,4 @@ function code_execution(@nospecialize(func), @nospecialize(types); kwargs...)
     end
 end
 
-const runtime_cache = Dict{Any, Any}()
-
-function compiler(job)
-    JuliaContext() do ctx
-        GPUCompiler.compile(:asm, job)
-    end
-end
-
-function linker(job, asm)
-    asm
-end
-
-# simulates cached codegen
-function cached_execution(@nospecialize(func), @nospecialize(types); kwargs...)
-    job, kwargs = create_job(func, types; validate=false, kwargs...)
-    GPUCompiler.cached_compilation(runtime_cache, job.source, job.config, compiler, linker)
-end
-
 end
diff --git a/test/native.jl b/test/native.jl
index f97f659a..54145248 100644
--- a/test/native.jl
+++ b/test/native.jl
@@ -118,98 +118,6 @@ end
             @check "add i64 %{{[0-9]+}}, 2"
             GPUCompiler.code_llvm(job)
         end
-
-        # cached_compilation interface
-        invocations = Ref(0)
-        function compiler(job)
-            invocations[] += 1
-            JuliaContext() do ctx
-                ir, ir_meta = GPUCompiler.compile(:llvm, job)
-                string(ir)
-            end
-        end
-        linker(job, compiled) = compiled
-        cache = Dict()
-        ft = typeof(mod.kernel)
-        tt = Tuple{Int64}
-
-        # initial compilation
-        source = methodinstance(ft, tt, Base.get_world_counter())
-        @test @filecheck begin
-            @check_label "define i64 @{{(julia|j)_kernel_[0-9]+}}"
-            @check "add i64 %{{[0-9]+}}, 2"
-            Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker)
-        end
-        @test invocations[] == 1
-
-        # cached compilation
-        @test @filecheck begin
-            @check_label "define i64 @{{(julia|j)_kernel_[0-9]+}}"
-            @check "add i64 %{{[0-9]+}}, 2"
-            Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker)
-        end
-        @test invocations[] == 1
-
-        # redefinition
-        @eval mod kernel(i) = child(i)+3
-        source = methodinstance(ft, tt, Base.get_world_counter())
-        @test @filecheck begin
-            @check_label "define i64 @{{(julia|j)_kernel_[0-9]+}}"
-            @check "add i64 %{{[0-9]+}}, 3"
-            Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker)
-        end
-        @test invocations[] == 2
-
-        # cached compilation
-        @test @filecheck begin
-            @check_label "define i64 @{{(julia|j)_kernel_[0-9]+}}"
-            @check "add i64 %{{[0-9]+}}, 3"
-            Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker)
-        end
-        @test invocations[] == 2
-
-        # redefinition of an unrelated function
-        @eval mod unrelated(i) = 42
-        Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker)
-        @test invocations[] == 2
-
-        # redefining child functions
-        @eval mod @noinline child(i) = i+1
-        Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker)
-        @test invocations[] == 3
-
-        # cached compilation
-        Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker)
-        @test invocations[] == 3
-
-        # change in configuration
-        config = CompilerConfig(job.config; name="foobar")
-        @test @filecheck begin
-            @check "define i64 @foobar"
-            Base.invokelatest(GPUCompiler.cached_compilation, cache, source, config, compiler, linker)
-        end
-        @test invocations[] == 4
-
-        # tasks running in the background should keep on using the old version
-        c1, c2 = Condition(), Condition()
-        function background(job)
-            local_source = methodinstance(ft, tt, Base.get_world_counter())
-            notify(c1)
-            wait(c2)    # wait for redefinition
-            GPUCompiler.cached_compilation(cache, local_source, job.config, compiler, linker)
-        end
-        t = @async Base.invokelatest(background, job)
-        wait(c1)        # make sure the task has started
-        @eval mod kernel(i) = child(i)+4
-        source = methodinstance(ft, tt, Base.get_world_counter())
-        ir = Base.invokelatest(GPUCompiler.cached_compilation, cache, source, job.config, compiler, linker)
-        @test contains(ir, r"add i64 %\d+, 4")
-        notify(c2)      # wake up the task
-        @test @filecheck begin
-            @check_label "define i64 @{{(julia|j)_kernel_[0-9]+}}"
-            @check "add i64 %{{[0-9]+}}, 3"
-            fetch(t)
-        end
     end
 
     @testset "allowed mutable types" begin
diff --git a/test/native/precompile.jl b/test/native/precompile.jl
index 14ac418f..439384a4 100644
--- a/test/native/precompile.jl
+++ b/test/native/precompile.jl
@@ -59,7 +59,7 @@ precompile_test_harness("Inference caching") do load_path
 
         token = let
             job, _ = NativeCompiler.Native.create_job(identity, (Int,))
-            GPUCompiler.ci_cache_token(job)
+            GPUCompiler.cache_owner(job)
         end
         @test !check_presence(identity_mi, token)
 
@@ -77,27 +77,5 @@ precompile_test_harness("Inference caching") do load_path
 
         # check that identity survived
         @test check_presence(identity_mi, token) broken=(v"1.12.0-DEV.1268" <= VERSION < v"1.12.5" || v"1.13.0-" <= VERSION < v"1.13.0-beta3"|| v"1.14.0-" <= VERSION < v"1.14.0-DEV.1843")
-
-        GPUCompiler.clear_disk_cache!()
-        @test GPUCompiler.disk_cache_enabled() == false
-
-        GPUCompiler.enable_disk_cache!()
-        @test GPUCompiler.disk_cache_enabled() == true
-
-        job, _ = NativeCompiler.Native.create_job(NativeBackend.kernel, (Vector{Int}, Int); validate=false)
-        @assert job.source == kernel_mi
-        ci = GPUCompiler.ci_cache_lookup(GPUCompiler.ci_cache(job), job.source, job.world, job.world)
-        @assert ci !== nothing
-        @assert ci.inferred !== nothing
-        path = GPUCompiler.cache_file(ci, job.config)
-        @test path !== nothing
-        @test !ispath(path)
-        NativeCompiler.Native.cached_execution(NativeBackend.kernel, (Vector{Int}, Int))
-        @test ispath(path)
-        GPUCompiler.clear_disk_cache!()
-        @test !ispath(path)
-
-        GPUCompiler.enable_disk_cache!(false)
-        @test GPUCompiler.disk_cache_enabled() == false
     end
 end
diff --git a/test/ptx/precompile.jl b/test/ptx/precompile.jl
index 290c60be..853dce8d 100644
--- a/test/ptx/precompile.jl
+++ b/test/ptx/precompile.jl
@@ -34,7 +34,7 @@ precompile_test_harness("Inference caching") do load_path
 
         token = let
             job, _ = PTX.create_job(identity, (Int,))
-            GPUCompiler.ci_cache_token(job)
+            GPUCompiler.cache_owner(job)
         end
         ci = isdefined(identity_mi, :cache) ? identity_mi.cache : nothing
         while ci !== nothing