Skip to content

Support for opaque closures #421

@maleadt

Description

@maleadt

I'm not sure if these make much sense in the context of GPU compilation, but here's at least a sketch of what initial support could look like:

using GPUCompiler, LLVM
include("test/definitions/native.jl")


## GPU-compatible opaque closures

using Core.Compiler: IRCode
using Core: CodeInfo, MethodInstance, CodeInstance, LineNumberNode

"""
    OpaqueGPUClosure{F, E, A, R}

Callable handle for a piece of code that is compiled through a deferred GPU compiler job.

Only the captured environment is stored as data; everything else is carried in the type
parameters, which `generate_opaque_closure` fills in as follows:

- `F`: the key under which the compiler job was stored in
  `GPUCompiler.deferred_codegen_jobs`
- `E`: the type of the captured environment (`typeof(env)`)
- `A`: the argument signature, as a `Tuple` type
- `R`: the return type
"""
struct OpaqueGPUClosure{F, E, A, R}
    # captured environment
    env::E
end

"""
    compute_ir_rettype(ir::IRCode)

Compute the return type of `ir` by merging the types of all returned values.

Every `ReturnNode` that carries a value contributes the type of that value; return nodes
without a value are skipped. The merged lattice element is widened to a plain Julia type
before it is returned. When no return node carries a value, the result is `Union{}`.
"""
function compute_ir_rettype(ir::IRCode)
    merged = Union{}
    nstmts = length(ir.stmts)
    for idx in 1:nstmts
        node = ir.stmts[idx][:inst]
        node isa Core.Compiler.ReturnNode || continue
        # a return node without `val` does not return anything
        isdefined(node, :val) || continue
        valtype = Core.Compiler.argextype(node.val, ir)
        merged = Core.Compiler.tmerge(valtype, merged)
    end
    return Core.Compiler.widenconst(merged)
end

"""
    compute_oc_signature(ir::IRCode, nargs::Int, isva::Bool)

Build the argument signature (a `Tuple` type) from the argument types recorded in `ir`.

The first entry of `ir.argtypes` is skipped; the following `nargs` entries are widened to
plain Julia types. With `isva` set, the last of those is treated as the vararg slot: a
`Tuple` type is spliced into the signature element by element, anything else becomes
`Vararg{Any}`.
"""
function compute_oc_signature(ir::IRCode, nargs::Int, isva::Bool)
    params = Vector{Any}(undef, nargs)
    for slot in eachindex(params)
        # offset by one: entry 1 of `ir.argtypes` is not part of the signature
        params[slot] = Core.Compiler.widenconst(ir.argtypes[slot + 1])
    end
    isva || return Tuple{params...}

    trailing = pop!(params)
    if trailing <: Tuple
        append!(params, trailing.parameters)
    else
        push!(params, Vararg{Any})
    end
    return Tuple{params...}
end

"""
    OpaqueGPUClosure(config::CompilerConfig, ir::IRCode, env...; isva::Bool=false)

Construct an `OpaqueGPUClosure` from typed IR.

The signature and return type are derived from `ir`, the IR is converted into a
`CodeInfo`, and the result is handed to `generate_opaque_closure` together with `config`
and the captured `env`.
"""
function OpaqueGPUClosure(config::CompilerConfig, ir::IRCode, @nospecialize env...;
                          isva::Bool = false)
    # NOTE: we need ir.argtypes[1] == typeof(env)
    # work on a copy (presumably because `ir_to_codeinf!` below mutates the IR — confirm)
    ircopy = Core.Compiler.copy(ir)
    nslots = length(ircopy.argtypes)
    nargs = nslots - 1

    # both must be derived before the IR is converted
    sig = compute_oc_signature(ircopy, nargs, isva)
    rt = compute_ir_rettype(ircopy)

    # fill in an empty CodeInfo with one slot per IR argument
    codeinfo = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ())
    codeinfo.slotnames = fill(:none, nslots)
    codeinfo.slotflags = fill(0x00, nslots)
    codeinfo.slottypes = copy(ircopy.argtypes)
    codeinfo.rettype = rt
    codeinfo = Core.Compiler.ir_to_codeinf!(codeinfo, ircopy)

    return generate_opaque_closure(config, codeinfo, sig, rt, nargs, isva, env...)
end

"""
    generate_opaque_closure(config, src, sig, rt, nargs, isva, env...;
                            mod=@__MODULE__, line=0, file=nothing)

Wrap the typed code `src` in a fresh method, register a deferred compiler job for it, and
return an `OpaqueGPUClosure` that refers to that job.

# Arguments
- `config::CompilerConfig`: compiler configuration used for the job
- `src::CodeInfo`: the source of the method
- `sig`: argument signature as a `Tuple` type (without the environment)
- `rt`: return type recorded in the code instance and the closure type
- `nargs::Int`: number of arguments, not counting the environment
- `isva::Bool`: whether the method is variadic
- `env...`: captured environment, stored in the returned closure

# Keywords
- `mod::Module`: module the method is created in
- `line::Int`, `file`: source location recorded on the method; a `nothing` file is stored
  as the empty symbol
"""
function generate_opaque_closure(config::CompilerConfig, src::CodeInfo,
                                 @nospecialize(sig), @nospecialize(rt),
                                 nargs::Int, isva::Bool, @nospecialize env...;
                                 mod::Module=@__MODULE__,
                                 line::Int=0,
                                 file::Union{Nothing,Symbol}=nothing)
    # create a method (like `jl_make_opaque_closure_method`)
    # the method lives in `mod`; this used to be hard-coded to `Main`, which silently
    # ignored the keyword argument
    meth = ccall(:jl_new_method_uninit, Ref{Method}, (Any,), mod)
    meth.sig = Tuple
    meth.isva = isva
    meth.is_for_opaque_closure = false  # XXX: this drives heuristics, some of which we want, others we don't
    meth.name = Symbol("opaque gpu closure")
    meth.nargs = nargs + 1              # +1 for the environment
    meth.file = something(file, Symbol())
    meth.line = line
    ccall(:jl_method_set_source, Nothing, (Any, Any), meth, src)

    # look up a method instance and create a compiler job
    full_sig = Tuple{typeof(env), sig.parameters...}
    mi = ccall(:jl_specializations_get_linfo, Ref{MethodInstance}, (Any, Any, Any), meth, full_sig, Core.svec())
    job = CompilerJob(mi, config)

    # create a code instance and store it in the cache
    ci = CodeInstance(mi, rt, C_NULL, src, Int32(0), meth.primary_world, typemax(UInt), UInt32(0), UInt32(0), nothing, UInt8(0))
    Core.Compiler.setindex!(GPUCompiler.ci_cache(job), ci, mi)

    # register the job; its key becomes the `F` parameter of the closure
    # NOTE(review): the key is derived from the current number of entries, which assumes
    # that entries are never removed from `deferred_codegen_jobs` — confirm
    id = length(GPUCompiler.deferred_codegen_jobs) + 1
    GPUCompiler.deferred_codegen_jobs[id] = job
    return OpaqueGPUClosure{id, typeof(env), sig, rt}(env)
end

# Call overload for the opaque closure.
#
# `F` is the key under which `generate_opaque_closure` stored the compiler job in
# `GPUCompiler.deferred_codegen_jobs`. It is passed to the `deferred_codegen` extern,
# which yields a pointer — presumably to the code compiled for that job (TODO confirm
# against GPUCompiler).
#
# NOTE(review): the call is hard-coded to two `Int` arguments and an `Int` result; the
# `A` and `R` type parameters of the closure are not consulted.
function (oc::OpaqueGPUClosure{F})(a, b) where F
    ptr = ccall("extern deferred_codegen", llvmcall, Ptr{Cvoid}, (Int,), F)
    # let LLVM assume that the pointer is non-null
    LLVM.Interop.assume(ptr != C_NULL)
    return ccall(ptr, Int, (Int, Int), a, b)
end


## demo

"""
    kernel(oc, c, a, b)

Load one value each through the pointers `a` and `b`, apply `oc` to the two values, and
store the result through the pointer `c`. Returns `nothing`.
"""
function kernel(oc, c, a, b)
    lhs = unsafe_load(a)
    rhs = unsafe_load(b)
    result = oc(lhs, rhs)
    unsafe_store!(c, result)
    return nothing
end

"""
    main()

Demo: build an `OpaqueGPUClosure` from the typed IR of `+(::Int, ::Int)`, compile `kernel`
specialized on that closure for the native target, and print the resulting LLVM IR.
"""
function main()
    target = NativeCompilerTarget()
    params = TestCompilerParams()

    # `code_ircode` yields `IRCode => return type` pairs; only the IR is needed here
    ir = first(only(Base.code_ircode(+, (Int, Int))))
    oc_config = CompilerConfig(target, params; kernel=false)
    oc = OpaqueGPUClosure(oc_config, ir)

    GPUCompiler.JuliaContext() do ctx
        source = methodinstance(typeof(kernel), Tuple{typeof(oc), Ptr{Int}, Ptr{Int}, Ptr{Int}})
        # a separate name keeps the closure from reassigning (and thereby boxing) a
        # variable captured from the enclosing function
        kernel_config = CompilerConfig(target, params)
        job = CompilerJob(source, kernel_config)
        println(GPUCompiler.compile(:llvm, job; ctx)[1])
    end
end

# run the demo unless the session is interactive
isinteractive() || main()

These don't actually implement the OpaqueClosure semantics and features (world freezing, env/varargs support, etc.), and can only be constructed from typed IR, but it's a start at least. For my use case, I only need to inline typed IR, so it doesn't make sense to implement the OpaqueClosure semantics accurately. That use case works nicely though:

define void @_Z6kernel16OpaqueGPUClosureILi1E5TupleS0_I5Int64S1_ES1_EPS1_PS1_PS1_(i64 zeroext %0, i64 zeroext %1, i64 zeroext %2) local_unnamed_addr #0 !dbg !64 {
top:
  %3 = inttoptr i64 %2 to i64*, !dbg !68
  %4 = load i64, i64* %3, align 1, !dbg !68, !tbaa !73, !alias.scope !77, !noalias !80
  %5 = inttoptr i64 %1 to i64*, !dbg !68
  %6 = load i64, i64* %5, align 1, !dbg !68, !tbaa !73, !alias.scope !77, !noalias !80
  %7 = call i64 @julia_opaque_gpu_closure_487(i64 %6, i64 %4), !dbg !85
  %8 = inttoptr i64 %0 to i64*, !dbg !87
  store i64 %7, i64* %8, align 1, !dbg !87, !tbaa !73, !alias.scope !77, !noalias !80
  ret void, !dbg !90
}

define i64 @julia_opaque_gpu_closure_487(i64 signext %0, i64 signext %1) local_unnamed_addr #0 !dbg !91 {
top:
  %2 = add i64 %1, %0, !dbg !93
  ret i64 %2, !dbg !93
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions