diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 46a549cb59f9d..1fbfd459fc2ea 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -71,6 +71,8 @@ static void addComdat(GlobalValue *G, Triple &T) } typedef struct { + orc::ThreadSafeModule TSM; + orc::ThreadSafeModule *TSM_ref; std::unique_ptr out; SmallVector jl_sysimg_fvars; SmallVector jl_sysimg_gvars; @@ -166,7 +168,7 @@ LLVMOrcThreadSafeModuleRef jl_get_llvm_module_impl(void *native_code) { jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code; if (data) - return wrap(&data->out->get_tsm()); + return wrap(data->TSM_ref); else return NULL; } @@ -581,7 +583,7 @@ void *jl_create_native_impl(LLVMOrcThreadSafeModuleRef llvmmod, int trim, int ex // move everything inside, now that we've merged everything // (before adding the exported headers) - data->out->get_tsm().withModuleDo([&](Module &M) { + data->TSM_ref->withModuleDo([&](Module &M) { auto TT = Triple(M.getTargetTriple()); Function *juliapersonality_func = nullptr; if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) { @@ -770,32 +772,14 @@ static void aot_link_output(jl_codegen_output_t &out) } } -// also be used be extern consumers like GPUCompiler.jl to obtain a module containing -// all reachable & inferrrable functions. -extern "C" JL_DLLEXPORT_CODEGEN -void *jl_emit_native_impl(jl_array_t *codeinfos, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int external_linkage) +static void jl_emit_native_to_output(jl_native_code_desc_t *data, jl_array_t *codeinfos, + const jl_cgparams_t *cgparams, int external_linkage) { - JL_TIMING(NATIVE_AOT, NATIVE_Create); - ++CreateNativeCalls; - CreateNativeMax.updateMax(jl_array_nrows(codeinfos)); - if (cgparams == NULL) - cgparams = &jl_default_cgparams; jl_cgparams_t target_cgparams = *cgparams; target_cgparams.sanitize_memory = jl_options.target_sanitize_memory; target_cgparams.sanitize_thread = jl_options.target_sanitize_thread; target_cgparams.sanitize_address = jl_options.target_sanitize_address; - jl_native_code_desc_t *data = new jl_native_code_desc_t; - if (llvmmod) { - data->out = std::make_unique(*unwrap(llvmmod)); - } - else { - const DataLayout &DL = jl_ExecutionEngine->getDataLayout(); - const Triple &triple = jl_ExecutionEngine->getTargetTriple(); - data->out = std::make_unique("text", DL, triple); - data->out->get_context().setDiscardValueNames(true); - } auto &out = *data->out; - // compile all methods for the current world and type-inference world DenseMap ci_infos; egal_set method_roots; @@ -895,8 +879,36 @@ void *jl_emit_native_impl(jl_array_t *codeinfos, LLVMOrcThreadSafeModuleRef llvm } data->jl_fvar_map[ci] = {invoke_id, specptr_id}; } +} + +// also be used be extern consumers like GPUCompiler.jl to obtain a module containing +// all reachable & inferrrable functions. +extern "C" JL_DLLEXPORT_CODEGEN +void *jl_emit_native_impl(jl_array_t *codeinfos, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int external_linkage) +{ + JL_TIMING(NATIVE_AOT, NATIVE_Create); + ++CreateNativeCalls; + CreateNativeMax.updateMax(jl_array_nrows(codeinfos)); + if (cgparams == NULL) + cgparams = &jl_default_cgparams; + jl_native_code_desc_t *data = new jl_native_code_desc_t; + if (llvmmod) { + data->TSM_ref = unwrap(llvmmod); + } else { + const DataLayout &DL = jl_ExecutionEngine->getDataLayout(); + const Triple &triple = jl_ExecutionEngine->getTargetTriple(); + auto ctx = std::make_unique(); + auto M = jl_create_llvm_module("text", *ctx, DL, triple); + ctx->setDiscardValueNames(true); + data->TSM = orc::ThreadSafeModule(std::move(M), std::move(ctx)); + data->TSM_ref = &data->TSM; + } + + data->TSM_ref->withModuleDo([&](Module &M) { + data->out = std::make_unique(M); + jl_emit_native_to_output(data, codeinfos, cgparams, external_linkage); + }); - out.unlock(); return (void *)data; } @@ -1957,30 +1969,11 @@ static unsigned compute_image_thread_count(const ModuleInfo &info) { jl_emission_params_t default_emission_params = { 1 }; -// takes the running content that has collected in the shadow module and dump it to disk -// this builds the object file portion of the sysimage files for fast startup -extern "C" JL_DLLEXPORT_CODEGEN -void jl_dump_native_impl(void *native_code, - const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, - const char *asm_fname, - ios_t *z, ios_t *s, - jl_emission_params_t *params) +void jl_dump_native_locked(jl_native_code_desc_t *data, const char *bc_fname, + const char *unopt_bc_fname, const char *obj_fname, + const char *asm_fname, ios_t *z, ios_t *s, + jl_emission_params_t *params, Module &dataM) { - JL_TIMING(NATIVE_AOT, NATIVE_Dump); - jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code; - if (!bc_fname && !unopt_bc_fname && !obj_fname && !asm_fname) { - LLVM_DEBUG(dbgs() << "No output requested, skipping native code dump?\n"); - delete data; - return; - } - - if (!params) { - params = &default_emission_params; - } - - data->out->lock(); - Module &dataM = data->out->get_module(); - // We don't want to use MCJIT's target machine because // it uses the large code model and we may potentially // want less optimizations there. @@ -2349,6 +2342,33 @@ void jl_dump_native_impl(void *native_code, } } +// takes the running content that has collected in the shadow module and dump it to disk +// this builds the object file portion of the sysimage files for fast startup +extern "C" JL_DLLEXPORT_CODEGEN +void jl_dump_native_impl(void *native_code, + const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, + const char *asm_fname, + ios_t *z, ios_t *s, + jl_emission_params_t *params) +{ + JL_TIMING(NATIVE_AOT, NATIVE_Dump); + jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code; + if (!bc_fname && !unopt_bc_fname && !obj_fname && !asm_fname) { + LLVM_DEBUG(dbgs() << "No output requested, skipping native code dump?\n"); + delete data; + return; + } + + if (!params) { + params = &default_emission_params; + } + + data->TSM_ref->withModuleDo([&](Module &dataM) { + jl_dump_native_locked(data, bc_fname, unopt_bc_fname, obj_fname, asm_fname, z, s, + params, dataM); + }); +} + // sometimes in GDB you want to find out what code would be created from a mi extern "C" JL_DLLEXPORT_CODEGEN jl_code_info_t *jl_gdbdumpcode(jl_method_instance_t *mi) @@ -2409,7 +2429,9 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t *dump, jl_method_instance_t *mi, jl_ if (src && jl_is_code_info(src)) { const auto &DL = jl_ExecutionEngine->getDataLayout(); const auto &TT = jl_ExecutionEngine->getTargetTriple(); - jl_codegen_output_t output{name_from_method_instance(mi), DL, TT}; + auto ctx = std::make_unique(); + auto mod = jl_create_llvm_module(name_from_method_instance(mi), *ctx, DL, TT); + jl_codegen_output_t output{*mod}; Function *F = nullptr; { uint64_t compiler_start_time = 0; @@ -2490,8 +2512,7 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t *dump, jl_method_instance_t *mi, jl_ } } if (F) { - output.unlock(); - dump->TSM = wrap(new orc::ThreadSafeModule(std::move(output.get_tsm()))); + dump->TSM = wrap(new orc::ThreadSafeModule(std::move(mod), std::move(ctx))); dump->F = wrap(F); return; } diff --git a/src/codegen.cpp b/src/codegen.cpp index e2d15eedef4bf..df93aec4655da 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1114,7 +1114,7 @@ static const auto jl_excstack_state_func = new JuliaFunction{ diff --git a/src/disasm.cpp b/src/disasm.cpp index 30e0e44b9d86b..6ef9cf59df765 100644 --- a/src/disasm.cpp +++ b/src/disasm.cpp @@ -498,8 +498,13 @@ void jl_strip_llvm_addrspaces(Module *m) JL_NOTSAFEPOINT extern "C" JL_DLLEXPORT_CODEGEN jl_value_t *jl_dump_function_ir_impl(jl_llvmf_dump_t *dump, char strip_ir_metadata, char dump_module, const char *debuginfo) { + if (!dump->F) + return jl_pchar_to_string("", 0); + std::string code; raw_string_ostream stream(code); + //RAII will release the module + auto TSM = std::unique_ptr(unwrap(dump->TSM)); // Prepend pass instrumentation output if present if (dump->pass_output) { @@ -508,15 +513,7 @@ jl_value_t *jl_dump_function_ir_impl(jl_llvmf_dump_t *dump, char strip_ir_metada dump->pass_output = nullptr; } - if (dump->F) { - //RAII will release the module - auto TSM = std::unique_ptr(unwrap(dump->TSM)); - //If TSM is not passed in, then the context MUST be locked externally. - //RAII will release the lock - std::optional lock; - if (TSM) { - lock.emplace(TSM->getContext().getLock()); - } + auto go = [&]() { Function *llvmf = cast(unwrap(dump->F)); if (!llvmf || (!llvmf->isDeclaration() && !llvmf->getParent())) jl_error("jl_dump_function_ir: Expected Function* in a temporary Module"); @@ -544,7 +541,13 @@ jl_value_t *jl_dump_function_ir_impl(jl_llvmf_dump_t *dump, char strip_ir_metada llvmf->print(stream, &AAW); } } - } + }; + + // If TSM is not passed in, then the context MUST be locked externally. + if (TSM) + TSM->withModuleDo([&](Module &M) { go(); }); + else + go(); return jl_pchar_to_string(stream.str().data(), stream.str().size()); } diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 7421e7876a845..acfd7de43838e 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -43,6 +43,10 @@ #include #include +#if JL_LLVM_VERSION >= 210000 +#include +#include +#endif #include #include #include @@ -281,9 +285,10 @@ StringRef jl_codegen_output_t::get_call_target(jl_code_instance_t *ci, bool spec return target.decl->getName(); } -jl_emitted_output_t jl_codegen_output_t::finish(orc::SymbolStringPool &SSP) +jl_emitted_output_t jl_codegen_output_t::finish(std::unique_ptr ctx, + std::unique_ptr mod, + orc::SymbolStringPool &SSP) { - auto info = std::make_unique(); auto intern = [&](StringRef name) { SmallString<128> buf; @@ -304,8 +309,7 @@ jl_emitted_output_t jl_codegen_output_t::finish(orc::SymbolStringPool &SSP) info->global_targets[val] = intern(gv->getName()); } - unlock(); - return {std::move(get_tsm()), std::move(info)}; + return {std::move(ctx), std::move(mod), std::move(info)}; } // Return a specptr that is ABI-compatible with `from_abi` which invokes `codeinst`. @@ -350,27 +354,28 @@ void *jl_jit_abi_converter_impl(jl_task_t *ct, jl_abi_t from_abi, orc::ThreadSafeModule result_m; std::string gf_thunk_name; - auto out = std::make_unique("gfthunk", - jl_ExecutionEngine->getDataLayout(), - jl_ExecutionEngine->getTargetTriple()); + auto ctx = std::make_unique(); + auto mod = jl_create_llvm_module("gfthunk", *ctx, jl_ExecutionEngine->getDataLayout(), + jl_ExecutionEngine->getTargetTriple()); + jl_codegen_output_t out{*mod}; { - out->get_context().setDiscardValueNames(true); - out->imaging_mode = 0; - auto &ctx = out->get_context(); + ctx->setDiscardValueNames(true); + out.imaging_mode = 0; if (target) { - Value *llvmtarget = literal_static_pointer_val((void*)target, PointerType::get(ctx, 0)); - gf_thunk_name = emit_abi_converter(*out, from_abi, codeinst, llvmtarget, target_specsig); + Value *llvmtarget = literal_static_pointer_val((void*)target, PointerType::get(*ctx, 0)); + gf_thunk_name = emit_abi_converter(out, from_abi, codeinst, llvmtarget, target_specsig); } else if (invoke == jl_fptr_const_return_addr) { - gf_thunk_name = emit_abi_constreturn(*out, from_abi, codeinst->rettype_const); + assert(codeinst); // Convince the static analyzer + gf_thunk_name = emit_abi_constreturn(out, from_abi, codeinst->rettype_const); } else { - Value *llvminvoke = invoke ? literal_static_pointer_val((void*)invoke, PointerType::get(ctx, 0)) : nullptr; - gf_thunk_name = emit_abi_dispatcher(*out, from_abi, codeinst, llvminvoke); + Value *llvminvoke = invoke ? literal_static_pointer_val((void*)invoke, PointerType::get(*ctx, 0)) : nullptr; + gf_thunk_name = emit_abi_dispatcher(out, from_abi, codeinst, llvminvoke); } } auto &ES = jl_ExecutionEngine->getExecutionSession(); - auto emitted = out->finish(*ES.getSymbolStringPool()); + auto emitted = out.finish(std::move(ctx), std::move(mod), *ES.getSymbolStringPool()); jl_ExecutionEngine->addOutput(std::move(emitted)); uintptr_t Addr = jl_ExecutionEngine->getFunctionAddress(gf_thunk_name); assert(Addr); @@ -445,9 +450,12 @@ jl_emit_codeinst_to_jit_impl(jl_code_instance_t *codeinst, jl_code_info_t *src) JL_TIMING(CODEINST_COMPILE, CODEINST_COMPILE); jl_method_instance_t *mi = jl_get_ci_mi(codeinst); - jl_codegen_output_t out{name_from_method_instance(mi), - jl_ExecutionEngine->getDataLayout(), - jl_ExecutionEngine->getTargetTriple()}; + const char *name = name_from_method_instance(mi); + auto ctx = std::make_unique(); + auto &dl = jl_ExecutionEngine->getDataLayout(); + auto &tt = jl_ExecutionEngine->getTargetTriple(); + auto mod = jl_create_llvm_module(name, *ctx, dl, tt); + jl_codegen_output_t out{*mod}; out.get_context().setDiscardValueNames(true); out.imaging_mode = false; out.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0); @@ -473,7 +481,8 @@ jl_emit_codeinst_to_jit_impl(jl_code_instance_t *codeinst, jl_code_info_t *src) jl_as_global_root((jl_value_t*)mi, 1); auto &ES = jl_ExecutionEngine->getExecutionSession(); - jl_emitted_output_t emitted = out.finish(*ES.getSymbolStringPool()); + jl_emitted_output_t emitted = + out.finish(std::move(ctx), std::move(mod), *ES.getSymbolStringPool()); jl_ExecutionEngine->addOutput(std::move(emitted)); } @@ -604,37 +613,31 @@ static auto countBasicBlocks(const Function &F) JL_NOTSAFEPOINT static constexpr size_t N_optlevels = 4; -static orc::ThreadSafeModule selectOptLevel(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { - TSM.withModuleDo([](Module &M) JL_NOTSAFEPOINT { - size_t opt_level = std::max(static_cast(jl_options.opt_level), 0); - do { - if (jl_generating_output()) { - opt_level = 0; - break; - } - size_t opt_level_min = std::max(static_cast(jl_options.opt_level_min), 0); - for (auto &F : M) { - if (!F.isDeclaration()) { - Attribute attr = F.getFnAttribute("julia-optimization-level"); - StringRef val = attr.getValueAsString(); - if (val != "") { - size_t ol = (size_t)val[0] - '0'; - if (ol < opt_level) - opt_level = ol; - } +static void selectOptLevel(Module &M) JL_NOTSAFEPOINT { + size_t opt_level = std::max(static_cast(jl_options.opt_level), 0); + do { + if (jl_generating_output()) { + opt_level = 0; + break; + } + size_t opt_level_min = std::max(static_cast(jl_options.opt_level_min), 0); + for (auto &F : M) { + if (!F.isDeclaration()) { + Attribute attr = F.getFnAttribute("julia-optimization-level"); + StringRef val = attr.getValueAsString(); + if (val != "") { + size_t ol = (size_t)val[0] - '0'; + if (ol < opt_level) + opt_level = ol; } } - if (opt_level < opt_level_min) - opt_level = opt_level_min; - } while (0); - // currently -O3 is max - opt_level = std::min(opt_level, N_optlevels - 1); - M.addModuleFlag(Module::Warning, "julia.optlevel", opt_level); - }); - return TSM; -} -static orc::ThreadSafeModule selectOptLevel(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { - return selectOptLevel(std::move(TSM)); + } + if (opt_level < opt_level_min) + opt_level = opt_level_min; + } while (0); + // currently -O3 is max + opt_level = std::min(opt_level, N_optlevels - 1); + M.addModuleFlag(Module::Warning, "julia.optlevel", opt_level); } void jl_register_jit_object(const object::ObjectFile &Object, @@ -843,20 +846,18 @@ class JLMaterializationUnit : public orc::MaterializationUnit { // Tell ORC about all the other definition in this module. When // linker_info contains enough information to produce the full // Interface, remove this. - Out.module.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { - auto SSP = JIT.getExecutionSession().getSymbolStringPool(); - for (auto &G : M.global_objects()) { - if (G.isDeclaration() || !G.hasExternalLinkage()) - continue; - auto Flags = JITSymbolFlags::Exported; - if (isa(&G)) - Flags |= JITSymbolFlags::Callable; - auto S = JIT.mangle(G.getName()); - if (CISyms.contains(S)) - continue; - Syms[S] = Flags; - } - }); + auto SSP = JIT.getExecutionSession().getSymbolStringPool(); + for (auto &G : Out.module->global_objects()) { + if (G.isDeclaration() || !G.hasExternalLinkage()) + continue; + auto Flags = JITSymbolFlags::Exported; + if (isa(&G)) + Flags |= JITSymbolFlags::Callable; + auto S = JIT.mangle(G.getName()); + if (CISyms.contains(S)) + continue; + Syms[S] = Flags; + } return JLMaterializationUnit{JIT, OL, std::move(Out), std::move(I)}; } @@ -870,22 +871,24 @@ class JLMaterializationUnit : public orc::MaterializationUnit { // TODO: Tell GCChecker that materialize can have safepoints. #ifndef __clang_analyzer__ { - auto Lock = Out.module.getContext().getLock(); uint8_t state = jl_gc_unsafe_enter(ct->ptls); - JIT.optimizeDLSyms(*Out.module.getModuleUnlocked()); // May safepoint + JIT.optimizeDLSyms(*Out.module); // May safepoint jl_gc_unsafe_leave(ct->ptls, state); } #endif std::unique_ptr Obj; uint64_t start_time = jl_hrtime(); { - TimeTraceScope CompileScope( - "JIT Compile", Out.module.getModuleUnlocked()->getModuleIdentifier()); - Obj = JIT.compileModule(JIT.optimizeModule(std::move(Out.module))); + TimeTraceScope CompileScope("JIT Compile", Out.module->getModuleIdentifier()); + JIT.optimizeModule(*Out.module); + Obj = JIT.compileModule(*Out.module); if (!Obj) { R->failMaterialization(); return; } + // Save some memory + auto Ctx = std::move(Out.ctx); + auto M = std::move(Out.module); } uint64_t end_time = jl_hrtime(); @@ -921,8 +924,7 @@ class JLMaterializationUnit : public orc::MaterializationUnit { StringRef getName() const override JL_NOTSAFEPOINT { - return Out.module.withModuleDo([](Module &M) - JL_NOTSAFEPOINT { return M.getName(); }); + return Out.module->getName(); } void discard(const JITDylib &JD, const SymbolStringPtr &Name) override {} @@ -962,7 +964,10 @@ class JLTrampolineMaterializationUnit : public orc::MaterializationUnit { // During materializtion: finalizers disabled, GC safe void materialize(std::unique_ptr R) override { - jl_codegen_output_t Out{*Sym, JIT.getDataLayout(), JIT.getTargetTriple()}; + auto Ctx = std::make_unique(); + auto Mod = + jl_create_llvm_module(*Sym, *Ctx, JIT.getDataLayout(), JIT.getTargetTriple()); + jl_codegen_output_t Out{*Mod}; jl_task_t *ct = jl_current_task; uint8_t state = jl_gc_unsafe_enter(ct->ptls); @@ -977,7 +982,8 @@ class JLTrampolineMaterializationUnit : public orc::MaterializationUnit { if (auto Err = R->replace( std::make_unique(JLMaterializationUnit::Create( JIT, OL, - Out.finish(*R->getExecutionSession().getSymbolStringPool()))))) { + Out.finish(std::move(Ctx), std::move(Mod), + *R->getExecutionSession().getSymbolStringPool()))))) { R->getExecutionSession().reportError(std::move(Err)); R->failMaterialization(); } @@ -995,18 +1001,49 @@ class JLTrampolineMaterializationUnit : public orc::MaterializationUnit { jl_invoke_api_t API; }; +#if defined(LLVM_SHLIB) +namespace JLEHFrames { +Error registerEHFrames(orc::ExecutorAddrRange EHFrameSection) { + register_eh_frames(EHFrameSection.Start.toPtr(), static_cast(EHFrameSection.size())); + return Error::success(); +} + +Error deregisterEHFrames(orc::ExecutorAddrRange EHFrameSection) { + deregister_eh_frames(EHFrameSection.Start.toPtr(), static_cast(EHFrameSection.size())); + return Error::success(); +} +} +#if JL_LLVM_VERSION < 210000 class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar { public: Error registerEHFrames(orc::ExecutorAddrRange EHFrameSection) override { - register_eh_frames(EHFrameSection.Start.toPtr(), static_cast(EHFrameSection.size())); - return Error::success(); + return JLEHFrames::registerEHFrames(EHFrameSection); } Error deregisterEHFrames(orc::ExecutorAddrRange EHFrameSection) override { - deregister_eh_frames(EHFrameSection.Start.toPtr(), static_cast(EHFrameSection.size())); - return Error::success(); + return JLEHFrames::deregisterEHFrames(EHFrameSection); } }; +#else +namespace JLEHFrames { + static orc::shared::CWrapperFunctionResult + registerEHFrameSectionAllocAction(const char *ArgData, size_t ArgSize) { + using namespace llvm::orc::shared; + return WrapperFunction::handle( + ArgData, ArgSize, registerEHFrames) + .release(); + } + + static orc::shared::CWrapperFunctionResult + deregisterEHFrameSectionAllocAction(const char *ArgData, size_t ArgSize) { + using namespace llvm::orc::shared; + return WrapperFunction::handle( + ArgData, ArgSize, deregisterEHFrames) + .release(); + } +} +#endif +#endif RTDyldMemoryManager *createRTDyldMemoryManager(void) JL_NOTSAFEPOINT; std::unique_ptr createJITLinkMemoryManager() JL_NOTSAFEPOINT; @@ -1221,113 +1258,110 @@ namespace { } } - orc::ThreadSafeModule operator()(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { - TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { - auto PoolIdx = cast(cast(M.getModuleFlag("julia.optlevel"))->getValue())->getZExtValue(); - assert(PoolIdx < N && "Invalid optimization pool index"); + void operator()(Module &M) JL_NOTSAFEPOINT { + auto PoolIdx = cast(cast(M.getModuleFlag("julia.optlevel"))->getValue())->getZExtValue(); + assert(PoolIdx < N && "Invalid optimization pool index"); - uint64_t start_time = 0; + uint64_t start_time = 0; - struct Stat { - std::string name; - uint64_t insts; - uint64_t bbs; + struct Stat { + std::string name; + uint64_t insts; + uint64_t bbs; - void dump(ios_t *stream) JL_NOTSAFEPOINT { - ios_printf(stream, " \"%s\":\n", name.c_str()); - ios_printf(stream, " instructions: %u\n", insts); - ios_printf(stream, " basicblocks: %zd\n", bbs); - } + void dump(ios_t *stream) JL_NOTSAFEPOINT { + ios_printf(stream, " \"%s\":\n", name.c_str()); + ios_printf(stream, " instructions: %u\n", insts); + ios_printf(stream, " basicblocks: %zd\n", bbs); + } - Stat(Function &F) JL_NOTSAFEPOINT : name(F.getName().str()), insts(F.getInstructionCount()), bbs(countBasicBlocks(F)) {} + Stat(Function &F) JL_NOTSAFEPOINT : name(F.getName().str()), insts(F.getInstructionCount()), bbs(countBasicBlocks(F)) {} - ~Stat() JL_NOTSAFEPOINT = default; - }; - SmallVector before_stats; - { - if (*jl_ExecutionEngine->get_dump_llvm_opt_stream()) { - for (auto &F : M.functions()) { - if (F.isDeclaration() || F.getName().starts_with(JL_SYM_INVOKE_SPECSIG)) { - continue; - } - // Each function is printed as a YAML object with several attributes - before_stats.emplace_back(F); + ~Stat() JL_NOTSAFEPOINT = default; + }; + SmallVector before_stats; + { + if (*jl_ExecutionEngine->get_dump_llvm_opt_stream()) { + for (auto &F : M.functions()) { + if (F.isDeclaration() || F.getName().starts_with(JL_SYM_INVOKE_SPECSIG)) { + continue; } - - start_time = jl_hrtime(); + // Each function is printed as a YAML object with several attributes + before_stats.emplace_back(F); } - } - { - JL_TIMING(LLVM_JIT, JIT_Opt); - TimeTraceScope OptimizeScope("JIT Optimize", M.getModuleIdentifier()); - //Run the optimization - (****PMs[PoolIdx]).run(M); - assert(!verifyLLVMIR(M)); + start_time = jl_hrtime(); } + } - { - // Print optimization statistics as a YAML object - // Looks like: - // - - // before: - // "foo": - // instructions: uint64 - // basicblocks: uint64 - // "bar": - // instructions: uint64 - // basicblocks: uint64 - // time_ns: uint64 - // optlevel: int - // after: - // "foo": - // instructions: uint64 - // basicblocks: uint64 - // "bar": - // instructions: uint64 - // basicblocks: uint64 - if (auto stream = *jl_ExecutionEngine->get_dump_llvm_opt_stream()) { - uint64_t end_time = jl_hrtime(); - ios_printf(stream, "- \n"); - - // Print LLVM function statistic _before_ optimization - ios_printf(stream, " before: \n"); - for (auto &s : before_stats) { - s.dump(stream); - } - ios_printf(stream, " time_ns: %" PRIu64 "\n", end_time - start_time); - ios_printf(stream, " optlevel: %d\n", PoolIdx); - - // Print LLVM function statistics _after_ optimization - ios_printf(stream, " after: \n"); - for (auto &F : M.functions()) { - if (F.isDeclaration() || F.getName().starts_with(JL_SYM_INVOKE_SPECSIG)) { - continue; - } - Stat(F).dump(stream); + { + JL_TIMING(LLVM_JIT, JIT_Opt); + TimeTraceScope OptimizeScope("JIT Optimize", M.getModuleIdentifier()); + //Run the optimization + (****PMs[PoolIdx]).run(M); + assert(!verifyLLVMIR(M)); + } + + { + // Print optimization statistics as a YAML object + // Looks like: + // - + // before: + // "foo": + // instructions: uint64 + // basicblocks: uint64 + // "bar": + // instructions: uint64 + // basicblocks: uint64 + // time_ns: uint64 + // optlevel: int + // after: + // "foo": + // instructions: uint64 + // basicblocks: uint64 + // "bar": + // instructions: uint64 + // basicblocks: uint64 + if (auto stream = *jl_ExecutionEngine->get_dump_llvm_opt_stream()) { + uint64_t end_time = jl_hrtime(); + ios_printf(stream, "- \n"); + + // Print LLVM function statistic _before_ optimization + ios_printf(stream, " before: \n"); + for (auto &s : before_stats) { + s.dump(stream); + } + ios_printf(stream, " time_ns: %" PRIu64 "\n", end_time - start_time); + ios_printf(stream, " optlevel: %d\n", PoolIdx); + + // Print LLVM function statistics _after_ optimization + ios_printf(stream, " after: \n"); + for (auto &F : M.functions()) { + if (F.isDeclaration() || F.getName().starts_with(JL_SYM_INVOKE_SPECSIG)) { + continue; } + Stat(F).dump(stream); } } - ++ModulesOptimized; - switch (PoolIdx) { - case 0: - ++OptO0; - break; - case 1: - ++OptO1; - break; - case 2: - ++OptO2; - break; - case 3: - ++OptO3; - break; - default: - // Change this if we ever gain other optlevels - llvm_unreachable("optlevel is between 0 and 3!"); - } - }); - return TSM; + } + ++ModulesOptimized; + switch (PoolIdx) { + case 0: + ++OptO0; + break; + case 1: + ++OptO1; + break; + case 2: + ++OptO2; + break; + case 3: + ++OptO3; + break; + default: + // Change this if we ever gain other optlevels + llvm_unreachable("optlevel is between 0 and 3!"); + } } private: std::array>>, N> PMs; @@ -1338,7 +1372,10 @@ namespace { struct IRTransformRef { IRTransformRef(T &transform) : transform(transform) {} OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { - return transform(std::move(TSM), R); + TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { + transform(M, R); + }); + return std::move(TSM); } private: T &transform; @@ -1397,11 +1434,11 @@ namespace { struct JuliaOJIT::OptimizerT { OptimizerT(TargetMachine &TM, SmallVector, 0> &printers, std::mutex &llvm_printing_mutex) : opt(TM, printers, llvm_printing_mutex) {} - orc::ThreadSafeModule operator()(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { - return opt(std::move(TSM)); + void operator()(Module &M) JL_NOTSAFEPOINT { + opt(M); } - OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { - return opt(std::move(TSM)); + void operator()(Module &M, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + return opt(M); } private: struct sizedOptimizerT opt; @@ -1411,26 +1448,23 @@ struct JuliaOJIT::JITPointersT { JITPointersT(SharedBytesT &SharedBytes, std::mutex &Lock) JL_NOTSAFEPOINT : SharedBytes(SharedBytes), Lock(Lock) {} - orc::ThreadSafeModule operator()(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT { - TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT { - std::lock_guard locked(Lock); - for (auto &GV : make_early_inc_range(M.globals())) { - if (auto *Shared = getSharedBytes(GV)) { - ++InternedGlobals; - GV.replaceAllUsesWith(Shared); - GV.eraseFromParent(); - } + void operator()(Module &M) JL_NOTSAFEPOINT { + std::lock_guard locked(Lock); + for (auto &GV : make_early_inc_range(M.globals())) { + if (auto *Shared = getSharedBytes(GV)) { + ++InternedGlobals; + GV.replaceAllUsesWith(Shared); + GV.eraseFromParent(); } + } - // Windows needs some inline asm to help - // build unwind tables, if they have any functions to decorate - if (!M.functions().empty()) - jl_decorate_module(M); - }); - return TSM; + // Windows needs some inline asm to help + // build unwind tables, if they have any functions to decorate + if (!M.functions().empty()) + jl_decorate_module(M); } - Expected operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { - return operator()(std::move(TSM)); + void operator()(Module &M, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT { + return operator()(M); } private: @@ -1683,9 +1717,9 @@ JuliaOJIT::JuliaOJIT() JITPointersLayer(ES, CompileLayer, IRTransformRef(*JITPointers)), Optimizers(std::make_unique(*TM, PrintLLVMTimers, llvm_printing_mutex)), OptimizeLayer(ES, JITPointersLayer, IRTransformRef(*Optimizers)), - OptSelLayer(ES, OptimizeLayer, static_cast(selectOptLevel)), DebuginfoPlugin(std::make_shared()) { +#if JL_LLVM_VERSION < 210000 # if defined(LLVM_SHLIB) // When dynamically linking against LLVM, use our custom EH frame registration code // also used with RTDyld to inform both our and the libc copy of libunwind. @@ -1695,6 +1729,16 @@ JuliaOJIT::JuliaOJIT() # endif ObjectLayer.addPlugin(std::make_unique( ES, std::move(ehRegistrar))); +#else + // LLVM 21+ removed EHFrameRegistrar. Use our own plugin for custom registration + // when dynamically linking, plus the built-in plugin for standard registration. +# if defined(LLVM_SHLIB) + ObjectLayer.addPlugin(std::make_unique( + ExecutorAddr::fromPtr(JLEHFrames::registerEHFrameSectionAllocAction), + ExecutorAddr::fromPtr(JLEHFrames::deregisterEHFrameSectionAllocAction))); +#endif + ObjectLayer.addPlugin(cantFail(EHFrameRegistrationPlugin::Create(ES))); +#endif ObjectLayer.addPlugin(DebuginfoPlugin); ObjectLayer.addPlugin(std::make_unique(&jit_bytes_size)); @@ -1853,15 +1897,13 @@ void JuliaOJIT::addGlobalMapping(StringRef Name, uint64_t Addr) #ifdef ENABLE_TIMINGS static void timing_print_module_names(jl_timing_block_t *block, - ThreadSafeModule &TSM) JL_NOTSAFEPOINT + Module &M) JL_NOTSAFEPOINT { - TSM.withModuleDo([block](Module &M) { - for (auto &f : M) { - if (!f.isDeclaration()) { - jl_timing_puts(block, f.getName().str().c_str()); - } + for (auto &f : M) { + if (!f.isDeclaration()) { + jl_timing_puts(block, f.getName().str().c_str()); } - }); + } } #endif @@ -1870,7 +1912,7 @@ void JuliaOJIT::addOutput(jl_emitted_output_t O) JL_TIMING(LLVM_JIT, JIT_Total); ++ModulesAdded; #ifdef ENABLE_TIMINGS - timing_print_module_names(JL_TIMING_DEFAULT_BLOCK, O.module); + timing_print_module_names(JL_TIMING_DEFAULT_BLOCK, *O.module); #endif std::unique_lock Lock{LinkerMutex}; @@ -2310,18 +2352,15 @@ CISymbolPtr *JuliaOJIT::linkCISymbol(jl_code_instance_t *CI) return &CISym; } -orc::ThreadSafeModule JuliaOJIT::optimizeModule(orc::ThreadSafeModule TSM) +void JuliaOJIT::optimizeModule(Module &M) { - TSM = selectOptLevel(std::move(TSM)); - TSM = (*Optimizers)(std::move(TSM)); - TSM = (*JITPointers)(std::move(TSM)); - return TSM; + selectOptLevel(M); + (*Optimizers)(M); + (*JITPointers)(M); } -std::unique_ptr JuliaOJIT::compileModule(orc::ThreadSafeModule TSM) +std::unique_ptr JuliaOJIT::compileModule(Module &M) { - auto Lock = TSM.getContext().getLock(); - Module &M = *TSM.getModuleUnlocked(); // Treat this as if one of the passes might contain a safepoint // even though that shouldn't be the case and might be unwise Expected> Obj = CompileLayer.getCompiler()(M); diff --git a/src/jitlayers.h b/src/jitlayers.h index f81f247fbd349..9a84aba220501 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -325,7 +325,8 @@ struct jl_linker_info_t { }; struct jl_emitted_output_t { - orc::ThreadSafeModule module; + std::unique_ptr ctx; + std::unique_ptr module; std::unique_ptr linker_info; jl_emitted_output_t() JL_NOTSAFEPOINT = default; @@ -334,23 +335,19 @@ struct jl_emitted_output_t { ~jl_emitted_output_t() JL_NOTSAFEPOINT = default; }; -// A jl_codegen_output_t is the target for LLVM IR generation, containing an -// LLVM module and the metadata for linking it into the current session or a -// system image. Many code instances can be emitted to a single codegen output. +// A jl_codegen_output_t is the target for LLVM IR generation, containing a +// reference to the destination LLVM module and the metadata for linking it into +// the current session or a system image. Many code instances can be emitted to +// a single codegen output. class jl_codegen_output_t { private: - orc::ThreadSafeModule owned_TSM; - orc::ThreadSafeModule *TSM; - orc::ThreadSafeContext::Lock tsctx_lock; + Module &M; jl_name_counter_t names; public: - LLVMContext &get_context() { return *get_tsm().getContext().getContext(); } - Module &get_module() { return *get_tsm().getModuleUnlocked(); } - orc::ThreadSafeModule &get_tsm() { return owned_TSM ? owned_TSM : *TSM; } - void lock() { tsctx_lock = get_tsm().getContext().getLock(); } - void unlock() { auto _ = std::move(tsctx_lock); } + LLVMContext &get_context() { return M.getContext(); } + Module &get_module() { return M; } StringRef strip_linux(StringRef name); std::string make_name(jl_symbol_prefix_t type, jl_invoke_api_t api, @@ -361,8 +358,10 @@ class jl_codegen_output_t { StringRef get_call_target(jl_code_instance_t *ci, bool specsig, bool always_inline); // Discard all the context that will be invalidated when we compile the - // module. Must hold the context lock. - jl_emitted_output_t finish(orc::SymbolStringPool &SSP) JL_NOTSAFEPOINT; + // module. The context and module will be moved to the jl_emitted_output_t. + jl_emitted_output_t finish(std::unique_ptr ctx, + std::unique_ptr mod, + orc::SymbolStringPool &SSP) JL_NOTSAFEPOINT; public: // outputs @@ -407,30 +406,8 @@ class jl_codegen_output_t { bool safepoint_on_entry = true; bool use_swiftcc = true; - jl_codegen_output_t(orc::ThreadSafeModule &TSM) - : TSM(&TSM), - tsctx_lock(TSM.getContext().getLock()), - DL(TSM.getModuleUnlocked()->getDataLayout()), - TargetTriple(TSM.getModuleUnlocked()->getTargetTriple()) - { - if (TargetTriple.isRISCV()) - use_swiftcc = false; - } - - static orc::ThreadSafeModule create_ts_module(StringRef name, const DataLayout &DL, - const Triple &triple) - { - auto ctx = std::make_unique(); - auto M = jl_create_llvm_module(name, *ctx, DL, triple); - return orc::ThreadSafeModule(std::move(M), std::move(ctx)); - } - - jl_codegen_output_t(StringRef name, const DataLayout &DL, const Triple &triple) - : owned_TSM(create_ts_module(name, DL, triple)), - TSM(nullptr), - tsctx_lock(owned_TSM.getContext().getLock()), - DL(DL), - TargetTriple(triple) + jl_codegen_output_t(Module &M) + : M(M), DL(M.getDataLayout()), TargetTriple(M.getTargetTriple()) { if (TargetTriple.isRISCV()) use_swiftcc = false; @@ -605,13 +582,18 @@ class JuliaOJIT { // any verification the user wants to do when adding an OwningResource to the pool template static void verifyResource(AnyT &resource) JL_NOTSAFEPOINT { } - static void verifyResource(orc::ThreadSafeContext &context) JL_NOTSAFEPOINT { assert(context.getContext()); } + static void verifyResource(orc::ThreadSafeContext &context) JL_NOTSAFEPOINT { +#if JL_LLVM_VERSION < 210000 + assert(context.getContext()); +#else + context.withContextDo([](LLVMContext *ctx) { assert(ctx); }); +#endif + } public: typedef orc::ObjectLinkingLayer ObjLayerT; typedef orc::IRCompileLayer CompileLayerT; typedef orc::IRTransformLayer JITPointersLayerT; typedef orc::IRTransformLayer OptimizeLayerT; - typedef orc::IRTransformLayer OptSelLayerT; typedef object::OwningBinary OwningObj; template compileModule(orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT; + void optimizeModule(Module &M) JL_NOTSAFEPOINT; + std::unique_ptr compileModule(Module &M) JL_NOTSAFEPOINT; private: @@ -885,20 +867,10 @@ class JuliaOJIT { JITPointersLayerT JITPointersLayer; std::unique_ptr Optimizers; OptimizeLayerT OptimizeLayer; - OptSelLayerT OptSelLayer; std::shared_ptr DebuginfoPlugin; }; extern JuliaOJIT *jl_ExecutionEngine; -inline orc::ThreadSafeModule jl_create_ts_module(StringRef name, orc::ThreadSafeContext ctx, - const DataLayout &DL, const Triple &triple, - Module *source = nullptr) JL_NOTSAFEPOINT -{ - auto lock = ctx.getLock(); - return orc::ThreadSafeModule( - jl_create_llvm_module(name, *ctx.getContext(), DL, triple, source), ctx); -} - void fixupTM(TargetMachine &TM) JL_NOTSAFEPOINT; void optimizeDLSyms(Module &M);