From 0ee5f51f5a5de3c2e4e7e7fb31f7e63727d591d0 Mon Sep 17 00:00:00 2001
From: Damyan Pepper
Date: Thu, 19 Feb 2026 13:07:25 -0800
Subject: [PATCH] Fix perf regression: limit debug info to relevant subprograms (#8174)

PR #7799 added debug info for global variables across all inlined
subprograms, creating O(subprograms x globals) debug instructions. This
caused 2-3x slower compilation for shaders with many globals and
deeply-inlined functions.

Fix: Instead of collecting ALL subprograms for a function (walking every
instruction), only collect subprograms from the alloca's actual users --
the instructions that reference the specific global variable. This
preserves the debug info feature (globals visible in inlined scopes that
use them) while eliminating work for unrelated scopes.

The 'Lower static global into Alloca' pass is 3.8x faster on the stress
test (41ms -> 11ms), and overall debug compilation is ~10% faster.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../Scalar/ScalarReplAggregatesHLSL.cpp       | 70 +++++++++++--------
 1 file changed, 41 insertions(+), 29 deletions(-)

diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
index dd72786d99..d80a678651 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
@@ -6505,47 +6505,49 @@ ModulePass *llvm::createSROA_Parameter_HLSL() {
 
 namespace {
 struct GVDebugInfoPatchCache {
-  DenseMap<Function *, SetVector<DISubprogram *>> SubprogramsForFunction;
-  DenseSet<DILocation *> Seen;
+  DenseMap<DILocation *, DISubprogram *> LocToSubprogram;
+  DenseMap<Function *, DISubprogram *> FuncToSubprogram;
   DITypeIdentifierMap EmptyMap;
 
+  DISubprogram *GetSubprogramForLoc(DILocation *Loc) {
+    auto It = LocToSubprogram.find(Loc);
+    if (It != LocToSubprogram.end())
+      return It->second;
+    DISubprogram *Result = nullptr;
+    auto *Scope = dyn_cast<DIScope>(Loc->getScope());
+    while (Scope) {
+      if (auto SubP = dyn_cast<DISubprogram>(Scope)) {
+        Result = SubP;
+        break;
+      }
+      Scope = Scope->getScope().resolve(EmptyMap);
+    }
+    LocToSubprogram[Loc] = Result;
+    return Result;
+  }
+
+  // Collect DISubprograms from a DILocation's inlined-at chain.
   void CollectSubprograms(DILocation *Loc, SetVector<DISubprogram *> &Set) {
     while (Loc) {
-      // This is potentially very expensive. Avoid repeatedly looking for
-      // DISubprogram's
-      if (Seen.count(Loc))
-        return;
-      Seen.insert(Loc);
-      auto *Scope = dyn_cast<DIScope>(Loc->getScope());
-      while (Scope) {
-        if (auto SubP = dyn_cast<DISubprogram>(Scope)) {
-          Set.insert(SubP);
-          break;
-        }
-        Scope = Scope->getScope().resolve(EmptyMap);
-      }
+      if (DISubprogram *SP = GetSubprogramForLoc(Loc))
+        Set.insert(SP);
       Loc = Loc->getInlinedAt();
     }
   }
 
-  SetVector<DISubprogram *> &
-  GetSubprogramsForFunction(Function *F, DebugInfoFinder &DbgFinder) {
-    auto It = SubprogramsForFunction.find(F);
-    if (It != SubprogramsForFunction.end())
+  DISubprogram *GetFuncSubprogram(Function *F, DebugInfoFinder &DbgFinder) {
+    auto It = FuncToSubprogram.find(F);
+    if (It != FuncToSubprogram.end())
       return It->second;
-
-    SetVector<DISubprogram *> &Ret = SubprogramsForFunction[F];
+    DISubprogram *Result = nullptr;
     for (DISubprogram *SP : DbgFinder.subprograms()) {
       if (SP->getFunction() == F) {
-        Ret.insert(SP);
+        Result = SP;
         break;
       }
     }
-
-    for (BasicBlock &BB : *F)
-      for (Instruction &I : BB)
-        CollectSubprograms(I.getDebugLoc(), Ret);
-    return Ret;
+    FuncToSubprogram[F] = Result;
+    return Result;
   }
 };
 
@@ -6771,8 +6773,18 @@ static void PatchDebugInfo(GVDebugInfoPatchCache &Cache,
 
   DIBuilder DIB(*GV->getParent());
 
-  SetVector<DISubprogram *> &Subprograms =
-      Cache.GetSubprogramsForFunction(F, DbgFinder);
+  // Only collect subprograms relevant to this GV to avoid creating
+  // O(subprograms × globals) debug instructions.
+  SetVector<DISubprogram *> Subprograms;
+
+  if (DISubprogram *SP = Cache.GetFuncSubprogram(F, DbgFinder))
+    Subprograms.insert(SP);
+
+  for (User *U : AI->users()) {
+    if (Instruction *I = dyn_cast<Instruction>(U))
+      Cache.CollectSubprograms(I->getDebugLoc(), Subprograms);
+  }
+
   for (DISubprogram *Subprogram : Subprograms) {
     DIScope *Scope = Subprogram;
     DebugLoc Loc = DebugLoc::get(DGV->getLine(), 0, Scope);