From 95eaf56811f9b83296e54c9dd96a45d9b99e2e18 Mon Sep 17 00:00:00 2001 From: Rich Lander Date: Thu, 18 Jun 2026 05:47:53 -0700 Subject: [PATCH] Lift a spilled base/this constructor argument into the chain initializer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A base(...)/this(...) argument that carries control flow — the ubiquitous base(message ?? "default") / base(code > 0 ? "x" : null) exception shape — gets spilled to a temporary the general inliner declines to fold: the chain receiver is evaluated first, and TypeRef cannot yet prove it is a class rather than a mutable byref struct, so it reads as an impure leaf the stored value may not be reordered past. The printer's lift only fires when the call is the body's first statement, so the spilled-argument call stayed an invalid `base(temp);` body statement (CS0175) that dropped its argument on recompile. But the receiver of a constructor's own base/this call is the object under construction — an immutable reference with no observable evaluation effect — so moving an argument's computation past it never reorders anything. New ConstructorChainArgumentPass (after structuring folds the ??/?:) inlines each single-use argument spill into the chain call, so the call lands first and the existing lift renders it as a `: base(args)` / `: this(args)` initializer. The compile-back harness ignored the lifted initializer and emitted class skeletons with no base type, so lifted chains could not recompile. It now renders the initializer and emits a `: Base` clause for non-generic in-assembly class bases (object/value-type/generic/out-of-assembly bases stay unspelled, costing at most a base-call diff, never a miscompile). Soundness: the pass reuses single-store/single-load/address-not-taken safety and only inlines temps stored in the run immediately preceding the chain call, where the sole prior evaluation is the effect-free under-construction `this`. The base clause is conservative (TypeDefinition, non-generic, non-object only). On a broad System.Linq corpus the base clause left recompile-fail unchanged (424). Compile-back fixture corpus: exact 147 -> 151 (+4), docket 14 -> 13, recompile-fail 528 -> 525 (no new failures). CtorChainSamples base(message), base(message ?? "default"), and this(value.ToString()) now opcode-exact; the remaining ctor#2 diff is the orthogonal ternary-sense equivalence. Decompiler tests 432 pass; main suite 1140 pass (9 skipped). Found via #604. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../IrImporterTests.cs | 27 +++- .../Passes/ConstructorChainArgumentPass.cs | 137 ++++++++++++++++++ .../Pipeline/Passes/IrPass.cs | 6 + tools/DecompilerHarness/CompileBack.cs | 58 ++++++-- 4 files changed, 207 insertions(+), 21 deletions(-) create mode 100644 src/ILInspector.Decompiler/Pipeline/Passes/ConstructorChainArgumentPass.cs diff --git a/src/ILInspector.Decompiler.Tests/IrImporterTests.cs b/src/ILInspector.Decompiler.Tests/IrImporterTests.cs index 92092c5b..0df23a99 100644 --- a/src/ILInspector.Decompiler.Tests/IrImporterTests.cs +++ b/src/ILInspector.Decompiler.Tests/IrImporterTests.cs @@ -2081,16 +2081,31 @@ public void BaseCall_LiftsToInitializer() } [Fact] - public void SpilledThis_CoalesceArgument_CanonicalizesToBase() + public void SpilledCoalesceArgument_LiftsToInitializer() { // ctor#3: base(message ?? "default") — the ?? spill keeps the base call - // off the first statement, so it stays a (still-canonical) body call - // rather than lifting; never the invalid S_0..ctor form. - var (body, _) = RaiseCtor(3); + // off the first statement; the constructor-chain argument pass inlines + // the spill so the call lifts to the signature initializer instead of + // leaking as an invalid base(temp); body statement (CS0175). + var (body, chain) = RaiseCtor(3); - Assert.Contains("base(", body); + Assert.Equal("base(message ?? \"default\")", chain); + Assert.DoesNotContain("base(", body); Assert.DoesNotContain("..ctor", body); - Assert.DoesNotContain("= this;", body); + } + + [Fact] + public void SpilledTernaryArgument_LiftsWithArgumentInlined() + { + // ctor#2: base(code > 0 ? "positive" : null) — the ternary argument + // spills to a temp; the pass inlines it into the lifted initializer so + // the base argument survives (it would otherwise drop on recompile). + var (body, chain) = RaiseCtor(2); + + Assert.NotNull(chain); + Assert.StartsWith("base(", chain); + Assert.Contains("\"positive\"", chain); + Assert.DoesNotContain("base(", body); } [Fact] diff --git a/src/ILInspector.Decompiler/Pipeline/Passes/ConstructorChainArgumentPass.cs b/src/ILInspector.Decompiler/Pipeline/Passes/ConstructorChainArgumentPass.cs new file mode 100644 index 00000000..dc1d91c2 --- /dev/null +++ b/src/ILInspector.Decompiler/Pipeline/Passes/ConstructorChainArgumentPass.cs @@ -0,0 +1,137 @@ +namespace ILInspector.Decompiler.Pipeline; + +/// +/// Collapses a constructor-chain call's spilled argument temporaries into the +/// call so it lands as the body's first statement, where the printer lifts it +/// to a : base(args) / : this(args) signature initializer. +/// +/// The compiler spills any base/this argument that carries control flow — the +/// ubiquitous base(message ?? SR.Default) exception shape — into a +/// temporary the general inliner then declines to fold: the chain receiver is +/// evaluated first, and cannot yet prove it is a class +/// rather than a mutable byref struct, so it reads as an impure leaf the stored +/// value may not be reordered past. But the receiver of a constructor's own +/// base/this call is the object under construction — an immutable reference with +/// no observable evaluation effect — so moving an argument's computation past it +/// never reorders anything. This pass makes that one safe move +/// set up: it inlines each single-use +/// argument temp stored in the run of statements immediately preceding the +/// chain call. Left in place the call prints as an invalid base(temp); +/// body statement (CS0175) that drops its argument on recompile. +/// +public sealed class ConstructorChainArgumentPass : IIrPass +{ + public string Name => "constructor-chain-argument"; + + public void Run(IrFunction function) + { + if (!function.Signature.HasThis) + return; + + // ConstructorChainPass has already canonicalized the receiver to `this`. + if (FindChainCall(function) is not { } call + || call.Parent is not ExpressionStatement statement + || statement.Parent is not Block block) + { + return; + } + + var usage = CountPlaces(function); + + // Inline the contiguous run of single-use argument spills immediately + // preceding the call. Each inline detaches its store, so the call's + // predecessor shifts down and the next iteration re-checks it. + while (statement.ChildIndex > 0 + && TryInlineSpill(block.Children[statement.ChildIndex - 1], call, usage)) + { + } + } + + static Call? FindChainCall(IrFunction function) + { + foreach (var node in function.Descendants) + { + if (node is Call { Callee: { Name: ".ctor", HasThis: true } } call + && call.Arguments is [LoadArgument { Index: 0 }, ..]) + { + return call; + } + } + return null; + } + + /// + /// Inlines into when it + /// is the single store of a temporary loaded exactly once, inside the call, + /// with its address never taken. Returns false (and changes nothing) for + /// anything else. + /// + static bool TryInlineSpill(IrNode previous, Call call, Dictionary<(bool IsSlot, int Index), Place> usage) + { + (bool IsSlot, int Index)? key = previous switch + { + StoreLocal store => (false, store.Index), + StoreStackSlot store => (true, store.Slot), + _ => null, + }; + if (key is not { } place + || !usage.TryGetValue(place, out var record) + || record.AddressTaken + || record.Stores != 1 + || record.Loads.Count != 1) + { + return false; + } + + var load = record.Loads[0]; + if (!IsInside(load, call)) + return false; + + var value = (IrExpression)previous.DetachChildren()[0]; + previous.Detach(); + load.ReplaceWith(value); + return true; + } + + static Dictionary<(bool IsSlot, int Index), Place> CountPlaces(IrFunction function) + { + var places = new Dictionary<(bool IsSlot, int Index), Place>(); + + Place Entry(bool isSlot, int index) + { + if (!places.TryGetValue((isSlot, index), out var entry)) + places[(isSlot, index)] = entry = new Place(); + return entry; + } + + foreach (var node in function.Descendants) + { + switch (node) + { + case LoadLocal load: Entry(false, load.Index).Loads.Add(load); break; + case StoreLocal store: Entry(false, store.Index).Stores++; break; + case LoadLocalAddress address: Entry(false, address.Index).AddressTaken = true; break; + case LoadStackSlot load: Entry(true, load.Slot).Loads.Add(load); break; + case StoreStackSlot store: Entry(true, store.Slot).Stores++; break; + } + } + return places; + } + + static bool IsInside(IrNode node, IrNode root) + { + for (var current = node; current is not null; current = current.Parent) + { + if (ReferenceEquals(current, root)) + return true; + } + return false; + } + + sealed class Place + { + public List Loads { get; } = []; + public int Stores { get; set; } + public bool AddressTaken { get; set; } + } +} diff --git a/src/ILInspector.Decompiler/Pipeline/Passes/IrPass.cs b/src/ILInspector.Decompiler/Pipeline/Passes/IrPass.cs index 4b96ddbd..2ee9b6df 100644 --- a/src/ILInspector.Decompiler/Pipeline/Passes/IrPass.cs +++ b/src/ILInspector.Decompiler/Pipeline/Passes/IrPass.cs @@ -75,6 +75,12 @@ public static class IrPasses // Folding merges slot diamonds into single stores; a second inlining // run collapses those slots into their uses (ternaries inline). new ExpressionInliningPass(), + // With structuring done, a spilled base/this constructor argument is a + // folded expression (base(message ?? "default")); collapse it into the + // chain call so the call lands as the body's first statement and the + // printer lifts it to a signature initializer rather than an invalid + // base(temp); body call (CS0175). + new ConstructorChainArgumentPass(), // Raise any static function-pointer load still standing into &Method // (it feeds a calli, native callback, or delegate*-typed field — all of // which take a function pointer directly). diff --git a/tools/DecompilerHarness/CompileBack.cs b/tools/DecompilerHarness/CompileBack.cs index 6f8356e0..b36faa5b 100644 --- a/tools/DecompilerHarness/CompileBack.cs +++ b/tools/DecompilerHarness/CompileBack.cs @@ -166,7 +166,8 @@ static void EvaluateType( if (function is null) continue; string? body; - try { body = CSharpPrinter.PrintRaised(function).Output; } + string? chain; + try { var printed = CSharpPrinter.PrintRaised(function); body = printed.Output; chain = printed.ConstructorChain; } catch { continue; } if (body is null) continue; @@ -179,7 +180,7 @@ static void EvaluateType( string origText = string.Join(" ", origOps); string unit; - try { unit = BuildUnit(reader, mh, body); } + try { unit = BuildUnit(reader, mh, body, chain); } catch { results.Add(new(fullType, name, overload, CompileBackStatus.ContextFail, origText, "", "skeleton-emit")); @@ -257,7 +258,8 @@ static void RunType( if (function is null) continue; string? body; - try { body = CSharpPrinter.PrintRaised(function).Output; } + string? chain; + try { var printed = CSharpPrinter.PrintRaised(function); body = printed.Output; chain = printed.ConstructorChain; } catch { continue; } if (body is null) continue; @@ -271,7 +273,7 @@ static void RunType( var origOps = original.Select(i => CanonicalOpcode(i.OpCodeName)).ToList(); string unit; - try { unit = BuildUnit(reader, mh, body); } + try { unit = BuildUnit(reader, mh, body, chain); } catch { contextFail++; continue; } var tree = CSharpSyntaxTree.ParseText(unit, parseOptions); @@ -353,7 +355,7 @@ static void Report( /// body's references to same-assembly types and members all bind — so a /// dropped or mis-bound access surfaces as a true opcode diff, not CS0234/CS0122. /// - static string BuildUnit(MetadataReader reader, MethodDefinitionHandle target, string targetBody) + static string BuildUnit(MetadataReader reader, MethodDefinitionHandle target, string targetBody, string? targetChain) { var sb = new StringBuilder(); sb.AppendLine("#pragma warning disable"); @@ -370,19 +372,42 @@ static string BuildUnit(MetadataReader reader, MethodDefinitionHandle target, st { sb.AppendLine($"namespace {ns}"); sb.AppendLine("{"); - EmitType(reader, typeHandle, target, targetBody, sb, 1); + EmitType(reader, typeHandle, target, targetBody, targetChain, sb, 1); sb.AppendLine("}"); } else { - EmitType(reader, typeHandle, target, targetBody, sb, 0); + EmitType(reader, typeHandle, target, targetBody, targetChain, sb, 0); } } return sb.ToString(); } + /// + /// A : Base clause for a class whose base is a non-generic type in + /// this assembly (so its constructors are visible to a lifted + /// : base(args) initializer). Object and value-type bases need no + /// clause; generic bases (TypeSpec) and out-of-assembly bases are skipped — + /// the skeleton cannot always spell those, and an absent clause only costs a + /// base-call diff, never a miscompile. + /// + static string BaseClause(MetadataReader reader, TypeDefinition typeDef, TypeKind kind) + { + if (kind != TypeKind.Class || typeDef.BaseType.IsNil) + return ""; + if (typeDef.BaseType.Kind != HandleKind.TypeDefinition) + return ""; // TypeReference (out of assembly) / TypeSpec (generic) — not reliably spellable + var baseDef = reader.GetTypeDefinition((TypeDefinitionHandle)typeDef.BaseType); + if (baseDef.GetGenericParameters().Count != 0) + return ""; + string baseName = BaseTypeName(reader, typeDef.BaseType); + if (baseName is "System.Object") + return ""; + return $" : {baseName}"; + } + static void EmitType(MetadataReader reader, TypeDefinitionHandle typeHandle, - MethodDefinitionHandle target, string targetBody, StringBuilder sb, int indent) + MethodDefinitionHandle target, string targetBody, string? targetChain, StringBuilder sb, int indent) { var typeDef = reader.GetTypeDefinition(typeHandle); var kind = ShapeOf(reader, typeDef); @@ -411,20 +436,21 @@ static void EmitType(MetadataReader reader, TypeDefinitionHandle typeHandle, } string keyword = kind == TypeKind.Struct ? "struct" : "class"; - sb.AppendLine($"{pad}public unsafe {keyword} {Identifier(name)}{genParams}"); + string baseClause = BaseClause(reader, typeDef, kind); + sb.AppendLine($"{pad}public unsafe {keyword} {Identifier(name)}{genParams}{baseClause}"); sb.AppendLine($"{pad}{{"); foreach (var fh in typeDef.GetFields()) EmitField(reader, fh, typeContext, sb, pad + " "); foreach (var mh in typeDef.GetMethods()) - EmitMethod(reader, typeHandle, mh, mh == target ? targetBody : null, sb, pad + " "); + EmitMethod(reader, typeHandle, mh, mh == target ? targetBody : null, mh == target ? targetChain : null, sb, pad + " "); foreach (var nested in typeDef.GetNestedTypes()) { if (reader.GetString(reader.GetTypeDefinition(nested).Name).Contains('<')) continue; // compiler-generated (display class, iterator) — not valid C# - EmitType(reader, nested, target, targetBody, sb, indent + 1); + EmitType(reader, nested, target, targetBody, targetChain, sb, indent + 1); } sb.AppendLine($"{pad}}}"); @@ -505,7 +531,7 @@ static void EmitInterface(MetadataReader reader, TypeDefinitionHandle typeHandle { if (reader.GetString(reader.GetTypeDefinition(nested).Name).Contains('<')) continue; - EmitType(reader, nested, default, "", sb, indent + 1); + EmitType(reader, nested, default, "", null, sb, indent + 1); } sb.AppendLine($"{pad}}}"); @@ -561,7 +587,7 @@ static void EmitField(MetadataReader reader, FieldDefinitionHandle fh, GenericCo } static void EmitMethod(MetadataReader reader, TypeDefinitionHandle typeHandle, - MethodDefinitionHandle mh, string? realBody, StringBuilder sb, string pad) + MethodDefinitionHandle mh, string? realBody, string? realChain, StringBuilder sb, string pad) { var typeDef = reader.GetTypeDefinition(typeHandle); var method = reader.GetMethodDefinition(mh); @@ -583,8 +609,10 @@ static void EmitMethod(MetadataReader reader, TypeDefinitionHandle typeHandle, if (name is ".ctor") { // Instance constructor: emit as the type's ctor so signature codegen - // (field inits, base call) matches; strip the IL name. - sb.AppendLine($"{pad}public {Identifier(StripArity(reader.GetString(typeDef.Name)))}({parameters}) {{{body}}}"); + // (field inits, base call) matches; strip the IL name. A lifted + // base(...)/this(...) chain prints as the signature initializer it is. + string initializer = realChain is { Length: > 0 } ? $" : {realChain}" : ""; + sb.AppendLine($"{pad}public {Identifier(StripArity(reader.GetString(typeDef.Name)))}({parameters}){initializer} {{{body}}}"); return; } if (name is ".cctor")