diff --git a/src/ILInspector.Decompiler.Tests/IrImporterTests.cs b/src/ILInspector.Decompiler.Tests/IrImporterTests.cs index 92092c5b..0df23a99 100644 --- a/src/ILInspector.Decompiler.Tests/IrImporterTests.cs +++ b/src/ILInspector.Decompiler.Tests/IrImporterTests.cs @@ -2081,16 +2081,31 @@ public void BaseCall_LiftsToInitializer() } [Fact] - public void SpilledThis_CoalesceArgument_CanonicalizesToBase() + public void SpilledCoalesceArgument_LiftsToInitializer() { // ctor#3: base(message ?? "default") — the ?? spill keeps the base call - // off the first statement, so it stays a (still-canonical) body call - // rather than lifting; never the invalid S_0..ctor form. - var (body, _) = RaiseCtor(3); + // off the first statement; the constructor-chain argument pass inlines + // the spill so the call lifts to the signature initializer instead of + // leaking as an invalid base(temp); body statement (CS0175). + var (body, chain) = RaiseCtor(3); - Assert.Contains("base(", body); + Assert.Equal("base(message ?? \"default\")", chain); + Assert.DoesNotContain("base(", body); Assert.DoesNotContain("..ctor", body); - Assert.DoesNotContain("= this;", body); + } + + [Fact] + public void SpilledTernaryArgument_LiftsWithArgumentInlined() + { + // ctor#2: base(code > 0 ? "positive" : null) — the ternary argument + // spills to a temp; the pass inlines it into the lifted initializer so + // the base argument survives (it would otherwise drop on recompile). + var (body, chain) = RaiseCtor(2); + + Assert.NotNull(chain); + Assert.StartsWith("base(", chain); + Assert.Contains("\"positive\"", chain); + Assert.DoesNotContain("base(", body); } [Fact] diff --git a/src/ILInspector.Decompiler/Pipeline/Passes/ConstructorChainArgumentPass.cs b/src/ILInspector.Decompiler/Pipeline/Passes/ConstructorChainArgumentPass.cs new file mode 100644 index 00000000..dc1d91c2 --- /dev/null +++ b/src/ILInspector.Decompiler/Pipeline/Passes/ConstructorChainArgumentPass.cs @@ -0,0 +1,137 @@ +namespace ILInspector.Decompiler.Pipeline; + +/// +/// Collapses a constructor-chain call's spilled argument temporaries into the +/// call so it lands as the body's first statement, where the printer lifts it +/// to a : base(args) / : this(args) signature initializer. +/// +/// The compiler spills any base/this argument that carries control flow — the +/// ubiquitous base(message ?? SR.Default) exception shape — into a +/// temporary the general inliner then declines to fold: the chain receiver is +/// evaluated first, and cannot yet prove it is a class +/// rather than a mutable byref struct, so it reads as an impure leaf the stored +/// value may not be reordered past. But the receiver of a constructor's own +/// base/this call is the object under construction — an immutable reference with +/// no observable evaluation effect — so moving an argument's computation past it +/// never reorders anything. This pass makes that one safe move +/// set up: it inlines each single-use +/// argument temp stored in the run of statements immediately preceding the +/// chain call. Left in place the call prints as an invalid base(temp); +/// body statement (CS0175) that drops its argument on recompile. +/// +public sealed class ConstructorChainArgumentPass : IIrPass +{ + public string Name => "constructor-chain-argument"; + + public void Run(IrFunction function) + { + if (!function.Signature.HasThis) + return; + + // ConstructorChainPass has already canonicalized the receiver to `this`. + if (FindChainCall(function) is not { } call + || call.Parent is not ExpressionStatement statement + || statement.Parent is not Block block) + { + return; + } + + var usage = CountPlaces(function); + + // Inline the contiguous run of single-use argument spills immediately + // preceding the call. Each inline detaches its store, so the call's + // predecessor shifts down and the next iteration re-checks it. + while (statement.ChildIndex > 0 + && TryInlineSpill(block.Children[statement.ChildIndex - 1], call, usage)) + { + } + } + + static Call? FindChainCall(IrFunction function) + { + foreach (var node in function.Descendants) + { + if (node is Call { Callee: { Name: ".ctor", HasThis: true } } call + && call.Arguments is [LoadArgument { Index: 0 }, ..]) + { + return call; + } + } + return null; + } + + /// + /// Inlines into when it + /// is the single store of a temporary loaded exactly once, inside the call, + /// with its address never taken. Returns false (and changes nothing) for + /// anything else. + /// + static bool TryInlineSpill(IrNode previous, Call call, Dictionary<(bool IsSlot, int Index), Place> usage) + { + (bool IsSlot, int Index)? key = previous switch + { + StoreLocal store => (false, store.Index), + StoreStackSlot store => (true, store.Slot), + _ => null, + }; + if (key is not { } place + || !usage.TryGetValue(place, out var record) + || record.AddressTaken + || record.Stores != 1 + || record.Loads.Count != 1) + { + return false; + } + + var load = record.Loads[0]; + if (!IsInside(load, call)) + return false; + + var value = (IrExpression)previous.DetachChildren()[0]; + previous.Detach(); + load.ReplaceWith(value); + return true; + } + + static Dictionary<(bool IsSlot, int Index), Place> CountPlaces(IrFunction function) + { + var places = new Dictionary<(bool IsSlot, int Index), Place>(); + + Place Entry(bool isSlot, int index) + { + if (!places.TryGetValue((isSlot, index), out var entry)) + places[(isSlot, index)] = entry = new Place(); + return entry; + } + + foreach (var node in function.Descendants) + { + switch (node) + { + case LoadLocal load: Entry(false, load.Index).Loads.Add(load); break; + case StoreLocal store: Entry(false, store.Index).Stores++; break; + case LoadLocalAddress address: Entry(false, address.Index).AddressTaken = true; break; + case LoadStackSlot load: Entry(true, load.Slot).Loads.Add(load); break; + case StoreStackSlot store: Entry(true, store.Slot).Stores++; break; + } + } + return places; + } + + static bool IsInside(IrNode node, IrNode root) + { + for (var current = node; current is not null; current = current.Parent) + { + if (ReferenceEquals(current, root)) + return true; + } + return false; + } + + sealed class Place + { + public List Loads { get; } = []; + public int Stores { get; set; } + public bool AddressTaken { get; set; } + } +} diff --git a/src/ILInspector.Decompiler/Pipeline/Passes/IrPass.cs b/src/ILInspector.Decompiler/Pipeline/Passes/IrPass.cs index 4b96ddbd..2ee9b6df 100644 --- a/src/ILInspector.Decompiler/Pipeline/Passes/IrPass.cs +++ b/src/ILInspector.Decompiler/Pipeline/Passes/IrPass.cs @@ -75,6 +75,12 @@ public static class IrPasses // Folding merges slot diamonds into single stores; a second inlining // run collapses those slots into their uses (ternaries inline). new ExpressionInliningPass(), + // With structuring done, a spilled base/this constructor argument is a + // folded expression (base(message ?? "default")); collapse it into the + // chain call so the call lands as the body's first statement and the + // printer lifts it to a signature initializer rather than an invalid + // base(temp); body call (CS0175). + new ConstructorChainArgumentPass(), // Raise any static function-pointer load still standing into &Method // (it feeds a calli, native callback, or delegate*-typed field — all of // which take a function pointer directly). diff --git a/tools/DecompilerHarness/CompileBack.cs b/tools/DecompilerHarness/CompileBack.cs index 6f8356e0..b36faa5b 100644 --- a/tools/DecompilerHarness/CompileBack.cs +++ b/tools/DecompilerHarness/CompileBack.cs @@ -166,7 +166,8 @@ static void EvaluateType( if (function is null) continue; string? body; - try { body = CSharpPrinter.PrintRaised(function).Output; } + string? chain; + try { var printed = CSharpPrinter.PrintRaised(function); body = printed.Output; chain = printed.ConstructorChain; } catch { continue; } if (body is null) continue; @@ -179,7 +180,7 @@ static void EvaluateType( string origText = string.Join(" ", origOps); string unit; - try { unit = BuildUnit(reader, mh, body); } + try { unit = BuildUnit(reader, mh, body, chain); } catch { results.Add(new(fullType, name, overload, CompileBackStatus.ContextFail, origText, "", "skeleton-emit")); @@ -257,7 +258,8 @@ static void RunType( if (function is null) continue; string? body; - try { body = CSharpPrinter.PrintRaised(function).Output; } + string? chain; + try { var printed = CSharpPrinter.PrintRaised(function); body = printed.Output; chain = printed.ConstructorChain; } catch { continue; } if (body is null) continue; @@ -271,7 +273,7 @@ static void RunType( var origOps = original.Select(i => CanonicalOpcode(i.OpCodeName)).ToList(); string unit; - try { unit = BuildUnit(reader, mh, body); } + try { unit = BuildUnit(reader, mh, body, chain); } catch { contextFail++; continue; } var tree = CSharpSyntaxTree.ParseText(unit, parseOptions); @@ -353,7 +355,7 @@ static void Report( /// body's references to same-assembly types and members all bind — so a /// dropped or mis-bound access surfaces as a true opcode diff, not CS0234/CS0122. /// - static string BuildUnit(MetadataReader reader, MethodDefinitionHandle target, string targetBody) + static string BuildUnit(MetadataReader reader, MethodDefinitionHandle target, string targetBody, string? targetChain) { var sb = new StringBuilder(); sb.AppendLine("#pragma warning disable"); @@ -370,19 +372,42 @@ static string BuildUnit(MetadataReader reader, MethodDefinitionHandle target, st { sb.AppendLine($"namespace {ns}"); sb.AppendLine("{"); - EmitType(reader, typeHandle, target, targetBody, sb, 1); + EmitType(reader, typeHandle, target, targetBody, targetChain, sb, 1); sb.AppendLine("}"); } else { - EmitType(reader, typeHandle, target, targetBody, sb, 0); + EmitType(reader, typeHandle, target, targetBody, targetChain, sb, 0); } } return sb.ToString(); } + /// + /// A : Base clause for a class whose base is a non-generic type in + /// this assembly (so its constructors are visible to a lifted + /// : base(args) initializer). Object and value-type bases need no + /// clause; generic bases (TypeSpec) and out-of-assembly bases are skipped — + /// the skeleton cannot always spell those, and an absent clause only costs a + /// base-call diff, never a miscompile. + /// + static string BaseClause(MetadataReader reader, TypeDefinition typeDef, TypeKind kind) + { + if (kind != TypeKind.Class || typeDef.BaseType.IsNil) + return ""; + if (typeDef.BaseType.Kind != HandleKind.TypeDefinition) + return ""; // TypeReference (out of assembly) / TypeSpec (generic) — not reliably spellable + var baseDef = reader.GetTypeDefinition((TypeDefinitionHandle)typeDef.BaseType); + if (baseDef.GetGenericParameters().Count != 0) + return ""; + string baseName = BaseTypeName(reader, typeDef.BaseType); + if (baseName is "System.Object") + return ""; + return $" : {baseName}"; + } + static void EmitType(MetadataReader reader, TypeDefinitionHandle typeHandle, - MethodDefinitionHandle target, string targetBody, StringBuilder sb, int indent) + MethodDefinitionHandle target, string targetBody, string? targetChain, StringBuilder sb, int indent) { var typeDef = reader.GetTypeDefinition(typeHandle); var kind = ShapeOf(reader, typeDef); @@ -411,20 +436,21 @@ static void EmitType(MetadataReader reader, TypeDefinitionHandle typeHandle, } string keyword = kind == TypeKind.Struct ? "struct" : "class"; - sb.AppendLine($"{pad}public unsafe {keyword} {Identifier(name)}{genParams}"); + string baseClause = BaseClause(reader, typeDef, kind); + sb.AppendLine($"{pad}public unsafe {keyword} {Identifier(name)}{genParams}{baseClause}"); sb.AppendLine($"{pad}{{"); foreach (var fh in typeDef.GetFields()) EmitField(reader, fh, typeContext, sb, pad + " "); foreach (var mh in typeDef.GetMethods()) - EmitMethod(reader, typeHandle, mh, mh == target ? targetBody : null, sb, pad + " "); + EmitMethod(reader, typeHandle, mh, mh == target ? targetBody : null, mh == target ? targetChain : null, sb, pad + " "); foreach (var nested in typeDef.GetNestedTypes()) { if (reader.GetString(reader.GetTypeDefinition(nested).Name).Contains('<')) continue; // compiler-generated (display class, iterator) — not valid C# - EmitType(reader, nested, target, targetBody, sb, indent + 1); + EmitType(reader, nested, target, targetBody, targetChain, sb, indent + 1); } sb.AppendLine($"{pad}}}"); @@ -505,7 +531,7 @@ static void EmitInterface(MetadataReader reader, TypeDefinitionHandle typeHandle { if (reader.GetString(reader.GetTypeDefinition(nested).Name).Contains('<')) continue; - EmitType(reader, nested, default, "", sb, indent + 1); + EmitType(reader, nested, default, "", null, sb, indent + 1); } sb.AppendLine($"{pad}}}"); @@ -561,7 +587,7 @@ static void EmitField(MetadataReader reader, FieldDefinitionHandle fh, GenericCo } static void EmitMethod(MetadataReader reader, TypeDefinitionHandle typeHandle, - MethodDefinitionHandle mh, string? realBody, StringBuilder sb, string pad) + MethodDefinitionHandle mh, string? realBody, string? realChain, StringBuilder sb, string pad) { var typeDef = reader.GetTypeDefinition(typeHandle); var method = reader.GetMethodDefinition(mh); @@ -583,8 +609,10 @@ static void EmitMethod(MetadataReader reader, TypeDefinitionHandle typeHandle, if (name is ".ctor") { // Instance constructor: emit as the type's ctor so signature codegen - // (field inits, base call) matches; strip the IL name. - sb.AppendLine($"{pad}public {Identifier(StripArity(reader.GetString(typeDef.Name)))}({parameters}) {{{body}}}"); + // (field inits, base call) matches; strip the IL name. A lifted + // base(...)/this(...) chain prints as the signature initializer it is. + string initializer = realChain is { Length: > 0 } ? $" : {realChain}" : ""; + sb.AppendLine($"{pad}public {Identifier(StripArity(reader.GetString(typeDef.Name)))}({parameters}){initializer} {{{body}}}"); return; } if (name is ".cctor")