From 131523f48b4d312bc7d91437143af08f09247459 Mon Sep 17 00:00:00 2001 From: Rich Lander Date: Thu, 18 Jun 2026 06:18:29 -0700 Subject: [PATCH] Lift constructor field initializers ahead of the base call to the field declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit C# emits field initializers before the base constructor call, so a `this.field = value` store the IL places before the chain call is a field initializer — not a body assignment. The printer rendered it as a body statement, which recompiles to AFTER the base call: an opcode-order diff (the two remaining `.ctor` compile-back docket items, `CfgSampleClass::.ctor` spelling `_shadowed = 1;` and `LockFixtureSamples::.ctor` spelling `_root = new object();`). Generalize the constructor-prologue detection: scan the entry block for the base/this `.ctor` chain call; if every statement before it is a qualifying field-initializer store, lift each to the field declaration (new `DecompilerResult.FieldInitializers`) and let the existing chain lift fire on the call. The compile-back harness threads the initializers through to the target type's field declarations. Soundness: a store only qualifies when its receiver is the under-construction `this` (ldarg.0) AND its value references no place — no `this`, parameter, local, or stack-slot load (`ReferencesPlace`). That is exactly C#'s field-initializer legality rule (initializers cannot read the instance or constructor parameters), so a lifted store is always legal where it lands. Detection requires the whole pre-chain prologue to be such stores, matching the compiler's prologue shape exactly; anything else bails and keeps the current body rendering. Constants, `new T()`, and static reads qualify; a field init that reads a parameter does not and stays in the body. Compile-back fixture exact 151 -> 153 (+2), docket 13 -> 11 (-2), recompile-fail unchanged (527; System.Linq unchanged 395). Decompiler tests 432 -> 434. Pinned `CfgSampleClass::.ctor` exact in the gate. Found via #604. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../CompileBackGateTests.cs | 15 ++-- .../IrImporterTests.cs | 48 ++++++++++-- .../DecompilerResult.cs | 9 +++ .../Pipeline/Ir/CSharpPrinter.cs | 75 ++++++++++++++++--- tools/DecompilerHarness/CompileBack.cs | 37 +++++---- 5 files changed, 147 insertions(+), 37 deletions(-) diff --git a/src/ILInspector.Decompiler.Tests/CompileBackGateTests.cs b/src/ILInspector.Decompiler.Tests/CompileBackGateTests.cs index cf24478b..918f522b 100644 --- a/src/ILInspector.Decompiler.Tests/CompileBackGateTests.cs +++ b/src/ILInspector.Decompiler.Tests/CompileBackGateTests.cs @@ -20,14 +20,12 @@ public class CompileBackGateTests /// decompiler docket. Each is a tracked defect or a benign over-render; the gate /// tolerates these but fails if a NEW method joins the set. Shrink this list as /// fixes land. Tracked defects include StaleFieldRead (issue #605), Shadowed - /// (a dropped this.field load), the .ctor field-initializer ordering, and the - /// definite-assignment default-init over-render (TryFinallyAdd, PowerOfTwo, - /// CatchEverything, SwitchCase, WhileLoop, ClassicLock, TryFinallyTwoReturns, - /// ManualDisposeAsyncInFinally). + /// (a dropped this.field load), and the definite-assignment default-init + /// over-render (TryFinallyAdd, PowerOfTwo, CatchEverything, SwitchCase, + /// WhileLoop, ClassicLock, TryFinallyTwoReturns, ManualDisposeAsyncInFinally). /// static readonly HashSet KnownDiffs = new(StringComparer.Ordinal) { - ".ctor", "BothPositive", "CatchEverything", "ClassicLock", @@ -48,10 +46,11 @@ public class CompileBackGateTests /// /// Methods a prior compile-back fix turned opcode-exact. Pinning them guards the /// fix durably: CheckedAdd must keep the overflow check (#604), UnsignedShift - /// must keep dropping the redundant width mask (#606), and Shadowed must keep - /// qualifying the shadowed this.field load (#607). + /// must keep dropping the redundant width mask (#606), Shadowed must keep + /// qualifying the shadowed this.field load (#607), and .ctor must keep lifting + /// its field initializer ahead of the base call to the field declaration. /// - static readonly string[] PinnedExact = { "CheckedAdd", "UnsignedShift", "Shadowed" }; + static readonly string[] PinnedExact = { "CheckedAdd", "UnsignedShift", "Shadowed", ".ctor" }; static IReadOnlyList EvaluateFixtures() { diff --git a/src/ILInspector.Decompiler.Tests/IrImporterTests.cs b/src/ILInspector.Decompiler.Tests/IrImporterTests.cs index 0df23a99..fcc90c08 100644 --- a/src/ILInspector.Decompiler.Tests/IrImporterTests.cs +++ b/src/ILInspector.Decompiler.Tests/IrImporterTests.cs @@ -960,17 +960,20 @@ static string ReturnOf(string sourceType, string returnType) [Fact] public void Constructor_ImplicitBaseCall_IsSuppressed() { - // A no-argument base-constructor call is implicit in C#; only the - // field initializer remains in the body. (Argumentful base(...) - // still prints until constructor initializers are modeled.) + // A no-argument base-constructor call is implicit in C#, and the field + // initializer (`_shadowed = 1`) the compiler emits before that base call + // lifts to the field declaration — not the body, where it would + // recompile to AFTER the base call. using var source = MetadataSource.Open(typeof(CfgSampleClass).Assembly.Location); var function = IrImporter.Import(source, typeof(CfgSampleClass).FullName!, ".ctor"); Assert.NotNull(function); - string output = CSharpPrinter.Print(function).Output!; + var result = CSharpPrinter.Print(function); + string output = result.Output!; Assert.DoesNotContain("base(", output); Assert.DoesNotContain(".ctor", output); - Assert.Contains("_shadowed = 1;", output); + Assert.DoesNotContain("_shadowed", output); + Assert.Contains(("_shadowed", "1"), result.FieldInitializers); } [Fact] @@ -2118,6 +2121,41 @@ public void ThisDelegation_LiftsToInitializer() Assert.DoesNotContain("base(", chain); Assert.DoesNotContain("..ctor", chain); } + + static DecompilerResult RaiseDefaultCtor(Type type) + { + using var source = MetadataSource.Open(type.Assembly.Location); + var function = IrImporter.Import(source, type.FullName!, ".ctor"); + Assert.NotNull(function); + var result = CSharpPrinter.PrintRaised(function); + Assert.True(result.Succeeded); + return result; + } + + [Fact] + public void ConstantFieldInitializer_LiftsToFieldDeclaration() + { + // CfgSampleClass declares `int _shadowed = 1;`. The compiler emits that + // store before the (implicit) base call, so it is a field initializer, + // not a body assignment — lift it to the field declaration and drop it + // from the body (where it would recompile to AFTER the base call). + var result = RaiseDefaultCtor(typeof(CfgSampleClass)); + + Assert.Contains(("_shadowed", "1"), result.FieldInitializers); + Assert.DoesNotContain("_shadowed", result.Output); + } + + [Fact] + public void NewObjectFieldInitializer_LiftsToFieldDeclaration() + { + // LockFixtureSamples declares `readonly object _root = new();`. The + // `new object()` initializer is self-contained (no this/parameter/local + // load), so it is legal in field-declaration context and lifts there. + var result = RaiseDefaultCtor(typeof(LockFixtureSamples)); + + Assert.Contains(("_root", "new object()"), result.FieldInitializers); + Assert.DoesNotContain("_root", result.Output); + } } public class IdentityConvertTests diff --git a/src/ILInspector.Decompiler/DecompilerResult.cs b/src/ILInspector.Decompiler/DecompilerResult.cs index 0e537fd6..f211e573 100644 --- a/src/ILInspector.Decompiler/DecompilerResult.cs +++ b/src/ILInspector.Decompiler/DecompilerResult.cs @@ -91,6 +91,15 @@ public sealed record DecompilerResult( /// public string? ConstructorChain { get; init; } + /// + /// For a constructor, the field initializers (field, value + /// pairs) lifted out of this.field = value + /// stores the IL placed before the base call, which C# spells on the field + /// declaration, not in the body. The formatter that renders the field + /// declarations places them; empty when there are none. + /// + public IReadOnlyList<(string Field, string Value)> FieldInitializers { get; init; } = []; + public static DecompilerResult Success(string output) => new(output, DecompilationFidelity.Full, []); diff --git a/src/ILInspector.Decompiler/Pipeline/Ir/CSharpPrinter.cs b/src/ILInspector.Decompiler/Pipeline/Ir/CSharpPrinter.cs index 49ad6b34..bb3568aa 100644 --- a/src/ILInspector.Decompiler/Pipeline/Ir/CSharpPrinter.cs +++ b/src/ILInspector.Decompiler/Pipeline/Ir/CSharpPrinter.cs @@ -42,6 +42,7 @@ public static DecompilerResult Print(IrFunction function) return new DecompilerResult(output, function.Fidelity, [.. function.Diagnostics]) { ConstructorChain = printer._constructorChain, + FieldInitializers = printer._fieldInitializers, }; } catch (Exception ex) @@ -63,6 +64,10 @@ public static DecompilerResult Print(IrFunction function) string? _constructorChain; IrNode? _chainStatement; + /// Field initializers (this.f = value stores preceding the base call) lifted out of a constructor body to the field declarations, keyed in source order. + readonly List<(string Field, string Value)> _fieldInitializers = []; + readonly HashSet _fieldInitStores = []; + string PrintBody(IrFunction function) { var sb = new StringBuilder(); @@ -70,16 +75,30 @@ string PrintBody(IrFunction function) CollectDeclaringStores(function); _readBeforeAssign = ComputeReadBeforeAssign(function); - // An explicit base(...)/this(...) chain opens a constructor body; lift - // it to a signature initializer so the body stays valid C# (a base/this - // call as a body statement is CS0175). - if (function.Body.Blocks is [{ Children: [var first, ..] }, ..] - && first is ExpressionStatement { Expression: Call { Callee: { Name: ".ctor", HasThis: true } callee } call } - && call.Arguments is [LoadArgument { Index: 0 }, ..] - && ConstructorChainText(callee, call) is { } chain) + // A constructor prologue is leading field-initializer stores + // (this.f = value) followed by the base(...)/this(...) chain call. C# + // emits field initializers before the base call, so a this-field store + // preceding the chain call is a field initializer — not a body + // assignment — and the chain call is invalid as a body statement + // (CS0175). Lift both out of the body so they render where they belong: + // the initializers on the field declarations, the chain on the + // signature. + if (function.Body.Blocks is [{ } entry, ..] + && ChainCallIndex(entry) is { } chainIndex + && entry.Children.Take(chainIndex).All(IsFieldInitializerStore)) { - _constructorChain = chain.TrimEnd(';'); - _chainStatement = first; + foreach (var store in entry.Children.Take(chainIndex).Cast()) + { + _fieldInitStores.Add(store); + _fieldInitializers.Add((store.Field.Name, Expression(store.Value))); + } + + var chainCall = (Call)((ExpressionStatement)entry.Children[chainIndex]).Expression; + if (ConstructorChainText(chainCall.Callee, chainCall) is { } chain) + { + _constructorChain = chain.TrimEnd(';'); + _chainStatement = entry.Children[chainIndex]; + } } // Remaining locals and slots declare up front, current-style. @@ -107,8 +126,8 @@ void AppendContainer(StringBuilder sb, BlockContainer container, int indent, boo bool labeledReturnOnly = _labelTargets.Contains(block.StartOffset) && block.Children.Count == 1; foreach (var statement in block.Children) { - if (ReferenceEquals(statement, _chainStatement)) - continue; // lifted to the signature initializer + if (ReferenceEquals(statement, _chainStatement) || _fieldInitStores.Contains(statement)) + continue; // lifted to the signature initializer / field declarations bool isLast = topLevel && i == blocks.Count - 1 && ReferenceEquals(statement, block.Children[^1]); if (isLast && !labeledReturnOnly && statement is Return { Value: null }) break; @@ -687,6 +706,40 @@ void AppendStatement(StringBuilder sb, IrNode node, int indent) return $"{(isThis ? "this" : "base")}({Arguments(arguments, callee.ParameterTypes, callee.ParameterRefKinds)});"; } + /// The index of the base/this .ctor chain call in the entry block, or null when the body has none (a struct ctor, a static method, a body that never chains). + static int? ChainCallIndex(Block entry) + { + for (int i = 0; i < entry.Children.Count; i++) + { + if (entry.Children[i] is ExpressionStatement { Expression: Call { Callee: { Name: ".ctor", HasThis: true } } call } + && call.Arguments is [LoadArgument { Index: 0 }, ..]) + { + return i; + } + } + return null; + } + + /// + /// A this.field = value store whose value is a field initializer: + /// self-contained (no this, parameter, local, or slot load), so it is + /// legal in field-declaration context. C# field initializers cannot read the + /// instance or constructor parameters, which is exactly the place-load ban. + /// + static bool IsFieldInitializerStore(IrNode node) + => node is StoreField { HasInstance: true, Instance: LoadArgument { Index: 0 } } store + && !ReferencesPlace(store.Value); + + static bool ReferencesPlace(IrExpression value) + { + foreach (var node in (IEnumerable)[value, .. value.Descendants]) + { + if (node is LoadArgument or LoadLocal or LoadStackSlot or LoadLocalAddress or LoadArgumentAddress) + return true; + } + return false; + } + /// Baseline-style clause headers: bare catch for object (the catch-all), the variable form when the entry store folded into the clause. string CatchHeader(CatchClause clause) => clause.ExceptionType is { Namespace: "System", Name: "Object" } diff --git a/tools/DecompilerHarness/CompileBack.cs b/tools/DecompilerHarness/CompileBack.cs index b36faa5b..c396882f 100644 --- a/tools/DecompilerHarness/CompileBack.cs +++ b/tools/DecompilerHarness/CompileBack.cs @@ -167,7 +167,8 @@ static void EvaluateType( continue; string? body; string? chain; - try { var printed = CSharpPrinter.PrintRaised(function); body = printed.Output; chain = printed.ConstructorChain; } + IReadOnlyList<(string Field, string Value)> fieldInits; + try { var printed = CSharpPrinter.PrintRaised(function); body = printed.Output; chain = printed.ConstructorChain; fieldInits = printed.FieldInitializers; } catch { continue; } if (body is null) continue; @@ -180,7 +181,7 @@ static void EvaluateType( string origText = string.Join(" ", origOps); string unit; - try { unit = BuildUnit(reader, mh, body, chain); } + try { unit = BuildUnit(reader, mh, body, chain, fieldInits); } catch { results.Add(new(fullType, name, overload, CompileBackStatus.ContextFail, origText, "", "skeleton-emit")); @@ -259,7 +260,8 @@ static void RunType( continue; string? body; string? chain; - try { var printed = CSharpPrinter.PrintRaised(function); body = printed.Output; chain = printed.ConstructorChain; } + IReadOnlyList<(string Field, string Value)> fieldInits; + try { var printed = CSharpPrinter.PrintRaised(function); body = printed.Output; chain = printed.ConstructorChain; fieldInits = printed.FieldInitializers; } catch { continue; } if (body is null) continue; @@ -273,7 +275,7 @@ static void RunType( var origOps = original.Select(i => CanonicalOpcode(i.OpCodeName)).ToList(); string unit; - try { unit = BuildUnit(reader, mh, body, chain); } + try { unit = BuildUnit(reader, mh, body, chain, fieldInits); } catch { contextFail++; continue; } var tree = CSharpSyntaxTree.ParseText(unit, parseOptions); @@ -355,7 +357,8 @@ static void Report( /// body's references to same-assembly types and members all bind — so a /// dropped or mis-bound access surfaces as a true opcode diff, not CS0234/CS0122. /// - static string BuildUnit(MetadataReader reader, MethodDefinitionHandle target, string targetBody, string? targetChain) + static string BuildUnit(MetadataReader reader, MethodDefinitionHandle target, string targetBody, string? targetChain, + IReadOnlyList<(string Field, string Value)> targetFieldInits) { var sb = new StringBuilder(); sb.AppendLine("#pragma warning disable"); @@ -372,12 +375,12 @@ static string BuildUnit(MetadataReader reader, MethodDefinitionHandle target, st { sb.AppendLine($"namespace {ns}"); sb.AppendLine("{"); - EmitType(reader, typeHandle, target, targetBody, targetChain, sb, 1); + EmitType(reader, typeHandle, target, targetBody, targetChain, targetFieldInits, sb, 1); sb.AppendLine("}"); } else { - EmitType(reader, typeHandle, target, targetBody, targetChain, sb, 0); + EmitType(reader, typeHandle, target, targetBody, targetChain, targetFieldInits, sb, 0); } } return sb.ToString(); @@ -407,7 +410,8 @@ static string BaseClause(MetadataReader reader, TypeDefinition typeDef, TypeKind } static void EmitType(MetadataReader reader, TypeDefinitionHandle typeHandle, - MethodDefinitionHandle target, string targetBody, string? targetChain, StringBuilder sb, int indent) + MethodDefinitionHandle target, string targetBody, string? targetChain, + IReadOnlyList<(string Field, string Value)> targetFieldInits, StringBuilder sb, int indent) { var typeDef = reader.GetTypeDefinition(typeHandle); var kind = ShapeOf(reader, typeDef); @@ -440,8 +444,13 @@ static void EmitType(MetadataReader reader, TypeDefinitionHandle typeHandle, sb.AppendLine($"{pad}public unsafe {keyword} {Identifier(name)}{genParams}{baseClause}"); sb.AppendLine($"{pad}{{"); + // Field initializers lifted from the target ctor apply to this type's + // fields only when it declares that ctor. + bool declaresTarget = !target.IsNil && typeDef.GetMethods().Any(mh => mh == target); + var fieldInits = declaresTarget ? targetFieldInits : []; + foreach (var fh in typeDef.GetFields()) - EmitField(reader, fh, typeContext, sb, pad + " "); + EmitField(reader, fh, typeContext, fieldInits, sb, pad + " "); foreach (var mh in typeDef.GetMethods()) EmitMethod(reader, typeHandle, mh, mh == target ? targetBody : null, mh == target ? targetChain : null, sb, pad + " "); @@ -450,7 +459,7 @@ static void EmitType(MetadataReader reader, TypeDefinitionHandle typeHandle, { if (reader.GetString(reader.GetTypeDefinition(nested).Name).Contains('<')) continue; // compiler-generated (display class, iterator) — not valid C# - EmitType(reader, nested, target, targetBody, targetChain, sb, indent + 1); + EmitType(reader, nested, target, targetBody, targetChain, targetFieldInits, sb, indent + 1); } sb.AppendLine($"{pad}}}"); @@ -531,7 +540,7 @@ static void EmitInterface(MetadataReader reader, TypeDefinitionHandle typeHandle { if (reader.GetString(reader.GetTypeDefinition(nested).Name).Contains('<')) continue; - EmitType(reader, nested, default, "", null, sb, indent + 1); + EmitType(reader, nested, default, "", null, [], sb, indent + 1); } sb.AppendLine($"{pad}}}"); @@ -563,7 +572,7 @@ static void EmitEnum(MetadataReader reader, TypeDefinition typeDef, string name, } static void EmitField(MetadataReader reader, FieldDefinitionHandle fh, GenericContext context, - StringBuilder sb, string pad) + IReadOnlyList<(string Field, string Value)> fieldInits, StringBuilder sb, string pad) { var field = reader.GetFieldDefinition(fh); string name = reader.GetString(field.Name); @@ -583,7 +592,9 @@ static void EmitField(MetadataReader reader, FieldDefinitionHandle fh, GenericCo sb.AppendLine($"{pad}public const {Clean(type)} {Identifier(name)} = {value};"); return; } - sb.AppendLine($"{pad}public {(isStatic ? "static " : "")}{Clean(type)} {Identifier(name)};"); + string? initializer = fieldInits.FirstOrDefault(fi => fi.Field == name).Value; + string suffix = initializer is not null && !isStatic ? $" = {initializer}" : ""; + sb.AppendLine($"{pad}public {(isStatic ? "static " : "")}{Clean(type)} {Identifier(name)}{suffix};"); } static void EmitMethod(MetadataReader reader, TypeDefinitionHandle typeHandle,