From 180d76db3d2e39d045a3c4d4df9cbfb984b0df98 Mon Sep 17 00:00:00 2001 From: Markus Handell Date: Mon, 8 Jun 2026 11:49:45 +0100 Subject: [PATCH] Enable offline symbolization and improve process grouping Emit Mapping entries with Breakpad build IDs and RVA-normalized addresses so symbolization servers can resolve unsymbolized frames without local PDBs, with Has* flags set per-mapping based on actual coverage. Also add --noSplitChromeProcesses and --listImageIds flags, merge same-named processes when PIDs are not included, and update to net10.0. --- EtwToPprof.csproj | 4 +- ProfileWriter.cs | 139 ++++++++++++++++++++++++++++++++++++++++++---- Program.cs | 60 +++++++++++++++++++- README.md | 23 +++++++- 4 files changed, 212 insertions(+), 14 deletions(-) diff --git a/EtwToPprof.csproj b/EtwToPprof.csproj index 85bdf1a..98e0d9e 100644 --- a/EtwToPprof.csproj +++ b/EtwToPprof.csproj @@ -1,8 +1,8 @@ - + Exe - netcoreapp3.1 + net10.0 0.1.0 Sunny Sachanandani Google LLC diff --git a/ProfileWriter.cs b/ProfileWriter.cs index 83578d6..0560cf1 100644 --- a/ProfileWriter.cs +++ b/ProfileWriter.cs @@ -1,4 +1,4 @@ -// Copyright 2020 Google LLC +// Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -67,6 +67,16 @@ public ProfileWriter(Options options) functions = new Dictionary(); nextFunctionId = 1; + + mappings = new Dictionary(); + nextMappingId = 1; + + unsymbolizedLocations = new Dictionary(); + + mappingsWithUnsymbolized = new HashSet(); + // Maps mapping ID -> absolute base address of the loaded image. + // Used to convert absolute VAs to RVAs for Location.address. 
+ mappingBaseAddresses = new Dictionary(); } public void AddSample(ICpuSample sample) @@ -79,7 +89,7 @@ public void AddSample(ICpuSample sample) if (timestamp < options.timeStart || timestamp > options.timeEnd) return; - if (options.processFilterSet?.Count != 0) + if (options.processFilterSet?.Count > 0) { var processImage = sample.Process.Images.FirstOrDefault( image => image.FileName == sample.Process.ImageName); @@ -102,14 +112,12 @@ public void AddSample(ICpuSample sample) { if (stackFrame.HasValue && stackFrame.Symbol != null) { - sampleProto.LocationId.Add(GetLocationId(stackFrame.Symbol)); + sampleProto.LocationId.Add(GetLocationId(stackFrame.Symbol, stackFrame.Address)); } - else + else if (stackFrame.HasValue) { - string imageName = stackFrame.Image?.FileName ?? ""; - string functionLabel = ""; sampleProto.LocationId.Add( - GetPseudoLocationId(processId, imageName, null, functionLabel)); + GetUnsymbolizedLocationId(stackFrame.Address, stackFrame.Image)); } } string processName = sample.Process.ImageName; @@ -120,8 +128,12 @@ public void AddSample(ICpuSample sample) { threadLabel = String.Format("{0} ({1})", threadLabel, sample.Thread?.Id ?? 0); } + // When process IDs are not included, use process ID 0 so that identically + // labelled threads of same-named processes can merge into a single entry. + int threadPseudoProcessId = (options.includeProcessIds || options.includeProcessAndThreadIds) + ? processId : 0; sampleProto.LocationId.Add( - GetPseudoLocationId(processId, processName, sample.Thread?.StartAddress, threadLabel)); + GetPseudoLocationId(threadPseudoProcessId, processName, sample.Thread?.StartAddress, threadLabel)); string processLabel = processName; if (options.splitChromeProcesses && processName == "chrome.exe" && @@ -156,8 +168,12 @@ public void AddSample(ICpuSample sample) { processLabel = processLabel + $" ({processId})"; } + // When process IDs are not included, use 0/null so that processes + // with the same label merge into a single flame graph entry. 
+ int pseudoProcessId = (options.includeProcessIds || options.includeProcessAndThreadIds) + ? processId : 0; sampleProto.LocationId.Add( - GetPseudoLocationId(processId, processName, sample.Process.ObjectAddress, processLabel)); + GetPseudoLocationId(pseudoProcessId, processName, null, processLabel)); if (processThreadCpuTimes.ContainsKey(processLabel)) { @@ -225,6 +241,16 @@ public long Write(string outputFileName) { profile.Comment.Add(GetStringId("No samples exported")); } + // Set Has* flags on mappings: only claim symbolization for mappings + // where all locations were successfully resolved. + foreach (var mappingProto in profile.Mapping) + { + bool fullySymbolized = !mappingsWithUnsymbolized.Contains(mappingProto.Id); + mappingProto.HasFunctions = fullySymbolized; + mappingProto.HasFilenames = fullySymbolized; + mappingProto.HasLineNumbers = fullySymbolized; + mappingProto.HasInlineFrames = fullySymbolized && options.includeInlinedFunctions; + } using (FileStream output = File.Create(outputFileName)) { using (GZipStream gzip = new GZipStream(output, CompressionMode.Compress)) @@ -278,6 +304,8 @@ ulong GetPseudoLocationId(int processId, string imageName, Address? address, str var locationProto = new pb.Location(); locationProto.Id = locationId; + if (address.HasValue) + locationProto.Address = unchecked((ulong)address.Value.Value); var line = new pb.Line(); line.FunctionId = GetFunctionId(imageName, label); @@ -288,7 +316,34 @@ ulong GetPseudoLocationId(int processId, string imageName, Address? address, str return locationId; } - ulong GetLocationId(IStackSymbol stackSymbol) + ulong GetUnsymbolizedLocationId(Address address, IImage image) + { + // Use the raw address as the dedup key for unsymbolized frames. 
+ ulong addr = unchecked((ulong)address.Value); + if (!unsymbolizedLocations.TryGetValue(addr, out ulong locationId)) + { + locationId = nextLocationId++; + unsymbolizedLocations.Add(addr, locationId); + + var locationProto = new pb.Location(); + locationProto.Id = locationId; + locationProto.Address = addr; + if (image != null) + { + ulong mid = GetMappingId(image); + locationProto.MappingId = mid; + // Convert absolute VA to RVA (see comment in GetMappingId). + locationProto.Address = addr - mappingBaseAddresses[mid]; + mappingsWithUnsymbolized.Add(mid); + } + + // No Line entries — leaves the location bare for offline symbolization. + profile.Location.Add(locationProto); + } + return locationId; + } + + ulong GetLocationId(IStackSymbol stackSymbol, Address instructionAddress) { var processId = stackSymbol.Image?.ProcessId ?? 0; var imageName = stackSymbol.Image?.FileName; @@ -306,6 +361,15 @@ ulong GetLocationId(IStackSymbol stackSymbol) var locationProto = new pb.Location(); locationProto.Id = locationId; + // Store the RVA (see comment in GetMappingId). + ulong absAddr = unchecked((ulong)instructionAddress.Value); + locationProto.Address = absAddr; + if (stackSymbol.Image != null) + { + ulong mid = GetMappingId(stackSymbol.Image); + locationProto.MappingId = mid; + locationProto.Address = absAddr - mappingBaseAddresses[mid]; + } pb.Line line; if (options.includeInlinedFunctions && stackSymbol.InlinedFunctionNames != null) @@ -396,11 +460,66 @@ long GetStringId(string str) private readonly Options options; Dictionary locations; + Dictionary unsymbolizedLocations; ulong nextLocationId; Dictionary functions; ulong nextFunctionId; + Dictionary mappings; + ulong nextMappingId; + HashSet mappingsWithUnsymbolized; + // Maps mapping ID -> absolute base address of the loaded image. + // Used to convert absolute VAs to RVAs for Location.address. 
+ Dictionary mappingBaseAddresses; + + static string FormatBreakpadBuildId(IImage image) + { + if (image.Pdb == null) + return null; + return image.Pdb.Id.ToString("N").ToLowerInvariant() + + image.Pdb.Age.ToString("x"); + } + + ulong GetMappingId(IImage image) + { + // Key by image path to deduplicate mappings for the same binary. + string key = image.Path ?? image.FileName ?? ""; + ulong mappingId; + if (!mappings.TryGetValue(key, out mappingId)) + { + mappingId = nextMappingId++; + mappings.Add(key, mappingId); + + var mappingProto = new pb.Mapping(); + mappingProto.Id = mappingId; + + // Workaround for pprof symbolization servers that assume ELF binaries: + // Some servers reject memory_start values that don't match standard + // Linux load addresses (0, 0x400000, 0x8048000) when ElfHeaders are + // absent. Windows PE/PDB binaries never have ElfHeaders, so any real + // Windows load address causes a symbolization failure. + // By setting memory_start=0 and memory_limit=module_size, and storing + // RVAs in Location.address, we ensure compatibility with servers that + // use memory_start==0 as a passthrough for RVA-based symbol lookup. + ulong baseAddr = unchecked((ulong)image.AddressRange.BaseAddress.Value); + mappingBaseAddresses.Add(mappingId, baseAddr); + mappingProto.MemoryStart = 0; + mappingProto.MemoryLimit = (ulong)image.Size.Bytes; + mappingProto.FileOffset = 0; + mappingProto.Filename = GetStringId(image.Path ?? image.FileName ?? ""); + + string buildId = FormatBreakpadBuildId(image); + if (buildId != null) + mappingProto.BuildId = GetStringId(buildId); + + // Has* flags are finalized in Write() after all samples are processed. 
+ + profile.Mapping.Add(mappingProto); + } + return mappingId; + } + Dictionary strings; long nextStringId; diff --git a/Program.cs b/Program.cs index fdc846d..1fee59a 100644 --- a/Program.cs +++ b/Program.cs @@ -14,6 +14,7 @@ using System; using System.Collections.Generic; +using System.Linq; using CommandLine; using CommandLine.Text; @@ -86,8 +87,16 @@ public static IEnumerable Examples HelpText = "Whether chrome.exe processes are split by type (parsed from command line).")] public bool splitChromeProcesses { get; set; } + [Option("noSplitChromeProcesses", Required = false, Default = false, + HelpText = "Merge all chrome.exe processes under a single heading.")] + public bool noSplitChromeProcesses { get; set; } + [Option("loadSymbols", Required = false, Default = true, HelpText = "Whether symbols should be loaded.")] public bool? loadSymbols { get; set; } + + [Option("listImageIds", Required = false, Default = false, + HelpText = "List all unique image (module) names found in the trace and exit.")] + public bool listImageIds { get; set; } } static void Main(string[] args) @@ -123,12 +132,61 @@ static void RunWithOptions(Options opts) ICpuSampleDataSource cpuSampleData = pendingCpuSampleData.Result; + // --listImageIds: list all unique images with their Breakpad build IDs and exit. 
+ if (opts.listImageIds) + { + var images = new Dictionary>(); + var seenProcessIds = new HashSet(); + foreach (var sample in cpuSampleData.Samples) + { + if (sample.Process == null) + continue; + if (!seenProcessIds.Add(sample.Process.Id)) + continue; + foreach (var image in sample.Process.Images) + { + string fileName = image.FileName; + if (fileName == null) + continue; + if (!images.ContainsKey(fileName)) + images[fileName] = new List<(string, string, long)>(); + + string buildId = null; + if (image.Pdb != null) + { + buildId = image.Pdb.Id.ToString("N").ToLowerInvariant() + + image.Pdb.Age.ToString("x"); + } + long timestamp = image.Timestamp; + string path = image.Path; + + if (!images[fileName].Any(e => e.buildId == buildId && e.timestamp == timestamp)) + { + images[fileName].Add((path, buildId, timestamp)); + } + } + } + + foreach (var kvp in images.OrderBy(k => k.Key)) + { + Console.WriteLine($"{kvp.Key}:"); + foreach (var (path, buildId, timestamp) in kvp.Value) + { + Console.WriteLine($" Path: {path}"); + Console.WriteLine($" BuildId: {buildId ?? "(none)"}"); + Console.WriteLine($" Timestamp: {timestamp}"); + Console.WriteLine(); + } + } + return; + } + var profileOpts = new ProfileWriter.Options(); profileOpts.etlFileName = opts.etlFileName; profileOpts.includeInlinedFunctions = opts.includeInlinedFunctions; profileOpts.includeProcessIds = opts.includeProcessIds; profileOpts.includeProcessAndThreadIds = opts.includeProcessAndThreadIds; - profileOpts.splitChromeProcesses = opts.splitChromeProcesses; + profileOpts.splitChromeProcesses = opts.splitChromeProcesses && !opts.noSplitChromeProcesses; profileOpts.stripSourceFileNamePrefix = opts.stripSourceFileNamePrefix; profileOpts.timeStart = opts.timeStart ?? 0; profileOpts.timeEnd = opts.timeEnd ?? decimal.MaxValue; diff --git a/README.md b/README.md index 57f48e2..afd9f0c 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,9 @@ symbolizing traces if set, otherwise it uses WPA defaults. 
## Building -Build the provided Visual Studio Solution with VS 2019. +Build with the .NET 10 SDK: + + dotnet build -c Release ### Nuget dependencies (included in solution) - CommandLineParser v2.8.0 @@ -36,6 +38,14 @@ Export inlined functions and thread/process ids: EtwToPprof --includeInlinedFunctions --includeProcessAndThreadIds trace.etl +Merge all chrome.exe processes under a single heading: + + EtwToPprof --noSplitChromeProcesses trace.etl + +List all loaded images with their Breakpad build IDs: + + EtwToPprof --listImageIds trace.etl + ## Command line flags -o, --outputFileName (Default: profile.pb.gz) Output file name for gzipped pprof profile. @@ -56,14 +66,25 @@ Export inlined functions and thread/process ids: --splitChromeProcesses (Default: true) Whether chrome.exe processes are split by type (parsed from command line). + --noSplitChromeProcesses (Default: false) Merge all chrome.exe processes under a single heading. + --loadSymbols (Default: true) Whether symbols should be loaded. + --listImageIds (Default: false) List all unique image (module) names with Breakpad build IDs and exit. + --help Display this help screen. --version Display version information. etlFileName (pos. 0) Required. ETL trace file name. +## Offline Symbolization + +Profiles include Mapping entries with Breakpad-format build IDs and RVA-based +addresses. Unsymbolized frames are emitted as bare locations (address + mapping +only) so that pprof symbolization servers can resolve them using symbol servers +without requiring local PDBs. + ## Disclaimer: **This is not an officially supported Google product.**