From 24618a8ef7b28fe74cad9733a34a879c5e8e5edd Mon Sep 17 00:00:00 2001 From: Max <34987259+mparisi20@users.noreply.github.com> Date: Sat, 4 Apr 2026 15:32:17 -0400 Subject: [PATCH 1/2] Extract splits from PDB Section Contributions - Upgrade pdb crate to pdb2 to enable parsing of S_SECTION and S_COFFGROUP symbol records - Process the Section Contributions stream from the PDB to deduce ObjSplits for every ObjSection. As with map-based parsing, this uses the 'rename' attribute to disambiguate contributions to various COFF groups within the same section --- Cargo.lock | 16 +-- Cargo.toml | 2 +- src/cmd/xex.rs | 31 +++++- src/obj/splits.rs | 17 +++- src/util/xpdb.rs | 247 ++++++++++++++++++++++++++++++++++++++++------ 5 files changed, 270 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6526f69a..6253b3d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -413,9 +413,9 @@ dependencies = [ [[package]] name = "fallible-iterator" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" [[package]] name = "fastrand" @@ -671,7 +671,7 @@ dependencies = [ "object 0.37.1", "once_cell", "owo-colors", - "pdb", + "pdb2", "powerpc", "regex", "serde", @@ -1068,10 +1068,10 @@ dependencies = [ ] [[package]] -name = "pdb" -version = "0.8.0" +name = "pdb2" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82040a392923abe6279c00ab4aff62d5250d1c8555dc780e4b02783a7aa74863" +checksum = "408d6fa13d943ee4b76ffda52cc28e817df9c2c4b2c46bd9aec8bff574377e1a" dependencies = [ "fallible-iterator", "scroll", @@ -1356,9 +1356,9 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "scroll" -version = "0.11.0" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04c565b551bafbef4157586fa379538366e4385d42082f255bfd96e4fe8519da" +checksum = "6ab8598aa408498679922eff7fa985c25d58a90771bd6be794434c5277eab1a6" [[package]] name = "serde" diff --git a/Cargo.toml b/Cargo.toml index e8224a5d..8cb44a53 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -71,7 +71,7 @@ tracing-attributes = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } xxhash-rust = { version = "0.8", features = ["xxh3"] } zerocopy = { version = "0.8", features = ["derive"] } -pdb = "0.8.0" +pdb2 = "0.10.1" lzxd = "0.2.6" [target.'cfg(target_env = "musl")'.dependencies] diff --git a/src/cmd/xex.rs b/src/cmd/xex.rs index 39ee8d43..ffc657ab 100644 --- a/src/cmd/xex.rs +++ b/src/cmd/xex.rs @@ -1,5 +1,5 @@ use std::{ - collections::BTreeMap, + collections::{BTreeMap, HashSet}, fs::{self, DirBuilder, File}, io::{BufWriter, Write}, time::UNIX_EPOCH, @@ -32,7 +32,7 @@ use crate::{ }, obj::{ best_match_for_reloc, ObjInfo, ObjKind, ObjRelocKind, ObjSectionKind, ObjSections, - ObjSymbolKind, ObjSymbolScope, SectionIndex, SymbolIndex, + ObjSymbolKind, ObjSymbolScope, ObjUnit, SectionIndex, SymbolIndex, }, util::{ asm::write_asm, @@ -472,7 +472,32 @@ fn load_analyze_xex(config: &ProjectConfig) -> Result { if let Some(pdb_path) = &config.base.pdb { let pdb_path: Utf8NativePathBuf = pdb_path.with_encoding(); - let pdb_syms = try_parse_pdb(&pdb_path, &obj.sections)?; + let (pdb_units, pdb_splits, pdb_syms) = try_parse_pdb(&pdb_path, &obj.sections)?; + + // Apply all the splits + // FIXME: Don't add splits unconditionally here; it may conflict with + // user-provided splits. For now, users can comment out the pdb key + // in config.yml after initial analysis + for (i, splits_for_section) in pdb_splits.into_iter().enumerate() { + for (start, split) in splits_for_section.iter() { + obj.sections[i as u32].splits.push(start, split.clone()); + } + } + + // Apply all the units, discarding the ones with no splits + let mut nonempty_mods = HashSet::new(); + for split in obj.sections.all_splits() { + nonempty_mods.insert(&split.3.unit); + } + for unit in pdb_units { + if nonempty_mods.contains(&unit) { + obj.link_order.push(ObjUnit { name: unit, autogenerated: false, order: None }); + } else { + log::debug!("Module {} is empty", unit); + } + } + + // Apply all the symbols for mut sym in pdb_syms.into_iter() { if !is_reg_intrinsic(&sym.name) && sym.name != "__NLG_Return" { match obj.sections.at_address(sym.address as u32).ok() { diff --git a/src/obj/splits.rs b/src/obj/splits.rs index 9681d4af..005ff2e6 100644 --- a/src/obj/splits.rs +++ b/src/obj/splits.rs @@ -9,7 +9,7 @@ use crate::{ }; /// Marks a split point within a section. -#[derive(Debug, Clone, Eq, PartialEq)] +#[derive(Debug, Default, Clone, Eq, PartialEq)] pub struct ObjSplit { pub unit: String, pub end: u32, @@ -106,6 +106,21 @@ impl ObjSplits { .map_err(|_| anyhow!("Multiple splits for unit {}", unit)) } + /// Get the ObjSplit provided by unit with the specified rename, + /// if it exists + pub fn for_unit_rename( + &mut self, + unit: &str, + rename: Option<&str>, + ) -> Result> { + self.splits + .iter_mut() + .flat_map(|(addr, v)| v.iter_mut().map(move |u| (*addr, u))) + .filter(|(_, split)| split.unit == unit && split.rename.as_deref() == rename) + .at_most_one() + .map_err(|_| anyhow!("Multiple splits for unit {} with rename {:?}", unit, rename)) + } + pub fn push(&mut self, address: u32, split: ObjSplit) { let out = self.splits.entry(address).or_default(); out.push(split); diff --git a/src/util/xpdb.rs b/src/util/xpdb.rs index 1bb43ad0..c7dc8b0a 100644 --- a/src/util/xpdb.rs +++ b/src/util/xpdb.rs @@ -6,20 +6,20 @@ use std::{ use anyhow::{ensure, Result}; use itertools::Itertools; -use pdb::{self, FallibleIterator}; +use pdb2::{self, FallibleIterator}; use typed_path::Utf8NativePathBuf; use crate::{ analysis::cfa::SectionAddress, obj::{ - ObjDataKind, ObjSection, ObjSections, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags, - ObjSymbolKind, ObjSymbolScope, + ObjDataKind, ObjSection, ObjSections, ObjSplit, ObjSplits, ObjSymbol, ObjSymbolFlagSet, + ObjSymbolFlags, ObjSymbolKind, ObjSymbolScope, }, }; /// This map is only used to give descriptive names to the SymbolKinds that /// the pdb crate cannot parse; it doesn't need to be exhaustive. -fn sym_kind_name(kind: pdb::SymbolKind) -> &'static str { +fn sym_kind_name(kind: pdb2::SymbolKind) -> &'static str { match kind { 0x1012 => "S_FRAMEPROC", 0x1136 => "S_SECTION", @@ -28,7 +28,7 @@ fn sym_kind_name(kind: pdb::SymbolKind) -> &'static str { } } -fn warn_unsupported_sym_kind(sym: &pdb::Symbol, set: &mut HashSet) { +fn warn_unsupported_sym_kind(sym: &pdb2::Symbol, set: &mut HashSet) { if set.insert(sym.raw_kind()) { log::warn!( "Unsupported symbol kind: {} (0x{:X})", @@ -40,8 +40,8 @@ fn warn_unsupported_sym_kind(sym: &pdb::Symbol, set: &mut HashSet SectionAddress { let s_addr = pdb_offs.to_section_offset(pdbmap).unwrap_or_default(); SectionAddress { @@ -51,21 +51,54 @@ fn to_section_addr( } } +fn section_addr_to_virtual_addr(section_addrs: &ObjSections, s_addr: &SectionAddress) -> u64 { + let sect_base = section_addrs.get(s_addr.section).unwrap_or(&ObjSection::default()).address; + s_addr.address as u64 + sect_base +} + fn to_virtual_address( - pdbmap: &pdb::AddressMap, + pdbmap: &pdb2::AddressMap, section_addrs: &ObjSections, - pdb_offs: &pdb::PdbInternalSectionOffset, -) -> Result { - let s_addr = to_section_addr(pdbmap, pdb_offs); - let sect_base = section_addrs.get(s_addr.section).unwrap_or(&ObjSection::default()).address; - Ok(s_addr.address as u64 + sect_base) + pdb_offs: &pdb2::PdbInternalSectionOffset, +) -> u64 { + section_addr_to_virtual_addr(section_addrs, &to_section_addr(pdbmap, pdb_offs)) +} + +/// Section contributions from a given module are not guaranteed to form +/// a single, continuous block per group, as one might expect. This is the case +/// at least for the .xidata group. The solution implemented here is to add +/// "pseudo-modules" as needed to hold any additional, non-contiguous chunks. +/// In practice, this should only serve to handle a few unusual contribution +/// sequences in the XDK modules +struct PseudoModuleState { + /// Pointer into mod_indices + pub curr: i32, + /// Elements after the first are pseudo-modules + pub mod_indices: Vec, +} + +impl PseudoModuleState { + const UNSEEN: i32 = -1; +} + +#[derive(Debug, PartialEq, PartialOrd, Eq, Ord)] +struct CoffGroup { + /// Starting address of the group + pub address: u64, + /// jeff section number + pub section: u32, + /// Group size in bytes + pub size: u32, + /// Full COFF group name + pub name: String, } +/// Extract translation units, splits, and symbols from a PDB pub fn try_parse_pdb( path: &Utf8NativePathBuf, section_addrs: &ObjSections, -) -> Result> { - let mut dbfile = pdb::PDB::open(File::open(path)?)?; +) -> Result<(Vec, Vec, Vec)> { + let mut dbfile = pdb2::PDB::open(File::open(path)?)?; // Ensure pdb sections match the exe sections and that all the names match { @@ -104,8 +137,10 @@ pub fn try_parse_pdb( let mut syms: BTreeMap = BTreeMap::new(); let dbi = dbfile.debug_information()?; + + // Parse symbols let global_symtable = dbfile.global_symbols()?; - let mut all_syms: Vec = vec![]; + let mut all_syms: Vec = vec![]; // Collect Global and Module symbol streams into one combined iterator let mut global_syms = global_symtable.iter(); @@ -128,10 +163,11 @@ pub fn try_parse_pdb( } let all_syms_iter = all_syms.into_iter(); + let mut groups: Vec = vec![]; let mut ldata_dupes: HashMap = HashMap::new(); for symbol in all_syms_iter { match symbol.parse() { - Ok(pdb::SymbolData::Public(data)) => { + Ok(pdb2::SymbolData::Public(data)) => { let symaddr = to_section_addr(&pdbmap, &data.offset); let obj_sym = syms.entry(symaddr).or_default(); @@ -145,14 +181,14 @@ pub fn try_parse_pdb( // TODO: Not all S_PUB32 records represent functions or objects; // Some may just be labels, which can be skipped obj_sym.name = data.name.to_string().into(); - obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset)?; + obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset); obj_sym.section = Some(symaddr.section); obj_sym.flags = ObjSymbolFlagSet(ObjSymbolFlags::Global.into()); obj_sym.kind = if data.function { ObjSymbolKind::Function } else { ObjSymbolKind::Object }; obj_sym.data_kind = ObjDataKind::Unknown; } - Ok(pdb::SymbolData::Data(data)) => { + Ok(pdb2::SymbolData::Data(data)) => { let symaddr = to_section_addr(&pdbmap, &data.offset); let obj_sym = syms.entry(symaddr).or_default(); @@ -162,6 +198,9 @@ pub fn try_parse_pdb( } else { obj_sym.flags.set_scope(ObjSymbolScope::Local); obj_sym.kind = ObjSymbolKind::Object; + // TODO: Now that we extract object files and splits, we can + // update this renaming so it is only done for repeat + // names of symbols in the same file let name = data.name.to_string().clone(); let c = *ldata_dupes.entry(name.to_string()).and_modify(|c| *c += 1).or_insert(1); @@ -171,16 +210,16 @@ pub fn try_parse_pdb( data.name.to_string().into() }; obj_sym.name = name; - obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset)?; + obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset); obj_sym.section = Some(symaddr.section); } // TODO: We can also deduce the size by using the type // field to index into the TPI. // Build a TypeFinder, then use it to compute object sizes // while iterating through the data symbols. - // See https://docs.rs/pdb/latest/pdb/struct.ItemInformation.html + // See https://docs.rs/pdb2/latest/pdb2/struct.ItemInformation.html } - Ok(pdb::SymbolData::ThreadStorage(data)) => { + Ok(pdb2::SymbolData::ThreadStorage(data)) => { let symaddr = to_section_addr(&pdbmap, &data.offset); let obj_sym = syms.entry(symaddr).or_default(); @@ -191,13 +230,13 @@ pub fn try_parse_pdb( obj_sym.flags.set_scope(ObjSymbolScope::Local); obj_sym.kind = ObjSymbolKind::Object; obj_sym.name = data.name.to_string().into(); - obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset)?; + obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset); obj_sym.section = Some(symaddr.section); } // TODO: Above note for DATA records also applies here } - Ok(pdb::SymbolData::Procedure(data)) => { + Ok(pdb2::SymbolData::Procedure(data)) => { let symaddr = to_section_addr(&pdbmap, &data.offset); let obj_sym = syms.entry(symaddr).or_default(); @@ -211,11 +250,11 @@ pub fn try_parse_pdb( obj_sym.flags.set_scope(ObjSymbolScope::Local); obj_sym.kind = ObjSymbolKind::Function; obj_sym.name = data.name.to_string().into(); - obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset)?; + obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset); obj_sym.section = Some(symaddr.section); } } - Ok(pdb::SymbolData::Thunk(data)) => { + Ok(pdb2::SymbolData::Thunk(data)) => { let symaddr = to_section_addr(&pdbmap, &data.offset); let obj_sym = syms.entry(symaddr).or_default(); @@ -224,10 +263,19 @@ pub fn try_parse_pdb( obj_sym.size_known = true; obj_sym.align = Some(4); } - // TODO: S_SECTION and S_COFFGROUP records are also useful, - // but pdb 0.8.0 apparently can't parse them + Ok(pdb2::SymbolData::CoffGroup(data)) => groups.push(CoffGroup { + address: to_virtual_address(&pdbmap, section_addrs, &data.offset), + size: data.cb, + name: data.name.to_string().into(), + section: to_section_addr(&pdbmap, &data.offset).section, + }), + Ok(pdb2::SymbolData::Section(_data)) => { + // TODO: We already have most section info from the EXE, but + // S_SECTION records contain the unabbreviated section names, + // which serve as an alternative solution for .embsec_ issues + } Ok(_) => {} - Err(pdb::Error::UnimplementedSymbolKind(_)) => { + Err(pdb2::Error::UnimplementedSymbolKind(_)) => { warn_unsupported_sym_kind(&symbol, &mut unsupported_sym_kinds); } Err(parse_error) => { @@ -236,6 +284,145 @@ pub fn try_parse_pdb( } } + // Sort by address and append a sentinel + groups.sort(); + groups.push(CoffGroup { + address: groups[groups.len() - 1].address + groups[groups.len() - 1].size as u64, + size: 0, + name: "END".to_string(), + section: u32::MAX, + }); + log::debug!("COFF Sections"); + for sec in section_addrs.iter() { + log::debug!("#{}: name = {}, addr = 0x{:X}", sec.0, sec.1.name, sec.1.address); + } + log::debug!("COFF Groups:"); + for grp in groups.iter() { + log::debug!( + "address: 0x{:X}, section: {}, size: 0x{:X}, name: {}", + grp.address, + grp.section, + grp.size, + grp.name + ); + } + + // Begin parsing splits + let mut splits_by_section: Vec = vec![]; + splits_by_section.resize_with(section_addrs.len() as usize, Default::default); + + let num_modules = dbi.modules()?.count().unwrap_or(0) as i32; + + // The next available module index, to be incremented each time a new + // pseudo-module is created + let mut next_avail = num_modules; + let mut module_map: HashMap = HashMap::new(); + let mut module_names: Vec = vec![]; + for i in 0..num_modules { + module_map + .insert(i, PseudoModuleState { curr: PseudoModuleState::UNSEEN, mod_indices: vec![i] }); + module_names.push(format!("module_{}.cpp", i)); + } + + // curr_grp will increase monotonically, since contributions are sorted + let mut curr_grp = PseudoModuleState::UNSEEN; + let mut curr_mod = PseudoModuleState::UNSEEN; + let mut curr_split: &mut ObjSplit = &mut Default::default(); + + let mut contribs = dbi.section_contributions()?; + while let Some(contrib) = contribs.next()? { + // TODO: Extract file names from the Sources substream to replace the + // auto-generated names. Take only the base name, fix the extension, + // and disambiguate identical names with a prefix + let s_addr = to_section_addr(&pdbmap, &contrib.offset); + let sec_idx = s_addr.section as usize; + let start = section_addr_to_virtual_addr(section_addrs, &s_addr); + let end = start + contrib.size as u64; + let mut mod_idx = contrib.module as i32; + + let is_new_grp = start >= groups[(curr_grp + 1) as usize].address; + let is_new_mod = mod_idx != curr_mod; + if is_new_grp { + // Reset state + for key in module_map.iter_mut() { + key.1.curr = PseudoModuleState::UNSEEN; + } + // Skip empty groups + loop { + curr_grp += 1; + if start < groups[(curr_grp + 1) as usize].address { + break; + } + } + } + + let ent = module_map.get_mut(&mod_idx).expect("Out-of-range module index"); + if is_new_grp || is_new_mod { + // This increments to 0 the first time around per group, but + // if it increments again, we need a pseudo-module + ent.curr += 1; + if ent.curr >= ent.mod_indices.len() as i32 { + ent.mod_indices.push(next_avail); + module_names.push(format!( + "module_{}_part_{}.cpp", + ent.mod_indices[0], + ent.curr + 1 + )); + log::info!( + "Created pseudo-module #{}, named {}", + next_avail, + module_names[next_avail as usize] + ); + next_avail += 1; + assert!( + module_names.len() == next_avail as usize, + "name table size should track with module count" + ); + } + curr_mod = mod_idx; + + mod_idx = ent.mod_indices[ent.curr as usize]; + let mod_name = &module_names[mod_idx as usize]; + let rename = if groups[curr_grp as usize].name == section_addrs[sec_idx as u32].name { + None + } else { + Some(groups[curr_grp as usize].name.clone()) + }; + + splits_by_section[sec_idx].push(start as u32, ObjSplit { + unit: mod_name.clone(), + end: end as u32, + align: None, + autogenerated: false, + common: false, + skip: false, + rename: rename.clone(), + }); + // Get a mutable reference to the ObjSplit we just pushed, so + // subsequent contributions to it can update its size + curr_split = splits_by_section[sec_idx] + .for_unit_rename(mod_name, rename.as_deref())? + .expect("Failed to get newly-created ObjSplit") + .1; + } + // FIXME: This currently requires detect_objects=false to work. + // Deducing exact object sizes from the PDB should fix this + curr_split.end = end as u32; + } + + for (i, splits) in splits_by_section.iter().enumerate() { + log::debug!("Splits for section {}:", i); + for split in splits.iter() { + log::debug!( + "From {}: 0x{:X} - 0x{:X}, rename {:?}", + split.1.unit, + split.0, + split.1.end, + split.1.rename + ); + } + } + let mut addr_vec = syms.into_values().collect_vec(); // weed out xidata and _RtlCheckStack symbols (jeff finds them later) @@ -266,5 +453,5 @@ pub fn try_parse_pdb( }; } - Ok(addr_vec) + Ok((module_names, splits_by_section, addr_vec)) } From 16fb65491c1a5079407990b851d553fae5a9afea Mon Sep 17 00:00:00 2001 From: Max <34987259+mparisi20@users.noreply.github.com> Date: Sat, 4 Apr 2026 23:08:10 -0400 Subject: [PATCH 2/2] Fix a couple of bugs in PDB splits - Check for and discard symbols in section 0. These symbols are either invalid or undefined. - Get rid of the 'pseudo-module' concept, as it is unnecessary. Units in splits.txt can simply contain multiple splits for the same section --- src/obj/splits.rs | 4 ++- src/util/xpdb.rs | 81 ++++++++++++----------------------------------- 2 files changed, 24 insertions(+), 61 deletions(-) diff --git a/src/obj/splits.rs b/src/obj/splits.rs index 005ff2e6..cbdcce06 100644 --- a/src/obj/splits.rs +++ b/src/obj/splits.rs @@ -121,10 +121,12 @@ impl ObjSplits { .map_err(|_| anyhow!("Multiple splits for unit {} with rename {:?}", unit, rename)) } - pub fn push(&mut self, address: u32, split: ObjSplit) { + /// Add the split, returning a mutable reference to it within the vector + pub fn push(&mut self, address: u32, split: ObjSplit) -> &mut ObjSplit { let out = self.splits.entry(address).or_default(); out.push(split); out.sort_by_key(|s| s.end); + out.last_mut().unwrap() } pub fn remove(&mut self, address: u32) -> Option> { self.splits.remove(&address) } diff --git a/src/util/xpdb.rs b/src/util/xpdb.rs index c7dc8b0a..283dde01 100644 --- a/src/util/xpdb.rs +++ b/src/util/xpdb.rs @@ -64,23 +64,6 @@ fn to_virtual_address( section_addr_to_virtual_addr(section_addrs, &to_section_addr(pdbmap, pdb_offs)) } -/// Section contributions from a given module are not guaranteed to form -/// a single, continuous block per group, as one might expect. This is the case -/// at least for the .xidata group. The solution implemented here is to add -/// "pseudo-modules" as needed to hold any additional, non-contiguous chunks. -/// In practice, this should only serve to handle a few unusual contribution -/// sequences in the XDK modules -struct PseudoModuleState { - /// Pointer into mod_indices - pub curr: i32, - /// Elements after the first are pseudo-modules - pub mod_indices: Vec, -} - -impl PseudoModuleState { - const UNSEEN: i32 = -1; -} - #[derive(Debug, PartialEq, PartialOrd, Eq, Ord)] struct CoffGroup { /// Starting address of the group @@ -168,6 +151,9 @@ pub fn try_parse_pdb( for symbol in all_syms_iter { match symbol.parse() { Ok(pdb2::SymbolData::Public(data)) => { + if data.offset.section == 0 { + continue; + } let symaddr = to_section_addr(&pdbmap, &data.offset); let obj_sym = syms.entry(symaddr).or_default(); @@ -189,6 +175,9 @@ pub fn try_parse_pdb( obj_sym.data_kind = ObjDataKind::Unknown; } Ok(pdb2::SymbolData::Data(data)) => { + if data.offset.section == 0 { + continue; + } let symaddr = to_section_addr(&pdbmap, &data.offset); let obj_sym = syms.entry(symaddr).or_default(); @@ -220,6 +209,9 @@ pub fn try_parse_pdb( // See https://docs.rs/pdb2/latest/pdb2/struct.ItemInformation.html } Ok(pdb2::SymbolData::ThreadStorage(data)) => { + if data.offset.section == 0 { + continue; + } let symaddr = to_section_addr(&pdbmap, &data.offset); let obj_sym = syms.entry(symaddr).or_default(); @@ -237,6 +229,9 @@ pub fn try_parse_pdb( // TODO: Above note for DATA records also applies here } Ok(pdb2::SymbolData::Procedure(data)) => { + if data.offset.section == 0 { + continue; + } let symaddr = to_section_addr(&pdbmap, &data.offset); let obj_sym = syms.entry(symaddr).or_default(); @@ -255,6 +250,9 @@ pub fn try_parse_pdb( } } Ok(pdb2::SymbolData::Thunk(data)) => { + if data.offset.section == 0 { + continue; + } let symaddr = to_section_addr(&pdbmap, &data.offset); let obj_sym = syms.entry(symaddr).or_default(); @@ -313,20 +311,14 @@ pub fn try_parse_pdb( let num_modules = dbi.modules()?.count().unwrap_or(0) as i32; - // The next available module index, to be incremented each time a new - // pseudo-module is created - let mut next_avail = num_modules; - let mut module_map: HashMap = HashMap::new(); let mut module_names: Vec = vec![]; for i in 0..num_modules { - module_map - .insert(i, PseudoModuleState { curr: PseudoModuleState::UNSEEN, mod_indices: vec![i] }); module_names.push(format!("module_{}.cpp", i)); } // curr_grp will increase monotonically, since contributions are sorted - let mut curr_grp = PseudoModuleState::UNSEEN; - let mut curr_mod = PseudoModuleState::UNSEEN; + let mut curr_grp = -1; + let mut curr_mod = -1; let mut curr_split: &mut ObjSplit = &mut Default::default(); let mut contribs = dbi.section_contributions()?; @@ -338,15 +330,11 @@ pub fn try_parse_pdb( let sec_idx = s_addr.section as usize; let start = section_addr_to_virtual_addr(section_addrs, &s_addr); let end = start + contrib.size as u64; - let mut mod_idx = contrib.module as i32; + let mod_idx = contrib.module as i32; let is_new_grp = start >= groups[(curr_grp + 1) as usize].address; let is_new_mod = mod_idx != curr_mod; if is_new_grp { - // Reset state - for key in module_map.iter_mut() { - key.1.curr = PseudoModuleState::UNSEEN; - } // Skip empty groups loop { curr_grp += 1; @@ -356,32 +344,9 @@ pub fn try_parse_pdb( } } - let ent = module_map.get_mut(&mod_idx).expect("Out-of-range module index"); if is_new_grp || is_new_mod { - // This increments to 0 the first time around per group, but - // if it increments again, we need a pseudo-module - ent.curr += 1; - if ent.curr >= ent.mod_indices.len() as i32 { - ent.mod_indices.push(next_avail); - module_names.push(format!( - "module_{}_part_{}.cpp", - ent.mod_indices[0], - ent.curr + 1 - )); - log::info!( - "Created pseudo-module #{}, named {}", - next_avail, - module_names[next_avail as usize] - ); - next_avail += 1; - assert!( - module_names.len() == next_avail as usize, - "name table size should track with module count" - ); - } curr_mod = mod_idx; - mod_idx = ent.mod_indices[ent.curr as usize]; let mod_name = &module_names[mod_idx as usize]; let rename = if groups[curr_grp as usize].name == section_addrs[sec_idx as u32].name { None @@ -389,7 +354,9 @@ pub fn try_parse_pdb( Some(groups[curr_grp as usize].name.clone()) }; - splits_by_section[sec_idx].push(start as u32, ObjSplit { + // Get a mutable reference to the ObjSplit we just pushed, so + // subsequent contributions to it can update its size + curr_split = splits_by_section[sec_idx].push(start as u32, ObjSplit { unit: mod_name.clone(), end: end as u32, align: None, @@ -398,12 +365,6 @@ pub fn try_parse_pdb( skip: false, rename: rename.clone(), }); - // Get a mutable reference to the ObjSplit we just pushed, so - // subsequent contributions to it can update its size - curr_split = splits_by_section[sec_idx] - .for_unit_rename(mod_name, rename.as_deref())? - .expect("Failed to get newly-created ObjSplit") - .1; } // FIXME: This currently requires detect_objects=false to work. // Deducing exact object sizes from the PDB should fix this