From 24618a8ef7b28fe74cad9733a34a879c5e8e5edd Mon Sep 17 00:00:00 2001
From: Max <34987259+mparisi20@users.noreply.github.com>
Date: Sat, 4 Apr 2026 15:32:17 -0400
Subject: [PATCH 1/2] Extract splits from PDB Section Contributions

- Upgrade pdb crate to pdb2 to enable parsing of
  S_SECTION and S_COFFGROUP symbol records
- Process the Section Contributions stream from the
  PDB to deduce ObjSplits for every ObjSection.
  As with map-based parsing, this uses the 'rename'
  attribute to disambiguate contributions to various
  COFF groups within the same section.
---
 Cargo.lock        |  16 +--
 Cargo.toml        |   2 +-
 src/cmd/xex.rs    |  31 +++++-
 src/obj/splits.rs |  17 +++-
 src/util/xpdb.rs  | 247 ++++++++++++++++++++++++++++++++++++++++------
 5 files changed, 270 insertions(+), 43 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 6526f69a..6253b3d9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -413,9 +413,9 @@ dependencies = [
 
 [[package]]
 name = "fallible-iterator"
-version = "0.2.0"
+version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7"
+checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
 
 [[package]]
 name = "fastrand"
@@ -671,7 +671,7 @@ dependencies = [
  "object 0.37.1",
  "once_cell",
  "owo-colors",
- "pdb",
+ "pdb2",
  "powerpc",
  "regex",
  "serde",
@@ -1068,10 +1068,10 @@ dependencies = [
 ]
 
 [[package]]
-name = "pdb"
-version = "0.8.0"
+name = "pdb2"
+version = "0.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "82040a392923abe6279c00ab4aff62d5250d1c8555dc780e4b02783a7aa74863"
+checksum = "408d6fa13d943ee4b76ffda52cc28e817df9c2c4b2c46bd9aec8bff574377e1a"
 dependencies = [
  "fallible-iterator",
  "scroll",
@@ -1356,9 +1356,9 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
 
 [[package]]
 name = "scroll"
-version = "0.11.0"
+version = "0.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04c565b551bafbef4157586fa379538366e4385d42082f255bfd96e4fe8519da"
+checksum = "6ab8598aa408498679922eff7fa985c25d58a90771bd6be794434c5277eab1a6"
 
 [[package]]
 name = "serde"
diff --git a/Cargo.toml b/Cargo.toml
index e8224a5d..8cb44a53 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -71,7 +71,7 @@ tracing-attributes = "0.1"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 xxhash-rust = { version = "0.8", features = ["xxh3"] }
 zerocopy = { version = "0.8", features = ["derive"] }
-pdb = "0.8.0"
+pdb2 = "0.10.1"
 lzxd = "0.2.6"
 
 [target.'cfg(target_env = "musl")'.dependencies]
diff --git a/src/cmd/xex.rs b/src/cmd/xex.rs
index 39ee8d43..ffc657ab 100644
--- a/src/cmd/xex.rs
+++ b/src/cmd/xex.rs
@@ -1,5 +1,5 @@
 use std::{
-    collections::BTreeMap,
+    collections::{BTreeMap, HashSet},
     fs::{self, DirBuilder, File},
     io::{BufWriter, Write},
     time::UNIX_EPOCH,
@@ -32,7 +32,7 @@ use crate::{
     },
     obj::{
         best_match_for_reloc, ObjInfo, ObjKind, ObjRelocKind, ObjSectionKind, ObjSections,
-        ObjSymbolKind, ObjSymbolScope, SectionIndex, SymbolIndex,
+        ObjSymbolKind, ObjSymbolScope, ObjUnit, SectionIndex, SymbolIndex,
     },
     util::{
         asm::write_asm,
@@ -472,7 +472,32 @@ fn load_analyze_xex(config: &ProjectConfig) -> Result<ExeAnalyzeResult> {
 
     if let Some(pdb_path) = &config.base.pdb {
         let pdb_path: Utf8NativePathBuf = pdb_path.with_encoding();
-        let pdb_syms = try_parse_pdb(&pdb_path, &obj.sections)?;
+        let (pdb_units, pdb_splits, pdb_syms) = try_parse_pdb(&pdb_path, &obj.sections)?;
+
+        // Apply all the splits
+        // FIXME: Don't add splits unconditionally here; it may conflict with
+        // user-provided splits. For now, users can comment out the pdb key
+        // in config.yml after initial analysis
+        for (i, splits_for_section) in pdb_splits.into_iter().enumerate() {
+            for (start, split) in splits_for_section.iter() {
+                obj.sections[i as u32].splits.push(start, split.clone());
+            }
+        }
+
+        // Apply all the units, discarding the ones with no splits
+        let mut nonempty_mods = HashSet::new();
+        for split in obj.sections.all_splits() {
+            nonempty_mods.insert(&split.3.unit);
+        }
+        for unit in pdb_units {
+            if nonempty_mods.contains(&unit) {
+                obj.link_order.push(ObjUnit { name: unit, autogenerated: false, order: None });
+            } else {
+                log::debug!("Module {} is empty", unit);
+            }
+        }
+
+        // Apply all the symbols
         for mut sym in pdb_syms.into_iter() {
             if !is_reg_intrinsic(&sym.name) && sym.name != "__NLG_Return" {
                 match obj.sections.at_address(sym.address as u32).ok() {
diff --git a/src/obj/splits.rs b/src/obj/splits.rs
index 9681d4af..005ff2e6 100644
--- a/src/obj/splits.rs
+++ b/src/obj/splits.rs
@@ -9,7 +9,7 @@ use crate::{
 };
 
 /// Marks a split point within a section.
-#[derive(Debug, Clone, Eq, PartialEq)]
+#[derive(Debug, Default, Clone, Eq, PartialEq)]
 pub struct ObjSplit {
     pub unit: String,
     pub end: u32,
@@ -106,6 +106,21 @@ impl ObjSplits {
             .map_err(|_| anyhow!("Multiple splits for unit {}", unit))
     }
 
+    /// Get the ObjSplit provided by unit with the specified rename,
+    /// if it exists
+    pub fn for_unit_rename(
+        &mut self,
+        unit: &str,
+        rename: Option<&str>,
+    ) -> Result<Option<(u32, &mut ObjSplit)>> {
+        self.splits
+            .iter_mut()
+            .flat_map(|(addr, v)| v.iter_mut().map(move |u| (*addr, u)))
+            .filter(|(_, split)| split.unit == unit && split.rename.as_deref() == rename)
+            .at_most_one()
+            .map_err(|_| anyhow!("Multiple splits for unit {} with rename {:?}", unit, rename))
+    }
+
     pub fn push(&mut self, address: u32, split: ObjSplit) {
         let out = self.splits.entry(address).or_default();
         out.push(split);
diff --git a/src/util/xpdb.rs b/src/util/xpdb.rs
index 1bb43ad0..c7dc8b0a 100644
--- a/src/util/xpdb.rs
+++ b/src/util/xpdb.rs
@@ -6,20 +6,20 @@ use std::{
 
 use anyhow::{ensure, Result};
 use itertools::Itertools;
-use pdb::{self, FallibleIterator};
+use pdb2::{self, FallibleIterator};
 use typed_path::Utf8NativePathBuf;
 
 use crate::{
     analysis::cfa::SectionAddress,
     obj::{
-        ObjDataKind, ObjSection, ObjSections, ObjSymbol, ObjSymbolFlagSet, ObjSymbolFlags,
-        ObjSymbolKind, ObjSymbolScope,
+        ObjDataKind, ObjSection, ObjSections, ObjSplit, ObjSplits, ObjSymbol, ObjSymbolFlagSet,
+        ObjSymbolFlags, ObjSymbolKind, ObjSymbolScope,
     },
 };
 
 /// This map is only used to give descriptive names to the SymbolKinds that
 /// the pdb crate cannot parse; it doesn't need to be exhaustive.
-fn sym_kind_name(kind: pdb::SymbolKind) -> &'static str {
+fn sym_kind_name(kind: pdb2::SymbolKind) -> &'static str {
     match kind {
         0x1012 => "S_FRAMEPROC",
         0x1136 => "S_SECTION",
@@ -28,7 +28,7 @@ fn sym_kind_name(kind: pdb::SymbolKind) -> &'static str {
     }
 }
 
-fn warn_unsupported_sym_kind(sym: &pdb::Symbol, set: &mut HashSet<pdb::SymbolKind>) {
+fn warn_unsupported_sym_kind(sym: &pdb2::Symbol, set: &mut HashSet<pdb2::SymbolKind>) {
     if set.insert(sym.raw_kind()) {
         log::warn!(
             "Unsupported symbol kind: {} (0x{:X})",
@@ -40,8 +40,8 @@ fn warn_unsupported_sym_kind(sym: &pdb::Symbol, set: &mut HashSet<pdb::SymbolKin
 
 /// Convert to jeff's SectionAddress type
 fn to_section_addr(
-    pdbmap: &pdb::AddressMap,
-    pdb_offs: &pdb::PdbInternalSectionOffset,
+    pdbmap: &pdb2::AddressMap,
+    pdb_offs: &pdb2::PdbInternalSectionOffset,
 ) -> SectionAddress {
     let s_addr = pdb_offs.to_section_offset(pdbmap).unwrap_or_default();
     SectionAddress {
@@ -51,21 +51,54 @@ fn to_section_addr(
     }
 }
 
+fn section_addr_to_virtual_addr(section_addrs: &ObjSections, s_addr: &SectionAddress) -> u64 {
+    let sect_base = section_addrs.get(s_addr.section).unwrap_or(&ObjSection::default()).address;
+    s_addr.address as u64 + sect_base
+}
+
 fn to_virtual_address(
-    pdbmap: &pdb::AddressMap,
+    pdbmap: &pdb2::AddressMap,
     section_addrs: &ObjSections,
-    pdb_offs: &pdb::PdbInternalSectionOffset,
-) -> Result<u64> {
-    let s_addr = to_section_addr(pdbmap, pdb_offs);
-    let sect_base = section_addrs.get(s_addr.section).unwrap_or(&ObjSection::default()).address;
-    Ok(s_addr.address as u64 + sect_base)
+    pdb_offs: &pdb2::PdbInternalSectionOffset,
+) -> u64 {
+    section_addr_to_virtual_addr(section_addrs, &to_section_addr(pdbmap, pdb_offs))
+}
+
+/// Section contributions from a given module are not guaranteed to form
+/// a single, continuous block per group, as one might expect. This is the case
+/// at least for the .xidata group. The solution implemented here is to add
+/// "pseudo-modules" as needed to hold any additional, non-contiguous chunks.
+/// In practice, this should only serve to handle a few unusual contribution
+/// sequences in the XDK modules
+struct PseudoModuleState {
+    /// Pointer into mod_indices
+    pub curr: i32,
+    /// Elements after the first are pseudo-modules
+    pub mod_indices: Vec<i32>,
+}
+
+impl PseudoModuleState {
+    const UNSEEN: i32 = -1;
+}
+
+#[derive(Debug, PartialEq, PartialOrd, Eq, Ord)]
+struct CoffGroup {
+    /// Starting address of the group
+    pub address: u64,
+    /// jeff section number
+    pub section: u32,
+    /// Group size in bytes
+    pub size: u32,
+    /// Full COFF group name
+    pub name: String,
 }
 
+/// Extract translation units, splits, and symbols from a PDB
 pub fn try_parse_pdb(
     path: &Utf8NativePathBuf,
     section_addrs: &ObjSections,
-) -> Result<Vec<ObjSymbol>> {
-    let mut dbfile = pdb::PDB::open(File::open(path)?)?;
+) -> Result<(Vec<String>, Vec<ObjSplits>, Vec<ObjSymbol>)> {
+    let mut dbfile = pdb2::PDB::open(File::open(path)?)?;
 
     // Ensure pdb sections match the exe sections and that all the names match
     {
@@ -104,8 +137,10 @@ pub fn try_parse_pdb(
     let mut syms: BTreeMap<SectionAddress, ObjSymbol> = BTreeMap::new();
 
     let dbi = dbfile.debug_information()?;
+
+    // Parse symbols
     let global_symtable = dbfile.global_symbols()?;
-    let mut all_syms: Vec<pdb::Symbol> = vec![];
+    let mut all_syms: Vec<pdb2::Symbol> = vec![];
 
     // Collect Global and Module symbol streams into one combined iterator
     let mut global_syms = global_symtable.iter();
@@ -128,10 +163,11 @@ pub fn try_parse_pdb(
     }
 
     let all_syms_iter = all_syms.into_iter();
+    let mut groups: Vec<CoffGroup> = vec![];
     let mut ldata_dupes: HashMap<String, u32> = HashMap::new();
     for symbol in all_syms_iter {
         match symbol.parse() {
-            Ok(pdb::SymbolData::Public(data)) => {
+            Ok(pdb2::SymbolData::Public(data)) => {
                 let symaddr = to_section_addr(&pdbmap, &data.offset);
                 let obj_sym = syms.entry(symaddr).or_default();
 
@@ -145,14 +181,14 @@ pub fn try_parse_pdb(
                 // TODO: Not all S_PUB32 records represent functions or objects;
                 // Some may just be labels, which can be skipped
                 obj_sym.name = data.name.to_string().into();
-                obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset)?;
+                obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset);
                 obj_sym.section = Some(symaddr.section);
                 obj_sym.flags = ObjSymbolFlagSet(ObjSymbolFlags::Global.into());
                 obj_sym.kind =
                     if data.function { ObjSymbolKind::Function } else { ObjSymbolKind::Object };
                 obj_sym.data_kind = ObjDataKind::Unknown;
             }
-            Ok(pdb::SymbolData::Data(data)) => {
+            Ok(pdb2::SymbolData::Data(data)) => {
                 let symaddr = to_section_addr(&pdbmap, &data.offset);
                 let obj_sym = syms.entry(symaddr).or_default();
 
@@ -162,6 +198,9 @@ pub fn try_parse_pdb(
                 } else {
                     obj_sym.flags.set_scope(ObjSymbolScope::Local);
                     obj_sym.kind = ObjSymbolKind::Object;
+                    // TODO: Now that we extract object files and splits, we can
+                    // update this renaming so it is only done for repeat
+                    // names of symbols in the same file
                     let name = data.name.to_string().clone();
                     let c =
                         *ldata_dupes.entry(name.to_string()).and_modify(|c| *c += 1).or_insert(1);
@@ -171,16 +210,16 @@ pub fn try_parse_pdb(
                         data.name.to_string().into()
                     };
                     obj_sym.name = name;
-                    obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset)?;
+                    obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset);
                     obj_sym.section = Some(symaddr.section);
                 }
                 // TODO: We can also deduce the size by using the type
                 // field to index into the TPI.
                 // Build a TypeFinder, then use it to compute object sizes
                 // while iterating through the data symbols.
-                // See https://docs.rs/pdb/latest/pdb/struct.ItemInformation.html
+                // See https://docs.rs/pdb2/latest/pdb2/struct.ItemInformation.html
             }
-            Ok(pdb::SymbolData::ThreadStorage(data)) => {
+            Ok(pdb2::SymbolData::ThreadStorage(data)) => {
                 let symaddr = to_section_addr(&pdbmap, &data.offset);
                 let obj_sym = syms.entry(symaddr).or_default();
 
@@ -191,13 +230,13 @@ pub fn try_parse_pdb(
                     obj_sym.flags.set_scope(ObjSymbolScope::Local);
                     obj_sym.kind = ObjSymbolKind::Object;
                     obj_sym.name = data.name.to_string().into();
-                    obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset)?;
+                    obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset);
                     obj_sym.section = Some(symaddr.section);
                 }
 
                 // TODO: Above note for DATA records also applies here
             }
-            Ok(pdb::SymbolData::Procedure(data)) => {
+            Ok(pdb2::SymbolData::Procedure(data)) => {
                 let symaddr = to_section_addr(&pdbmap, &data.offset);
                 let obj_sym = syms.entry(symaddr).or_default();
 
@@ -211,11 +250,11 @@ pub fn try_parse_pdb(
                     obj_sym.flags.set_scope(ObjSymbolScope::Local);
                     obj_sym.kind = ObjSymbolKind::Function;
                     obj_sym.name = data.name.to_string().into();
-                    obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset)?;
+                    obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset);
                     obj_sym.section = Some(symaddr.section);
                 }
             }
-            Ok(pdb::SymbolData::Thunk(data)) => {
+            Ok(pdb2::SymbolData::Thunk(data)) => {
                 let symaddr = to_section_addr(&pdbmap, &data.offset);
                 let obj_sym = syms.entry(symaddr).or_default();
 
@@ -224,10 +263,19 @@ pub fn try_parse_pdb(
                 obj_sym.size_known = true;
                 obj_sym.align = Some(4);
             }
-            // TODO: S_SECTION and S_COFFGROUP records are also useful,
-            // but pdb 0.8.0 apparently can't parse them
+            Ok(pdb2::SymbolData::CoffGroup(data)) => groups.push(CoffGroup {
+                address: to_virtual_address(&pdbmap, section_addrs, &data.offset),
+                size: data.cb,
+                name: data.name.to_string().into(),
+                section: to_section_addr(&pdbmap, &data.offset).section,
+            }),
+            Ok(pdb2::SymbolData::Section(_data)) => {
+                // TODO: We already have most section info from the EXE, but
+                // S_SECTION records contain the unabbreviated section names,
+                // which serve as an alternative solution for .embsec_ issues
+            }
             Ok(_) => {}
-            Err(pdb::Error::UnimplementedSymbolKind(_)) => {
+            Err(pdb2::Error::UnimplementedSymbolKind(_)) => {
                 warn_unsupported_sym_kind(&symbol, &mut unsupported_sym_kinds);
             }
             Err(parse_error) => {
@@ -236,6 +284,145 @@ pub fn try_parse_pdb(
         }
     }
 
+    // Sort by address and append a sentinel
+    groups.sort();
+    groups.push(CoffGroup {
+        address: groups[groups.len() - 1].address + groups[groups.len() - 1].size as u64,
+        size: 0,
+        name: "END".to_string(),
+        section: u32::MAX,
+    });
+    log::debug!("COFF Sections");
+    for sec in section_addrs.iter() {
+        log::debug!("#{}: name = {}, addr = 0x{:X}", sec.0, sec.1.name, sec.1.address);
+    }
+    log::debug!("COFF Groups:");
+    for grp in groups.iter() {
+        log::debug!(
+            "address: 0x{:X}, section: {}, size: 0x{:X}, name: {}",
+            grp.address,
+            grp.section,
+            grp.size,
+            grp.name
+        );
+    }
+
+    // Begin parsing splits
+    let mut splits_by_section: Vec<ObjSplits> = vec![];
+    splits_by_section.resize_with(section_addrs.len() as usize, Default::default);
+
+    let num_modules = dbi.modules()?.count().unwrap_or(0) as i32;
+
+    // The next available module index, to be incremented each time a new
+    // pseudo-module is created
+    let mut next_avail = num_modules;
+    let mut module_map: HashMap<i32, PseudoModuleState> = HashMap::new();
+    let mut module_names: Vec<String> = vec![];
+    for i in 0..num_modules {
+        module_map
+            .insert(i, PseudoModuleState { curr: PseudoModuleState::UNSEEN, mod_indices: vec![i] });
+        module_names.push(format!("module_{}.cpp", i));
+    }
+
+    // curr_grp will increase monotonically, since contributions are sorted
+    let mut curr_grp = PseudoModuleState::UNSEEN;
+    let mut curr_mod = PseudoModuleState::UNSEEN;
+    let mut curr_split: &mut ObjSplit = &mut Default::default();
+
+    let mut contribs = dbi.section_contributions()?;
+    while let Some(contrib) = contribs.next()? {
+        // TODO: Extract file names from the Sources substream to replace the
+        // auto-generated names. Take only the base name, fix the extension,
+        // and disambiguate identical names with a prefix
+        let s_addr = to_section_addr(&pdbmap, &contrib.offset);
+        let sec_idx = s_addr.section as usize;
+        let start = section_addr_to_virtual_addr(section_addrs, &s_addr);
+        let end = start + contrib.size as u64;
+        let mut mod_idx = contrib.module as i32;
+
+        let is_new_grp = start >= groups[(curr_grp + 1) as usize].address;
+        let is_new_mod = mod_idx != curr_mod;
+        if is_new_grp {
+            // Reset state
+            for key in module_map.iter_mut() {
+                key.1.curr = PseudoModuleState::UNSEEN;
+            }
+            // Skip empty groups
+            loop {
+                curr_grp += 1;
+                if start < groups[(curr_grp + 1) as usize].address {
+                    break;
+                }
+            }
+        }
+
+        let ent = module_map.get_mut(&mod_idx).expect("Out-of-range module index");
+        if is_new_grp || is_new_mod {
+            // This increments to 0 the first time around per group, but
+            // if it increments again, we need a pseudo-module
+            ent.curr += 1;
+            if ent.curr >= ent.mod_indices.len() as i32 {
+                ent.mod_indices.push(next_avail);
+                module_names.push(format!(
+                    "module_{}_part_{}.cpp",
+                    ent.mod_indices[0],
+                    ent.curr + 1
+                ));
+                log::info!(
+                    "Created pseudo-module #{}, named {}",
+                    next_avail,
+                    module_names[next_avail as usize]
+                );
+                next_avail += 1;
+                assert!(
+                    module_names.len() == next_avail as usize,
+                    "name table size should track with module count"
+                );
+            }
+            curr_mod = mod_idx;
+
+            mod_idx = ent.mod_indices[ent.curr as usize];
+            let mod_name = &module_names[mod_idx as usize];
+            let rename = if groups[curr_grp as usize].name == section_addrs[sec_idx as u32].name {
+                None
+            } else {
+                Some(groups[curr_grp as usize].name.clone())
+            };
+
+            splits_by_section[sec_idx].push(start as u32, ObjSplit {
+                unit: mod_name.clone(),
+                end: end as u32,
+                align: None,
+                autogenerated: false,
+                common: false,
+                skip: false,
+                rename: rename.clone(),
+            });
+            // Get a mutable reference to the ObjSplit we just pushed, so
+            // subsequent contributions to it can update its size
+            curr_split = splits_by_section[sec_idx]
+                .for_unit_rename(mod_name, rename.as_deref())?
+                .expect("Failed to get newly-created ObjSplit")
+                .1;
+        }
+        // FIXME: This currently requires detect_objects=false to work.
+        // Deducing exact object sizes from the PDB should fix this
+        curr_split.end = end as u32;
+    }
+
+    for (i, splits) in splits_by_section.iter().enumerate() {
+        log::debug!("Splits for section {}:", i);
+        for split in splits.iter() {
+            log::debug!(
+                "From {}: 0x{:X} - 0x{:X}, rename {:?}",
+                split.1.unit,
+                split.0,
+                split.1.end,
+                split.1.rename
+            );
+        }
+    }
+
     let mut addr_vec = syms.into_values().collect_vec();
 
     // weed out xidata and _RtlCheckStack symbols (jeff finds them later)
@@ -266,5 +453,5 @@ pub fn try_parse_pdb(
         };
     }
 
-    Ok(addr_vec)
+    Ok((module_names, splits_by_section, addr_vec))
 }

From 16fb65491c1a5079407990b851d553fae5a9afea Mon Sep 17 00:00:00 2001
From: Max <34987259+mparisi20@users.noreply.github.com>
Date: Sat, 4 Apr 2026 23:08:10 -0400
Subject: [PATCH 2/2] Fix a couple of bugs in PDB splits

- Check for and discard symbols in section 0.
  These symbols are either invalid or undefined.
- Get rid of the 'pseudo-module' concept, as it
  is unnecessary. Units in splits.txt can simply
  contain multiple splits for the same section.
---
 src/obj/splits.rs |  4 ++-
 src/util/xpdb.rs  | 81 ++++++++++++-----------------------------------
 2 files changed, 24 insertions(+), 61 deletions(-)

diff --git a/src/obj/splits.rs b/src/obj/splits.rs
index 005ff2e6..cbdcce06 100644
--- a/src/obj/splits.rs
+++ b/src/obj/splits.rs
@@ -121,10 +121,19 @@ impl ObjSplits {
             .map_err(|_| anyhow!("Multiple splits for unit {} with rename {:?}", unit, rename))
     }
 
-    pub fn push(&mut self, address: u32, split: ObjSplit) {
+    /// Add the split at `address` and return a mutable reference to it.
+    ///
+    /// Splits sharing an address are kept sorted by `end`, so the new split is
+    /// not necessarily the last element after insertion.
+    pub fn push(&mut self, address: u32, split: ObjSplit) -> &mut ObjSplit {
+        let end = split.end;
         let out = self.splits.entry(address).or_default();
         out.push(split);
         out.sort_by_key(|s| s.end);
+        // The sort is stable and the new split was appended last, so it is the
+        // final entry whose `end` does not exceed its own.
+        let idx = out.partition_point(|s| s.end <= end) - 1;
+        &mut out[idx]
     }
 
     pub fn remove(&mut self, address: u32) -> Option<Vec<ObjSplit>> { self.splits.remove(&address) }
diff --git a/src/util/xpdb.rs b/src/util/xpdb.rs
index c7dc8b0a..283dde01 100644
--- a/src/util/xpdb.rs
+++ b/src/util/xpdb.rs
@@ -64,23 +64,6 @@ fn to_virtual_address(
     section_addr_to_virtual_addr(section_addrs, &to_section_addr(pdbmap, pdb_offs))
 }
 
-/// Section contributions from a given module are not guaranteed to form
-/// a single, continuous block per group, as one might expect. This is the case
-/// at least for the .xidata group. The solution implemented here is to add
-/// "pseudo-modules" as needed to hold any additional, non-contiguous chunks.
-/// In practice, this should only serve to handle a few unusual contribution
-/// sequences in the XDK modules
-struct PseudoModuleState {
-    /// Pointer into mod_indices
-    pub curr: i32,
-    /// Elements after the first are pseudo-modules
-    pub mod_indices: Vec<i32>,
-}
-
-impl PseudoModuleState {
-    const UNSEEN: i32 = -1;
-}
-
 #[derive(Debug, PartialEq, PartialOrd, Eq, Ord)]
 struct CoffGroup {
     /// Starting address of the group
@@ -168,6 +151,9 @@ pub fn try_parse_pdb(
     for symbol in all_syms_iter {
         match symbol.parse() {
             Ok(pdb2::SymbolData::Public(data)) => {
+                if data.offset.section == 0 {
+                    continue;
+                }
                 let symaddr = to_section_addr(&pdbmap, &data.offset);
                 let obj_sym = syms.entry(symaddr).or_default();
 
@@ -189,6 +175,9 @@ pub fn try_parse_pdb(
                 obj_sym.data_kind = ObjDataKind::Unknown;
             }
             Ok(pdb2::SymbolData::Data(data)) => {
+                if data.offset.section == 0 {
+                    continue;
+                }
                 let symaddr = to_section_addr(&pdbmap, &data.offset);
                 let obj_sym = syms.entry(symaddr).or_default();
 
@@ -220,6 +209,9 @@ pub fn try_parse_pdb(
                 // See https://docs.rs/pdb2/latest/pdb2/struct.ItemInformation.html
             }
             Ok(pdb2::SymbolData::ThreadStorage(data)) => {
+                if data.offset.section == 0 {
+                    continue;
+                }
                 let symaddr = to_section_addr(&pdbmap, &data.offset);
                 let obj_sym = syms.entry(symaddr).or_default();
 
@@ -237,6 +229,9 @@ pub fn try_parse_pdb(
                 // TODO: Above note for DATA records also applies here
             }
             Ok(pdb2::SymbolData::Procedure(data)) => {
+                if data.offset.section == 0 {
+                    continue;
+                }
                 let symaddr = to_section_addr(&pdbmap, &data.offset);
                 let obj_sym = syms.entry(symaddr).or_default();
 
@@ -255,6 +250,9 @@ pub fn try_parse_pdb(
                 }
             }
             Ok(pdb2::SymbolData::Thunk(data)) => {
+                if data.offset.section == 0 {
+                    continue;
+                }
                 let symaddr = to_section_addr(&pdbmap, &data.offset);
                 let obj_sym = syms.entry(symaddr).or_default();
 
@@ -313,20 +311,14 @@ pub fn try_parse_pdb(
 
     let num_modules = dbi.modules()?.count().unwrap_or(0) as i32;
 
-    // The next available module index, to be incremented each time a new
-    // pseudo-module is created
-    let mut next_avail = num_modules;
-    let mut module_map: HashMap<i32, PseudoModuleState> = HashMap::new();
     let mut module_names: Vec<String> = vec![];
     for i in 0..num_modules {
-        module_map
-            .insert(i, PseudoModuleState { curr: PseudoModuleState::UNSEEN, mod_indices: vec![i] });
         module_names.push(format!("module_{}.cpp", i));
     }
 
     // curr_grp will increase monotonically, since contributions are sorted
-    let mut curr_grp = PseudoModuleState::UNSEEN;
-    let mut curr_mod = PseudoModuleState::UNSEEN;
+    let mut curr_grp = -1;
+    let mut curr_mod = -1;
     let mut curr_split: &mut ObjSplit = &mut Default::default();
 
     let mut contribs = dbi.section_contributions()?;
@@ -338,15 +330,11 @@ pub fn try_parse_pdb(
         let sec_idx = s_addr.section as usize;
         let start = section_addr_to_virtual_addr(section_addrs, &s_addr);
         let end = start + contrib.size as u64;
-        let mut mod_idx = contrib.module as i32;
+        let mod_idx = contrib.module as i32;
 
         let is_new_grp = start >= groups[(curr_grp + 1) as usize].address;
         let is_new_mod = mod_idx != curr_mod;
         if is_new_grp {
-            // Reset state
-            for key in module_map.iter_mut() {
-                key.1.curr = PseudoModuleState::UNSEEN;
-            }
             // Skip empty groups
             loop {
                 curr_grp += 1;
@@ -356,32 +344,9 @@ pub fn try_parse_pdb(
             }
         }
 
-        let ent = module_map.get_mut(&mod_idx).expect("Out-of-range module index");
         if is_new_grp || is_new_mod {
-            // This increments to 0 the first time around per group, but
-            // if it increments again, we need a pseudo-module
-            ent.curr += 1;
-            if ent.curr >= ent.mod_indices.len() as i32 {
-                ent.mod_indices.push(next_avail);
-                module_names.push(format!(
-                    "module_{}_part_{}.cpp",
-                    ent.mod_indices[0],
-                    ent.curr + 1
-                ));
-                log::info!(
-                    "Created pseudo-module #{}, named {}",
-                    next_avail,
-                    module_names[next_avail as usize]
-                );
-                next_avail += 1;
-                assert!(
-                    module_names.len() == next_avail as usize,
-                    "name table size should track with module count"
-                );
-            }
             curr_mod = mod_idx;
 
-            mod_idx = ent.mod_indices[ent.curr as usize];
             let mod_name = &module_names[mod_idx as usize];
             let rename = if groups[curr_grp as usize].name == section_addrs[sec_idx as u32].name {
                 None
@@ -389,7 +354,9 @@ pub fn try_parse_pdb(
                 Some(groups[curr_grp as usize].name.clone())
             };
 
-            splits_by_section[sec_idx].push(start as u32, ObjSplit {
+            // Get a mutable reference to the ObjSplit we just pushed, so
+            // subsequent contributions to it can update its size
+            curr_split = splits_by_section[sec_idx].push(start as u32, ObjSplit {
                 unit: mod_name.clone(),
                 end: end as u32,
                 align: None,
@@ -398,12 +365,6 @@ pub fn try_parse_pdb(
                 skip: false,
                 rename: rename.clone(),
             });
-            // Get a mutable reference to the ObjSplit we just pushed, so
-            // subsequent contributions to it can update its size
-            curr_split = splits_by_section[sec_idx]
-                .for_unit_rename(mod_name, rename.as_deref())?
-                .expect("Failed to get newly-created ObjSplit")
-                .1;
         }
         // FIXME: This currently requires detect_objects=false to work.
         // Deducing exact object sizes from the PDB should fix this