From 3c401d84cd83ad3318a9914a714d87afeca1d763 Mon Sep 17 00:00:00 2001
From: Max <34987259+mparisi20@users.noreply.github.com>
Date: Sun, 12 Apr 2026 02:13:18 -0400
Subject: [PATCH 1/3] Extract object size information from the PDB

- When possible, deduce the size of an object symbol
  from the type information associated with it
- Deduce the sizes of floats, doubles, strings,
  and wide strings from the decorated names of
  their symbols
---
 src/util/xpdb.rs | 159 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 144 insertions(+), 15 deletions(-)
diff --git a/src/util/xpdb.rs b/src/util/xpdb.rs
index 16b8df80..fa31093e 100644
--- a/src/util/xpdb.rs
+++ b/src/util/xpdb.rs
@@ -6,7 +6,7 @@ use std::{
 
 use anyhow::{ensure, Result};
 use itertools::Itertools;
-use pdb2::{self, FallibleIterator};
+use pdb2::{self, FallibleIterator, Indirection, PrimitiveKind, TypeFinder, TypeIndex};
 use typed_path::Utf8NativePathBuf;
 
 use crate::{
@@ -52,6 +52,132 @@ fn to_section_addr(
     SectionAddress { section: jeff_sect, address: sect_base as u32 + sect_offs.offset }
 }
 
+/// Attempt to deduce the size in bytes of the type referenced by index using
+/// the TypeFinder. Unresolvable or unsupported records yield an error
+fn lookup_type_size(ty_finder: &TypeFinder, index: TypeIndex) -> Result<u64> {
+    match ty_finder.find(index)?.parse()? {
+        pdb2::TypeData::Array(data) => {
+            // Note: the last "dimension" is the total size in bytes of the array
+            let total = data.dimensions.last().copied().map(u64::from);
+            total.ok_or_else(|| anyhow::anyhow!("Array #{} has no dimensions", index.0))
+        }
+        pdb2::TypeData::Class(data) => Ok(data.size),
+        pdb2::TypeData::Enumeration(data) => lookup_type_size(ty_finder, data.underlying_type),
+        pdb2::TypeData::Modifier(data) => lookup_type_size(ty_finder, data.underlying_type),
+        pdb2::TypeData::Pointer(_) => Ok(4),
+        pdb2::TypeData::Union(data) => Ok(data.size),
+        pdb2::TypeData::Primitive(data) => {
+            // Check if this is a pointer to a primitive type
+            match data.indirection {
+                Some(Indirection::Near32) => {
+                    return Ok(4);
+                }
+                Some(Indirection::Near64) => {
+                    return Ok(8);
+                }
+                None => {}
+                _ => {
+                    return Err(anyhow::anyhow!(format!(
+                        "Unsupported pointer kind {:?} for index #{}",
+                        data.indirection, index.0
+                    )));
+                }
+            }
+            match data.kind {
+                PrimitiveKind::Char
+                | PrimitiveKind::RChar
+                | PrimitiveKind::UChar
+                | PrimitiveKind::I8
+                | PrimitiveKind::U8
+                | PrimitiveKind::Bool8 => Ok(1),
+                PrimitiveKind::WChar
+                | PrimitiveKind::RChar16
+                | PrimitiveKind::Short
+                | PrimitiveKind::UShort
+                | PrimitiveKind::I16
+                | PrimitiveKind::U16
+                | PrimitiveKind::F16
+                | PrimitiveKind::Bool16 => Ok(2),
+                PrimitiveKind::RChar32
+                | PrimitiveKind::Long
+                | PrimitiveKind::ULong
+                | PrimitiveKind::I32
+                | PrimitiveKind::U32
+                | PrimitiveKind::F32
+                | PrimitiveKind::Complex32
+                | PrimitiveKind::Bool32 => Ok(4),
+                PrimitiveKind::Quad
+                | PrimitiveKind::UQuad
+                | PrimitiveKind::I64
+                | PrimitiveKind::U64
+                | PrimitiveKind::F64
+                | PrimitiveKind::Complex64
+                | PrimitiveKind::Bool64 => Ok(8),
+                PrimitiveKind::Octa
+                | PrimitiveKind::UOcta
+                | PrimitiveKind::I128
+                | PrimitiveKind::U128
+                | PrimitiveKind::F128
+                | PrimitiveKind::Complex128 => Ok(16),
+                _ => Err(anyhow::anyhow!(format!(
+                    "Unsupported PrimitiveKind {:?} for index #{}",
+                    data, index.0
+                ))),
+            }
+        }
+        _ => Err(anyhow::anyhow!(format!("Unrecognized type record for index 0x{:X}", index.0))),
+    }
+}
+
+/// Record the object symbol's size from its type; a failed lookup only warns
+fn set_obj_size_by_type(obj_sym: &mut ObjSymbol, ty_finder: &TypeFinder, index: TypeIndex) {
+    let resolved = match lookup_type_size(ty_finder, index) {
+        Ok(bytes) => bytes,
+        Err(err) => {
+            log::warn!("Object size lookup failed for {}: {:?}", obj_sym.name, err);
+            return;
+        }
+    };
+    obj_sym.size = resolved;
+    obj_sym.size_known = true;
+}
+
+/// Try to set the size of the object symbol according to its name
+fn set_obj_size_by_name(obj_sym: &mut ObjSymbol, name: &str) {
+    if name.starts_with("??_C@_0") || name.starts_with("??_C@_1") {
+        // This is a string or a wide string. In either case, the size
+        // is encoded into the symbol name itself.
+        obj_sym.data_kind = ObjDataKind::String;
+        let ptr = &mut name["??_C@_0".len()..].chars();
+        let mut str_size = 0;
+        for ch in ptr.by_ref() {
+            if ch.is_ascii_digit() {
+                str_size = ch.to_digit(10).unwrap() as u64 + 1;
+                break;
+            } else if ('A'..='P').contains(&ch) {
+                str_size = str_size * 16 + (ch as u8 - b'A') as u64;
+            } else {
+                assert!(
+                    ch == '@',
+                    "Expected '@'-terminator while parsing length of string constant"
+                );
+                break;
+            }
+        }
+        obj_sym.size = str_size;
+        obj_sym.size_known = true;
+    } else if let Some(hex) = name.strip_prefix("__real@") {
+        let (kind, size) = match hex.len() {
+            8 => (ObjDataKind::Float, 4),
+            16 => (ObjDataKind::Double, 8),
+            _ => return, // unrecognized width: leave the size unknown
+        };
+        obj_sym.data_kind = kind;
+        obj_sym.size = size;
+        obj_sym.size_known = true;
+    }
+}
+
 #[derive(Debug, PartialEq, PartialOrd, Eq, Ord)]
 struct CoffGroup {
     /// Starting address of the group
@@ -116,6 +242,16 @@ pub fn try_parse_pdb(
         }
     }
 
+    // Index all of the type records from the TPI; type data will be parsed
+    // as needed to deduce the sizes of object symbols
+    let tpi = dbfile.type_information()?;
+    let mut ty_finder = tpi.finder();
+    let mut ty_iter = tpi.iter();
+
+    while (ty_iter.next()?).is_some() {
+        ty_finder.update(&ty_iter);
+    }
+
     let pdbmap = dbfile.address_map()?;
     let mut unsupported_sym_kinds = HashSet::new();
     let mut syms: BTreeMap<SectionAddress, ObjSymbol> = BTreeMap::new();
@@ -165,15 +301,16 @@ pub fn try_parse_pdb(
                 // TODO: handle code/data merging properly, instead of
                 // overwriting the name
 
-                // TODO: Not all S_PUB32 records represent functions or objects;
-                // Some may just be labels, which can be skipped
-                obj_sym.name = data.name.to_string().into();
                 obj_sym.address = symaddr.address.into();
                 obj_sym.section = Some(symaddr.section);
                 obj_sym.flags = ObjSymbolFlagSet(ObjSymbolFlags::Global.into());
                 obj_sym.kind =
                     if data.function { ObjSymbolKind::Function } else { ObjSymbolKind::Object };
                 obj_sym.data_kind = ObjDataKind::Unknown;
+
+                let name: String = data.name.to_string().into();
+                set_obj_size_by_name(obj_sym, &name);
+                obj_sym.name = name;
             }
             Ok(pdb2::SymbolData::Data(data)) => {
                 if data.offset.section == 0 {
@@ -192,11 +329,7 @@ pub fn try_parse_pdb(
                     obj_sym.address = symaddr.address.into();
                     obj_sym.section = Some(symaddr.section);
                 }
-                // TODO: We can also deduce the size by using the type
-                // field to index into the TPI.
-                // Build a TypeFinder, then use it to compute object sizes
-                // while iterating through the data symbols.
-                // See https://docs.rs/pdb2/latest/pdb2/struct.ItemInformation.html
+                set_obj_size_by_type(obj_sym, &ty_finder, data.type_index);
             }
             Ok(pdb2::SymbolData::ThreadStorage(data)) => {
                 if data.offset.section == 0 {
@@ -215,8 +348,7 @@ pub fn try_parse_pdb(
                     obj_sym.address = symaddr.address.into();
                     obj_sym.section = Some(symaddr.section);
                 }
-
-                // TODO: Above note for DATA records also applies here
+                set_obj_size_by_type(obj_sym, &ty_finder, data.type_index);
             }
             Ok(pdb2::SymbolData::Procedure(data)) => {
                 if data.offset.section == 0 {
@@ -249,7 +381,7 @@ pub fn try_parse_pdb(
                 // This is an S_THUNK32 record
                 obj_sym.size = data.len as u64;
                 obj_sym.size_known = true;
-                obj_sym.align = Some(4);
+                obj_sym.align = Some(8);
             }
             Ok(pdb2::SymbolData::Label(data)) => {
                 if data.offset.section == 0 {
@@ -372,9 +504,6 @@ pub fn try_parse_pdb(
 
     let mut contribs = dbi.section_contributions()?;
     while let Some(contrib) = contribs.next()? {
-        // TODO: Extract file names from the Sources substream to replace the
-        // auto-generated names. Take only the base name, fix the extension,
-        // and disambiguate identical names with a prefix
         let s_addr = to_section_addr(&pdbmap, section_addrs, &contrib.offset);
         let sec_idx = s_addr.section as usize;
         let start: u64 = s_addr.address.into();

From b0013bcf6e16ccde6b8320617b1468194a72703f Mon Sep 17 00:00:00 2001
From: Max <34987259+mparisi20@users.noreply.github.com>
Date: Sun, 12 Apr 2026 12:35:56 -0400
Subject: [PATCH 2/3] Add use_pdb_types config key

- Projects can add use_pdb_types: false to their
  config.ymls to avoid TPI parsing, e.g. if the
  type data in their PDB does not parse correctly
---
 src/cmd/dol.rs   |  3 +++
 src/cmd/xex.rs   |  4 ++--
 src/util/xpdb.rs | 31 ++++++++++++++++++++-----------
 3 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/src/cmd/dol.rs b/src/cmd/dol.rs
index 531cba79..2461d18b 100644
--- a/src/cmd/dol.rs
+++ b/src/cmd/dol.rs
@@ -79,6 +79,8 @@ pub struct ProjectConfig {
     #[serde(default = "bool_true", skip_serializing_if = "is_true")]
     pub detect_strings: bool,
     #[serde(default = "bool_true", skip_serializing_if = "is_true")]
+    pub use_pdb_types: bool,
+    #[serde(default = "bool_true", skip_serializing_if = "is_true")]
     pub write_asm: bool,
     /// Specifies the start of the common BSS section.
     #[serde(skip_serializing_if = "is_default")]
@@ -113,6 +115,7 @@ impl Default for ProjectConfig {
             modules: vec![],
             detect_objects: true,
             detect_strings: true,
+            use_pdb_types: true,
             write_asm: true,
             common_start: None,
             symbols_known: false,
diff --git a/src/cmd/xex.rs b/src/cmd/xex.rs
index 1534651c..4f63676a 100644
--- a/src/cmd/xex.rs
+++ b/src/cmd/xex.rs
@@ -473,7 +473,7 @@ fn load_analyze_xex(config: &ProjectConfig) -> Result<ExeAnalyzeResult> {
 
     if let Some(pdb_path) = &config.base.pdb {
         let pdb_path: Utf8NativePathBuf = pdb_path.with_encoding();
-        let pdb = try_parse_pdb(&pdb_path, &obj.sections)?;
+        let pdb = try_parse_pdb(&pdb_path, &obj.sections, config.use_pdb_types)?;
 
         // Apply all the splits
         // FIXME: Don't add splits unconditionally here; it may conflict with
@@ -742,7 +742,7 @@ fn map(args: MapArgs) -> Result<()> {
 
 fn pdb(args: PdbArgs) -> Result<()> {
     println!("pdb: {}", args.input);
-    let data = try_parse_pdb(&args.input, &ObjSections::new(ObjKind::Executable, vec![]))?;
+    let data = try_parse_pdb(&args.input, &ObjSections::new(ObjKind::Executable, vec![]), true)?;
     println!("{:#?}", data);
     Ok(())
 }
diff --git a/src/util/xpdb.rs b/src/util/xpdb.rs
index fa31093e..39035ad4 100644
--- a/src/util/xpdb.rs
+++ b/src/util/xpdb.rs
@@ -144,10 +144,11 @@ fn set_obj_size_by_type(obj_sym: &mut ObjSymbol, ty_finder: &TypeFinder, index:
 
 /// Try to set the size of the object symbol according to its name
 fn set_obj_size_by_name(obj_sym: &mut ObjSymbol, name: &str) {
-    if name.starts_with("??_C@_0") || name.starts_with("??_C@_1") {
-        // This is a string or a wide string. In either case, the size
-        // is encoded into the symbol name itself.
-        obj_sym.data_kind = ObjDataKind::String;
+    let is_string = name.starts_with("??_C@_0");
+    let is_wstring = name.starts_with("??_C@_1");
+    if is_string || is_wstring {
+        // In either case, the size is encoded into the symbol name itself
+        obj_sym.data_kind = if is_string { ObjDataKind::String } else { ObjDataKind::String16 };
         let ptr = &mut name["??_C@_0".len()..].chars();
         let mut str_size = 0;
         for ch in ptr.by_ref() {
@@ -203,10 +204,14 @@ pub struct PdbAnalyzeResult {
     pub labels: Vec<SectionAddress>,
 }
 
-/// Extract translation units, splits, and symbols from a PDB
+/// Extract translation units, splits, and symbols from a PDB. The use_types
+/// flag enables the parsing of the type information stream to deduce object
+/// sizes. This is optional, since some PDBs are known to have corrupt data
+/// in this stream.
 pub fn try_parse_pdb(
     path: &Utf8NativePathBuf,
     section_addrs: &ObjSections,
+    use_types: bool,
 ) -> Result<PdbAnalyzeResult> {
     let mut dbfile = pdb2::PDB::open(File::open(path)?)?;
 
@@ -248,8 +253,10 @@ pub fn try_parse_pdb(
     let mut ty_finder = tpi.finder();
     let mut ty_iter = tpi.iter();
 
-    while (ty_iter.next()?).is_some() {
-        ty_finder.update(&ty_iter);
+    if use_types {
+        while (ty_iter.next()?).is_some() {
+            ty_finder.update(&ty_iter);
+        }
     }
 
     let pdbmap = dbfile.address_map()?;
@@ -329,7 +336,9 @@ pub fn try_parse_pdb(
                     obj_sym.address = symaddr.address.into();
                     obj_sym.section = Some(symaddr.section);
                 }
-                set_obj_size_by_type(obj_sym, &ty_finder, data.type_index);
+                if use_types {
+                    set_obj_size_by_type(obj_sym, &ty_finder, data.type_index);
+                }
             }
             Ok(pdb2::SymbolData::ThreadStorage(data)) => {
                 if data.offset.section == 0 {
@@ -348,7 +357,9 @@ pub fn try_parse_pdb(
                     obj_sym.address = symaddr.address.into();
                     obj_sym.section = Some(symaddr.section);
                 }
-                set_obj_size_by_type(obj_sym, &ty_finder, data.type_index);
+                if use_types {
+                    set_obj_size_by_type(obj_sym, &ty_finder, data.type_index);
+                }
             }
             Ok(pdb2::SymbolData::Procedure(data)) => {
                 if data.offset.section == 0 {
@@ -544,8 +555,6 @@ pub fn try_parse_pdb(
                 rename: rename.clone(),
             });
         }
-        // FIXME: This currently requires detect_objects=false to work.
-        // Deducing exact object sizes from the PDB should fix this
         curr_split.end = end as u32;
     }
 

From baed9a06be93f3ffd53d3033360563640dc8aa6c Mon Sep 17 00:00:00 2001
From: Max <34987259+mparisi20@users.noreply.github.com>
Date: Sun, 12 Apr 2026 15:35:19 -0400
Subject: [PATCH 3/3] Skip split_pdata on the quick_analysis path

- update_splits no longer re-creates the .pdata splits when
  quick_analysis is set: split_pdata interferes with PDB-aided
  analysis, from which we already get accurate .pdata splits
---
 src/cmd/map.rs    |  2 +-
 src/cmd/xex.rs    |  4 ++--
 src/util/split.rs | 11 +++++++++--
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/src/cmd/map.rs b/src/cmd/map.rs
index 43ee2c8b..d66aba0e 100644
--- a/src/cmd/map.rs
+++ b/src/cmd/map.rs
@@ -187,7 +187,7 @@ fn config(args: ConfigArgs) -> Result<()> {
     log::info!("Processing map...");
     let entries = process_map(file.as_mut(), None, None)?;
     let mut obj = create_obj(&entries)?;
-    if let Err(e) = update_splits(&mut obj, None, false) {
+    if let Err(e) = update_splits(&mut obj, None, false, true) {
         error!("Failed to update splits: {}", e)
     }
     DirBuilder::new().recursive(true).create(&args.out_dir)?;
diff --git a/src/cmd/xex.rs b/src/cmd/xex.rs
index 4f63676a..f5da7c06 100644
--- a/src/cmd/xex.rs
+++ b/src/cmd/xex.rs
@@ -255,7 +255,7 @@ fn split_write_obj_exe(
 
     debug!("Adjusting splits");
     let module_id = module.obj.module_id;
-    update_splits(&mut module.obj, None, false)?;
+    update_splits(&mut module.obj, None, false, !config.quick_analysis)?;
 
     debug!("Writing configuration");
     if let Some(symbols_path) = &module.config.symbols {
@@ -633,7 +633,7 @@ fn disasm(args: DisasmArgs) -> Result<()> {
 
     // Gamepad Release
     apply_splits_file(&args.out, &mut obj)?;
-    update_splits(&mut obj, None, false)?;
+    update_splits(&mut obj, None, false, true)?;
     let split_objs = split_obj(&obj, None)?;
 
     for coff_obj in &split_objs {
diff --git a/src/util/split.rs b/src/util/split.rs
index 18c9595d..439bb240 100644
--- a/src/util/split.rs
+++ b/src/util/split.rs
@@ -544,7 +544,12 @@ fn split_pdata(obj: &mut ObjInfo) -> Result<()> {
 /// - Creating splits for gaps between existing splits
 /// - Resolving a new object link order
 #[instrument(level = "debug", skip(obj))]
-pub fn update_splits(obj: &mut ObjInfo, common_start: Option<u32>, fill_gaps: bool) -> Result<()> {
+pub fn update_splits(
+    obj: &mut ObjInfo,
+    common_start: Option<u32>,
+    fill_gaps: bool,
+    redo_pdata_splits: bool,
+) -> Result<()> {
     // // Create splits for extab and extabindex entries
     // if let Some((section_index, section)) = obj.sections.by_name("extabindex")? {
     //     if !section.data.is_empty() {
@@ -576,7 +581,9 @@ pub fn update_splits(obj: &mut ObjInfo, common_start: Option<u32>, fill_gaps: bo
     // }
 
     // Create splits for .pdata entries
-    split_pdata(obj)?;
+    if redo_pdata_splits {
+        split_pdata(obj)?;
+    }
 
     // Remove linker generated symbols from splits
     // trim_linker_generated_symbols(obj)?;