Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions src/analysis/pass.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,13 @@ impl AnalysisPass for FindSaveRestSledsXbox {

// save/restore gpr/fpr/vmx should've been found in pdata
if !func.contains("_upper") {
assert!(obj.known_functions.contains_key(&start),
"Could not find reg intrinsic from pdata. Is that even possible for an xex?");
assert!(
obj.known_functions.contains_key(&start),
concat!(
"Could not find reg intrinsic from pdata. ",
"For PDB analysis, add 'symbols_known: true' to config.yml"
)
);
}
// add known symbols for them
if obj.known_functions.contains_key(&start) {
Expand Down
71 changes: 35 additions & 36 deletions src/cmd/xex.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::{
cmp,
collections::{BTreeMap, HashSet},
fs::{self, DirBuilder, File},
io::{BufWriter, Write},
Expand Down Expand Up @@ -39,7 +40,7 @@ use crate::{
config::{apply_splits_file, apply_symbols_file, write_splits_file, write_symbols_file},
dep::DepFile,
file::{buf_writer, FileReadInfo},
map_exe::{apply_map_file_exe, is_reg_intrinsic, process_map_exe},
map_exe::{apply_map_file_exe, process_map_exe},
path::native_path,
split::{split_obj, update_splits},
xex::{
Expand Down Expand Up @@ -472,13 +473,13 @@ fn load_analyze_xex(config: &ProjectConfig) -> Result<ExeAnalyzeResult> {

if let Some(pdb_path) = &config.base.pdb {
let pdb_path: Utf8NativePathBuf = pdb_path.with_encoding();
let (pdb_units, pdb_splits, pdb_syms) = try_parse_pdb(&pdb_path, &obj.sections)?;
let pdb = try_parse_pdb(&pdb_path, &obj.sections)?;

// Apply all the splits
// FIXME: Don't add splits unconditionally here; it may conflict with
// user-provided splits. For now, users can comment out the pdb key
// in config.yml after initial analysis
for (i, splits_for_section) in pdb_splits.into_iter().enumerate() {
for (i, splits_for_section) in pdb.splits.into_iter().enumerate() {
for (start, split) in splits_for_section.iter() {
obj.sections[i as u32].splits.push(start, split.clone());
}
Expand All @@ -489,48 +490,46 @@ fn load_analyze_xex(config: &ProjectConfig) -> Result<ExeAnalyzeResult> {
for split in obj.sections.all_splits() {
nonempty_mods.insert(&split.3.unit);
}
for unit in pdb_units {
for unit in pdb.units {
if nonempty_mods.contains(&unit) {
obj.link_order.push(ObjUnit { name: unit, autogenerated: false, order: None });
} else {
log::debug!("Module {} is empty", unit);
}
}

// Remove known labels from known_functions/pdata_funcs
obj.pdata_funcs.sort();
for known_label in pdb.labels {
if obj.known_functions.remove(&known_label).is_some() {
log::debug!("Demoted {} from func to label", known_label);
}
if let Result::Ok(idx) = obj.pdata_funcs.binary_search(&known_label) {
obj.pdata_funcs.remove(idx);
};
}

// Apply all the symbols
for mut sym in pdb_syms.into_iter() {
if !is_reg_intrinsic(&sym.name) && sym.name != "__NLG_Return" {
match obj.sections.at_address(sym.address as u32).ok() {
Some((sec_idx, _sec)) => {
let the_sec_addr = SectionAddress::new(sec_idx, sym.address as u32);
if obj.pdata_funcs.contains(&the_sec_addr) {
let pdata_sz =
obj.known_functions.get(&the_sec_addr).unwrap().unwrap() as u64;
let pdb_sz = sym.size;
if pdata_sz != pdb_sz {
log::debug!(
concat!(
"Size of {} according to .pdata is {}",
", but according to pdb is {}. "
),
&sym.name,
pdata_sz,
pdb_sz
);
// Defer to .pdata size for now. Empirically,
// the sizes only differ for XDK functions.
// Note that this approach results in extra
// 'fn_XXXXXXXX' symbols for certain XDK labels.
// TODO: Fix this without breaking CFA
sym.size = pdata_sz;
}
}
obj.add_symbol(sym, true)?;
}
// if we couldn't find the section (like maybe it was stripped), just continue on
_ => continue,
};
for mut sym in pdb.symbols.into_iter() {
let (sec_idx, _sec) = obj.sections.at_address(sym.address as u32)?;
let the_sec_addr = SectionAddress::new(sec_idx, sym.address as u32);
if obj.pdata_funcs.contains(&the_sec_addr) {
let pdata_sz = obj.known_functions.get(&the_sec_addr).unwrap().unwrap() as u64;
let pdb_sz = sym.size;
if pdata_sz != pdb_sz {
log::debug!(
concat!(
"Size of {} according to .pdata is {}",
", but according to pdb is {}. "
),
&sym.name,
pdata_sz,
pdb_sz
);
sym.size = cmp::max(pdata_sz, pdb_sz);
}
}
obj.add_symbol(sym, true)?;
}
dep.push(pdb_path);
}
Expand Down
113 changes: 64 additions & 49 deletions src/util/xpdb.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use std::{
collections::{BTreeMap, HashMap, HashSet},
collections::{BTreeMap, HashSet},
fs::File,
vec::Vec,
};
Expand Down Expand Up @@ -40,28 +40,16 @@ fn warn_unsupported_sym_kind(sym: &pdb2::Symbol, set: &mut HashSet<pdb2::SymbolK

/// Convert to jeff's SectionAddress type
fn to_section_addr(
pdbmap: &pdb2::AddressMap,
pdb_offs: &pdb2::PdbInternalSectionOffset,
) -> SectionAddress {
let s_addr = pdb_offs.to_section_offset(pdbmap).unwrap_or_default();
SectionAddress {
address: s_addr.offset,
// jeff sections are 0-indexed
section: s_addr.section as u32 - 1,
}
}

fn section_addr_to_virtual_addr(section_addrs: &ObjSections, s_addr: &SectionAddress) -> u64 {
let sect_base = section_addrs.get(s_addr.section).unwrap_or(&ObjSection::default()).address;
s_addr.address as u64 + sect_base
}

fn to_virtual_address(
pdbmap: &pdb2::AddressMap,
section_addrs: &ObjSections,
pdb_offs: &pdb2::PdbInternalSectionOffset,
) -> u64 {
section_addr_to_virtual_addr(section_addrs, &to_section_addr(pdbmap, pdb_offs))
) -> SectionAddress {
let sect_offs = pdb_offs.to_section_offset(pdbmap).unwrap_or_default();
// Subtracting 1 because jeff sections are 0-indexed
// TODO: Do optimized binaries have a different mapping?
let jeff_sect = (sect_offs.section - 1) as u32;
let sect_base = section_addrs.get(jeff_sect).unwrap_or(&ObjSection::default()).address;
SectionAddress { section: jeff_sect, address: sect_base as u32 + sect_offs.offset }
}

#[derive(Debug, PartialEq, PartialOrd, Eq, Ord)]
Expand All @@ -76,11 +64,24 @@ struct CoffGroup {
pub name: String,
}

/// All the useful information parsed from a PDB
#[derive(Debug)]
pub struct PdbAnalyzeResult {
/// Names of translation units
pub units: Vec<String>,
/// Splits derived from the Section Contributions Stream
pub splits: Vec<ObjSplits>,
/// Symbols derived from the Global and Module Symbol Streams
pub symbols: Vec<ObjSymbol>,
/// Locations of labels. Note that all labels are symbols
pub labels: Vec<SectionAddress>,
}

/// Extract translation units, splits, and symbols from a PDB
pub fn try_parse_pdb(
path: &Utf8NativePathBuf,
section_addrs: &ObjSections,
) -> Result<(Vec<String>, Vec<ObjSplits>, Vec<ObjSymbol>)> {
) -> Result<PdbAnalyzeResult> {
let mut dbfile = pdb2::PDB::open(File::open(path)?)?;

// Ensure pdb sections match the exe sections and that all the names match
Expand Down Expand Up @@ -147,14 +148,14 @@ pub fn try_parse_pdb(

let all_syms_iter = all_syms.into_iter();
let mut groups: Vec<CoffGroup> = vec![];
let mut ldata_dupes: HashMap<String, u32> = HashMap::new();
let mut known_labels: Vec<SectionAddress> = vec![];
for symbol in all_syms_iter {
match symbol.parse() {
Ok(pdb2::SymbolData::Public(data)) => {
if data.offset.section == 0 {
continue;
}
let symaddr = to_section_addr(&pdbmap, &data.offset);
let symaddr = to_section_addr(&pdbmap, section_addrs, &data.offset);
let obj_sym = syms.entry(symaddr).or_default();

// We get almost everything we need to know about a symbol
Expand All @@ -167,7 +168,7 @@ pub fn try_parse_pdb(
// TODO: Not all S_PUB32 records represent functions or objects;
// Some may just be labels, which can be skipped
obj_sym.name = data.name.to_string().into();
obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset);
obj_sym.address = symaddr.address.into();
obj_sym.section = Some(symaddr.section);
obj_sym.flags = ObjSymbolFlagSet(ObjSymbolFlags::Global.into());
obj_sym.kind =
Expand All @@ -178,7 +179,7 @@ pub fn try_parse_pdb(
if data.offset.section == 0 {
continue;
}
let symaddr = to_section_addr(&pdbmap, &data.offset);
let symaddr = to_section_addr(&pdbmap, section_addrs, &data.offset);
let obj_sym = syms.entry(symaddr).or_default();

// This is an S_GDATA32 or S_LDATA32 record
Expand All @@ -187,19 +188,8 @@ pub fn try_parse_pdb(
} else {
obj_sym.flags.set_scope(ObjSymbolScope::Local);
obj_sym.kind = ObjSymbolKind::Object;
// TODO: Now that we extract object files and splits, we can
// update this renaming so it is only done for repeat
// names of symbols in the same file
let name = data.name.to_string().clone();
let c =
*ldata_dupes.entry(name.to_string()).and_modify(|c| *c += 1).or_insert(1);
let name: String = if c > 1 {
format!("static_dup_{}_{}", c - 1, name)
} else {
data.name.to_string().into()
};
obj_sym.name = name;
obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset);
obj_sym.name = data.name.to_string().into();
obj_sym.address = symaddr.address.into();
obj_sym.section = Some(symaddr.section);
}
// TODO: We can also deduce the size by using the type
Expand All @@ -212,7 +202,7 @@ pub fn try_parse_pdb(
if data.offset.section == 0 {
continue;
}
let symaddr = to_section_addr(&pdbmap, &data.offset);
let symaddr = to_section_addr(&pdbmap, section_addrs, &data.offset);
let obj_sym = syms.entry(symaddr).or_default();

// This is an S_GTHREAD32 or S_LTHREAD32 record
Expand All @@ -222,7 +212,7 @@ pub fn try_parse_pdb(
obj_sym.flags.set_scope(ObjSymbolScope::Local);
obj_sym.kind = ObjSymbolKind::Object;
obj_sym.name = data.name.to_string().into();
obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset);
obj_sym.address = symaddr.address.into();
obj_sym.section = Some(symaddr.section);
}

Expand All @@ -232,40 +222,60 @@ pub fn try_parse_pdb(
if data.offset.section == 0 {
continue;
}
let symaddr = to_section_addr(&pdbmap, &data.offset);
let symaddr = to_section_addr(&pdbmap, section_addrs, &data.offset);
let obj_sym = syms.entry(symaddr).or_default();

// This is an S_GPROC32 or S_LPROC32 record
obj_sym.size = data.len as u64;
obj_sym.size_known = true;
obj_sym.align = Some(4);
obj_sym.align = Some(8);
if data.global {
obj_sym.flags.set_scope(ObjSymbolScope::Global);
} else {
obj_sym.flags.set_scope(ObjSymbolScope::Local);
obj_sym.kind = ObjSymbolKind::Function;
obj_sym.name = data.name.to_string().into();
obj_sym.address = to_virtual_address(&pdbmap, section_addrs, &data.offset);
obj_sym.address = symaddr.address.into();
obj_sym.section = Some(symaddr.section);
}
}
Ok(pdb2::SymbolData::Thunk(data)) => {
if data.offset.section == 0 {
continue;
}
let symaddr = to_section_addr(&pdbmap, &data.offset);
let symaddr = to_section_addr(&pdbmap, section_addrs, &data.offset);
let obj_sym = syms.entry(symaddr).or_default();

// This is an S_THUNK32 record
obj_sym.size = data.len as u64;
obj_sym.size_known = true;
obj_sym.align = Some(4);
}
Ok(pdb2::SymbolData::Label(data)) => {
if data.offset.section == 0 {
continue;
}
let name: String = data.name.to_string().into();
if name.starts_with("$M") || name.starts_with("$LN") {
// These are uninteresting auto-generated symbols
continue;
}
let symaddr = to_section_addr(&pdbmap, section_addrs, &data.offset);
let obj_sym = syms.entry(symaddr).or_default();
obj_sym.address = symaddr.address.into();
obj_sym.data_kind = ObjDataKind::Unknown;
obj_sym.flags.set_scope(ObjSymbolScope::Local);
obj_sym.kind = ObjSymbolKind::Unknown;
obj_sym.name = name;
obj_sym.section = Some(symaddr.section);

known_labels.push(symaddr);
}
Ok(pdb2::SymbolData::CoffGroup(data)) => groups.push(CoffGroup {
address: to_virtual_address(&pdbmap, section_addrs, &data.offset),
address: to_section_addr(&pdbmap, section_addrs, &data.offset).address.into(),
size: data.cb,
name: data.name.to_string().into(),
section: to_section_addr(&pdbmap, &data.offset).section,
section: to_section_addr(&pdbmap, section_addrs, &data.offset).section,
}),
Ok(pdb2::SymbolData::Section(_data)) => {
// TODO: We already have most section info from the EXE, but
Expand Down Expand Up @@ -326,9 +336,9 @@ pub fn try_parse_pdb(
// TODO: Extract file names from the Sources substream to replace the
// auto-generated names. Take only the base name, fix the extension,
// and disambiguate identical names with a prefix
let s_addr = to_section_addr(&pdbmap, &contrib.offset);
let s_addr = to_section_addr(&pdbmap, section_addrs, &contrib.offset);
let sec_idx = s_addr.section as usize;
let start = section_addr_to_virtual_addr(section_addrs, &s_addr);
let start: u64 = s_addr.address.into();
let end = start + contrib.size as u64;
let mod_idx = contrib.module as i32;

Expand Down Expand Up @@ -414,5 +424,10 @@ pub fn try_parse_pdb(
};
}

Ok((module_names, splits_by_section, addr_vec))
Ok(PdbAnalyzeResult {
units: module_names,
splits: splits_by_section,
symbols: addr_vec,
labels: known_labels,
})
}
Loading