From 999a5346b396dcd86b655d38906ebd2db6ecfcee Mon Sep 17 00:00:00 2001 From: Luke Street Date: Wed, 24 Sep 2025 14:59:45 -0600 Subject: [PATCH 1/8] Initial read support for OMF --- Cargo.toml | 7 +- src/build/elf.rs | 4 +- src/common.rs | 1 + src/lib.rs | 2 + src/omf.rs | 323 ++++++++ src/read/any.rs | 46 ++ src/read/mod.rs | 17 + src/read/omf/file.rs | 444 +++++++++++ src/read/omf/mod.rs | 1438 ++++++++++++++++++++++++++++++++++++ src/read/omf/relocation.rs | 86 +++ src/read/omf/section.rs | 219 ++++++ src/read/omf/symbol.rs | 149 ++++ tests/read/mod.rs | 1 + tests/read/omf.rs | 114 +++ 14 files changed, 2846 insertions(+), 5 deletions(-) create mode 100644 src/omf.rs create mode 100644 src/read/omf/file.rs create mode 100644 src/read/omf/mod.rs create mode 100644 src/read/omf/relocation.rs create mode 100644 src/read/omf/section.rs create mode 100644 src/read/omf/symbol.rs create mode 100644 tests/read/omf.rs diff --git a/Cargo.toml b/Cargo.toml index 37366a29..a6eda404 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,7 +44,7 @@ alloc = { version = '1.0.0', optional = true, package = 'rustc-std-workspace-all # Core read support. You will need to enable some file formats too. read_core = [] # Read support for most file formats (including unaligned files). -read = ["read_core", "archive", "coff", "elf", "macho", "pe", "xcoff", "unaligned"] +read = ["read_core", "archive", "coff", "elf", "macho", "pe", "xcoff", "omf", "unaligned"] # Core write support. You will need to enable some file formats too. write_core = ["dep:crc32fast", "dep:indexmap", "dep:hashbrown"] # Core write support with libstd features. You will need to enable some file formats too. @@ -81,6 +81,7 @@ macho = [] pe = ["coff"] wasm = ["dep:wasmparser"] xcoff = [] +omf = [] #======================================= # By default, support all read features. 
@@ -89,7 +90,7 @@ default = ["read", "compression"] #======================================= # Umbrella feature for enabling all user-facing features of this crate. Does not # enable internal features like `rustc-dep-of-std`. -all = ["read", "write", "build", "std", "compression", "wasm"] +all = ["read", "write", "build", "std", "compression", "wasm", "omf"] # Use of --all-features is not supported. # This is a dummy feature to detect when --all-features is used. @@ -100,7 +101,7 @@ cargo-all = [] doc = [ "read_core", "write_std", "build_core", "std", "compression", - "archive", "coff", "elf", "macho", "pe", "wasm", "xcoff", + "archive", "coff", "elf", "macho", "pe", "wasm", "xcoff", "omf", ] #======================================= diff --git a/src/build/elf.rs b/src/build/elf.rs index 9ade35ca..df8ae7a7 100644 --- a/src/build/elf.rs +++ b/src/build/elf.rs @@ -434,10 +434,10 @@ impl<'data> Builder<'data> { ) .map(SectionData::Relocation) } else { - return Err(Error(format!( + Err(Error(format!( "Invalid sh_link {} in relocation section at index {}", link.0, index, - ))); + ))) } } diff --git a/src/common.rs b/src/common.rs index d0a4cdbb..7b67bd1d 100644 --- a/src/common.rs +++ b/src/common.rs @@ -132,6 +132,7 @@ pub enum BinaryFormat { Pe, Wasm, Xcoff, + Omf, } impl BinaryFormat { diff --git a/src/lib.rs b/src/lib.rs index 9a2fdd41..41ed0770 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -88,6 +88,8 @@ pub mod archive; pub mod elf; #[cfg(feature = "macho")] pub mod macho; +#[cfg(feature = "omf")] +pub mod omf; #[cfg(any(feature = "coff", feature = "pe"))] pub mod pe; #[cfg(feature = "xcoff")] diff --git a/src/omf.rs b/src/omf.rs new file mode 100644 index 00000000..7c4483c9 --- /dev/null +++ b/src/omf.rs @@ -0,0 +1,323 @@ +//! Object Module Format (OMF) definitions for classic DOS object files. +//! +//! This module provides type definitions and constants for working with OMF files, +//! 
as defined in the TIS Relocatable Object Module Format (OMF) Specification v1.1. +//! +//! OMF was commonly used by DOS compilers like Borland C++ and Watcom C. + +use crate::endian::U16; +use crate::pod::Pod; + +/// OMF record type constants +pub mod record_type { + /// Translator Header Record + pub const THEADR: u8 = 0x80; + /// Library Module Header Record + pub const LHEADR: u8 = 0x82; + /// Comment Record + pub const COMENT: u8 = 0x88; + /// Module End Record (16-bit) + pub const MODEND: u8 = 0x8A; + /// Module End Record (32-bit) + pub const MODEND32: u8 = 0x8B; + /// External Names Definition Record + pub const EXTDEF: u8 = 0x8C; + /// Type Definition Record (obsolete) + pub const TYPDEF: u8 = 0x8E; + /// Public Names Definition Record (16-bit) + pub const PUBDEF: u8 = 0x90; + /// Public Names Definition Record (32-bit) + pub const PUBDEF32: u8 = 0x91; + /// Line Numbers Record (16-bit) + pub const LINNUM: u8 = 0x94; + /// Line Numbers Record (32-bit) + pub const LINNUM32: u8 = 0x95; + /// List of Names Record + pub const LNAMES: u8 = 0x96; + /// Segment Definition Record (16-bit) + pub const SEGDEF: u8 = 0x98; + /// Segment Definition Record (32-bit) + pub const SEGDEF32: u8 = 0x99; + /// Group Definition Record + pub const GRPDEF: u8 = 0x9A; + /// Fixup Record (16-bit) + pub const FIXUPP: u8 = 0x9C; + /// Fixup Record (32-bit) + pub const FIXUPP32: u8 = 0x9D; + /// Logical Enumerated Data Record (16-bit) + pub const LEDATA: u8 = 0xA0; + /// Logical Enumerated Data Record (32-bit) + pub const LEDATA32: u8 = 0xA1; + /// Logical Iterated Data Record (16-bit) + pub const LIDATA: u8 = 0xA2; + /// Logical Iterated Data Record (32-bit) + pub const LIDATA32: u8 = 0xA3; + /// Communal Names Definition Record + pub const COMDEF: u8 = 0xB0; + /// Backpatch Record (16-bit) + pub const BAKPAT: u8 = 0xB2; + /// Backpatch Record (32-bit) + pub const BAKPAT32: u8 = 0xB3; + /// Local External Names Definition Record (16-bit) + pub const LEXTDEF: u8 = 0xB4; + /// Local 
External Names Definition Record (32-bit) + pub const LEXTDEF32: u8 = 0xB5; + /// Local Public Names Definition Record (16-bit) + pub const LPUBDEF: u8 = 0xB6; + /// Local Public Names Definition Record (32-bit) + pub const LPUBDEF32: u8 = 0xB7; + /// Local Communal Names Definition Record + pub const LCOMDEF: u8 = 0xB8; + /// COMDAT External Names Definition Record + pub const CEXTDEF: u8 = 0xBC; + /// Initialized Communal Data Record (16-bit) + pub const COMDAT: u8 = 0xC2; + /// Initialized Communal Data Record (32-bit) + pub const COMDAT32: u8 = 0xC3; + /// Symbol Line Numbers Record (16-bit) + pub const LINSYM: u8 = 0xC4; + /// Symbol Line Numbers Record (32-bit) + pub const LINSYM32: u8 = 0xC5; + /// Alias Definition Record + pub const ALIAS: u8 = 0xC6; + /// Named Backpatch Record (16-bit) + pub const NBKPAT: u8 = 0xC8; + /// Named Backpatch Record (32-bit) + pub const NBKPAT32: u8 = 0xC9; + /// Local Logical Names Definition Record + pub const LLNAMES: u8 = 0xCA; + /// OMF Version Number Record + pub const VERNUM: u8 = 0xCC; + /// Vendor-specific OMF Extension Record + pub const VENDEXT: u8 = 0xCE; +} + +/// OMF record header - common to all record types +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub struct RecordHeader { + /// Record type identifier + pub record_type: u8, + /// Length of the record contents (excluding header and checksum) + pub length: U16, +} + +unsafe impl Pod for RecordHeader {} + +/// Segment alignment types for SEGDEF records +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u8)] +pub enum SegmentAlignment { + /// Absolute segment + Absolute = 0, + /// Byte aligned + Byte = 1, + /// Word (2-byte) aligned + Word = 2, + /// Paragraph (16-byte) aligned + Paragraph = 3, + /// Page (256-byte) aligned + Page = 4, + /// Double word (4-byte) aligned + DWord = 5, + /// 4K page aligned + Page4K = 6, +} + +/// Segment combination types for SEGDEF records +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u8)] +pub enum SegmentCombination 
{ + /// Private segment + Private = 0, + /// Public segment (concatenated) + Public = 2, + /// Stack segment + Stack = 5, + /// Common segment (overlapped) + Common = 6, +} + +/// Fixup location types +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u8)] +pub enum FixupLocation { + /// Low-order byte + LowByte = 0, + /// 16-bit offset + Offset = 1, + /// 16-bit base/segment + Base = 2, + /// 32-bit pointer (16:16) + Pointer = 3, + /// High-order byte + HighByte = 4, + /// 16-bit loader-resolved offset + LoaderOffset = 5, + /// 32-bit offset + Offset32 = 9, + /// 48-bit pointer (16:32) + Pointer48 = 11, + /// 32-bit loader-resolved offset + LoaderOffset32 = 13, +} + +/// Target method types for fixups +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u8)] +pub enum TargetMethod { + /// Segment index + SegmentIndex = 0, + /// Group index + GroupIndex = 1, + /// External index + ExternalIndex = 2, + /// Frame number (absolute) + FrameNumber = 3, +} + +/// Frame method types for fixups +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u8)] +pub enum FrameMethod { + /// Segment index + SegmentIndex = 0, + /// Group index + GroupIndex = 1, + /// External index + ExternalIndex = 2, + /// Frame number (absolute) + FrameNumber = 3, + /// Location (use fixup location) + Location = 4, + /// Target (use target's frame) + Target = 5, +} + +/// Check if a byte is a valid OMF record type +pub fn is_omf_record_type(byte: u8) -> bool { + use record_type::*; + matches!( + byte, + THEADR + | LHEADR + | COMENT + | MODEND + | MODEND32 + | EXTDEF + | TYPDEF + | PUBDEF + | PUBDEF32 + | LINNUM + | LINNUM32 + | LNAMES + | SEGDEF + | SEGDEF32 + | GRPDEF + | FIXUPP + | FIXUPP32 + | LEDATA + | LEDATA32 + | LIDATA + | LIDATA32 + | COMDEF + | BAKPAT + | BAKPAT32 + | LEXTDEF + | LEXTDEF32 + | LPUBDEF + | LPUBDEF32 + | LCOMDEF + | CEXTDEF + | COMDAT + | COMDAT32 + | LINSYM + | LINSYM32 + | ALIAS + | NBKPAT + | NBKPAT32 + | LLNAMES + | VERNUM + | VENDEXT + ) +} + +/// Check if a 
/// record type uses 32-bit fields.
///
/// The 32-bit variant of each record type defined in `record_type` is the
/// 16-bit value with the low bit set (e.g. `SEGDEF` = 0x98, `SEGDEF32` = 0x99),
/// so only the low bit is inspected.
pub fn is_32bit_record(record_type: u8) -> bool {
    record_type & 0x01 != 0
}

/// Read an OMF index field (1 or 2 bytes).
///
/// If the high bit of the first byte is clear, that byte is the index.
/// Otherwise the low 7 bits of the first byte form the high part and the
/// second byte the low part of a 15-bit index.
///
/// Returns the index and the number of bytes consumed, or `None` if `data`
/// is too short.
pub fn read_index(data: &[u8]) -> Option<(u16, usize)> {
    match *data {
        // 1-byte index
        [byte, ..] if byte & 0x80 == 0 => Some((u16::from(byte), 1)),
        // 2-byte index: high bit of the first byte is only a marker
        [high, low, ..] => Some(((u16::from(high & 0x7F) << 8) | u16::from(low), 2)),
        // Empty input, or a 2-byte index with the second byte missing
        _ => None,
    }
}

/// Read a counted string (a length byte followed by that many bytes).
///
/// Returns the string bytes (without the length byte) and the total number
/// of bytes consumed including the length byte, or `None` if `data` is empty
/// or shorter than the declared length.
pub fn read_counted_string(data: &[u8]) -> Option<(&[u8], usize)> {
    let (&length, rest) = data.split_first()?;
    let length = usize::from(length);
    let text = rest.get(..length)?;
    Some((text, 1 + length))
}

/// Read a variable-length encoded value.
///
/// Returns the value and the number of bytes consumed, or `None` if `data`
/// is truncated or starts with an unrecognised leading byte.
///
/// Encoding:
/// - `0x00..=0x80`: the byte itself is the value (0 through 128)
/// - `0x81`: followed by a 16-bit little-endian value
/// - `0x84`: followed by a 24-bit little-endian value
/// - `0x88`: followed by a 32-bit little-endian value
///
/// NOTE(review): the OMF specification defines this encoding for COMDEF
/// communal lengths; an earlier comment here attributed it to LIDATA repeat
/// and block counts. Confirm against the callers.
pub fn read_encoded_value(data: &[u8]) -> Option<(u32, usize)> {
    let (&lead, payload) = data.split_first()?;
    match lead {
        // Single byte value. 0x80 (128) is included: it is a value, not a prefix.
        0..=0x80 => Some((u32::from(lead), 1)),
        0x81 => match *payload {
            [b0, b1, ..] => Some((u32::from(u16::from_le_bytes([b0, b1])), 3)),
            _ => None,
        },
        0x84 => match *payload {
            [b0, b1, b2, ..] => Some((u32::from_le_bytes([b0, b1, b2, 0]), 4)),
            _ => None,
        },
        0x88 => match *payload {
            [b0, b1, b2, b3, ..] => Some((u32::from_le_bytes([b0, b1, b2, b3]), 5)),
            _ => None,
        },
        // Unknown encoding
        _ => None,
    }
}

// Patch metadata carried over from the original line (continues on the next line):
// diff --git a/src/read/any.rs b/src/read/any.rs index
d6408ad4..3857a020 100644 --- a/src/read/any.rs +++ b/src/read/any.rs @@ -10,6 +10,8 @@ use crate::read::coff; use crate::read::elf; #[cfg(feature = "macho")] use crate::read::macho; +#[cfg(feature = "omf")] +use crate::read::omf; #[cfg(feature = "pe")] use crate::read::pe; #[cfg(feature = "wasm")] @@ -52,6 +54,8 @@ macro_rules! with_inner { $enum::Xcoff32(ref $var) => $body, #[cfg(feature = "xcoff")] $enum::Xcoff64(ref $var) => $body, + #[cfg(feature = "omf")] + $enum::Omf(ref $var) => $body, } }; } @@ -81,6 +85,8 @@ macro_rules! with_inner_mut { $enum::Xcoff32(ref mut $var) => $body, #[cfg(feature = "xcoff")] $enum::Xcoff64(ref mut $var) => $body, + #[cfg(feature = "omf")] + $enum::Omf(ref mut $var) => $body, } }; } @@ -111,6 +117,8 @@ macro_rules! map_inner { $from::Xcoff32(ref $var) => $to::Xcoff32($body), #[cfg(feature = "xcoff")] $from::Xcoff64(ref $var) => $to::Xcoff64($body), + #[cfg(feature = "omf")] + $from::Omf(ref $var) => $to::Omf($body), } }; } @@ -141,6 +149,8 @@ macro_rules! map_inner_option { $from::Xcoff32(ref $var) => $body.map($to::Xcoff32), #[cfg(feature = "xcoff")] $from::Xcoff64(ref $var) => $body.map($to::Xcoff64), + #[cfg(feature = "omf")] + $from::Omf(ref $var) => $body.map($to::Omf), } }; } @@ -170,6 +180,8 @@ macro_rules! map_inner_option_mut { $from::Xcoff32(ref mut $var) => $body.map($to::Xcoff32), #[cfg(feature = "xcoff")] $from::Xcoff64(ref mut $var) => $body.map($to::Xcoff64), + #[cfg(feature = "omf")] + $from::Omf(ref mut $var) => $body.map($to::Omf), } }; } @@ -200,6 +212,8 @@ macro_rules! 
next_inner { $from::Xcoff32(ref mut iter) => iter.next().map($to::Xcoff32), #[cfg(feature = "xcoff")] $from::Xcoff64(ref mut iter) => iter.next().map($to::Xcoff64), + #[cfg(feature = "omf")] + $from::Omf(ref mut iter) => iter.next().map($to::Omf), } }; } @@ -233,6 +247,8 @@ pub enum File<'data, R: ReadRef<'data> = &'data [u8]> { Xcoff32(xcoff::XcoffFile32<'data, R>), #[cfg(feature = "xcoff")] Xcoff64(xcoff::XcoffFile64<'data, R>), + #[cfg(feature = "omf")] + Omf(omf::OmfFile<'data, R>), } impl<'data, R: ReadRef<'data>> File<'data, R> { @@ -261,6 +277,8 @@ impl<'data, R: ReadRef<'data>> File<'data, R> { FileKind::Xcoff32 => File::Xcoff32(xcoff::XcoffFile32::parse(data)?), #[cfg(feature = "xcoff")] FileKind::Xcoff64 => File::Xcoff64(xcoff::XcoffFile64::parse(data)?), + #[cfg(feature = "omf")] + FileKind::Omf => File::Omf(omf::OmfFile::parse(data)?), #[allow(unreachable_patterns)] _ => return Err(Error("Unsupported file format")), }) @@ -297,6 +315,8 @@ impl<'data, R: ReadRef<'data>> File<'data, R> { File::Wasm(_) => BinaryFormat::Wasm, #[cfg(feature = "xcoff")] File::Xcoff32(_) | File::Xcoff64(_) => BinaryFormat::Xcoff, + #[cfg(feature = "omf")] + File::Omf(_) => BinaryFormat::Omf, } } } @@ -557,6 +577,8 @@ enum SegmentIteratorInternal<'data, 'file, R: ReadRef<'data>> { Xcoff32(xcoff::XcoffSegmentIterator32<'data, 'file, R>), #[cfg(feature = "xcoff")] Xcoff64(xcoff::XcoffSegmentIterator64<'data, 'file, R>), + #[cfg(feature = "omf")] + Omf(omf::OmfSegmentIterator<'data, 'file, R>), } impl<'data, 'file, R: ReadRef<'data>> Iterator for SegmentIterator<'data, 'file, R> { @@ -599,6 +621,8 @@ enum SegmentInternal<'data, 'file, R: ReadRef<'data>> { Xcoff32(xcoff::XcoffSegment32<'data, 'file, R>), #[cfg(feature = "xcoff")] Xcoff64(xcoff::XcoffSegment64<'data, 'file, R>), + #[cfg(feature = "omf")] + Omf(omf::OmfSegmentRef<'data, 'file, R>), } impl<'data, 'file, R: ReadRef<'data>> fmt::Debug for Segment<'data, 'file, R> { @@ -691,6 +715,8 @@ enum SectionIteratorInternal<'data, 
'file, R: ReadRef<'data>> { Xcoff32(xcoff::XcoffSectionIterator32<'data, 'file, R>), #[cfg(feature = "xcoff")] Xcoff64(xcoff::XcoffSectionIterator64<'data, 'file, R>), + #[cfg(feature = "omf")] + Omf(omf::OmfSectionIterator<'data, 'file, R>), } impl<'data, 'file, R: ReadRef<'data>> Iterator for SectionIterator<'data, 'file, R> { @@ -732,6 +758,8 @@ enum SectionInternal<'data, 'file, R: ReadRef<'data>> { Xcoff32(xcoff::XcoffSection32<'data, 'file, R>), #[cfg(feature = "xcoff")] Xcoff64(xcoff::XcoffSection64<'data, 'file, R>), + #[cfg(feature = "omf")] + Omf(omf::OmfSection<'data, 'file, R>), } impl<'data, 'file, R: ReadRef<'data>> fmt::Debug for Section<'data, 'file, R> { @@ -798,6 +826,10 @@ impl<'data, 'file, R: ReadRef<'data>> ObjectSection<'data> for Section<'data, 'f with_inner!(self.inner, SectionInternal, |x| x.compressed_data()) } + fn uncompressed_data(&self) -> Result> { + with_inner!(self.inner, SectionInternal, |x| x.uncompressed_data()) + } + fn name_bytes(&self) -> Result<&'data [u8]> { with_inner!(self.inner, SectionInternal, |x| x.name_bytes()) } @@ -868,6 +900,8 @@ enum ComdatIteratorInternal<'data, 'file, R: ReadRef<'data>> { Xcoff32(xcoff::XcoffComdatIterator32<'data, 'file, R>), #[cfg(feature = "xcoff")] Xcoff64(xcoff::XcoffComdatIterator64<'data, 'file, R>), + #[cfg(feature = "omf")] + Omf(omf::OmfComdatIterator<'data, 'file, R>), } impl<'data, 'file, R: ReadRef<'data>> Iterator for ComdatIterator<'data, 'file, R> { @@ -909,6 +943,8 @@ enum ComdatInternal<'data, 'file, R: ReadRef<'data>> { Xcoff32(xcoff::XcoffComdat32<'data, 'file, R>), #[cfg(feature = "xcoff")] Xcoff64(xcoff::XcoffComdat64<'data, 'file, R>), + #[cfg(feature = "omf")] + Omf(omf::OmfComdat<'data, 'file, R>), } impl<'data, 'file, R: ReadRef<'data>> fmt::Debug for Comdat<'data, 'file, R> { @@ -984,6 +1020,8 @@ enum ComdatSectionIteratorInternal<'data, 'file, R: ReadRef<'data>> { Xcoff32(xcoff::XcoffComdatSectionIterator32<'data, 'file, R>), #[cfg(feature = "xcoff")] 
Xcoff64(xcoff::XcoffComdatSectionIterator64<'data, 'file, R>), + #[cfg(feature = "omf")] + Omf(omf::OmfComdatSectionIterator<'data, 'file, R>), } impl<'data, 'file, R: ReadRef<'data>> Iterator for ComdatSectionIterator<'data, 'file, R> { @@ -1052,6 +1090,8 @@ where Xcoff32((xcoff::XcoffSymbolTable32<'data, 'file, R>, PhantomData)), #[cfg(feature = "xcoff")] Xcoff64((xcoff::XcoffSymbolTable64<'data, 'file, R>, PhantomData)), + #[cfg(feature = "omf")] + Omf((omf::OmfSymbolTable<'data, 'file, R>, PhantomData)), } impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for SymbolTable<'data, 'file, R> {} @@ -1146,6 +1186,8 @@ where PhantomData, ), ), + #[cfg(feature = "omf")] + Omf((omf::OmfSymbolIterator<'data, 'file>, PhantomData)), } impl<'data, 'file, R: ReadRef<'data>> Iterator for SymbolIterator<'data, 'file, R> { @@ -1215,6 +1257,8 @@ where Xcoff32((xcoff::XcoffSymbol32<'data, 'file, R>, PhantomData)), #[cfg(feature = "xcoff")] Xcoff64((xcoff::XcoffSymbol64<'data, 'file, R>, PhantomData)), + #[cfg(feature = "omf")] + Omf((omf::OmfSymbol<'data>, PhantomData)), } impl<'data, 'file, R: ReadRef<'data>> fmt::Debug for Symbol<'data, 'file, R> { @@ -1363,6 +1407,8 @@ enum SectionRelocationIteratorInternal<'data, 'file, R: ReadRef<'data>> { Xcoff32(xcoff::XcoffRelocationIterator32<'data, 'file, R>), #[cfg(feature = "xcoff")] Xcoff64(xcoff::XcoffRelocationIterator64<'data, 'file, R>), + #[cfg(feature = "omf")] + Omf(omf::OmfRelocationIterator<'data, 'file, R>), } impl<'data, 'file, R: ReadRef<'data>> Iterator for SectionRelocationIterator<'data, 'file, R> { diff --git a/src/read/mod.rs b/src/read/mod.rs index 11d2e732..f302aba6 100644 --- a/src/read/mod.rs +++ b/src/read/mod.rs @@ -104,6 +104,9 @@ pub mod wasm; #[cfg(feature = "xcoff")] pub mod xcoff; +#[cfg(feature = "omf")] +pub mod omf; + mod traits; pub use traits::*; @@ -278,6 +281,11 @@ pub enum FileKind { /// See [`xcoff::XcoffFile64`]. #[cfg(feature = "xcoff")] Xcoff64, + /// An OMF object file. 
+ /// + /// See [`omf::OmfFile`]. + #[cfg(feature = "omf")] + Omf, } impl FileKind { @@ -360,6 +368,15 @@ impl FileKind { [0x01, 0xdf, ..] => FileKind::Xcoff32, #[cfg(feature = "xcoff")] [0x01, 0xf7, ..] => FileKind::Xcoff64, + #[cfg(feature = "omf")] + [0x80, ..] | [0x82, ..] => { + // Check if it's a valid OMF record type + if crate::omf::is_omf_record_type(magic[0]) { + FileKind::Omf + } else { + return Err(Error("Unknown file magic")); + } + } _ => return Err(Error("Unknown file magic")), }; Ok(kind) diff --git a/src/read/omf/file.rs b/src/read/omf/file.rs new file mode 100644 index 00000000..806404e0 --- /dev/null +++ b/src/read/omf/file.rs @@ -0,0 +1,444 @@ +//! OMF file implementation for the unified read API. + +use crate::read::{ + self, Architecture, ByteString, ComdatKind, Error, Export, FileFlags, Import, + NoDynamicRelocationIterator, Object, ObjectComdat, ObjectKind, ObjectSection, ObjectSegment, + ReadRef, Result, SectionIndex, SegmentFlags, SymbolIndex, +}; +use crate::SubArchitecture; + +use super::{OmfFile, OmfSection, OmfSymbol, OmfSymbolIterator, OmfSymbolTable}; + +impl<'data, R: ReadRef<'data>> Object<'data> for OmfFile<'data, R> { + type Segment<'file> + = OmfSegmentRef<'data, 'file, R> + where + Self: 'file, + 'data: 'file; + type SegmentIterator<'file> + = OmfSegmentIterator<'data, 'file, R> + where + Self: 'file, + 'data: 'file; + type Section<'file> + = OmfSection<'data, 'file, R> + where + Self: 'file, + 'data: 'file; + type SectionIterator<'file> + = OmfSectionIterator<'data, 'file, R> + where + Self: 'file, + 'data: 'file; + type Comdat<'file> + = OmfComdat<'data, 'file, R> + where + Self: 'file, + 'data: 'file; + type ComdatIterator<'file> + = OmfComdatIterator<'data, 'file, R> + where + Self: 'file, + 'data: 'file; + type Symbol<'file> + = OmfSymbol<'data> + where + Self: 'file, + 'data: 'file; + type SymbolIterator<'file> + = OmfSymbolIterator<'data, 'file> + where + Self: 'file, + 'data: 'file; + type SymbolTable<'file> + = 
OmfSymbolTable<'data, 'file, R> + where + Self: 'file, + 'data: 'file; + type DynamicRelocationIterator<'file> + = NoDynamicRelocationIterator + where + Self: 'file, + 'data: 'file; + + fn architecture(&self) -> Architecture { + Architecture::I386 + } + + fn sub_architecture(&self) -> Option { + None + } + + fn is_little_endian(&self) -> bool { + true + } + + fn is_64(&self) -> bool { + false + } + + fn kind(&self) -> ObjectKind { + ObjectKind::Relocatable + } + + fn segments(&self) -> Self::SegmentIterator<'_> { + OmfSegmentIterator { + file: self, + index: 0, + } + } + + fn section_by_name_bytes<'file>( + &'file self, + section_name: &[u8], + ) -> Option> { + self.sections() + .find(|section| section.name_bytes() == Ok(section_name)) + } + + fn section_by_index(&self, index: SectionIndex) -> Result> { + let idx = index + .0 + .checked_sub(1) + .ok_or(Error("Invalid section index"))?; + if idx < self.segments.len() { + Ok(OmfSection { + file: self, + index: idx, + }) + } else { + Err(Error("Section index out of bounds")) + } + } + + fn sections(&self) -> Self::SectionIterator<'_> { + OmfSectionIterator { + file: self, + index: 0, + } + } + + fn comdats(&self) -> Self::ComdatIterator<'_> { + OmfComdatIterator { + file: self, + index: 0, + } + } + + fn symbol_by_index(&self, index: SymbolIndex) -> Result> { + let idx = index.0; + let total_publics = self.publics.len(); + let total_externals = self.externals.len(); + let total_before_communals = total_publics + total_externals; + + if idx < total_publics { + Ok(self.publics[idx].clone()) + } else if idx < total_before_communals { + Ok(self.externals[idx - total_publics].clone()) + } else if idx < total_before_communals + self.communals.len() { + Ok(self.communals[idx - total_before_communals].clone()) + } else { + Err(Error("Symbol index out of bounds")) + } + } + + fn symbols(&self) -> Self::SymbolIterator<'_> { + OmfSymbolIterator { + publics: &self.publics, + externals: &self.externals, + communals: 
&self.communals, + index: 0, + } + } + + fn symbol_table(&self) -> Option> { + Some(OmfSymbolTable { file: self }) + } + + fn dynamic_symbols(&self) -> Self::SymbolIterator<'_> { + OmfSymbolIterator { + publics: &[], + externals: &[], + communals: &[], + index: 0, + } + } + + fn dynamic_symbol_table(&self) -> Option> { + None + } + + fn dynamic_relocations(&self) -> Option> { + None + } + + fn imports(&self) -> Result>> { + // External symbols are imports in OMF + Ok(self + .externals + .iter() + .map(|ext| Import { + library: ByteString(b""), + name: ByteString(ext.name), + }) + .collect()) + } + + fn exports(&self) -> Result>> { + // Public symbols are exports in OMF + Ok(self + .publics + .iter() + .map(|pub_sym| Export { + name: ByteString(pub_sym.name), + address: pub_sym.offset as u64, + }) + .collect()) + } + + fn has_debug_symbols(&self) -> bool { + false + } + + fn mach_uuid(&self) -> Result> { + Ok(None) + } + + fn build_id(&self) -> Result> { + Ok(None) + } + + fn gnu_debuglink(&self) -> Result> { + Ok(None) + } + + fn gnu_debugaltlink(&self) -> Result> { + Ok(None) + } + + fn pdb_info(&self) -> Result>> { + Ok(None) + } + + fn relative_address_base(&self) -> u64 { + 0 + } + + fn entry(&self) -> u64 { + 0 + } + + fn flags(&self) -> FileFlags { + FileFlags::None + } +} + +/// An OMF segment reference. 
+#[derive(Debug)] +pub struct OmfSegmentRef<'data, 'file, R: ReadRef<'data>> { + file: &'file OmfFile<'data, R>, + index: usize, +} + +impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for OmfSegmentRef<'data, 'file, R> {} + +impl<'data, 'file, R: ReadRef<'data>> ObjectSegment<'data> for OmfSegmentRef<'data, 'file, R> { + fn address(&self) -> u64 { + 0 + } + + fn size(&self) -> u64 { + self.file.segments[self.index].length as u64 + } + + fn align(&self) -> u64 { + match self.file.segments[self.index].alignment { + crate::omf::SegmentAlignment::Byte => 1, + crate::omf::SegmentAlignment::Word => 2, + crate::omf::SegmentAlignment::Paragraph => 16, + crate::omf::SegmentAlignment::Page => 256, + crate::omf::SegmentAlignment::DWord => 4, + crate::omf::SegmentAlignment::Page4K => 4096, + _ => 1, + } + } + + fn file_range(&self) -> (u64, u64) { + (0, 0) + } + + fn data(&self) -> Result<&'data [u8]> { + // OMF segments don't have direct file mapping + Ok(&[]) + } + + fn data_range(&self, _address: u64, _size: u64) -> Result> { + Ok(None) + } + + fn name_bytes(&self) -> Result> { + Ok(self + .file + .get_name(self.file.segments[self.index].name_index)) + } + + fn name(&self) -> Result> { + let index = self.file.segments[self.index].name_index; + let name_opt = self.file.get_name(index); + match name_opt { + Some(bytes) => Ok(core::str::from_utf8(bytes).ok()), + None => Ok(None), + } + } + + fn flags(&self) -> SegmentFlags { + SegmentFlags::None + } +} + +/// An iterator over OMF segments. 
+#[derive(Debug)] +pub struct OmfSegmentIterator<'data, 'file, R: ReadRef<'data>> { + file: &'file OmfFile<'data, R>, + index: usize, +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for OmfSegmentIterator<'data, 'file, R> { + type Item = OmfSegmentRef<'data, 'file, R>; + + fn next(&mut self) -> Option { + if self.index < self.file.segments.len() { + let segment = OmfSegmentRef { + file: self.file, + index: self.index, + }; + self.index += 1; + Some(segment) + } else { + None + } + } +} + +/// An iterator over OMF sections. +#[derive(Debug)] +pub struct OmfSectionIterator<'data, 'file, R: ReadRef<'data>> { + file: &'file OmfFile<'data, R>, + index: usize, +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for OmfSectionIterator<'data, 'file, R> { + type Item = OmfSection<'data, 'file, R>; + + fn next(&mut self) -> Option { + if self.index < self.file.segments.len() { + let section = OmfSection { + file: self.file, + index: self.index, + }; + self.index += 1; + Some(section) + } else { + None + } + } +} + +/// A COMDAT section in an OMF file. 
+#[derive(Debug)] +pub struct OmfComdat<'data, 'file, R: ReadRef<'data>> { + file: &'file OmfFile<'data, R>, + index: usize, + _phantom: core::marker::PhantomData<&'data ()>, +} + +impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for OmfComdat<'data, 'file, R> {} + +impl<'data, 'file, R: ReadRef<'data>> ObjectComdat<'data> for OmfComdat<'data, 'file, R> { + type SectionIterator = OmfComdatSectionIterator<'data, 'file, R>; + + fn kind(&self) -> ComdatKind { + let comdat = &self.file.comdats[self.index]; + match comdat.selection { + super::OmfComdatSelection::Explicit => ComdatKind::NoDuplicates, + super::OmfComdatSelection::UseAny => ComdatKind::Any, + super::OmfComdatSelection::SameSize => ComdatKind::SameSize, + super::OmfComdatSelection::ExactMatch => ComdatKind::ExactMatch, + } + } + + fn symbol(&self) -> SymbolIndex { + // COMDAT symbols don't have a direct symbol index in OMF + // Return an invalid index + SymbolIndex(usize::MAX) + } + + fn name_bytes(&self) -> Result<&'data [u8]> { + let comdat = &self.file.comdats[self.index]; + Ok(comdat.name) + } + + fn name(&self) -> Result<&'data str> { + let comdat = &self.file.comdats[self.index]; + core::str::from_utf8(comdat.name).map_err(|_| Error("Invalid UTF-8 in COMDAT name")) + } + + fn sections(&self) -> Self::SectionIterator { + let comdat = &self.file.comdats[self.index]; + OmfComdatSectionIterator { + segment_index: if comdat.segment_index > 0 { + Some(comdat.segment_index as usize - 1) + } else { + None + }, + returned: false, + _phantom: core::marker::PhantomData, + } + } +} + +/// An iterator over COMDAT sections. 
+#[derive(Debug)] +pub struct OmfComdatIterator<'data, 'file, R: ReadRef<'data>> { + file: &'file OmfFile<'data, R>, + index: usize, +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for OmfComdatIterator<'data, 'file, R> { + type Item = OmfComdat<'data, 'file, R>; + + fn next(&mut self) -> Option { + if self.index < self.file.comdats.len() { + let comdat = OmfComdat { + file: self.file, + index: self.index, + _phantom: core::marker::PhantomData, + }; + self.index += 1; + Some(comdat) + } else { + None + } + } +} + +/// An iterator over sections in a COMDAT. +#[derive(Debug)] +pub struct OmfComdatSectionIterator<'data, 'file, R: ReadRef<'data>> { + segment_index: Option, + returned: bool, + _phantom: core::marker::PhantomData<(&'data (), &'file (), R)>, +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for OmfComdatSectionIterator<'data, 'file, R> { + type Item = SectionIndex; + + fn next(&mut self) -> Option { + if !self.returned { + self.returned = true; + self.segment_index.map(|idx| SectionIndex(idx + 1)) + } else { + None + } + } +} diff --git a/src/read/omf/mod.rs b/src/read/omf/mod.rs new file mode 100644 index 00000000..959d033b --- /dev/null +++ b/src/read/omf/mod.rs @@ -0,0 +1,1438 @@ +//! OMF file reading support. + +use alloc::str; +use alloc::vec::Vec; + +use crate::omf; +use crate::read::{self, Error, ReadRef, Result}; + +mod file; +pub use file::*; + +mod section; +pub use section::*; + +mod symbol; +pub use symbol::*; + +mod relocation; +pub use relocation::*; + +/// An OMF object file. +/// +/// This handles both 16-bit and 32-bit OMF variants. 
+#[derive(Debug)] +pub struct OmfFile<'data, R: ReadRef<'data> = &'data [u8]> { + data: R, + /// The module name from THEADR/LHEADR record + module_name: Option<&'data str>, + /// Segment definitions + segments: Vec>, + /// Public symbols + publics: Vec>, + /// External symbols + externals: Vec>, + /// Communal symbols from COMDEF + communals: Vec>, + /// Weak externals from WKEXT comment records + weak_externals: Vec>, + /// COMDAT sections + comdats: Vec>, + /// Name table (LNAMES/LLNAMES) + names: Vec<&'data [u8]>, + /// Group definitions + groups: Vec, +} + +/// Data chunk for a segment +#[derive(Debug, Clone)] +pub enum OmfDataChunk<'data> { + /// Direct data from LEDATA record + Direct(&'data [u8]), + /// Compressed/iterated data from LIDATA record (needs expansion) + Iterated(&'data [u8]), +} + +/// An OMF segment definition +#[derive(Debug, Clone)] +pub struct OmfSegment<'data> { + /// Segment name index (into names table) + pub name_index: u16, + /// Class name index (into names table) + pub class_index: u16, + /// Overlay name index (into names table) + pub overlay_index: u16, + /// Segment alignment + pub alignment: omf::SegmentAlignment, + /// Segment combination + pub combination: omf::SegmentCombination, + /// Whether this is a 32-bit segment + pub use32: bool, + /// Segment length + pub length: u32, + /// Segment data chunks (offset, data) + /// Multiple LEDATA/LIDATA records can contribute to a single segment + pub data_chunks: Vec<(u32, OmfDataChunk<'data>)>, + /// Relocations for this segment + pub relocations: Vec, +} + +/// An OMF symbol (public or external) +#[derive(Debug, Clone)] +pub struct OmfSymbol<'data> { + /// Symbol table index + pub symbol_index: usize, + /// Symbol name + pub name: &'data [u8], + /// Group index (0 if none) + pub group_index: u16, + /// Segment index (0 if external) + pub segment_index: u16, + /// Offset within segment + pub offset: u32, + /// Type index (usually 0) + pub type_index: u16, + /// Pre-computed symbol 
kind + pub kind: read::SymbolKind, +} + +/// An OMF group definition +#[derive(Debug, Clone)] +pub struct OmfGroup { + /// Group name index (into names table) + pub name_index: u16, + /// Segment indices in this group + pub segments: Vec, +} + +/// An OMF relocation/fixup +#[derive(Debug, Clone)] +pub struct OmfRelocation { + /// Offset in segment where fixup is applied + pub offset: u32, + /// Location type (what to patch) + pub location: omf::FixupLocation, + /// Frame method + pub frame_method: omf::FrameMethod, + /// Target method + pub target_method: omf::TargetMethod, + /// Frame index (meaning depends on frame_method) + pub frame_index: u16, + /// Target index (meaning depends on target_method) + pub target_index: u16, + /// Target displacement + pub target_displacement: u32, + /// M-bit: true for segment-relative, false for PC-relative + pub is_segment_relative: bool, +} + +/// A COMDAT (communal data) section +#[derive(Debug, Clone)] +pub struct OmfComdatData<'data> { + /// Symbol name + pub name: &'data [u8], + /// Segment index where this COMDAT belongs + pub segment_index: u16, + /// Selection/allocation method + pub selection: OmfComdatSelection, + /// Alignment + pub alignment: omf::SegmentAlignment, + /// Data + pub data: &'data [u8], +} + +/// COMDAT selection methods +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum OmfComdatSelection { + /// Explicit: may not be combined, produce error if multiple definitions + Explicit = 0, + /// Use any: pick any instance + UseAny = 1, + /// Same size: all instances must be same size + SameSize = 2, + /// Exact match: all instances must have identical content + ExactMatch = 3, +} + +/// A weak extern definition +#[derive(Debug, Clone)] +pub struct OmfWeakExtern<'data> { + /// Weak symbol index (external symbol) + pub weak_symbol_index: u16, + /// Default resolution symbol index + pub default_symbol_index: u16, + /// Weak symbol name + pub weak_name: &'data [u8], + /// Default symbol name + pub 
default_name: &'data [u8], +} + +/// Thread definition for FIXUPP parsing +#[derive(Debug, Clone, Copy)] +struct ThreadDef { + /// 3-bit method (frame or target method) + method: u8, + /// Index value (meaning depends on method) + index: u16, +} + +impl<'data> OmfSegment<'data> { + /// Get the raw data of the segment if it's a single contiguous chunk + pub fn get_single_chunk(&self) -> Option<&'data [u8]> { + if self.data_chunks.len() == 1 { + let (offset, chunk) = &self.data_chunks[0]; + if *offset == 0 { + match chunk { + OmfDataChunk::Direct(data) if data.len() == self.length as usize => { + return Some(data); + } + _ => {} + } + } + } + None + } + + /// Check if any data chunk needs expansion (LIDATA) + pub fn has_iterated_data(&self) -> bool { + self.data_chunks + .iter() + .any(|(_, chunk)| matches!(chunk, OmfDataChunk::Iterated(_))) + } +} + +impl<'data, R: ReadRef<'data>> read::private::Sealed for OmfFile<'data, R> {} + +impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { + /// Parse an OMF file from raw data + pub fn parse(data: R) -> Result { + let mut file = OmfFile { + data, + module_name: None, + segments: Vec::new(), + publics: Vec::new(), + externals: Vec::new(), + communals: Vec::new(), + weak_externals: Vec::new(), + comdats: Vec::new(), + names: Vec::new(), + groups: Vec::new(), + }; + + file.parse_records()?; + file.assign_symbol_indices(); + Ok(file) + } + + fn assign_symbol_indices(&mut self) { + let mut index = 0; + + // First, compute kinds for public symbols based on their segments + let public_kinds = self + .publics + .iter() + .map(|sym| { + if sym.segment_index > 0 && (sym.segment_index as usize) <= self.segments.len() { + let segment_idx = (sym.segment_index - 1) as usize; + let section_kind = self.segment_section_kind(segment_idx); + Self::symbol_kind_from_section_kind(section_kind) + } else { + read::SymbolKind::Unknown + } + }) + .collect::>(); + + // Assign indices to public symbols and their pre-computed kinds + for (sym, kind) in 
self.publics.iter_mut().zip(public_kinds) { + sym.symbol_index = index; + sym.kind = kind; + index += 1; + } + + // Assign indices to external symbols + for sym in self.externals.iter_mut() { + sym.symbol_index = index; + sym.kind = read::SymbolKind::Unknown; + index += 1; + } + + // Assign indices to communal symbols + for sym in self.communals.iter_mut() { + sym.symbol_index = index; + sym.kind = read::SymbolKind::Data; + index += 1; + } + } + + fn symbol_kind_from_section_kind(section_kind: read::SectionKind) -> read::SymbolKind { + match section_kind { + read::SectionKind::Text => read::SymbolKind::Text, + read::SectionKind::Data | read::SectionKind::ReadOnlyData => read::SymbolKind::Data, + read::SectionKind::UninitializedData => read::SymbolKind::Data, + _ => read::SymbolKind::Unknown, + } + } + + /// Get the section kind for a segment (reusing logic from OmfSection) + pub fn segment_section_kind(&self, segment_index: usize) -> read::SectionKind { + if segment_index >= self.segments.len() { + return read::SectionKind::Unknown; + } + + let segment = &self.segments[segment_index]; + + // Check segment name first for special cases + if let Some(seg_name) = self.get_name(segment.name_index) { + // Segments named CONST are always read-only regardless of class + match seg_name { + b"CONST" | b"_CONST" | b"CONST2" | b"RDATA" | b"_RDATA" => { + return read::SectionKind::ReadOnlyData; + } + _ => {} + } + + // Check for debug sections by name + if seg_name.starts_with(b"$$") { + // Watcom-style debug sections + return read::SectionKind::Debug; + } + if seg_name == b".drectve" || seg_name == b".DRECTVE" { + return read::SectionKind::Linker; + } + + // Check other common names + let name_upper = seg_name.to_ascii_uppercase(); + if name_upper == b"_TEXT" || name_upper == b"CODE" || name_upper == b".TEXT" { + return read::SectionKind::Text; + } else if name_upper == b"_DATA" || name_upper == b"DATA" || name_upper == b".DATA" { + return read::SectionKind::Data; + } else if 
name_upper == b"_BSS" || name_upper == b"BSS" || name_upper == b".BSS" + || name_upper == b"STACK" { + return read::SectionKind::UninitializedData; + } + } + + // Determine kind from class name + if let Some(class_name) = self.get_name(segment.class_index) { + // Check for exact matches first (most common case) + match class_name { + b"CODE" | b"_TEXT" | b"TEXT" => return read::SectionKind::Text, + b"CONST" | b"_CONST" | b"CONST2" | b"RDATA" | b"_RDATA" => { + return read::SectionKind::ReadOnlyData; + } + b"BSS" | b"_BSS" => return read::SectionKind::UninitializedData, + b"STACK" | b"_STACK" => return read::SectionKind::UninitializedData, + b"DEBUG" | b"_DEBUG" | b"DEBSYM" | b"DEBTYP" => return read::SectionKind::Debug, + b"DATA" | b"_DATA" => { + // DATA sections with no actual data are treated as uninitialized + if segment.data_chunks.is_empty() { + return read::SectionKind::UninitializedData; + } else { + return read::SectionKind::Data; + } + } + _ => {} + } + + // Check for case-insensitive substring matches for less common variations + let class_upper = class_name.to_ascii_uppercase(); + if class_upper.windows(4).any(|w| w == b"CODE") { + return read::SectionKind::Text; + } else if class_upper.windows(5).any(|w| w == b"CONST") { + return read::SectionKind::ReadOnlyData; + } else if class_upper.windows(3).any(|w| w == b"BSS") + || class_upper.windows(5).any(|w| w == b"STACK") + { + return read::SectionKind::UninitializedData; + } else if class_upper.windows(5).any(|w| w == b"DEBUG") { + return read::SectionKind::Debug; + } else if class_upper.windows(4).any(|w| w == b"DATA") { + // DATA sections with no actual data are treated as uninitialized + if segment.data_chunks.is_empty() { + return read::SectionKind::UninitializedData; + } else { + return read::SectionKind::Data; + } + } + } + + // Final fallback based on whether segment has data + if segment.data_chunks.is_empty() { + read::SectionKind::UninitializedData + } else { + read::SectionKind::Unknown + } + } 
+ + fn parse_records(&mut self) -> Result<()> { + let len = self + .data + .len() + .map_err(|_| Error("Failed to get data length"))?; + let data = self + .data + .read_bytes_at(0, len) + .map_err(|_| Error("Failed to read OMF data"))?; + let mut offset = 0; + + // First record must be THEADR or LHEADR + if data.is_empty() { + return Err(Error("Empty OMF file")); + } + + let first_type = data[0]; + if first_type != omf::record_type::THEADR && first_type != omf::record_type::LHEADR { + return Err(Error( + "Invalid OMF file: first record must be THEADR or LHEADR", + )); + } + + let mut current_segment: Option = None; + let mut current_data_offset: Option = None; + + // Thread storage for FIXUPP parsing + let mut frame_threads: [Option; 4] = [None; 4]; + let mut target_threads: [Option; 4] = [None; 4]; + + while offset < data.len() { + // Read record header + if offset + 3 > data.len() { + break; + } + + let record_type = data[offset]; + let length = u16::from_le_bytes([data[offset + 1], data[offset + 2]]) as usize; + + // Length includes the checksum byte at the end + if offset + 3 + length > data.len() { + return Err(Error("Truncated OMF record")); + } + + // Record data excludes the checksum + let record_data = &data[offset + 3..offset + 3 + length - 1]; + let checksum = data[offset + 3 + length - 1]; + + // Verify checksum + if !Self::verify_checksum(record_type, length, record_data, checksum) { + return Err(Error("Invalid OMF record checksum")); + } + + // Process record based on type + match record_type { + omf::record_type::THEADR | omf::record_type::LHEADR => { + self.parse_header(record_data)?; + } + omf::record_type::LNAMES | omf::record_type::LLNAMES => { + self.parse_names(record_data)?; + } + omf::record_type::SEGDEF | omf::record_type::SEGDEF32 => { + self.parse_segdef(record_data, record_type == omf::record_type::SEGDEF32)?; + } + omf::record_type::GRPDEF => { + self.parse_grpdef(record_data)?; + } + omf::record_type::PUBDEF | omf::record_type::PUBDEF32 
=> { + self.parse_pubdef(record_data, record_type == omf::record_type::PUBDEF32)?; + } + omf::record_type::EXTDEF => { + self.parse_extdef(record_data)?; + } + omf::record_type::COMDEF => { + self.parse_comdef(record_data)?; + } + omf::record_type::COMDAT | omf::record_type::COMDAT32 => { + self.parse_comdat(record_data, record_type == omf::record_type::COMDAT32)?; + } + omf::record_type::COMENT => { + self.parse_comment(record_data)?; + } + omf::record_type::LEDATA | omf::record_type::LEDATA32 => { + let (seg_idx, offset) = + self.parse_ledata(record_data, record_type == omf::record_type::LEDATA32)?; + current_segment = Some(seg_idx); + current_data_offset = Some(offset); + } + omf::record_type::LIDATA | omf::record_type::LIDATA32 => { + let (seg_idx, offset) = + self.parse_lidata(record_data, record_type == omf::record_type::LIDATA32)?; + current_segment = Some(seg_idx); + current_data_offset = Some(offset); + } + omf::record_type::FIXUPP | omf::record_type::FIXUPP32 => { + if let (Some(seg_idx), Some(data_offset)) = + (current_segment, current_data_offset) + { + self.parse_fixupp( + record_data, + record_type == omf::record_type::FIXUPP32, + seg_idx, + data_offset, + &mut frame_threads, + &mut target_threads, + )?; + } + } + omf::record_type::MODEND | omf::record_type::MODEND32 => { + // End of module + break; + } + _ => { + // Skip unknown record types + } + } + + offset += 3 + length; // header + data (which includes checksum) + } + + Ok(()) + } + + fn parse_header(&mut self, data: &'data [u8]) -> Result<()> { + if let Some((name, _)) = omf::read_counted_string(data) { + self.module_name = core::str::from_utf8(name).ok(); + } + Ok(()) + } + + fn parse_names(&mut self, data: &'data [u8]) -> Result<()> { + let mut offset = 0; + while offset < data.len() { + if let Some((name, size)) = omf::read_counted_string(&data[offset..]) { + self.names.push(name); + offset += size; + } else { + break; + } + } + Ok(()) + } + + fn parse_segdef(&mut self, data: &'data [u8], 
is_32bit: bool) -> Result<()> { + let mut offset = 0; + + // Parse ACBP byte + if offset >= data.len() { + return Err(Error("Truncated SEGDEF record")); + } + let acbp = data[offset]; + offset += 1; + + let alignment = match (acbp >> 5) & 0x07 { + 0 => omf::SegmentAlignment::Absolute, + 1 => omf::SegmentAlignment::Byte, + 2 => omf::SegmentAlignment::Word, + 3 => omf::SegmentAlignment::Paragraph, + 4 => omf::SegmentAlignment::Page, + 5 => omf::SegmentAlignment::DWord, + 6 => omf::SegmentAlignment::Page4K, + _ => return Err(Error("Invalid segment alignment")), + }; + + let combination = match (acbp >> 2) & 0x07 { + 0 => omf::SegmentCombination::Private, + 2 => omf::SegmentCombination::Public, + 5 => omf::SegmentCombination::Stack, + 6 => omf::SegmentCombination::Common, + _ => return Err(Error("Invalid segment combination")), + }; + + let use32 = (acbp & 0x01) != 0; + + // Skip frame number and offset for absolute segments + if alignment == omf::SegmentAlignment::Absolute { + offset += 3; // frame (2) + offset (1) + } + + // Parse segment length + let length = if is_32bit || use32 { + if offset + 4 > data.len() { + return Err(Error("Truncated SEGDEF record")); + } + let length = u32::from_le_bytes([ + data[offset], + data[offset + 1], + data[offset + 2], + data[offset + 3], + ]); + offset += 4; + length + } else { + if offset + 2 > data.len() { + return Err(Error("Truncated SEGDEF record")); + } + let length = u16::from_le_bytes([data[offset], data[offset + 1]]) as u32; + offset += 2; + length + }; + + // Parse segment name index + let (name_index, size) = + omf::read_index(&data[offset..]).ok_or(Error("Invalid segment name index"))?; + offset += size; + + // Parse class name index + let (class_index, size) = + omf::read_index(&data[offset..]).ok_or(Error("Invalid class name index"))?; + offset += size; + + // Parse overlay name index + let (overlay_index, _) = + omf::read_index(&data[offset..]).ok_or(Error("Invalid overlay name index"))?; + + 
self.segments.push(OmfSegment { + name_index, + class_index, + overlay_index, + alignment, + combination, + use32, + length, + data_chunks: Vec::new(), + relocations: Vec::new(), + }); + + Ok(()) + } + + fn parse_grpdef(&mut self, data: &'data [u8]) -> Result<()> { + let mut offset = 0; + + // Parse group name index + let (name_index, size) = omf::read_index(data).ok_or(Error("Invalid group name index"))?; + offset += size; + + let mut segments = Vec::new(); + + // Parse segment indices + while offset < data.len() { + if data[offset] == 0xFF { + // Segment index follows + offset += 1; + let (seg_index, size) = omf::read_index(&data[offset..]) + .ok_or(Error("Invalid segment index in group"))?; + offset += size; + segments.push(seg_index); + } else { + break; + } + } + + self.groups.push(OmfGroup { + name_index, + segments, + }); + + Ok(()) + } + + fn parse_pubdef(&mut self, data: &'data [u8], is_32bit: bool) -> Result<()> { + let mut offset = 0; + + // Parse group index + let (group_index, size) = omf::read_index(data).ok_or(Error("Invalid group index"))?; + offset += size; + + // Parse segment index + let (segment_index, size) = + omf::read_index(&data[offset..]).ok_or(Error("Invalid segment index"))?; + offset += size; + + // Skip frame number if segment index is 0 + if segment_index == 0 { + offset += 2; + } + + // Parse public definitions + while offset < data.len() { + // Parse name + let Some((name, size)) = omf::read_counted_string(&data[offset..]) else { + break; + }; + offset += size; + + // Parse offset + let pub_offset = if is_32bit { + if offset + 4 > data.len() { + break; + } + let off = u32::from_le_bytes([ + data[offset], + data[offset + 1], + data[offset + 2], + data[offset + 3], + ]); + offset += 4; + off + } else { + if offset + 2 > data.len() { + break; + } + let off = u16::from_le_bytes([data[offset], data[offset + 1]]) as u32; + offset += 2; + off + }; + + // Parse type index + let (type_index, size) = omf::read_index(&data[offset..]) + 
.ok_or(Error("Invalid type index in EXTDEF record"))?; + offset += size; + + self.publics.push(OmfSymbol { + symbol_index: 0, // Will be assigned later + name, + group_index, + segment_index, + offset: pub_offset, + type_index, + kind: read::SymbolKind::Unknown, // Will be computed later + }); + } + + Ok(()) + } + + fn parse_extdef(&mut self, data: &'data [u8]) -> Result<()> { + let mut offset = 0; + + while offset < data.len() { + // Parse name + let Some((name, size)) = omf::read_counted_string(&data[offset..]) else { + break; + }; + offset += size; + + // Parse type index + let (type_index, size) = omf::read_index(&data[offset..]) + .ok_or(Error("Invalid type index in EXTDEF record"))?; + offset += size; + + self.externals.push(OmfSymbol { + symbol_index: 0, // Will be assigned later + name, + group_index: 0, + segment_index: 0, + offset: 0, + type_index, + kind: read::SymbolKind::Unknown, + }); + } + + Ok(()) + } + + fn parse_comdef(&mut self, data: &'data [u8]) -> Result<()> { + let mut offset = 0; + + while offset < data.len() { + // Parse name + let Some((name, size)) = omf::read_counted_string(&data[offset..]) else { + break; + }; + offset += size; + + // Parse type index + let (type_index, size) = omf::read_index(&data[offset..]) + .ok_or(Error("Invalid type index in COMDEF record"))?; + offset += size; + + // Parse data type and communal length + if offset >= data.len() { + break; + } + let data_type = data[offset]; + offset += 1; + + let communal_length = match data_type { + 0x61 => { + // FAR data - number of elements followed by element size + let (num_elements, size1) = omf::read_encoded_value(&data[offset..]) + .ok_or(Error("Invalid number of elements in FAR COMDEF"))?; + offset += size1; + let (element_size, size2) = omf::read_encoded_value(&data[offset..]) + .ok_or(Error("Invalid element size in FAR COMDEF"))?; + offset += size2; + num_elements * element_size + } + 0x62 => { + // NEAR data - size in bytes + let (size_val, size_bytes) = 
omf::read_encoded_value(&data[offset..]) + .ok_or(Error("Invalid size in NEAR COMDEF"))?; + offset += size_bytes; + size_val + } + _ => { + // Unknown data type, skip + continue; + } + }; + + self.communals.push(OmfSymbol { + symbol_index: 0, // Will be assigned later + name, + group_index: 0, + segment_index: 0, + offset: communal_length, // Store size in offset field + type_index, + kind: read::SymbolKind::Data, + }); + } + + Ok(()) + } + + fn parse_comdat(&mut self, data: &'data [u8], is_32bit: bool) -> Result<()> { + let mut offset = 0; + + // Parse flags byte + if offset >= data.len() { + return Err(Error("Truncated COMDAT record")); + } + let _flags = data[offset]; + offset += 1; + + // Parse attributes byte + if offset >= data.len() { + return Err(Error("Truncated COMDAT record")); + } + let attributes = data[offset]; + offset += 1; + + // Extract selection criteria from high nibble of attributes + let selection = match (attributes >> 4) & 0x0F { + 0x00 => OmfComdatSelection::Explicit, // No match + 0x01 => OmfComdatSelection::UseAny, // Pick any + 0x02 => OmfComdatSelection::SameSize, // Same size + 0x03 => OmfComdatSelection::ExactMatch, // Exact match + _ => OmfComdatSelection::UseAny, + }; + + // Extract allocation type from low nibble of attributes + let allocation_type = attributes & 0x0F; + + // Parse align/segment index field + let (segment_index, size) = + omf::read_index(&data[offset..]).ok_or(Error("Invalid COMDAT segment index"))?; + offset += size; + + // Determine alignment - if segment index is 0-7, it's actually an alignment value + let alignment = if segment_index <= 7 { + match segment_index { + 0 => omf::SegmentAlignment::Absolute, // Use value from SEGDEF + 1 => omf::SegmentAlignment::Byte, + 2 => omf::SegmentAlignment::Word, + 3 => omf::SegmentAlignment::Paragraph, + 4 => omf::SegmentAlignment::Page, + 5 => omf::SegmentAlignment::DWord, + 6 => omf::SegmentAlignment::Page4K, + _ => omf::SegmentAlignment::Byte, + } + } else { + 
omf::SegmentAlignment::Byte // Default alignment + }; + + // Parse data offset + let _data_offset = if is_32bit { + if offset + 4 > data.len() { + return Err(Error("Truncated COMDAT record")); + } + let off = u32::from_le_bytes([ + data[offset], + data[offset + 1], + data[offset + 2], + data[offset + 3], + ]); + offset += 4; + off + } else { + if offset + 2 > data.len() { + return Err(Error("Truncated COMDAT record")); + } + let off = u16::from_le_bytes([data[offset], data[offset + 1]]) as u32; + offset += 2; + off + }; + + // Parse type index + let (_type_index, size) = + omf::read_index(&data[offset..]).ok_or(Error("Invalid type index in COMDAT record"))?; + offset += size; + + // Parse public base (only if allocation type is 0x00 - Explicit) + if allocation_type == 0x00 { + // Has public base (Base Group, Base Segment, Base Frame) + let (_group_index, size) = omf::read_index(&data[offset..]) + .ok_or(Error("Invalid group index in COMDAT record"))?; + offset += size; + let (_seg_idx, size) = omf::read_index(&data[offset..]) + .ok_or(Error("Invalid segment index in COMDAT record"))?; + offset += size; + if _seg_idx == 0 { + if offset + 2 <= data.len() { + offset += 2; // Skip frame number + } + } + } + + // Parse public name - this is an index into LNAMES + let (name_index, size) = + omf::read_index(&data[offset..]).ok_or(Error("Invalid name index in COMDAT record"))?; + offset += size; + + // Look up the name from the names table + let name = name_index + .checked_sub(1) + .and_then(|i| self.names.get(i as usize).copied()) + .unwrap_or(b""); + + // Remaining data is the COMDAT content + let comdat_data = &data[offset..]; + + self.comdats.push(OmfComdatData { + name, + segment_index, + selection, + alignment, + data: comdat_data, + }); + + Ok(()) + } + + fn parse_comment(&mut self, data: &'data [u8]) -> Result<()> { + if data.len() < 2 { + return Ok(()); // Ignore truncated comments + } + + let _comment_type = data[0]; // Usually 0x00 for non-purge, 0x40 for purge 
+ let comment_class = data[1]; + + // Check for WKEXT (Weak Extern) comment class 0xA8 + if comment_class == 0xA8 && data.len() > 2 { + self.parse_wkext(&data[2..])?; + } + + Ok(()) + } + + fn parse_wkext(&mut self, data: &'data [u8]) -> Result<()> { + let mut offset = 0; + + while offset + 2 <= data.len() { + // Parse weak extern index (index into EXTDEF) + let (weak_index, size) = + omf::read_index(&data[offset..]).ok_or(Error("Invalid WKEXT weak index"))?; + offset += size; + + // Parse default resolution index + let (default_index, size) = + omf::read_index(&data[offset..]).ok_or(Error("Invalid WKEXT default index"))?; + offset += size; + + // Look up names if indices are valid + let weak_name = weak_index + .checked_sub(1) + .and_then(|idx| self.externals.get(idx as usize).map(|s| s.name)) + .unwrap_or(b""); + let default_name = default_index + .checked_sub(1) + .and_then(|idx| self.externals.get(idx as usize).map(|s| s.name)) + .unwrap_or(b""); + + self.weak_externals.push(OmfWeakExtern { + weak_symbol_index: weak_index, + default_symbol_index: default_index, + weak_name, + default_name, + }); + } + + Ok(()) + } + + fn parse_ledata(&mut self, data: &'data [u8], is_32bit: bool) -> Result<(usize, u32)> { + let mut offset = 0; + + // Parse segment index + let (segment_index, size) = + omf::read_index(data).ok_or(Error("Invalid segment index in LEDATA"))?; + offset += size; + + if segment_index == 0 || segment_index > self.segments.len() as u16 { + return Err(Error("Invalid segment index in LEDATA")); + } + + // Parse data offset + let data_offset = if is_32bit { + if offset + 4 > data.len() { + return Err(Error("Truncated LEDATA record")); + } + let off = u32::from_le_bytes([ + data[offset], + data[offset + 1], + data[offset + 2], + data[offset + 3], + ]); + offset += 4; + off + } else { + if offset + 2 > data.len() { + return Err(Error("Truncated LEDATA record")); + } + let off = u16::from_le_bytes([data[offset], data[offset + 1]]) as u32; + offset += 2; + off 
+ }; + + // Store reference to data chunk + let seg_idx = (segment_index - 1) as usize; + let segment = &mut self.segments[seg_idx]; + + // Store the data chunk reference + if offset < data.len() { + segment + .data_chunks + .push((data_offset, OmfDataChunk::Direct(&data[offset..]))); + } + + Ok((seg_idx, data_offset)) + } + + fn parse_fixupp( + &mut self, + data: &'data [u8], + is_32bit: bool, + seg_idx: usize, + data_offset: u32, + frame_threads: &mut [Option; 4], + target_threads: &mut [Option; 4], + ) -> Result<()> { + let mut offset = 0; + + while offset < data.len() { + let b = data[offset]; + offset += 1; + + if (b & 0x80) == 0 { + // THREAD subrecord + let is_frame = (b & 0x40) != 0; // D-bit + let method = (b >> 2) & 0x07; // Method bits + let thread_num = (b & 0x03) as usize; // Thread number (0-3) + + let index = if method < 3 { + // Methods 0-2 have an index + let (idx, size) = omf::read_index(&data[offset..]) + .ok_or(Error("Invalid index in THREAD subrecord"))?; + offset += size; + idx + } else if method == 3 { + // Method 3 has a raw frame number + if offset + 2 > data.len() { + return Err(Error("Invalid frame number in THREAD subrecord")); + } + let frame_num = u16::from_le_bytes([data[offset], data[offset + 1]]); + offset += 2; + frame_num + } else { + 0 + }; + + // Store the thread definition + let thread_def = ThreadDef { method, index }; + if is_frame { + frame_threads[thread_num] = Some(thread_def); + } else { + target_threads[thread_num] = Some(thread_def); + } + } else { + // FIXUP subrecord + if offset + 1 > data.len() { + return Err(Error("Truncated FIXUP location")); + } + let locat = data[offset] as u32 | (((b as u32) & 0x03) << 8); + offset += 1; + + let location = match (b >> 2) & 0x0F { + 0 => omf::FixupLocation::LowByte, + 1 => omf::FixupLocation::Offset, + 2 => omf::FixupLocation::Base, + 3 => omf::FixupLocation::Pointer, + 4 => omf::FixupLocation::HighByte, + 5 => omf::FixupLocation::LoaderOffset, + 9 => 
omf::FixupLocation::Offset32, + 11 => omf::FixupLocation::Pointer48, + 13 => omf::FixupLocation::LoaderOffset32, + _ => continue, // Skip unknown fixup types + }; + + // Parse fix data byte + if offset >= data.len() { + return Err(Error("Truncated FIXUP fix data")); + } + let fix_data = data[offset]; + offset += 1; + + // Check F-bit (bit 7 of fix_data) + let (frame_method, frame_index) = if (fix_data & 0x80) != 0 { + // F=1: Use frame thread + let thread_num = ((fix_data >> 4) & 0x03) as usize; + match frame_threads[thread_num] { + Some(thread) => { + let method = match thread.method { + 0 => omf::FrameMethod::SegmentIndex, + 1 => omf::FrameMethod::GroupIndex, + 2 => omf::FrameMethod::ExternalIndex, + 3 => omf::FrameMethod::FrameNumber, + 4 => omf::FrameMethod::Location, + 5 => omf::FrameMethod::Target, + _ => return Err(Error("Invalid frame method in thread")), + }; + (method, thread.index) + } + None => return Err(Error("Undefined frame thread in FIXUP")), + } + } else { + // F=0: Read frame datum + let method_bits = (fix_data >> 4) & 0x07; + let method = match method_bits { + 0 => omf::FrameMethod::SegmentIndex, + 1 => omf::FrameMethod::GroupIndex, + 2 => omf::FrameMethod::ExternalIndex, + 3 => omf::FrameMethod::FrameNumber, + 4 => omf::FrameMethod::Location, + 5 => omf::FrameMethod::Target, + _ => return Err(Error("Invalid frame method in FIXUP")), + }; + let index = match method { + omf::FrameMethod::SegmentIndex + | omf::FrameMethod::GroupIndex + | omf::FrameMethod::ExternalIndex => { + let (idx, size) = omf::read_index(&data[offset..]) + .ok_or(Error("Truncated FIXUP frame datum: missing index data"))?; + offset += size; + idx + } + omf::FrameMethod::FrameNumber => { + if offset + 2 > data.len() { + return Err(Error( + "Truncated FIXUP frame datum: missing frame number", + )); + } + let frame_num = u16::from_le_bytes([data[offset], data[offset + 1]]); + offset += 2; + frame_num + } + omf::FrameMethod::Location | omf::FrameMethod::Target => 0, + }; + 
(method, index) + }; + + // Check T-bit (bit 3 of fix_data) + let (target_method, target_index) = if (fix_data & 0x08) != 0 { + // T=1: Use target thread + let thread_num = (fix_data & 0x03) as usize; + match target_threads[thread_num] { + Some(thread) => { + // Only check the low 2 bits of method for target + let method = match thread.method & 0x03 { + 0 => omf::TargetMethod::SegmentIndex, + 1 => omf::TargetMethod::GroupIndex, + 2 => omf::TargetMethod::ExternalIndex, + 3 => omf::TargetMethod::FrameNumber, + _ => return Err(Error("Invalid target method in thread")), + }; + (method, thread.index) + } + None => return Err(Error("Undefined target thread in FIXUP")), + } + } else { + // T=0: Read target datum + // Only check the low 2 bits of method for target + let method = match fix_data & 0x03 { + 0 => omf::TargetMethod::SegmentIndex, + 1 => omf::TargetMethod::GroupIndex, + 2 => omf::TargetMethod::ExternalIndex, + 3 => omf::TargetMethod::FrameNumber, + _ => return Err(Error("Invalid frame method in FIXUP")), + }; + let index = match method { + omf::TargetMethod::SegmentIndex + | omf::TargetMethod::GroupIndex + | omf::TargetMethod::ExternalIndex => { + let (idx, size) = omf::read_index(&data[offset..]) + .ok_or(Error("Truncated FIXUP target datum: missing index data"))?; + offset += size; + idx + } + omf::TargetMethod::FrameNumber => { + if offset + 2 > data.len() { + return Err(Error( + "Truncated FIXUP target datum: missing frame number", + )); + } + let frame_num = u16::from_le_bytes([data[offset], data[offset + 1]]); + offset += 2; + frame_num + } + }; + (method, index) + }; + + // Parse target displacement if present (P=0) + let target_displacement = if fix_data & 0x04 == 0 { + if is_32bit { + if offset + 4 <= data.len() { + let disp = u32::from_le_bytes([ + data[offset], + data[offset + 1], + data[offset + 2], + data[offset + 3], + ]); + offset += 4; + disp + } else { + return Err(Error("Truncated FIXUP 32-bit displacement")); + } + } else { + if offset + 2 <= 
data.len() { + let disp = u16::from_le_bytes([data[offset], data[offset + 1]]) as u32; + offset += 2; + disp + } else { + return Err(Error("Truncated FIXUP 16-bit displacement")); + } + } + } else { + 0 + }; + + // Extract M-bit (bit 6 of fix_data) + let is_segment_relative = (fix_data & 0x40) != 0; + self.segments[seg_idx].relocations.push(OmfRelocation { + offset: data_offset + locat, + location, + frame_method, + target_method, + frame_index, + target_index, + target_displacement, + is_segment_relative, + }); + } + } + + Ok(()) + } + + fn parse_lidata(&mut self, data: &'data [u8], is_32bit: bool) -> Result<(usize, u32)> { + let mut offset = 0; + + // Read segment index + let (segment_index, size) = + omf::read_index(&data[offset..]).ok_or(Error("Invalid segment index in LIDATA"))?; + offset += size; + + if segment_index == 0 || segment_index > self.segments.len() as u16 { + return Err(Error("Invalid segment index in LIDATA")); + } + + // Read data offset + let data_offset = if is_32bit { + if offset + 4 > data.len() { + return Err(Error("Truncated LIDATA record")); + } + let off = u32::from_le_bytes([ + data[offset], + data[offset + 1], + data[offset + 2], + data[offset + 3], + ]); + offset += 4; + off + } else { + if offset + 2 > data.len() { + return Err(Error("Truncated LIDATA record")); + } + let off = u16::from_le_bytes([data[offset], data[offset + 1]]) as u32; + offset += 2; + off + }; + + // For LIDATA, we need to store the unexpanded data and expand on demand + let seg_idx = (segment_index - 1) as usize; + if offset < data.len() { + self.segments[seg_idx] + .data_chunks + .push((data_offset, OmfDataChunk::Iterated(&data[offset..]))); + } + + Ok((seg_idx, data_offset)) + } + + /// Expand a LIDATA block into its uncompressed form + pub fn expand_lidata_block(&self, data: &[u8]) -> Result> { + let mut offset = 0; + let mut result = Vec::new(); + + // Read repeat count + let (repeat_count, size) = omf::read_encoded_value(&data[offset..]) + 
.ok_or(Error("Invalid repeat count in LIDATA block"))?; + offset += size; + + if repeat_count == 0 { + return Ok(result); + } + + // Read block count + let (block_count, size) = omf::read_encoded_value(&data[offset..]) + .ok_or(Error("Invalid block count in LIDATA block"))?; + offset += size; + + if block_count == 0 { + // Leaf block: contains actual data + if offset >= data.len() { + return Ok(result); + } + let data_length = data[offset] as usize; + offset += 1; + + if offset + data_length > data.len() { + return Err(Error("Truncated LIDATA block")); + } + + let block_data = &data[offset..offset + data_length]; + + // Repeat the data block + for _ in 0..repeat_count { + result.extend_from_slice(block_data); + } + } else { + // Nested blocks: recurse for each block + for _ in 0..block_count { + let block_data = self.expand_lidata_block(&data[offset..])?; + let block_size = self.lidata_block_size(&data[offset..])?; + offset += block_size; + + // Repeat the expanded block + for _ in 0..repeat_count { + result.extend_from_slice(&block_data); + } + } + } + + Ok(result) + } + + fn lidata_block_size(&self, data: &[u8]) -> Result { + lidata_block_size_impl(data) + } + + /// Get the module name + pub fn module_name(&self) -> Option<&'data str> { + self.module_name + } + + /// Get the segments as a slice + pub fn segments_slice(&self) -> &[OmfSegment<'data>] { + &self.segments + } + + /// Get the public symbols + pub fn publics(&self) -> &[OmfSymbol<'data>] { + &self.publics + } + + /// Get the external symbols + pub fn externals(&self) -> &[OmfSymbol<'data>] { + &self.externals + } + + /// Get a name by index (1-based) + pub fn get_name(&self, index: u16) -> Option<&'data [u8]> { + if index > 0 && (index as usize) <= self.names.len() { + Some(self.names[(index - 1) as usize]) + } else { + None + } + } + + /// Get a symbol by index + pub fn symbol_by_index(&self, index: read::SymbolIndex) -> Result> { + let idx = index.0; + let total_publics = self.publics.len(); + let 
total_externals = self.externals.len(); + let total_before_communals = total_publics + total_externals; + let total = total_before_communals + self.communals.len(); + + if idx >= total { + return Err(Error("Invalid symbol index")); + } + + let symbol = if idx < total_publics { + self.publics[idx].clone() + } else if idx < total_before_communals { + self.externals[idx - total_publics].clone() + } else { + self.communals[idx - total_before_communals].clone() + }; + + Ok(symbol) + } + + /// Verify the checksum of an OMF record + /// + /// The checksum is calculated so that the sum of all bytes in the record, + /// including the checksum byte itself, equals 0 (modulo 256). + fn verify_checksum(record_type: u8, length: usize, body: &[u8], checksum: u8) -> bool { + // Some compilers write a 0 byte rather than computing the checksum, + // so we accept that as valid + if checksum == 0 { + return true; + } + + let mut sum = u32::from(record_type); + // Add length bytes (little-endian) + sum = sum.wrapping_add((length & 0xff) as u32); + sum = sum.wrapping_add((length >> 8) as u32); + // Add all body bytes + for &byte in body { + sum = sum.wrapping_add(u32::from(byte)); + } + // Add checksum byte + sum = sum.wrapping_add(u32::from(checksum)); + + // The sum should be 0 (modulo 256) + (sum & 0xff) == 0 + } +} + +/// Helper function to calculate LIDATA block size +fn lidata_block_size_impl(data: &[u8]) -> Result { + let mut offset = 0; + + // Read repeat count + let (_, size) = omf::read_encoded_value(&data[offset..]) + .ok_or(Error("Invalid repeat count in LIDATA block"))?; + offset += size; + + // Read block count + let (block_count, size) = omf::read_encoded_value(&data[offset..]) + .ok_or(Error("Invalid block count in LIDATA block"))?; + offset += size; + + if block_count == 0 { + // Leaf block + if offset >= data.len() { + return Ok(offset); + } + let data_length = data[offset] as usize; + offset += 1 + data_length; + } else { + // Nested blocks + for _ in 0..block_count { 
+ let nested_size = lidata_block_size_impl(&data[offset..])?; + offset += nested_size; + } + } + + Ok(offset) +} diff --git a/src/read/omf/relocation.rs b/src/read/omf/relocation.rs new file mode 100644 index 00000000..58744029 --- /dev/null +++ b/src/read/omf/relocation.rs @@ -0,0 +1,86 @@ +use crate::{omf, read, Relocation, SectionIndex}; + +use super::OmfFile; + +/// An iterator over OMF relocations. +#[derive(Debug)] +pub struct OmfRelocationIterator<'data, 'file, R: read::ReadRef<'data>> { + pub(super) file: &'file OmfFile<'data, R>, + pub(super) segment_index: usize, + pub(super) index: usize, +} + +impl<'data, 'file, R: read::ReadRef<'data>> Iterator for OmfRelocationIterator<'data, 'file, R> { + type Item = (u64, Relocation); + + fn next(&mut self) -> Option { + let relocations = &self.file.segments[self.segment_index].relocations; + if self.index >= relocations.len() { + return None; + } + + let reloc = &relocations[self.index]; + self.index += 1; + + // Convert OMF relocation to generic relocation + let (kind, size, addend) = match reloc.location { + omf::FixupLocation::LowByte => (read::RelocationKind::Absolute, 8, 0), + omf::FixupLocation::HighByte => (read::RelocationKind::Absolute, 8, 0), + omf::FixupLocation::Offset | omf::FixupLocation::LoaderOffset => { + if reloc.is_segment_relative { + // M=1: Segment-relative + (read::RelocationKind::Absolute, 16, 0) + } else { + // M=0: PC-relative (self-relative) + (read::RelocationKind::Relative, 16, -2) + } + } + omf::FixupLocation::Offset32 | omf::FixupLocation::LoaderOffset32 => { + if reloc.is_segment_relative { + // M=1: Segment-relative + (read::RelocationKind::Absolute, 32, 0) + } else { + // M=0: PC-relative (self-relative) + (read::RelocationKind::Relative, 32, -4) + } + } + omf::FixupLocation::Base => (read::RelocationKind::Absolute, 16, 0), + omf::FixupLocation::Pointer => (read::RelocationKind::Absolute, 32, 0), + omf::FixupLocation::Pointer48 => (read::RelocationKind::Absolute, 48, 0), + }; + + 
let relocation = Relocation { + kind, + encoding: read::RelocationEncoding::Generic, + size, + target: match reloc.target_method { + omf::TargetMethod::SegmentIndex => { + read::RelocationTarget::Section(SectionIndex(reloc.target_index as usize)) + } + omf::TargetMethod::ExternalIndex => { + // External indices in OMF are 1-based indices into the EXTDEF table + // Our symbol table has publics first, then externals + // So we need to adjust: symbol_index = publics.len() + (external_idx - 1) + if reloc.target_index > 0 { + let symbol_idx = + self.file.publics.len() + (reloc.target_index as usize - 1); + read::RelocationTarget::Symbol(read::SymbolIndex(symbol_idx)) + } else { + // Invalid external index + read::RelocationTarget::Absolute + } + } + _ => read::RelocationTarget::Absolute, + }, + addend: reloc.target_displacement as i64 + addend, + implicit_addend: false, + flags: read::RelocationFlags::Generic { + kind, + encoding: read::RelocationEncoding::Generic, + size, + }, + }; + + Some((reloc.offset as u64, relocation)) + } +} diff --git a/src/read/omf/section.rs b/src/read/omf/section.rs new file mode 100644 index 00000000..b8d5a7ce --- /dev/null +++ b/src/read/omf/section.rs @@ -0,0 +1,219 @@ +//! OMF section implementation. + +use alloc::borrow::Cow; +use alloc::vec; +use core::str; + +use crate::read::{ + self, CompressedData, CompressedFileRange, Error, ObjectSection, ReadRef, RelocationMap, + Result, SectionFlags, SectionIndex, SectionKind, +}; + +use super::{relocation::OmfRelocationIterator, OmfDataChunk, OmfFile, OmfSegment}; + +/// A section in an OMF file. 
+#[derive(Debug)] +pub struct OmfSection<'data, 'file, R: ReadRef<'data>> { + pub(super) file: &'file OmfFile<'data, R>, + pub(super) index: usize, +} + +impl<'data, 'file, R: ReadRef<'data>> OmfSection<'data, 'file, R> { + fn segment(&self) -> &OmfSegment<'data> { + &self.file.segments[self.index] + } +} + +impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for OmfSection<'data, 'file, R> {} + +impl<'data, 'file, R: ReadRef<'data>> ObjectSection<'data> for OmfSection<'data, 'file, R> { + type RelocationIterator = OmfRelocationIterator<'data, 'file, R>; + + fn index(&self) -> SectionIndex { + SectionIndex(self.index + 1) + } + + fn address(&self) -> u64 { + 0 + } + + fn size(&self) -> u64 { + self.segment().length as u64 + } + + fn align(&self) -> u64 { + match self.segment().alignment { + crate::omf::SegmentAlignment::Byte => 1, + crate::omf::SegmentAlignment::Word => 2, + crate::omf::SegmentAlignment::Paragraph => 16, + crate::omf::SegmentAlignment::Page => 256, + crate::omf::SegmentAlignment::DWord => 4, + crate::omf::SegmentAlignment::Page4K => 4096, + _ => 1, + } + } + + fn file_range(&self) -> Option<(u64, u64)> { + None + } + + fn data(&self) -> Result<&'data [u8]> { + let segment = self.segment(); + + // Check if we have a single contiguous chunk that doesn't need expansion + if let Some(data) = segment.get_single_chunk() { + return Ok(data); + } + + // If we have no chunks, return empty slice + if segment.data_chunks.is_empty() { + return Ok(&[]); + } + + // For multiple chunks, LIDATA, or non-contiguous data, we can't return a reference + // Users should use uncompressed_data() instead for these cases + Err(Error("OMF segment data is not contiguous; use uncompressed_data() instead")) + } + + fn data_range(&self, address: u64, size: u64) -> Result> { + let segment = self.segment(); + let offset = address as usize; + let end = offset + .checked_add(size as usize) + .ok_or(Error("Invalid data range"))?; + + // Check if we have a single contiguous 
chunk that covers the range + if let Some(data) = segment.get_single_chunk() { + if offset > data.len() || end > data.len() { + return Ok(None); + } + return Ok(Some(&data[offset..end])); + } + + // For multiple chunks, check if the requested range is within a single chunk + for (chunk_offset, chunk) in &segment.data_chunks { + let chunk_start = *chunk_offset as usize; + + // Only handle direct data chunks for now + if let OmfDataChunk::Direct(chunk_data) = chunk { + let chunk_end = chunk_start + chunk_data.len(); + + if offset >= chunk_start && end <= chunk_end { + let relative_offset = offset - chunk_start; + let relative_end = end - chunk_start; + return Ok(Some(&chunk_data[relative_offset..relative_end])); + } + } + } + + // Range spans multiple chunks, includes LIDATA, or is not available + Ok(None) + } + + fn compressed_file_range(&self) -> Result { + Ok(CompressedFileRange::none(self.file_range())) + } + + fn compressed_data(&self) -> Result> { + Ok(CompressedData::none(self.data()?)) + } + + fn uncompressed_data(&self) -> Result> { + let segment = self.segment(); + + // Check if we have a single contiguous chunk that doesn't need expansion + if let Some(data) = segment.get_single_chunk() { + return Ok(Cow::Borrowed(data)); + } + + // If we have no chunks, return empty + if segment.data_chunks.is_empty() { + return Ok(Cow::Borrowed(&[])); + } + + // We need to construct the full segment data + let mut result = vec![0u8; segment.length as usize]; + + for (offset, chunk) in &segment.data_chunks { + let start = *offset as usize; + + match chunk { + OmfDataChunk::Direct(data) => { + // Direct data - just copy it + let end = start + data.len(); + if end <= result.len() { + result[start..end].copy_from_slice(data); + } else { + return Err(Error("OMF segment data chunk exceeds segment length")); + } + } + OmfDataChunk::Iterated(lidata) => { + // LIDATA needs expansion + if let Ok(expanded) = self.file.expand_lidata_block(lidata) { + let end = start + 
expanded.len(); + if end <= result.len() { + result[start..end].copy_from_slice(&expanded); + } else { + return Err(Error("OMF LIDATA expansion exceeds segment length")); + } + } + } + } + } + + Ok(Cow::Owned(result)) + } + + fn name_bytes(&self) -> Result<&'data [u8]> { + let segment = self.segment(); + self.file + .get_name(segment.name_index) + .ok_or(Error("Invalid segment name index")) + } + + fn name(&self) -> Result<&'data str> { + str::from_utf8(self.name_bytes()?).map_err(|_| Error("Invalid UTF-8 in segment name")) + } + + fn segment_name_bytes(&self) -> Result> { + Ok(None) + } + + fn segment_name(&self) -> Result> { + Ok(None) + } + + fn kind(&self) -> SectionKind { + self.file.segment_section_kind(self.index) + } + + fn relocations(&self) -> Self::RelocationIterator { + OmfRelocationIterator { + file: self.file, + segment_index: self.index, + index: 0, + } + } + + fn relocation_map(&self) -> Result { + RelocationMap::new(self.file, self) + } + + fn flags(&self) -> SectionFlags { + let segment = self.segment(); + let flags = SectionFlags::None; + + // Set flags based on segment properties + match segment.combination { + crate::omf::SegmentCombination::Public => { + // Public segments are like COMDAT sections + } + crate::omf::SegmentCombination::Stack => { + // Stack segments + } + _ => {} + } + + flags + } +} diff --git a/src/read/omf/symbol.rs b/src/read/omf/symbol.rs new file mode 100644 index 00000000..8e4a1ec3 --- /dev/null +++ b/src/read/omf/symbol.rs @@ -0,0 +1,149 @@ +//! OMF symbol implementation. 
+ +use core::str; + +use crate::read::{ + self, ObjectSymbol, ObjectSymbolTable, ReadRef, Result, SectionIndex, SymbolFlags, SymbolIndex, + SymbolKind, SymbolScope, SymbolSection, +}; +use crate::Error; + +use super::{OmfFile, OmfSymbol}; + +impl<'data> read::private::Sealed for OmfSymbol<'data> {} + +impl<'data> ObjectSymbol<'data> for OmfSymbol<'data> { + fn index(&self) -> SymbolIndex { + SymbolIndex(self.symbol_index) + } + + fn name_bytes(&self) -> Result<&'data [u8]> { + Ok(self.name) + } + + fn name(&self) -> Result<&'data str> { + core::str::from_utf8(self.name).map_err(|_| Error("Invalid UTF-8 in OMF symbol name")) + } + + fn address(&self) -> u64 { + self.offset as u64 + } + + fn size(&self) -> u64 { + 0 // OMF doesn't store symbol sizes + } + + fn kind(&self) -> SymbolKind { + self.kind + } + + fn section(&self) -> SymbolSection { + if self.segment_index == 0 { + SymbolSection::Undefined + } else { + SymbolSection::Section(SectionIndex(self.segment_index as usize)) + } + } + + fn is_undefined(&self) -> bool { + self.segment_index == 0 + } + + fn is_definition(&self) -> bool { + self.segment_index != 0 + } + + fn is_common(&self) -> bool { + // Communal symbols have segment_index == 0 but are not externals + // We identify them by checking if this is a communal symbol + // (communals are stored after externals in our symbol table) + self.segment_index == 0 && self.offset != 0 + } + + fn is_weak(&self) -> bool { + // Weak symbols would be marked via WKEXT comment records + // For now, we don't have a direct way to check this from the symbol alone + false + } + + fn scope(&self) -> SymbolScope { + if self.segment_index == 0 { + SymbolScope::Unknown + } else { + SymbolScope::Linkage + } + } + + fn is_global(&self) -> bool { + true + } + + fn is_local(&self) -> bool { + false + } + + fn flags(&self) -> SymbolFlags { + SymbolFlags::None + } +} + +/// An iterator over OMF symbols. 
+#[derive(Debug)] +pub struct OmfSymbolIterator<'data, 'file> { + pub(super) publics: &'file [OmfSymbol<'data>], + pub(super) externals: &'file [OmfSymbol<'data>], + pub(super) communals: &'file [OmfSymbol<'data>], + pub(super) index: usize, +} + +impl<'data, 'file> Iterator for OmfSymbolIterator<'data, 'file> { + type Item = OmfSymbol<'data>; + + fn next(&mut self) -> Option { + let total_publics = self.publics.len(); + let total_externals = self.externals.len(); + let total_before_communals = total_publics + total_externals; + let total = total_before_communals + self.communals.len(); + + if self.index >= total { + return None; + } + + let symbol = if self.index < total_publics { + self.publics[self.index].clone() + } else if self.index < total_before_communals { + self.externals[self.index - total_publics].clone() + } else { + self.communals[self.index - total_before_communals].clone() + }; + + self.index += 1; + Some(symbol) + } +} + +/// An OMF symbol table. +#[derive(Debug)] +pub struct OmfSymbolTable<'data, 'file, R: ReadRef<'data>> { + pub(super) file: &'file OmfFile<'data, R>, +} + +impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for OmfSymbolTable<'data, 'file, R> {} + +impl<'data, 'file, R: ReadRef<'data>> ObjectSymbolTable<'data> for OmfSymbolTable<'data, 'file, R> { + type Symbol = OmfSymbol<'data>; + type SymbolIterator = OmfSymbolIterator<'data, 'file>; + + fn symbols(&self) -> Self::SymbolIterator { + OmfSymbolIterator { + publics: &self.file.publics, + externals: &self.file.externals, + communals: &self.file.communals, + index: 0, + } + } + + fn symbol_by_index(&self, index: SymbolIndex) -> Result { + self.file.symbol_by_index(index) + } +} diff --git a/tests/read/mod.rs b/tests/read/mod.rs index 48e005ee..77931a43 100644 --- a/tests/read/mod.rs +++ b/tests/read/mod.rs @@ -3,3 +3,4 @@ mod coff; mod elf; mod macho; +mod omf; diff --git a/tests/read/omf.rs b/tests/read/omf.rs new file mode 100644 index 00000000..b520d926 --- /dev/null +++ 
b/tests/read/omf.rs @@ -0,0 +1,114 @@ +#[cfg(feature = "std")] +use object::{Object, ObjectSection, ObjectSymbol, RelocationKind}; + +#[cfg(feature = "std")] +#[test] +fn test_comprehensive() { + let path = "testfiles/omf/comprehensive_test.obj"; + let data = std::fs::read(path).unwrap_or_else(|_| panic!("Failed to read {}", path)); + + let file = object::File::parse(&data[..]).unwrap(); + assert_eq!(file.format(), object::BinaryFormat::Omf); + + // Check sections + let sections: Vec<_> = file.sections().collect(); + assert!( + sections.len() >= 3, + "Should have at least CODE, DATA, and BSS sections" + ); + + // Check for relocations (tests thread subrecord and F/T bit handling) + let mut total_relocations = 0; + for section in file.sections() { + let relocs: Vec<_> = section.relocations().collect(); + total_relocations += relocs.len(); + + // Check for both PC-relative and segment-relative relocations (M-bit test) + for (_offset, reloc) in &relocs { + let _kind = reloc.kind(); // Should have both Relative and Absolute + } + } + assert!( + total_relocations > 0, + "Should have relocations (tests thread/fixup parsing)" + ); + + // Check symbols (tests PUBDEF/EXTDEF parsing) + let symbols: Vec<_> = file.symbols().collect(); + assert!(!symbols.is_empty(), "Should have symbols"); + + // Check for COMDEF symbols if supported + let has_comdef = symbols + .iter() + .any(|sym| sym.name().unwrap_or("").contains("shared")); + assert!(has_comdef, "Should have COMDEF symbols (shared variables)"); +} + +#[cfg(feature = "std")] +#[test] +fn test_lidata() { + let path = "testfiles/omf/test_lidata.obj"; + let data = std::fs::read(path).unwrap_or_else(|_| panic!("Failed to read {}", path)); + + let file = object::File::parse(&data[..]).unwrap(); + assert_eq!(file.format(), object::BinaryFormat::Omf); + + // Check that sections have data (LIDATA should be expanded) + let mut total_data_size = 0; + for section in file.sections() { + // Use uncompressed_data to get expanded LIDATA + 
if let Ok(data) = section.uncompressed_data() { + total_data_size += data.len(); + } + } + + // With LIDATA support, we should have expanded data for the arrays + assert!( + total_data_size >= 200, + "Section data should be expanded from LIDATA, got {} bytes", total_data_size + ); +} + +#[cfg(feature = "std")] +#[test] +fn test_relocations() { + let path = "testfiles/omf/comprehensive_test.obj"; + let data = std::fs::read(path).unwrap_or_else(|_| panic!("Failed to read {}", path)); + let file = object::File::parse(&data[..]).unwrap(); + + let mut has_relative = false; + let mut has_absolute = false; + + for section in file.sections() { + for (_offset, reloc) in section.relocations() { + match reloc.kind() { + RelocationKind::Relative => has_relative = true, + RelocationKind::Absolute => has_absolute = true, + _ => {} + } + } + } + + // With M-bit support, we should see both types + assert!(has_relative, "Should have Relative relocations (M=0)"); + assert!(has_absolute, "Should have Absolute relocations (M=1)"); +} + +#[cfg(feature = "std")] +#[test] +fn test_comdat() { + let path = "testfiles/omf/test_comdat.obj"; + let data = std::fs::read(path).unwrap_or_else(|_| panic!("Failed to read {}", path)); + + let file = object::File::parse(&data[..]).unwrap(); + assert_eq!(file.format(), object::BinaryFormat::Omf); + + // COMDAT support would show up as sections or symbols + let sections: Vec<_> = file.sections().collect(); + let symbols: Vec<_> = file.symbols().collect(); + + assert!( + !sections.is_empty() || !symbols.is_empty(), + "Should have parsed some content from COMDAT file" + ); +} From 814cef4691d4f37a0bf5cde12089f6e0bfd505b8 Mon Sep 17 00:00:00 2001 From: Luke Street Date: Wed, 24 Sep 2025 16:16:23 -0600 Subject: [PATCH 2/8] Remove WKEXT handling and handle ABS symbols --- src/read/omf/mod.rs | 63 +++++++++++------------------------------- src/read/omf/symbol.rs | 28 ++++++++++++------- tests/read/omf.rs | 3 +- 3 files changed, 36 insertions(+), 58 
deletions(-) diff --git a/src/read/omf/mod.rs b/src/read/omf/mod.rs index 959d033b..c67f436c 100644 --- a/src/read/omf/mod.rs +++ b/src/read/omf/mod.rs @@ -34,8 +34,6 @@ pub struct OmfFile<'data, R: ReadRef<'data> = &'data [u8]> { externals: Vec>, /// Communal symbols from COMDEF communals: Vec>, - /// Weak externals from WKEXT comment records - weak_externals: Vec>, /// COMDAT sections comdats: Vec>, /// Name table (LNAMES/LLNAMES) @@ -88,6 +86,8 @@ pub struct OmfSymbol<'data> { pub group_index: u16, /// Segment index (0 if external) pub segment_index: u16, + /// Frame number (for absolute symbols when segment_index == 0) + pub frame_number: u16, /// Offset within segment pub offset: u32, /// Type index (usually 0) @@ -213,7 +213,6 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { publics: Vec::new(), externals: Vec::new(), communals: Vec::new(), - weak_externals: Vec::new(), comdats: Vec::new(), names: Vec::new(), groups: Vec::new(), @@ -637,10 +636,17 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { omf::read_index(&data[offset..]).ok_or(Error("Invalid segment index"))?; offset += size; - // Skip frame number if segment index is 0 - if segment_index == 0 { + // Read frame number if segment index is 0 (for absolute symbols) + let frame_number = if segment_index == 0 { + if offset + 2 > data.len() { + return Err(Error("Invalid frame number in PUBDEF")); + } + let frame = u16::from_le_bytes([data[offset], data[offset + 1]]); offset += 2; - } + frame + } else { + 0 + }; // Parse public definitions while offset < data.len() { @@ -682,6 +688,7 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { name, group_index, segment_index, + frame_number, offset: pub_offset, type_index, kind: read::SymbolKind::Unknown, // Will be computed later @@ -711,6 +718,7 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { name, group_index: 0, segment_index: 0, + frame_number: 0, offset: 0, type_index, kind: read::SymbolKind::Unknown, @@ -771,6 +779,7 @@ impl<'data, R: 
ReadRef<'data>> OmfFile<'data, R> { name, group_index: 0, segment_index: 0, + frame_number: 0, offset: communal_length, // Store size in offset field type_index, kind: read::SymbolKind::Data, @@ -904,47 +913,7 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { } let _comment_type = data[0]; // Usually 0x00 for non-purge, 0x40 for purge - let comment_class = data[1]; - - // Check for WKEXT (Weak Extern) comment class 0xA8 - if comment_class == 0xA8 && data.len() > 2 { - self.parse_wkext(&data[2..])?; - } - - Ok(()) - } - - fn parse_wkext(&mut self, data: &'data [u8]) -> Result<()> { - let mut offset = 0; - - while offset + 2 <= data.len() { - // Parse weak extern index (index into EXTDEF) - let (weak_index, size) = - omf::read_index(&data[offset..]).ok_or(Error("Invalid WKEXT weak index"))?; - offset += size; - - // Parse default resolution index - let (default_index, size) = - omf::read_index(&data[offset..]).ok_or(Error("Invalid WKEXT default index"))?; - offset += size; - - // Look up names if indices are valid - let weak_name = weak_index - .checked_sub(1) - .and_then(|idx| self.externals.get(idx as usize).map(|s| s.name)) - .unwrap_or(b""); - let default_name = default_index - .checked_sub(1) - .and_then(|idx| self.externals.get(idx as usize).map(|s| s.name)) - .unwrap_or(b""); - - self.weak_externals.push(OmfWeakExtern { - weak_symbol_index: weak_index, - default_symbol_index: default_index, - weak_name, - default_name, - }); - } + let _comment_class = data[1]; Ok(()) } diff --git a/src/read/omf/symbol.rs b/src/read/omf/symbol.rs index 8e4a1ec3..f0094168 100644 --- a/src/read/omf/symbol.rs +++ b/src/read/omf/symbol.rs @@ -26,7 +26,13 @@ impl<'data> ObjectSymbol<'data> for OmfSymbol<'data> { } fn address(&self) -> u64 { - self.offset as u64 + if self.segment_index == 0 && self.frame_number != 0 { + // For absolute symbols, compute the linear address from frame:offset + // Frame number is in paragraphs (16-byte units) + ((self.frame_number as u64) << 4) + 
(self.offset as u64) + } else { + self.offset as u64 + } } fn size(&self) -> u64 { @@ -39,30 +45,32 @@ impl<'data> ObjectSymbol<'data> for OmfSymbol<'data> { fn section(&self) -> SymbolSection { if self.segment_index == 0 { - SymbolSection::Undefined + if self.frame_number != 0 { + SymbolSection::Absolute + } else { + SymbolSection::Undefined + } } else { SymbolSection::Section(SectionIndex(self.segment_index as usize)) } } fn is_undefined(&self) -> bool { - self.segment_index == 0 + self.segment_index == 0 && self.frame_number == 0 } fn is_definition(&self) -> bool { - self.segment_index != 0 + self.segment_index != 0 || self.frame_number != 0 } fn is_common(&self) -> bool { - // Communal symbols have segment_index == 0 but are not externals - // We identify them by checking if this is a communal symbol - // (communals are stored after externals in our symbol table) - self.segment_index == 0 && self.offset != 0 + // Communal symbols have segment_index == 0, frame_number == 0, but offset != 0 + // The offset field stores the size of the communal symbol + // This excludes both externals (offset == 0) and absolute symbols (frame_number != 0) + self.segment_index == 0 && self.frame_number == 0 && self.offset != 0 } fn is_weak(&self) -> bool { - // Weak symbols would be marked via WKEXT comment records - // For now, we don't have a direct way to check this from the symbol alone false } diff --git a/tests/read/omf.rs b/tests/read/omf.rs index b520d926..4e85ea03 100644 --- a/tests/read/omf.rs +++ b/tests/read/omf.rs @@ -65,7 +65,8 @@ fn test_lidata() { // With LIDATA support, we should have expanded data for the arrays assert!( total_data_size >= 200, - "Section data should be expanded from LIDATA, got {} bytes", total_data_size + "Section data should be expanded from LIDATA, got {} bytes", + total_data_size ); } From db3de2d7b3d4ed802e074a278ea8e8c5d4dccb40 Mon Sep 17 00:00:00 2001 From: Luke Street Date: Wed, 24 Sep 2025 17:30:45 -0600 Subject: [PATCH 3/8] Rework 
internal symbol layout; support local records --- src/omf.rs | 15 +-- src/read/any.rs | 2 +- src/read/omf/file.rs | 265 ++++--------------------------------- src/read/omf/mod.rs | 244 +++++++++++++++++----------------- src/read/omf/relocation.rs | 16 +-- src/read/omf/section.rs | 134 ++++++++++++++++++- src/read/omf/segment.rs | 90 +++++++++++++ src/read/omf/symbol.rs | 73 +++++----- 8 files changed, 416 insertions(+), 423 deletions(-) create mode 100644 src/read/omf/segment.rs diff --git a/src/omf.rs b/src/omf.rs index 7c4483c9..41c8b737 100644 --- a/src/omf.rs +++ b/src/omf.rs @@ -92,7 +92,7 @@ pub mod record_type { pub const VENDEXT: u8 = 0xCE; } -/// OMF record header - common to all record types +/// OMF record header #[derive(Debug, Clone, Copy)] #[repr(C)] pub struct RecordHeader { @@ -195,7 +195,7 @@ pub enum FrameMethod { } /// Check if a byte is a valid OMF record type -pub fn is_omf_record_type(byte: u8) -> bool { +pub(crate) fn is_omf_record_type(byte: u8) -> bool { use record_type::*; matches!( byte, @@ -242,13 +242,8 @@ pub fn is_omf_record_type(byte: u8) -> bool { ) } -/// Check if a record type uses 32-bit fields -pub fn is_32bit_record(record_type: u8) -> bool { - record_type & 0x01 != 0 -} - /// Helper to read an OMF index (1 or 2 bytes) -pub fn read_index(data: &[u8]) -> Option<(u16, usize)> { +pub(crate) fn read_index(data: &[u8]) -> Option<(u16, usize)> { if data.is_empty() { return None; } @@ -268,7 +263,7 @@ pub fn read_index(data: &[u8]) -> Option<(u16, usize)> { } /// Helper to read a counted string (length byte followed by string) -pub fn read_counted_string(data: &[u8]) -> Option<(&[u8], usize)> { +pub(crate) fn read_counted_string(data: &[u8]) -> Option<(&[u8], usize)> { if data.is_empty() { return None; } @@ -283,7 +278,7 @@ pub fn read_counted_string(data: &[u8]) -> Option<(&[u8], usize)> { /// Read an encoded value (used in LIDATA for repeat counts and block counts) /// Returns the value and number of bytes consumed -pub fn 
read_encoded_value(data: &[u8]) -> Option<(u32, usize)> { +pub(crate) fn read_encoded_value(data: &[u8]) -> Option<(u32, usize)> { if data.is_empty() { return None; } diff --git a/src/read/any.rs b/src/read/any.rs index 3857a020..e7492fcd 100644 --- a/src/read/any.rs +++ b/src/read/any.rs @@ -1187,7 +1187,7 @@ where ), ), #[cfg(feature = "omf")] - Omf((omf::OmfSymbolIterator<'data, 'file>, PhantomData)), + Omf((omf::OmfSymbolIterator<'data, 'file, R>, PhantomData)), } impl<'data, 'file, R: ReadRef<'data>> Iterator for SymbolIterator<'data, 'file, R> { diff --git a/src/read/omf/file.rs b/src/read/omf/file.rs index 806404e0..056d9775 100644 --- a/src/read/omf/file.rs +++ b/src/read/omf/file.rs @@ -1,13 +1,15 @@ //! OMF file implementation for the unified read API. use crate::read::{ - self, Architecture, ByteString, ComdatKind, Error, Export, FileFlags, Import, - NoDynamicRelocationIterator, Object, ObjectComdat, ObjectKind, ObjectSection, ObjectSegment, - ReadRef, Result, SectionIndex, SegmentFlags, SymbolIndex, + Architecture, ByteString, Error, Export, FileFlags, Import, NoDynamicRelocationIterator, + Object, ObjectKind, ObjectSection, ReadRef, Result, SectionIndex, SymbolIndex, }; use crate::SubArchitecture; -use super::{OmfFile, OmfSection, OmfSymbol, OmfSymbolIterator, OmfSymbolTable}; +use super::{ + OmfComdat, OmfComdatIterator, OmfFile, OmfSection, OmfSectionIterator, OmfSegmentIterator, + OmfSegmentRef, OmfSymbol, OmfSymbolIterator, OmfSymbolTable, +}; impl<'data, R: ReadRef<'data>> Object<'data> for OmfFile<'data, R> { type Segment<'file> @@ -46,7 +48,7 @@ impl<'data, R: ReadRef<'data>> Object<'data> for OmfFile<'data, R> { Self: 'file, 'data: 'file; type SymbolIterator<'file> - = OmfSymbolIterator<'data, 'file> + = OmfSymbolIterator<'data, 'file, R> where Self: 'file, 'data: 'file; @@ -127,26 +129,15 @@ impl<'data, R: ReadRef<'data>> Object<'data> for OmfFile<'data, R> { fn symbol_by_index(&self, index: SymbolIndex) -> Result> { let idx = index.0; - let 
total_publics = self.publics.len(); - let total_externals = self.externals.len(); - let total_before_communals = total_publics + total_externals; - - if idx < total_publics { - Ok(self.publics[idx].clone()) - } else if idx < total_before_communals { - Ok(self.externals[idx - total_publics].clone()) - } else if idx < total_before_communals + self.communals.len() { - Ok(self.communals[idx - total_before_communals].clone()) - } else { - Err(Error("Symbol index out of bounds")) + if idx >= self.symbols.len() { + return Err(Error("Symbol index out of bounds")); } + Ok(self.symbols[idx].clone()) } fn symbols(&self) -> Self::SymbolIterator<'_> { OmfSymbolIterator { - publics: &self.publics, - externals: &self.externals, - communals: &self.communals, + file: self, index: 0, } } @@ -157,10 +148,8 @@ impl<'data, R: ReadRef<'data>> Object<'data> for OmfFile<'data, R> { fn dynamic_symbols(&self) -> Self::SymbolIterator<'_> { OmfSymbolIterator { - publics: &[], - externals: &[], - communals: &[], - index: 0, + file: self, + index: usize::MAX, // Empty iterator } } @@ -173,10 +162,18 @@ impl<'data, R: ReadRef<'data>> Object<'data> for OmfFile<'data, R> { } fn imports(&self) -> Result>> { - // External symbols are imports in OMF + // Only true external symbols are imports in OMF + // LocalExternal (LEXTDEF) are module-local references that should be resolved + // within the same module by LocalPublic (LPUBDEF) symbols Ok(self - .externals + .all_symbols() .iter() + .filter(|sym| { + matches!( + sym.class, + super::OmfSymbolClass::External | super::OmfSymbolClass::ComdatExternal + ) + }) .map(|ext| Import { library: ByteString(b""), name: ByteString(ext.name), @@ -185,10 +182,12 @@ impl<'data, R: ReadRef<'data>> Object<'data> for OmfFile<'data, R> { } fn exports(&self) -> Result>> { - // Public symbols are exports in OMF + // Only true public symbols are exports in OMF + // LocalPublic (LPUBDEF) are module-local symbols not visible outside Ok(self - .publics + .all_symbols() 
.iter() + .filter(|sym| sym.class == super::OmfSymbolClass::Public) .map(|pub_sym| Export { name: ByteString(pub_sym.name), address: pub_sym.offset as u64, @@ -232,213 +231,3 @@ impl<'data, R: ReadRef<'data>> Object<'data> for OmfFile<'data, R> { FileFlags::None } } - -/// An OMF segment reference. -#[derive(Debug)] -pub struct OmfSegmentRef<'data, 'file, R: ReadRef<'data>> { - file: &'file OmfFile<'data, R>, - index: usize, -} - -impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for OmfSegmentRef<'data, 'file, R> {} - -impl<'data, 'file, R: ReadRef<'data>> ObjectSegment<'data> for OmfSegmentRef<'data, 'file, R> { - fn address(&self) -> u64 { - 0 - } - - fn size(&self) -> u64 { - self.file.segments[self.index].length as u64 - } - - fn align(&self) -> u64 { - match self.file.segments[self.index].alignment { - crate::omf::SegmentAlignment::Byte => 1, - crate::omf::SegmentAlignment::Word => 2, - crate::omf::SegmentAlignment::Paragraph => 16, - crate::omf::SegmentAlignment::Page => 256, - crate::omf::SegmentAlignment::DWord => 4, - crate::omf::SegmentAlignment::Page4K => 4096, - _ => 1, - } - } - - fn file_range(&self) -> (u64, u64) { - (0, 0) - } - - fn data(&self) -> Result<&'data [u8]> { - // OMF segments don't have direct file mapping - Ok(&[]) - } - - fn data_range(&self, _address: u64, _size: u64) -> Result> { - Ok(None) - } - - fn name_bytes(&self) -> Result> { - Ok(self - .file - .get_name(self.file.segments[self.index].name_index)) - } - - fn name(&self) -> Result> { - let index = self.file.segments[self.index].name_index; - let name_opt = self.file.get_name(index); - match name_opt { - Some(bytes) => Ok(core::str::from_utf8(bytes).ok()), - None => Ok(None), - } - } - - fn flags(&self) -> SegmentFlags { - SegmentFlags::None - } -} - -/// An iterator over OMF segments. 
-#[derive(Debug)] -pub struct OmfSegmentIterator<'data, 'file, R: ReadRef<'data>> { - file: &'file OmfFile<'data, R>, - index: usize, -} - -impl<'data, 'file, R: ReadRef<'data>> Iterator for OmfSegmentIterator<'data, 'file, R> { - type Item = OmfSegmentRef<'data, 'file, R>; - - fn next(&mut self) -> Option { - if self.index < self.file.segments.len() { - let segment = OmfSegmentRef { - file: self.file, - index: self.index, - }; - self.index += 1; - Some(segment) - } else { - None - } - } -} - -/// An iterator over OMF sections. -#[derive(Debug)] -pub struct OmfSectionIterator<'data, 'file, R: ReadRef<'data>> { - file: &'file OmfFile<'data, R>, - index: usize, -} - -impl<'data, 'file, R: ReadRef<'data>> Iterator for OmfSectionIterator<'data, 'file, R> { - type Item = OmfSection<'data, 'file, R>; - - fn next(&mut self) -> Option { - if self.index < self.file.segments.len() { - let section = OmfSection { - file: self.file, - index: self.index, - }; - self.index += 1; - Some(section) - } else { - None - } - } -} - -/// A COMDAT section in an OMF file. 
-#[derive(Debug)] -pub struct OmfComdat<'data, 'file, R: ReadRef<'data>> { - file: &'file OmfFile<'data, R>, - index: usize, - _phantom: core::marker::PhantomData<&'data ()>, -} - -impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for OmfComdat<'data, 'file, R> {} - -impl<'data, 'file, R: ReadRef<'data>> ObjectComdat<'data> for OmfComdat<'data, 'file, R> { - type SectionIterator = OmfComdatSectionIterator<'data, 'file, R>; - - fn kind(&self) -> ComdatKind { - let comdat = &self.file.comdats[self.index]; - match comdat.selection { - super::OmfComdatSelection::Explicit => ComdatKind::NoDuplicates, - super::OmfComdatSelection::UseAny => ComdatKind::Any, - super::OmfComdatSelection::SameSize => ComdatKind::SameSize, - super::OmfComdatSelection::ExactMatch => ComdatKind::ExactMatch, - } - } - - fn symbol(&self) -> SymbolIndex { - // COMDAT symbols don't have a direct symbol index in OMF - // Return an invalid index - SymbolIndex(usize::MAX) - } - - fn name_bytes(&self) -> Result<&'data [u8]> { - let comdat = &self.file.comdats[self.index]; - Ok(comdat.name) - } - - fn name(&self) -> Result<&'data str> { - let comdat = &self.file.comdats[self.index]; - core::str::from_utf8(comdat.name).map_err(|_| Error("Invalid UTF-8 in COMDAT name")) - } - - fn sections(&self) -> Self::SectionIterator { - let comdat = &self.file.comdats[self.index]; - OmfComdatSectionIterator { - segment_index: if comdat.segment_index > 0 { - Some(comdat.segment_index as usize - 1) - } else { - None - }, - returned: false, - _phantom: core::marker::PhantomData, - } - } -} - -/// An iterator over COMDAT sections. 
-#[derive(Debug)] -pub struct OmfComdatIterator<'data, 'file, R: ReadRef<'data>> { - file: &'file OmfFile<'data, R>, - index: usize, -} - -impl<'data, 'file, R: ReadRef<'data>> Iterator for OmfComdatIterator<'data, 'file, R> { - type Item = OmfComdat<'data, 'file, R>; - - fn next(&mut self) -> Option { - if self.index < self.file.comdats.len() { - let comdat = OmfComdat { - file: self.file, - index: self.index, - _phantom: core::marker::PhantomData, - }; - self.index += 1; - Some(comdat) - } else { - None - } - } -} - -/// An iterator over sections in a COMDAT. -#[derive(Debug)] -pub struct OmfComdatSectionIterator<'data, 'file, R: ReadRef<'data>> { - segment_index: Option, - returned: bool, - _phantom: core::marker::PhantomData<(&'data (), &'file (), R)>, -} - -impl<'data, 'file, R: ReadRef<'data>> Iterator for OmfComdatSectionIterator<'data, 'file, R> { - type Item = SectionIndex; - - fn next(&mut self) -> Option { - if !self.returned { - self.returned = true; - self.segment_index.map(|idx| SectionIndex(idx + 1)) - } else { - None - } - } -} diff --git a/src/read/omf/mod.rs b/src/read/omf/mod.rs index c67f436c..606561ca 100644 --- a/src/read/omf/mod.rs +++ b/src/read/omf/mod.rs @@ -9,14 +9,36 @@ use crate::read::{self, Error, ReadRef, Result}; mod file; pub use file::*; +mod relocation; +pub use relocation::*; + mod section; pub use section::*; +mod segment; +pub use segment::*; + mod symbol; pub use symbol::*; -mod relocation; -pub use relocation::*; +/// Symbol class for OMF symbols +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum OmfSymbolClass { + /// Public symbol (PUBDEF) + Public, + /// Local public symbol (LPUBDEF) + LocalPublic, + /// External symbol (EXTDEF) + External, + /// Local external symbol (LEXTDEF) + LocalExternal, + /// Communal symbol (COMDEF) + Communal, + /// Local communal symbol (LCOMDEF) + LocalCommunal, + /// COMDAT external symbol (CEXTDEF) + ComdatExternal, +} /// An OMF object file. 
/// @@ -28,12 +50,10 @@ pub struct OmfFile<'data, R: ReadRef<'data> = &'data [u8]> { module_name: Option<&'data str>, /// Segment definitions segments: Vec>, - /// Public symbols - publics: Vec>, - /// External symbols - externals: Vec>, - /// Communal symbols from COMDEF - communals: Vec>, + /// All symbols (publics, externals, communals, locals) in occurrence order + symbols: Vec>, + /// Maps external-name table index (1-based) to SymbolIndex + external_order: Vec, /// COMDAT sections comdats: Vec>, /// Name table (LNAMES/LLNAMES) @@ -75,13 +95,15 @@ pub struct OmfSegment<'data> { pub relocations: Vec, } -/// An OMF symbol (public or external) +/// An OMF symbol #[derive(Debug, Clone)] pub struct OmfSymbol<'data> { /// Symbol table index pub symbol_index: usize, /// Symbol name pub name: &'data [u8], + /// Symbol class (Public, External, etc.) + pub class: OmfSymbolClass, /// Group index (0 if none) pub group_index: u16, /// Segment index (0 if external) @@ -154,19 +176,6 @@ pub enum OmfComdatSelection { ExactMatch = 3, } -/// A weak extern definition -#[derive(Debug, Clone)] -pub struct OmfWeakExtern<'data> { - /// Weak symbol index (external symbol) - pub weak_symbol_index: u16, - /// Default resolution symbol index - pub default_symbol_index: u16, - /// Weak symbol name - pub weak_name: &'data [u8], - /// Default symbol name - pub default_name: &'data [u8], -} - /// Thread definition for FIXUPP parsing #[derive(Debug, Clone, Copy)] struct ThreadDef { @@ -210,56 +219,42 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { data, module_name: None, segments: Vec::new(), - publics: Vec::new(), - externals: Vec::new(), - communals: Vec::new(), + symbols: Vec::new(), + external_order: Vec::new(), comdats: Vec::new(), names: Vec::new(), groups: Vec::new(), }; file.parse_records()?; - file.assign_symbol_indices(); + file.assign_symbol_kinds(); Ok(file) } - fn assign_symbol_indices(&mut self) { - let mut index = 0; - - // First, compute kinds for public symbols based 
on their segments - let public_kinds = self - .publics + fn assign_symbol_kinds(&mut self) { + // Compute kinds for symbols based on their segments + let kinds: Vec = self + .symbols .iter() - .map(|sym| { - if sym.segment_index > 0 && (sym.segment_index as usize) <= self.segments.len() { - let segment_idx = (sym.segment_index - 1) as usize; - let section_kind = self.segment_section_kind(segment_idx); - Self::symbol_kind_from_section_kind(section_kind) - } else { - read::SymbolKind::Unknown + .map(|sym| match sym.class { + OmfSymbolClass::Public | OmfSymbolClass::LocalPublic => { + if sym.segment_index > 0 && (sym.segment_index as usize) <= self.segments.len() + { + let segment_idx = (sym.segment_index - 1) as usize; + let section_kind = self.segment_section_kind(segment_idx); + Self::symbol_kind_from_section_kind(section_kind) + } else { + read::SymbolKind::Unknown + } } + OmfSymbolClass::Communal | OmfSymbolClass::LocalCommunal => read::SymbolKind::Data, + _ => read::SymbolKind::Unknown, }) - .collect::>(); + .collect(); - // Assign indices to public symbols and their pre-computed kinds - for (sym, kind) in self.publics.iter_mut().zip(public_kinds) { - sym.symbol_index = index; + // Apply computed kinds + for (sym, kind) in self.symbols.iter_mut().zip(kinds) { sym.kind = kind; - index += 1; - } - - // Assign indices to external symbols - for sym in self.externals.iter_mut() { - sym.symbol_index = index; - sym.kind = read::SymbolKind::Unknown; - index += 1; - } - - // Assign indices to communal symbols - for sym in self.communals.iter_mut() { - sym.symbol_index = index; - sym.kind = read::SymbolKind::Data; - index += 1; } } @@ -273,7 +268,7 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { } /// Get the section kind for a segment (reusing logic from OmfSection) - pub fn segment_section_kind(&self, segment_index: usize) -> read::SectionKind { + fn segment_section_kind(&self, segment_index: usize) -> read::SectionKind { if segment_index >= self.segments.len() { 
return read::SectionKind::Unknown; } @@ -305,8 +300,11 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { return read::SectionKind::Text; } else if name_upper == b"_DATA" || name_upper == b"DATA" || name_upper == b".DATA" { return read::SectionKind::Data; - } else if name_upper == b"_BSS" || name_upper == b"BSS" || name_upper == b".BSS" - || name_upper == b"STACK" { + } else if name_upper == b"_BSS" + || name_upper == b"BSS" + || name_upper == b".BSS" + || name_upper == b"STACK" + { return read::SectionKind::UninitializedData; } } @@ -431,13 +429,33 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { self.parse_grpdef(record_data)?; } omf::record_type::PUBDEF | omf::record_type::PUBDEF32 => { - self.parse_pubdef(record_data, record_type == omf::record_type::PUBDEF32)?; + self.parse_pubdef( + record_data, + record_type == omf::record_type::PUBDEF32, + OmfSymbolClass::Public, + )?; + } + omf::record_type::LPUBDEF | omf::record_type::LPUBDEF32 => { + self.parse_pubdef( + record_data, + record_type == omf::record_type::LPUBDEF32, + OmfSymbolClass::LocalPublic, + )?; } omf::record_type::EXTDEF => { - self.parse_extdef(record_data)?; + self.parse_extdef(record_data, OmfSymbolClass::External)?; + } + omf::record_type::LEXTDEF | omf::record_type::LEXTDEF32 => { + self.parse_extdef(record_data, OmfSymbolClass::LocalExternal)?; + } + omf::record_type::CEXTDEF => { + self.parse_extdef(record_data, OmfSymbolClass::ComdatExternal)?; } omf::record_type::COMDEF => { - self.parse_comdef(record_data)?; + self.parse_comdef(record_data, OmfSymbolClass::Communal)?; + } + omf::record_type::LCOMDEF => { + self.parse_comdef(record_data, OmfSymbolClass::LocalCommunal)?; } omf::record_type::COMDAT | omf::record_type::COMDAT32 => { self.parse_comdat(record_data, record_type == omf::record_type::COMDAT32)?; @@ -624,7 +642,12 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { Ok(()) } - fn parse_pubdef(&mut self, data: &'data [u8], is_32bit: bool) -> Result<()> { + fn parse_pubdef( + 
&mut self, + data: &'data [u8], + is_32bit: bool, + class: OmfSymbolClass, + ) -> Result<()> { let mut offset = 0; // Parse group index @@ -680,12 +703,13 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { // Parse type index let (type_index, size) = omf::read_index(&data[offset..]) - .ok_or(Error("Invalid type index in EXTDEF record"))?; + .ok_or(Error("Invalid type index in PUBDEF/LPUBDEF record"))?; offset += size; - self.publics.push(OmfSymbol { - symbol_index: 0, // Will be assigned later + self.symbols.push(OmfSymbol { + symbol_index: self.symbols.len(), name, + class, group_index, segment_index, frame_number, @@ -698,7 +722,7 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { Ok(()) } - fn parse_extdef(&mut self, data: &'data [u8]) -> Result<()> { + fn parse_extdef(&mut self, data: &'data [u8], class: OmfSymbolClass) -> Result<()> { let mut offset = 0; while offset < data.len() { @@ -710,12 +734,14 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { // Parse type index let (type_index, size) = omf::read_index(&data[offset..]) - .ok_or(Error("Invalid type index in EXTDEF record"))?; + .ok_or(Error("Invalid type index in EXTDEF/LEXTDEF/CEXTDEF record"))?; offset += size; - self.externals.push(OmfSymbol { - symbol_index: 0, // Will be assigned later + let sym_idx = self.symbols.len(); + self.symbols.push(OmfSymbol { + symbol_index: sym_idx, name, + class, group_index: 0, segment_index: 0, frame_number: 0, @@ -723,12 +749,15 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { type_index, kind: read::SymbolKind::Unknown, }); + + // Add to external_order for symbols that contribute to external-name table + self.external_order.push(read::SymbolIndex(sym_idx)); } Ok(()) } - fn parse_comdef(&mut self, data: &'data [u8]) -> Result<()> { + fn parse_comdef(&mut self, data: &'data [u8], class: OmfSymbolClass) -> Result<()> { let mut offset = 0; while offset < data.len() { @@ -740,7 +769,7 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { // Parse type index 
let (type_index, size) = omf::read_index(&data[offset..]) - .ok_or(Error("Invalid type index in COMDEF record"))?; + .ok_or(Error("Invalid type index in COMDEF/LCOMDEF record"))?; offset += size; // Parse data type and communal length @@ -774,9 +803,11 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { } }; - self.communals.push(OmfSymbol { - symbol_index: 0, // Will be assigned later + let sym_idx = self.symbols.len(); + self.symbols.push(OmfSymbol { + symbol_index: sym_idx, name, + class, group_index: 0, segment_index: 0, frame_number: 0, @@ -784,6 +815,9 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { type_index, kind: read::SymbolKind::Data, }); + + // Add to external_order for symbols that contribute to external-name table + self.external_order.push(read::SymbolIndex(sym_idx)); } Ok(()) @@ -1238,7 +1272,7 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { } /// Expand a LIDATA block into its uncompressed form - pub fn expand_lidata_block(&self, data: &[u8]) -> Result> { + fn expand_lidata_block(&self, data: &[u8]) -> Result> { let mut offset = 0; let mut result = Vec::new(); @@ -1278,7 +1312,7 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { // Nested blocks: recurse for each block for _ in 0..block_count { let block_data = self.expand_lidata_block(&data[offset..])?; - let block_size = self.lidata_block_size(&data[offset..])?; + let block_size = lidata_block_size(&data[offset..])?; offset += block_size; // Repeat the expanded block @@ -1291,10 +1325,6 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { Ok(result) } - fn lidata_block_size(&self, data: &[u8]) -> Result { - lidata_block_size_impl(data) - } - /// Get the module name pub fn module_name(&self) -> Option<&'data str> { self.module_name @@ -1305,46 +1335,23 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { &self.segments } - /// Get the public symbols - pub fn publics(&self) -> &[OmfSymbol<'data>] { - &self.publics - } - - /// Get the external symbols - pub fn externals(&self) -> 
&[OmfSymbol<'data>] { - &self.externals + /// Get symbol by external-name index (1-based, as used in FIXUPP records) + pub fn external_symbol(&self, external_index: u16) -> Option<&OmfSymbol<'data>> { + let symbol_index = self + .external_order + .get(external_index.checked_sub(1)? as usize)?; + self.symbols.get(symbol_index.0) } /// Get a name by index (1-based) pub fn get_name(&self, index: u16) -> Option<&'data [u8]> { - if index > 0 && (index as usize) <= self.names.len() { - Some(self.names[(index - 1) as usize]) - } else { - None - } + let name_index = index.checked_sub(1)?; + self.names.get(name_index as usize).copied() } - /// Get a symbol by index - pub fn symbol_by_index(&self, index: read::SymbolIndex) -> Result> { - let idx = index.0; - let total_publics = self.publics.len(); - let total_externals = self.externals.len(); - let total_before_communals = total_publics + total_externals; - let total = total_before_communals + self.communals.len(); - - if idx >= total { - return Err(Error("Invalid symbol index")); - } - - let symbol = if idx < total_publics { - self.publics[idx].clone() - } else if idx < total_before_communals { - self.externals[idx - total_publics].clone() - } else { - self.communals[idx - total_before_communals].clone() - }; - - Ok(symbol) + /// Get all symbols (for iteration) + pub fn all_symbols(&self) -> &[OmfSymbol<'data>] { + &self.symbols } /// Verify the checksum of an OMF record @@ -1375,7 +1382,7 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { } /// Helper function to calculate LIDATA block size -fn lidata_block_size_impl(data: &[u8]) -> Result { +fn lidata_block_size(data: &[u8]) -> Result { let mut offset = 0; // Read repeat count @@ -1398,8 +1405,7 @@ fn lidata_block_size_impl(data: &[u8]) -> Result { } else { // Nested blocks for _ in 0..block_count { - let nested_size = lidata_block_size_impl(&data[offset..])?; - offset += nested_size; + offset += lidata_block_size(&data[offset..])?; } } diff --git 
a/src/read/omf/relocation.rs b/src/read/omf/relocation.rs index 58744029..60d2941f 100644 --- a/src/read/omf/relocation.rs +++ b/src/read/omf/relocation.rs @@ -15,11 +15,7 @@ impl<'data, 'file, R: read::ReadRef<'data>> Iterator for OmfRelocationIterator<' fn next(&mut self) -> Option { let relocations = &self.file.segments[self.segment_index].relocations; - if self.index >= relocations.len() { - return None; - } - - let reloc = &relocations[self.index]; + let reloc = relocations.get(self.index)?; self.index += 1; // Convert OMF relocation to generic relocation @@ -58,13 +54,9 @@ impl<'data, 'file, R: read::ReadRef<'data>> Iterator for OmfRelocationIterator<' read::RelocationTarget::Section(SectionIndex(reloc.target_index as usize)) } omf::TargetMethod::ExternalIndex => { - // External indices in OMF are 1-based indices into the EXTDEF table - // Our symbol table has publics first, then externals - // So we need to adjust: symbol_index = publics.len() + (external_idx - 1) - if reloc.target_index > 0 { - let symbol_idx = - self.file.publics.len() + (reloc.target_index as usize - 1); - read::RelocationTarget::Symbol(read::SymbolIndex(symbol_idx)) + // External indices in OMF are 1-based indices into the external-name table + if let Some(symbol) = self.file.external_symbol(reloc.target_index) { + read::RelocationTarget::Symbol(read::SymbolIndex(symbol.symbol_index)) } else { // Invalid external index read::RelocationTarget::Absolute diff --git a/src/read/omf/section.rs b/src/read/omf/section.rs index b8d5a7ce..43970aa3 100644 --- a/src/read/omf/section.rs +++ b/src/read/omf/section.rs @@ -4,9 +4,12 @@ use alloc::borrow::Cow; use alloc::vec; use core::str; -use crate::read::{ - self, CompressedData, CompressedFileRange, Error, ObjectSection, ReadRef, RelocationMap, - Result, SectionFlags, SectionIndex, SectionKind, +use crate::{ + read::{ + self, CompressedData, CompressedFileRange, Error, ObjectSection, ReadRef, RelocationMap, + Result, SectionFlags, SectionIndex, 
SectionKind, + }, + ComdatKind, ObjectComdat, SymbolIndex, }; use super::{relocation::OmfRelocationIterator, OmfDataChunk, OmfFile, OmfSegment}; @@ -71,8 +74,9 @@ impl<'data, 'file, R: ReadRef<'data>> ObjectSection<'data> for OmfSection<'data, } // For multiple chunks, LIDATA, or non-contiguous data, we can't return a reference - // Users should use uncompressed_data() instead for these cases - Err(Error("OMF segment data is not contiguous; use uncompressed_data() instead")) + Err(Error( + "OMF segment data is not contiguous; use uncompressed_data() instead", + )) } fn data_range(&self, address: u64, size: u64) -> Result> { @@ -139,7 +143,7 @@ impl<'data, 'file, R: ReadRef<'data>> ObjectSection<'data> for OmfSection<'data, match chunk { OmfDataChunk::Direct(data) => { - // Direct data - just copy it + // Direct data let end = start + data.len(); if end <= result.len() { result[start..end].copy_from_slice(data); @@ -217,3 +221,121 @@ impl<'data, 'file, R: ReadRef<'data>> ObjectSection<'data> for OmfSection<'data, flags } } + +/// An iterator over OMF sections. +#[derive(Debug)] +pub struct OmfSectionIterator<'data, 'file, R: ReadRef<'data>> { + pub(super) file: &'file OmfFile<'data, R>, + pub(super) index: usize, +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for OmfSectionIterator<'data, 'file, R> { + type Item = OmfSection<'data, 'file, R>; + + fn next(&mut self) -> Option { + if self.index < self.file.segments.len() { + let section = OmfSection { + file: self.file, + index: self.index, + }; + self.index += 1; + Some(section) + } else { + None + } + } +} + +/// A COMDAT section in an OMF file. 
+#[derive(Debug)] +pub struct OmfComdat<'data, 'file, R: ReadRef<'data>> { + file: &'file OmfFile<'data, R>, + index: usize, + _phantom: core::marker::PhantomData<&'data ()>, +} + +impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for OmfComdat<'data, 'file, R> {} + +impl<'data, 'file, R: ReadRef<'data>> ObjectComdat<'data> for OmfComdat<'data, 'file, R> { + type SectionIterator = OmfComdatSectionIterator<'data, 'file, R>; + + fn kind(&self) -> ComdatKind { + let comdat = &self.file.comdats[self.index]; + match comdat.selection { + super::OmfComdatSelection::Explicit => ComdatKind::NoDuplicates, + super::OmfComdatSelection::UseAny => ComdatKind::Any, + super::OmfComdatSelection::SameSize => ComdatKind::SameSize, + super::OmfComdatSelection::ExactMatch => ComdatKind::ExactMatch, + } + } + + fn symbol(&self) -> SymbolIndex { + // COMDAT symbols don't have a direct symbol index in OMF + SymbolIndex(usize::MAX) + } + + fn name_bytes(&self) -> Result<&'data [u8]> { + let comdat = &self.file.comdats[self.index]; + Ok(comdat.name) + } + + fn name(&self) -> Result<&'data str> { + let comdat = &self.file.comdats[self.index]; + core::str::from_utf8(comdat.name).map_err(|_| Error("Invalid UTF-8 in COMDAT name")) + } + + fn sections(&self) -> Self::SectionIterator { + let comdat = &self.file.comdats[self.index]; + OmfComdatSectionIterator { + segment_index: (comdat.segment_index as usize).checked_sub(1), + returned: false, + _phantom: core::marker::PhantomData, + } + } +} + +/// An iterator over COMDAT sections. 
+#[derive(Debug)] +pub struct OmfComdatIterator<'data, 'file, R: ReadRef<'data>> { + pub(super) file: &'file OmfFile<'data, R>, + pub(super) index: usize, +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for OmfComdatIterator<'data, 'file, R> { + type Item = OmfComdat<'data, 'file, R>; + + fn next(&mut self) -> Option { + if self.index < self.file.comdats.len() { + let comdat = OmfComdat { + file: self.file, + index: self.index, + _phantom: core::marker::PhantomData, + }; + self.index += 1; + Some(comdat) + } else { + None + } + } +} + +/// An iterator over sections in a COMDAT. +#[derive(Debug)] +pub struct OmfComdatSectionIterator<'data, 'file, R: ReadRef<'data>> { + segment_index: Option, + returned: bool, + _phantom: core::marker::PhantomData<(&'data (), &'file (), R)>, +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for OmfComdatSectionIterator<'data, 'file, R> { + type Item = SectionIndex; + + fn next(&mut self) -> Option { + if !self.returned { + self.returned = true; + self.segment_index.map(|idx| SectionIndex(idx + 1)) + } else { + None + } + } +} diff --git a/src/read/omf/segment.rs b/src/read/omf/segment.rs new file mode 100644 index 00000000..684a0322 --- /dev/null +++ b/src/read/omf/segment.rs @@ -0,0 +1,90 @@ +use crate::{read, ObjectSegment, ReadRef, Result, SegmentFlags}; + +use super::OmfFile; + +/// An OMF segment reference. 
+#[derive(Debug)] +pub struct OmfSegmentRef<'data, 'file, R: ReadRef<'data>> { + file: &'file OmfFile<'data, R>, + index: usize, +} + +impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for OmfSegmentRef<'data, 'file, R> {} + +impl<'data, 'file, R: ReadRef<'data>> ObjectSegment<'data> for OmfSegmentRef<'data, 'file, R> { + fn address(&self) -> u64 { + 0 + } + + fn size(&self) -> u64 { + self.file.segments[self.index].length as u64 + } + + fn align(&self) -> u64 { + match self.file.segments[self.index].alignment { + crate::omf::SegmentAlignment::Byte => 1, + crate::omf::SegmentAlignment::Word => 2, + crate::omf::SegmentAlignment::Paragraph => 16, + crate::omf::SegmentAlignment::Page => 256, + crate::omf::SegmentAlignment::DWord => 4, + crate::omf::SegmentAlignment::Page4K => 4096, + _ => 1, + } + } + + fn file_range(&self) -> (u64, u64) { + (0, 0) + } + + fn data(&self) -> Result<&'data [u8]> { + // OMF segments don't have direct file mapping + Ok(&[]) + } + + fn data_range(&self, _address: u64, _size: u64) -> Result> { + Ok(None) + } + + fn name_bytes(&self) -> Result> { + Ok(self + .file + .get_name(self.file.segments[self.index].name_index)) + } + + fn name(&self) -> Result> { + let index = self.file.segments[self.index].name_index; + let name_opt = self.file.get_name(index); + match name_opt { + Some(bytes) => Ok(core::str::from_utf8(bytes).ok()), + None => Ok(None), + } + } + + fn flags(&self) -> SegmentFlags { + SegmentFlags::None + } +} + +/// An iterator over OMF segments. 
+#[derive(Debug)] +pub struct OmfSegmentIterator<'data, 'file, R: ReadRef<'data>> { + pub(super) file: &'file OmfFile<'data, R>, + pub(super) index: usize, +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for OmfSegmentIterator<'data, 'file, R> { + type Item = OmfSegmentRef<'data, 'file, R>; + + fn next(&mut self) -> Option { + if self.index < self.file.segments.len() { + let segment = OmfSegmentRef { + file: self.file, + index: self.index, + }; + self.index += 1; + Some(segment) + } else { + None + } + } +} diff --git a/src/read/omf/symbol.rs b/src/read/omf/symbol.rs index f0094168..3c3dfa46 100644 --- a/src/read/omf/symbol.rs +++ b/src/read/omf/symbol.rs @@ -64,10 +64,10 @@ impl<'data> ObjectSymbol<'data> for OmfSymbol<'data> { } fn is_common(&self) -> bool { - // Communal symbols have segment_index == 0, frame_number == 0, but offset != 0 - // The offset field stores the size of the communal symbol - // This excludes both externals (offset == 0) and absolute symbols (frame_number != 0) - self.segment_index == 0 && self.frame_number == 0 && self.offset != 0 + matches!( + self.class, + super::OmfSymbolClass::Communal | super::OmfSymbolClass::LocalCommunal + ) } fn is_weak(&self) -> bool { @@ -75,19 +75,34 @@ impl<'data> ObjectSymbol<'data> for OmfSymbol<'data> { } fn scope(&self) -> SymbolScope { - if self.segment_index == 0 { - SymbolScope::Unknown - } else { - SymbolScope::Linkage + match self.class { + super::OmfSymbolClass::LocalPublic + | super::OmfSymbolClass::LocalExternal + | super::OmfSymbolClass::LocalCommunal => SymbolScope::Compilation, + super::OmfSymbolClass::Public + | super::OmfSymbolClass::External + | super::OmfSymbolClass::Communal + | super::OmfSymbolClass::ComdatExternal => { + if self.segment_index == 0 { + SymbolScope::Unknown + } else { + SymbolScope::Linkage + } + } } } fn is_global(&self) -> bool { - true + !self.is_local() } fn is_local(&self) -> bool { - false + matches!( + self.class, + super::OmfSymbolClass::LocalPublic + | 
super::OmfSymbolClass::LocalExternal + | super::OmfSymbolClass::LocalCommunal + ) } fn flags(&self) -> SymbolFlags { @@ -97,34 +112,16 @@ impl<'data> ObjectSymbol<'data> for OmfSymbol<'data> { /// An iterator over OMF symbols. #[derive(Debug)] -pub struct OmfSymbolIterator<'data, 'file> { - pub(super) publics: &'file [OmfSymbol<'data>], - pub(super) externals: &'file [OmfSymbol<'data>], - pub(super) communals: &'file [OmfSymbol<'data>], +pub struct OmfSymbolIterator<'data, 'file, R: ReadRef<'data> = &'data [u8]> { + pub(super) file: &'file OmfFile<'data, R>, pub(super) index: usize, } -impl<'data, 'file> Iterator for OmfSymbolIterator<'data, 'file> { +impl<'data, 'file, R: ReadRef<'data>> Iterator for OmfSymbolIterator<'data, 'file, R> { type Item = OmfSymbol<'data>; fn next(&mut self) -> Option { - let total_publics = self.publics.len(); - let total_externals = self.externals.len(); - let total_before_communals = total_publics + total_externals; - let total = total_before_communals + self.communals.len(); - - if self.index >= total { - return None; - } - - let symbol = if self.index < total_publics { - self.publics[self.index].clone() - } else if self.index < total_before_communals { - self.externals[self.index - total_publics].clone() - } else { - self.communals[self.index - total_before_communals].clone() - }; - + let symbol = self.file.symbols.get(self.index)?.clone(); self.index += 1; Some(symbol) } @@ -140,18 +137,20 @@ impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for OmfSymbolTable<' impl<'data, 'file, R: ReadRef<'data>> ObjectSymbolTable<'data> for OmfSymbolTable<'data, 'file, R> { type Symbol = OmfSymbol<'data>; - type SymbolIterator = OmfSymbolIterator<'data, 'file>; + type SymbolIterator = OmfSymbolIterator<'data, 'file, R>; fn symbols(&self) -> Self::SymbolIterator { OmfSymbolIterator { - publics: &self.file.publics, - externals: &self.file.externals, - communals: &self.file.communals, + file: self.file, index: 0, } } fn 
symbol_by_index(&self, index: SymbolIndex) -> Result { - self.file.symbol_by_index(index) + self.file + .symbols + .get(index.0) + .cloned() + .ok_or(Error("Symbol index out of bounds")) } } From 3feef1df2e055d9496569a4967fcc57f86aea045 Mon Sep 17 00:00:00 2001 From: Luke Street Date: Wed, 24 Sep 2025 19:46:41 -0600 Subject: [PATCH 4/8] Check for LEDATA/LIDATA before FIXUPP/FIXUPP32 --- src/read/omf/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/read/omf/mod.rs b/src/read/omf/mod.rs index 606561ca..94a2bad9 100644 --- a/src/read/omf/mod.rs +++ b/src/read/omf/mod.rs @@ -487,6 +487,10 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { &mut frame_threads, &mut target_threads, )?; + } else { + return Err(Error( + "FIXUPP/FIXUPP32 record encountered without preceding LEDATA/LIDATA", + )); } } omf::record_type::MODEND | omf::record_type::MODEND32 => { From 89eece708702ee61392f6475be84399b75e6f5cc Mon Sep 17 00:00:00 2001 From: Luke Street Date: Wed, 24 Sep 2025 23:48:30 -0600 Subject: [PATCH 5/8] OMF module reorganization --- src/common.rs | 11 + src/omf.rs | 245 +++---- src/read/mod.rs | 2 + src/read/omf/comdat.rs | 128 ++++ src/read/omf/file.rs | 1376 ++++++++++++++++++++++++++++++++++- src/read/omf/mod.rs | 1404 +----------------------------------- src/read/omf/relocation.rs | 156 +++- src/read/omf/section.rs | 121 +--- src/read/omf/segment.rs | 67 +- src/read/omf/symbol.rs | 51 +- 10 files changed, 1836 insertions(+), 1725 deletions(-) create mode 100644 src/read/omf/comdat.rs diff --git a/src/common.rs b/src/common.rs index 7b67bd1d..5386d7d3 100644 --- a/src/common.rs +++ b/src/common.rs @@ -617,4 +617,15 @@ pub enum RelocationFlags { /// `r_rsize` field in the XCOFF relocation. r_rsize: u8, }, + /// OMF relocation metadata. + Omf { + /// The location field describing what bytes are being fixed up. + location: crate::omf::FixupLocation, + /// Whether the relocation is applied segment-relative (`M = 1`) or self-relative (`M = 0`). 
+ mode: crate::omf::FixupMode, + /// The frame datum used to establish the base reference for the relocation. + frame: crate::omf::FixupFrame, + /// The target datum identifying the entity being referenced. + target: crate::omf::FixupTarget, + }, } diff --git a/src/omf.rs b/src/omf.rs index 41c8b737..b1bd5b3d 100644 --- a/src/omf.rs +++ b/src/omf.rs @@ -92,6 +92,94 @@ pub mod record_type { pub const VENDEXT: u8 = 0xCE; } +/// Check if a byte is a valid OMF record type +pub fn is_omf_record_type(byte: u8) -> bool { + use crate::omf::record_type::*; + matches!( + byte, + THEADR + | LHEADR + | COMENT + | MODEND + | MODEND32 + | EXTDEF + | TYPDEF + | PUBDEF + | PUBDEF32 + | LINNUM + | LINNUM32 + | LNAMES + | SEGDEF + | SEGDEF32 + | GRPDEF + | FIXUPP + | FIXUPP32 + | LEDATA + | LEDATA32 + | LIDATA + | LIDATA32 + | COMDEF + | BAKPAT + | BAKPAT32 + | LEXTDEF + | LEXTDEF32 + | LPUBDEF + | LPUBDEF32 + | LCOMDEF + | CEXTDEF + | COMDAT + | COMDAT32 + | LINSYM + | LINSYM32 + | ALIAS + | NBKPAT + | NBKPAT32 + | LLNAMES + | VERNUM + | VENDEXT + ) +} + +/// The addressing mode for an OMF relocation. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[repr(u8)] +pub enum FixupMode { + /// Segment-relative relocation (`M = 1`). + SegmentRelative = 0, + /// Self-relative relocation (`M = 0`). + SelfRelative = 1, +} + +/// Frame datum variants as defined by the OMF specification. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum FixupFrame { + /// Segment frame datum referencing a 1-based segment index. + Segment(u16), + /// Group frame datum referencing a 1-based group index. + Group(u16), + /// External frame datum referencing a 1-based entry in the external-name table. + External(u16), + /// Explicit frame number datum. + FrameNumber(u16), + /// Use the location of the fixup as the frame datum. + Location, + /// Use the target's frame datum. + Target, +} + +/// Target datum variants as defined by the OMF specification. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum FixupTarget { + /// Segment target datum referencing a 1-based segment index. + Segment(u16), + /// Group target datum referencing a 1-based group index. + Group(u16), + /// External target datum referencing a 1-based entry in the external-name table. + External(u16), + /// Explicit frame number datum. + FrameNumber(u16), +} + /// OMF record header #[derive(Debug, Clone, Copy)] #[repr(C)] @@ -139,7 +227,7 @@ pub enum SegmentCombination { } /// Fixup location types -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[repr(u8)] pub enum FixupLocation { /// Low-order byte @@ -161,158 +249,3 @@ pub enum FixupLocation { /// 32-bit loader-resolved offset LoaderOffset32 = 13, } - -/// Target method types for fixups -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[repr(u8)] -pub enum TargetMethod { - /// Segment index - SegmentIndex = 0, - /// Group index - GroupIndex = 1, - /// External index - ExternalIndex = 2, - /// Frame number (absolute) - FrameNumber = 3, -} - -/// Frame method types for fixups -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[repr(u8)] -pub enum FrameMethod { - /// Segment index - SegmentIndex = 0, - /// Group index - GroupIndex = 1, - /// External index - ExternalIndex = 2, - /// Frame number (absolute) - FrameNumber = 3, - /// Location (use fixup location) - Location = 4, - /// Target (use target's frame) - Target = 5, -} - -/// Check if a byte is a valid OMF record type -pub(crate) fn is_omf_record_type(byte: u8) -> bool { - use record_type::*; - matches!( - byte, - THEADR - | LHEADR - | COMENT - | MODEND - | MODEND32 - | EXTDEF - | TYPDEF - | PUBDEF - | PUBDEF32 - | LINNUM - | LINNUM32 - | LNAMES - | SEGDEF - | SEGDEF32 - | GRPDEF - | FIXUPP - | FIXUPP32 - | LEDATA - | LEDATA32 - | LIDATA - | LIDATA32 - | COMDEF - | BAKPAT - | BAKPAT32 - | LEXTDEF - | LEXTDEF32 - | LPUBDEF - | LPUBDEF32 - | LCOMDEF - | CEXTDEF - | COMDAT - | COMDAT32 - | 
LINSYM - | LINSYM32 - | ALIAS - | NBKPAT - | NBKPAT32 - | LLNAMES - | VERNUM - | VENDEXT - ) -} - -/// Helper to read an OMF index (1 or 2 bytes) -pub(crate) fn read_index(data: &[u8]) -> Option<(u16, usize)> { - if data.is_empty() { - return None; - } - - let first_byte = data[0]; - if first_byte & 0x80 == 0 { - // 1-byte index - Some((first_byte as u16, 1)) - } else if data.len() >= 2 { - // 2-byte index - let high = (first_byte & 0x7F) as u16; - let low = data[1] as u16; - Some((high << 8 | low, 2)) - } else { - None - } -} - -/// Helper to read a counted string (length byte followed by string) -pub(crate) fn read_counted_string(data: &[u8]) -> Option<(&[u8], usize)> { - if data.is_empty() { - return None; - } - - let length = data[0] as usize; - if data.len() > length { - Some((&data[1..1 + length], 1 + length)) - } else { - None - } -} - -/// Read an encoded value (used in LIDATA for repeat counts and block counts) -/// Returns the value and number of bytes consumed -pub(crate) fn read_encoded_value(data: &[u8]) -> Option<(u32, usize)> { - if data.is_empty() { - return None; - } - - let first_byte = data[0]; - if first_byte < 0x80 { - // Single byte value (0-127) - Some((first_byte as u32, 1)) - } else if first_byte == 0x81 { - // Two byte value: 0x81 followed by 16-bit little-endian value - if data.len() >= 3 { - let value = u16::from_le_bytes([data[1], data[2]]) as u32; - Some((value, 3)) - } else { - None - } - } else if first_byte == 0x84 { - // Three byte value: 0x84 followed by 24-bit little-endian value - if data.len() >= 4 { - let value = u32::from_le_bytes([data[1], data[2], data[3], 0]); - Some((value, 4)) - } else { - None - } - } else if first_byte == 0x88 { - // Four byte value: 0x88 followed by 32-bit little-endian value - if data.len() >= 5 { - let value = u32::from_le_bytes([data[1], data[2], data[3], data[4]]); - Some((value, 5)) - } else { - None - } - } else { - // Unknown encoding - None - } -} diff --git a/src/read/mod.rs b/src/read/mod.rs 
index f302aba6..59a9c9f6 100644 --- a/src/read/mod.rs +++ b/src/read/mod.rs @@ -371,6 +371,8 @@ impl FileKind { #[cfg(feature = "omf")] [0x80, ..] | [0x82, ..] => { // Check if it's a valid OMF record type + // TODO this is tautological, 0x80 and 0x82 are valid OMF record types + // how can we check better? if crate::omf::is_omf_record_type(magic[0]) { FileKind::Omf } else { diff --git a/src/read/omf/comdat.rs b/src/read/omf/comdat.rs new file mode 100644 index 00000000..3d018537 --- /dev/null +++ b/src/read/omf/comdat.rs @@ -0,0 +1,128 @@ +use crate::read::{self, Error, Result}; +use crate::{omf, ComdatKind, ObjectComdat, ReadRef, SectionIndex, SymbolIndex}; + +use super::OmfFile; + +/// A COMDAT (communal data) section +#[derive(Debug, Clone)] +pub(super) struct OmfComdatData<'data> { + /// Symbol name + pub(super) name: &'data [u8], + /// Segment index where this COMDAT belongs + pub(super) segment_index: u16, + /// Selection/allocation method + pub(super) selection: OmfComdatSelection, + /// Alignment + #[allow(unused)] + pub(super) alignment: omf::SegmentAlignment, + /// Data + #[allow(unused)] + pub(super) data: &'data [u8], +} + +/// COMDAT selection methods +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(super) enum OmfComdatSelection { + /// Explicit: may not be combined, produce error if multiple definitions + Explicit = 0, + /// Use any: pick any instance + UseAny = 1, + /// Same size: all instances must be same size + SameSize = 2, + /// Exact match: all instances must have identical content + ExactMatch = 3, +} + +/// A COMDAT section in an OMF file. 
+#[derive(Debug)] +pub struct OmfComdat<'data, 'file, R: ReadRef<'data>> { + file: &'file OmfFile<'data, R>, + index: usize, + _phantom: core::marker::PhantomData<&'data ()>, +} + +impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for OmfComdat<'data, 'file, R> {} + +impl<'data, 'file, R: ReadRef<'data>> ObjectComdat<'data> for OmfComdat<'data, 'file, R> { + type SectionIterator = OmfComdatSectionIterator<'data, 'file, R>; + + fn kind(&self) -> ComdatKind { + let comdat = &self.file.comdats[self.index]; + match comdat.selection { + OmfComdatSelection::Explicit => ComdatKind::NoDuplicates, + OmfComdatSelection::UseAny => ComdatKind::Any, + OmfComdatSelection::SameSize => ComdatKind::SameSize, + OmfComdatSelection::ExactMatch => ComdatKind::ExactMatch, + } + } + + fn symbol(&self) -> SymbolIndex { + // COMDAT symbols don't have a direct symbol index in OMF + SymbolIndex(usize::MAX) + } + + fn name_bytes(&self) -> Result<&'data [u8]> { + let comdat = &self.file.comdats[self.index]; + Ok(comdat.name) + } + + fn name(&self) -> Result<&'data str> { + let comdat = &self.file.comdats[self.index]; + core::str::from_utf8(comdat.name).map_err(|_| Error("Invalid UTF-8 in COMDAT name")) + } + + fn sections(&self) -> Self::SectionIterator { + let comdat = &self.file.comdats[self.index]; + OmfComdatSectionIterator { + segment_index: (comdat.segment_index as usize).checked_sub(1), + returned: false, + _phantom: core::marker::PhantomData, + } + } +} + +/// An iterator over COMDAT sections. 
+#[derive(Debug)] +pub struct OmfComdatIterator<'data, 'file, R: ReadRef<'data>> { + pub(super) file: &'file OmfFile<'data, R>, + pub(super) index: usize, +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for OmfComdatIterator<'data, 'file, R> { + type Item = OmfComdat<'data, 'file, R>; + + fn next(&mut self) -> Option { + if self.index < self.file.comdats.len() { + let comdat = OmfComdat { + file: self.file, + index: self.index, + _phantom: core::marker::PhantomData, + }; + self.index += 1; + Some(comdat) + } else { + None + } + } +} + +/// An iterator over sections in a COMDAT. +#[derive(Debug)] +pub struct OmfComdatSectionIterator<'data, 'file, R: ReadRef<'data>> { + segment_index: Option, + returned: bool, + _phantom: core::marker::PhantomData<(&'data (), &'file (), R)>, +} + +impl<'data, 'file, R: ReadRef<'data>> Iterator for OmfComdatSectionIterator<'data, 'file, R> { + type Item = SectionIndex; + + fn next(&mut self) -> Option { + if !self.returned { + self.returned = true; + self.segment_index.map(|idx| SectionIndex(idx + 1)) + } else { + None + } + } +} diff --git a/src/read/omf/file.rs b/src/read/omf/file.rs index 056d9775..441c9ef3 100644 --- a/src/read/omf/file.rs +++ b/src/read/omf/file.rs @@ -1,16 +1,1164 @@ //! OMF file implementation for the unified read API. 
+use alloc::vec::Vec; + use crate::read::{ - Architecture, ByteString, Error, Export, FileFlags, Import, NoDynamicRelocationIterator, - Object, ObjectKind, ObjectSection, ReadRef, Result, SectionIndex, SymbolIndex, + self, Architecture, ByteString, CodeView, Error, Export, FileFlags, Import, + NoDynamicRelocationIterator, Object, ObjectKind, ObjectSection, ReadRef, Result, SectionIndex, + SymbolIndex, }; -use crate::SubArchitecture; +use crate::{omf, SubArchitecture}; use super::{ - OmfComdat, OmfComdatIterator, OmfFile, OmfSection, OmfSectionIterator, OmfSegmentIterator, - OmfSegmentRef, OmfSymbol, OmfSymbolIterator, OmfSymbolTable, + OmfComdat, OmfComdatData, OmfComdatIterator, OmfComdatSelection, OmfDataChunk, OmfFixup, + OmfGroup, OmfSection, OmfSectionIterator, OmfSegment, OmfSegmentIterator, OmfSegmentRef, + OmfSymbol, OmfSymbolClass, OmfSymbolIterator, OmfSymbolTable, }; +/// An OMF object file. +/// +/// This handles both 16-bit and 32-bit OMF variants. +#[derive(Debug)] +pub struct OmfFile<'data, R: ReadRef<'data> = &'data [u8]> { + pub(super) data: R, + /// The module name from THEADR/LHEADR record + pub(super) module_name: Option<&'data str>, + /// Segment definitions + pub(super) segments: Vec>, + /// All symbols (publics, externals, communals, locals) in occurrence order + pub(super) symbols: Vec>, + /// Maps external-name table index (1-based) to SymbolIndex + pub(super) external_order: Vec, + /// COMDAT sections + pub(super) comdats: Vec>, + /// Name table (LNAMES/LLNAMES) + pub(super) names: Vec<&'data [u8]>, + /// Group definitions + pub(super) groups: Vec, +} + +impl<'data, R: ReadRef<'data>> read::private::Sealed for OmfFile<'data, R> {} + +impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { + /// Parse an OMF file from raw data + pub fn parse(data: R) -> Result { + let mut file = OmfFile { + data, + module_name: None, + segments: Vec::new(), + symbols: Vec::new(), + external_order: Vec::new(), + comdats: Vec::new(), + names: Vec::new(), + groups: 
Vec::new(), + }; + + file.parse_records()?; + file.assign_symbol_kinds(); + Ok(file) + } + + fn assign_symbol_kinds(&mut self) { + // Compute kinds for symbols based on their segments + let kinds: Vec = self + .symbols + .iter() + .map(|sym| match sym.class { + OmfSymbolClass::Public | OmfSymbolClass::LocalPublic => { + if sym.segment_index > 0 && (sym.segment_index as usize) <= self.segments.len() + { + let segment_idx = (sym.segment_index - 1) as usize; + let section_kind = self.segment_section_kind(segment_idx); + Self::symbol_kind_from_section_kind(section_kind) + } else { + read::SymbolKind::Unknown + } + } + OmfSymbolClass::Communal | OmfSymbolClass::LocalCommunal => read::SymbolKind::Data, + _ => read::SymbolKind::Unknown, + }) + .collect(); + + // Apply computed kinds + for (sym, kind) in self.symbols.iter_mut().zip(kinds) { + sym.kind = kind; + } + } + + fn symbol_kind_from_section_kind(section_kind: read::SectionKind) -> read::SymbolKind { + match section_kind { + read::SectionKind::Text => read::SymbolKind::Text, + read::SectionKind::Data | read::SectionKind::ReadOnlyData => read::SymbolKind::Data, + read::SectionKind::UninitializedData => read::SymbolKind::Data, + _ => read::SymbolKind::Unknown, + } + } + + /// Get the section kind for a segment (reusing logic from OmfSection) + pub(super) fn segment_section_kind(&self, segment_index: usize) -> read::SectionKind { + if segment_index >= self.segments.len() { + return read::SectionKind::Unknown; + } + + let segment = &self.segments[segment_index]; + + // Check segment name first for special cases + if let Some(seg_name) = self.get_name(segment.name_index) { + // Segments named CONST are always read-only regardless of class + match seg_name { + b"CONST" | b"_CONST" | b"CONST2" | b"RDATA" | b"_RDATA" => { + return read::SectionKind::ReadOnlyData; + } + _ => {} + } + + // Check for debug sections by name + if seg_name.starts_with(b"$$") { + // Watcom-style debug sections + return read::SectionKind::Debug; + 
} + if seg_name == b".drectve" || seg_name == b".DRECTVE" { + return read::SectionKind::Linker; + } + + // Check other common names + let name_upper = seg_name.to_ascii_uppercase(); + if name_upper == b"_TEXT" || name_upper == b"CODE" || name_upper == b".TEXT" { + return read::SectionKind::Text; + } else if name_upper == b"_DATA" || name_upper == b"DATA" || name_upper == b".DATA" { + return read::SectionKind::Data; + } else if name_upper == b"_BSS" + || name_upper == b"BSS" + || name_upper == b".BSS" + || name_upper == b"STACK" + { + return read::SectionKind::UninitializedData; + } + } + + // Determine kind from class name + if let Some(class_name) = self.get_name(segment.class_index) { + // Check for exact matches first (most common case) + match class_name { + b"CODE" | b"_TEXT" | b"TEXT" => return read::SectionKind::Text, + b"CONST" | b"_CONST" | b"CONST2" | b"RDATA" | b"_RDATA" => { + return read::SectionKind::ReadOnlyData; + } + b"BSS" | b"_BSS" => return read::SectionKind::UninitializedData, + b"STACK" | b"_STACK" => return read::SectionKind::UninitializedData, + b"DEBUG" | b"_DEBUG" | b"DEBSYM" | b"DEBTYP" => return read::SectionKind::Debug, + b"DATA" | b"_DATA" => { + // DATA sections with no actual data are treated as uninitialized + if segment.data_chunks.is_empty() { + return read::SectionKind::UninitializedData; + } else { + return read::SectionKind::Data; + } + } + _ => {} + } + + // Check for case-insensitive substring matches for less common variations + let class_upper = class_name.to_ascii_uppercase(); + if class_upper.windows(4).any(|w| w == b"CODE") { + return read::SectionKind::Text; + } else if class_upper.windows(5).any(|w| w == b"CONST") { + return read::SectionKind::ReadOnlyData; + } else if class_upper.windows(3).any(|w| w == b"BSS") + || class_upper.windows(5).any(|w| w == b"STACK") + { + return read::SectionKind::UninitializedData; + } else if class_upper.windows(5).any(|w| w == b"DEBUG") { + return read::SectionKind::Debug; + } else if 
class_upper.windows(4).any(|w| w == b"DATA") { + // DATA sections with no actual data are treated as uninitialized + if segment.data_chunks.is_empty() { + return read::SectionKind::UninitializedData; + } else { + return read::SectionKind::Data; + } + } + } + + // Final fallback based on whether segment has data + if segment.data_chunks.is_empty() { + read::SectionKind::UninitializedData + } else { + read::SectionKind::Unknown + } + } + + fn parse_records(&mut self) -> Result<()> { + let len = self + .data + .len() + .map_err(|_| Error("Failed to get data length"))?; + let data = self + .data + .read_bytes_at(0, len) + .map_err(|_| Error("Failed to read OMF data"))?; + let mut offset = 0; + + // First record must be THEADR or LHEADR + if data.is_empty() { + return Err(Error("Empty OMF file")); + } + + let first_type = data[0]; + if first_type != omf::record_type::THEADR && first_type != omf::record_type::LHEADR { + return Err(Error( + "Invalid OMF file: first record must be THEADR or LHEADR", + )); + } + + let mut current_segment: Option = None; + let mut current_data_offset: Option = None; + + // Thread storage for FIXUPP parsing + let mut frame_threads: [Option; 4] = [None; 4]; + let mut target_threads: [Option; 4] = [None; 4]; + + while offset < data.len() { + // Read record header + if offset + 3 > data.len() { + break; + } + + let record_type = data[offset]; + let length = u16::from_le_bytes([data[offset + 1], data[offset + 2]]) as usize; + + // Length includes the checksum byte at the end + if offset + 3 + length > data.len() { + return Err(Error("Truncated OMF record")); + } + + // Record data excludes the checksum + let record_data = &data[offset + 3..offset + 3 + length - 1]; + let checksum = data[offset + 3 + length - 1]; + + // Verify checksum + if !Self::verify_checksum(record_type, length, record_data, checksum) { + return Err(Error("Invalid OMF record checksum")); + } + + // Process record based on type + match record_type { + omf::record_type::THEADR 
| omf::record_type::LHEADR => { + self.parse_header(record_data)?; + } + omf::record_type::LNAMES | omf::record_type::LLNAMES => { + self.parse_names(record_data)?; + } + omf::record_type::SEGDEF | omf::record_type::SEGDEF32 => { + self.parse_segdef(record_data, record_type == omf::record_type::SEGDEF32)?; + } + omf::record_type::GRPDEF => { + self.parse_grpdef(record_data)?; + } + omf::record_type::PUBDEF | omf::record_type::PUBDEF32 => { + self.parse_pubdef( + record_data, + record_type == omf::record_type::PUBDEF32, + OmfSymbolClass::Public, + )?; + } + omf::record_type::LPUBDEF | omf::record_type::LPUBDEF32 => { + self.parse_pubdef( + record_data, + record_type == omf::record_type::LPUBDEF32, + OmfSymbolClass::LocalPublic, + )?; + } + omf::record_type::EXTDEF => { + self.parse_extdef(record_data, OmfSymbolClass::External)?; + } + omf::record_type::LEXTDEF | omf::record_type::LEXTDEF32 => { + self.parse_extdef(record_data, OmfSymbolClass::LocalExternal)?; + } + omf::record_type::CEXTDEF => { + self.parse_extdef(record_data, OmfSymbolClass::ComdatExternal)?; + } + omf::record_type::COMDEF => { + self.parse_comdef(record_data, OmfSymbolClass::Communal)?; + } + omf::record_type::LCOMDEF => { + self.parse_comdef(record_data, OmfSymbolClass::LocalCommunal)?; + } + omf::record_type::COMDAT | omf::record_type::COMDAT32 => { + self.parse_comdat(record_data, record_type == omf::record_type::COMDAT32)?; + } + omf::record_type::COMENT => { + self.parse_comment(record_data)?; + } + omf::record_type::LEDATA | omf::record_type::LEDATA32 => { + let (seg_idx, offset) = + self.parse_ledata(record_data, record_type == omf::record_type::LEDATA32)?; + current_segment = Some(seg_idx); + current_data_offset = Some(offset); + } + omf::record_type::LIDATA | omf::record_type::LIDATA32 => { + let (seg_idx, offset) = + self.parse_lidata(record_data, record_type == omf::record_type::LIDATA32)?; + current_segment = Some(seg_idx); + current_data_offset = Some(offset); + } + 
omf::record_type::FIXUPP | omf::record_type::FIXUPP32 => { + if let (Some(seg_idx), Some(data_offset)) = + (current_segment, current_data_offset) + { + self.parse_fixupp( + record_data, + record_type == omf::record_type::FIXUPP32, + seg_idx, + data_offset, + &mut frame_threads, + &mut target_threads, + )?; + } else { + return Err(Error( + "FIXUPP/FIXUPP32 record encountered without preceding LEDATA/LIDATA", + )); + } + } + omf::record_type::MODEND | omf::record_type::MODEND32 => { + // End of module + break; + } + _ => { + // Skip unknown record types + } + } + + offset += 3 + length; // header + data (which includes checksum) + } + + Ok(()) + } + + fn parse_header(&mut self, data: &'data [u8]) -> Result<()> { + if let Some((name, _)) = read_counted_string(data) { + self.module_name = core::str::from_utf8(name).ok(); + } + Ok(()) + } + + fn parse_names(&mut self, data: &'data [u8]) -> Result<()> { + let mut offset = 0; + while offset < data.len() { + if let Some((name, size)) = read_counted_string(&data[offset..]) { + self.names.push(name); + offset += size; + } else { + break; + } + } + Ok(()) + } + + fn parse_segdef(&mut self, data: &'data [u8], is_32bit: bool) -> Result<()> { + let mut offset = 0; + + // Parse ACBP byte + if offset >= data.len() { + return Err(Error("Truncated SEGDEF record")); + } + let acbp = data[offset]; + offset += 1; + + let alignment = match (acbp >> 5) & 0x07 { + 0 => omf::SegmentAlignment::Absolute, + 1 => omf::SegmentAlignment::Byte, + 2 => omf::SegmentAlignment::Word, + 3 => omf::SegmentAlignment::Paragraph, + 4 => omf::SegmentAlignment::Page, + 5 => omf::SegmentAlignment::DWord, + 6 => omf::SegmentAlignment::Page4K, + _ => return Err(Error("Invalid segment alignment")), + }; + + let combination = match (acbp >> 2) & 0x07 { + 0 => omf::SegmentCombination::Private, + 2 => omf::SegmentCombination::Public, + 5 => omf::SegmentCombination::Stack, + 6 => omf::SegmentCombination::Common, + _ => return Err(Error("Invalid segment 
combination")), + }; + + let use32 = (acbp & 0x01) != 0; + + // Skip frame number and offset for absolute segments + if alignment == omf::SegmentAlignment::Absolute { + offset += 3; // frame (2) + offset (1) + } + + // Parse segment length + let length = if is_32bit || use32 { + if offset + 4 > data.len() { + return Err(Error("Truncated SEGDEF record")); + } + let length = u32::from_le_bytes([ + data[offset], + data[offset + 1], + data[offset + 2], + data[offset + 3], + ]); + offset += 4; + length + } else { + if offset + 2 > data.len() { + return Err(Error("Truncated SEGDEF record")); + } + let length = u16::from_le_bytes([data[offset], data[offset + 1]]) as u32; + offset += 2; + length + }; + + // Parse segment name index + let (name_index, size) = + read_index(&data[offset..]).ok_or(Error("Invalid segment name index"))?; + offset += size; + + // Parse class name index + let (class_index, size) = + read_index(&data[offset..]).ok_or(Error("Invalid class name index"))?; + offset += size; + + // Parse overlay name index + let (overlay_index, _) = + read_index(&data[offset..]).ok_or(Error("Invalid overlay name index"))?; + + self.segments.push(OmfSegment { + name_index, + class_index, + overlay_index, + alignment, + combination, + use32, + length, + data_chunks: Vec::new(), + relocations: Vec::new(), + }); + + Ok(()) + } + + fn parse_grpdef(&mut self, data: &'data [u8]) -> Result<()> { + let mut offset = 0; + + // Parse group name index + let (name_index, size) = read_index(data).ok_or(Error("Invalid group name index"))?; + offset += size; + + let mut segments = Vec::new(); + + // Parse segment indices + while offset < data.len() { + if data[offset] == 0xFF { + // Segment index follows + offset += 1; + let (seg_index, size) = + read_index(&data[offset..]).ok_or(Error("Invalid segment index in group"))?; + offset += size; + segments.push(seg_index); + } else { + break; + } + } + + self.groups.push(OmfGroup { + name_index, + segments, + }); + + Ok(()) + } + + fn 
parse_pubdef( + &mut self, + data: &'data [u8], + is_32bit: bool, + class: OmfSymbolClass, + ) -> Result<()> { + let mut offset = 0; + + // Parse group index + let (group_index, size) = read_index(data).ok_or(Error("Invalid group index"))?; + offset += size; + + // Parse segment index + let (segment_index, size) = + read_index(&data[offset..]).ok_or(Error("Invalid segment index"))?; + offset += size; + + // Read frame number if segment index is 0 (for absolute symbols) + let frame_number = if segment_index == 0 { + if offset + 2 > data.len() { + return Err(Error("Invalid frame number in PUBDEF")); + } + let frame = u16::from_le_bytes([data[offset], data[offset + 1]]); + offset += 2; + frame + } else { + 0 + }; + + // Parse public definitions + while offset < data.len() { + // Parse name + let Some((name, size)) = read_counted_string(&data[offset..]) else { + break; + }; + offset += size; + + // Parse offset + let pub_offset = if is_32bit { + if offset + 4 > data.len() { + break; + } + let off = u32::from_le_bytes([ + data[offset], + data[offset + 1], + data[offset + 2], + data[offset + 3], + ]); + offset += 4; + off + } else { + if offset + 2 > data.len() { + break; + } + let off = u16::from_le_bytes([data[offset], data[offset + 1]]) as u32; + offset += 2; + off + }; + + // Parse type index + let (type_index, size) = read_index(&data[offset..]) + .ok_or(Error("Invalid type index in PUBDEF/LPUBDEF record"))?; + offset += size; + + self.symbols.push(OmfSymbol { + symbol_index: self.symbols.len(), + name, + class, + group_index, + segment_index, + frame_number, + offset: pub_offset, + type_index, + kind: read::SymbolKind::Unknown, // Will be computed later + }); + } + + Ok(()) + } + + fn parse_extdef(&mut self, data: &'data [u8], class: OmfSymbolClass) -> Result<()> { + let mut offset = 0; + + while offset < data.len() { + // Parse name + let Some((name, size)) = read_counted_string(&data[offset..]) else { + break; + }; + offset += size; + + // Parse type index + let 
(type_index, size) = read_index(&data[offset..]) + .ok_or(Error("Invalid type index in EXTDEF/LEXTDEF/CEXTDEF record"))?; + offset += size; + + let sym_idx = self.symbols.len(); + self.symbols.push(OmfSymbol { + symbol_index: sym_idx, + name, + class, + group_index: 0, + segment_index: 0, + frame_number: 0, + offset: 0, + type_index, + kind: read::SymbolKind::Unknown, + }); + + // Add to external_order for symbols that contribute to external-name table + self.external_order.push(read::SymbolIndex(sym_idx)); + } + + Ok(()) + } + + fn parse_comdef(&mut self, data: &'data [u8], class: OmfSymbolClass) -> Result<()> { + let mut offset = 0; + + while offset < data.len() { + // Parse name + let Some((name, size)) = read_counted_string(&data[offset..]) else { + break; + }; + offset += size; + + // Parse type index + let (type_index, size) = read_index(&data[offset..]) + .ok_or(Error("Invalid type index in COMDEF/LCOMDEF record"))?; + offset += size; + + // Parse data type and communal length + if offset >= data.len() { + break; + } + let data_type = data[offset]; + offset += 1; + + let communal_length = match data_type { + 0x61 => { + // FAR data - number of elements followed by element size + let (num_elements, size1) = read_encoded_value(&data[offset..]) + .ok_or(Error("Invalid number of elements in FAR COMDEF"))?; + offset += size1; + let (element_size, size2) = read_encoded_value(&data[offset..]) + .ok_or(Error("Invalid element size in FAR COMDEF"))?; + offset += size2; + num_elements * element_size + } + 0x62 => { + // NEAR data - size in bytes + let (size_val, size_bytes) = read_encoded_value(&data[offset..]) + .ok_or(Error("Invalid size in NEAR COMDEF"))?; + offset += size_bytes; + size_val + } + _ => { + // Unknown data type, skip + continue; + } + }; + + let sym_idx = self.symbols.len(); + self.symbols.push(OmfSymbol { + symbol_index: sym_idx, + name, + class, + group_index: 0, + segment_index: 0, + frame_number: 0, + offset: communal_length, // Store size in 
offset field + type_index, + kind: read::SymbolKind::Data, + }); + + // Add to external_order for symbols that contribute to external-name table + self.external_order.push(read::SymbolIndex(sym_idx)); + } + + Ok(()) + } + + fn parse_comdat(&mut self, data: &'data [u8], is_32bit: bool) -> Result<()> { + let mut offset = 0; + + // Parse flags byte + if offset >= data.len() { + return Err(Error("Truncated COMDAT record")); + } + let _flags = data[offset]; + offset += 1; + + // Parse attributes byte + if offset >= data.len() { + return Err(Error("Truncated COMDAT record")); + } + let attributes = data[offset]; + offset += 1; + + // Extract selection criteria from high nibble of attributes + let selection = match (attributes >> 4) & 0x0F { + 0x00 => OmfComdatSelection::Explicit, // No match + 0x01 => OmfComdatSelection::UseAny, // Pick any + 0x02 => OmfComdatSelection::SameSize, // Same size + 0x03 => OmfComdatSelection::ExactMatch, // Exact match + _ => OmfComdatSelection::UseAny, + }; + + // Extract allocation type from low nibble of attributes + let allocation_type = attributes & 0x0F; + + // Parse align/segment index field + let (segment_index, size) = + read_index(&data[offset..]).ok_or(Error("Invalid COMDAT segment index"))?; + offset += size; + + // Determine alignment - if segment index is 0-7, it's actually an alignment value + let alignment = if segment_index <= 7 { + match segment_index { + 0 => omf::SegmentAlignment::Absolute, // Use value from SEGDEF + 1 => omf::SegmentAlignment::Byte, + 2 => omf::SegmentAlignment::Word, + 3 => omf::SegmentAlignment::Paragraph, + 4 => omf::SegmentAlignment::Page, + 5 => omf::SegmentAlignment::DWord, + 6 => omf::SegmentAlignment::Page4K, + _ => omf::SegmentAlignment::Byte, + } + } else { + omf::SegmentAlignment::Byte // Default alignment + }; + + // Parse data offset + let _data_offset = if is_32bit { + if offset + 4 > data.len() { + return Err(Error("Truncated COMDAT record")); + } + let off = u32::from_le_bytes([ + 
data[offset], + data[offset + 1], + data[offset + 2], + data[offset + 3], + ]); + offset += 4; + off + } else { + if offset + 2 > data.len() { + return Err(Error("Truncated COMDAT record")); + } + let off = u16::from_le_bytes([data[offset], data[offset + 1]]) as u32; + offset += 2; + off + }; + + // Parse type index + let (_type_index, size) = + read_index(&data[offset..]).ok_or(Error("Invalid type index in COMDAT record"))?; + offset += size; + + // Parse public base (only if allocation type is 0x00 - Explicit) + if allocation_type == 0x00 { + // Has public base (Base Group, Base Segment, Base Frame) + let (_group_index, size) = + read_index(&data[offset..]).ok_or(Error("Invalid group index in COMDAT record"))?; + offset += size; + let (_seg_idx, size) = read_index(&data[offset..]) + .ok_or(Error("Invalid segment index in COMDAT record"))?; + offset += size; + if _seg_idx == 0 { + if offset + 2 <= data.len() { + offset += 2; // Skip frame number + } + } + } + + // Parse public name - this is an index into LNAMES + let (name_index, size) = + read_index(&data[offset..]).ok_or(Error("Invalid name index in COMDAT record"))?; + offset += size; + + // Look up the name from the names table + let name = name_index + .checked_sub(1) + .and_then(|i| self.names.get(i as usize).copied()) + .unwrap_or(b""); + + // Remaining data is the COMDAT content + let comdat_data = &data[offset..]; + + self.comdats.push(OmfComdatData { + name, + segment_index, + selection, + alignment, + data: comdat_data, + }); + + Ok(()) + } + + fn parse_comment(&mut self, data: &'data [u8]) -> Result<()> { + if data.len() < 2 { + return Ok(()); // Ignore truncated comments + } + + let _comment_type = data[0]; // Usually 0x00 for non-purge, 0x40 for purge + let _comment_class = data[1]; + + Ok(()) + } + + fn parse_ledata(&mut self, data: &'data [u8], is_32bit: bool) -> Result<(usize, u32)> { + let mut offset = 0; + + // Parse segment index + let (segment_index, size) = + 
read_index(data).ok_or(Error("Invalid segment index in LEDATA"))?; + offset += size; + + if segment_index == 0 || segment_index > self.segments.len() as u16 { + return Err(Error("Invalid segment index in LEDATA")); + } + + // Parse data offset + let data_offset = if is_32bit { + if offset + 4 > data.len() { + return Err(Error("Truncated LEDATA record")); + } + let off = u32::from_le_bytes([ + data[offset], + data[offset + 1], + data[offset + 2], + data[offset + 3], + ]); + offset += 4; + off + } else { + if offset + 2 > data.len() { + return Err(Error("Truncated LEDATA record")); + } + let off = u16::from_le_bytes([data[offset], data[offset + 1]]) as u32; + offset += 2; + off + }; + + // Store reference to data chunk + let seg_idx = (segment_index - 1) as usize; + let segment = &mut self.segments[seg_idx]; + + // Store the data chunk reference + if offset < data.len() { + segment + .data_chunks + .push((data_offset, OmfDataChunk::Direct(&data[offset..]))); + } + + Ok((seg_idx, data_offset)) + } + + fn parse_fixupp( + &mut self, + data: &'data [u8], + is_32bit: bool, + seg_idx: usize, + data_offset: u32, + frame_threads: &mut [Option; 4], + target_threads: &mut [Option; 4], + ) -> Result<()> { + let mut offset = 0; + + while offset < data.len() { + let b = data[offset]; + offset += 1; + + if (b & 0x80) == 0 { + // THREAD subrecord + let is_frame = (b & 0x40) != 0; // D-bit + let method = (b >> 2) & 0x07; // Method bits + let thread_num = (b & 0x03) as usize; // Thread number (0-3) + + let index = if method < 3 { + // Methods 0-2 have an index + let (idx, size) = read_index(&data[offset..]) + .ok_or(Error("Invalid index in THREAD subrecord"))?; + offset += size; + idx + } else if method == 3 { + // Method 3 has a raw frame number + if offset + 2 > data.len() { + return Err(Error("Invalid frame number in THREAD subrecord")); + } + let frame_num = u16::from_le_bytes([data[offset], data[offset + 1]]); + offset += 2; + frame_num + } else { + 0 + }; + + // Store the 
thread definition + let thread_def = ThreadDef { method, index }; + if is_frame { + frame_threads[thread_num] = Some(thread_def); + } else { + target_threads[thread_num] = Some(thread_def); + } + } else { + // FIXUP subrecord + if offset + 1 > data.len() { + return Err(Error("Truncated FIXUP location")); + } + let locat = data[offset] as u32 | (((b as u32) & 0x03) << 8); + offset += 1; + + let location = match (b >> 2) & 0x0F { + 0 => omf::FixupLocation::LowByte, + 1 => omf::FixupLocation::Offset, + 2 => omf::FixupLocation::Base, + 3 => omf::FixupLocation::Pointer, + 4 => omf::FixupLocation::HighByte, + 5 => omf::FixupLocation::LoaderOffset, + 9 => omf::FixupLocation::Offset32, + 11 => omf::FixupLocation::Pointer48, + 13 => omf::FixupLocation::LoaderOffset32, + _ => continue, // Skip unknown fixup types + }; + + // Parse fix data byte + if offset >= data.len() { + return Err(Error("Truncated FIXUP fix data")); + } + let fix_data = data[offset]; + offset += 1; + + // Check F-bit (bit 7 of fix_data) + let frame_via_thread = (fix_data & 0x80) != 0; + let (frame_method, frame_index) = if frame_via_thread { + // F=1: Use frame thread + let thread_num = ((fix_data >> 4) & 0x03) as usize; + match frame_threads[thread_num] { + Some(thread) => { + let method = match thread.method { + 0 => FrameMethod::SegmentIndex, + 1 => FrameMethod::GroupIndex, + 2 => FrameMethod::ExternalIndex, + 3 => FrameMethod::FrameNumber, + 4 => FrameMethod::Location, + 5 => FrameMethod::Target, + _ => return Err(Error("Invalid frame method in thread")), + }; + (method, thread.index) + } + None => return Err(Error("Undefined frame thread in FIXUP")), + } + } else { + // F=0: Read frame datum + let method_bits = (fix_data >> 4) & 0x07; + let method = match method_bits { + 0 => FrameMethod::SegmentIndex, + 1 => FrameMethod::GroupIndex, + 2 => FrameMethod::ExternalIndex, + 3 => FrameMethod::FrameNumber, + 4 => FrameMethod::Location, + 5 => FrameMethod::Target, + _ => return Err(Error("Invalid frame 
method in FIXUP")), + }; + let index = match method { + FrameMethod::SegmentIndex + | FrameMethod::GroupIndex + | FrameMethod::ExternalIndex => { + let (idx, size) = read_index(&data[offset..]) + .ok_or(Error("Truncated FIXUP frame datum: missing index data"))?; + offset += size; + idx + } + FrameMethod::FrameNumber => { + if offset + 2 > data.len() { + return Err(Error( + "Truncated FIXUP frame datum: missing frame number", + )); + } + let frame_num = u16::from_le_bytes([data[offset], data[offset + 1]]); + offset += 2; + frame_num + } + FrameMethod::Location | FrameMethod::Target => 0, + }; + (method, index) + }; + + // Check T-bit (bit 3 of fix_data) + let target_via_thread = (fix_data & 0x08) != 0; + let (target_method, target_index) = if target_via_thread { + // T=1: Use target thread + let thread_num = (fix_data & 0x03) as usize; + match target_threads[thread_num] { + Some(thread) => { + // Only check the low 2 bits of method for target + let method = match thread.method & 0x03 { + 0 => TargetMethod::SegmentIndex, + 1 => TargetMethod::GroupIndex, + 2 => TargetMethod::ExternalIndex, + 3 => TargetMethod::FrameNumber, + _ => return Err(Error("Invalid target method in thread")), + }; + (method, thread.index) + } + None => return Err(Error("Undefined target thread in FIXUP")), + } + } else { + // T=0: Read target datum + // Only check the low 2 bits of method for target + let method = match fix_data & 0x03 { + 0 => TargetMethod::SegmentIndex, + 1 => TargetMethod::GroupIndex, + 2 => TargetMethod::ExternalIndex, + 3 => TargetMethod::FrameNumber, + _ => return Err(Error("Invalid frame method in FIXUP")), + }; + let index = match method { + TargetMethod::SegmentIndex + | TargetMethod::GroupIndex + | TargetMethod::ExternalIndex => { + let (idx, size) = read_index(&data[offset..]) + .ok_or(Error("Truncated FIXUP target datum: missing index data"))?; + offset += size; + idx + } + TargetMethod::FrameNumber => { + if offset + 2 > data.len() { + return Err(Error( + 
"Truncated FIXUP target datum: missing frame number", + )); + } + let frame_num = u16::from_le_bytes([data[offset], data[offset + 1]]); + offset += 2; + frame_num + } + }; + (method, index) + }; + + // Parse target displacement if present (P=0) + let has_displacement = (fix_data & 0x04) == 0; + let target_displacement = if has_displacement { + if is_32bit { + if offset + 4 <= data.len() { + let disp = u32::from_le_bytes([ + data[offset], + data[offset + 1], + data[offset + 2], + data[offset + 3], + ]); + offset += 4; + disp + } else { + return Err(Error("Truncated FIXUP 32-bit displacement")); + } + } else if offset + 2 <= data.len() { + let disp = u16::from_le_bytes([data[offset], data[offset + 1]]) as u32; + offset += 2; + disp + } else { + return Err(Error("Truncated FIXUP 16-bit displacement")); + } + } else { + 0 + }; + + // Extract M-bit (bit 6 of fix_data) + let is_segment_relative = (fix_data & 0x40) != 0; + self.segments[seg_idx].relocations.push(OmfFixup { + offset: data_offset + locat, + location, + frame_method, + target_method, + frame_index, + target_index, + target_displacement, + is_segment_relative, + }); + } + } + + Ok(()) + } + + fn parse_lidata(&mut self, data: &'data [u8], is_32bit: bool) -> Result<(usize, u32)> { + let mut offset = 0; + + // Read segment index + let (segment_index, size) = + read_index(&data[offset..]).ok_or(Error("Invalid segment index in LIDATA"))?; + offset += size; + + if segment_index == 0 || segment_index > self.segments.len() as u16 { + return Err(Error("Invalid segment index in LIDATA")); + } + + // Read data offset + let data_offset = if is_32bit { + if offset + 4 > data.len() { + return Err(Error("Truncated LIDATA record")); + } + let off = u32::from_le_bytes([ + data[offset], + data[offset + 1], + data[offset + 2], + data[offset + 3], + ]); + offset += 4; + off + } else { + if offset + 2 > data.len() { + return Err(Error("Truncated LIDATA record")); + } + let off = u16::from_le_bytes([data[offset], data[offset + 
1]]) as u32; + offset += 2; + off + }; + + // For LIDATA, we need to store the unexpanded data and expand on demand + let seg_idx = (segment_index - 1) as usize; + if offset < data.len() { + self.segments[seg_idx] + .data_chunks + .push((data_offset, OmfDataChunk::Iterated(&data[offset..]))); + } + + Ok((seg_idx, data_offset)) + } + + /// Get the module name + pub fn module_name(&self) -> Option<&'data str> { + self.module_name + } + + /// Get the segments as a slice + pub fn segments_slice(&self) -> &[OmfSegment<'data>] { + &self.segments + } + + /// Get symbol by external-name index (1-based, as used in FIXUPP records) + pub fn external_symbol(&self, external_index: u16) -> Option<&OmfSymbol<'data>> { + let symbol_index = self + .external_order + .get(external_index.checked_sub(1)? as usize)?; + self.symbols.get(symbol_index.0) + } + + /// Get a name by index (1-based) + pub fn get_name(&self, index: u16) -> Option<&'data [u8]> { + let name_index = index.checked_sub(1)?; + self.names.get(name_index as usize).copied() + } + + /// Get all symbols (for iteration) + pub fn all_symbols(&self) -> &[OmfSymbol<'data>] { + &self.symbols + } + + /// Verify the checksum of an OMF record + /// + /// The checksum is calculated so that the sum of all bytes in the record, + /// including the checksum byte itself, equals 0 (modulo 256). 
+ fn verify_checksum(record_type: u8, length: usize, body: &[u8], checksum: u8) -> bool { + // Some compilers write a 0 byte rather than computing the checksum, + // so we accept that as valid + if checksum == 0 { + return true; + } + + let mut sum = u32::from(record_type); + // Add length bytes (little-endian) + sum = sum.wrapping_add((length & 0xff) as u32); + sum = sum.wrapping_add((length >> 8) as u32); + // Add all body bytes + for &byte in body { + sum = sum.wrapping_add(u32::from(byte)); + } + // Add checksum byte + sum = sum.wrapping_add(u32::from(checksum)); + + // The sum should be 0 (modulo 256) + (sum & 0xff) == 0 + } +} + impl<'data, R: ReadRef<'data>> Object<'data> for OmfFile<'data, R> { type Segment<'file> = OmfSegmentRef<'data, 'file, R> @@ -161,17 +1309,14 @@ impl<'data, R: ReadRef<'data>> Object<'data> for OmfFile<'data, R> { None } - fn imports(&self) -> Result>> { - // Only true external symbols are imports in OMF - // LocalExternal (LEXTDEF) are module-local references that should be resolved - // within the same module by LocalPublic (LPUBDEF) symbols + fn imports(&self) -> Result>> { Ok(self .all_symbols() .iter() .filter(|sym| { matches!( sym.class, - super::OmfSymbolClass::External | super::OmfSymbolClass::ComdatExternal + OmfSymbolClass::External | OmfSymbolClass::ComdatExternal ) }) .map(|ext| Import { @@ -181,13 +1326,11 @@ impl<'data, R: ReadRef<'data>> Object<'data> for OmfFile<'data, R> { .collect()) } - fn exports(&self) -> Result>> { - // Only true public symbols are exports in OMF - // LocalPublic (LPUBDEF) are module-local symbols not visible outside + fn exports(&self) -> Result>> { Ok(self .all_symbols() .iter() - .filter(|sym| sym.class == super::OmfSymbolClass::Public) + .filter(|sym| sym.class == OmfSymbolClass::Public) .map(|pub_sym| Export { name: ByteString(pub_sym.name), address: pub_sym.offset as u64, @@ -215,7 +1358,7 @@ impl<'data, R: ReadRef<'data>> Object<'data> for OmfFile<'data, R> { Ok(None) } - fn 
pdb_info(&self) -> Result>> { + fn pdb_info(&self) -> Result>> { Ok(None) } @@ -231,3 +1374,206 @@ impl<'data, R: ReadRef<'data>> Object<'data> for OmfFile<'data, R> { FileFlags::None } } + +/// Thread definition for FIXUPP parsing +#[derive(Debug, Clone, Copy)] +struct ThreadDef { + /// 3-bit method (frame or target method) + method: u8, + /// Index value (meaning depends on method) + index: u16, +} + +/// Target method types for fixups +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[repr(u8)] +pub(super) enum TargetMethod { + /// Segment index + SegmentIndex = 0, + /// Group index + GroupIndex = 1, + /// External index + ExternalIndex = 2, + /// Frame number (absolute) + FrameNumber = 3, +} + +/// Frame method types for fixups +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[repr(u8)] +pub(super) enum FrameMethod { + /// Segment index + SegmentIndex = 0, + /// Group index + GroupIndex = 1, + /// External index + ExternalIndex = 2, + /// Frame number (absolute) + FrameNumber = 3, + /// Location (use fixup location) + Location = 4, + /// Target (use target's frame) + Target = 5, +} + +/// Expand a LIDATA block into its uncompressed form +pub(super) fn expand_lidata_block(data: &[u8]) -> Result> { + let mut offset = 0; + let mut result = Vec::new(); + + // Read repeat count + let (repeat_count, size) = + read_encoded_value(&data[offset..]).ok_or(Error("Invalid repeat count in LIDATA block"))?; + offset += size; + + if repeat_count == 0 { + return Ok(result); + } + + // Read block count + let (block_count, size) = + read_encoded_value(&data[offset..]).ok_or(Error("Invalid block count in LIDATA block"))?; + offset += size; + + if block_count == 0 { + // Leaf block: contains actual data + if offset >= data.len() { + return Ok(result); + } + let data_length = data[offset] as usize; + offset += 1; + + if offset + data_length > data.len() { + return Err(Error("Truncated LIDATA block")); + } + + let block_data = &data[offset..offset + data_length]; + + // 
Repeat the data block + for _ in 0..repeat_count { + result.extend_from_slice(block_data); + } + } else { + // Nested blocks: recurse for each block + for _ in 0..block_count { + // TODO this is bad, we should instead start by recursively calculating the size, + // allocating the buffer up front, then writing directly into it (e.g. expand_lidata_block_into(&[u8], &mut [u8])) + let block_data = expand_lidata_block(&data[offset..])?; + let block_size = lidata_block_size(&data[offset..])?; + offset += block_size; + + // Repeat the expanded block + for _ in 0..repeat_count { + result.extend_from_slice(&block_data); + } + } + } + + Ok(result) +} + +/// Helper function to calculate LIDATA block size +fn lidata_block_size(data: &[u8]) -> Result { + let mut offset = 0; + + // Read repeat count + let (_, size) = + read_encoded_value(&data[offset..]).ok_or(Error("Invalid repeat count in LIDATA block"))?; + offset += size; + + // Read block count + let (block_count, size) = + read_encoded_value(&data[offset..]).ok_or(Error("Invalid block count in LIDATA block"))?; + offset += size; + + if block_count == 0 { + // Leaf block + if offset >= data.len() { + return Ok(offset); + } + let data_length = data[offset] as usize; + offset += 1 + data_length; + } else { + // Nested blocks + for _ in 0..block_count { + offset += lidata_block_size(&data[offset..])?; + } + } + + Ok(offset) +} + +/// Helper to read an OMF index (1 or 2 bytes) +fn read_index(data: &[u8]) -> Option<(u16, usize)> { + if data.is_empty() { + return None; + } + + let first_byte = data[0]; + if first_byte & 0x80 == 0 { + // 1-byte index + Some((first_byte as u16, 1)) + } else if data.len() >= 2 { + // 2-byte index + let high = (first_byte & 0x7F) as u16; + let low = data[1] as u16; + Some((high << 8 | low, 2)) + } else { + None + } +} + +/// Helper to read a counted string (length byte followed by string) +fn read_counted_string(data: &[u8]) -> Option<(&[u8], usize)> { + if data.is_empty() { + return None; + } + + 
let length = data[0] as usize; + if data.len() > length { + Some((&data[1..1 + length], 1 + length)) + } else { + None + } +} + +/// Read an encoded value (used in LIDATA for repeat counts and block counts) +/// Returns the value and number of bytes consumed +fn read_encoded_value(data: &[u8]) -> Option<(u32, usize)> { + if data.is_empty() { + return None; + } + + let first_byte = data[0]; + if first_byte < 0x80 { + // Single byte value (0-127) + Some((first_byte as u32, 1)) + } else if first_byte == 0x81 { + // Two byte value: 0x81 followed by 16-bit little-endian value + if data.len() >= 3 { + let value = u16::from_le_bytes([data[1], data[2]]) as u32; + Some((value, 3)) + } else { + None + } + } else if first_byte == 0x84 { + // Three byte value: 0x84 followed by 24-bit little-endian value + if data.len() >= 4 { + let value = u32::from_le_bytes([data[1], data[2], data[3], 0]); + Some((value, 4)) + } else { + None + } + } else if first_byte == 0x88 { + // Four byte value: 0x88 followed by 32-bit little-endian value + if data.len() >= 5 { + let value = u32::from_le_bytes([data[1], data[2], data[3], data[4]]); + Some((value, 5)) + } else { + None + } + } else { + // Unknown encoding + None + } +} diff --git a/src/read/omf/mod.rs b/src/read/omf/mod.rs index 94a2bad9..756900fd 100644 --- a/src/read/omf/mod.rs +++ b/src/read/omf/mod.rs @@ -1,10 +1,7 @@ -//! OMF file reading support. +//! Support for reading OMF files. 
-use alloc::str; -use alloc::vec::Vec; - -use crate::omf; -use crate::read::{self, Error, ReadRef, Result}; +mod comdat; +pub use comdat::*; mod file; pub use file::*; @@ -20,1398 +17,3 @@ pub use segment::*; mod symbol; pub use symbol::*; - -/// Symbol class for OMF symbols -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum OmfSymbolClass { - /// Public symbol (PUBDEF) - Public, - /// Local public symbol (LPUBDEF) - LocalPublic, - /// External symbol (EXTDEF) - External, - /// Local external symbol (LEXTDEF) - LocalExternal, - /// Communal symbol (COMDEF) - Communal, - /// Local communal symbol (LCOMDEF) - LocalCommunal, - /// COMDAT external symbol (CEXTDEF) - ComdatExternal, -} - -/// An OMF object file. -/// -/// This handles both 16-bit and 32-bit OMF variants. -#[derive(Debug)] -pub struct OmfFile<'data, R: ReadRef<'data> = &'data [u8]> { - data: R, - /// The module name from THEADR/LHEADR record - module_name: Option<&'data str>, - /// Segment definitions - segments: Vec>, - /// All symbols (publics, externals, communals, locals) in occurrence order - symbols: Vec>, - /// Maps external-name table index (1-based) to SymbolIndex - external_order: Vec, - /// COMDAT sections - comdats: Vec>, - /// Name table (LNAMES/LLNAMES) - names: Vec<&'data [u8]>, - /// Group definitions - groups: Vec, -} - -/// Data chunk for a segment -#[derive(Debug, Clone)] -pub enum OmfDataChunk<'data> { - /// Direct data from LEDATA record - Direct(&'data [u8]), - /// Compressed/iterated data from LIDATA record (needs expansion) - Iterated(&'data [u8]), -} - -/// An OMF segment definition -#[derive(Debug, Clone)] -pub struct OmfSegment<'data> { - /// Segment name index (into names table) - pub name_index: u16, - /// Class name index (into names table) - pub class_index: u16, - /// Overlay name index (into names table) - pub overlay_index: u16, - /// Segment alignment - pub alignment: omf::SegmentAlignment, - /// Segment combination - pub combination: omf::SegmentCombination, - /// 
Whether this is a 32-bit segment - pub use32: bool, - /// Segment length - pub length: u32, - /// Segment data chunks (offset, data) - /// Multiple LEDATA/LIDATA records can contribute to a single segment - pub data_chunks: Vec<(u32, OmfDataChunk<'data>)>, - /// Relocations for this segment - pub relocations: Vec, -} - -/// An OMF symbol -#[derive(Debug, Clone)] -pub struct OmfSymbol<'data> { - /// Symbol table index - pub symbol_index: usize, - /// Symbol name - pub name: &'data [u8], - /// Symbol class (Public, External, etc.) - pub class: OmfSymbolClass, - /// Group index (0 if none) - pub group_index: u16, - /// Segment index (0 if external) - pub segment_index: u16, - /// Frame number (for absolute symbols when segment_index == 0) - pub frame_number: u16, - /// Offset within segment - pub offset: u32, - /// Type index (usually 0) - pub type_index: u16, - /// Pre-computed symbol kind - pub kind: read::SymbolKind, -} - -/// An OMF group definition -#[derive(Debug, Clone)] -pub struct OmfGroup { - /// Group name index (into names table) - pub name_index: u16, - /// Segment indices in this group - pub segments: Vec, -} - -/// An OMF relocation/fixup -#[derive(Debug, Clone)] -pub struct OmfRelocation { - /// Offset in segment where fixup is applied - pub offset: u32, - /// Location type (what to patch) - pub location: omf::FixupLocation, - /// Frame method - pub frame_method: omf::FrameMethod, - /// Target method - pub target_method: omf::TargetMethod, - /// Frame index (meaning depends on frame_method) - pub frame_index: u16, - /// Target index (meaning depends on target_method) - pub target_index: u16, - /// Target displacement - pub target_displacement: u32, - /// M-bit: true for segment-relative, false for PC-relative - pub is_segment_relative: bool, -} - -/// A COMDAT (communal data) section -#[derive(Debug, Clone)] -pub struct OmfComdatData<'data> { - /// Symbol name - pub name: &'data [u8], - /// Segment index where this COMDAT belongs - pub segment_index: 
u16, - /// Selection/allocation method - pub selection: OmfComdatSelection, - /// Alignment - pub alignment: omf::SegmentAlignment, - /// Data - pub data: &'data [u8], -} - -/// COMDAT selection methods -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum OmfComdatSelection { - /// Explicit: may not be combined, produce error if multiple definitions - Explicit = 0, - /// Use any: pick any instance - UseAny = 1, - /// Same size: all instances must be same size - SameSize = 2, - /// Exact match: all instances must have identical content - ExactMatch = 3, -} - -/// Thread definition for FIXUPP parsing -#[derive(Debug, Clone, Copy)] -struct ThreadDef { - /// 3-bit method (frame or target method) - method: u8, - /// Index value (meaning depends on method) - index: u16, -} - -impl<'data> OmfSegment<'data> { - /// Get the raw data of the segment if it's a single contiguous chunk - pub fn get_single_chunk(&self) -> Option<&'data [u8]> { - if self.data_chunks.len() == 1 { - let (offset, chunk) = &self.data_chunks[0]; - if *offset == 0 { - match chunk { - OmfDataChunk::Direct(data) if data.len() == self.length as usize => { - return Some(data); - } - _ => {} - } - } - } - None - } - - /// Check if any data chunk needs expansion (LIDATA) - pub fn has_iterated_data(&self) -> bool { - self.data_chunks - .iter() - .any(|(_, chunk)| matches!(chunk, OmfDataChunk::Iterated(_))) - } -} - -impl<'data, R: ReadRef<'data>> read::private::Sealed for OmfFile<'data, R> {} - -impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { - /// Parse an OMF file from raw data - pub fn parse(data: R) -> Result { - let mut file = OmfFile { - data, - module_name: None, - segments: Vec::new(), - symbols: Vec::new(), - external_order: Vec::new(), - comdats: Vec::new(), - names: Vec::new(), - groups: Vec::new(), - }; - - file.parse_records()?; - file.assign_symbol_kinds(); - Ok(file) - } - - fn assign_symbol_kinds(&mut self) { - // Compute kinds for symbols based on their segments - let kinds: Vec = self - 
.symbols - .iter() - .map(|sym| match sym.class { - OmfSymbolClass::Public | OmfSymbolClass::LocalPublic => { - if sym.segment_index > 0 && (sym.segment_index as usize) <= self.segments.len() - { - let segment_idx = (sym.segment_index - 1) as usize; - let section_kind = self.segment_section_kind(segment_idx); - Self::symbol_kind_from_section_kind(section_kind) - } else { - read::SymbolKind::Unknown - } - } - OmfSymbolClass::Communal | OmfSymbolClass::LocalCommunal => read::SymbolKind::Data, - _ => read::SymbolKind::Unknown, - }) - .collect(); - - // Apply computed kinds - for (sym, kind) in self.symbols.iter_mut().zip(kinds) { - sym.kind = kind; - } - } - - fn symbol_kind_from_section_kind(section_kind: read::SectionKind) -> read::SymbolKind { - match section_kind { - read::SectionKind::Text => read::SymbolKind::Text, - read::SectionKind::Data | read::SectionKind::ReadOnlyData => read::SymbolKind::Data, - read::SectionKind::UninitializedData => read::SymbolKind::Data, - _ => read::SymbolKind::Unknown, - } - } - - /// Get the section kind for a segment (reusing logic from OmfSection) - fn segment_section_kind(&self, segment_index: usize) -> read::SectionKind { - if segment_index >= self.segments.len() { - return read::SectionKind::Unknown; - } - - let segment = &self.segments[segment_index]; - - // Check segment name first for special cases - if let Some(seg_name) = self.get_name(segment.name_index) { - // Segments named CONST are always read-only regardless of class - match seg_name { - b"CONST" | b"_CONST" | b"CONST2" | b"RDATA" | b"_RDATA" => { - return read::SectionKind::ReadOnlyData; - } - _ => {} - } - - // Check for debug sections by name - if seg_name.starts_with(b"$$") { - // Watcom-style debug sections - return read::SectionKind::Debug; - } - if seg_name == b".drectve" || seg_name == b".DRECTVE" { - return read::SectionKind::Linker; - } - - // Check other common names - let name_upper = seg_name.to_ascii_uppercase(); - if name_upper == b"_TEXT" || 
name_upper == b"CODE" || name_upper == b".TEXT" { - return read::SectionKind::Text; - } else if name_upper == b"_DATA" || name_upper == b"DATA" || name_upper == b".DATA" { - return read::SectionKind::Data; - } else if name_upper == b"_BSS" - || name_upper == b"BSS" - || name_upper == b".BSS" - || name_upper == b"STACK" - { - return read::SectionKind::UninitializedData; - } - } - - // Determine kind from class name - if let Some(class_name) = self.get_name(segment.class_index) { - // Check for exact matches first (most common case) - match class_name { - b"CODE" | b"_TEXT" | b"TEXT" => return read::SectionKind::Text, - b"CONST" | b"_CONST" | b"CONST2" | b"RDATA" | b"_RDATA" => { - return read::SectionKind::ReadOnlyData; - } - b"BSS" | b"_BSS" => return read::SectionKind::UninitializedData, - b"STACK" | b"_STACK" => return read::SectionKind::UninitializedData, - b"DEBUG" | b"_DEBUG" | b"DEBSYM" | b"DEBTYP" => return read::SectionKind::Debug, - b"DATA" | b"_DATA" => { - // DATA sections with no actual data are treated as uninitialized - if segment.data_chunks.is_empty() { - return read::SectionKind::UninitializedData; - } else { - return read::SectionKind::Data; - } - } - _ => {} - } - - // Check for case-insensitive substring matches for less common variations - let class_upper = class_name.to_ascii_uppercase(); - if class_upper.windows(4).any(|w| w == b"CODE") { - return read::SectionKind::Text; - } else if class_upper.windows(5).any(|w| w == b"CONST") { - return read::SectionKind::ReadOnlyData; - } else if class_upper.windows(3).any(|w| w == b"BSS") - || class_upper.windows(5).any(|w| w == b"STACK") - { - return read::SectionKind::UninitializedData; - } else if class_upper.windows(5).any(|w| w == b"DEBUG") { - return read::SectionKind::Debug; - } else if class_upper.windows(4).any(|w| w == b"DATA") { - // DATA sections with no actual data are treated as uninitialized - if segment.data_chunks.is_empty() { - return read::SectionKind::UninitializedData; - } else { - 
return read::SectionKind::Data; - } - } - } - - // Final fallback based on whether segment has data - if segment.data_chunks.is_empty() { - read::SectionKind::UninitializedData - } else { - read::SectionKind::Unknown - } - } - - fn parse_records(&mut self) -> Result<()> { - let len = self - .data - .len() - .map_err(|_| Error("Failed to get data length"))?; - let data = self - .data - .read_bytes_at(0, len) - .map_err(|_| Error("Failed to read OMF data"))?; - let mut offset = 0; - - // First record must be THEADR or LHEADR - if data.is_empty() { - return Err(Error("Empty OMF file")); - } - - let first_type = data[0]; - if first_type != omf::record_type::THEADR && first_type != omf::record_type::LHEADR { - return Err(Error( - "Invalid OMF file: first record must be THEADR or LHEADR", - )); - } - - let mut current_segment: Option = None; - let mut current_data_offset: Option = None; - - // Thread storage for FIXUPP parsing - let mut frame_threads: [Option; 4] = [None; 4]; - let mut target_threads: [Option; 4] = [None; 4]; - - while offset < data.len() { - // Read record header - if offset + 3 > data.len() { - break; - } - - let record_type = data[offset]; - let length = u16::from_le_bytes([data[offset + 1], data[offset + 2]]) as usize; - - // Length includes the checksum byte at the end - if offset + 3 + length > data.len() { - return Err(Error("Truncated OMF record")); - } - - // Record data excludes the checksum - let record_data = &data[offset + 3..offset + 3 + length - 1]; - let checksum = data[offset + 3 + length - 1]; - - // Verify checksum - if !Self::verify_checksum(record_type, length, record_data, checksum) { - return Err(Error("Invalid OMF record checksum")); - } - - // Process record based on type - match record_type { - omf::record_type::THEADR | omf::record_type::LHEADR => { - self.parse_header(record_data)?; - } - omf::record_type::LNAMES | omf::record_type::LLNAMES => { - self.parse_names(record_data)?; - } - omf::record_type::SEGDEF | 
omf::record_type::SEGDEF32 => { - self.parse_segdef(record_data, record_type == omf::record_type::SEGDEF32)?; - } - omf::record_type::GRPDEF => { - self.parse_grpdef(record_data)?; - } - omf::record_type::PUBDEF | omf::record_type::PUBDEF32 => { - self.parse_pubdef( - record_data, - record_type == omf::record_type::PUBDEF32, - OmfSymbolClass::Public, - )?; - } - omf::record_type::LPUBDEF | omf::record_type::LPUBDEF32 => { - self.parse_pubdef( - record_data, - record_type == omf::record_type::LPUBDEF32, - OmfSymbolClass::LocalPublic, - )?; - } - omf::record_type::EXTDEF => { - self.parse_extdef(record_data, OmfSymbolClass::External)?; - } - omf::record_type::LEXTDEF | omf::record_type::LEXTDEF32 => { - self.parse_extdef(record_data, OmfSymbolClass::LocalExternal)?; - } - omf::record_type::CEXTDEF => { - self.parse_extdef(record_data, OmfSymbolClass::ComdatExternal)?; - } - omf::record_type::COMDEF => { - self.parse_comdef(record_data, OmfSymbolClass::Communal)?; - } - omf::record_type::LCOMDEF => { - self.parse_comdef(record_data, OmfSymbolClass::LocalCommunal)?; - } - omf::record_type::COMDAT | omf::record_type::COMDAT32 => { - self.parse_comdat(record_data, record_type == omf::record_type::COMDAT32)?; - } - omf::record_type::COMENT => { - self.parse_comment(record_data)?; - } - omf::record_type::LEDATA | omf::record_type::LEDATA32 => { - let (seg_idx, offset) = - self.parse_ledata(record_data, record_type == omf::record_type::LEDATA32)?; - current_segment = Some(seg_idx); - current_data_offset = Some(offset); - } - omf::record_type::LIDATA | omf::record_type::LIDATA32 => { - let (seg_idx, offset) = - self.parse_lidata(record_data, record_type == omf::record_type::LIDATA32)?; - current_segment = Some(seg_idx); - current_data_offset = Some(offset); - } - omf::record_type::FIXUPP | omf::record_type::FIXUPP32 => { - if let (Some(seg_idx), Some(data_offset)) = - (current_segment, current_data_offset) - { - self.parse_fixupp( - record_data, - record_type == 
omf::record_type::FIXUPP32, - seg_idx, - data_offset, - &mut frame_threads, - &mut target_threads, - )?; - } else { - return Err(Error( - "FIXUPP/FIXUPP32 record encountered without preceding LEDATA/LIDATA", - )); - } - } - omf::record_type::MODEND | omf::record_type::MODEND32 => { - // End of module - break; - } - _ => { - // Skip unknown record types - } - } - - offset += 3 + length; // header + data (which includes checksum) - } - - Ok(()) - } - - fn parse_header(&mut self, data: &'data [u8]) -> Result<()> { - if let Some((name, _)) = omf::read_counted_string(data) { - self.module_name = core::str::from_utf8(name).ok(); - } - Ok(()) - } - - fn parse_names(&mut self, data: &'data [u8]) -> Result<()> { - let mut offset = 0; - while offset < data.len() { - if let Some((name, size)) = omf::read_counted_string(&data[offset..]) { - self.names.push(name); - offset += size; - } else { - break; - } - } - Ok(()) - } - - fn parse_segdef(&mut self, data: &'data [u8], is_32bit: bool) -> Result<()> { - let mut offset = 0; - - // Parse ACBP byte - if offset >= data.len() { - return Err(Error("Truncated SEGDEF record")); - } - let acbp = data[offset]; - offset += 1; - - let alignment = match (acbp >> 5) & 0x07 { - 0 => omf::SegmentAlignment::Absolute, - 1 => omf::SegmentAlignment::Byte, - 2 => omf::SegmentAlignment::Word, - 3 => omf::SegmentAlignment::Paragraph, - 4 => omf::SegmentAlignment::Page, - 5 => omf::SegmentAlignment::DWord, - 6 => omf::SegmentAlignment::Page4K, - _ => return Err(Error("Invalid segment alignment")), - }; - - let combination = match (acbp >> 2) & 0x07 { - 0 => omf::SegmentCombination::Private, - 2 => omf::SegmentCombination::Public, - 5 => omf::SegmentCombination::Stack, - 6 => omf::SegmentCombination::Common, - _ => return Err(Error("Invalid segment combination")), - }; - - let use32 = (acbp & 0x01) != 0; - - // Skip frame number and offset for absolute segments - if alignment == omf::SegmentAlignment::Absolute { - offset += 3; // frame (2) + offset 
(1) - } - - // Parse segment length - let length = if is_32bit || use32 { - if offset + 4 > data.len() { - return Err(Error("Truncated SEGDEF record")); - } - let length = u32::from_le_bytes([ - data[offset], - data[offset + 1], - data[offset + 2], - data[offset + 3], - ]); - offset += 4; - length - } else { - if offset + 2 > data.len() { - return Err(Error("Truncated SEGDEF record")); - } - let length = u16::from_le_bytes([data[offset], data[offset + 1]]) as u32; - offset += 2; - length - }; - - // Parse segment name index - let (name_index, size) = - omf::read_index(&data[offset..]).ok_or(Error("Invalid segment name index"))?; - offset += size; - - // Parse class name index - let (class_index, size) = - omf::read_index(&data[offset..]).ok_or(Error("Invalid class name index"))?; - offset += size; - - // Parse overlay name index - let (overlay_index, _) = - omf::read_index(&data[offset..]).ok_or(Error("Invalid overlay name index"))?; - - self.segments.push(OmfSegment { - name_index, - class_index, - overlay_index, - alignment, - combination, - use32, - length, - data_chunks: Vec::new(), - relocations: Vec::new(), - }); - - Ok(()) - } - - fn parse_grpdef(&mut self, data: &'data [u8]) -> Result<()> { - let mut offset = 0; - - // Parse group name index - let (name_index, size) = omf::read_index(data).ok_or(Error("Invalid group name index"))?; - offset += size; - - let mut segments = Vec::new(); - - // Parse segment indices - while offset < data.len() { - if data[offset] == 0xFF { - // Segment index follows - offset += 1; - let (seg_index, size) = omf::read_index(&data[offset..]) - .ok_or(Error("Invalid segment index in group"))?; - offset += size; - segments.push(seg_index); - } else { - break; - } - } - - self.groups.push(OmfGroup { - name_index, - segments, - }); - - Ok(()) - } - - fn parse_pubdef( - &mut self, - data: &'data [u8], - is_32bit: bool, - class: OmfSymbolClass, - ) -> Result<()> { - let mut offset = 0; - - // Parse group index - let (group_index, size) 
= omf::read_index(data).ok_or(Error("Invalid group index"))?; - offset += size; - - // Parse segment index - let (segment_index, size) = - omf::read_index(&data[offset..]).ok_or(Error("Invalid segment index"))?; - offset += size; - - // Read frame number if segment index is 0 (for absolute symbols) - let frame_number = if segment_index == 0 { - if offset + 2 > data.len() { - return Err(Error("Invalid frame number in PUBDEF")); - } - let frame = u16::from_le_bytes([data[offset], data[offset + 1]]); - offset += 2; - frame - } else { - 0 - }; - - // Parse public definitions - while offset < data.len() { - // Parse name - let Some((name, size)) = omf::read_counted_string(&data[offset..]) else { - break; - }; - offset += size; - - // Parse offset - let pub_offset = if is_32bit { - if offset + 4 > data.len() { - break; - } - let off = u32::from_le_bytes([ - data[offset], - data[offset + 1], - data[offset + 2], - data[offset + 3], - ]); - offset += 4; - off - } else { - if offset + 2 > data.len() { - break; - } - let off = u16::from_le_bytes([data[offset], data[offset + 1]]) as u32; - offset += 2; - off - }; - - // Parse type index - let (type_index, size) = omf::read_index(&data[offset..]) - .ok_or(Error("Invalid type index in PUBDEF/LPUBDEF record"))?; - offset += size; - - self.symbols.push(OmfSymbol { - symbol_index: self.symbols.len(), - name, - class, - group_index, - segment_index, - frame_number, - offset: pub_offset, - type_index, - kind: read::SymbolKind::Unknown, // Will be computed later - }); - } - - Ok(()) - } - - fn parse_extdef(&mut self, data: &'data [u8], class: OmfSymbolClass) -> Result<()> { - let mut offset = 0; - - while offset < data.len() { - // Parse name - let Some((name, size)) = omf::read_counted_string(&data[offset..]) else { - break; - }; - offset += size; - - // Parse type index - let (type_index, size) = omf::read_index(&data[offset..]) - .ok_or(Error("Invalid type index in EXTDEF/LEXTDEF/CEXTDEF record"))?; - offset += size; - - let 
sym_idx = self.symbols.len(); - self.symbols.push(OmfSymbol { - symbol_index: sym_idx, - name, - class, - group_index: 0, - segment_index: 0, - frame_number: 0, - offset: 0, - type_index, - kind: read::SymbolKind::Unknown, - }); - - // Add to external_order for symbols that contribute to external-name table - self.external_order.push(read::SymbolIndex(sym_idx)); - } - - Ok(()) - } - - fn parse_comdef(&mut self, data: &'data [u8], class: OmfSymbolClass) -> Result<()> { - let mut offset = 0; - - while offset < data.len() { - // Parse name - let Some((name, size)) = omf::read_counted_string(&data[offset..]) else { - break; - }; - offset += size; - - // Parse type index - let (type_index, size) = omf::read_index(&data[offset..]) - .ok_or(Error("Invalid type index in COMDEF/LCOMDEF record"))?; - offset += size; - - // Parse data type and communal length - if offset >= data.len() { - break; - } - let data_type = data[offset]; - offset += 1; - - let communal_length = match data_type { - 0x61 => { - // FAR data - number of elements followed by element size - let (num_elements, size1) = omf::read_encoded_value(&data[offset..]) - .ok_or(Error("Invalid number of elements in FAR COMDEF"))?; - offset += size1; - let (element_size, size2) = omf::read_encoded_value(&data[offset..]) - .ok_or(Error("Invalid element size in FAR COMDEF"))?; - offset += size2; - num_elements * element_size - } - 0x62 => { - // NEAR data - size in bytes - let (size_val, size_bytes) = omf::read_encoded_value(&data[offset..]) - .ok_or(Error("Invalid size in NEAR COMDEF"))?; - offset += size_bytes; - size_val - } - _ => { - // Unknown data type, skip - continue; - } - }; - - let sym_idx = self.symbols.len(); - self.symbols.push(OmfSymbol { - symbol_index: sym_idx, - name, - class, - group_index: 0, - segment_index: 0, - frame_number: 0, - offset: communal_length, // Store size in offset field - type_index, - kind: read::SymbolKind::Data, - }); - - // Add to external_order for symbols that contribute to 
external-name table - self.external_order.push(read::SymbolIndex(sym_idx)); - } - - Ok(()) - } - - fn parse_comdat(&mut self, data: &'data [u8], is_32bit: bool) -> Result<()> { - let mut offset = 0; - - // Parse flags byte - if offset >= data.len() { - return Err(Error("Truncated COMDAT record")); - } - let _flags = data[offset]; - offset += 1; - - // Parse attributes byte - if offset >= data.len() { - return Err(Error("Truncated COMDAT record")); - } - let attributes = data[offset]; - offset += 1; - - // Extract selection criteria from high nibble of attributes - let selection = match (attributes >> 4) & 0x0F { - 0x00 => OmfComdatSelection::Explicit, // No match - 0x01 => OmfComdatSelection::UseAny, // Pick any - 0x02 => OmfComdatSelection::SameSize, // Same size - 0x03 => OmfComdatSelection::ExactMatch, // Exact match - _ => OmfComdatSelection::UseAny, - }; - - // Extract allocation type from low nibble of attributes - let allocation_type = attributes & 0x0F; - - // Parse align/segment index field - let (segment_index, size) = - omf::read_index(&data[offset..]).ok_or(Error("Invalid COMDAT segment index"))?; - offset += size; - - // Determine alignment - if segment index is 0-7, it's actually an alignment value - let alignment = if segment_index <= 7 { - match segment_index { - 0 => omf::SegmentAlignment::Absolute, // Use value from SEGDEF - 1 => omf::SegmentAlignment::Byte, - 2 => omf::SegmentAlignment::Word, - 3 => omf::SegmentAlignment::Paragraph, - 4 => omf::SegmentAlignment::Page, - 5 => omf::SegmentAlignment::DWord, - 6 => omf::SegmentAlignment::Page4K, - _ => omf::SegmentAlignment::Byte, - } - } else { - omf::SegmentAlignment::Byte // Default alignment - }; - - // Parse data offset - let _data_offset = if is_32bit { - if offset + 4 > data.len() { - return Err(Error("Truncated COMDAT record")); - } - let off = u32::from_le_bytes([ - data[offset], - data[offset + 1], - data[offset + 2], - data[offset + 3], - ]); - offset += 4; - off - } else { - if offset + 2 
> data.len() { - return Err(Error("Truncated COMDAT record")); - } - let off = u16::from_le_bytes([data[offset], data[offset + 1]]) as u32; - offset += 2; - off - }; - - // Parse type index - let (_type_index, size) = - omf::read_index(&data[offset..]).ok_or(Error("Invalid type index in COMDAT record"))?; - offset += size; - - // Parse public base (only if allocation type is 0x00 - Explicit) - if allocation_type == 0x00 { - // Has public base (Base Group, Base Segment, Base Frame) - let (_group_index, size) = omf::read_index(&data[offset..]) - .ok_or(Error("Invalid group index in COMDAT record"))?; - offset += size; - let (_seg_idx, size) = omf::read_index(&data[offset..]) - .ok_or(Error("Invalid segment index in COMDAT record"))?; - offset += size; - if _seg_idx == 0 { - if offset + 2 <= data.len() { - offset += 2; // Skip frame number - } - } - } - - // Parse public name - this is an index into LNAMES - let (name_index, size) = - omf::read_index(&data[offset..]).ok_or(Error("Invalid name index in COMDAT record"))?; - offset += size; - - // Look up the name from the names table - let name = name_index - .checked_sub(1) - .and_then(|i| self.names.get(i as usize).copied()) - .unwrap_or(b""); - - // Remaining data is the COMDAT content - let comdat_data = &data[offset..]; - - self.comdats.push(OmfComdatData { - name, - segment_index, - selection, - alignment, - data: comdat_data, - }); - - Ok(()) - } - - fn parse_comment(&mut self, data: &'data [u8]) -> Result<()> { - if data.len() < 2 { - return Ok(()); // Ignore truncated comments - } - - let _comment_type = data[0]; // Usually 0x00 for non-purge, 0x40 for purge - let _comment_class = data[1]; - - Ok(()) - } - - fn parse_ledata(&mut self, data: &'data [u8], is_32bit: bool) -> Result<(usize, u32)> { - let mut offset = 0; - - // Parse segment index - let (segment_index, size) = - omf::read_index(data).ok_or(Error("Invalid segment index in LEDATA"))?; - offset += size; - - if segment_index == 0 || segment_index > 
self.segments.len() as u16 { - return Err(Error("Invalid segment index in LEDATA")); - } - - // Parse data offset - let data_offset = if is_32bit { - if offset + 4 > data.len() { - return Err(Error("Truncated LEDATA record")); - } - let off = u32::from_le_bytes([ - data[offset], - data[offset + 1], - data[offset + 2], - data[offset + 3], - ]); - offset += 4; - off - } else { - if offset + 2 > data.len() { - return Err(Error("Truncated LEDATA record")); - } - let off = u16::from_le_bytes([data[offset], data[offset + 1]]) as u32; - offset += 2; - off - }; - - // Store reference to data chunk - let seg_idx = (segment_index - 1) as usize; - let segment = &mut self.segments[seg_idx]; - - // Store the data chunk reference - if offset < data.len() { - segment - .data_chunks - .push((data_offset, OmfDataChunk::Direct(&data[offset..]))); - } - - Ok((seg_idx, data_offset)) - } - - fn parse_fixupp( - &mut self, - data: &'data [u8], - is_32bit: bool, - seg_idx: usize, - data_offset: u32, - frame_threads: &mut [Option; 4], - target_threads: &mut [Option; 4], - ) -> Result<()> { - let mut offset = 0; - - while offset < data.len() { - let b = data[offset]; - offset += 1; - - if (b & 0x80) == 0 { - // THREAD subrecord - let is_frame = (b & 0x40) != 0; // D-bit - let method = (b >> 2) & 0x07; // Method bits - let thread_num = (b & 0x03) as usize; // Thread number (0-3) - - let index = if method < 3 { - // Methods 0-2 have an index - let (idx, size) = omf::read_index(&data[offset..]) - .ok_or(Error("Invalid index in THREAD subrecord"))?; - offset += size; - idx - } else if method == 3 { - // Method 3 has a raw frame number - if offset + 2 > data.len() { - return Err(Error("Invalid frame number in THREAD subrecord")); - } - let frame_num = u16::from_le_bytes([data[offset], data[offset + 1]]); - offset += 2; - frame_num - } else { - 0 - }; - - // Store the thread definition - let thread_def = ThreadDef { method, index }; - if is_frame { - frame_threads[thread_num] = Some(thread_def); 
- } else { - target_threads[thread_num] = Some(thread_def); - } - } else { - // FIXUP subrecord - if offset + 1 > data.len() { - return Err(Error("Truncated FIXUP location")); - } - let locat = data[offset] as u32 | (((b as u32) & 0x03) << 8); - offset += 1; - - let location = match (b >> 2) & 0x0F { - 0 => omf::FixupLocation::LowByte, - 1 => omf::FixupLocation::Offset, - 2 => omf::FixupLocation::Base, - 3 => omf::FixupLocation::Pointer, - 4 => omf::FixupLocation::HighByte, - 5 => omf::FixupLocation::LoaderOffset, - 9 => omf::FixupLocation::Offset32, - 11 => omf::FixupLocation::Pointer48, - 13 => omf::FixupLocation::LoaderOffset32, - _ => continue, // Skip unknown fixup types - }; - - // Parse fix data byte - if offset >= data.len() { - return Err(Error("Truncated FIXUP fix data")); - } - let fix_data = data[offset]; - offset += 1; - - // Check F-bit (bit 7 of fix_data) - let (frame_method, frame_index) = if (fix_data & 0x80) != 0 { - // F=1: Use frame thread - let thread_num = ((fix_data >> 4) & 0x03) as usize; - match frame_threads[thread_num] { - Some(thread) => { - let method = match thread.method { - 0 => omf::FrameMethod::SegmentIndex, - 1 => omf::FrameMethod::GroupIndex, - 2 => omf::FrameMethod::ExternalIndex, - 3 => omf::FrameMethod::FrameNumber, - 4 => omf::FrameMethod::Location, - 5 => omf::FrameMethod::Target, - _ => return Err(Error("Invalid frame method in thread")), - }; - (method, thread.index) - } - None => return Err(Error("Undefined frame thread in FIXUP")), - } - } else { - // F=0: Read frame datum - let method_bits = (fix_data >> 4) & 0x07; - let method = match method_bits { - 0 => omf::FrameMethod::SegmentIndex, - 1 => omf::FrameMethod::GroupIndex, - 2 => omf::FrameMethod::ExternalIndex, - 3 => omf::FrameMethod::FrameNumber, - 4 => omf::FrameMethod::Location, - 5 => omf::FrameMethod::Target, - _ => return Err(Error("Invalid frame method in FIXUP")), - }; - let index = match method { - omf::FrameMethod::SegmentIndex - | 
omf::FrameMethod::GroupIndex - | omf::FrameMethod::ExternalIndex => { - let (idx, size) = omf::read_index(&data[offset..]) - .ok_or(Error("Truncated FIXUP frame datum: missing index data"))?; - offset += size; - idx - } - omf::FrameMethod::FrameNumber => { - if offset + 2 > data.len() { - return Err(Error( - "Truncated FIXUP frame datum: missing frame number", - )); - } - let frame_num = u16::from_le_bytes([data[offset], data[offset + 1]]); - offset += 2; - frame_num - } - omf::FrameMethod::Location | omf::FrameMethod::Target => 0, - }; - (method, index) - }; - - // Check T-bit (bit 3 of fix_data) - let (target_method, target_index) = if (fix_data & 0x08) != 0 { - // T=1: Use target thread - let thread_num = (fix_data & 0x03) as usize; - match target_threads[thread_num] { - Some(thread) => { - // Only check the low 2 bits of method for target - let method = match thread.method & 0x03 { - 0 => omf::TargetMethod::SegmentIndex, - 1 => omf::TargetMethod::GroupIndex, - 2 => omf::TargetMethod::ExternalIndex, - 3 => omf::TargetMethod::FrameNumber, - _ => return Err(Error("Invalid target method in thread")), - }; - (method, thread.index) - } - None => return Err(Error("Undefined target thread in FIXUP")), - } - } else { - // T=0: Read target datum - // Only check the low 2 bits of method for target - let method = match fix_data & 0x03 { - 0 => omf::TargetMethod::SegmentIndex, - 1 => omf::TargetMethod::GroupIndex, - 2 => omf::TargetMethod::ExternalIndex, - 3 => omf::TargetMethod::FrameNumber, - _ => return Err(Error("Invalid frame method in FIXUP")), - }; - let index = match method { - omf::TargetMethod::SegmentIndex - | omf::TargetMethod::GroupIndex - | omf::TargetMethod::ExternalIndex => { - let (idx, size) = omf::read_index(&data[offset..]) - .ok_or(Error("Truncated FIXUP target datum: missing index data"))?; - offset += size; - idx - } - omf::TargetMethod::FrameNumber => { - if offset + 2 > data.len() { - return Err(Error( - "Truncated FIXUP target datum: missing frame 
number", - )); - } - let frame_num = u16::from_le_bytes([data[offset], data[offset + 1]]); - offset += 2; - frame_num - } - }; - (method, index) - }; - - // Parse target displacement if present (P=0) - let target_displacement = if fix_data & 0x04 == 0 { - if is_32bit { - if offset + 4 <= data.len() { - let disp = u32::from_le_bytes([ - data[offset], - data[offset + 1], - data[offset + 2], - data[offset + 3], - ]); - offset += 4; - disp - } else { - return Err(Error("Truncated FIXUP 32-bit displacement")); - } - } else { - if offset + 2 <= data.len() { - let disp = u16::from_le_bytes([data[offset], data[offset + 1]]) as u32; - offset += 2; - disp - } else { - return Err(Error("Truncated FIXUP 16-bit displacement")); - } - } - } else { - 0 - }; - - // Extract M-bit (bit 6 of fix_data) - let is_segment_relative = (fix_data & 0x40) != 0; - self.segments[seg_idx].relocations.push(OmfRelocation { - offset: data_offset + locat, - location, - frame_method, - target_method, - frame_index, - target_index, - target_displacement, - is_segment_relative, - }); - } - } - - Ok(()) - } - - fn parse_lidata(&mut self, data: &'data [u8], is_32bit: bool) -> Result<(usize, u32)> { - let mut offset = 0; - - // Read segment index - let (segment_index, size) = - omf::read_index(&data[offset..]).ok_or(Error("Invalid segment index in LIDATA"))?; - offset += size; - - if segment_index == 0 || segment_index > self.segments.len() as u16 { - return Err(Error("Invalid segment index in LIDATA")); - } - - // Read data offset - let data_offset = if is_32bit { - if offset + 4 > data.len() { - return Err(Error("Truncated LIDATA record")); - } - let off = u32::from_le_bytes([ - data[offset], - data[offset + 1], - data[offset + 2], - data[offset + 3], - ]); - offset += 4; - off - } else { - if offset + 2 > data.len() { - return Err(Error("Truncated LIDATA record")); - } - let off = u16::from_le_bytes([data[offset], data[offset + 1]]) as u32; - offset += 2; - off - }; - - // For LIDATA, we need to store 
the unexpanded data and expand on demand - let seg_idx = (segment_index - 1) as usize; - if offset < data.len() { - self.segments[seg_idx] - .data_chunks - .push((data_offset, OmfDataChunk::Iterated(&data[offset..]))); - } - - Ok((seg_idx, data_offset)) - } - - /// Expand a LIDATA block into its uncompressed form - fn expand_lidata_block(&self, data: &[u8]) -> Result> { - let mut offset = 0; - let mut result = Vec::new(); - - // Read repeat count - let (repeat_count, size) = omf::read_encoded_value(&data[offset..]) - .ok_or(Error("Invalid repeat count in LIDATA block"))?; - offset += size; - - if repeat_count == 0 { - return Ok(result); - } - - // Read block count - let (block_count, size) = omf::read_encoded_value(&data[offset..]) - .ok_or(Error("Invalid block count in LIDATA block"))?; - offset += size; - - if block_count == 0 { - // Leaf block: contains actual data - if offset >= data.len() { - return Ok(result); - } - let data_length = data[offset] as usize; - offset += 1; - - if offset + data_length > data.len() { - return Err(Error("Truncated LIDATA block")); - } - - let block_data = &data[offset..offset + data_length]; - - // Repeat the data block - for _ in 0..repeat_count { - result.extend_from_slice(block_data); - } - } else { - // Nested blocks: recurse for each block - for _ in 0..block_count { - let block_data = self.expand_lidata_block(&data[offset..])?; - let block_size = lidata_block_size(&data[offset..])?; - offset += block_size; - - // Repeat the expanded block - for _ in 0..repeat_count { - result.extend_from_slice(&block_data); - } - } - } - - Ok(result) - } - - /// Get the module name - pub fn module_name(&self) -> Option<&'data str> { - self.module_name - } - - /// Get the segments as a slice - pub fn segments_slice(&self) -> &[OmfSegment<'data>] { - &self.segments - } - - /// Get symbol by external-name index (1-based, as used in FIXUPP records) - pub fn external_symbol(&self, external_index: u16) -> Option<&OmfSymbol<'data>> { - let 
symbol_index = self - .external_order - .get(external_index.checked_sub(1)? as usize)?; - self.symbols.get(symbol_index.0) - } - - /// Get a name by index (1-based) - pub fn get_name(&self, index: u16) -> Option<&'data [u8]> { - let name_index = index.checked_sub(1)?; - self.names.get(name_index as usize).copied() - } - - /// Get all symbols (for iteration) - pub fn all_symbols(&self) -> &[OmfSymbol<'data>] { - &self.symbols - } - - /// Verify the checksum of an OMF record - /// - /// The checksum is calculated so that the sum of all bytes in the record, - /// including the checksum byte itself, equals 0 (modulo 256). - fn verify_checksum(record_type: u8, length: usize, body: &[u8], checksum: u8) -> bool { - // Some compilers write a 0 byte rather than computing the checksum, - // so we accept that as valid - if checksum == 0 { - return true; - } - - let mut sum = u32::from(record_type); - // Add length bytes (little-endian) - sum = sum.wrapping_add((length & 0xff) as u32); - sum = sum.wrapping_add((length >> 8) as u32); - // Add all body bytes - for &byte in body { - sum = sum.wrapping_add(u32::from(byte)); - } - // Add checksum byte - sum = sum.wrapping_add(u32::from(checksum)); - - // The sum should be 0 (modulo 256) - (sum & 0xff) == 0 - } -} - -/// Helper function to calculate LIDATA block size -fn lidata_block_size(data: &[u8]) -> Result { - let mut offset = 0; - - // Read repeat count - let (_, size) = omf::read_encoded_value(&data[offset..]) - .ok_or(Error("Invalid repeat count in LIDATA block"))?; - offset += size; - - // Read block count - let (block_count, size) = omf::read_encoded_value(&data[offset..]) - .ok_or(Error("Invalid block count in LIDATA block"))?; - offset += size; - - if block_count == 0 { - // Leaf block - if offset >= data.len() { - return Ok(offset); - } - let data_length = data[offset] as usize; - offset += 1 + data_length; - } else { - // Nested blocks - for _ in 0..block_count { - offset += lidata_block_size(&data[offset..])?; - } - } 
- - Ok(offset) -} diff --git a/src/read/omf/relocation.rs b/src/read/omf/relocation.rs index 60d2941f..b616d7c7 100644 --- a/src/read/omf/relocation.rs +++ b/src/read/omf/relocation.rs @@ -1,16 +1,41 @@ -use crate::{omf, read, Relocation, SectionIndex}; +use crate::read::ReadRef; +use crate::{ + omf, Relocation, RelocationEncoding, RelocationFlags, RelocationKind, RelocationTarget, + SectionIndex, SymbolIndex, +}; -use super::OmfFile; +use super::{FrameMethod, OmfFile, TargetMethod}; + +/// An OMF fixup (relocation entry). +#[derive(Debug, Clone)] +pub(super) struct OmfFixup { + /// Offset in segment where fixup is applied + pub(super) offset: u32, + /// Location type (what to patch) + pub(super) location: omf::FixupLocation, + /// Frame method + pub(super) frame_method: FrameMethod, + /// Target method + pub(super) target_method: TargetMethod, + /// Frame index (meaning depends on frame_method) + pub(super) frame_index: u16, + /// Target index (meaning depends on target_method) + pub(super) target_index: u16, + /// Target displacement + pub(super) target_displacement: u32, + /// M-bit: true for segment-relative, false for PC-relative + pub(super) is_segment_relative: bool, +} /// An iterator over OMF relocations. 
#[derive(Debug)] -pub struct OmfRelocationIterator<'data, 'file, R: read::ReadRef<'data>> { +pub struct OmfRelocationIterator<'data, 'file, R: ReadRef<'data>> { pub(super) file: &'file OmfFile<'data, R>, pub(super) segment_index: usize, pub(super) index: usize, } -impl<'data, 'file, R: read::ReadRef<'data>> Iterator for OmfRelocationIterator<'data, 'file, R> { +impl<'data, 'file, R: ReadRef<'data>> Iterator for OmfRelocationIterator<'data, 'file, R> { type Item = (u64, Relocation); fn next(&mut self) -> Option { @@ -18,58 +43,109 @@ impl<'data, 'file, R: read::ReadRef<'data>> Iterator for OmfRelocationIterator<' let reloc = relocations.get(self.index)?; self.index += 1; - // Convert OMF relocation to generic relocation - let (kind, size, addend) = match reloc.location { - omf::FixupLocation::LowByte => (read::RelocationKind::Absolute, 8, 0), - omf::FixupLocation::HighByte => (read::RelocationKind::Absolute, 8, 0), + let (mut kind, size, base_addend) = match reloc.location { + omf::FixupLocation::LowByte => (RelocationKind::Absolute, 8, 0), + omf::FixupLocation::HighByte => (RelocationKind::Absolute, 8, 0), omf::FixupLocation::Offset | omf::FixupLocation::LoaderOffset => { if reloc.is_segment_relative { - // M=1: Segment-relative - (read::RelocationKind::Absolute, 16, 0) + (RelocationKind::SectionOffset, 16, 0) } else { - // M=0: PC-relative (self-relative) - (read::RelocationKind::Relative, 16, -2) + (RelocationKind::Relative, 16, -2) } } omf::FixupLocation::Offset32 | omf::FixupLocation::LoaderOffset32 => { if reloc.is_segment_relative { - // M=1: Segment-relative - (read::RelocationKind::Absolute, 32, 0) + (RelocationKind::SectionOffset, 32, 0) } else { - // M=0: PC-relative (self-relative) - (read::RelocationKind::Relative, 32, -4) + (RelocationKind::Relative, 32, -4) } } - omf::FixupLocation::Base => (read::RelocationKind::Absolute, 16, 0), - omf::FixupLocation::Pointer => (read::RelocationKind::Absolute, 32, 0), - omf::FixupLocation::Pointer48 => 
(read::RelocationKind::Absolute, 48, 0), + omf::FixupLocation::Base => { + if matches!(reloc.target_method, TargetMethod::SegmentIndex) { + (RelocationKind::SectionIndex, 16, 0) + } else { + (RelocationKind::Unknown, 16, 0) + } + } + omf::FixupLocation::Pointer => (RelocationKind::Absolute, 32, 0), + omf::FixupLocation::Pointer48 => (RelocationKind::Absolute, 48, 0), }; - let relocation = Relocation { - kind, - encoding: read::RelocationEncoding::Generic, - size, - target: match reloc.target_method { - omf::TargetMethod::SegmentIndex => { - read::RelocationTarget::Section(SectionIndex(reloc.target_index as usize)) - } - omf::TargetMethod::ExternalIndex => { - // External indices in OMF are 1-based indices into the external-name table - if let Some(symbol) = self.file.external_symbol(reloc.target_index) { - read::RelocationTarget::Symbol(read::SymbolIndex(symbol.symbol_index)) + if matches!(kind, RelocationKind::SectionOffset) + && !matches!(reloc.target_method, TargetMethod::SegmentIndex) + { + kind = RelocationKind::Unknown; + } + + if matches!( + reloc.location, + omf::FixupLocation::LoaderOffset | omf::FixupLocation::LoaderOffset32 + ) && matches!(reloc.frame_method, FrameMethod::ExternalIndex) + { + kind = RelocationKind::Unknown; + } + + if matches!(reloc.target_method, TargetMethod::GroupIndex) { + kind = RelocationKind::Unknown; + } + + let target = match reloc.target_method { + TargetMethod::SegmentIndex => { + if let Some(zero_based) = reloc.target_index.checked_sub(1) { + let index = zero_based as usize; + if index < self.file.segments.len() { + RelocationTarget::Section(SectionIndex(index)) } else { - // Invalid external index - read::RelocationTarget::Absolute + RelocationTarget::Absolute } + } else { + RelocationTarget::Absolute } - _ => read::RelocationTarget::Absolute, - }, - addend: reloc.target_displacement as i64 + addend, + } + TargetMethod::ExternalIndex => { + // External indices in OMF are 1-based indices into the external-name table + if let 
Some(symbol) = self.file.external_symbol(reloc.target_index) { + RelocationTarget::Symbol(SymbolIndex(symbol.symbol_index)) + } else { + RelocationTarget::Absolute + } + } + TargetMethod::GroupIndex | TargetMethod::FrameNumber => RelocationTarget::Absolute, + }; + + let fixup_frame = match reloc.frame_method { + FrameMethod::SegmentIndex => omf::FixupFrame::Segment(reloc.frame_index), + FrameMethod::GroupIndex => omf::FixupFrame::Group(reloc.frame_index), + FrameMethod::ExternalIndex => omf::FixupFrame::External(reloc.frame_index), + FrameMethod::FrameNumber => omf::FixupFrame::FrameNumber(reloc.frame_index), + FrameMethod::Location => omf::FixupFrame::Location, + FrameMethod::Target => omf::FixupFrame::Target, + }; + + let fixup_target = match reloc.target_method { + TargetMethod::SegmentIndex => omf::FixupTarget::Segment(reloc.target_index), + TargetMethod::GroupIndex => omf::FixupTarget::Group(reloc.target_index), + TargetMethod::ExternalIndex => omf::FixupTarget::External(reloc.target_index), + TargetMethod::FrameNumber => omf::FixupTarget::FrameNumber(reloc.target_index), + }; + + let relocation = Relocation { + kind, + encoding: RelocationEncoding::Generic, + size, + target, + addend: (reloc.target_displacement as i64) + base_addend, implicit_addend: false, - flags: read::RelocationFlags::Generic { - kind, - encoding: read::RelocationEncoding::Generic, - size, + flags: RelocationFlags::Omf { + location: reloc.location, + mode: if reloc.is_segment_relative { + omf::FixupMode::SegmentRelative + } else { + omf::FixupMode::SelfRelative + }, + frame: fixup_frame, + target: fixup_target, + // target_displacement: reloc.target_displacement, }, }; diff --git a/src/read/omf/section.rs b/src/read/omf/section.rs index 43970aa3..45dbd248 100644 --- a/src/read/omf/section.rs +++ b/src/read/omf/section.rs @@ -1,18 +1,13 @@ -//! OMF section implementation. 
- use alloc::borrow::Cow; -use alloc::vec; +use alloc::{vec, vec::Vec}; use core::str; -use crate::{ - read::{ - self, CompressedData, CompressedFileRange, Error, ObjectSection, ReadRef, RelocationMap, - Result, SectionFlags, SectionIndex, SectionKind, - }, - ComdatKind, ObjectComdat, SymbolIndex, +use crate::read::{ + self, CompressedData, CompressedFileRange, Error, ObjectSection, ReadRef, RelocationMap, + Result, SectionFlags, SectionIndex, SectionKind, }; -use super::{relocation::OmfRelocationIterator, OmfDataChunk, OmfFile, OmfSegment}; +use super::{expand_lidata_block, OmfDataChunk, OmfFile, OmfRelocationIterator, OmfSegment}; /// A section in an OMF file. #[derive(Debug)] @@ -21,6 +16,16 @@ pub struct OmfSection<'data, 'file, R: ReadRef<'data>> { pub(super) index: usize, } +/// An OMF group definition +#[derive(Debug, Clone)] +#[allow(unused)] +pub(super) struct OmfGroup { + /// Group name index (into names table) + pub(super) name_index: u16, + /// Segment indices in this group + pub(super) segments: Vec, +} + impl<'data, 'file, R: ReadRef<'data>> OmfSection<'data, 'file, R> { fn segment(&self) -> &OmfSegment<'data> { &self.file.segments[self.index] @@ -153,7 +158,7 @@ impl<'data, 'file, R: ReadRef<'data>> ObjectSection<'data> for OmfSection<'data, } OmfDataChunk::Iterated(lidata) => { // LIDATA needs expansion - if let Ok(expanded) = self.file.expand_lidata_block(lidata) { + if let Ok(expanded) = expand_lidata_block(lidata) { let end = start + expanded.len(); if end <= result.len() { result[start..end].copy_from_slice(&expanded); @@ -245,97 +250,3 @@ impl<'data, 'file, R: ReadRef<'data>> Iterator for OmfSectionIterator<'data, 'fi } } } - -/// A COMDAT section in an OMF file. 
-#[derive(Debug)] -pub struct OmfComdat<'data, 'file, R: ReadRef<'data>> { - file: &'file OmfFile<'data, R>, - index: usize, - _phantom: core::marker::PhantomData<&'data ()>, -} - -impl<'data, 'file, R: ReadRef<'data>> read::private::Sealed for OmfComdat<'data, 'file, R> {} - -impl<'data, 'file, R: ReadRef<'data>> ObjectComdat<'data> for OmfComdat<'data, 'file, R> { - type SectionIterator = OmfComdatSectionIterator<'data, 'file, R>; - - fn kind(&self) -> ComdatKind { - let comdat = &self.file.comdats[self.index]; - match comdat.selection { - super::OmfComdatSelection::Explicit => ComdatKind::NoDuplicates, - super::OmfComdatSelection::UseAny => ComdatKind::Any, - super::OmfComdatSelection::SameSize => ComdatKind::SameSize, - super::OmfComdatSelection::ExactMatch => ComdatKind::ExactMatch, - } - } - - fn symbol(&self) -> SymbolIndex { - // COMDAT symbols don't have a direct symbol index in OMF - SymbolIndex(usize::MAX) - } - - fn name_bytes(&self) -> Result<&'data [u8]> { - let comdat = &self.file.comdats[self.index]; - Ok(comdat.name) - } - - fn name(&self) -> Result<&'data str> { - let comdat = &self.file.comdats[self.index]; - core::str::from_utf8(comdat.name).map_err(|_| Error("Invalid UTF-8 in COMDAT name")) - } - - fn sections(&self) -> Self::SectionIterator { - let comdat = &self.file.comdats[self.index]; - OmfComdatSectionIterator { - segment_index: (comdat.segment_index as usize).checked_sub(1), - returned: false, - _phantom: core::marker::PhantomData, - } - } -} - -/// An iterator over COMDAT sections. 
-#[derive(Debug)] -pub struct OmfComdatIterator<'data, 'file, R: ReadRef<'data>> { - pub(super) file: &'file OmfFile<'data, R>, - pub(super) index: usize, -} - -impl<'data, 'file, R: ReadRef<'data>> Iterator for OmfComdatIterator<'data, 'file, R> { - type Item = OmfComdat<'data, 'file, R>; - - fn next(&mut self) -> Option { - if self.index < self.file.comdats.len() { - let comdat = OmfComdat { - file: self.file, - index: self.index, - _phantom: core::marker::PhantomData, - }; - self.index += 1; - Some(comdat) - } else { - None - } - } -} - -/// An iterator over sections in a COMDAT. -#[derive(Debug)] -pub struct OmfComdatSectionIterator<'data, 'file, R: ReadRef<'data>> { - segment_index: Option, - returned: bool, - _phantom: core::marker::PhantomData<(&'data (), &'file (), R)>, -} - -impl<'data, 'file, R: ReadRef<'data>> Iterator for OmfComdatSectionIterator<'data, 'file, R> { - type Item = SectionIndex; - - fn next(&mut self) -> Option { - if !self.returned { - self.returned = true; - self.segment_index.map(|idx| SectionIndex(idx + 1)) - } else { - None - } - } -} diff --git a/src/read/omf/segment.rs b/src/read/omf/segment.rs index 684a0322..1055f14b 100644 --- a/src/read/omf/segment.rs +++ b/src/read/omf/segment.rs @@ -1,6 +1,69 @@ -use crate::{read, ObjectSegment, ReadRef, Result, SegmentFlags}; +use alloc::vec::Vec; -use super::OmfFile; +use crate::read::{self, ObjectSegment, ReadRef, Result}; +use crate::{omf, SegmentFlags}; + +use super::{OmfFile, OmfFixup}; + +/// An OMF segment definition +#[derive(Debug, Clone)] +pub struct OmfSegment<'data> { + /// Segment name index (into names table) + pub(super) name_index: u16, + /// Class name index (into names table) + pub(super) class_index: u16, + /// Overlay name index (into names table) + #[allow(unused)] // TODO + pub(super) overlay_index: u16, + /// Segment alignment + pub(super) alignment: omf::SegmentAlignment, + /// Segment combination + pub(super) combination: omf::SegmentCombination, + /// Whether this is 
a 32-bit segment + #[allow(unused)] // TODO + pub(super) use32: bool, + /// Segment length + pub(super) length: u32, + /// Segment data chunks (offset, data) + /// Multiple LEDATA/LIDATA records can contribute to a single segment + pub(super) data_chunks: Vec<(u32, OmfDataChunk<'data>)>, + /// Relocations for this segment + pub(super) relocations: Vec, +} + +/// Data chunk for a segment +#[derive(Debug, Clone)] +pub(super) enum OmfDataChunk<'data> { + /// Direct data from LEDATA record + Direct(&'data [u8]), + /// Compressed/iterated data from LIDATA record (needs expansion) + Iterated(&'data [u8]), +} + +impl<'data> OmfSegment<'data> { + /// Get the raw data of the segment if it's a single contiguous chunk + pub fn get_single_chunk(&self) -> Option<&'data [u8]> { + if self.data_chunks.len() == 1 { + let (offset, chunk) = &self.data_chunks[0]; + if *offset == 0 { + match chunk { + OmfDataChunk::Direct(data) if data.len() == self.length as usize => { + return Some(data); + } + _ => {} + } + } + } + None + } + + /// Check if any data chunk needs expansion (LIDATA) + pub fn has_iterated_data(&self) -> bool { + self.data_chunks + .iter() + .any(|(_, chunk)| matches!(chunk, OmfDataChunk::Iterated(_))) + } +} /// An OMF segment reference. #[derive(Debug)] diff --git a/src/read/omf/symbol.rs b/src/read/omf/symbol.rs index 3c3dfa46..b342cc4a 100644 --- a/src/read/omf/symbol.rs +++ b/src/read/omf/symbol.rs @@ -1,14 +1,53 @@ -//! OMF symbol implementation. 
- use core::str; use crate::read::{ - self, ObjectSymbol, ObjectSymbolTable, ReadRef, Result, SectionIndex, SymbolFlags, SymbolIndex, - SymbolKind, SymbolScope, SymbolSection, + self, Error, ObjectSymbol, ObjectSymbolTable, ReadRef, Result, SectionIndex, SymbolFlags, + SymbolIndex, SymbolKind, SymbolScope, SymbolSection, }; -use crate::Error; -use super::{OmfFile, OmfSymbol}; +use super::OmfFile; + +/// An OMF symbol +#[derive(Debug, Clone)] +pub struct OmfSymbol<'data> { + /// Symbol table index + pub symbol_index: usize, + /// Symbol name + pub name: &'data [u8], + /// Symbol class (Public, External, etc.) + pub class: OmfSymbolClass, + /// Group index (0 if none) + pub group_index: u16, + /// Segment index (0 if external) + pub segment_index: u16, + /// Frame number (for absolute symbols when segment_index == 0) + pub frame_number: u16, + /// Offset within segment + pub offset: u32, + /// Type index (usually 0) + pub type_index: u16, + /// Pre-computed symbol kind + pub kind: SymbolKind, +} + +/// Symbol class for OMF symbols +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum OmfSymbolClass { + /// Public symbol (PUBDEF) + Public, + /// Local public symbol (LPUBDEF) + LocalPublic, + /// External symbol (EXTDEF) + External, + /// Local external symbol (LEXTDEF) + LocalExternal, + /// Communal symbol (COMDEF) + Communal, + /// Local communal symbol (LCOMDEF) + LocalCommunal, + /// COMDAT external symbol (CEXTDEF) + ComdatExternal, +} impl<'data> read::private::Sealed for OmfSymbol<'data> {} From f8023933c931335a1ea55384bfef538457c412a5 Mon Sep 17 00:00:00 2001 From: Luke Street Date: Thu, 25 Sep 2025 00:20:55 -0600 Subject: [PATCH 6/8] Improve LIDATA expansion --- src/read/omf/file.rs | 143 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 124 insertions(+), 19 deletions(-) diff --git a/src/read/omf/file.rs b/src/read/omf/file.rs index 441c9ef3..a57bd51a 100644 --- a/src/read/omf/file.rs +++ b/src/read/omf/file.rs @@ -1418,28 +1418,43 @@ pub(super) 
enum FrameMethod { /// Expand a LIDATA block into its uncompressed form pub(super) fn expand_lidata_block(data: &[u8]) -> Result> { + let (consumed, expanded_size) = lidata_block_expanded_size(data)?; + let mut result = vec![0u8; expanded_size]; + let mut write_offset = 0usize; + let consumed_by_expand = expand_lidata_block_into(data, &mut result, &mut write_offset)?; + + debug_assert_eq!(write_offset, expanded_size); + debug_assert_eq!(consumed_by_expand, consumed); + + Ok(result) +} + +fn expand_lidata_block_into( + data: &[u8], + output: &mut [u8], + write_offset: &mut usize, +) -> Result { let mut offset = 0; - let mut result = Vec::new(); - // Read repeat count let (repeat_count, size) = read_encoded_value(&data[offset..]).ok_or(Error("Invalid repeat count in LIDATA block"))?; offset += size; if repeat_count == 0 { - return Ok(result); + return lidata_block_size(data); } - // Read block count + let repeat_count = repeat_count as usize; + let (block_count, size) = read_encoded_value(&data[offset..]).ok_or(Error("Invalid block count in LIDATA block"))?; offset += size; if block_count == 0 { - // Leaf block: contains actual data if offset >= data.len() { - return Ok(result); + return Ok(offset); } + let data_length = data[offset] as usize; offset += 1; @@ -1448,28 +1463,118 @@ pub(super) fn expand_lidata_block(data: &[u8]) -> Result> { } let block_data = &data[offset..offset + data_length]; + offset += data_length; - // Repeat the data block for _ in 0..repeat_count { - result.extend_from_slice(block_data); + let end = write_offset + .checked_add(data_length) + .ok_or(Error("LIDATA expanded size overflow"))?; + if end > output.len() { + return Err(Error("LIDATA expanded size mismatch")); + } + output[*write_offset..end].copy_from_slice(block_data); + *write_offset = end; } } else { - // Nested blocks: recurse for each block + let mut block_offset = offset; + let iteration_start = *write_offset; + for _ in 0..block_count { - // TODO this is bad, we should instead 
start by recursively calculating the size, - // allocating the buffer up front, then writing directly into it (e.g. expand_lidata_block_into(&[u8], &mut [u8])) - let block_data = expand_lidata_block(&data[offset..])?; - let block_size = lidata_block_size(&data[offset..])?; - offset += block_size; - - // Repeat the expanded block - for _ in 0..repeat_count { - result.extend_from_slice(&block_data); + let block_size = lidata_block_size(&data[block_offset..])?; + let block_consumed = + expand_lidata_block_into(&data[block_offset..], output, write_offset)?; + + debug_assert_eq!(block_size, block_consumed); + block_offset = block_offset + .checked_add(block_size) + .ok_or(Error("LIDATA block size overflow"))?; + if block_offset > data.len() { + return Err(Error("Truncated LIDATA block")); } } + + let iteration_len = *write_offset - iteration_start; + + for _ in 1..repeat_count { + let dest_start = *write_offset; + let dest_end = dest_start + .checked_add(iteration_len) + .ok_or(Error("LIDATA expanded size overflow"))?; + if dest_end > output.len() { + return Err(Error("LIDATA expanded size mismatch")); + } + if iteration_len != 0 { + output.copy_within(iteration_start..iteration_start + iteration_len, dest_start); + } + *write_offset = dest_end; + } + + offset = block_offset; } - Ok(result) + Ok(offset) +} + +fn lidata_block_expanded_size(data: &[u8]) -> Result<(usize, usize)> { + let mut offset = 0; + + let (repeat_count, size) = + read_encoded_value(&data[offset..]).ok_or(Error("Invalid repeat count in LIDATA block"))?; + offset += size; + + if repeat_count == 0 { + let consumed = lidata_block_size(data)?; + if consumed > data.len() { + return Err(Error("Truncated LIDATA block")); + } + return Ok((consumed, 0)); + } + + let (block_count, size) = + read_encoded_value(&data[offset..]).ok_or(Error("Invalid block count in LIDATA block"))?; + offset += size; + + if block_count == 0 { + if offset >= data.len() { + return Ok((offset, 0)); + } + + let data_length = 
data[offset] as usize; + offset += 1; + + if offset + data_length > data.len() { + return Err(Error("Truncated LIDATA block")); + } + + offset += data_length; + + let expanded = data_length + .checked_mul(repeat_count as usize) + .ok_or(Error("LIDATA expanded size overflow"))?; + Ok((offset, expanded)) + } else { + let mut block_offset = offset; + let mut single_iteration = 0usize; + + for _ in 0..block_count { + let (consumed, expanded) = lidata_block_expanded_size(&data[block_offset..])?; + block_offset = block_offset + .checked_add(consumed) + .ok_or(Error("LIDATA block size overflow"))?; + if block_offset > data.len() { + return Err(Error("Truncated LIDATA block")); + } + single_iteration = single_iteration + .checked_add(expanded) + .ok_or(Error("LIDATA expanded size overflow"))?; + } + + let expanded = single_iteration + .checked_mul(repeat_count as usize) + .ok_or(Error("LIDATA expanded size overflow"))?; + + Ok((block_offset, expanded)) + } } /// Helper function to calculate LIDATA block size From 2d115c99c11f5a919d4b0314b332a698544bfbe7 Mon Sep 17 00:00:00 2001 From: Luke Street Date: Thu, 25 Sep 2025 13:11:22 -0600 Subject: [PATCH 7/8] Refactoring & improvements --- src/common.rs | 1 + src/omf.rs | 144 ++++++++++++++++--------- src/read/mod.rs | 12 +-- src/read/omf/file.rs | 250 +++++++++++++------------------------------ tests/read/omf.rs | 10 +- 5 files changed, 175 insertions(+), 242 deletions(-) diff --git a/src/common.rs b/src/common.rs index 5386d7d3..81241ee1 100644 --- a/src/common.rs +++ b/src/common.rs @@ -617,6 +617,7 @@ pub enum RelocationFlags { /// `r_rsize` field in the XCOFF relocation. r_rsize: u8, }, + #[cfg(feature = "omf")] /// OMF relocation metadata. Omf { /// The location field describing what bytes are being fixed up. 
diff --git a/src/omf.rs b/src/omf.rs index b1bd5b3d..073f8f5d 100644 --- a/src/omf.rs +++ b/src/omf.rs @@ -90,64 +90,63 @@ pub mod record_type { pub const VERNUM: u8 = 0xCC; /// Vendor-specific OMF Extension Record pub const VENDEXT: u8 = 0xCE; -} -/// Check if a byte is a valid OMF record type -pub fn is_omf_record_type(byte: u8) -> bool { - use crate::omf::record_type::*; - matches!( - byte, - THEADR - | LHEADR - | COMENT - | MODEND - | MODEND32 - | EXTDEF - | TYPDEF - | PUBDEF - | PUBDEF32 - | LINNUM - | LINNUM32 - | LNAMES - | SEGDEF - | SEGDEF32 - | GRPDEF - | FIXUPP - | FIXUPP32 - | LEDATA - | LEDATA32 - | LIDATA - | LIDATA32 - | COMDEF - | BAKPAT - | BAKPAT32 - | LEXTDEF - | LEXTDEF32 - | LPUBDEF - | LPUBDEF32 - | LCOMDEF - | CEXTDEF - | COMDAT - | COMDAT32 - | LINSYM - | LINSYM32 - | ALIAS - | NBKPAT - | NBKPAT32 - | LLNAMES - | VERNUM - | VENDEXT - ) + /// Return true if the record type is valid + pub fn is_valid(record_type: u8) -> bool { + matches!( + record_type, + THEADR + | LHEADR + | COMENT + | MODEND + | MODEND32 + | EXTDEF + | TYPDEF + | PUBDEF + | PUBDEF32 + | LINNUM + | LINNUM32 + | LNAMES + | SEGDEF + | SEGDEF32 + | GRPDEF + | FIXUPP + | FIXUPP32 + | LEDATA + | LEDATA32 + | LIDATA + | LIDATA32 + | COMDEF + | BAKPAT + | BAKPAT32 + | LEXTDEF + | LEXTDEF32 + | LPUBDEF + | LPUBDEF32 + | LCOMDEF + | CEXTDEF + | COMDAT + | COMDAT32 + | LINSYM + | LINSYM32 + | ALIAS + | NBKPAT + | NBKPAT32 + | LLNAMES + | VERNUM + | VENDEXT + ) + } } /// The addressing mode for an OMF relocation. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[repr(u8)] pub enum FixupMode { - /// Segment-relative relocation (`M = 1`). - SegmentRelative = 0, /// Self-relative relocation (`M = 0`). - SelfRelative = 1, + SelfRelative = 0, + /// Segment-relative relocation (`M = 1`). + SegmentRelative = 1, } /// Frame datum variants as defined by the OMF specification. 
@@ -249,3 +248,48 @@ pub enum FixupLocation { /// 32-bit loader-resolved offset LoaderOffset32 = 13, } + +/// Return true if the data looks like an OMF file. +pub(crate) fn is_omf<'data, R: crate::ReadRef<'data>>(data: R, offset: u64) -> bool { + let Ok(header) = data.read_at::(offset) else { + return false; + }; + if !matches!( + header.record_type, + record_type::THEADR | record_type::LHEADR + ) { + return false; + } + let length = header.length.get(crate::endian::LittleEndian) as usize; + if length < 1 { + return false; + } + // Read the full record including the checksum byte + let Ok(record) = data.read_bytes_at(offset, (3 + length) as u64) else { + return false; + }; + // Verify the record checksum + if !verify_checksum(record) { + return false; + } + // Check that the translator or module name string fits in the record + if length > 1 { + let name_len = record[3] as usize; + if name_len > length - 1 { + return false; + } + } + true +} + +/// Verify the checksum of an OMF record +/// +/// The checksum is calculated so that the sum of all bytes in the record, +/// including the checksum byte itself, equals 0 (modulo 256). +/// +/// Some compilers write 0 rather than computing the checksum, +/// so we accept that as valid. +pub(crate) fn verify_checksum(record: &[u8]) -> bool { + let checksum = record.last().copied().unwrap_or(0); + checksum == 0 || record.iter().copied().fold(0u8, u8::wrapping_add) == 0 +} diff --git a/src/read/mod.rs b/src/read/mod.rs index 59a9c9f6..8d7aefe3 100644 --- a/src/read/mod.rs +++ b/src/read/mod.rs @@ -369,15 +369,9 @@ impl FileKind { #[cfg(feature = "xcoff")] [0x01, 0xf7, ..] => FileKind::Xcoff64, #[cfg(feature = "omf")] - [0x80, ..] | [0x82, ..] => { - // Check if it's a valid OMF record type - // TODO this is tautological, 0x80 and 0x82 are valid OMF record types - // how can we check better? 
- if crate::omf::is_omf_record_type(magic[0]) { - FileKind::Omf - } else { - return Err(Error("Unknown file magic")); - } + [crate::omf::record_type::THEADR, ..] | [crate::omf::record_type::LHEADR, ..] + if crate::omf::is_omf(data, offset) => { + FileKind::Omf } _ => return Err(Error("Unknown file magic")), }; diff --git a/src/read/omf/file.rs b/src/read/omf/file.rs index a57bd51a..cae07055 100644 --- a/src/read/omf/file.rs +++ b/src/read/omf/file.rs @@ -94,123 +94,43 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { } } - /// Get the section kind for a segment (reusing logic from OmfSection) + /// Get the section kind for a segment pub(super) fn segment_section_kind(&self, segment_index: usize) -> read::SectionKind { - if segment_index >= self.segments.len() { + let Some(segment) = self.segments.get(segment_index) else { return read::SectionKind::Unknown; - } - - let segment = &self.segments[segment_index]; - - // Check segment name first for special cases - if let Some(seg_name) = self.get_name(segment.name_index) { - // Segments named CONST are always read-only regardless of class - match seg_name { - b"CONST" | b"_CONST" | b"CONST2" | b"RDATA" | b"_RDATA" => { - return read::SectionKind::ReadOnlyData; - } - _ => {} - } + }; - // Check for debug sections by name - if seg_name.starts_with(b"$$") { - // Watcom-style debug sections - return read::SectionKind::Debug; - } - if seg_name == b".drectve" || seg_name == b".DRECTVE" { - return read::SectionKind::Linker; - } + let segment_name = self.get_name(segment.name_index).unwrap_or_default(); + let class_name = self.get_name(segment.class_index).unwrap_or_default(); - // Check other common names - let name_upper = seg_name.to_ascii_uppercase(); - if name_upper == b"_TEXT" || name_upper == b"CODE" || name_upper == b".TEXT" { - return read::SectionKind::Text; - } else if name_upper == b"_DATA" || name_upper == b"DATA" || name_upper == b".DATA" { - return read::SectionKind::Data; - } else if name_upper == b"_BSS" - 
|| name_upper == b"BSS" - || name_upper == b".BSS" - || name_upper == b"STACK" - { - return read::SectionKind::UninitializedData; - } + // Reserved names for debug sections + if segment_name.starts_with(b"$$") { + return read::SectionKind::Debug; } - // Determine kind from class name - if let Some(class_name) = self.get_name(segment.class_index) { - // Check for exact matches first (most common case) - match class_name { - b"CODE" | b"_TEXT" | b"TEXT" => return read::SectionKind::Text, - b"CONST" | b"_CONST" | b"CONST2" | b"RDATA" | b"_RDATA" => { - return read::SectionKind::ReadOnlyData; - } - b"BSS" | b"_BSS" => return read::SectionKind::UninitializedData, - b"STACK" | b"_STACK" => return read::SectionKind::UninitializedData, - b"DEBUG" | b"_DEBUG" | b"DEBSYM" | b"DEBTYP" => return read::SectionKind::Debug, - b"DATA" | b"_DATA" => { - // DATA sections with no actual data are treated as uninitialized - if segment.data_chunks.is_empty() { - return read::SectionKind::UninitializedData; - } else { - return read::SectionKind::Data; - } - } - _ => {} - } - - // Check for case-insensitive substring matches for less common variations - let class_upper = class_name.to_ascii_uppercase(); - if class_upper.windows(4).any(|w| w == b"CODE") { - return read::SectionKind::Text; - } else if class_upper.windows(5).any(|w| w == b"CONST") { + // Substring matches for common class names + if class_name.windows(4).any(|w| w == b"CODE") { + return read::SectionKind::Text; + } else if class_name.windows(4).any(|w| w == b"DATA") { + if segment_name.windows(5).any(|w| w == b"CONST") { return read::SectionKind::ReadOnlyData; - } else if class_upper.windows(3).any(|w| w == b"BSS") - || class_upper.windows(5).any(|w| w == b"STACK") - { - return read::SectionKind::UninitializedData; - } else if class_upper.windows(5).any(|w| w == b"DEBUG") { - return read::SectionKind::Debug; - } else if class_upper.windows(4).any(|w| w == b"DATA") { - // DATA sections with no actual data are treated as 
uninitialized - if segment.data_chunks.is_empty() { - return read::SectionKind::UninitializedData; - } else { - return read::SectionKind::Data; - } + } else { + return read::SectionKind::Data; } + } else if class_name.windows(3).any(|w| w == b"BSS") + || class_name.windows(5).any(|w| w == b"STACK") + { + return read::SectionKind::UninitializedData; + } else if class_name.starts_with(b"DEB") { + return read::SectionKind::Debug; + } else if class_name == b"COMMON" { + return read::SectionKind::Common; } - // Final fallback based on whether segment has data - if segment.data_chunks.is_empty() { - read::SectionKind::UninitializedData - } else { - read::SectionKind::Unknown - } + read::SectionKind::Unknown } fn parse_records(&mut self) -> Result<()> { - let len = self - .data - .len() - .map_err(|_| Error("Failed to get data length"))?; - let data = self - .data - .read_bytes_at(0, len) - .map_err(|_| Error("Failed to read OMF data"))?; - let mut offset = 0; - - // First record must be THEADR or LHEADR - if data.is_empty() { - return Err(Error("Empty OMF file")); - } - - let first_type = data[0]; - if first_type != omf::record_type::THEADR && first_type != omf::record_type::LHEADR { - return Err(Error( - "Invalid OMF file: first record must be THEADR or LHEADR", - )); - } - let mut current_segment: Option = None; let mut current_data_offset: Option = None; @@ -218,87 +138,92 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { let mut frame_threads: [Option; 4] = [None; 4]; let mut target_threads: [Option; 4] = [None; 4]; - while offset < data.len() { - // Read record header - if offset + 3 > data.len() { - break; - } - - let record_type = data[offset]; - let length = u16::from_le_bytes([data[offset + 1], data[offset + 2]]) as usize; - - // Length includes the checksum byte at the end - if offset + 3 + length > data.len() { - return Err(Error("Truncated OMF record")); + let mut offset = 0; + while let Ok(record_header) = self.data.read_at::(offset) { + let record_type 
= record_header.record_type; + let record_length = record_header.length.get(crate::endian::LittleEndian); + let record_data = self + .data + .read_bytes_at(offset, record_length as u64 + 3) + .map_err(|_| Error("Truncated OMF record data"))?; + + if offset == 0 + && !matches!( + record_type, + omf::record_type::THEADR | omf::record_type::LHEADR + ) + { + return Err(Error( + "Invalid OMF file: first record must be THEADR or LHEADR", + )); } - // Record data excludes the checksum - let record_data = &data[offset + 3..offset + 3 + length - 1]; - let checksum = data[offset + 3 + length - 1]; - // Verify checksum - if !Self::verify_checksum(record_type, length, record_data, checksum) { + if !omf::verify_checksum(record_data) { return Err(Error("Invalid OMF record checksum")); } + // Exclude the header and checksum + let inner_data = &record_data[3..2 + record_length as usize]; + // Process record based on type match record_type { omf::record_type::THEADR | omf::record_type::LHEADR => { - self.parse_header(record_data)?; + self.parse_header(inner_data)?; } omf::record_type::LNAMES | omf::record_type::LLNAMES => { - self.parse_names(record_data)?; + self.parse_names(inner_data)?; } omf::record_type::SEGDEF | omf::record_type::SEGDEF32 => { - self.parse_segdef(record_data, record_type == omf::record_type::SEGDEF32)?; + self.parse_segdef(inner_data, record_type == omf::record_type::SEGDEF32)?; } omf::record_type::GRPDEF => { - self.parse_grpdef(record_data)?; + self.parse_grpdef(inner_data)?; } omf::record_type::PUBDEF | omf::record_type::PUBDEF32 => { self.parse_pubdef( - record_data, + inner_data, record_type == omf::record_type::PUBDEF32, OmfSymbolClass::Public, )?; } omf::record_type::LPUBDEF | omf::record_type::LPUBDEF32 => { self.parse_pubdef( - record_data, + inner_data, record_type == omf::record_type::LPUBDEF32, OmfSymbolClass::LocalPublic, )?; } omf::record_type::EXTDEF => { - self.parse_extdef(record_data, OmfSymbolClass::External)?; + 
self.parse_extdef(inner_data, OmfSymbolClass::External)?; } omf::record_type::LEXTDEF | omf::record_type::LEXTDEF32 => { - self.parse_extdef(record_data, OmfSymbolClass::LocalExternal)?; + self.parse_extdef(inner_data, OmfSymbolClass::LocalExternal)?; } omf::record_type::CEXTDEF => { - self.parse_extdef(record_data, OmfSymbolClass::ComdatExternal)?; + self.parse_extdef(inner_data, OmfSymbolClass::ComdatExternal)?; } omf::record_type::COMDEF => { - self.parse_comdef(record_data, OmfSymbolClass::Communal)?; + self.parse_comdef(inner_data, OmfSymbolClass::Communal)?; } omf::record_type::LCOMDEF => { - self.parse_comdef(record_data, OmfSymbolClass::LocalCommunal)?; + self.parse_comdef(inner_data, OmfSymbolClass::LocalCommunal)?; } omf::record_type::COMDAT | omf::record_type::COMDAT32 => { - self.parse_comdat(record_data, record_type == omf::record_type::COMDAT32)?; + self.parse_comdat(inner_data, record_type == omf::record_type::COMDAT32)?; } omf::record_type::COMENT => { - self.parse_comment(record_data)?; + self.parse_comment(inner_data)?; } omf::record_type::LEDATA | omf::record_type::LEDATA32 => { let (seg_idx, offset) = - self.parse_ledata(record_data, record_type == omf::record_type::LEDATA32)?; + self.parse_ledata(inner_data, record_type == omf::record_type::LEDATA32)?; current_segment = Some(seg_idx); current_data_offset = Some(offset); } omf::record_type::LIDATA | omf::record_type::LIDATA32 => { let (seg_idx, offset) = - self.parse_lidata(record_data, record_type == omf::record_type::LIDATA32)?; + self.parse_lidata(inner_data, record_type == omf::record_type::LIDATA32)?; current_segment = Some(seg_idx); current_data_offset = Some(offset); } @@ -307,7 +232,7 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { (current_segment, current_data_offset) { self.parse_fixupp( - record_data, + inner_data, record_type == omf::record_type::FIXUPP32, seg_idx, data_offset, @@ -329,7 +254,11 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { } } - offset += 3 + length; 
// header + data (which includes checksum) + offset += record_length as u64 + 3; + } + + if offset == 0 { + return Err(Error("No OMF records found")); } Ok(()) @@ -629,8 +558,7 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { size_val } _ => { - // Unknown data type, skip - continue; + return Err(Error("Invalid data type in COMDEF/LCOMDEF record")); } }; @@ -1109,7 +1037,7 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { } /// Get the segments as a slice - pub fn segments_slice(&self) -> &[OmfSegment<'data>] { + pub fn raw_segments(&self) -> &[OmfSegment<'data>] { &self.segments } @@ -1128,35 +1056,9 @@ impl<'data, R: ReadRef<'data>> OmfFile<'data, R> { } /// Get all symbols (for iteration) - pub fn all_symbols(&self) -> &[OmfSymbol<'data>] { + pub fn raw_symbols(&self) -> &[OmfSymbol<'data>] { &self.symbols } - - /// Verify the checksum of an OMF record - /// - /// The checksum is calculated so that the sum of all bytes in the record, - /// including the checksum byte itself, equals 0 (modulo 256). 
- fn verify_checksum(record_type: u8, length: usize, body: &[u8], checksum: u8) -> bool { - // Some compilers write a 0 byte rather than computing the checksum, - // so we accept that as valid - if checksum == 0 { - return true; - } - - let mut sum = u32::from(record_type); - // Add length bytes (little-endian) - sum = sum.wrapping_add((length & 0xff) as u32); - sum = sum.wrapping_add((length >> 8) as u32); - // Add all body bytes - for &byte in body { - sum = sum.wrapping_add(u32::from(byte)); - } - // Add checksum byte - sum = sum.wrapping_add(u32::from(checksum)); - - // The sum should be 0 (modulo 256) - (sum & 0xff) == 0 - } } impl<'data, R: ReadRef<'data>> Object<'data> for OmfFile<'data, R> { @@ -1311,7 +1213,7 @@ impl<'data, R: ReadRef<'data>> Object<'data> for OmfFile<'data, R> { fn imports(&self) -> Result>> { Ok(self - .all_symbols() + .raw_symbols() .iter() .filter(|sym| { matches!( @@ -1328,7 +1230,7 @@ impl<'data, R: ReadRef<'data>> Object<'data> for OmfFile<'data, R> { fn exports(&self) -> Result>> { Ok(self - .all_symbols() + .raw_symbols() .iter() .filter(|sym| sym.class == OmfSymbolClass::Public) .map(|pub_sym| Export { @@ -1416,15 +1318,15 @@ pub(super) enum FrameMethod { Target = 5, } -/// Expand a LIDATA block into its uncompressed form +/// Expand a LIDATA block into a newly allocated buffer pub(super) fn expand_lidata_block(data: &[u8]) -> Result> { - let (consumed, expanded_size) = lidata_block_expanded_size(data)?; + let (orig_size, expanded_size) = lidata_block_expanded_size(data)?; let mut result = vec![0u8; expanded_size]; let mut write_offset = 0usize; - let consumed_by_expand = expand_lidata_block_into(data, &mut result, &mut write_offset)?; + let consumed = expand_lidata_block_into(data, &mut result, &mut write_offset)?; debug_assert_eq!(write_offset, expanded_size); - debug_assert_eq!(consumed_by_expand, consumed); + debug_assert_eq!(consumed, orig_size); Ok(result) } diff --git a/tests/read/omf.rs b/tests/read/omf.rs index 
4e85ea03..102ae182 100644 --- a/tests/read/omf.rs +++ b/tests/read/omf.rs @@ -61,13 +61,7 @@ fn test_lidata() { total_data_size += data.len(); } } - - // With LIDATA support, we should have expanded data for the arrays - assert!( - total_data_size >= 200, - "Section data should be expanded from LIDATA, got {} bytes", - total_data_size - ); + assert_eq!(total_data_size, 401); } #[cfg(feature = "std")] @@ -89,8 +83,6 @@ fn test_relocations() { } } } - - // With M-bit support, we should see both types assert!(has_relative, "Should have Relative relocations (M=0)"); assert!(has_absolute, "Should have Absolute relocations (M=1)"); } From 528af94788cc650e43916e02aed0c0e0cf542b3d Mon Sep 17 00:00:00 2001 From: Luke Street Date: Thu, 25 Sep 2025 13:42:17 -0600 Subject: [PATCH 8/8] Update testfiles --- testfiles | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testfiles b/testfiles index 5b121e59..e7ec8ce8 160000 --- a/testfiles +++ b/testfiles @@ -1 +1 @@ -Subproject commit 5b121e59e36d00567366691765c0fce3cb72b5e3 +Subproject commit e7ec8ce87c55569e8511e6a0f157fdcc2d641388