diff --git a/libwild/MachO.md b/libwild/MachO.md index 5e06e8355..a162ff731 100644 --- a/libwild/MachO.md +++ b/libwild/MachO.md @@ -180,7 +180,9 @@ Contents of __unwind_info section: ## `LC_CODE_SIGNATURE` command Code signature is mandatory and cannot run a final binary without it. Can be manually created for a produced binary: `codesign -s - -f a.out`. -A linker can skip emitting the signature by using: `-Wl,-no_adhoc_codesign`. +A linker can skip emitting the signature by using: `-Wl,-no_adhoc_codesign`. One drawback of invoking `codesign` externally is that you +must reserve space for the additional load command in advance; otherwise, offsets in segments such as `__TEXT` will shift. + It's basically an array of SHA-256 hashes, one for each page of the file - similar to how we emit build-id. There's existing LLVM implementation of the format we can use: https://github.com/llvm/llvm-project/blob/36e495dd903cea000f6c4f51954554c22f39d7da/lld/MachO/SyntheticSections.cpp#L1622-L1662 @@ -196,9 +198,17 @@ Pretty straightforward to implement, replaces a legacy `LC_DYLD_INFO(_ONLY)` com ## `LC_DYLD_CHAINED_FIXUPS` command -TODO: explain better +This is roughly analogous to dynamic relocations in ELF. The format is made up of three parts: + +- a table of imported symbols, including the symbol name and the referenced dylib +- a string table used by the import table +- a per-segment chain of fixups, where each entry records the location to patch and an index into the import table that identifies the target + +Good documentation is available here: https://github.com/qyang-nj/llios/blob/main/dynamic_linking/chained_fixups.md. +The relevant data structures are also defined in Apple's `mach-o/fixup-chains.h` header +and mirrored here: https://github.com/qyang-nj/llios/blob/d204d56ff0533c1fae115b77e7554d2e6f4bc4aa/apple_open_source/dyld/include/mach-o/fixup-chains.h. -Good documentation here: https://github.com/qyang-nj/llios/blob/main/dynamic_linking/chained_fixups.md. 
+Ideally, support for these structures should be added to the `object` crate. ## benchmarks: LLD vs. system linker diff --git a/libwild/src/alignment.rs b/libwild/src/alignment.rs index e30d8e948..72871118d 100644 --- a/libwild/src/alignment.rs +++ b/libwild/src/alignment.rs @@ -62,6 +62,9 @@ pub(crate) const NOTE_GNU_BUILD_ID: Alignment = Alignment { exponent: 2 }; // GNU_STACK.alignment pub(crate) const STACK_ALIGNMENT: Alignment = Alignment { exponent: 4 }; +// Mach-O specific +pub(crate) const MACHO_PAGE_ALIGNMENT: Alignment = Alignment { exponent: 14 }; + impl Alignment { pub(crate) fn new(raw: u64) -> Result { if !raw.is_power_of_two() { diff --git a/libwild/src/args/macho.rs b/libwild/src/args/macho.rs index fed1fe695..9fd601b98 100644 --- a/libwild/src/args/macho.rs +++ b/libwild/src/args/macho.rs @@ -2,6 +2,8 @@ #![allow(unused_variables)] #![allow(unused)] +use crate::alignment::Alignment; +use crate::alignment::MACHO_PAGE_ALIGNMENT; use crate::args::ArgumentParser; use crate::args::CommonArgs; use crate::args::FILES_PER_GROUP_ENV; @@ -92,7 +94,7 @@ impl platform::Args for MachOArgs { } fn loadable_segment_alignment(&self) -> crate::alignment::Alignment { - todo!() + MACHO_PAGE_ALIGNMENT } fn should_merge_sections(&self) -> bool { diff --git a/libwild/src/elf.rs b/libwild/src/elf.rs index d8ebb8766..864d6ffc6 100644 --- a/libwild/src/elf.rs +++ b/libwild/src/elf.rs @@ -1927,6 +1927,15 @@ impl platform::Platform for Elf { group_sizes.merge(&extra_sizes); total_sizes.merge(&extra_sizes); } + + fn align_load_segment_start( + _segment_def: Self::ProgramSegmentDef, + segment_alignment: Alignment, + file_offset: &mut usize, + mem_offset: &mut u64, + ) { + *mem_offset = segment_alignment.align_modulo(*file_offset as u64, *mem_offset); + } } impl<'data> platform::ObjectFile<'data> for File<'data> { diff --git a/libwild/src/layout.rs b/libwild/src/layout.rs index 0933cf0a7..2fa1e1f84 100644 --- a/libwild/src/layout.rs +++ b/libwild/src/layout.rs @@ -4591,11 
+4591,18 @@ fn layout_section_parts( .copied() .unwrap_or_else(|| args.loadable_segment_alignment()); if let Some(location) = pending_location.take() { + // The OrderEvent::SetLocation is ELF-specific only. mem_offset = location.address; file_offset = segment_alignment.align_modulo(mem_offset, file_offset as u64) as usize; } else { - mem_offset = segment_alignment.align_modulo(file_offset as u64, mem_offset); + let segment_def = *program_segments.segment_def(segment_id); + P::align_load_segment_start( + segment_def, + segment_alignment, + &mut file_offset, + &mut mem_offset, + ); } } } diff --git a/libwild/src/macho.rs b/libwild/src/macho.rs index 090f2799e..98e6d80b5 100644 --- a/libwild/src/macho.rs +++ b/libwild/src/macho.rs @@ -4,6 +4,8 @@ use crate::OutputKind; use crate::alignment; +use crate::alignment::Alignment; +use crate::alignment::MACHO_PAGE_ALIGNMENT; use crate::args::macho::MachOArgs; use crate::ensure; use crate::error; @@ -23,8 +25,9 @@ use crate::output_section_id::SectionOutputInfo; use crate::part_id; use crate::platform; use crate::symbol_db::Visibility; -use linker_utils::elf::secnames; +use object::Endian; use object::Endianness; +use object::U32; use object::macho; use object::macho::N_ABS; use object::macho::N_EXT; @@ -49,7 +52,13 @@ const LE: Endianness = Endianness::Little; /// Mach-O uses a zero page for all 32bit addresses and thus we begin the memory /// offsets right after that (1GiB). -pub const MACHO_START_MEM_ADDRESS: u64 = 0x1_0000_0000; +pub(crate) const MACHO_START_MEM_ADDRESS: u64 = 0x1_0000_0000; + +/// A path to the default dynamic linker. 
+pub(crate) const DYLINKER_PATH: &str = "/usr/lib/dyld"; +pub(crate) const DEFAULT_SEGMENT_COUNT: usize = 4; +pub(crate) const CHAINED_FIXUP_TABLE_SIZE: u64 = + (size_of::() + size_of::() * (DEFAULT_SEGMENT_COUNT + 1 + 1)) as u64; type SectionHeader = Section64; type SectionTable<'data> = &'data [Section64]; @@ -61,6 +70,53 @@ pub(crate) type FileHeader = object::macho::MachHeader64; pub(crate) type SegmentCommand = object::macho::SegmentCommand64; pub(crate) type SectionEntry = object::macho::Section64; pub(crate) type EntryPointCommand = object::macho::EntryPointCommand; +pub(crate) type DylinkerCommand = object::macho::DylinkerCommand; +pub(crate) type CodeSignatureCommand = object::macho::LinkeditDataCommand; +pub(crate) type DyldChainedFixupsCommand = object::macho::LinkeditDataCommand; +pub(crate) type ChainedFixupsHeader = DyldChainedFixupsHeader; + +// TODO: move to object crate + +// values for dyld_chained_fixups_header.imports_format +#[allow(non_camel_case_types)] +#[repr(u32)] +pub(crate) enum DyldChainedFixupsImporstFormat { + DYLD_CHAINED_IMPORT = 1, + DYLD_CHAINED_IMPORT_ADDEND = 2, + DYLD_CHAINED_IMPORT_ADDEND64 = 3, +} + +// header of the LC_DYLD_CHAINED_FIXUPS payload +#[derive(Clone, Copy)] +#[repr(C)] +pub(crate) struct DyldChainedFixupsHeader { + // 0 + pub(crate) fixups_version: U32, + // offset of dyld_chained_starts_in_image in chain_data + pub(crate) starts_offset: U32, + // offset of imports table in chain_data + pub(crate) imports_offset: U32, + // offset of symbol strings in chain_data + pub(crate) symbols_offset: U32, + // number of imported symbol names + pub(crate) imports_count: U32, + // DYLD_CHAINED_IMPORT* + pub(crate) imports_format: U32, + // 0 => uncompressed, 1 => zlib compressed + pub(crate) symbols_format: U32, +} + +// Safety: +// `DyldChainedFixupsHeader` is repr(C), contains only `U32` fields, and has no padding. 
+unsafe impl object::Pod for DyldChainedFixupsHeader {} + +// This struct is embedded in LC_DYLD_CHAINED_FIXUPS payload +// struct dyld_chained_starts_in_image +// { +// uint32_t seg_count; +// uint32_t seg_info_offset[1]; // each entry is offset into this struct for that segment +// // followed by pool of dyld_chain_starts_in_segment data +// }; #[derive(derive_more::Debug)] pub(crate) struct File<'data> { @@ -598,19 +654,19 @@ impl platform::NonAddressableIndexes for NonAddressableIndexes { } } +// TODO: update comment + #[derive(Debug, Copy, Clone, Default, PartialEq)] pub(crate) enum SegmentType { - Header, - // All load commands are grouped into the segment. - LoadCommands, - // Sections belonging to __TEXT segment. Text, - // Sections belonging to __DATA segment. - Data, - // Sections belonging to __DATA_CONST segment. - DataConst, + LoadCommands, + TextSections, + DataSections, + DataConstSections, + LinkeditSections, + // The other ELF-specific (or unused) parts/sections will be collected here. 
#[default] - Misc, + Unused, } impl platform::SegmentType for SegmentType {} @@ -642,7 +698,7 @@ impl platform::ProgramSegmentDef for ProgramSegmentDef { } fn is_loadable(self) -> bool { - false + true } fn is_stack(self) -> bool { @@ -662,24 +718,34 @@ impl platform::ProgramSegmentDef for ProgramSegmentDef { section_info: &crate::output_section_id::SectionOutputInfo, section_id: crate::output_section_id::OutputSectionId, ) -> bool { - self.segment_type - == match section_id { - output_section_id::FILE_HEADER => SegmentType::Header, - output_section_id::PAGEZERO_SEGMENT - | output_section_id::TEXT_SEGMENT - | output_section_id::DATA_SEGMENT - | output_section_id::LINK_EDIT_SEGMENT - | output_section_id::ENTRY_POINT => SegmentType::LoadCommands, - output_section_id::TEXT | output_section_id::CSTRING => SegmentType::Text, - output_section_id::DATA => SegmentType::Data, - _ => SegmentType::Misc, + let mapped_segment = match section_id { + output_section_id::FILE_HEADER => SegmentType::Text, + output_section_id::PAGEZERO_SEGMENT + | output_section_id::TEXT_SEGMENT + | output_section_id::DATA_SEGMENT + | output_section_id::LINK_EDIT_SEGMENT + | output_section_id::ENTRY_POINT + | output_section_id::INTERP + | output_section_id::DYLD_CHAINED_FIXUPS => SegmentType::LoadCommands, + output_section_id::TEXT | output_section_id::CSTRING => SegmentType::TextSections, + output_section_id::DATA => SegmentType::DataSections, + output_section_id::CHAINED_FIXUP_TABLE | output_section_id::STRTAB => { + SegmentType::LinkeditSections } + _ => SegmentType::Unused, + }; + + match (self.segment_type, mapped_segment) { + (SegmentType::Text, SegmentType::LoadCommands | SegmentType::TextSections) => true, + _ => self.segment_type == mapped_segment, + } } } pub(crate) struct BuiltInSectionDetails { pub(crate) kind: SectionKind<'static>, pub(crate) section_flags: SectionFlags, + pub(crate) min_alignment: Alignment, pub(crate) target_segment_type: Option, } @@ -688,6 +754,7 @@ impl 
platform::BuiltInSectionDetails for BuiltInSectionDetails {} const DEFAULT_DEFS: BuiltInSectionDetails = BuiltInSectionDetails { kind: SectionKind::Primary(SectionName(&[])), section_flags: SectionFlags::empty(), + min_alignment: alignment::MIN, target_segment_type: None, }; @@ -955,7 +1022,7 @@ impl platform::Platform for MachO { flags: d.section_flags, }, kind: d.kind, - min_alignment: alignment::MIN, + min_alignment: d.min_alignment, location: None, secondary_order: None, }) @@ -1083,14 +1150,14 @@ impl platform::Platform for MachO { part_id::TEXT_SEGMENT, (size_of::() + size_of::() - * count_sections_for_segment_type(output_sections, SegmentType::Text)) + * count_sections_for_segment_type(output_sections, SegmentType::TextSections)) as u64, ); sizes.increment( part_id::DATA_SEGMENT, (size_of::() + size_of::() - * count_sections_for_segment_type(output_sections, SegmentType::Data)) + * count_sections_for_segment_type(output_sections, SegmentType::DataSections)) as u64, ); sizes.increment( @@ -1098,6 +1165,14 @@ impl platform::Platform for MachO { size_of::() as u64, ); sizes.increment(part_id::ENTRY_POINT, size_of::() as u64); + sizes.increment( + part_id::INTERP, + ((size_of::() + DYLINKER_PATH.len()).next_multiple_of(8)) as u64, + ); + sizes.increment( + part_id::DYLD_CHAINED_FIXUPS, + size_of::() as u64, + ); } fn finalise_sizes_for_symbol<'data>( @@ -1152,7 +1227,13 @@ impl platform::Platform for MachO { common: &mut crate::layout::CommonGroupState, symbol_db: &crate::symbol_db::SymbolDb, ) { - // TODO + common.allocate(part_id::CHAINED_FIXUP_TABLE, CHAINED_FIXUP_TABLE_SIZE); + // TODO: Just a filler for now that will ensure the __LINKEDIT takes 16KiB - find a better + // solution. 
+ common.allocate( + part_id::STRTAB, + MACHO_PAGE_ALIGNMENT.value() - CHAINED_FIXUP_TABLE_SIZE, + ); } fn finalise_prelude_layout<'data>( @@ -1202,13 +1283,17 @@ impl platform::Platform for MachO { builder.add_section(output_section_id::PAGEZERO_SEGMENT); builder.add_section(output_section_id::TEXT_SEGMENT); builder.add_section(output_section_id::DATA_SEGMENT); - builder.add_section(output_section_id::ENTRY_POINT); builder.add_section(output_section_id::LINK_EDIT_SEGMENT); + builder.add_section(output_section_id::ENTRY_POINT); + builder.add_section(output_section_id::INTERP); // DYLINKER + builder.add_section(output_section_id::DYLD_CHAINED_FIXUPS); // Content of the sections (e.g. __text, __data). builder.add_section(output_section_id::TEXT); builder.add_section(output_section_id::CSTRING); builder.add_section(output_section_id::DATA); // The rest (e.g. symbol table, string table). + builder.add_section(output_section_id::STRTAB); + builder.add_section(output_section_id::CHAINED_FIXUP_TABLE); builder.build() } @@ -1216,6 +1301,31 @@ impl platform::Platform for MachO { fn start_memory_address(output_kind: OutputKind) -> u64 { MACHO_START_MEM_ADDRESS } + + fn align_load_segment_start( + segment_def: ProgramSegmentDef, + segment_alignment: Alignment, + file_offset: &mut usize, + mem_offset: &mut u64, + ) { + match segment_def.segment_type { + SegmentType::Text + | SegmentType::DataSections + | SegmentType::DataConstSections + | SegmentType::LinkeditSections => { + *file_offset = segment_alignment.align_up(*file_offset as u64) as usize; + *mem_offset = segment_alignment.align_up(*mem_offset); + } + SegmentType::TextSections => { + // TODO: A placeholder space for the LinkeditDataCommand command is allocated + // (added by codesign tool) in order to preserve the offsets into __text and + // other sections in the __TEXT segment. 
+ *file_offset += size_of::(); + *mem_offset += size_of::() as u64; + } + _ => {} + } + } } const SECTION_DEFINITIONS: [BuiltInSectionDetails; NUM_BUILT_IN_SECTIONS] = { @@ -1224,7 +1334,7 @@ const SECTION_DEFINITIONS: [BuiltInSectionDetails; NUM_BUILT_IN_SECTIONS] = { defs[output_section_id::FILE_HEADER.as_usize()] = BuiltInSectionDetails { kind: SectionKind::Primary(SectionName(b"FILE_HEADER")), - target_segment_type: Some(SegmentType::Header), + target_segment_type: Some(SegmentType::Text), ..DEFAULT_DEFS }; // Load commands @@ -1248,6 +1358,7 @@ const SECTION_DEFINITIONS: [BuiltInSectionDetails; NUM_BUILT_IN_SECTIONS] = { defs[output_section_id::LINK_EDIT_SEGMENT.as_usize()] = BuiltInSectionDetails { kind: SectionKind::Primary(SectionName(SEG_LINKEDIT.as_bytes())), target_segment_type: Some(SegmentType::LoadCommands), + section_flags: SectionFlags::from_u32(macho::VM_PROT_READ), ..DEFAULT_DEFS }; defs[output_section_id::ENTRY_POINT.as_usize()] = BuiltInSectionDetails { @@ -1255,8 +1366,24 @@ const SECTION_DEFINITIONS: [BuiltInSectionDetails; NUM_BUILT_IN_SECTIONS] = { target_segment_type: Some(SegmentType::LoadCommands), ..DEFAULT_DEFS }; + defs[output_section_id::INTERP.as_usize()] = BuiltInSectionDetails { + kind: SectionKind::Primary(SectionName(b"LC_LOAD_DYLINKER")), + target_segment_type: Some(SegmentType::LoadCommands), + ..DEFAULT_DEFS + }; + defs[output_section_id::DYLD_CHAINED_FIXUPS.as_usize()] = BuiltInSectionDetails { + kind: SectionKind::Primary(SectionName(b"LC_DYLD_CHAINED_FIXUPS")), + target_segment_type: Some(SegmentType::LoadCommands), + ..DEFAULT_DEFS + }; + defs[output_section_id::CHAINED_FIXUP_TABLE.as_usize()] = BuiltInSectionDetails { + kind: SectionKind::Primary(SectionName(b"DYLD_CHAINED_FIXUPS_TABLE")), + target_segment_type: Some(SegmentType::LinkeditSections), + ..DEFAULT_DEFS + }; defs[output_section_id::STRTAB.as_usize()] = BuiltInSectionDetails { - kind: SectionKind::Primary(SectionName(secnames::STRTAB_SECTION_NAME)), + kind: 
SectionKind::Primary(SectionName(b"STRING_TABLE")), + target_segment_type: Some(SegmentType::LinkeditSections), ..DEFAULT_DEFS }; // Multi-part generated sections @@ -1296,22 +1423,22 @@ const DEFAULT_SECTION_RULES: &[SectionRule<'static>] = &[ const PROGRAM_SEGMENT_DEFS: &[ProgramSegmentDef] = &[ ProgramSegmentDef { - segment_type: SegmentType::Header, + segment_type: SegmentType::Text, }, ProgramSegmentDef { segment_type: SegmentType::LoadCommands, }, ProgramSegmentDef { - segment_type: SegmentType::Text, + segment_type: SegmentType::TextSections, }, ProgramSegmentDef { - segment_type: SegmentType::Data, + segment_type: SegmentType::DataSections, }, ProgramSegmentDef { - segment_type: SegmentType::DataConst, + segment_type: SegmentType::DataConstSections, }, ProgramSegmentDef { - segment_type: SegmentType::Misc, + segment_type: SegmentType::LinkeditSections, }, ]; diff --git a/libwild/src/macho_writer.rs b/libwild/src/macho_writer.rs index b6a9122b3..b525a0560 100644 --- a/libwild/src/macho_writer.rs +++ b/libwild/src/macho_writer.rs @@ -2,6 +2,7 @@ #![allow(unused_variables)] #![allow(unused)] +use crate::alignment::MACHO_PAGE_ALIGNMENT; use crate::bail; use crate::error; use crate::error::Context; @@ -17,6 +18,12 @@ use crate::layout::ObjectLayout; use crate::layout::OutputRecordLayout; use crate::layout::PreludeLayout; use crate::layout::Section; +use crate::macho::ChainedFixupsHeader; +use crate::macho::DEFAULT_SEGMENT_COUNT; +use crate::macho::DYLINKER_PATH; +use crate::macho::DyldChainedFixupsCommand; +use crate::macho::DyldChainedFixupsImporstFormat; +use crate::macho::DylinkerCommand; use crate::macho::EntryPointCommand; use crate::macho::FileHeader; use crate::macho::MACHO_START_MEM_ADDRESS; @@ -27,6 +34,7 @@ use crate::macho::SegmentSectionsInfo; use crate::macho::SegmentType; use crate::macho::get_segment_sections; use crate::output_section_id; +use crate::output_section_id::LINK_EDIT_SEGMENT; use crate::output_section_id::OrderEvent; use 
crate::output_section_id::OutputSectionId; use crate::output_section_id::SectionName; @@ -40,10 +48,14 @@ use crate::resolution::SectionSlot; use crate::timing_phase; use crate::verbose_timing_phase; use object::BigEndian; +use object::Endian; use object::Endianness; use object::U32; use object::from_bytes_mut; +use object::macho; use object::macho::CPU_TYPE_ARM64; +use object::macho::LC_DYLD_CHAINED_FIXUPS; +use object::macho::LC_LOAD_DYLINKER; use object::macho::LC_MAIN; use object::macho::LC_SEGMENT_64; use object::macho::MH_CIGAM_64; @@ -55,11 +67,11 @@ use object::macho::SEG_TEXT; use object::slice_from_bytes_mut; use rayon::iter::IntoParallelIterator; use rayon::iter::ParallelIterator; +use std::io::Write; use tracing::debug_span; use zerocopy::FromZeros; const LE: Endianness = Endianness::Little; - type MachOLayout<'data> = Layout<'data, MachO>; pub(crate) fn write<'data, A: Arch>( @@ -121,12 +133,6 @@ fn write_prelude<'data, A: Arch>( .map_err(|_| error!("Invalid PAGEZERO segment allocation"))? .0; write_pagezero_command::(pagezero_command); - - let linkedit_command: &mut SegmentCommand = - from_bytes_mut(buffers.get_mut(part_id::LINK_EDIT_SEGMENT)) - .map_err(|_| error!("Invalid LINKEDIT segment allocation"))? - .0; - write_linkedit_command::(linkedit_command); write_segment_commands::(layout, buffers)?; let entry_point_command: &mut EntryPointCommand = @@ -135,6 +141,39 @@ fn write_prelude<'data, A: Arch>( .0; write_entry_point_command::(layout, entry_point_command); + let (dylinker_command, dylinker_path_buffer): (&mut DylinkerCommand, &mut [u8]) = + from_bytes_mut(buffers.get_mut(part_id::INTERP)) + .map_err(|_| error!("Invalid INTERP command allocation"))?; + write_dylinker_command::(dylinker_command, dylinker_path_buffer); + + let chained_fixups_command: &mut DyldChainedFixupsCommand = + from_bytes_mut(buffers.get_mut(part_id::DYLD_CHAINED_FIXUPS)) + .map_err(|_| error!("Invalid DYLD_CHAINED_FIXUPS command allocation"))? 
+ .0; + write_dyld_chained_fixups_command::(layout, chained_fixups_command); + + let chained_fixup_table = buffers.get_mut(part_id::CHAINED_FIXUP_TABLE); + chained_fixup_table.fill(0); + let starts_len = size_of::() * (DEFAULT_SEGMENT_COUNT + 1); + let min_len = size_of::() + starts_len; + if chained_fixup_table.len() < min_len { + bail!( + "CHAINED_FIXUP_TABLE allocation too small. Need at least {} bytes, got {}", + min_len, + chained_fixup_table.len() + ); + } + let (chained_fixups_header, rest): (&mut ChainedFixupsHeader, &mut [u8]) = + from_bytes_mut(chained_fixup_table) + .map_err(|_| error!("Invalid chained fixups header allocation"))?; + let (starts_in_image, _) = + slice_from_bytes_mut::>(rest, DEFAULT_SEGMENT_COUNT + 1) + .map_err(|_| error!("Invalid chained fixups starts allocation"))?; + write_chained_fixup_table::(chained_fixups_header, starts_in_image)?; + + // TODO: remove + buffers.get_mut(part_id::STRTAB).write_all(b"x")?; + Ok(()) } @@ -151,7 +190,10 @@ fn populate_file_header>( header.filetype = U32::new(LE, MH_EXECUTE); header.ncmds = U32::new(LE, load_commands_info.segment_sections.len() as u32); header.sizeofcmds = U32::new(LE, load_commands_info.segment_size.file_size as u32); - header.flags = U32::new(LE, 0); + header.flags = U32::new( + LE, + macho::MH_PIE | macho::MH_DYLDLINK | macho::MH_NOUNDEFS | macho::MH_TWOLEVEL, + ); header.reserved = U32::new(LE, 0); } @@ -169,20 +211,6 @@ fn write_pagezero_command>(command: &mut SegmentComman command.flags.set(LE, 0); } -fn write_linkedit_command>(command: &mut SegmentCommand) { - command.cmd.set(LE, LC_SEGMENT_64); - command.cmdsize.set(LE, size_of::() as u32); - command.segname[..SEG_LINKEDIT.len()].copy_from_slice(SEG_LINKEDIT.as_bytes()); - command.vmaddr.set(LE, 0); - command.vmsize.set(LE, 0); - command.fileoff.set(LE, 0); - command.filesize.set(LE, 0); - command.maxprot.set(LE, 0); - command.initprot.set(LE, 0); - command.nsects.set(LE, 0); - command.flags.set(LE, 0); -} - fn 
split_segment_command_buffer( bytes: &mut [u8], section_count: usize, @@ -201,18 +229,38 @@ fn write_segment_commands>( layout: &MachOLayout, buffers: &mut OutputSectionPartMap<&mut [u8]>, ) -> Result { - for (part_id, seg_name, segment_type) in [ - (part_id::TEXT_SEGMENT, SEG_TEXT, SegmentType::Text), - (part_id::DATA_SEGMENT, SEG_DATA, SegmentType::Data), + for (part_id, seg_name, segment_type, segment_sections_type) in [ + ( + part_id::TEXT_SEGMENT, + SEG_TEXT, + SegmentType::Text, + SegmentType::TextSections, + ), + ( + part_id::DATA_SEGMENT, + SEG_DATA, + SegmentType::DataSections, + SegmentType::DataSections, + ), + ( + part_id::LINK_EDIT_SEGMENT, + SEG_LINKEDIT, + SegmentType::LinkeditSections, + SegmentType::LinkeditSections, + ), ] { - let SegmentSectionsInfo { - segment_size, - segment_sections, - } = get_segment_sections(layout, segment_type); + // TODO: write comments + let segment_sections = get_segment_sections(layout, segment_sections_type).segment_sections; + let segment_size = get_segment_sections(layout, segment_type).segment_size; + + let section_count = if segment_sections_type == SegmentType::LinkeditSections { + 0 + } else { + segment_sections.len() + }; let (segment_cmd, sections) = - split_segment_command_buffer(buffers.get_mut(part_id), segment_sections.len())?; + split_segment_command_buffer(buffers.get_mut(part_id), section_count)?; - debug_assert_eq!(sections.len(), segment_sections.len()); let prot_flags = layout .output_sections .section_flags(part_id.output_section_id()) @@ -221,43 +269,58 @@ fn write_segment_commands>( segment_cmd.cmd.set(LE, LC_SEGMENT_64); segment_cmd.cmdsize.set( LE, - (size_of::() + size_of::() * segment_sections.len()) - as u32, + (size_of::() + size_of::() * section_count) as u32, ); segment_cmd.segname[..seg_name.len()].copy_from_slice(seg_name.as_bytes()); segment_cmd.segname[seg_name.len()..].zero(); segment_cmd.vmaddr.set(LE, segment_size.mem_offset); - segment_cmd.vmsize.set(LE, segment_size.mem_size); - 
// TODO: should be likely offset relative to the place after the commands + segment_cmd.vmsize.set( + LE, + segment_size + .mem_size + .next_multiple_of(MACHO_PAGE_ALIGNMENT.value()), + ); segment_cmd.fileoff.set(LE, segment_size.file_offset as u64); - segment_cmd.filesize.set(LE, segment_size.file_size as u64); + segment_cmd.filesize.set( + LE, + segment_size + .file_size + .next_multiple_of(MACHO_PAGE_ALIGNMENT.value() as usize) as u64, + ); segment_cmd.maxprot.set(LE, prot_flags); segment_cmd.initprot.set(LE, prot_flags); segment_cmd.nsects.set(LE, segment_sections.len() as u32); segment_cmd.flags.set(LE, 0); - for (section, (size, section_name, section_flags)) in - sections.iter_mut().zip(segment_sections) - { - let section_name = section_name - .ok_or_else(|| error!("section name must be known"))? - .0; - - section.segname[..seg_name.len()].copy_from_slice(seg_name.as_bytes()); - section.segname[seg_name.len()..].zero(); - section.sectname[..section_name.len()].copy_from_slice(section_name); - section.sectname[section_name.len()..].zero(); - section.addr.set(LE, size.mem_offset); - section.size.set(LE, size.mem_size); - section.offset.set(LE, size.file_offset as u32); - // TODO - section.align.set(LE, 0); - section.reloff.set(LE, 0); - section.nreloc.set(LE, 0); - section.flags.set(LE, section_flags.raw()); - section.reserved1.set(LE, 0); - section.reserved2.set(LE, 0); - section.reserved3.set(LE, 0); + // Sections placed in __LINKEDIT stay hidden: they are never listed as entries of the + // segment command, so `nsects` is zero. + if segment_sections_type == SegmentType::LinkeditSections { + segment_cmd.nsects.set(LE, 0); + } else { + segment_cmd.nsects.set(LE, segment_sections.len() as u32); + for (section, (size, section_name, section_flags)) in + sections.iter_mut().zip(segment_sections) + { + let section_name = section_name + .ok_or_else(|| error!("section name must be known"))?
+ .0; + + section.segname[..seg_name.len()].copy_from_slice(seg_name.as_bytes()); + section.segname[seg_name.len()..].zero(); + section.sectname[..section_name.len()].copy_from_slice(section_name); + section.sectname[section_name.len()..].zero(); + section.addr.set(LE, size.mem_offset); + section.size.set(LE, size.mem_size); + section.offset.set(LE, size.file_offset as u32); + // TODO + section.align.set(LE, 0); + section.reloff.set(LE, 0); + section.nreloc.set(LE, 0); + section.flags.set(LE, section_flags.raw()); + section.reserved1.set(LE, 0); + section.reserved2.set(LE, 0); + section.reserved3.set(LE, 0); + } } } Ok(()) @@ -333,7 +396,8 @@ fn write_entry_point_command>( layout: &MachOLayout, command: &mut EntryPointCommand, ) { - let SegmentSectionsInfo { segment_size, .. } = get_segment_sections(layout, SegmentType::Text); + let SegmentSectionsInfo { segment_size, .. } = + get_segment_sections(layout, SegmentType::TextSections); command.cmd.set(LE, LC_MAIN); command @@ -342,3 +406,80 @@ fn write_entry_point_command>( command.entryoff.set(LE, segment_size.file_offset as u64); command.stacksize.set(LE, 0); } + +fn write_dylinker_command>( + command: &mut DylinkerCommand, + path_buffer: &mut [u8], +) { + command.cmd.set(LE, LC_LOAD_DYLINKER); + command.cmdsize.set( + LE, + ((size_of::() + DYLINKER_PATH.len()).next_multiple_of(8)) as u32, + ); + command + .name + .offset + .set(LE, size_of::() as u32); + + let path_buffer_len = DYLINKER_PATH.len() + 1; + + path_buffer[0..DYLINKER_PATH.len()].copy_from_slice(DYLINKER_PATH.as_bytes()); + // The string size is always a multiple of 8B. 
+ path_buffer[DYLINKER_PATH.len()..].zero(); +} + +fn write_dyld_chained_fixups_command>( + layout: &MachOLayout, + command: &mut DyldChainedFixupsCommand, +) { + let chained_fixup_table = layout + .section_layouts + .get(output_section_id::CHAINED_FIXUP_TABLE); + + command.cmd.set(LE, LC_DYLD_CHAINED_FIXUPS); + command + .cmdsize + .set(LE, size_of::() as u32); + command + .dataoff + .set(LE, chained_fixup_table.file_offset as u32); + command + .datasize + .set(LE, chained_fixup_table.file_size as u32); +} + +fn write_chained_fixup_table>( + header: &mut ChainedFixupsHeader, + starts_in_image: &mut [U32], +) -> Result { + let starts_len = size_of::() * (DEFAULT_SEGMENT_COUNT + 1); + if starts_in_image.len() != DEFAULT_SEGMENT_COUNT + 1 { + bail!( + "Invalid chained fixups starts allocation. Expected {} entries, got {}", + DEFAULT_SEGMENT_COUNT + 1, + starts_in_image.len() + ); + } + + header.fixups_version.set(LE, 0); + header + .starts_offset + .set(LE, size_of::() as u32); + header + .imports_offset + .set(LE, (size_of::() + starts_len) as u32); + header + .symbols_offset + .set(LE, (size_of::() + starts_len) as u32); + header.imports_count.set(LE, 0); + header.imports_format.set( + LE, + DyldChainedFixupsImporstFormat::DYLD_CHAINED_IMPORT as u32, + ); + header.symbols_format.set(LE, 0); + + starts_in_image[0].set(LE, DEFAULT_SEGMENT_COUNT as u32); + starts_in_image[1..].fill(U32::new(LE, 0)); + + Ok(()) +} diff --git a/libwild/src/output_section_id.rs b/libwild/src/output_section_id.rs index c622caaef..7d1e6730e 100644 --- a/libwild/src/output_section_id.rs +++ b/libwild/src/output_section_id.rs @@ -104,9 +104,13 @@ pub(crate) const SYMTAB_SHNDX_GLOBAL: OutputSectionId = pub(crate) const PAGEZERO_SEGMENT: OutputSectionId = part_id::PAGEZERO_SEGMENT.output_section_id(); pub(crate) const TEXT_SEGMENT: OutputSectionId = part_id::TEXT_SEGMENT.output_section_id(); pub(crate) const DATA_SEGMENT: OutputSectionId = part_id::DATA_SEGMENT.output_section_id(); -pub(crate) 
const ENTRY_POINT: OutputSectionId = part_id::ENTRY_POINT.output_section_id(); pub(crate) const LINK_EDIT_SEGMENT: OutputSectionId = part_id::LINK_EDIT_SEGMENT.output_section_id(); +pub(crate) const ENTRY_POINT: OutputSectionId = part_id::ENTRY_POINT.output_section_id(); +pub(crate) const DYLD_CHAINED_FIXUPS: OutputSectionId = + part_id::DYLD_CHAINED_FIXUPS.output_section_id(); +pub(crate) const CHAINED_FIXUP_TABLE: OutputSectionId = + part_id::CHAINED_FIXUP_TABLE.output_section_id(); // Regular sections copied from the input objects. pub(crate) const RODATA: OutputSectionId = OutputSectionId::regular(0); diff --git a/libwild/src/output_section_part_map.rs b/libwild/src/output_section_part_map.rs index 30a6615b2..72b4f124f 100644 --- a/libwild/src/output_section_part_map.rs +++ b/libwild/src/output_section_part_map.rs @@ -256,6 +256,9 @@ fn test_merge_parts() { output_section_id::CSTRING, output_section_id::ENTRY_POINT, output_section_id::LINK_EDIT_SEGMENT, + output_section_id::ENTRY_POINT, + output_section_id::DYLD_CHAINED_FIXUPS, + output_section_id::CHAINED_FIXUP_TABLE, ]; let mut sum_of_sums = 0; sum_of_1s.for_each(|section_id, sum| { diff --git a/libwild/src/part_id.rs b/libwild/src/part_id.rs index 53eccd01f..a4a32e5e4 100644 --- a/libwild/src/part_id.rs +++ b/libwild/src/part_id.rs @@ -53,8 +53,10 @@ pub(crate) const TEXT_SEGMENT: PartId = PartId(32); pub(crate) const DATA_SEGMENT: PartId = PartId(33); pub(crate) const LINK_EDIT_SEGMENT: PartId = PartId(34); pub(crate) const ENTRY_POINT: PartId = PartId(35); +pub(crate) const DYLD_CHAINED_FIXUPS: PartId = PartId(36); +pub(crate) const CHAINED_FIXUP_TABLE: PartId = PartId(37); -pub(crate) const NUM_SINGLE_PART_SECTIONS: u32 = 36; +pub(crate) const NUM_SINGLE_PART_SECTIONS: u32 = 38; #[cfg(test)] pub(crate) const NUM_BUILT_IN_PARTS: usize = NUM_SINGLE_PART_SECTIONS as usize diff --git a/libwild/src/platform.rs b/libwild/src/platform.rs index fe5697f9d..5646b9e2e 100644 --- a/libwild/src/platform.rs +++ 
b/libwild/src/platform.rs @@ -290,6 +290,15 @@ pub(crate) trait Platform: Copy + Send + Sync + Sized + std::fmt::Debug + 'stati /// Resolves a reference to the frame data section. fn frame_data_base_address(memory_offsets: &OutputSectionPartMap) -> u64; + /// Aligns the start of a load segment. Every platform must implement this to coordinate the + /// file and memory offsets when a segment boundary is introduced. + fn align_load_segment_start( + _segment_def: Self::ProgramSegmentDef, + segment_alignment: Alignment, + file_offset: &mut usize, + mem_offset: &mut u64, + ); + /// Called after GC phase has completed. Mostly useful for platform-specific logging. fn finalise_find_required_sections(groups: &[layout::GroupState]); diff --git a/libwild/src/verification.rs b/libwild/src/verification.rs index f1631edb8..27d03d997 100644 --- a/libwild/src/verification.rs +++ b/libwild/src/verification.rs @@ -117,6 +117,8 @@ pub(crate) fn clear_ignored(expected: &mut OutputSectionPartMap) { part_id::PAGEZERO_SEGMENT, part_id::LINK_EDIT_SEGMENT, part_id::ENTRY_POINT, + part_id::DYLD_CHAINED_FIXUPS, + part_id::CHAINED_FIXUP_TABLE, ]; for part_id in IGNORED {