diff --git a/.gitignore b/.gitignore index 1c31bc6ef..f0d03371c 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,4 @@ fakes-debug/sde-cet-checker-out.txt *.bench-results .DS_Store *.rcgu.o +.claude/settings.local.json diff --git a/Cargo.lock b/Cargo.lock index 5f9888414..a1775846f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -205,10 +205,19 @@ dependencies = [ "cc", "cfg-if", "constant_time_eq", - "cpufeatures", + "cpufeatures 0.3.0", "rayon-core", ] +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "block2" version = "0.6.2" @@ -388,6 +397,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + [[package]] name = "cpufeatures" version = "0.3.0" @@ -446,6 +464,16 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "debugid" version = "0.8.0" @@ -494,6 +522,16 @@ dependencies = [ "thousands", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "dispatch2" version = "0.3.0" @@ -591,6 +629,16 @@ version = "0.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.3.4" @@ -870,12 +918,14 @@ dependencies = [ "perf-event", "perfetto-recorder", "rayon", + "sha2", "sharded-offset-map", "sharded-vec-writer 0.4.0", "smallvec", "symbolic-common", "symbolic-demangle", "tempfile", + "text-stub-library", "thread_local", "tracing", "tracing-subscriber", @@ -885,6 +935,12 @@ dependencies = [ "zstd", ] +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "linker-diff" version = "0.8.0" @@ -1581,6 +1637,12 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + [[package]] name = "scopeguard" version = "1.2.0" @@ -1658,6 +1720,30 @@ dependencies = [ "serde_core", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" 
+dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "digest", +] + [[package]] name = "sharded-offset-map" version = "0.2.0" @@ -1828,6 +1914,17 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "text-stub-library" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e48070939e80c2662b5dd403a0b09cb97e8467a248d67e373e23f85dbdacd882" +dependencies = [ + "serde", + "serde_yaml", + "yaml-rust", +] + [[package]] name = "thiserror" version = "1.0.69" @@ -1968,6 +2065,12 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + [[package]] name = "unicode-ident" version = "1.0.24" @@ -1998,6 +2101,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "utf8parse" version = "0.2.2" @@ -2015,6 +2124,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "wait-timeout" version = "0.2.1" @@ -2311,6 +2426,15 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] + [[package]] name = "zerocopy" version = "0.8.48" diff --git a/Cargo.toml b/Cargo.toml index afc6d7685..4bac13f2f 100644 --- 
a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ edition = "2024" [workspace.dependencies] anyhow = "1.0.97" +sha2 = "0.10" ar = "0.9.0" atomic-take = "1.0.0" bitflags = "2.4.0" diff --git a/libwild/Cargo.toml b/libwild/Cargo.toml index 765d8d9d9..524396832 100644 --- a/libwild/Cargo.toml +++ b/libwild/Cargo.toml @@ -39,6 +39,7 @@ memmap2 = { workspace = true } object = { workspace = true } perfetto-recorder = { workspace = true } rayon = { workspace = true } +sha2 = { workspace = true } sharded-offset-map = { workspace = true } sharded-vec-writer = { workspace = true } smallvec = { workspace = true } @@ -51,6 +52,7 @@ uuid = { workspace = true } winnow = { workspace = true } zerocopy = { workspace = true } zstd = { workspace = true } +text-stub-library = "0.9.0" [target.'cfg(all(target_os = "linux", any(target_arch = "x86_64", target_arch = "aarch64")))'.dependencies] perf-event = { workspace = true } diff --git a/libwild/src/args/macho.rs b/libwild/src/args/macho.rs index fed1fe695..999d93342 100644 --- a/libwild/src/args/macho.rs +++ b/libwild/src/args/macho.rs @@ -1,27 +1,61 @@ -// TODO +// Mach-O argument parsing for the macOS linker driver interface. 
#![allow(unused_variables)] -#![allow(unused)] -use crate::args::ArgumentParser; use crate::args::CommonArgs; -use crate::args::FILES_PER_GROUP_ENV; +use crate::args::Input; +use crate::args::InputSpec; use crate::args::Modifiers; -use crate::args::REFERENCE_LINKER_ENV; use crate::args::RelocationModel; -use crate::ensure; +use crate::error::Context as _; use crate::error::Result; use crate::platform; -use crate::save_dir::SaveDir; -use jobserver::Client; use std::path::Path; +use std::path::PathBuf; use std::sync::Arc; #[derive(Debug)] pub struct MachOArgs { pub(crate) common: super::CommonArgs, - pub(crate) output: Arc, pub(crate) relocation_model: RelocationModel, + pub(crate) lib_search_paths: Vec>, + pub(crate) syslibroot: Option>, + pub(crate) entry_symbol: Option>, + pub(crate) explicit_entry: bool, + pub(crate) strip_locals: bool, + pub(crate) is_dylib: bool, + pub(crate) is_relocatable: bool, + #[allow(dead_code)] + pub(crate) install_name: Option>, + /// Additional dylibs to emit LC_LOAD_DYLIB for (from -l flags resolving to .tbd stubs). + pub(crate) extra_dylibs: Vec>, + /// Symbols to force as undefined (-u flag), triggering archive member loading. + pub(crate) force_undefined: Vec, + /// Symbols exported by linked dylibs (from .tbd parsing). Used to distinguish + /// undefined symbols that are dylib imports from truly missing symbols. + pub(crate) dylib_symbols: std::collections::HashSet>, + /// Whether to skip ad-hoc code signing (-no_adhoc_codesign). + pub(crate) no_adhoc_codesign: bool, + /// LC_RPATH entries from -rpath flags. + pub(crate) rpaths: Vec>, + /// Whether to omit LC_FUNCTION_STARTS (-no_function_starts). + pub(crate) no_function_starts: bool, + /// Custom stack size from -stack_size. + pub(crate) stack_size: Option, + /// Whether to omit LC_DATA_IN_CODE (-no_data_in_code_info). + pub(crate) no_data_in_code: bool, + /// Minimum OS version for LC_BUILD_VERSION (encoded as Mach-O packed version). 
+ pub(crate) minos: Option, + /// SDK version for LC_BUILD_VERSION (encoded as Mach-O packed version). + pub(crate) sdk_version: Option, + /// The name used for UUID hashing (from -final_output). Falls back to output path. + pub(crate) final_output: Option, + /// Whether to omit LC_UUID. + pub(crate) no_uuid: bool, + /// Whether to emit a random UUID instead of deterministic. + pub(crate) random_uuid: bool, + /// Additional empty sections from -add_empty_section (segname, sectname). + pub(crate) empty_sections: Vec<([u8; 16], [u8; 16])>, } impl MachOArgs { @@ -37,10 +71,30 @@ impl Default for MachOArgs { fn default() -> Self { Self { common: CommonArgs::default(), - - // TODO: move to CommonArgs relocation_model: RelocationModel::NonRelocatable, output: Arc::from(Path::new("a.out")), + lib_search_paths: Vec::new(), + syslibroot: None, + entry_symbol: Some(b"_main".to_vec()), + explicit_entry: false, + strip_locals: false, + is_dylib: false, + is_relocatable: false, + install_name: None, + extra_dylibs: Vec::new(), + force_undefined: Vec::new(), + dylib_symbols: Default::default(), + no_adhoc_codesign: false, + rpaths: Vec::new(), + no_function_starts: false, + stack_size: None, + no_data_in_code: false, + minos: None, + sdk_version: None, + final_output: None, + no_uuid: false, + random_uuid: false, + empty_sections: Vec::new(), } } } @@ -55,49 +109,52 @@ impl platform::Args for MachOArgs { } fn should_strip_debug(&self) -> bool { - todo!() + false } - fn should_strip_all(&self) -> bool { false } fn entry_symbol_name<'a>(&'a self, linker_script_entry: Option<&'a [u8]>) -> &'a [u8] { - // TODO: probably add option - b"_main" + linker_script_entry + .or(self.entry_symbol.as_deref()) + .unwrap_or(b"_main") + } + + fn has_explicit_entry(&self) -> bool { + self.explicit_entry } fn lib_search_path(&self) -> &[Box] { - todo!() + &self.lib_search_paths } fn output(&self) -> &std::sync::Arc { &self.output } - fn common(&self) -> &crate::args::CommonArgs { &self.common } - 
fn common_mut(&mut self) -> &mut crate::args::CommonArgs { &mut self.common } - fn should_export_all_dynamic_symbols(&self) -> bool { - todo!() + false + } + fn should_export_dynamic(&self, _lib_name: &[u8]) -> bool { + false } - fn should_export_dynamic(&self, lib_name: &[u8]) -> bool { - todo!() + fn force_undefined_symbol_names(&self) -> &[String] { + &self.force_undefined } fn loadable_segment_alignment(&self) -> crate::alignment::Alignment { - todo!() + crate::alignment::Alignment { exponent: 14 } // 16KB pages } fn should_merge_sections(&self) -> bool { - // TODO - true + false } fn relocation_model(&self) -> crate::args::RelocationModel { @@ -105,51 +162,543 @@ impl platform::Args for MachOArgs { } fn should_output_executable(&self) -> bool { - // TODO - true + !self.is_dylib && !self.is_relocatable + } + + fn should_output_partial_object(&self) -> bool { + self.is_relocatable } } -// Parse the supplied input arguments, which should not include the program name. +/// Parse macOS linker arguments. Handles the ld64-compatible flags that clang passes. 
pub(crate) fn parse, I: Iterator>( args: &mut MachOArgs, mut input: I, ) -> Result { let mut modifier_stack = vec![Modifiers::default()]; - let arg_parser = setup_argument_parser(); while let Some(arg) = input.next() { let arg = arg.as_ref(); - arg_parser.handle_argument(args, &mut modifier_stack, arg, &mut input)?; + // Handle @response files + if let Some(path) = arg.strip_prefix('@') { + let file_args = crate::args::read_args_from_file(Path::new(path))?; + // Re-parse the file contents (simplified - no recursion limit) + let mut file_iter = file_args.iter().map(|s| s.as_str()); + while let Some(file_arg) = file_iter.next() { + parse_one_arg(args, file_arg, &mut file_iter, &mut modifier_stack)?; + } + continue; + } + + parse_one_arg(args, arg, &mut input, &mut modifier_stack)?; + } + + Ok(()) +} + +fn parse_one_arg<'a, S: AsRef, I: Iterator>( + args: &mut MachOArgs, + arg: &str, + input: &mut I, + modifier_stack: &mut Vec, +) -> Result { + // Flags that take a following argument (must be checked before prefix matching) + match arg { + "-help" | "--help" => { + println!("Usage: wild [options] file..."); + println!(" Wild — a fast linker"); + std::process::exit(0); + } + "-o" | "--output" => { + if let Some(val) = input.next() { + args.output = Arc::from(Path::new(val.as_ref())); + } + return Ok(()); + } + "--time" => { + args.common.time_phase_options = Some(Vec::new()); + return Ok(()); + } + "-arch" => { + input.next(); + return Ok(()); + } // consume and ignore + "-syslibroot" => { + if let Some(val) = input.next() { + args.syslibroot = Some(Box::from(Path::new(val.as_ref()))); + } + return Ok(()); + } + "-e" => { + if let Some(val) = input.next() { + args.entry_symbol = Some(val.as_ref().as_bytes().to_vec()); + args.explicit_entry = true; + } + return Ok(()); + } + "-u" => { + if let Some(val) = input.next() { + args.force_undefined.push(val.as_ref().to_string()); + } + return Ok(()); + } + // Flags that take 1 argument, ignored + "-install_name" => { + if let 
Some(val) = input.next() { + args.install_name = Some(val.as_ref().as_bytes().to_vec()); + } + return Ok(()); + } + "-rpath" => { + if let Some(val) = input.next() { + args.rpaths.push(val.as_ref().as_bytes().to_vec()); + } + return Ok(()); + } + "-lto_library" + | "-mllvm" + | "-headerpad" + | "-compatibility_version" + | "-current_version" + | "-object_path_lto" + | "-order_file" + | "-exported_symbols_list" + | "-unexported_symbols_list" + | "-framework" + | "-weak_framework" + | "-weak_library" + | "-reexport_library" + | "-umbrella" + | "-allowable_client" + | "-client_name" + | "-sub_library" + | "-sub_umbrella" + | "-objc_abi_version" + | "-add_ast_path" + | "-dependency_info" + | "-map" + | "-pagezero_size" + | "-image_base" + | "-oso_prefix" + | "-needed_framework" => { + input.next(); // consume the argument + return Ok(()); + } + // -sectcreate takes 3 arguments: segname sectname file + "-sectcreate" => { + input.next(); // segname + input.next(); // sectname + input.next(); // file + return Ok(()); + } + // -add_empty_section takes 2 arguments: segname sectname + "-add_empty_section" => { + if let (Some(seg), Some(sect)) = (input.next(), input.next()) { + let mut segname = [0u8; 16]; + let mut sectname = [0u8; 16]; + let seg_bytes = seg.as_ref().as_bytes(); + let sect_bytes = sect.as_ref().as_bytes(); + segname[..seg_bytes.len().min(16)] + .copy_from_slice(&seg_bytes[..seg_bytes.len().min(16)]); + sectname[..sect_bytes.len().min(16)] + .copy_from_slice(§_bytes[..sect_bytes.len().min(16)]); + args.empty_sections.push((segname, sectname)); + } + return Ok(()); + } + // -platform_version takes 3 arguments: platform min_version sdk_version + "-platform_version" => { + input.next(); // platform (ignored, always macos) + if let Some(v) = input.next() { + args.minos = Some(parse_macho_version(v.as_ref())); + } + if let Some(v) = input.next() { + args.sdk_version = Some(parse_macho_version(v.as_ref())); + } + return Ok(()); + } + "-macos_version_min" => { + if 
let Some(v) = input.next() { + args.minos = Some(parse_macho_version(v.as_ref())); + } + return Ok(()); + } + "-force_load" => { + if let Some(val) = input.next() { + let path = Path::new(val.as_ref()); + let mut mods = *modifier_stack.last().unwrap(); + mods.whole_archive = true; + args.common.inputs.push(Input { + spec: InputSpec::File(Box::from(path)), + search_first: None, + modifiers: mods, + }); + } + return Ok(()); + } + // Flags that take 1 argument, ignored (group 2) + "-undefined" | "-multiply_defined" | "-upward-l" | "-alignment" => { + input.next(); + return Ok(()); + } + // No-argument flags, ignored + "-demangle" + | "-dynamic" + | "-no_deduplicate" + | "-no_compact_unwind" + | "-dead_strip" + | "-dead_strip_dylibs" + | "-headerpad_max_install_names" + | "-export_dynamic" + | "-application_extension" + | "-no_objc_category_merging" + | "-mark_dead_strippable_dylib" + | "-ObjC" + | "-no_implicit_dylibs" + | "-search_paths_first" + | "-search_dylibs_first" + | "-two_levelnamespace" + | "-flat_namespace" + | "-bind_at_load" + | "-pie" + | "-no_pie" + | "-execute" + | "-bundle" + | "-no_fixup_chains" + | "-fixup_chains" + | "-adhoc_codesign" + | "-S" + | "-w" + | "-Z" + | "-data_in_code_info" + | "-function_starts" + | "-subsections_via_symbols" + | "-reproducible" => { + return Ok(()); + } + "-all_load" => { + modifier_stack.last_mut().unwrap().whole_archive = true; + return Ok(()); + } + "-noall_load" => { + modifier_stack.last_mut().unwrap().whole_archive = false; + return Ok(()); + } + "-dylib" | "-dynamiclib" => { + args.is_dylib = true; + args.entry_symbol = None; // dylibs have no entry point + return Ok(()); + } + "-x" => { + args.strip_locals = true; + return Ok(()); + } + "-no_adhoc_codesign" => { + args.no_adhoc_codesign = true; + return Ok(()); + } + "-no_function_starts" => { + args.no_function_starts = true; + return Ok(()); + } + "-final_output" => { + if let Some(val) = input.next() { + args.final_output = Some(val.as_ref().to_string()); + 
} + return Ok(()); + } + "-no_uuid" => { + args.no_uuid = true; + return Ok(()); + } + "-random_uuid" => { + args.random_uuid = true; + return Ok(()); + } + "-no_data_in_code_info" => { + args.no_data_in_code = true; + return Ok(()); + } + "-stack_size" => { + if let Some(val) = input.next() { + let val = val.as_ref(); + args.stack_size = Some( + u64::from_str_radix(val.strip_prefix("0x").unwrap_or(val), 16).unwrap_or(0), + ); + } + return Ok(()); + } + "-r" => { + args.is_relocatable = true; + args.entry_symbol = None; + return Ok(()); + } + "--validate-output" => { + args.common.validate_output = true; + return Ok(()); + } + "-filelist" => { + if let Some(val) = input.next() { + let val = val.as_ref(); + // -filelist [,] + let (file_path, prefix) = if let Some(comma) = val.find(',') { + (&val[..comma], Some(&val[comma + 1..])) + } else { + (val, None) + }; + let content = std::fs::read_to_string(file_path) + .with_context(|| format!("Failed to read filelist `{file_path}`"))?; + for line in content.lines() { + let line = line.trim(); + if line.is_empty() { + continue; + } + let path = if let Some(dir) = prefix { + Path::new(dir).join(line) + } else { + PathBuf::from(line) + }; + args.common.inputs.push(Input { + spec: InputSpec::File(Box::from(path.as_path())), + search_first: None, + modifiers: *modifier_stack.last().unwrap(), + }); + } + } + return Ok(()); + } + _ => {} + } + + // Handle --time= form + if let Some(val) = arg.strip_prefix("--time=") { + args.common.time_phase_options = Some(super::parse_time_phase_options(val)?); + return Ok(()); + } + + // -L (library search path) + if let Some(path) = arg.strip_prefix("-L") { + if path.is_empty() { + if let Some(val) = input.next() { + args.lib_search_paths + .push(Box::from(Path::new(val.as_ref()))); + } + } else { + args.lib_search_paths.push(Box::from(Path::new(path))); + } + return Ok(()); + } + + // -F (framework search path) — ignore for now + if arg.strip_prefix("-F").is_some() { + return Ok(()); + } + + 
// -U (allow undefined, dynamic lookup) + if arg == "-U" { + input.next(); + return Ok(()); + } + + // Prefix link flags: -hidden-l, -needed-l, -reexport-l, -weak-l + if arg.starts_with("-hidden-l") + || arg.starts_with("-needed-l") + || arg.starts_with("-reexport-l") + || arg.starts_with("-weak-l") + { + return Ok(()); + } + + // -l (link library) -- must come after -lto_library check above + if let Some(lib) = arg.strip_prefix("-l") { + if !lib.is_empty() { + // On macOS, libSystem is implicitly linked (we emit LC_LOAD_DYLIB for it). + // Skip it and other system dylibs that we handle implicitly, but still + // parse their .tbd to know which symbols they export. + if lib == "System" || lib == "c" || lib == "m" || lib == "pthread" { + // Still parse .tbd for symbol resolution (including re-exported libs) + let mut search_paths: Vec> = args.lib_search_paths.clone(); + if let Some(ref root) = args.syslibroot { + search_paths.push(Box::from(root.join("usr/lib"))); + } + for dir in &search_paths { + let tbd_path = dir.join(format!("lib{lib}.tbd")); + if tbd_path.exists() { + collect_tbd_symbols(&tbd_path, &mut args.dylib_symbols); + // Also collect from re-exported libraries (e.g. libSystem + // re-exports libdyld, libsystem_c, etc. 
from system/ subdir) + let system_dir = dir.join("system"); + if system_dir.is_dir() { + if let Ok(entries) = std::fs::read_dir(&system_dir) { + for entry in entries.flatten() { + let p = entry.path(); + if p.extension().map_or(false, |e| e == "tbd") { + collect_tbd_symbols(&p, &mut args.dylib_symbols); + } + } + } + } + break; + } + } + return Ok(()); + } + // Try to find the library on the search path, including syslibroot + let mut found = false; + let extensions = [".tbd", ".dylib", ".a"]; + let mut search_paths: Vec> = args.lib_search_paths.clone(); + if let Some(ref root) = args.syslibroot { + search_paths.push(Box::from(root.join("usr/lib"))); + search_paths.push(Box::from(root.join("usr/lib/swift"))); + } + for ext in &extensions { + let filename = format!("lib{lib}{ext}"); + for dir in &search_paths { + let path = dir.join(&filename); + if path.exists() { + // .tbd files are text-based dylib stubs. Parse the + // install-name so we can emit LC_LOAD_DYLIB for it. + if *ext == ".tbd" { + if let Some(dylib_path) = parse_tbd_install_name(&path) { + if !args.extra_dylibs.contains(&dylib_path) { + args.extra_dylibs.push(dylib_path); + } + } + collect_tbd_symbols(&path, &mut args.dylib_symbols); + found = true; + break; + } + if *ext == ".dylib" { + // For .dylib files found via -l, emit LC_LOAD_DYLIB + // using the file's install name (from LC_ID_DYLIB). + // For simplicity, use the path as the install name. 
+ let install = path.to_string_lossy().as_bytes().to_vec(); + if !args.extra_dylibs.contains(&install) { + args.extra_dylibs.push(install); + } + found = true; + break; + } + args.common.inputs.push(Input { + spec: InputSpec::File(Box::from(path.as_path())), + search_first: None, + modifiers: *modifier_stack.last().unwrap(), + }); + found = true; + break; + } + } + if found { + break; + } + } + // If not found, warn but don't error (might be a system dylib we handle implicitly) + if !found { + tracing::warn!("library not found: -l{lib}"); + } + } + return Ok(()); + } + + // Unknown flags starting with - go to unrecognized + if arg.starts_with('-') { + args.common.unrecognized_options.push(arg.to_owned()); + return Ok(()); } + // Positional argument = input file + args.common.save_dir.handle_file(arg); + args.common.inputs.push(Input { + spec: InputSpec::File(Box::from(Path::new(arg))), + search_first: None, + modifiers: *modifier_stack.last().unwrap(), + }); + Ok(()) } -fn setup_argument_parser() -> ArgumentParser { - let mut parser = ArgumentParser::::new(); - - parser - .declare_with_param() - .long("output") - .short("o") - .help("Set the output filename") - .execute(|args, _modifier_stack, value| { - args.output = Arc::from(Path::new(value)); - Ok(()) - }); - parser - .declare_with_optional_param() - .long("time") - .help("Show timing information") - .execute(|args, _modifier_stack, value| { - args.common.time_phase_options = match value { - Some(v) => Some(super::parse_time_phase_options(v)?), - None => Some(Vec::new()), - }; - Ok(()) - }); - - parser +/// Extract `install-name` from a .tbd (text-based dylib stub) file. 
+fn parse_tbd_install_name(path: &Path) -> Option> { + let content = std::fs::read_to_string(path).ok()?; + for line in content.lines() { + let trimmed = line.trim(); + if let Some(rest) = trimmed.strip_prefix("install-name:") { + let name = rest.trim().trim_matches('\'').trim_matches('"'); + if !name.is_empty() { + return Some(name.as_bytes().to_vec()); + } + } + } + None +} + +/// Parse a Mach-O version string like "10.9" or "13.5.1" into packed u32 format: +/// major<<16 | minor<<8 | patch. +fn parse_macho_version(s: &str) -> u32 { + let mut parts = s.split('.'); + let major = parts + .next() + .and_then(|p| p.parse::().ok()) + .unwrap_or(0); + let minor = parts + .next() + .and_then(|p| p.parse::().ok()) + .unwrap_or(0); + let patch = parts + .next() + .and_then(|p| p.parse::().ok()) + .unwrap_or(0); + (major << 16) | (minor << 8) | patch +} + +/// Collect exported symbols from a .tbd file into the given set. +fn collect_tbd_symbols(path: &Path, symbols: &mut std::collections::HashSet>) { + let content = match std::fs::read_to_string(path) { + Ok(c) => c, + Err(_) => return, + }; + let records = match text_stub_library::parse_str(&content) { + Ok(r) => r, + Err(_) => return, + }; + for record in &records { + match record { + text_stub_library::TbdVersionedRecord::V4(v4) => { + let is_arm64 = |targets: &[String]| -> bool { + targets.is_empty() + || targets + .iter() + .any(|t| t.starts_with("arm64-") || t.starts_with("arm64e-")) + }; + for exp in &v4.exports { + if !is_arm64(&exp.targets) { + continue; + } + for sym in &exp.symbols { + symbols.insert(sym.as_bytes().to_vec()); + } + for sym in &exp.weak_symbols { + symbols.insert(sym.as_bytes().to_vec()); + } + } + for exp in &v4.re_exports { + if !is_arm64(&exp.targets) { + continue; + } + for sym in &exp.symbols { + symbols.insert(sym.as_bytes().to_vec()); + } + } + } + text_stub_library::TbdVersionedRecord::V3(v3) => { + for exp in &v3.exports { + for sym in &exp.symbols { + 
symbols.insert(sym.as_bytes().to_vec()); + } + } + } + _ => {} + } + } } diff --git a/libwild/src/eh_frame.rs b/libwild/src/eh_frame.rs new file mode 100644 index 000000000..a5e246c8c --- /dev/null +++ b/libwild/src/eh_frame.rs @@ -0,0 +1,60 @@ +//! Shared __eh_frame types and parsing logic. +//! +//! The CIE/FDE format is identical between ELF and Mach-O. This module provides +//! platform-generic types and a parsing function that both can reuse. + +use crate::platform::FrameIndex; +use crate::platform::Relocation; +use crate::symbol_db::SymbolId; +use smallvec::SmallVec; +use zerocopy::FromBytes; + +/// Prefix of every CIE or FDE entry in __eh_frame. +/// This format is identical between ELF and Mach-O. +#[derive(FromBytes, Clone, Copy)] +#[repr(C)] +pub(crate) struct EhFrameEntryPrefix { + pub(crate) length: u32, + pub(crate) cie_id: u32, +} + +/// The offset of the pc_begin field in an FDE (after the length + cie_pointer). +pub(crate) const FDE_PC_BEGIN_OFFSET: usize = 8; + +/// A stored exception frame (CIE or FDE) with its associated relocations. +/// +/// `R` is the concrete relocation type. The relocations are stored as a +/// subsequence of the parent sequence, parameterized by `R::Sequence<'data>`. +#[derive(Default)] +pub(crate) struct ExceptionFrame<'data, R: Relocation> { + /// The relocations that need to be processed if we load this frame. + pub(crate) relocations: R::Sequence<'data>, + + /// Number of bytes required to store this frame. + pub(crate) frame_size: u32, + + /// The index of the previous frame that is for the same section. + pub(crate) previous_frame_for_section: Option, +} + +/// Accumulated sizes for eh_frame output. +pub(crate) struct EhFrameSizes { + pub(crate) num_frames: u64, + pub(crate) eh_frame_size: u64, +} + +/// A "common information entry". Part of __eh_frame data. 
+#[derive(PartialEq, Eq, Hash)] +pub(crate) struct Cie<'data> { + pub(crate) bytes: &'data [u8], + pub(crate) eligible_for_deduplication: bool, + pub(crate) referenced_symbols: SmallVec<[SymbolId; 1]>, +} + +/// A CIE stored with its offset within __eh_frame. +pub(crate) struct CieAtOffset<'data> { + /// Offset within __eh_frame. + #[allow(dead_code)] + pub(crate) offset: u32, + pub(crate) cie: Cie<'data>, +} diff --git a/libwild/src/elf.rs b/libwild/src/elf.rs index d8ebb8766..cb98ac5c4 100644 --- a/libwild/src/elf.rs +++ b/libwild/src/elf.rs @@ -2473,6 +2473,11 @@ fn process_eh_frame_relocations<'data, 'scope, A: Arch, R: Reloc relocations: &R::Sequence<'data>, scope: &Scope<'scope>, ) -> Result>> { + // NOTE: This function keeps its original inline implementation rather than + // delegating to eh_frame::parse_eh_frame_entries because the Rust type + // system can't prove that without adding a bound + // to the Relocation trait. The generic function IS used by Mach-O. + // TODO: Add the round-trip bound to Relocation and unify. const PREFIX_LEN: usize = size_of::(); let mut rel_iter = relocations.rel_iter().enumerate().peekable(); @@ -2480,10 +2485,6 @@ fn process_eh_frame_relocations<'data, 'scope, A: Arch, R: Reloc let mut exception_frames = Vec::new(); while offset + PREFIX_LEN <= data.len() { - // Although the section data will be aligned within the object file, there's - // no guarantee that the object is aligned within the archive to any more - // than 2 bytes, so we can't rely on alignment here. Archives are annoying! 
- // See https://www.airs.com/blog/archives/170 let prefix = EhFrameEntryPrefix::read_from_bytes(&data[offset..offset + PREFIX_LEN]).unwrap(); let size = size_of_val(&prefix.length) + prefix.length as usize; @@ -2494,21 +2495,14 @@ fn process_eh_frame_relocations<'data, 'scope, A: Arch, R: Reloc } if prefix.cie_id == 0 { - // This is a CIE let mut referenced_symbols: SmallVec<[SymbolId; 1]> = Default::default(); - // When deduplicating CIEs, we take into consideration the bytes of the CIE and all the - // symbols it references. If however, it references something other than a symbol, then, - // because we're not taking that into consideration, we disallow deduplication. let mut eligible_for_deduplication = true; while let Some((_, rel)) = rel_iter.peek() { let rel_offset = rel.offset(); if rel_offset >= next_offset as u64 { - // This relocation belongs to the next entry. break; } - // We currently always load all CIEs, so any relocations found in CIEs always need - // to be processed. process_relocation:: as RelocationSequence>::Rel>( object, common, @@ -2539,7 +2533,6 @@ fn process_eh_frame_relocations<'data, 'scope, A: Arch, R: Reloc }, }); } else { - // This is an FDE let mut section_index = None; let rel_start_index = rel_iter.peek().map_or(0, |(i, _)| *i); let mut rel_end_index = 0; @@ -2564,9 +2557,6 @@ fn process_eh_frame_relocations<'data, 'scope, A: Arch, R: Reloc && let Some(unloaded) = object.sections[section_index.0].unloaded_mut() { let frame_index = FrameIndex::from_usize(exception_frames.len()); - - // Update our unloaded section to point to our new frame. Our frame will then in - // turn point to whatever the section pointed to before. 
let previous_frame_for_section = unloaded.last_frame_index.replace(frame_index); exception_frames.push(ExceptionFrame { @@ -2580,9 +2570,6 @@ fn process_eh_frame_relocations<'data, 'scope, A: Arch, R: Reloc } common.format_specific.exception_frame_count += object.format_specific.exception_frames.len(); - - // Allocate space for any remaining bytes in .eh_frame that aren't large enough to constitute an - // actual entry. crtend.o has a single u32 equal to 0 as an end marker. object.format_specific.eh_frame_size += (data.len() - offset) as u64; Ok(exception_frames) @@ -2923,15 +2910,8 @@ pub(crate) struct EhFrameHdrEntry { pub(crate) frame_info_ptr: i32, } -#[derive(FromBytes, Clone, Copy)] -#[repr(C)] -pub(crate) struct EhFrameEntryPrefix { - pub(crate) length: u32, - pub(crate) cie_id: u32, -} - -/// The offset of the pc_begin field in an FDE. -pub(crate) const FDE_PC_BEGIN_OFFSET: usize = 8; +pub(crate) use crate::eh_frame::EhFrameEntryPrefix; +pub(crate) use crate::eh_frame::FDE_PC_BEGIN_OFFSET; /// Offset in the file where we store the program headers. We always store these straight after the /// file header. @@ -3997,21 +3977,8 @@ fn finalise_gnu_version_size<'data>( } } -/// A "common information entry". This is part of the .eh_frame data in ELF. -#[derive(PartialEq, Eq, Hash)] -struct Cie<'data> { - bytes: &'data [u8], - eligible_for_deduplication: bool, - referenced_symbols: SmallVec<[SymbolId; 1]>, -} - -struct CieAtOffset<'data> { - // TODO: Use or remove. I think we need this when we implement deduplication of CIEs. - /// Offset within .eh_frame - #[allow(dead_code)] - offset: u32, - cie: Cie<'data>, -} +use crate::eh_frame::Cie; +use crate::eh_frame::CieAtOffset; enum ExceptionFrames<'data> { Rela(Vec>), @@ -4024,22 +3991,8 @@ impl<'data> Default for ExceptionFrames<'data> { } } -#[derive(Default)] -struct ExceptionFrame<'data, R: Relocation> { - /// The relocations that need to be processed if we load this frame. 
- relocations: R::Sequence<'data>, - - /// Number of bytes required to store this frame. - frame_size: u32, - - /// The index of the previous frame that is for the same section. - previous_frame_for_section: Option, -} - -struct EhFrameSizes { - num_frames: u64, - eh_frame_size: u64, -} +use crate::eh_frame::EhFrameSizes; +use crate::eh_frame::ExceptionFrame; impl<'data> ExceptionFrames<'data> { fn len(&self) -> usize { diff --git a/libwild/src/file_kind.rs b/libwild/src/file_kind.rs index 262eed43d..26858c410 100644 --- a/libwild/src/file_kind.rs +++ b/libwild/src/file_kind.rs @@ -18,6 +18,7 @@ pub(crate) enum FileKind { ElfObject, ElfDynamic, MachOObject, + FatBinary, Archive, ThinArchive, Text, @@ -72,6 +73,13 @@ impl FileKind { "Expected object file" ); Ok(FileKind::MachOObject) + } else if bytes.len() >= 8 + && (bytes.starts_with(&macho::FAT_MAGIC.to_be_bytes()) + || bytes.starts_with(&macho::FAT_MAGIC_64.to_be_bytes())) + { + // Mach-O universal (fat) binary. Currently not fully supported. + // TODO: extract the arm64 slice and process it. + Ok(FileKind::FatBinary) } else if bytes.is_ascii() { Ok(FileKind::Text) } else if bytes.starts_with(b"BC") { @@ -119,6 +127,7 @@ impl std::fmt::Display for FileKind { FileKind::ElfObject => "ELF object", FileKind::ElfDynamic => "ELF dynamic", FileKind::MachOObject => "MachO object", + FileKind::FatBinary => "fat binary", FileKind::Archive => "archive", FileKind::ThinArchive => "thin archive", FileKind::Text => "text", diff --git a/libwild/src/input_data.rs b/libwild/src/input_data.rs index 2f2d925a3..da7cd461e 100644 --- a/libwild/src/input_data.rs +++ b/libwild/src/input_data.rs @@ -615,6 +615,11 @@ impl<'data, P: Platform> TemporaryState<'data, P> { let kind = FileKind::identify_bytes(&data.bytes)?; match kind { + FileKind::FatBinary => { + // TODO: Extract arm64 slice from universal binary. + // For now, skip fat binaries (e.g. libclang_rt.osx.a). 
+ return Ok(LoadedFileState::Archive(input_file, Vec::new())); + } FileKind::Archive => process_archive(input_file, &Arc::new(file), self), FileKind::ThinArchive => process_thin_archive(input_file, self), FileKind::Text => { @@ -707,7 +712,10 @@ impl<'data, P: Platform> TemporaryState<'data, P> { }))); } - if input_ref.is_archive_entry() && kind != FileKind::ElfObject { + if input_ref.is_archive_entry() + && kind != FileKind::ElfObject + && kind != FileKind::MachOObject + { bail!("Unexpected archive member of kind {kind:?}: {input_ref}"); } diff --git a/libwild/src/layout.rs b/libwild/src/layout.rs index 0933cf0a7..cc41b39c9 100644 --- a/libwild/src/layout.rs +++ b/libwild/src/layout.rs @@ -426,10 +426,12 @@ fn update_dynamic_symbol_resolutions<'data, P: Platform>( }; for (index, sym) in resources.dynamic_symbol_definitions.iter().enumerate() { - let dynamic_symbol_index = NonZeroU32::try_from(epilogue.dynsym_start_index + index as u32) - .expect("Dynamic symbol definitions should start > 0"); - if let Some(res) = &mut resolutions[sym.symbol_id.as_usize()] { - res.dynamic_symbol_index = Some(dynamic_symbol_index); + if let Some(dynamic_symbol_index) = + NonZeroU32::new(epilogue.dynsym_start_index + index as u32) + { + if let Some(res) = &mut resolutions[sym.symbol_id.as_usize()] { + res.dynamic_symbol_index = Some(dynamic_symbol_index); + } } } } @@ -571,6 +573,12 @@ pub(crate) struct SymbolResolutions { resolutions: Vec>>, } +impl SymbolResolutions

{ + pub(crate) fn iter(&self) -> impl Iterator>> { + self.resolutions.iter() + } +} + pub(crate) enum FileLayout<'data, P: Platform> { Prelude(PreludeLayout<'data, P>), Object(ObjectLayout<'data, P>), @@ -1306,6 +1314,12 @@ impl<'data, P: Platform> Layout<'data, P> { return Ok(0); } + // If the user explicitly specified an entry point (via -e), error out. + if self.symbol_db.has_explicit_entry() { + let entry_name = String::from_utf8_lossy(self.symbol_db.entry_symbol_name()); + crate::bail!("undefined entry point symbol: {}", entry_name); + } + // There's no entry point specified, set it to the start of .text. This is pretty weird, // but it's what GNU ld does. let text_layout = self.section_layouts.get(output_section_id::TEXT); @@ -1661,8 +1675,8 @@ fn compute_segment_layout( let r = &complete[id.as_usize()]; let sizes = OutputRecordLayout { - file_size: r.file_end - r.file_start, - mem_size: r.mem_end - r.mem_start, + file_size: r.file_end.saturating_sub(r.file_start), + mem_size: r.mem_end.saturating_sub(r.mem_start), alignment: r.alignment, file_offset: r.file_start, mem_offset: r.mem_start, @@ -3060,7 +3074,7 @@ impl<'data, P: Platform> PreludeLayoutState<'data, P> { } } - if !resources.symbol_db.args.should_output_partial_object() { + if !resources.symbol_db.args.should_output_partial_object() && !keep_segments.is_empty() { // Always keep the program headers segment even though we don't emit any sections in it. keep_segments[0] = true; } @@ -3625,6 +3639,12 @@ impl<'data, P: Platform> ObjectLayoutState<'data, P> { .context("Cannot parse .riscv.attributes section")?; } + // For whole-archive members, ensure all defined symbols get DIRECT flag + // so they receive resolutions and appear in the output symbol table. 
+ if self.input.file.modifiers.whole_archive { + self.load_all_defined_symbols::(common, resources, queue, scope)?; + } + let export_all_dynamic = resources.symbol_db.output_kind == OutputKind::SharedObject && (!self.input.has_archive_semantics() || resources @@ -3903,7 +3923,9 @@ impl<'data, P: Platform> ObjectLayoutState<'data, P> { .symbol_section(local_symbol, local_symbol_index)? { if let Some(section_address) = section_resolutions[section_index.0].address() { - let input_offset = local_symbol.value(); + let input_offset = self + .object + .symbol_value_in_section(local_symbol, section_index)?; let output_offset = opt_input_to_output( self.section_relax_deltas.get(section_index.0), input_offset, @@ -3963,6 +3985,37 @@ impl<'data, P: Platform> ObjectLayoutState<'data, P> { ))) } + /// For whole-archive members, set DIRECT on all defined symbols so they + /// get resolutions during finalisation and appear in the output. + fn load_all_defined_symbols<'scope, A: Arch>( + &mut self, + common: &mut CommonGroupState<'data, P>, + resources: &'scope GraphResources<'data, 'scope, P>, + queue: &mut LocalWorkQueue, + scope: &Scope<'scope>, + ) -> Result { + for (sym_index, sym) in self.object.enumerate_symbols() { + if sym.is_undefined() || sym.is_common() { + continue; + } + // Skip symbols in discarded sections (e.g. __compact_unwind). 
+ if let Ok(Some(sec_idx)) = self.object.symbol_section(sym, sym_index) { + if matches!(self.sections.get(sec_idx.0), Some(SectionSlot::Discard)) { + continue; + } + } + let symbol_id = self.symbol_id_range().input_to_id(sym_index); + let old_flags = resources + .per_symbol_flags + .get_atomic(symbol_id) + .fetch_or(ValueFlags::DIRECT); + if !old_flags.has_resolution() { + self.load_symbol::(common, symbol_id, resources, queue, scope)?; + } + } + Ok(()) + } + fn load_non_hidden_symbols<'scope, A: Arch>( &mut self, common: &mut CommonGroupState<'data, P>, @@ -4595,6 +4648,9 @@ fn layout_section_parts( file_offset = segment_alignment.align_modulo(mem_offset, file_offset as u64) as usize; } else { + // Page-align file_offset at segment boundary. + // This ensures segments don't share pages in the output file. + file_offset = segment_alignment.align_up(file_offset as u64) as usize; mem_offset = segment_alignment.align_modulo(file_offset as u64, mem_offset); } } diff --git a/libwild/src/lib.rs b/libwild/src/lib.rs index 3d68918fa..0a0d13022 100644 --- a/libwild/src/lib.rs +++ b/libwild/src/lib.rs @@ -7,6 +7,7 @@ pub(crate) mod debug_trace; pub(crate) mod diagnostics; pub(crate) mod diff; pub(crate) mod dwarf_address_info; +pub(crate) mod eh_frame; pub(crate) mod elf; pub(crate) mod elf_aarch64; pub(crate) mod elf_loongarch64; diff --git a/libwild/src/macho.rs b/libwild/src/macho.rs index 090f2799e..c55521453 100644 --- a/libwild/src/macho.rs +++ b/libwild/src/macho.rs @@ -1,66 +1,32 @@ -// TODO -#![allow(unused_variables)] -#![allow(unused)] +// Mach-O platform support for wild linker. 
+#![allow(unused_variables, dead_code)] use crate::OutputKind; -use crate::alignment; use crate::args::macho::MachOArgs; use crate::ensure; use crate::error; -use crate::error::Result; -use crate::file_writer::copy_section_data; -use crate::layout::Layout; -use crate::layout::OutputRecordLayout; -use crate::layout_rules::SectionKind; -use crate::layout_rules::SectionRule; -use crate::macho_writer; -use crate::output_section_id; -use crate::output_section_id::NUM_BUILT_IN_SECTIONS; -use crate::output_section_id::OrderEvent; -use crate::output_section_id::OutputOrderBuilder; -use crate::output_section_id::SectionName; -use crate::output_section_id::SectionOutputInfo; -use crate::part_id; use crate::platform; -use crate::symbol_db::Visibility; -use linker_utils::elf::secnames; +use crate::platform::SectionAttributes as _; use object::Endianness; use object::macho; -use object::macho::N_ABS; -use object::macho::N_EXT; -use object::macho::N_PEXT; -use object::macho::N_TYPE; -use object::macho::N_WEAK_DEF; -use object::macho::SEG_DATA; -use object::macho::SEG_LINKEDIT; -use object::macho::SEG_PAGEZERO; -use object::macho::SEG_TEXT; -use object::macho::Section64; use object::read::macho::MachHeader; use object::read::macho::Nlist; -use object::read::macho::Section; -use object::read::macho::Segment; -use std::borrow::Cow; +use object::read::macho::Section as MachOSectionTrait; +use object::read::macho::Segment as MachOSegmentTrait; #[derive(Debug, Copy, Clone)] pub(crate) struct MachO; const LE: Endianness = Endianness::Little; -/// Mach-O uses a zero page for all 32bit addresses and thus we begin the memory -/// offsets right after that (1GiB). 
-pub const MACHO_START_MEM_ADDRESS: u64 = 0x1_0000_0000; - -type SectionHeader = Section64; -type SectionTable<'data> = &'data [Section64]; +type SectionTable<'data> = &'data [macho::Section64]; type SymbolTable<'data> = object::read::macho::SymbolTable<'data, macho::MachHeader64>; -type SymtabEntry = object::macho::Nlist64; -type Relocation = object::macho::Relocation; +pub(crate) type SymtabEntry = macho::Nlist64; -pub(crate) type FileHeader = object::macho::MachHeader64; -pub(crate) type SegmentCommand = object::macho::SegmentCommand64; -pub(crate) type SectionEntry = object::macho::Section64; -pub(crate) type EntryPointCommand = object::macho::EntryPointCommand; +/// Wraps a Mach-O Section64 so we can implement platform traits on it. +#[derive(Debug, Clone, Copy)] +#[repr(transparent)] +pub(crate) struct SectionHeader(pub(crate) macho::Section64); #[derive(derive_more::Debug)] pub(crate) struct File<'data> { @@ -77,7 +43,7 @@ impl<'data> platform::ObjectFile<'data> for File<'data> { type Platform = MachO; fn parse_bytes(input: &'data [u8], is_dynamic: bool) -> crate::error::Result { - let header = macho::MachHeader64::::parse(input, 0)?; + let header = macho::MachHeader64::::parse(input, 0)?; let mut commands = header.load_commands(LE, input, 0)?; let mut symbols = None; @@ -88,16 +54,17 @@ impl<'data> platform::ObjectFile<'data> for File<'data> { ensure!(symbols.is_none(), "At most one symtab command expected"); symbols = Some(symtab_command.symbols::, _>(LE, input)?); } else if let Some((segment_command, segment_data)) = command.segment_64()? { - ensure!(sections.is_none(), "At most one segment command expected"); - let section_list = segment_command.sections(LE, segment_data)?; - sections = Some(section_list); + // Mach-O object files have a single unnamed segment containing all sections. 
+ if sections.is_none() { + sections = Some(segment_command.sections(LE, segment_data)?); + } } } Ok(File { data: input, symbols: symbols.ok_or("Missing symbol table")?, - sections: sections.ok_or("Missing segment command")?, + sections: sections.unwrap_or(&[]), flags: header.flags(LE), }) } @@ -106,12 +73,10 @@ impl<'data> platform::ObjectFile<'data> for File<'data> { input: &crate::input_data::InputBytes<'data>, args: &::Args, ) -> crate::error::Result { - // TODO Self::parse_bytes(input.data, false) } fn is_dynamic(&self) -> bool { - // TODO false } @@ -120,281 +85,363 @@ impl<'data> platform::ObjectFile<'data> for File<'data> { } fn symbols_iter(&self) -> impl Iterator { - for s in self.symbols.iter() { - let name = s.name(LE, self.symbols.strings()).unwrap(); - // TODO: remove - // dbg!(String::from_utf8_lossy(name)); - } - self.symbols.iter() } - fn symbol( - &self, - index: object::SymbolIndex, - ) -> crate::error::Result<&'data ::SymtabEntry> { - Ok(self.symbols.symbol(index)?) + fn symbol(&self, index: object::SymbolIndex) -> crate::error::Result<&'data SymtabEntry> { + self.symbols + .symbol(index) + .map_err(|e| error!("Symbol index {} out of range: {e}", index.0)) } - fn section_size( - &self, - header: &::SectionHeader, - ) -> crate::error::Result { - Ok(header.size.get(LE)) + fn section_size(&self, header: &SectionHeader) -> crate::error::Result { + Ok(header.0.size(LE)) } - fn symbol_name( - &self, - symbol: &::SymtabEntry, - ) -> crate::error::Result<&'data [u8]> { - Ok(symbol.name(LE, self.symbols.strings())?) 
+ fn symbol_name(&self, symbol: &SymtabEntry) -> crate::error::Result<&'data [u8]> { + symbol + .name(LE, self.symbols.strings()) + .map_err(|e| error!("Failed to read symbol name: {e}")) } fn num_sections(&self) -> usize { self.sections.len() } - fn section_iter(&self) -> ::SectionIterator<'data> { - self.sections.iter() + fn section_iter(&self) -> ::SectionIterator<'data> { + MachOSectionIter { + inner: self.sections.iter(), + } } fn enumerate_sections( &self, - ) -> impl Iterator< - Item = ( - object::SectionIndex, - &'data ::SectionHeader, - ), - > { - self.sections - .iter() - .enumerate() - .map(|(i, section)| (object::SectionIndex(i), section)) - } - - fn section( - &self, - index: object::SectionIndex, - ) -> crate::error::Result<&'data ::SectionHeader> { - self.sections - .get(index.0) - .ok_or(error!("section index out of range")) + ) -> impl Iterator { + self.sections.iter().enumerate().map(|(i, section)| { + // Safety: SectionHeader is #[repr(transparent)] over Section64 + let header: &'data SectionHeader = unsafe { + &*(section as *const macho::Section64 as *const SectionHeader) + }; + (object::SectionIndex(i), header) + }) } - fn section_by_name( - &self, - name: &str, - ) -> Option<( - object::SectionIndex, - &'data ::SectionHeader, - )> { - todo!() + fn section(&self, index: object::SectionIndex) -> crate::error::Result<&'data SectionHeader> { + let section = self + .sections + .get(index.0) + .ok_or_else(|| error!("Section index {} out of range", index.0))?; + Ok(unsafe { &*(section as *const macho::Section64 as *const SectionHeader) }) + } + + fn section_by_name(&self, name: &str) -> Option<(object::SectionIndex, &'data SectionHeader)> { + for (i, section) in self.sections.iter().enumerate() { + let sectname = trim_nul(section.sectname()); + if sectname == name.as_bytes() { + let header: &'data SectionHeader = unsafe { + &*(section as *const macho::Section64 as *const SectionHeader) + }; + return Some((object::SectionIndex(i), header)); + } + } + 
None } fn symbol_section( &self, - symbol: &::SymtabEntry, + symbol: &SymtabEntry, index: object::SymbolIndex, ) -> crate::error::Result> { - todo!() + let n_type = symbol.n_type() & macho::N_TYPE; + if n_type == macho::N_SECT { + // n_sect is 1-based in Mach-O + let sect = symbol.n_sect(); + if sect == 0 { + return Ok(None); + } + Ok(Some(object::SectionIndex(sect as usize - 1))) + } else { + Ok(None) + } + } + + fn symbol_value_in_section( + &self, + symbol: &SymtabEntry, + section_index: object::SectionIndex, + ) -> crate::error::Result { + let section = &self.sections[section_index.0]; + let section_addr = section.addr.get(LE); + let sym_value = symbol.n_value(LE); + Ok(sym_value.wrapping_sub(section_addr)) } - fn symbol_versions(&self) -> &[::SymbolVersionIndex] { - todo!() + fn symbol_versions(&self) -> &[()] { + // Mach-O doesn't have symbol versioning + &[] } fn dynamic_symbol_used( &self, - symbol_index: object::SymbolIndex, - state: &mut ::DynamicLayoutStateExt<'data>, + _symbol_index: object::SymbolIndex, + _state: &mut (), ) -> crate::error::Result { - todo!() + Ok(()) } fn finalise_sizes_dynamic( &self, - lib_name: &[u8], - state: &mut ::DynamicLayoutStateExt<'data>, - mem_sizes: &mut crate::output_section_part_map::OutputSectionPartMap, + _lib_name: &[u8], + _state: &mut (), + _mem_sizes: &mut crate::output_section_part_map::OutputSectionPartMap, ) -> crate::error::Result { - todo!() + Ok(()) } fn apply_non_addressable_indexes_dynamic( &self, - indexes: &mut ::NonAddressableIndexes, - counts: &mut ::NonAddressableCounts, - state: &mut ::DynamicLayoutStateExt<'data>, + _indexes: &mut NonAddressableIndexes, + _counts: &mut (), + _state: &mut (), ) -> crate::error::Result { - todo!() + Ok(()) } - fn section_name( - &self, - section_header: &'data ::SectionHeader, - ) -> crate::error::Result<&'data [u8]> { - Ok(section_header.name()) + fn section_name(&self, section_header: &SectionHeader) -> crate::error::Result<&'data [u8]> { + for s in self.sections { 
+ if std::ptr::eq( + s as *const macho::Section64, + §ion_header.0 as *const macho::Section64, + ) { + let sectname = trim_nul(s.sectname()); + let segname = trim_nul(&s.segname); + // __const appears in both __TEXT (read-only, no pointers) and + // __DATA (has pointer relocations). Qualify with segment name + // so they map to different output sections. + if sectname == b"__const" && segname == b"__TEXT" { + return Ok(b"__text_const"); + } + return Ok(sectname); + } + } + Err(error!("Section header not found in file's section table")) } - fn raw_section_data( - &self, - section: &::SectionHeader, - ) -> crate::error::Result<&'data [u8]> { - todo!() + fn raw_section_data(&self, section: &SectionHeader) -> crate::error::Result<&'data [u8]> { + let offset = section.0.offset(LE) as usize; + let size = section.0.size(LE) as usize; + if size == 0 { + return Ok(&[]); + } + self.data + .get(offset..offset + size) + .ok_or_else(|| error!("Section data out of range")) } fn section_data( &self, - section: &::SectionHeader, - member: &bumpalo_herd::Member<'data>, - loaded_metrics: &crate::resolution::LoadedMetrics, + section: &SectionHeader, + _member: &bumpalo_herd::Member<'data>, + _loaded_metrics: &crate::resolution::LoadedMetrics, ) -> crate::error::Result<&'data [u8]> { - todo!() + // Mach-O sections are never compressed + self.raw_section_data(section) } - fn copy_section_data(&self, section: &SectionHeader, out: &mut [u8]) -> Result { - let data = section - .data(LE, self.data) - .map_err(|_e| error!("cannot get section data"))?; - copy_section_data(data, out); - + fn copy_section_data(&self, section: &SectionHeader, out: &mut [u8]) -> crate::error::Result { + let data = self.raw_section_data(section)?; + out[..data.len()].copy_from_slice(data); Ok(()) } fn section_data_cow( &self, - section: &::SectionHeader, + section: &SectionHeader, ) -> crate::error::Result> { - todo!() - } - - fn section_alignment( - &self, - section: &::SectionHeader, - ) -> crate::error::Result 
{ - Ok(2u64.pow(section.align(LE))) + Ok(std::borrow::Cow::Borrowed(self.raw_section_data(section)?)) + } + + fn section_alignment(&self, section: &SectionHeader) -> crate::error::Result { + let raw_align = 1u64 << section.0.align(LE); + // __thread_vars descriptors contain pointers and need 8-byte alignment, + // but rustc/clang emit them with align=1. Force minimum 8-byte alignment + // to match ld64 behaviour. + let sec_type = section.0.flags(LE) & 0xFF; + if sec_type == 0x13 { + // S_THREAD_LOCAL_VARIABLES + Ok(raw_align.max(8)) + } else { + Ok(raw_align) + } } fn relocations( &self, index: object::SectionIndex, - relocations: &::RelocationSections, - ) -> crate::error::Result<::RelocationList<'data>> { + _relocations: &(), + ) -> crate::error::Result> { + let section = self + .sections + .get(index.0) + .ok_or_else(|| error!("Section index {} out of range for relocations", index.0))?; + let relocs = section + .relocations(LE, self.data) + .map_err(|e| error!("Failed to read relocations: {e}"))?; Ok(RelocationList { - relocations: self - .sections - .get(index.0) - .ok_or(error!("section index out of range"))? 
- .relocations(LE, self.data)?, + relocations: relocs, }) } - fn parse_relocations( - &self, - ) -> crate::error::Result<::RelocationSections> { + fn parse_relocations(&self) -> crate::error::Result<()> { + // Mach-O relocations are stored per-section, accessed via `relocations` method Ok(()) } - fn symbol_version_debug(&self, symbol_index: object::SymbolIndex) -> Option { - todo!() + fn symbol_version_debug(&self, _symbol_index: object::SymbolIndex) -> Option { + None } - fn section_display_name(&self, index: object::SectionIndex) -> Cow<'data, str> { - self.section(index) - .and_then(|section| self.section_name(section)) - .map_or_else( - |_| format!("", index.0).into(), - String::from_utf8_lossy, - ) + fn section_display_name(&self, index: object::SectionIndex) -> std::borrow::Cow<'data, str> { + if let Some(section) = self.sections.get(index.0) { + let segname = String::from_utf8_lossy(trim_nul(section.segname())); + let sectname = String::from_utf8_lossy(trim_nul(section.sectname())); + std::borrow::Cow::Owned(format!("{segname},{sectname}")) + } else { + std::borrow::Cow::Borrowed("") + } } - fn dynamic_tag_values( - &self, - ) -> Option<::DynamicTagValues<'data>> { + fn is_symbol_in_common_section(&self, symbol: &SymtabEntry) -> bool { + let n_type = symbol.n_type() & macho::N_TYPE; + if n_type != macho::N_SECT { + return false; + } + let sect = symbol.n_sect(); + if sect == 0 { + return false; + } + if let Some(section) = self.sections.get(sect as usize - 1) { + trim_nul(section.sectname()) == b"__common" + } else { + false + } + } + + fn dynamic_tag_values(&self) -> Option> { None } - fn get_version_names( - &self, - ) -> crate::error::Result<::VersionNames<'data>> { - todo!() + fn get_version_names(&self) -> crate::error::Result<()> { + Ok(()) } fn get_symbol_name_and_version( &self, - symbol: &::SymtabEntry, - local_index: usize, - version_names: &::VersionNames<'data>, - ) -> crate::error::Result<::RawSymbolName<'data>> { - todo!() + symbol: 
&SymtabEntry, + _local_index: usize, + _version_names: &(), + ) -> crate::error::Result> { + let name = symbol + .name(LE, self.symbols.strings()) + .map_err(|e| error!("Failed to read symbol name: {e}"))?; + Ok(RawSymbolName { name }) } fn should_enforce_undefined( &self, - resources: &crate::layout::GraphResources<'data, '_, Self::Platform>, + _resources: &crate::layout::GraphResources<'data, '_, MachO>, ) -> bool { - todo!() + true } - fn verneed_table( - &self, - ) -> crate::error::Result<::VerneedTable<'data>> { + fn verneed_table(&self) -> crate::error::Result> { Ok(VerneedTable { _phantom: &[] }) } fn process_gnu_note_section( &self, - state: &mut ::ObjectLayoutStateExt<'data>, - section_index: object::SectionIndex, + _state: &mut (), + _section_index: object::SectionIndex, ) -> crate::error::Result { - todo!() + Ok(()) } - fn dynamic_tags( - &self, - ) -> crate::error::Result<&'data [::DynamicEntry]> { - todo!() + fn dynamic_tags(&self) -> crate::error::Result<&'data [()]> { + Ok(&[]) } } +// -- SectionHeader trait impls -- + impl platform::SectionHeader for SectionHeader { fn is_alloc(&self) -> bool { - todo!() + // In Mach-O, all sections in loadable segments are "allocated" + true } fn is_writable(&self) -> bool { - todo!() + // Check segment name: __DATA and __DATA_CONST segments are writable + let segname = trim_nul(self.0.segname()); + segname.starts_with(b"__DATA") } fn is_executable(&self) -> bool { - todo!() + let flags = self.0.flags(LE); + (flags & macho::S_ATTR_PURE_INSTRUCTIONS) != 0 + || (flags & macho::S_ATTR_SOME_INSTRUCTIONS) != 0 } fn is_tls(&self) -> bool { - todo!() + // Only __thread_data and __thread_bss are actual TLS data sections. + // __thread_vars is the descriptor table that lives in regular DATA — + // it must NOT be marked as TLS so it gets a normal section resolution. 
+ let sectname = trim_nul(self.0.sectname()); + sectname == b"__thread_data" || sectname == b"__thread_bss" } fn is_merge_section(&self) -> bool { - // TODO - false + let flags = self.0.flags(LE) & macho::SECTION_TYPE; + flags == macho::S_CSTRING_LITERALS || flags == macho::S_LITERAL_POINTERS } fn is_strings(&self) -> bool { - todo!() + let flags = self.0.flags(LE) & macho::SECTION_TYPE; + flags == macho::S_CSTRING_LITERALS } fn should_retain(&self) -> bool { - // TODO + let sec_type = self.0.flags(LE) & macho::SECTION_TYPE; + let sectname = trim_nul(self.0.sectname()); + // Constructor/destructor function pointer arrays. + if sec_type == macho::S_MOD_INIT_FUNC_POINTERS + || sec_type == macho::S_MOD_TERM_FUNC_POINTERS + { + return true; + } + // Exception handling sections needed for unwinding. + if sectname == b"__eh_frame" || sectname == b"__gcc_except_tab" { + return true; + } false } fn should_exclude(&self) -> bool { - // TODO + let segname = trim_nul(self.0.segname()); + let sectname = trim_nul(self.0.sectname()); + // Debug sections in __DWARF segment are not loaded + if segname == b"__DWARF" { + return true; + } + // __LD segment contains linker-private data (e.g. __compact_unwind) + // that must be consumed by the linker, not emitted to output. 
+ if segname == b"__LD" { + return true; + } false } fn is_group(&self) -> bool { - todo!() + false } fn is_note(&self) -> bool { @@ -402,32 +449,34 @@ impl platform::SectionHeader for SectionHeader { } fn is_prog_bits(&self) -> bool { - todo!() + let section_type = self.0.flags(LE) & macho::SECTION_TYPE; + section_type == macho::S_REGULAR || section_type == macho::S_CSTRING_LITERALS } fn is_no_bits(&self) -> bool { - todo!() + let section_type = self.0.flags(LE) & macho::SECTION_TYPE; + section_type == macho::S_ZEROFILL || section_type == macho::S_GB_ZEROFILL } } #[derive(Debug, Copy, Clone, Default)] -pub(crate) struct SectionType {} +pub(crate) struct SectionType(u32); impl platform::SectionType for SectionType { fn is_rela(&self) -> bool { - todo!() + false } fn is_rel(&self) -> bool { - todo!() + false } fn is_symtab(&self) -> bool { - todo!() + false } fn is_strtab(&self) -> bool { - todo!() + false } } @@ -435,91 +484,125 @@ impl platform::SectionType for SectionType { pub(crate) struct SectionFlags(u32); impl SectionFlags { - #[must_use] - pub const fn empty() -> Self { - Self(0) - } - - #[must_use] - pub const fn from_u32(raw: u32) -> SectionFlags { - SectionFlags(raw) - } - - #[must_use] - pub const fn raw(self) -> u32 { - self.0 + pub(crate) fn from_header(header: &SectionHeader) -> Self { + SectionFlags(header.0.flags(LE)) } } impl platform::SectionFlags for SectionFlags { fn is_alloc(self) -> bool { + // All Mach-O sections are allocated true } } -// Documentation link for Nlist64 type: https://leopard-adc.pepas.com/documentation/DeveloperTools/Conceptual/MachORuntime/Reference/reference.html impl platform::Symbol for SymtabEntry { fn as_common(&self) -> Option { - todo!() + // In Mach-O, common symbols are N_UNDF | N_EXT with n_value > 0 + let n_type = self.n_type(); + if (n_type & macho::N_TYPE) == macho::N_UNDF + && (n_type & macho::N_EXT) != 0 + && self.n_value(LE) > 0 + { + // GET_COMM_ALIGN: alignment is encoded in bits 8-11 of n_desc + let 
alignment_val = u64::from((self.n_desc(LE) >> 8) & 0x0f); + let alignment = crate::alignment::Alignment::new(if alignment_val > 0 { + 1u64 << alignment_val + } else { + 1 + }) + .unwrap_or(crate::alignment::MIN); + let size = alignment.align_up(self.n_value(LE)); + let output_section_id = crate::output_section_id::BSS; + let part_id = output_section_id.part_id_with_alignment(alignment); + Some(platform::CommonSymbol { size, part_id }) + } else { + None + } } fn is_undefined(&self) -> bool { - Nlist::is_undefined(self) + let n_type = self.n_type(); + // Not a stab, and type is N_UNDF, but NOT a common symbol + // (common symbols are N_UNDF | N_EXT with n_value > 0) + (n_type & macho::N_STAB) == 0 + && (n_type & macho::N_TYPE) == macho::N_UNDF + && !self.is_common() } fn is_local(&self) -> bool { - self.n_type & N_EXT == 0 + let n_type = self.n_type(); + // Not external and not a stab entry + (n_type & macho::N_STAB) == 0 && (n_type & macho::N_EXT) == 0 } fn is_absolute(&self) -> bool { - self.n_type & N_TYPE == N_ABS + (self.n_type() & macho::N_TYPE) == macho::N_ABS } fn is_weak(&self) -> bool { - self.n_desc.get(LE) & N_WEAK_DEF != 0 + (self.n_desc(LE) & (macho::N_WEAK_DEF | macho::N_WEAK_REF)) != 0 } fn visibility(&self) -> crate::symbol_db::Visibility { - if self.n_type & N_PEXT != 0 { - Visibility::Hidden + let n_type = self.n_type(); + if (n_type & macho::N_PEXT) != 0 { + crate::symbol_db::Visibility::Hidden + } else if (n_type & macho::N_EXT) != 0 { + crate::symbol_db::Visibility::Default } else { - Visibility::Default + crate::symbol_db::Visibility::Hidden } } fn value(&self) -> u64 { - self.n_value.get(LE) + self.n_value(LE) } fn size(&self) -> u64 { - // TODO + // Mach-O symbols don't have a size field 0 } fn section_index(&self) -> object::SectionIndex { - object::SectionIndex(usize::from(self.n_sect)) + let n_type = self.n_type() & macho::N_TYPE; + if n_type == macho::N_SECT { + // n_sect is 1-based in Mach-O + let sect = self.n_sect(); + if sect > 0 { + 
return object::SectionIndex(sect as usize - 1); + } + } + object::SectionIndex(0) } fn has_name(&self) -> bool { - self.n_strx.get(LE) != 0 + self.n_strx(LE) != 0 } fn debug_string(&self) -> String { - // TODO - String::new() + format!( + "Nlist64 {{ n_type: 0x{:02x}, n_sect: {}, n_desc: 0x{:04x}, n_value: 0x{:x} }}", + self.n_type(), + self.n_sect(), + self.n_desc(LE), + self.n_value(LE), + ) } fn is_tls(&self) -> bool { - // TODO: derive from section name + // In Mach-O, TLS symbols reference __thread_vars section false } fn is_interposable(&self) -> bool { + // Mach-O two-level namespace means symbols are generally not interposable false } fn is_func(&self) -> bool { - // TODO: derive from section name + // Mach-O doesn't have an explicit function type in nlist. + // We'd need to check the section type, but for now return false. false } @@ -528,7 +611,7 @@ impl platform::Symbol for SymtabEntry { } fn is_hidden(&self) -> bool { - self.visibility() == Visibility::Hidden + (self.n_type() & macho::N_PEXT) != 0 } fn is_gnu_unique(&self) -> bool { @@ -536,22 +619,25 @@ impl platform::Symbol for SymtabEntry { } } +// -- SectionAttributes -- + #[derive(Debug, Copy, Clone, Default)] pub(crate) struct SectionAttributes { - pub(crate) flags: SectionFlags, + flags: u32, + segname: [u8; 16], } impl platform::SectionAttributes for SectionAttributes { type Platform = MachO; fn merge(&mut self, rhs: Self) { - self.flags = SectionFlags::from_u32(self.flags.raw() | rhs.flags.raw()); + self.flags |= rhs.flags; } fn apply( &self, - output_sections: &mut crate::output_section_id::OutputSections, - section_id: crate::output_section_id::OutputSectionId, + _output_sections: &mut crate::output_section_id::OutputSections, + _section_id: crate::output_section_id::OutputSectionId, ) { } @@ -560,11 +646,12 @@ impl platform::SectionAttributes for SectionAttributes { } fn is_alloc(&self) -> bool { - false + true } fn is_executable(&self) -> bool { - false + (self.flags & 
macho::S_ATTR_PURE_INSTRUCTIONS) != 0 + || (self.flags & macho::S_ATTR_SOME_INSTRUCTIONS) != 0 } fn is_tls(&self) -> bool { @@ -572,57 +659,69 @@ impl platform::SectionAttributes for SectionAttributes { } fn is_writable(&self) -> bool { - false + self.segname.starts_with(b"__DATA") } fn is_no_bits(&self) -> bool { - false + let section_type = self.flags & macho::SECTION_TYPE; + section_type == macho::S_ZEROFILL || section_type == macho::S_GB_ZEROFILL } - fn flags(&self) -> ::SectionFlags { - self.flags + fn flags(&self) -> SectionFlags { + SectionFlags(self.flags) } - fn ty(&self) -> ::SectionType { - SectionType {} + fn ty(&self) -> SectionType { + SectionType(self.flags & macho::SECTION_TYPE) } - fn set_to_default_type(&mut self) {} + fn set_to_default_type(&mut self) { + self.flags = (self.flags & !macho::SECTION_TYPE) | macho::S_REGULAR; + } } +// -- Other platform type stubs -- + pub(crate) struct NonAddressableIndexes {} impl platform::NonAddressableIndexes for NonAddressableIndexes { - fn new(symbol_db: &crate::symbol_db::SymbolDb

) -> Self { + fn new(_symbol_db: &crate::symbol_db::SymbolDb

) -> Self { NonAddressableIndexes {} } } -#[derive(Debug, Copy, Clone, Default, PartialEq)] -pub(crate) enum SegmentType { - Header, - // All load commands are grouped into the segment. - LoadCommands, - // Sections belonging to __TEXT segment. - Text, - // Sections belonging to __DATA segment. - Data, - // Sections belonging to __DATA_CONST segment. - DataConst, - #[default] - Misc, -} +#[derive(Debug, Copy, Clone, Default)] +pub(crate) struct SegmentType {} impl platform::SegmentType for SegmentType {} -#[derive(Debug, Copy, Clone, Default, PartialEq)] +#[derive(Debug, Copy, Clone, Default)] pub(crate) struct ProgramSegmentDef { - pub(crate) segment_type: SegmentType, + pub(crate) writable: bool, + pub(crate) executable: bool, } +/// __TEXT segment: r-x, contains headers + code + read-only data +const TEXT_SEGMENT_DEF: ProgramSegmentDef = ProgramSegmentDef { + writable: false, + executable: true, +}; + +/// __DATA segment: rw-, contains writable data + GOT + BSS +const DATA_SEGMENT_DEF: ProgramSegmentDef = ProgramSegmentDef { + writable: true, + executable: false, +}; + +const MACHO_SEGMENT_DEFS: &[ProgramSegmentDef] = &[TEXT_SEGMENT_DEF, DATA_SEGMENT_DEF]; + impl std::fmt::Display for ProgramSegmentDef { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self.segment_type) + if self.executable { + write!(f, "__TEXT") + } else { + write!(f, "__DATA") + } } } @@ -630,19 +729,19 @@ impl platform::ProgramSegmentDef for ProgramSegmentDef { type Platform = MachO; fn is_writable(self) -> bool { - false + self.writable } fn is_executable(self) -> bool { - false + self.executable } fn always_keep(self) -> bool { - true + true // Both __TEXT and __DATA are always emitted } fn is_loadable(self) -> bool { - false + true // Both are loadable } fn is_stack(self) -> bool { @@ -654,51 +753,47 @@ impl platform::ProgramSegmentDef for ProgramSegmentDef { } fn order_key(self) -> usize { - self.segment_type as usize + if self.executable { 0 } 
else { 1 } } fn should_include_section( self, - section_info: &crate::output_section_id::SectionOutputInfo, - section_id: crate::output_section_id::OutputSectionId, + section_info: &crate::output_section_id::SectionOutputInfo, + _section_id: crate::output_section_id::OutputSectionId, ) -> bool { - self.segment_type - == match section_id { - output_section_id::FILE_HEADER => SegmentType::Header, - output_section_id::PAGEZERO_SEGMENT - | output_section_id::TEXT_SEGMENT - | output_section_id::DATA_SEGMENT - | output_section_id::LINK_EDIT_SEGMENT - | output_section_id::ENTRY_POINT => SegmentType::LoadCommands, - output_section_id::TEXT | output_section_id::CSTRING => SegmentType::Text, - output_section_id::DATA => SegmentType::Data, - _ => SegmentType::Misc, - } + let attrs = §ion_info.section_attributes; + if !attrs.is_alloc() { + return false; + } + if self.writable { + attrs.is_writable() + } else { + !attrs.is_writable() + } } } -pub(crate) struct BuiltInSectionDetails { - pub(crate) kind: SectionKind<'static>, - pub(crate) section_flags: SectionFlags, - pub(crate) target_segment_type: Option, -} +pub(crate) struct BuiltInSectionDetails {} impl platform::BuiltInSectionDetails for BuiltInSectionDetails {} -const DEFAULT_DEFS: BuiltInSectionDetails = BuiltInSectionDetails { - kind: SectionKind::Primary(SectionName(&[])), - section_flags: SectionFlags::empty(), - target_segment_type: None, -}; +/// Mach-O specific resolution data attached to each resolved symbol. +#[derive(Debug, Copy, Clone, Default)] +pub(crate) struct MachOResolutionExt { + /// GOT entry address (if the symbol needs a GOT slot). + pub(crate) got_address: Option, + /// PLT stub address (if the symbol needs a dynamic call stub). 
+ pub(crate) plt_address: Option, +} #[derive(Default, Debug, Clone, Copy)] pub(crate) struct DynamicTagValues<'data> { - phantom: &'data [u8], + _phantom: &'data [u8], } #[derive(Debug)] pub(crate) struct RelocationList<'data> { - relocations: &'data [Relocation], + pub(crate) relocations: &'data [macho::Relocation], } impl<'data> platform::RelocationList<'data> for RelocationList<'data> { @@ -708,8 +803,8 @@ impl<'data> platform::RelocationList<'data> for RelocationList<'data> { } impl<'data> platform::DynamicTagValues<'data> for DynamicTagValues<'data> { - fn lib_name(&self, input: &crate::input_data::InputRef<'data>) -> &'data [u8] { - todo!() + fn lib_name(&self, _input: &crate::input_data::InputRef<'data>) -> &'data [u8] { + b"" } } @@ -720,7 +815,7 @@ pub(crate) struct RawSymbolName<'data> { impl<'data> platform::RawSymbolName<'data> for RawSymbolName<'data> { fn parse(bytes: &'data [u8]) -> Self { - Self { name: bytes } + RawSymbolName { name: bytes } } fn name(&self) -> &'data [u8] { @@ -732,24 +827,38 @@ impl<'data> platform::RawSymbolName<'data> for RawSymbolName<'data> { } fn is_default(&self) -> bool { - false + true } } impl std::fmt::Display for RawSymbolName<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - todo!() + write!(f, "{}", String::from_utf8_lossy(self.name)) } } pub(crate) struct VerneedTable<'data> { - // TODO _phantom: &'data [u8], } impl<'data> platform::VerneedTable<'data> for VerneedTable<'data> { - fn version_name(&self, local_symbol_index: object::SymbolIndex) -> Option<&'data [u8]> { - todo!() + fn version_name(&self, _local_symbol_index: object::SymbolIndex) -> Option<&'data [u8]> { + None + } +} + +/// Iterator adapter to cast Section64 refs to SectionHeader refs. 
+pub(crate) struct MachOSectionIter<'data> { + inner: core::slice::Iter<'data, macho::Section64>, +} + +impl<'data> Iterator for MachOSectionIter<'data> { + type Item = &'data SectionHeader; + + fn next(&mut self) -> Option { + self.inner.next().map(|s| unsafe { + &*(s as *const macho::Section64 as *const SectionHeader) + }) } } @@ -773,11 +882,10 @@ impl platform::Platform for MachO { type CommonGroupStateExt = (); type ArchIdentifier = (); type Args = MachOArgs; - type ResolutionExt = (); - type SymtabShndxEntry = (); + type ResolutionExt = MachOResolutionExt; type SymbolVersionIndex = (); type LayoutExt = (); - type SectionIterator<'data> = core::slice::Iter<'data, SectionHeader>; + type SectionIterator<'data> = MachOSectionIter<'data>; type DynamicTagValues<'data> = DynamicTagValues<'data>; type RelocationList<'data> = RelocationList<'data>; type DynamicLayoutStateExt<'data> = (); @@ -789,6 +897,7 @@ impl platform::Platform for MachO { type RawSymbolName<'data> = RawSymbolName<'data>; type VersionNames<'data> = (); type VerneedTable<'data> = VerneedTable<'data>; + type SymtabShndxEntry = u32; fn link_for_arch<'data>( linker: &'data crate::Linker, @@ -801,119 +910,269 @@ impl platform::Platform for MachO { output: &crate::file_writer::Output, layout: &crate::layout::Layout<'data, Self>, ) -> crate::error::Result { - output.write(layout, macho_writer::write::) + // Mach-O writer bypasses SizedOutput but we still need to go through + // Output::write to satisfy the file creation lifecycle. 
+ output.write(layout, |_sized_output, lay| { + crate::macho_writer::write_direct::(lay) + }) } fn section_attributes(header: &Self::SectionHeader) -> Self::SectionAttributes { - Self::SectionAttributes { - ..Default::default() + SectionAttributes { + flags: header.0.flags(LE), + segname: *header.0.segname(), } } fn apply_force_keep_sections( keep_sections: &mut crate::output_section_map::OutputSectionMap, - args: &Self::Args, + _args: &Self::Args, ) { + *keep_sections.get_mut(crate::output_section_id::INIT_ARRAY) = true; + *keep_sections.get_mut(crate::output_section_id::FINI_ARRAY) = true; + // Exception handling sections needed for stack unwinding. + *keep_sections.get_mut(crate::output_section_id::EH_FRAME) = true; + *keep_sections.get_mut(crate::output_section_id::GCC_EXCEPT_TABLE) = true; } fn is_zero_sized_section_content( - section_id: crate::output_section_id::OutputSectionId, + _section_id: crate::output_section_id::OutputSectionId, ) -> bool { - todo!() + false } fn built_in_section_details() -> &'static [Self::BuiltInSectionDetails] { - &SECTION_DEFINITIONS + &[] } fn finalise_group_layout( - memory_offsets: &crate::output_section_part_map::OutputSectionPartMap, + _memory_offsets: &crate::output_section_part_map::OutputSectionPartMap, ) -> Self::GroupLayoutExt { } fn frame_data_base_address( - memory_offsets: &crate::output_section_part_map::OutputSectionPartMap, + _memory_offsets: &crate::output_section_part_map::OutputSectionPartMap, ) -> u64 { - todo!() + 0 + } + + fn start_memory_address(output_kind: crate::output_kind::OutputKind) -> u64 { + if output_kind == crate::output_kind::OutputKind::SharedObject + || output_kind.is_relocatable() + { + 0 // dylibs and relocatables have no PAGEZERO + } else { + 0x1_0000_0000 // PAGEZERO size for executables + } } - fn finalise_find_required_sections(groups: &[crate::layout::GroupState]) {} + fn finalise_find_required_sections(_groups: &[crate::layout::GroupState]) {} fn activate_dynamic<'data>( - state: &mut 
crate::layout::DynamicLayoutState<'data, Self>, - common: &mut crate::layout::CommonGroupState<'data, Self>, + _state: &mut crate::layout::DynamicLayoutState<'data, Self>, + _common: &mut crate::layout::CommonGroupState<'data, Self>, ) { - todo!() } fn pre_finalise_sizes_prelude<'scope, 'data>( - prelude: &mut crate::layout::PreludeLayoutState<'data, Self>, - common: &mut crate::layout::CommonGroupState<'data, Self>, - resources: &crate::layout::GraphResources<'data, 'scope, Self>, + _prelude: &mut crate::layout::PreludeLayoutState<'data, Self>, + _common: &mut crate::layout::CommonGroupState<'data, Self>, + _resources: &crate::layout::GraphResources<'data, 'scope, Self>, ) { } fn finalise_sizes_dynamic<'data>( - object: &mut crate::layout::DynamicLayoutState<'data, Self>, - common: &mut crate::layout::CommonGroupState<'data, Self>, + _object: &mut crate::layout::DynamicLayoutState<'data, Self>, + _common: &mut crate::layout::CommonGroupState<'data, Self>, ) -> crate::error::Result { - todo!() + Ok(()) } fn finalise_object_sizes<'data>( - object: &mut crate::layout::ObjectLayoutState<'data, Self>, - common: &mut crate::layout::CommonGroupState<'data, Self>, + _object: &mut crate::layout::ObjectLayoutState<'data, Self>, + _common: &mut crate::layout::CommonGroupState<'data, Self>, ) { } fn finalise_object_layout<'data>( - object: &crate::layout::ObjectLayoutState<'data, Self>, - memory_offsets: &mut crate::output_section_part_map::OutputSectionPartMap, + _object: &crate::layout::ObjectLayoutState<'data, Self>, + _memory_offsets: &mut crate::output_section_part_map::OutputSectionPartMap, ) { } fn finalise_layout_dynamic<'data>( - state: &mut crate::layout::DynamicLayoutState<'data, Self>, - memory_offsets: &mut crate::output_section_part_map::OutputSectionPartMap, - resources: &crate::layout::FinaliseLayoutResources<'_, 'data, Self>, - resolutions_out: &mut crate::layout::ResolutionWriter, + _state: &mut crate::layout::DynamicLayoutState<'data, Self>, + 
_memory_offsets: &mut crate::output_section_part_map::OutputSectionPartMap, + _resources: &crate::layout::FinaliseLayoutResources<'_, 'data, Self>, + _resolutions_out: &mut crate::layout::ResolutionWriter, ) -> crate::error::Result> { - todo!() + Ok(()) } fn take_dynsym_index( - memory_offsets: &mut crate::output_section_part_map::OutputSectionPartMap, - section_layouts: &crate::output_section_map::OutputSectionMap< + _memory_offsets: &mut crate::output_section_part_map::OutputSectionPartMap, + _section_layouts: &crate::output_section_map::OutputSectionMap< crate::layout::OutputRecordLayout, >, ) -> crate::error::Result { - todo!() + // Mach-O doesn't use dynsym indices. Return 1 to satisfy NonZeroU32. + // The value is unused in the Mach-O writer. + Ok(1) } fn compute_object_addresses<'data>( - object: &crate::layout::ObjectLayoutState<'data, Self>, - memory_offsets: &mut crate::output_section_part_map::OutputSectionPartMap, + _object: &crate::layout::ObjectLayoutState<'data, Self>, + _memory_offsets: &mut crate::output_section_part_map::OutputSectionPartMap, ) { - todo!() } fn layout_resources_ext<'data>( - groups: &[crate::grouping::Group<'data, Self>], + _groups: &[crate::grouping::Group<'data, Self>], ) -> Self::LayoutResourcesExt<'data> { } fn load_object_section_relocations<'data, 'scope, A: platform::Arch>( state: &crate::layout::ObjectLayoutState<'data, Self>, - common: &mut crate::layout::CommonGroupState<'data, Self>, + _common: &mut crate::layout::CommonGroupState<'data, Self>, queue: &mut crate::layout::LocalWorkQueue, resources: &'scope crate::layout::GraphResources<'data, '_, Self>, section: crate::layout::Section, scope: &rayon::Scope<'scope>, ) -> crate::error::Result { - // TODO - // for rel in state.relocations(section.index)?.relocations { - // dbg!(rel.info(LE)); - // } + // Scan relocations to discover referenced symbols and trigger loading + // of their containing sections. 
+ let le = object::Endianness::Little; + let input_section = state + .object + .sections + .get(section.index.0) + .ok_or_else(|| crate::error!("Section index out of range"))?; + let relocs = match input_section.relocations(le, state.object.data) { + Ok(r) => r, + Err(_) => return Ok(()), + }; + let mut after_subtractor = false; + for reloc_raw in relocs { + let reloc = reloc_raw.info(le); + if !reloc.r_extern { + continue; + } + if reloc.r_type == 10 { + // ADDEND + continue; + } + if reloc.r_type == 1 { + // SUBTRACTOR + after_subtractor = true; + continue; + } + + let sym_idx = object::SymbolIndex(reloc.r_symbolnum as usize); + let local_symbol_id = state.symbol_id_range.input_to_id(sym_idx); + let symbol_id = resources.symbol_db.definition(local_symbol_id); + + let is_def_undef = resources.symbol_db.is_undefined(symbol_id); + let is_ref_undef = resources.symbol_db.is_undefined(local_symbol_id); + let flags_to_add = match reloc.r_type { + 5 | 6 | 7 => crate::value_flags::ValueFlags::GOT, // GOT_LOAD / POINTER_TO_GOT + 2 if is_def_undef => { + crate::value_flags::ValueFlags::PLT | crate::value_flags::ValueFlags::GOT + } + // UNSIGNED after SUBTRACTOR: personality pointers in __eh_frame CIE + // entries need GOT if the referenced symbol is undefined (from a dylib). + 0 if after_subtractor && is_ref_undef => crate::value_flags::ValueFlags::GOT, + _ => crate::value_flags::ValueFlags::DIRECT, + }; + after_subtractor = false; + let atomic_flags = &resources.per_symbol_flags.get_atomic(symbol_id); + let previous_flags = atomic_flags.fetch_or(flags_to_add); + + // Request this symbol to be loaded (which loads its section) + if !previous_flags.has_resolution() { + queue.send_symbol_request::(symbol_id, resources, scope); + } + + // Check for undefined symbol errors: strong references to symbols + // not found in any input or linked dylib. 
Only check when we have + // .tbd symbol data (meaning syslibroot was provided and we can + // distinguish dylib imports from truly missing symbols). + if is_def_undef && !resources.symbol_db.args.dylib_symbols.is_empty() { + use object::read::macho::Nlist as _; + let local_sym = state.object.symbols.symbol(sym_idx).ok(); + let is_weak = local_sym.map_or(false, |s| { + (s.n_desc(le) & (macho::N_WEAK_DEF | macho::N_WEAK_REF)) != 0 + }); + if !is_weak { + let sym_name = resources.symbol_db.symbol_name(symbol_id).ok(); + let in_dylib = sym_name.map_or(false, |n| { + resources.symbol_db.args.dylib_symbols.contains(n.bytes()) + }); + // If extra dylibs are linked (e.g. user .dylib files we don't + // parse symbols from), assume the symbol might come from them. + let has_unparsed_dylibs = !resources.symbol_db.args.extra_dylibs.is_empty(); + if !in_dylib && !has_unparsed_dylibs { + let sym_display = resources.symbol_db.symbol_name_for_display(symbol_id); + resources.report_error(crate::error!( + "Undefined symbol {sym_display}, referenced by {}", + state.input, + )); + } + } + } + } + + // Also scan __compact_unwind for personality function references that + // need GOT entries. The personality reloc is at offset 16 within each + // 32-byte entry. We request GOT for undefined personality symbols so + // they get GOT slots allocated during layout. 
+ { + use object::read::macho::MachHeader as _; + use object::read::macho::Segment as _; + if let Ok(header) = + object::macho::MachHeader64::::parse(state.object.data, 0) + { + if let Ok(mut cmds) = header.load_commands(le, state.object.data, 0) { + while let Ok(Some(cmd)) = cmds.next() { + let Ok(Some((seg, seg_data))) = cmd.segment_64() else { + continue; + }; + let Ok(sections) = seg.sections(le, seg_data) else { + continue; + }; + for sec in sections { + let sec_segname = crate::macho::trim_nul(&sec.segname); + let sectname = crate::macho::trim_nul(&sec.sectname); + if sec_segname != b"__LD" || sectname != b"__compact_unwind" { + continue; + } + let relocs = match sec.relocations(le, state.object.data) { + Ok(r) => r, + Err(_) => continue, + }; + for r in relocs { + let ri = r.info(le); + if !ri.r_extern || ri.r_type != 0 { + continue; + } + // Personality is at offset 16 within each 32-byte entry. + if ri.r_address as usize % 32 != 16 { + continue; + } + let sym_idx = object::SymbolIndex(ri.r_symbolnum as usize); + let local_id = state.symbol_id_range.input_to_id(sym_idx); + let sym_id = resources.symbol_db.definition(local_id); + let atomic = &resources.per_symbol_flags.get_atomic(sym_id); + let prev = atomic.fetch_or(crate::value_flags::ValueFlags::GOT); + if !prev.has_resolution() { + queue.send_symbol_request::(sym_id, resources, scope); + } + } + } + } + } + } + } + Ok(()) } @@ -921,18 +1180,25 @@ impl platform::Platform for MachO { symbol_db: &crate::symbol_db::SymbolDb<'data, Self>, symbol_id: crate::symbol_db::SymbolId, ) -> crate::error::Result> { - todo!() + let name = symbol_db.symbol_name(symbol_id)?.bytes(); + Ok(crate::layout::DynamicSymbolDefinition { + symbol_id, + name, + format_specific: (), + }) } fn update_segment_keep_list( - program_segments: &crate::program_segments::ProgramSegments, - keep_segments: &mut [bool], - args: &Self::Args, + _program_segments: &crate::program_segments::ProgramSegments, + _keep_segments: &mut [bool], + _args: 
&Self::Args, ) { + // Default keep logic is sufficient -- segments with sections are kept automatically. + // The pipeline sets keep_segments[0] = true for the first segment (__TEXT). } fn program_segment_defs() -> &'static [Self::ProgramSegmentDef] { - PROGRAM_SEGMENT_DEFS + MACHO_SEGMENT_DEFS } fn unconditional_segment_defs() -> &'static [Self::ProgramSegmentDef] { @@ -940,32 +1206,96 @@ impl platform::Platform for MachO { } fn create_linker_defined_symbols( - symbols: &mut crate::parsing::InternalSymbolsBuilder, - output_kind: crate::output_kind::OutputKind, - args: &Self::Args, + _symbols: &mut crate::parsing::InternalSymbolsBuilder, + _output_kind: crate::output_kind::OutputKind, + _args: &Self::Args, ) { } fn built_in_section_infos<'data>() -> Vec> { - SECTION_DEFINITIONS - .iter() - .map(|d| SectionOutputInfo { - section_attributes: SectionAttributes { - flags: d.section_flags, - }, - kind: d.kind, - min_alignment: alignment::MIN, + use crate::layout_rules::SectionKind; + use crate::output_section_id::NUM_BUILT_IN_SECTIONS; + use crate::output_section_id::SectionName; + use crate::output_section_id::SectionOutputInfo; + + let mut infos: Vec> = + Vec::with_capacity(NUM_BUILT_IN_SECTIONS); + for _ in 0..NUM_BUILT_IN_SECTIONS { + infos.push(SectionOutputInfo { + kind: SectionKind::Primary(SectionName(b"")), + section_attributes: SectionAttributes::default(), + min_alignment: crate::alignment::MIN, location: None, secondary_order: None, - }) - .collect() + }); + } + + // Provide names/attributes for the regular sections we care about + infos[crate::output_section_id::TEXT.as_usize()] = SectionOutputInfo { + kind: SectionKind::Primary(SectionName(b"__text")), + section_attributes: SectionAttributes { + flags: macho::S_REGULAR | macho::S_ATTR_PURE_INSTRUCTIONS, + segname: *b"__TEXT\0\0\0\0\0\0\0\0\0\0", + }, + min_alignment: crate::alignment::MIN, + location: None, + secondary_order: None, + }; + infos[crate::output_section_id::RODATA.as_usize()] = 
SectionOutputInfo { + kind: SectionKind::Primary(SectionName(b"__rodata")), + section_attributes: SectionAttributes::default(), + min_alignment: crate::alignment::MIN, + location: None, + secondary_order: None, + }; + infos[crate::output_section_id::DATA.as_usize()] = SectionOutputInfo { + kind: SectionKind::Primary(SectionName(b"__data")), + section_attributes: SectionAttributes { + flags: macho::S_REGULAR, + segname: *b"__DATA\0\0\0\0\0\0\0\0\0\0", + }, + min_alignment: crate::alignment::MIN, + location: None, + secondary_order: None, + }; + infos[crate::output_section_id::GOT.as_usize()] = SectionOutputInfo { + kind: SectionKind::Primary(SectionName(b"__got")), + section_attributes: SectionAttributes { + flags: 0x06, // S_NON_LAZY_SYMBOL_POINTERS + segname: *b"__DATA\0\0\0\0\0\0\0\0\0\0", + }, + min_alignment: crate::alignment::GOT_ENTRY, + location: None, + secondary_order: None, + }; + infos[crate::output_section_id::TDATA.as_usize()] = SectionOutputInfo { + kind: SectionKind::Primary(SectionName(b"__thread_data")), + section_attributes: SectionAttributes { + flags: macho::S_THREAD_LOCAL_REGULAR, + segname: *b"__DATA\0\0\0\0\0\0\0\0\0\0", + }, + min_alignment: crate::alignment::Alignment { exponent: 3 }, // 8-byte align + location: None, + secondary_order: None, + }; + infos[crate::output_section_id::BSS.as_usize()] = SectionOutputInfo { + kind: SectionKind::Primary(SectionName(b"__bss")), + section_attributes: SectionAttributes { + flags: macho::S_ZEROFILL, + segname: *b"__DATA\0\0\0\0\0\0\0\0\0\0", + }, + min_alignment: crate::alignment::MIN, + location: None, + secondary_order: None, + }; + infos } fn create_layout_properties<'data, 'states, 'files, A: platform::Arch>( - args: &Self::Args, - objects: impl Iterator>, - states: impl Iterator> + Clone, + _args: &Self::Args, + _objects: impl Iterator>, + _states: impl Iterator> + Clone, ) -> crate::error::Result where 'data: 'files, @@ -975,136 +1305,116 @@ impl platform::Platform for MachO { } fn 
load_exception_frame_data<'data, 'scope, A: platform::Arch>( - object: &mut crate::layout::ObjectLayoutState<'data, Self>, - common: &mut crate::layout::CommonGroupState<'data, Self>, - eh_frame_section_index: object::SectionIndex, - resources: &'scope crate::layout::GraphResources<'data, '_, Self>, - queue: &mut crate::layout::LocalWorkQueue, - scope: &rayon::Scope<'scope>, + _object: &mut crate::layout::ObjectLayoutState<'data, Self>, + _common: &mut crate::layout::CommonGroupState<'data, Self>, + _eh_frame_section_index: object::SectionIndex, + _resources: &'scope crate::layout::GraphResources<'data, '_, Self>, + _queue: &mut crate::layout::LocalWorkQueue, + _scope: &rayon::Scope<'scope>, ) -> crate::error::Result { - todo!() + Ok(()) } fn non_empty_section_loaded<'data, 'scope, A: platform::Arch>( - object: &mut crate::layout::ObjectLayoutState<'data, Self>, - common: &mut crate::layout::CommonGroupState<'data, Self>, - queue: &mut crate::layout::LocalWorkQueue, - unloaded: crate::resolution::UnloadedSection, - resources: &'scope crate::layout::GraphResources<'data, 'scope, Self>, - scope: &rayon::Scope<'scope>, + _object: &mut crate::layout::ObjectLayoutState<'data, Self>, + _common: &mut crate::layout::CommonGroupState<'data, Self>, + _queue: &mut crate::layout::LocalWorkQueue, + _unloaded: crate::resolution::UnloadedSection, + _resources: &'scope crate::layout::GraphResources<'data, 'scope, Self>, + _scope: &rayon::Scope<'scope>, ) -> crate::error::Result { Ok(()) } fn new_epilogue_layout( - args: &Self::Args, - output_kind: crate::output_kind::OutputKind, - dynamic_symbol_definitions: &mut [crate::layout::DynamicSymbolDefinition<'_, Self>], + _args: &Self::Args, + _output_kind: crate::output_kind::OutputKind, + _dynamic_symbol_definitions: &mut [crate::layout::DynamicSymbolDefinition<'_, Self>], ) -> Self::EpilogueLayoutExt { } fn apply_non_addressable_indexes_epilogue( - counts: &mut Self::NonAddressableCounts, - state: &mut Self::EpilogueLayoutExt, + 
_counts: &mut Self::NonAddressableCounts, + _state: &mut Self::EpilogueLayoutExt, ) { } fn apply_non_addressable_indexes<'data, 'groups>( - symbol_db: &crate::symbol_db::SymbolDb<'data, Self>, - counts: &Self::NonAddressableCounts, - mem_sizes_iter: impl Iterator< + _symbol_db: &crate::symbol_db::SymbolDb<'data, Self>, + _counts: &Self::NonAddressableCounts, + _mem_sizes_iter: impl Iterator< Item = &'groups mut crate::output_section_part_map::OutputSectionPartMap, >, ) { } fn finalise_sizes_epilogue<'data>( - state: &mut Self::EpilogueLayoutExt, - mem_sizes: &mut crate::output_section_part_map::OutputSectionPartMap, - dynamic_symbol_definitions: &[crate::layout::DynamicSymbolDefinition<'data, Self>], - properties: &Self::LayoutExt, - symbol_db: &crate::symbol_db::SymbolDb<'data, Self>, + _state: &mut Self::EpilogueLayoutExt, + _mem_sizes: &mut crate::output_section_part_map::OutputSectionPartMap, + _dynamic_symbol_definitions: &[crate::layout::DynamicSymbolDefinition<'data, Self>], + _properties: &Self::LayoutExt, + _symbol_db: &crate::symbol_db::SymbolDb<'data, Self>, ) { } fn finalise_sizes_all<'data>( - mem_sizes: &mut crate::output_section_part_map::OutputSectionPartMap, - symbol_db: &crate::symbol_db::SymbolDb<'data, Self>, + _mem_sizes: &mut crate::output_section_part_map::OutputSectionPartMap, + _symbol_db: &crate::symbol_db::SymbolDb<'data, Self>, ) { } fn apply_late_size_adjustments_epilogue( - state: &mut Self::EpilogueLayoutExt, - current_sizes: &crate::output_section_part_map::OutputSectionPartMap, - extra_sizes: &mut crate::output_section_part_map::OutputSectionPartMap, - dynamic_symbol_defs: &[crate::layout::DynamicSymbolDefinition], - args: &Self::Args, + _state: &mut Self::EpilogueLayoutExt, + _current_sizes: &crate::output_section_part_map::OutputSectionPartMap, + _extra_sizes: &mut crate::output_section_part_map::OutputSectionPartMap, + _dynamic_symbol_defs: &[crate::layout::DynamicSymbolDefinition], + _args: &Self::Args, ) -> crate::error::Result 
{ Ok(()) } fn finalise_layout_epilogue<'data>( - epilogue_state: &mut Self::EpilogueLayoutExt, - memory_offsets: &mut crate::output_section_part_map::OutputSectionPartMap, - symbol_db: &crate::symbol_db::SymbolDb<'data, Self>, - common_state: &Self::LayoutExt, - dynsym_start_index: u32, - dynamic_symbol_defs: &[crate::layout::DynamicSymbolDefinition], + _epilogue_state: &mut Self::EpilogueLayoutExt, + _memory_offsets: &mut crate::output_section_part_map::OutputSectionPartMap, + _symbol_db: &crate::symbol_db::SymbolDb<'data, Self>, + _common_state: &Self::LayoutExt, + _dynsym_start_index: u32, + _dynamic_symbol_defs: &[crate::layout::DynamicSymbolDefinition], ) -> crate::error::Result { Ok(()) } fn is_symbol_non_interposable<'data>( - object: &Self::File<'data>, - args: &Self::Args, - sym: &Self::SymtabEntry, - output_kind: crate::output_kind::OutputKind, - export_list: Option<&crate::export_list::ExportList>, - lib_name: &[u8], - archive_semantics: bool, - is_undefined: bool, + _object: &Self::File<'data>, + _args: &Self::Args, + _sym: &Self::SymtabEntry, + _output_kind: crate::output_kind::OutputKind, + _export_list: Option<&crate::export_list::ExportList>, + _lib_name: &[u8], + _archive_semantics: bool, + _is_undefined: bool, ) -> bool { - // TODO + // Mach-O two-level namespace: symbols are generally non-interposable true } fn allocate_header_sizes( - prelude: &mut crate::layout::PreludeLayoutState, + _prelude: &mut crate::layout::PreludeLayoutState, sizes: &mut crate::output_section_part_map::OutputSectionPartMap, - header_info: &crate::layout::HeaderInfo, - output_sections: &crate::output_section_id::OutputSections, + _header_info: &crate::layout::HeaderInfo, + _output_sections: &crate::output_section_id::OutputSections, ) { - sizes.increment(part_id::FILE_HEADER, size_of::() as u64); - sizes.increment( - part_id::PAGEZERO_SEGMENT, - size_of::() as u64, - ); - sizes.increment( - part_id::TEXT_SEGMENT, - (size_of::() - + size_of::() - * 
count_sections_for_segment_type(output_sections, SegmentType::Text)) - as u64, - ); - sizes.increment( - part_id::DATA_SEGMENT, - (size_of::() - + size_of::() - * count_sections_for_segment_type(output_sections, SegmentType::Data)) - as u64, - ); - sizes.increment( - part_id::LINK_EDIT_SEGMENT, - size_of::() as u64, - ); - sizes.increment(part_id::ENTRY_POINT, size_of::() as u64); + // Reserve a full page for headers. Mach-O __TEXT segment starts at page 0 and + // includes the headers. Sections start after the headers, page-aligned. + // A full page (16KB) is more than enough for headers + load commands. + sizes.increment(crate::part_id::FILE_HEADER, 0x4000); // 16KB page } fn finalise_sizes_for_symbol<'data>( - common: &mut crate::layout::CommonGroupState<'data, Self>, - symbol_db: &crate::symbol_db::SymbolDb<'data, Self>, - symbol_id: crate::symbol_db::SymbolId, - flags: crate::value_flags::ValueFlags, + _common: &mut crate::layout::CommonGroupState<'data, Self>, + _symbol_db: &crate::symbol_db::SymbolDb<'data, Self>, + _symbol_id: crate::symbol_db::SymbolId, + _flags: crate::value_flags::ValueFlags, ) -> crate::error::Result { Ok(()) } @@ -1112,53 +1422,47 @@ impl platform::Platform for MachO { fn allocate_resolution( flags: crate::value_flags::ValueFlags, mem_sizes: &mut crate::output_section_part_map::OutputSectionPartMap, - output_kind: crate::output_kind::OutputKind, + _output_kind: crate::output_kind::OutputKind, _args: &Self::Args, ) { + if flags.needs_plt() { + // Mach-O stubs are 12 bytes (adrp + ldr + br) + mem_sizes.increment(crate::part_id::PLT_GOT, 12); + // Each stub needs a GOT entry (8 bytes) for the dyld bind target + mem_sizes.increment(crate::part_id::GOT, 8); + } else if flags.needs_got() { + mem_sizes.increment(crate::part_id::GOT, 8); + } } fn allocate_object_symtab_space<'data>( - state: &crate::layout::ObjectLayoutState<'data, Self>, - common: &mut crate::layout::CommonGroupState<'data, Self>, - symbol_db: 
&crate::symbol_db::SymbolDb<'data, Self>, - per_symbol_flags: &crate::value_flags::AtomicPerSymbolFlags, - ) -> Result { - // TODO - // let mut num_globals = 0; - // let mut strings_size = 0; - // for symbol in state.object.symbols_iter() { - // // TODO: very basic - // num_globals += 1; - // strings_size += state.object.symbol_name(symbol)?.len() + 1; - // } - // let entry_size = size_of::() as u64; - // - // common.allocate(part_id::SYMTAB_GLOBAL, dbg!(num_globals * entry_size)); - // common.allocate(part_id::STRTAB, dbg!(strings_size as u64)); - + _state: &crate::layout::ObjectLayoutState<'data, Self>, + _common: &mut crate::layout::CommonGroupState<'data, Self>, + _symbol_db: &crate::symbol_db::SymbolDb<'data, Self>, + _per_symbol_flags: &crate::value_flags::AtomicPerSymbolFlags, + ) -> crate::error::Result { Ok(()) } fn allocate_internal_symbol( - symbol_id: crate::symbol_db::SymbolId, - def_info: &crate::parsing::InternalSymDefInfo, - sizes: &mut crate::output_section_part_map::OutputSectionPartMap, - symbol_db: &crate::symbol_db::SymbolDb, + _symbol_id: crate::symbol_db::SymbolId, + _def_info: &crate::parsing::InternalSymDefInfo, + _sizes: &mut crate::output_section_part_map::OutputSectionPartMap, + _symbol_db: &crate::symbol_db::SymbolDb, ) -> crate::error::Result { - todo!() + Ok(()) } fn allocate_prelude( - common: &mut crate::layout::CommonGroupState, - symbol_db: &crate::symbol_db::SymbolDb, + _common: &mut crate::layout::CommonGroupState, + _symbol_db: &crate::symbol_db::SymbolDb, ) { - // TODO } fn finalise_prelude_layout<'data>( - prelude: &crate::layout::PreludeLayoutState, - memory_offsets: &mut crate::output_section_part_map::OutputSectionPartMap, - resources: &crate::layout::FinaliseLayoutResources<'_, 'data, Self>, + _prelude: &crate::layout::PreludeLayoutState, + _memory_offsets: &mut crate::output_section_part_map::OutputSectionPartMap, + _resources: &crate::layout::FinaliseLayoutResources<'_, 'data, Self>, ) -> crate::error::Result { Ok(()) } 
@@ -1169,7 +1473,32 @@ impl platform::Platform for MachO { dynamic_symbol_index: Option, memory_offsets: &mut crate::output_section_part_map::OutputSectionPartMap, ) -> crate::layout::Resolution { - todo!() + let mut got_address = None; + let mut plt_address = None; + + if flags.needs_plt() { + let got_addr = *memory_offsets.get(crate::part_id::GOT); + *memory_offsets.get_mut(crate::part_id::GOT) += 8; + got_address = Some(got_addr); + + let plt_addr = *memory_offsets.get(crate::part_id::PLT_GOT); + *memory_offsets.get_mut(crate::part_id::PLT_GOT) += 12; + plt_address = Some(plt_addr); + } else if flags.needs_got() { + let got_addr = *memory_offsets.get(crate::part_id::GOT); + *memory_offsets.get_mut(crate::part_id::GOT) += 8; + got_address = Some(got_addr); + } + + crate::layout::Resolution { + raw_value, + dynamic_symbol_index, + flags, + format_specific: MachOResolutionExt { + got_address, + plt_address, + }, + } } fn raw_symbol_name<'data>( @@ -1181,7 +1510,7 @@ impl platform::Platform for MachO { } fn default_layout_rules() -> &'static [crate::layout_rules::SectionRule<'static>] { - DEFAULT_SECTION_RULES + MACHO_SECTION_RULES } fn build_output_order_and_program_segments<'data>( @@ -1195,188 +1524,91 @@ impl platform::Platform for MachO { crate::output_section_id::OutputOrder, crate::program_segments::ProgramSegments, ) { - let mut builder = OutputOrderBuilder::::new(output_kind, output_sections, secondary); + use crate::output_section_id; + let mut builder = crate::output_section_id::OutputOrderBuilder::::new( + output_kind, + output_sections, + secondary, + ); - // File header and all load commands. 
+ // __TEXT segment (r-x): headers, code, read-only data, stubs builder.add_section(output_section_id::FILE_HEADER); - builder.add_section(output_section_id::PAGEZERO_SEGMENT); - builder.add_section(output_section_id::TEXT_SEGMENT); - builder.add_section(output_section_id::DATA_SEGMENT); - builder.add_section(output_section_id::ENTRY_POINT); - builder.add_section(output_section_id::LINK_EDIT_SEGMENT); - // Content of the sections (e.g. __text, __data). + builder.add_section(output_section_id::RODATA); // __cstring + builder.add_section(output_section_id::COMMENT); // __literal4/8/16 + builder.add_section(output_section_id::DATA_REL_RO); // __text_const + builder.add_sections(&custom.ro); builder.add_section(output_section_id::TEXT); - builder.add_section(output_section_id::CSTRING); + builder.add_sections(&custom.exec); + builder.add_section(output_section_id::PLT_GOT); // __stubs (call trampolines) + builder.add_section(output_section_id::GCC_EXCEPT_TABLE); + builder.add_section(output_section_id::EH_FRAME); + + // __DATA segment (rw-): writable data, GOT, BSS builder.add_section(output_section_id::DATA); - // The rest (e.g. symbol table, string table). 
+ builder.add_section(output_section_id::CSTRING); // __DATA,__const + builder.add_section(output_section_id::PREINIT_ARRAY); // __thread_vars + builder.add_section(output_section_id::INIT_ARRAY); // __mod_init_func + builder.add_section(output_section_id::FINI_ARRAY); // __mod_term_func + builder.add_sections(&custom.data); + builder.add_section(output_section_id::GOT); + builder.add_section(output_section_id::TDATA); + builder.add_section(output_section_id::TBSS); + builder.add_section(output_section_id::BSS); + builder.add_sections(&custom.bss); builder.build() } - - fn start_memory_address(output_kind: OutputKind) -> u64 { - MACHO_START_MEM_ADDRESS - } } -const SECTION_DEFINITIONS: [BuiltInSectionDetails; NUM_BUILT_IN_SECTIONS] = { - let mut defs: [BuiltInSectionDetails; NUM_BUILT_IN_SECTIONS] = - [DEFAULT_DEFS; NUM_BUILT_IN_SECTIONS]; - - defs[output_section_id::FILE_HEADER.as_usize()] = BuiltInSectionDetails { - kind: SectionKind::Primary(SectionName(b"FILE_HEADER")), - target_segment_type: Some(SegmentType::Header), - ..DEFAULT_DEFS - }; - // Load commands - defs[output_section_id::PAGEZERO_SEGMENT.as_usize()] = BuiltInSectionDetails { - kind: SectionKind::Primary(SectionName(SEG_PAGEZERO.as_bytes())), - target_segment_type: Some(SegmentType::LoadCommands), - ..DEFAULT_DEFS - }; - defs[output_section_id::TEXT_SEGMENT.as_usize()] = BuiltInSectionDetails { - kind: SectionKind::Primary(SectionName(SEG_TEXT.as_bytes())), - target_segment_type: Some(SegmentType::LoadCommands), - section_flags: SectionFlags::from_u32(macho::VM_PROT_READ | macho::VM_PROT_EXECUTE), - ..DEFAULT_DEFS - }; - defs[output_section_id::DATA_SEGMENT.as_usize()] = BuiltInSectionDetails { - kind: SectionKind::Primary(SectionName(SEG_DATA.as_bytes())), - target_segment_type: Some(SegmentType::LoadCommands), - section_flags: SectionFlags::from_u32(macho::VM_PROT_READ | macho::VM_PROT_WRITE), - ..DEFAULT_DEFS - }; - defs[output_section_id::LINK_EDIT_SEGMENT.as_usize()] = BuiltInSectionDetails { 
- kind: SectionKind::Primary(SectionName(SEG_LINKEDIT.as_bytes())), - target_segment_type: Some(SegmentType::LoadCommands), - ..DEFAULT_DEFS - }; - defs[output_section_id::ENTRY_POINT.as_usize()] = BuiltInSectionDetails { - kind: SectionKind::Primary(SectionName(b"LC_MAIN")), - target_segment_type: Some(SegmentType::LoadCommands), - ..DEFAULT_DEFS - }; - defs[output_section_id::STRTAB.as_usize()] = BuiltInSectionDetails { - kind: SectionKind::Primary(SectionName(secnames::STRTAB_SECTION_NAME)), - ..DEFAULT_DEFS - }; - // Multi-part generated sections - defs[output_section_id::SYMTAB_GLOBAL.as_usize()] = BuiltInSectionDetails { - kind: SectionKind::Secondary(output_section_id::SYMTAB_LOCAL), - ..DEFAULT_DEFS - }; - // Start of regular sections - defs[output_section_id::TEXT.as_usize()] = BuiltInSectionDetails { - kind: SectionKind::Primary(SectionName(b"__text")), - section_flags: SectionFlags::from_u32( - macho::S_REGULAR | macho::S_ATTR_PURE_INSTRUCTIONS | macho::S_ATTR_SOME_INSTRUCTIONS, - ), - ..DEFAULT_DEFS - }; - defs[output_section_id::CSTRING.as_usize()] = BuiltInSectionDetails { - kind: SectionKind::Primary(SectionName(b"__cstring")), - section_flags: SectionFlags::from_u32(macho::S_CSTRING_LITERALS), - ..DEFAULT_DEFS - }; - defs[output_section_id::DATA.as_usize()] = BuiltInSectionDetails { - kind: SectionKind::Primary(SectionName(b"__data")), - section_flags: SectionFlags::from_u32(macho::S_REGULAR), - ..DEFAULT_DEFS - }; - - defs +const MACHO_SECTION_RULES: &[crate::layout_rules::SectionRule<'static>] = { + use crate::layout_rules::SectionRule; + use crate::output_section_id; + &[ + SectionRule::exact_section(b"__text", output_section_id::TEXT), + SectionRule::exact_section(b"__stubs", output_section_id::TEXT), + SectionRule::exact_section(b"__stub_helper", output_section_id::TEXT), + // Each Mach-O section gets a dedicated output section ID where possible. 
+ // Sharing output section IDs between sections with different names can + // cause data overlap when the layout pipeline assigns overlapping parts. + // __DATA,__const has pointer relocations — give it CSTRING (unused regular + // section on Mach-O) to keep it separate from __data (both align 8). + SectionRule::exact_section(b"__const", output_section_id::CSTRING), + SectionRule::exact_section(b"__text_const", output_section_id::DATA_REL_RO), + SectionRule::exact_section(b"__cstring", output_section_id::RODATA), + SectionRule::exact_section(b"__literal4", output_section_id::COMMENT), + SectionRule::exact_section(b"__literal8", output_section_id::COMMENT), + SectionRule::exact_section(b"__literal16", output_section_id::COMMENT), + SectionRule::exact_section(b"__data", output_section_id::DATA), + SectionRule::exact_section(b"__la_symbol_ptr", output_section_id::DATA), + SectionRule::exact_section(b"__nl_symbol_ptr", output_section_id::DATA), + SectionRule::exact_section(b"__got", output_section_id::DATA), + // TLS descriptors go in TDATA (after GOT), init data follows. + // This separates TLS bind fixups from GOT bind fixups in the chain. + // __thread_vars must NOT share the GOT output section — GOT-only entries + // (e.g. for __eh_frame personality pointers) would overlap with TLV descriptors. + // __thread_vars uses PREINIT_ARRAY (unused on Mach-O) as its dedicated + // output section so all thread_vars from all objects are grouped contiguously. + // Using DATA would interleave them with __data from other objects. 
+ SectionRule::exact_section(b"__thread_vars", output_section_id::PREINIT_ARRAY), + SectionRule::exact_section(b"__thread_data", output_section_id::TDATA), + SectionRule::exact_section(b"__thread_bss", output_section_id::TBSS), + // Constructor/destructor function pointer arrays (Mach-O equivalent of + // .init_array/.fini_array) + SectionRule::exact_section(b"__mod_init_func", output_section_id::INIT_ARRAY), + SectionRule::exact_section(b"__mod_term_func", output_section_id::FINI_ARRAY), + SectionRule::exact_section(b"__gcc_except_tab", output_section_id::GCC_EXCEPT_TABLE), + SectionRule::exact_section(b".rustc", output_section_id::DATA), + SectionRule::exact_section(b"__bss", output_section_id::BSS), + SectionRule::exact_section(b"__common", output_section_id::BSS), + SectionRule::exact_section(b"__unwind_info", output_section_id::RODATA), + SectionRule::exact_section(b"__eh_frame", output_section_id::EH_FRAME), + SectionRule::exact_section(b"__compact_unwind", output_section_id::RODATA), + ] }; -// TODO: sort properly -const DEFAULT_SECTION_RULES: &[SectionRule<'static>] = &[ - SectionRule::exact_section_keep(b"__text", crate::output_section_id::TEXT), - SectionRule::exact_section_keep(b"__cstring", crate::output_section_id::CSTRING), - SectionRule::exact_section_keep(b"__data", crate::output_section_id::DATA), - // SectionRule::exact_section_keep(b"__compact_unwind", crate::output_section_id::EH_FRAME), -]; - -const PROGRAM_SEGMENT_DEFS: &[ProgramSegmentDef] = &[ - ProgramSegmentDef { - segment_type: SegmentType::Header, - }, - ProgramSegmentDef { - segment_type: SegmentType::LoadCommands, - }, - ProgramSegmentDef { - segment_type: SegmentType::Text, - }, - ProgramSegmentDef { - segment_type: SegmentType::Data, - }, - ProgramSegmentDef { - segment_type: SegmentType::DataConst, - }, - ProgramSegmentDef { - segment_type: SegmentType::Misc, - }, -]; - -fn count_sections_for_segment_type( - output_sections: &crate::output_section_id::OutputSections, - segment_type: 
SegmentType, -) -> usize { - let segment_def = ProgramSegmentDef { segment_type }; - output_sections - .ids_with_info() - .filter(|(section_id, _)| { - output_sections.should_include_in_segment(*section_id, segment_def) - }) - .count() -} - -pub(crate) struct SegmentSectionsInfo<'data> { - pub(crate) segment_size: OutputRecordLayout, - pub(crate) segment_sections: - Vec<(OutputRecordLayout, Option>, SectionFlags)>, -} - -pub(crate) fn get_segment_sections<'data>( - layout: &Layout<'data, MachO>, - segment_type: SegmentType, -) -> SegmentSectionsInfo<'data> { - let mut in_matching_segment = false; - let mut sections = Vec::new(); - let mut segment_id = None; - - for event in &layout.output_order { - match event { - OrderEvent::SegmentStart(seg_id) - if layout.program_segments.segment_def(seg_id).segment_type == segment_type => - { - segment_id = Some(seg_id); - in_matching_segment = true; - } - OrderEvent::SegmentEnd(seg_id) - if layout.program_segments.segment_def(seg_id).segment_type == segment_type - && in_matching_segment => - { - break; - } - OrderEvent::Section(section_id) if in_matching_segment => { - let sizes = *layout.section_layouts.get(section_id); - sections.push(( - sizes, - layout.output_sections.name(section_id), - layout.output_sections.section_flags(section_id), - )); - } - _ => {} - } - } - - let segment_id = segment_id.expect("must be visited in the output order"); - SegmentSectionsInfo { - segment_sections: sections, - segment_size: layout - .segment_layouts - .segments - .iter() - .find(|seg| seg.id == segment_id) - .unwrap() - .sizes, - } +/// Trim trailing NUL bytes from a fixed-size Mach-O name field. 
+pub(crate) fn trim_nul(name: &[u8; 16]) -> &[u8] { + let end = name.iter().position(|&b| b == 0).unwrap_or(16); + // Safety: end <= 16, and the array has 16 elements + &name.as_slice()[..end] } diff --git a/libwild/src/macho_aarch64.rs b/libwild/src/macho_aarch64.rs index 677327957..28a7511c4 100644 --- a/libwild/src/macho_aarch64.rs +++ b/libwild/src/macho_aarch64.rs @@ -1,46 +1,157 @@ -// TODO +// Mach-O ARM64 architecture support. #![allow(unused_variables)] use crate::macho::MachO; +use linker_utils::elf::AArch64Instruction; +use linker_utils::elf::AllowedRange; +use linker_utils::elf::RelocationKind; +use linker_utils::elf::RelocationKindInfo; +use linker_utils::elf::RelocationSize; +use linker_utils::relaxation::RelocationModifier; +use object::macho; pub(crate) struct MachOAArch64; +/// Mach-O ARM64 relocation types mapped to our internal representation. +fn macho_aarch64_relocation_from_raw(r_type: u32) -> Option { + let (kind, size, range, alignment) = match r_type as u8 { + macho::ARM64_RELOC_UNSIGNED => ( + RelocationKind::Absolute, + RelocationSize::ByteSize(8), + AllowedRange::no_check(), + 1, + ), + macho::ARM64_RELOC_BRANCH26 => ( + RelocationKind::Relative, + RelocationSize::bit_mask_aarch64(0, 26, AArch64Instruction::JumpCall), + AllowedRange::from_bit_size(28, linker_utils::elf::Sign::Signed), + 4, + ), + macho::ARM64_RELOC_PAGE21 => ( + RelocationKind::Relative, + RelocationSize::bit_mask_aarch64(12, 33, AArch64Instruction::Adr), + AllowedRange::from_bit_size(33, linker_utils::elf::Sign::Signed), + 1, + ), + macho::ARM64_RELOC_PAGEOFF12 => ( + RelocationKind::AbsoluteLowPart, + RelocationSize::bit_mask_aarch64(0, 12, AArch64Instruction::Add), + AllowedRange::no_check(), + 1, + ), + macho::ARM64_RELOC_GOT_LOAD_PAGE21 => ( + RelocationKind::GotRelative, + RelocationSize::bit_mask_aarch64(12, 33, AArch64Instruction::Adr), + AllowedRange::from_bit_size(33, linker_utils::elf::Sign::Signed), + 1, + ), + macho::ARM64_RELOC_GOT_LOAD_PAGEOFF12 => ( + 
RelocationKind::GotRelative, + RelocationSize::bit_mask_aarch64(0, 12, AArch64Instruction::LdrRegister), + AllowedRange::no_check(), + 8, + ), + macho::ARM64_RELOC_SUBTRACTOR => ( + RelocationKind::Absolute, + RelocationSize::ByteSize(8), + AllowedRange::no_check(), + 1, + ), + macho::ARM64_RELOC_POINTER_TO_GOT => ( + RelocationKind::GotRelative, + RelocationSize::ByteSize(4), + AllowedRange::from_bit_size(32, linker_utils::elf::Sign::Signed), + 1, + ), + macho::ARM64_RELOC_TLVP_LOAD_PAGE21 => ( + RelocationKind::TlsGd, + RelocationSize::bit_mask_aarch64(12, 33, AArch64Instruction::Adr), + AllowedRange::from_bit_size(33, linker_utils::elf::Sign::Signed), + 1, + ), + macho::ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => ( + RelocationKind::TlsGd, + RelocationSize::bit_mask_aarch64(0, 12, AArch64Instruction::Add), + AllowedRange::no_check(), + 1, + ), + macho::ARM64_RELOC_ADDEND => ( + RelocationKind::None, + RelocationSize::ByteSize(0), + AllowedRange::no_check(), + 1, + ), + _ => return None, + }; + Some(RelocationKindInfo { + kind, + size, + mask: None, + range, + alignment, + bias: 0, + }) +} + +fn macho_aarch64_rel_type_to_string(r_type: u32) -> std::borrow::Cow<'static, str> { + match r_type as u8 { + macho::ARM64_RELOC_UNSIGNED => "ARM64_RELOC_UNSIGNED".into(), + macho::ARM64_RELOC_SUBTRACTOR => "ARM64_RELOC_SUBTRACTOR".into(), + macho::ARM64_RELOC_BRANCH26 => "ARM64_RELOC_BRANCH26".into(), + macho::ARM64_RELOC_PAGE21 => "ARM64_RELOC_PAGE21".into(), + macho::ARM64_RELOC_PAGEOFF12 => "ARM64_RELOC_PAGEOFF12".into(), + macho::ARM64_RELOC_GOT_LOAD_PAGE21 => "ARM64_RELOC_GOT_LOAD_PAGE21".into(), + macho::ARM64_RELOC_GOT_LOAD_PAGEOFF12 => "ARM64_RELOC_GOT_LOAD_PAGEOFF12".into(), + macho::ARM64_RELOC_POINTER_TO_GOT => "ARM64_RELOC_POINTER_TO_GOT".into(), + macho::ARM64_RELOC_TLVP_LOAD_PAGE21 => "ARM64_RELOC_TLVP_LOAD_PAGE21".into(), + macho::ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => "ARM64_RELOC_TLVP_LOAD_PAGEOFF12".into(), + macho::ARM64_RELOC_ADDEND => "ARM64_RELOC_ADDEND".into(), + 
other => format!("unknown Mach-O ARM64 relocation {other}").into(), + } +} + #[derive(Debug, Clone)] pub(crate) struct Relaxation {} impl crate::platform::Relaxation for Relaxation { - fn apply(&self, section_bytes: &mut [u8], offset_in_section: &mut u64, addend: &mut i64) { - todo!() + fn apply(&self, _section_bytes: &mut [u8], _offset_in_section: &mut u64, _addend: &mut i64) { + // No relaxations for Mach-O yet } - fn rel_info(&self) -> linker_utils::elf::RelocationKindInfo { - todo!() + fn rel_info(&self) -> RelocationKindInfo { + RelocationKindInfo { + kind: RelocationKind::None, + size: RelocationSize::ByteSize(0), + mask: None, + range: AllowedRange::no_check(), + alignment: 1, + bias: 0, + } } fn debug_kind(&self) -> impl std::fmt::Debug { - todo!() + "MachORelaxation(none)" } - fn next_modifier(&self) -> linker_utils::relaxation::RelocationModifier { - todo!() + fn next_modifier(&self) -> RelocationModifier { + RelocationModifier::Normal } fn is_mandatory(&self) -> bool { - todo!() + false } } impl crate::platform::Arch for MachOAArch64 { type Relaxation = Relaxation; - type Platform = MachO; fn arch_identifier() -> ::ArchIdentifier { - todo!() + // Mach-O doesn't use ELF-style arch identifiers } - fn get_dynamic_relocation_type(relocation: linker_utils::elf::DynamicRelocationKind) -> u32 { - todo!() + fn get_dynamic_relocation_type(_relocation: linker_utils::elf::DynamicRelocationKind) -> u32 { + 0 } fn write_plt_entry( @@ -48,54 +159,84 @@ impl crate::platform::Arch for MachOAArch64 { got_address: u64, plt_address: u64, ) -> crate::error::Result { - todo!() - } - - fn relocation_from_raw( - r_type: u32, - ) -> crate::error::Result { - todo!() + // Mach-O __stubs entry: 12 bytes + // adrp x16, GOT_PAGE + // ldr x16, [x16, GOT_OFFSET] + // br x16 + let stub: [u8; 12] = [ + 0x10, 0x00, 0x00, 0x90, // adrp x16, #0 + 0x10, 0x02, 0x40, 0xf9, // ldr x16, [x16] + 0x00, 0x02, 0x1f, 0xd6, // br x16 + ]; + plt_entry[..12].copy_from_slice(&stub); + + // Patch ADRP 
with page distance to GOT entry + let stub_page = plt_address & !0xFFF; + let got_page = got_address & !0xFFF; + let page_delta = got_page.wrapping_sub(stub_page) as i64 >> 12; + let immlo = ((page_delta & 0x3) as u32) << 29; + let immhi = (((page_delta >> 2) & 0x7_FFFF) as u32) << 5; + let adrp = u32::from_le_bytes(plt_entry[0..4].try_into().unwrap()); + let adrp = (adrp & 0x9F00_001F) | immhi | immlo; + plt_entry[0..4].copy_from_slice(&adrp.to_le_bytes()); + + // Patch LDR with page offset to GOT entry (scaled by 8) + let page_off = ((got_address & 0xFFF) >> 3) as u32; + let ldr = u32::from_le_bytes(plt_entry[4..8].try_into().unwrap()); + let ldr = (ldr & 0xFFC0_03FF) | (page_off << 10); + plt_entry[4..8].copy_from_slice(&ldr.to_le_bytes()); + + Ok(()) + } + + fn relocation_from_raw(r_type: u32) -> crate::error::Result { + macho_aarch64_relocation_from_raw(r_type).ok_or_else(|| { + crate::error!( + "Unsupported Mach-O ARM64 relocation type {}", + macho_aarch64_rel_type_to_string(r_type) + ) + }) } fn rel_type_to_string(r_type: u32) -> std::borrow::Cow<'static, str> { - todo!() + macho_aarch64_rel_type_to_string(r_type) } - fn tp_offset_start(layout: &crate::layout::Layout) -> u64 { - todo!() + fn tp_offset_start(_layout: &crate::layout::Layout) -> u64 { + 0 } - fn get_property_class(property_type: u32) -> Option { - todo!() + fn get_property_class(_property_type: u32) -> Option { + None } - fn merge_eflags(eflags: impl Iterator) -> crate::error::Result { - todo!() + fn merge_eflags(_eflags: impl Iterator) -> crate::error::Result { + Ok(0) } fn high_part_relocations() -> &'static [u32] { - todo!() + &[] } fn get_source_info<'data>( - object: &::File<'data>, - relocations: &::RelocationSections, - section: &::SectionHeader, - offset_in_section: u64, + _object: &::File<'data>, + _relocations: &::RelocationSections, + _section: &::SectionHeader, + _offset_in_section: u64, ) -> crate::error::Result { - todo!() + Ok(crate::platform::SourceInfo(None)) } fn 
new_relaxation( - relocation_kind: u32, - section_bytes: &[u8], - offset_in_section: u64, - flags: crate::value_flags::ValueFlags, - output_kind: crate::output_kind::OutputKind, - section_flags: ::SectionFlags, - non_zero_address: bool, - relax_deltas: Option<&linker_utils::relaxation::SectionRelaxDeltas>, + _relocation_kind: u32, + _section_bytes: &[u8], + _offset_in_section: u64, + _flags: crate::value_flags::ValueFlags, + _output_kind: crate::output_kind::OutputKind, + _section_flags: ::SectionFlags, + _non_zero_address: bool, + _relax_deltas: Option<&linker_utils::relaxation::SectionRelaxDeltas>, ) -> Option { - todo!() + None } } diff --git a/libwild/src/macho_writer.rs b/libwild/src/macho_writer.rs index b6a9122b3..e1016c3f7 100644 --- a/libwild/src/macho_writer.rs +++ b/libwild/src/macho_writer.rs @@ -1,344 +1,4535 @@ -// TODO -#![allow(unused_variables)] -#![allow(unused)] +// Mach-O output file writer. +// +// Uses the common layout pipeline's symbol resolutions and section addresses +// to produce a Mach-O executable for aarch64-apple-darwin. 
+#![allow(dead_code)] -use crate::bail; -use crate::error; -use crate::error::Context; use crate::error::Result; -use crate::file_writer::SizedOutput; -use crate::file_writer::split_buffers_by_alignment; -use crate::file_writer::split_output_by_group; -use crate::file_writer::split_output_into_sections; use crate::layout::FileLayout; -use crate::layout::HeaderInfo; use crate::layout::Layout; use crate::layout::ObjectLayout; -use crate::layout::OutputRecordLayout; -use crate::layout::PreludeLayout; -use crate::layout::Section; -use crate::macho::EntryPointCommand; -use crate::macho::FileHeader; -use crate::macho::MACHO_START_MEM_ADDRESS; use crate::macho::MachO; -use crate::macho::SectionEntry; -use crate::macho::SegmentCommand; -use crate::macho::SegmentSectionsInfo; -use crate::macho::SegmentType; -use crate::macho::get_segment_sections; use crate::output_section_id; -use crate::output_section_id::OrderEvent; -use crate::output_section_id::OutputSectionId; -use crate::output_section_id::SectionName; -use crate::output_section_part_map::OutputSectionPartMap; -use crate::output_trace::TraceOutput; -use crate::part_id; use crate::platform::Arch; -use crate::platform::Args; -use crate::platform::ObjectFile; -use crate::resolution::SectionSlot; -use crate::timing_phase; -use crate::verbose_timing_phase; -use object::BigEndian; -use object::Endianness; -use object::U32; -use object::from_bytes_mut; -use object::macho::CPU_TYPE_ARM64; -use object::macho::LC_MAIN; -use object::macho::LC_SEGMENT_64; -use object::macho::MH_CIGAM_64; -use object::macho::MH_EXECUTE; -use object::macho::SEG_DATA; -use object::macho::SEG_LINKEDIT; -use object::macho::SEG_PAGEZERO; -use object::macho::SEG_TEXT; -use object::slice_from_bytes_mut; -use rayon::iter::IntoParallelIterator; -use rayon::iter::ParallelIterator; -use tracing::debug_span; -use zerocopy::FromZeros; - -const LE: Endianness = Endianness::Little; - -type MachOLayout<'data> = Layout<'data, MachO>; - -pub(crate) fn write<'data, 
A: Arch>( - sized_output: &mut SizedOutput, - layout: &MachOLayout<'data>, -) -> Result { - timing_phase!("Write data to file"); - let mut section_buffers = split_output_into_sections(layout, &mut sized_output.out); +use crate::platform::Args as _; + +const PAGE_SIZE: u64 = 0x4000; +const PAGEZERO_SIZE: u64 = 0x1_0000_0000; + +const MH_MAGIC_64: u32 = 0xfeed_facf; +const MH_EXECUTE: u32 = 2; +const MH_PIE: u32 = 0x0020_0000; +const MH_TWOLEVEL: u32 = 0x80; +const MH_DYLDLINK: u32 = 4; +const CPU_TYPE_ARM64: u32 = 0x0100_000c; +const CPU_SUBTYPE_ARM64_ALL: u32 = 0; +const LC_SEGMENT_64: u32 = 0x19; +const LC_MAIN: u32 = 0x8000_0028; +const LC_SYMTAB: u32 = 0x02; +const LC_DYSYMTAB: u32 = 0x0b; +const LC_LOAD_DYLINKER: u32 = 0x0e; +const LC_LOAD_DYLIB: u32 = 0x0c; +const LC_BUILD_VERSION: u32 = 0x32; +const LC_DYLD_CHAINED_FIXUPS: u32 = 0x8000_0034; +const LC_DYLD_EXPORTS_TRIE: u32 = 0x8000_0033; +const VM_PROT_READ: u32 = 1; +const VM_PROT_WRITE: u32 = 2; +const VM_PROT_EXECUTE: u32 = 4; +const PLATFORM_MACOS: u32 = 1; + +const DYLD_PATH: &[u8] = b"/usr/lib/dyld"; +const LIBSYSTEM_PATH: &[u8] = b"/usr/lib/libSystem.B.dylib"; + +pub(crate) fn write_direct>(layout: &Layout<'_, MachO>) -> Result { + if layout.symbol_db.args.is_relocatable { + return write_relocatable_object(layout); + } + + // Collect compact-unwind entries from all input objects. + let plain_entries = collect_compact_unwind_entries(layout); + + // Find TEXT segment bounds (first non-empty segment). + // The layout mem_size is content-sized (not page-aligned). The actual + // page boundary between TEXT and DATA is align_to(content_end, PAGE_SIZE). 
+ let (text_base, text_vm_end) = layout + .segment_layouts + .segments + .iter() + .find(|s| s.sizes.file_size > 0 || s.sizes.mem_size > 0) + .map(|s| { + let content_end = s.sizes.mem_offset + s.sizes.mem_size; + (s.sizes.mem_offset, align_to(content_end, PAGE_SIZE)) + }) + .unwrap_or((PAGEZERO_SIZE, PAGEZERO_SIZE + PAGE_SIZE)); + + // Find the end of actual TEXT content (last byte of __eh_frame, or __text). + // The gap [text_content_end, text_vm_end) is zero padding within the TEXT + // file allocation — we can place __unwind_info there without extending + // TEXT vmsize or shifting DATA vmaddr. + let text_content_end = { + // Find the end of the last TEXT-segment section: + // EH_FRAME > GCC_EXCEPT_TABLE > PLT_GOT > TEXT + let eh = layout.section_layouts.get(output_section_id::EH_FRAME); + let ge = layout + .section_layouts + .get(output_section_id::GCC_EXCEPT_TABLE); + let plt = layout.section_layouts.get(output_section_id::PLT_GOT); + let t = layout.section_layouts.get(output_section_id::TEXT); + if eh.mem_size > 0 { + eh.mem_offset + eh.mem_size + } else if ge.mem_size > 0 { + ge.mem_offset + ge.mem_size + } else if plt.mem_size > 0 { + plt.mem_offset + plt.mem_size + } else { + t.mem_offset + t.mem_size + } + }; + let gap_bytes = text_vm_end.saturating_sub(text_content_end); + + // Decide where to place __unwind_info (4-byte aligned start of gap). + // The actual content is built inside write_macho after __eh_frame is written, + // so we only need to know whether there is room and the vm_addr. 
+ let unwind_info_vm_addr = if plain_entries.is_empty() || gap_bytes == 0 { + 0u64 + } else { + (text_content_end + 3) & !3u64 + }; + + let extra_text = 0u64; + + let (mappings, alloc_size) = build_mappings_and_size(layout, extra_text); + let mut buf = vec![0u8; alloc_size]; + let final_size = write_macho::( + &mut buf, + layout, + &mappings, + &plain_entries, + unwind_info_vm_addr, + text_base, + text_vm_end, + )?; + buf.truncate(final_size); + + if layout.symbol_db.args.common().validate_output { + validate_macho_output(&buf)?; + } - let mut writable_buckets = split_buffers_by_alignment(&mut section_buffers, layout); - let groups_and_buffers = split_output_by_group(layout, &mut writable_buckets); - groups_and_buffers - .into_par_iter() - .try_for_each(|(group, mut buffers)| -> Result { - verbose_timing_phase!("Write group"); + let output_path = layout.symbol_db.args.output(); - for file in &group.files { - write_file::(file, &mut buffers, layout, &sized_output.trace) - .with_context(|| format!("Failed copying from {file} to output file"))?; + std::fs::write(output_path.as_ref(), &buf) + .map_err(|e| crate::error!("Failed to write: {e}"))?; + + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let _ = + std::fs::set_permissions(output_path.as_ref(), std::fs::Permissions::from_mode(0o755)); + } + #[cfg(target_os = "macos")] + if !layout.symbol_db.args.no_adhoc_codesign { + let mut codesign_cmd = std::process::Command::new("codesign"); + codesign_cmd.args(["-s", "-", "--force", "-o", "linker-signed"]); + // Use -final_output as identifier for reproducible signing + if let Some(ref fo) = layout.symbol_db.args.final_output { + codesign_cmd.args(["-i", fo]); + } + let status = codesign_cmd.arg(output_path.as_ref()).status(); + if let Ok(s) = &status { + if !s.success() { + tracing::warn!("codesign failed with status: {s}"); } - Ok(()) - })?; + } + } Ok(()) } -fn write_file<'data, A: Arch>( - file: &FileLayout<'data, MachO>, - buffers: &mut 
OutputSectionPartMap<&mut [u8]>, - layout: &MachOLayout<'data>, - trace: &TraceOutput, -) -> Result { - match file { - FileLayout::Object(s) => { - write_object::(s, buffers, layout)?; +/// Build exactly 2 segment mappings (TEXT + merged DATA) from pipeline layout. +/// `extra_text` extends the TEXT segment (first segment) by that many bytes. +fn build_mappings_and_size( + layout: &Layout<'_, MachO>, + extra_text: u64, +) -> (Vec, usize) { + let mut raw: Vec<(u64, u64, u64)> = Vec::new(); + let mut file_cursor: u64 = 0; + let mut is_first = true; + for seg in &layout.segment_layouts.segments { + if seg.sizes.file_size == 0 && seg.sizes.mem_size == 0 { + continue; + } + let file_off = if raw.is_empty() { + 0 + } else { + align_to(file_cursor, PAGE_SIZE) + }; + let extra = if is_first { extra_text } else { 0 }; + is_first = false; + // extra_text extends the TEXT file allocation (for __unwind_info in the + // gap) but NOT the vmsize — vmsize is determined by the layout to avoid + // overlapping with the DATA segment. + let file_sz = align_to(seg.sizes.file_size as u64 + extra, PAGE_SIZE); + raw.push(( + seg.sizes.mem_offset, + seg.sizes.mem_offset + seg.sizes.mem_size, + file_off, + )); + file_cursor = file_off + file_sz; + } + + let mut mappings = Vec::new(); + if let Some(&(vm_start, vm_end, file_off)) = raw.first() { + // Extend TEXT mapping to the page boundary so __unwind_info in the + // gap between content end and page boundary is addressable. + mappings.push(SegmentMapping { + vm_start, + vm_end: align_to(vm_end - vm_start, PAGE_SIZE) + vm_start, + file_offset: file_off, + }); + } + if raw.len() > 1 { + // Merge all non-TEXT segments into one DATA mapping. + // Segments may be out of VM order, so use min/max. 
+ let data_vm_start = raw.iter().skip(1).map(|r| r.0).min().unwrap(); + let data_vm_end = raw.iter().skip(1).map(|r| r.1).max().unwrap(); + let data_file_off = raw.iter().skip(1).map(|r| r.2).min().unwrap(); + mappings.push(SegmentMapping { + vm_start: data_vm_start, + vm_end: data_vm_end, + file_offset: data_file_off, + }); + } + + // Compute LINKEDIT offset the same way write_headers does: + // TEXT filesize is page-aligned, DATA filesize is page-aligned from its file_offset. + let text_filesize = mappings + .first() + .map_or(PAGE_SIZE, |m| align_to(m.vm_end - m.vm_start, PAGE_SIZE)); + let linkedit_offset = if mappings.len() > 1 { + let data_fileoff = mappings[1].file_offset; + let data_filesize = align_to( + mappings + .iter() + .skip(1) + .map(|m| m.file_offset + (m.vm_end - m.vm_start)) + .max() + .unwrap() + - data_fileoff, + PAGE_SIZE, + ); + data_fileoff + data_filesize + } else { + text_filesize + }; + // Estimate LINKEDIT size: chained fixups + symtab + strtab + exports trie. + // For dylibs with many exports, 8KB is not enough. + // For executables, we write all defined symbols for backtrace symbolization. + let n_exports = layout.dynamic_symbol_definitions.len(); + let n_syms = layout + .symbol_resolutions + .iter() + .filter(|r| r.is_some()) + .count(); + // Each nlist64 = 16 bytes, Rust mangled symbol names average ~200 bytes. + // Also account for chained fixups data (page starts, imports, symbol names). + // Overestimating is cheap (buffer is truncated to actual size); underestimating + // causes silent data loss and codesign failure. + let symtab_estimate = n_syms * (16 + 200); + let n_fixups = n_syms; + let fixups_estimate = 16384 + n_fixups * 12; + let linkedit_estimate = fixups_estimate + n_exports * 256 + symtab_estimate; + let total = linkedit_offset as usize + linkedit_estimate.max(65536); + (mappings, total) +} + +/// A rebase fixup: an absolute pointer that needs ASLR adjustment. 
+struct RebaseFixup { + file_offset: usize, + target: u64, +} + +/// A bind fixup: a GOT entry that dyld must fill with a dylib symbol address. +struct BindFixup { + file_offset: usize, + import_index: u32, +} + +/// An imported symbol name and its dylib ordinal. +struct ImportEntry { + name: Vec, + /// 1 = libSystem, 2+ = extra dylibs, 0xFE = flat lookup (search all dylibs). + lib_ordinal: u8, + /// If true, dyld won't error if this symbol isn't found (weak import). + weak_import: bool, +} + +/// Determine the lib ordinal for a symbol name. +/// If there are extra dylibs (beyond libSystem), we use flat lookup (0xFE) +/// since we don't yet track which dylib exports which symbol. +fn lib_ordinal_for_symbol(has_extra_dylibs: bool) -> u8 { + if has_extra_dylibs { 0xFE } else { 1 } +} + +/// Returns the actual final file size. +fn write_macho>( + out: &mut [u8], + layout: &Layout<'_, MachO>, + mappings: &[SegmentMapping], + plain_entries: &[CollectedUnwindEntry], + unwind_info_vm_addr: u64, + text_base: u64, + text_vm_end: u64, +) -> Result { + let le = object::Endianness::Little; + let header_layout = layout.section_layouts.get(output_section_id::FILE_HEADER); + + // Collect fixups during section writing and stub generation + let mut rebase_fixups: Vec = Vec::new(); + let mut bind_fixups: Vec = Vec::new(); + let mut imports: Vec = Vec::new(); + let has_extra_dylibs = !layout.symbol_db.args.extra_dylibs.is_empty(); + + // Track section write ranges for overlap detection (validation only). 
+ let validate = layout.symbol_db.args.common().validate_output; + let mut write_ranges: Vec<(usize, usize, String)> = Vec::new(); + + // Copy section data and apply relocations + for group in &layout.group_layouts { + for file_layout in &group.files { + if let FileLayout::Object(obj) = file_layout { + write_object_sections( + out, + obj, + layout, + mappings, + le, + &mut rebase_fixups, + &mut bind_fixups, + &mut imports, + has_extra_dylibs, + if validate { + Some(&mut write_ranges) + } else { + None + }, + )?; + } + } + } + + // Validate: no two section data writes should overlap. + if validate && !write_ranges.is_empty() { + write_ranges.sort_by_key(|r| r.0); + for w in write_ranges.windows(2) { + let (off1, size1, ref name1) = w[0]; + let (off2, _size2, ref name2) = w[1]; + if off1 + size1 > off2 { + crate::bail!( + "validate: section data write overlap: \ + {name1} [{off1:#x}..{:#x}) and {name2} [{off2:#x}..)", + off1 + size1 + ); + } + } + } + + // Write PLT stubs and collect bind fixups for imported symbols + write_stubs_and_got::( + out, + layout, + mappings, + &mut bind_fixups, + &mut imports, + has_extra_dylibs, + )?; + + // Populate GOT entries for non-import symbols + write_got_entries( + out, + layout, + mappings, + &mut rebase_fixups, + &mut bind_fixups, + &mut imports, + has_extra_dylibs, + )?; + + // Build chained fixup data: merge rebase + bind, encode per-page chains. + // + // Filter out fixups that fall on __thread_vars `key` or `offset` fields. + // TLV descriptors are 24-byte structs: (init_ptr, key, offset). + // Only `init` (at offset 0 of each descriptor) should have a fixup (bind to + // __tlv_bootstrap). The `key` (offset 8) and `offset` (offset 16) fields are + // plain values that dyld manages — they must NOT be in the fixup chain. + // Find __thread_vars key/offset field file offsets to exclude from + // the fixup chain. TLV descriptors are 24 bytes: only the init pointer + // (byte 0) should have a fixup. 
The key (byte 8) and offset (byte 16) + // are plain values that must not be in the chain. + // + // We find the thread_vars address range by scanning all bind+rebase + // fixups: every fixup at a position that's (n*24 + 8) or (n*24 + 16) + // relative to the first __tlv_bootstrap bind is a key/offset field. + // + // Simpler approach: collect ALL fixup file offsets that target TDATA + // or TBSS addresses (these are the TLV offset fields whose values + // were correctly computed by apply_relocations). They should NOT have + // rebase fixups because we wrote TLS-relative offsets, not absolute + // addresses. However, the non-extern relocation path may have created + // rebase fixups anyway. Remove them. + // Build set of file offsets for __thread_vars key/offset fields. + // These must NOT be in the fixup chain. We identify them by scanning + // the output for the bind fixups we already created for __tlv_bootstrap + // and init-function pointers — every such fixup marks the start of a + // 24-byte TLV descriptor. The key (+8) and offset (+16) fields after + // each descriptor start must be excluded. + let tvars_key_offset_positions: std::collections::HashSet = { + let mut positions = std::collections::HashSet::new(); + // Every fixup (bind or rebase) that's at a 24-byte-aligned position + // within the thread_vars output IS a descriptor start. + // But we don't know exactly where tvars is in the output. + // Use a different approach: find ALL fixups in the DATA segment, + // and for each one, check if the 8 bytes before it are also a fixup + // (which would make this a key field after an init fixup) or if + // 16 bytes before is a fixup (making this an offset field). + // + // Actually simplest: find tvars range from the bind fixups for + // __tlv_bootstrap. The first and last such bind define the range. 
+ let mut tvars_start = usize::MAX; + let mut tvars_end = 0usize; + for f in &bind_fixups { + if let Some(imp) = imports.get(f.import_index as usize) { + if imp.name == b"__tlv_bootstrap" { + tvars_start = tvars_start.min(f.file_offset); + tvars_end = tvars_end.max(f.file_offset + 24); // descriptor size + } + } + } + // Also scan rebase fixups that target init functions (which are in + // __thread_data/__thread_bss). These are at descriptor +0 too. + // A rebase targeting TDATA/TBSS means it's a TLS offset value (written + // by apply_relocations). But init-function rebase fixups target TEXT. + // To catch all descriptors, extend the range to cover all rebase fixups + // between the first and last __tlv_bootstrap binds. + // Actually, the tvars section is contiguous. Extend by scanning: + // starting from the first __tlv_bootstrap bind, every 24 bytes is a + // descriptor until we run out. + if tvars_start != usize::MAX { + // Find the total tvars block: from the first bind, walk forward + // checking if there's a fixup or data at each 24-byte boundary. + // The block size = (number of descriptors) * 24. + // We know from bind_fixups how many __tlv_bootstrap entries there are, + // but some descriptors have rebase inits instead. Use the DATA output + // section's thread_vars content size. + // The simplest: compute from the input objects. 
+ let le = object::Endianness::Little; + let mut total_tvars_size = 0usize; + for group in &layout.group_layouts { + for file_layout in &group.files { + if let FileLayout::Object(obj) = file_layout { + for sec_idx in 0..obj.object.sections.len() { + if let Some(s) = obj.object.sections.get(sec_idx) { + use object::read::macho::Section as _; + if s.flags(le) & 0xFF == 0x13 { + total_tvars_size += s.size(le) as usize; + } + } + } + } + } + } + tvars_end = tvars_start + total_tvars_size; + } + + if tvars_start != usize::MAX { + for off in (tvars_start..tvars_end).step_by(24) { + positions.insert(off + 8); // key field + positions.insert(off + 16); // offset field + } } - FileLayout::Prelude(s) => write_prelude::(s, buffers, layout)?, - _ => { - // TODO + positions + }; + + rebase_fixups.sort_by_key(|f| f.file_offset); + bind_fixups.sort_by_key(|f| f.file_offset); + + // Zero out __thread_vars key fields. Key must always be 0 — dyld + // initializes it at runtime with a pthread key. Relocation application + // may have written garbage into key positions from non-extern relocations. + // Key is at offset +8 in each 24-byte descriptor. + // tvars_key_offset_positions contains both key (+8) and offset (+16) positions. + // Key positions: those that are 8 bytes before an offset position. 
+ for &pos in &tvars_key_offset_positions { + // Check if pos+8 is also in the set (making this a key field) + if tvars_key_offset_positions.contains(&(pos + 8)) && pos + 8 <= out.len() { + out[pos..pos + 8].fill(0); + } + } + + let data_seg_start = if mappings.len() > 1 { + mappings[1].file_offset as usize + } else { + usize::MAX + }; + let data_seg_end = if mappings.len() > 1 { + mappings[1].file_offset as usize + (mappings[1].vm_end - mappings[1].vm_start) as usize + } else { + 0 + }; + + let image_base = if layout.symbol_db.args.is_dylib { + 0u64 + } else { + PAGEZERO_SIZE + }; + let mut all_data_fixups: Vec<(usize, u64)> = Vec::new(); + for f in &rebase_fixups { + if f.file_offset < data_seg_start || f.file_offset >= data_seg_end { + continue; + } + if tvars_key_offset_positions.contains(&f.file_offset) { + continue; + } + let target_offset = f.target.wrapping_sub(image_base); + all_data_fixups.push((f.file_offset, target_offset & 0xF_FFFF_FFFF)); + } + for f in &bind_fixups { + if f.file_offset < data_seg_start || f.file_offset >= data_seg_end { + continue; + } + // Don't filter bind fixups for __thread_vars init pointers — + // those ARE legitimate (bind to __tlv_bootstrap). + // Only filter rebase fixups for key/offset fields. 
+ let encoded = (1u64 << 63) | (f.import_index as u64 & 0xFF_FFFF); + all_data_fixups.push((f.file_offset, encoded)); + } + all_data_fixups.sort_by_key(|&(off, _)| off); + + // Encode per-page chains + let data_seg_file_off = if mappings.len() > 1 { + mappings[1].file_offset + } else { + 0 + }; + for i in 0..all_data_fixups.len() { + let (file_off, mut encoded) = all_data_fixups[i]; + let next_stride = if i + 1 < all_data_fixups.len() { + let cur_page = (file_off as u64 - data_seg_file_off) / PAGE_SIZE; + let next_page = (all_data_fixups[i + 1].0 as u64 - data_seg_file_off) / PAGE_SIZE; + if cur_page == next_page { + ((all_data_fixups[i + 1].0 - file_off) / 4) as u64 + } else { + 0 + } + } else { + 0 + }; + + // Both bind and rebase use bits 51-62 for next (12 bits, 4-byte stride) + encoded |= (next_stride & 0xFFF) << 51; + if file_off + 8 <= out.len() { + out[file_off..file_off + 8].copy_from_slice(&encoded.to_le_bytes()); + } + } + + let has_fixups = !all_data_fixups.is_empty(); + let n_imports = imports.len() as u32; + + // Build symbol name pool for imports + let mut symbols_pool = vec![0u8]; + let mut import_name_offsets: Vec = Vec::new(); + for entry in &imports { + import_name_offsets.push(symbols_pool.len() as u32); + symbols_pool.extend_from_slice(&entry.name); + symbols_pool.push(0); + } + + // Compute chained fixups data size + let has_data = mappings.len() > 1 && (mappings[1].vm_end > mappings[1].vm_start); + let is_dylib = layout.symbol_db.args.is_dylib; + let base_segs = if is_dylib { 2u32 } else { 3u32 }; // TEXT+LINKEDIT or PAGEZERO+TEXT+LINKEDIT + let seg_count = if has_data { base_segs + 1 } else { base_segs }; + let starts_in_image_size = 4 + 4 * seg_count; + let page_count = if has_fixups && has_data { + let data_mem_size = mappings[1].vm_end - mappings[1].vm_start; + ((data_mem_size + PAGE_SIZE - 1) / PAGE_SIZE) as u32 + } else { + 0 + }; + + let cf_data_size = if !has_fixups { + (32 + 4 + 4 * seg_count + 8).max(48) + } else { + let 
seg_starts_size = 22 + 2 * page_count; + let imports_size = 4 * n_imports; + 32 + starts_in_image_size + seg_starts_size + imports_size + symbols_pool.len() as u32 + }; + + // Build and write __unwind_info now that __eh_frame is in the output buffer. + // Scan output __eh_frame to map func_vm_addr → EhFrameFdeInfo. + let unwind_info_size = if unwind_info_vm_addr != 0 { + let eh_layout = layout.section_layouts.get(output_section_id::EH_FRAME); + let fde_map: std::collections::HashMap = if eh_layout.mem_size > 0 { + if let Some(eh_foff) = vm_addr_to_file_offset(eh_layout.mem_offset, mappings) { + let m = scan_eh_frame_fde_offsets( + out, + eh_layout.mem_offset, + eh_foff, + eh_layout.mem_size as usize, + ); + m + } else { + Default::default() + } + } else { + Default::default() + }; + let available = text_vm_end.saturating_sub(unwind_info_vm_addr); + let content = build_unwind_info_section(plain_entries, &fde_map, text_base, available); + if !content.is_empty() && content.len() as u64 <= available { + if let Some(ui_foff) = vm_addr_to_file_offset(unwind_info_vm_addr, mappings) { + let end = ui_foff + content.len(); + if end <= out.len() { + out[ui_foff..end].copy_from_slice(&content); + } + } + content.len() as u64 + } else { + if !content.is_empty() { + tracing::debug!( + "compact_unwind: __unwind_info too large ({} bytes) for gap ({} bytes)", + content.len(), + available + ); + } + 0 + } + } else { + 0 + }; + + // Write headers + let header_offset = header_layout.file_offset; + let chained_fixups_offset = write_headers( + out, + header_offset, + layout, + mappings, + cf_data_size, + unwind_info_vm_addr, + unwind_info_size, + )?; + + // Write chained fixups + let final_size = if let Some(cf_off) = chained_fixups_offset { + if !has_fixups { + let cf = cf_off as usize; + if cf + cf_data_size as usize <= out.len() { + // Minimal header with correct seg_count and imports_format + let starts_off = 32u32; + out[cf + 4..cf + 8].copy_from_slice(&starts_off.to_le_bytes()); 
// starts_offset + let imports_off = starts_off + 4 + 4 * seg_count; + out[cf + 8..cf + 12].copy_from_slice(&imports_off.to_le_bytes()); // imports_offset + out[cf + 12..cf + 16].copy_from_slice(&imports_off.to_le_bytes()); // symbols_offset + out[cf + 20..cf + 24].copy_from_slice(&1u32.to_le_bytes()); // imports_format + let si = cf + starts_off as usize; + out[si..si + 4].copy_from_slice(&seg_count.to_le_bytes()); + } + cf + cf_data_size as usize + } else { + let ordinals: Vec = imports.iter().map(|e| e.lib_ordinal).collect(); + let weak_flags: Vec = imports.iter().map(|e| e.weak_import).collect(); + write_chained_fixups_header( + out, + cf_off as usize, + &all_data_fixups, + n_imports, + &import_name_offsets, + &ordinals, + &weak_flags, + &symbols_pool, + mappings, + layout.symbol_db.args.is_dylib, + )?; + cf_off as usize + cf_data_size as usize + } + } else { + out.len() + }; + + // Write symbol table + let final_size = if layout.symbol_db.args.is_dylib { + write_dylib_symtab(out, final_size, layout, mappings)? + } else { + write_exe_symtab(out, final_size, layout, mappings)? + }; + + Ok(final_size) +} + +/// Write a minimal symbol table for dylib exports. 
+fn write_dylib_symtab( + out: &mut [u8], + start: usize, + layout: &Layout<'_, MachO>, + _mappings: &[SegmentMapping], +) -> Result { + // Collect exported symbols from dynamic_symbol_definitions + let mut entries: Vec<(Vec, u64)> = Vec::new(); + for def in &layout.dynamic_symbol_definitions { + let sym_id = def.symbol_id; + if let Some(res) = layout + .symbol_resolutions + .iter() + .nth(sym_id.as_usize()) + .and_then(|r| r.as_ref()) + { + entries.push((def.name.to_vec(), res.raw_value)); + } + } + + if entries.is_empty() { + return Ok(start); + } + + // Build string table: starts with \0 + let mut strtab = vec![0u8]; + let mut str_offsets = Vec::new(); + for (name, _) in &entries { + str_offsets.push(strtab.len() as u32); + strtab.extend_from_slice(name); + strtab.push(0); + } + + // Build section ranges from the already-written headers for n_sect lookup. + let section_ranges = parse_section_ranges(out); + + // Write nlist64 entries (16 bytes each). No alignment padding — + // LINKEDIT must be fully packed for strip(1) compatibility. 
+ let symoff = start; + let nsyms = entries.len(); + let mut pos = symoff; + for (i, (_, value)) in entries.iter().enumerate() { + if pos + 16 > out.len() { + break; + } + let n_sect = section_ranges + .iter() + .position(|&(s, e)| *value >= s && *value < e) + .map(|idx| (idx + 1) as u8) + .unwrap_or(1); + // nlist64: n_strx (4), n_type (1), n_sect (1), n_desc (2), n_value (8) + out[pos..pos + 4].copy_from_slice(&str_offsets[i].to_le_bytes()); + out[pos + 4] = 0x0F; // N_SECT | N_EXT + out[pos + 5] = n_sect; + out[pos + 6..pos + 8].copy_from_slice(&0u16.to_le_bytes()); // n_desc + out[pos + 8..pos + 16].copy_from_slice(&value.to_le_bytes()); + pos += 16; + } + + // Write string table + let stroff = pos; + if stroff + strtab.len() <= out.len() { + out[stroff..stroff + strtab.len()].copy_from_slice(&strtab); + } + pos = stroff + strtab.len(); + + // Patch LC_SYMTAB in the header + // Find LC_SYMTAB command and update it + let mut off = 32u32; // after header + let ncmds = u32::from_le_bytes(out[16..20].try_into().unwrap()); + for _ in 0..ncmds { + let cmd = u32::from_le_bytes(out[off as usize..off as usize + 4].try_into().unwrap()); + let cmdsize = + u32::from_le_bytes(out[off as usize + 4..off as usize + 8].try_into().unwrap()); + if cmd == LC_SYMTAB { + out[off as usize + 8..off as usize + 12] + .copy_from_slice(&(symoff as u32).to_le_bytes()); + out[off as usize + 12..off as usize + 16] + .copy_from_slice(&(nsyms as u32).to_le_bytes()); + out[off as usize + 16..off as usize + 20] + .copy_from_slice(&(stroff as u32).to_le_bytes()); + out[off as usize + 20..off as usize + 24] + .copy_from_slice(&(strtab.len() as u32).to_le_bytes()); + break; + } + off += cmdsize; + } + + // Build export trie for dlsym (must be aligned) + let trie_off = (pos + 7) & !7; + let trie = build_export_trie(&entries); + if trie_off + trie.len() <= out.len() { + out[trie_off..trie_off + trie.len()].copy_from_slice(&trie); + } + pos = trie_off + trie.len(); + + // Patch LC_SYMTAB and 
LC_DYLD_EXPORTS_TRIE in headers + off = 32; + for _ in 0..ncmds { + let cmd = u32::from_le_bytes(out[off as usize..off as usize + 4].try_into().unwrap()); + let cmdsize = + u32::from_le_bytes(out[off as usize + 4..off as usize + 8].try_into().unwrap()); + match cmd { + 0x19 => { + // LC_SEGMENT_64 + let segname = &out[off as usize + 8..off as usize + 24]; + if segname.starts_with(b"__LINKEDIT") { + let linkedit_fileoff = u64::from_le_bytes( + out[off as usize + 40..off as usize + 48] + .try_into() + .unwrap(), + ); + let new_filesize = pos as u64 - linkedit_fileoff; + out[off as usize + 48..off as usize + 56] + .copy_from_slice(&new_filesize.to_le_bytes()); + // Update vmsize to cover the content + let new_vmsize = align_to(new_filesize, PAGE_SIZE); + out[off as usize + 32..off as usize + 40] + .copy_from_slice(&new_vmsize.to_le_bytes()); + } + } + LC_DYSYMTAB => { + // DYSYMTAB: ilocalsym nlocalsym iextdefsym nextdefsym iundefsym nundefsym + let o = off as usize + 8; + out[o..o + 4].copy_from_slice(&0u32.to_le_bytes()); // ilocalsym + out[o + 4..o + 8].copy_from_slice(&0u32.to_le_bytes()); // nlocalsym + out[o + 8..o + 12].copy_from_slice(&0u32.to_le_bytes()); // iextdefsym + out[o + 12..o + 16].copy_from_slice(&(nsyms as u32).to_le_bytes()); // nextdefsym + out[o + 16..o + 20].copy_from_slice(&(nsyms as u32).to_le_bytes()); // iundefsym + out[o + 20..o + 24].copy_from_slice(&0u32.to_le_bytes()); // nundefsym + } + 0x8000_0033 => { + // LC_DYLD_EXPORTS_TRIE + out[off as usize + 8..off as usize + 12] + .copy_from_slice(&(trie_off as u32).to_le_bytes()); + out[off as usize + 12..off as usize + 16] + .copy_from_slice(&(trie.len() as u32).to_le_bytes()); + } + _ => {} + } + off += cmdsize; + } + + Ok(pos) +} + +/// Parse section address ranges from the already-written Mach-O headers. +/// Returns a vec of (start_addr, end_addr) in section order. 
+fn parse_section_ranges(out: &[u8]) -> Vec<(u64, u64)> { + let mut ranges = Vec::new(); + let mut hoff = 32usize; + let ncmds = u32::from_le_bytes(out[16..20].try_into().unwrap_or([0; 4])) as usize; + for _ in 0..ncmds { + if hoff + 8 > out.len() { + break; + } + let cmd = u32::from_le_bytes(out[hoff..hoff + 4].try_into().unwrap()); + let cmdsize = u32::from_le_bytes(out[hoff + 4..hoff + 8].try_into().unwrap()) as usize; + if cmd == LC_SEGMENT_64 && hoff + 72 <= out.len() { + let nsects = u32::from_le_bytes(out[hoff + 64..hoff + 68].try_into().unwrap()) as usize; + for j in 0..nsects { + let so = hoff + 72 + j * 80; + if so + 48 > out.len() { + break; + } + let addr = u64::from_le_bytes(out[so + 32..so + 40].try_into().unwrap()); + let size = u64::from_le_bytes(out[so + 40..so + 48].try_into().unwrap()); + ranges.push((addr, addr + size)); + } + } + hoff += cmdsize; + } + ranges +} + +/// Check if a symbol was originally external (N_EXT) in its input object. +fn is_symbol_external(layout: &Layout<'_, MachO>, symbol_id: crate::symbol_db::SymbolId) -> bool { + use object::read::macho::Nlist as _; + let file_id = layout.symbol_db.file_id_for_symbol(symbol_id); + for group in &layout.group_layouts { + for file_layout in &group.files { + if let crate::layout::FileLayout::Object(obj) = file_layout { + if obj.file_id == file_id { + let local_index = symbol_id.to_input(obj.symbol_id_range); + if let Ok(sym) = obj.object.symbols.symbol(local_index) { + return (sym.n_type() & object::macho::N_EXT) != 0; + } + } + } + } + } + // Default to external for prelude/synthetic symbols + true +} + +/// Write a symbol table for executables so that backtraces can resolve function names. +fn write_exe_symtab( + out: &mut [u8], + start: usize, + layout: &Layout<'_, MachO>, + _mappings: &[SegmentMapping], +) -> Result { + use crate::symbol_db::SymbolId; + + // Collect all defined symbols with non-zero addresses. 
+ let mut entries: Vec<(Vec, u64, u8)> = Vec::new(); // (name, value, n_type) + let mut seen_names: std::collections::HashSet> = Default::default(); + for (sym_idx, res) in layout.symbol_resolutions.iter().enumerate() { + let Some(res) = res else { continue }; + if res.raw_value == 0 { + continue; + } + if res.flags.contains(crate::value_flags::ValueFlags::DYNAMIC) { + continue; + } + let symbol_id = SymbolId::from_usize(sym_idx); + let name = match layout.symbol_db.symbol_name(symbol_id) { + Ok(n) => n.bytes().to_vec(), + Err(_) => continue, + }; + if name.is_empty() { + continue; + } + // Check if this symbol is external by looking at its original binding. + // Local symbols (static functions, file-scoped data) should NOT get N_EXT. + let is_external = + !res.flags.is_downgraded_to_local() && is_symbol_external(layout, symbol_id); + // -x: strip local (non-external) symbols from the output + if layout.symbol_db.args.strip_locals && !is_external { + continue; + } + let n_type = if res.flags.contains(crate::value_flags::ValueFlags::ABSOLUTE) { + if is_external { 0x03_u8 } else { 0x02_u8 } // N_ABS [| N_EXT] + } else if is_external { + 0x0f_u8 // N_SECT | N_EXT (external) + } else { + 0x0e_u8 // N_SECT (local) + }; + seen_names.insert(name.clone()); + entries.push((name, res.raw_value, n_type)); + } + + // Also collect absolute symbols from input objects that may lack resolutions + // (e.g. unreferenced .set symbols). 
+ { + use object::read::macho::Nlist as _; + let le = object::Endianness::Little; + for group in &layout.group_layouts { + for file_layout in &group.files { + if let crate::layout::FileLayout::Object(obj) = file_layout { + for sym_idx in 0..obj.object.symbols.len() { + let Ok(sym) = obj.object.symbols.symbol(object::SymbolIndex(sym_idx)) + else { + continue; + }; + // N_ABS = 0x02, N_EXT = 0x01 + let n_type_raw = sym.n_type(); + if (n_type_raw & 0x0e) != 0x02 { + continue; // not absolute + } + let val = sym.n_value(le); + if val == 0 { + continue; + } + let name = sym.name(le, obj.object.symbols.strings()).unwrap_or(&[]); + if name.is_empty() || seen_names.contains(name) { + continue; + } + seen_names.insert(name.to_vec()); + entries.push((name.to_vec(), val, 0x02)); // N_ABS + } + } + } + } + } + + if entries.is_empty() { + return Ok(start); + } + + // Sort: locals first, then externals; within each group, by address. + // DYSYMTAB requires this ordering (ilocalsym..iextdefsym..iundefsym). + entries.sort_by_key(|e| { + let is_ext = (e.2 & 0x01) != 0; + (is_ext, e.1) + }); + + // Build string table: starts with \0 + let mut strtab = vec![0u8]; + let mut str_offsets = Vec::new(); + for (name, _, _) in &entries { + str_offsets.push(strtab.len() as u32); + strtab.extend_from_slice(name); + strtab.push(0); + } + + // Build section ranges from the already-written headers for n_sect lookup. + let section_ranges = parse_section_ranges(out); + + // Write nlist64 entries (16 bytes each). No alignment padding — + // LINKEDIT must be fully packed for strip(1) compatibility. 
+ let symoff = start; + let nsyms = entries.len(); + let mut pos = symoff; + for (i, (_, value, n_type)) in entries.iter().enumerate() { + if pos + 16 > out.len() { + break; + } + let n_sect = if *n_type == 0x02 { + 0u8 // N_ABS + } else { + section_ranges + .iter() + .position(|&(s, e)| *value >= s && *value < e) + .map(|idx| (idx + 1) as u8) + .unwrap_or(0) + }; + out[pos..pos + 4].copy_from_slice(&str_offsets[i].to_le_bytes()); + out[pos + 4] = *n_type; + out[pos + 5] = n_sect; + out[pos + 6..pos + 8].copy_from_slice(&0u16.to_le_bytes()); + out[pos + 8..pos + 16].copy_from_slice(&value.to_le_bytes()); + pos += 16; + } + + // Write string table + let stroff = pos; + if stroff + strtab.len() <= out.len() { + out[stroff..stroff + strtab.len()].copy_from_slice(&strtab); + } + pos = stroff + strtab.len(); + + // Patch LC_SYMTAB, LC_DYSYMTAB, and LINKEDIT segment in the header + let mut off = 32u32; + let ncmds = u32::from_le_bytes(out[16..20].try_into().unwrap()); + for _ in 0..ncmds { + let cmd = u32::from_le_bytes(out[off as usize..off as usize + 4].try_into().unwrap()); + let cmdsize = + u32::from_le_bytes(out[off as usize + 4..off as usize + 8].try_into().unwrap()); + match cmd { + LC_SYMTAB => { + out[off as usize + 8..off as usize + 12] + .copy_from_slice(&(symoff as u32).to_le_bytes()); + out[off as usize + 12..off as usize + 16] + .copy_from_slice(&(nsyms as u32).to_le_bytes()); + out[off as usize + 16..off as usize + 20] + .copy_from_slice(&(stroff as u32).to_le_bytes()); + out[off as usize + 20..off as usize + 24] + .copy_from_slice(&(strtab.len() as u32).to_le_bytes()); + } + 0x19 => { + // LC_SEGMENT_64 — update LINKEDIT filesize/vmsize + let segname = &out[off as usize + 8..off as usize + 24]; + if segname.starts_with(b"__LINKEDIT") { + let linkedit_fileoff = u64::from_le_bytes( + out[off as usize + 40..off as usize + 48] + .try_into() + .unwrap(), + ); + let new_filesize = pos as u64 - linkedit_fileoff; + out[off as usize + 48..off as usize + 56] + 
.copy_from_slice(&new_filesize.to_le_bytes()); + let new_vmsize = align_to(new_filesize, PAGE_SIZE); + out[off as usize + 32..off as usize + 40] + .copy_from_slice(&new_vmsize.to_le_bytes()); + } + } + LC_DYSYMTAB => { + // Split symbols into local and external ranges + let n_locals = entries + .iter() + .filter(|(_, _, nt)| (*nt & 0x01) == 0) // no N_EXT + .count(); + let n_ext = nsyms - n_locals; + let o = off as usize + 8; + out[o..o + 4].copy_from_slice(&0u32.to_le_bytes()); // ilocalsym + out[o + 4..o + 8].copy_from_slice(&(n_locals as u32).to_le_bytes()); // nlocalsym + out[o + 8..o + 12].copy_from_slice(&(n_locals as u32).to_le_bytes()); // iextdefsym + out[o + 12..o + 16].copy_from_slice(&(n_ext as u32).to_le_bytes()); // nextdefsym + out[o + 16..o + 20].copy_from_slice(&(nsyms as u32).to_le_bytes()); // iundefsym + out[o + 20..o + 24].copy_from_slice(&0u32.to_le_bytes()); // nundefsym + } + LC_DYLD_EXPORTS_TRIE => { + // Must come right after fixups + out[off as usize + 8..off as usize + 12] + .copy_from_slice(&(start as u32).to_le_bytes()); + out[off as usize + 12..off as usize + 16].copy_from_slice(&0u32.to_le_bytes()); + } + 0x26 | 0x29 => { + // function_starts, data_in_code: contiguous with symtab (size 0) + out[off as usize + 8..off as usize + 12] + .copy_from_slice(&(symoff as u32).to_le_bytes()); + out[off as usize + 12..off as usize + 16].copy_from_slice(&0u32.to_le_bytes()); + } + _ => {} + } + off += cmdsize; + } + + Ok(pos) +} + +/// Build a Mach-O export trie for the given symbols. 
+fn build_export_trie(entries: &[(Vec, u64)]) -> Vec { + if entries.is_empty() { + return vec![0, 0]; + } // empty root + + // Build child nodes first to know their sizes + let mut children: Vec> = Vec::new(); + for (_, addr) in entries { + let mut node = Vec::new(); + let mut info = Vec::new(); + uleb128_encode(&mut info, 0); // flags: regular + uleb128_encode(&mut info, *addr); + uleb128_encode(&mut node, info.len() as u64); // terminal size + node.extend_from_slice(&info); + node.push(0); // 0 child edges + children.push(node); + } + + // Build edge labels (symbol name bytes + NUL) + let mut labels: Vec> = Vec::new(); + for (name, _) in entries { + let mut label = Vec::new(); + label.extend_from_slice(name); + label.push(0); + labels.push(label); + } + + // Compute root node size to determine child offsets. + // Root = terminal_size(1) + edge_count(1) + edges + // Each edge = label + ULEB128(child_offset) + // We need to know root size to compute offsets, but offsets depend on their ULEB encoding size. + // Use two passes: estimate then fix. 
+ let n = entries.len(); + // Estimate: each offset ULEB is ~2 bytes for typical small tries + let mut root_size_estimate = 2usize; // terminal_size(0) + edge_count + for label in &labels { + root_size_estimate += label.len() + 3; // label + ~3 byte offset + } + + // Compute exact child offsets from root_size_estimate + let mut child_offsets = Vec::new(); + let mut off = root_size_estimate; + for child in &children { + child_offsets.push(off); + off += child.len(); + } + + // Now build root with exact offsets + let mut root = Vec::new(); + root.push(0); // not terminal + root.push(n as u8); // edge count + for (i, label) in labels.iter().enumerate() { + root.extend_from_slice(label); + uleb128_encode(&mut root, child_offsets[i] as u64); + } + + // Check if root size matches estimate; if not, recompute + if root.len() != root_size_estimate { + let actual_root_size = root.len(); + let delta = actual_root_size as isize - root_size_estimate as isize; + // Recompute with corrected offsets + root.clear(); + root.push(0); + root.push(n as u8); + for (i, label) in labels.iter().enumerate() { + root.extend_from_slice(label); + uleb128_encode(&mut root, (child_offsets[i] as isize + delta) as u64); + } + } + + // Assemble trie + let mut trie = root; + for child in &children { + trie.extend_from_slice(child); + } + trie +} + +fn uleb128_encode(buf: &mut Vec, mut val: u64) { + loop { + let mut byte = (val & 0x7F) as u8; + val >>= 7; + if val != 0 { + byte |= 0x80; + } + buf.push(byte); + if val == 0 { + break; } } - Ok(()) } -fn write_prelude<'data, A: Arch>( - prelude: &PreludeLayout, - buffers: &mut OutputSectionPartMap<&mut [u8]>, - layout: &MachOLayout<'data>, +/// Write PLT stubs and GOT bind entries for imported symbols. 
+fn write_stubs_and_got>( + out: &mut [u8], + layout: &Layout<'_, MachO>, + mappings: &[SegmentMapping], + bind_fixups: &mut Vec, + imports: &mut Vec, + has_extra_dylibs: bool, ) -> Result { - verbose_timing_phase!("Write prelude"); - - let header: &mut FileHeader = from_bytes_mut(buffers.get_mut(part_id::FILE_HEADER)) - .map_err(|_| error!("Invalid file header allocation"))? - .0; - populate_file_header::(layout, &prelude.header_info, header); - - let pagezero_command: &mut SegmentCommand = - from_bytes_mut(buffers.get_mut(part_id::PAGEZERO_SEGMENT)) - .map_err(|_| error!("Invalid PAGEZERO segment allocation"))? - .0; - write_pagezero_command::(pagezero_command); - - let linkedit_command: &mut SegmentCommand = - from_bytes_mut(buffers.get_mut(part_id::LINK_EDIT_SEGMENT)) - .map_err(|_| error!("Invalid LINKEDIT segment allocation"))? - .0; - write_linkedit_command::(linkedit_command); - write_segment_commands::(layout, buffers)?; - - let entry_point_command: &mut EntryPointCommand = - from_bytes_mut(buffers.get_mut(part_id::ENTRY_POINT)) - .map_err(|_| error!("Invalid ENTRY_POINT command allocation"))? 
- .0; - write_entry_point_command::(layout, entry_point_command); + use crate::symbol_db::SymbolId; + + for (sym_idx, res) in layout.symbol_resolutions.iter().enumerate() { + let Some(res) = res else { continue }; + let Some(plt_addr) = res.format_specific.plt_address else { + continue; + }; + let Some(got_addr) = res.format_specific.got_address else { + continue; + }; + + if let Some(plt_file_off) = vm_addr_to_file_offset(plt_addr, mappings) { + if plt_file_off + 12 <= out.len() { + A::write_plt_entry( + &mut out[plt_file_off..plt_file_off + 12], + got_addr, + plt_addr, + )?; + } + } + if let Some(got_file_off) = vm_addr_to_file_offset(got_addr, mappings) { + let import_index = imports.len() as u32; + let symbol_id = SymbolId::from_usize(sym_idx); + let name = match layout.symbol_db.symbol_name(symbol_id) { + Ok(n) => n.bytes().to_vec(), + Err(_) => b"".to_vec(), + }; + let weak = layout.symbol_db.is_weak_ref(symbol_id); + imports.push(ImportEntry { + name, + lib_ordinal: lib_ordinal_for_symbol(has_extra_dylibs), + weak_import: weak, + }); + bind_fixups.push(BindFixup { + file_offset: got_file_off, + import_index, + }); + } + } Ok(()) } -fn populate_file_header>( - layout: &MachOLayout, - _header_info: &HeaderInfo, - header: &mut FileHeader, -) { - let load_commands_info = get_segment_sections(layout, SegmentType::LoadCommands); - - header.magic = U32::new(BigEndian, MH_CIGAM_64); - header.cputype = U32::new(LE, CPU_TYPE_ARM64); - header.cpusubtype = U32::new(LE, 0); - header.filetype = U32::new(LE, MH_EXECUTE); - header.ncmds = U32::new(LE, load_commands_info.segment_sections.len() as u32); - header.sizeofcmds = U32::new(LE, load_commands_info.segment_size.file_size as u32); - header.flags = U32::new(LE, 0); - header.reserved = U32::new(LE, 0); -} - -fn write_pagezero_command>(command: &mut SegmentCommand) { - command.cmd.set(LE, LC_SEGMENT_64); - command.cmdsize.set(LE, size_of::() as u32); - 
command.segname[..SEG_PAGEZERO.len()].copy_from_slice(SEG_PAGEZERO.as_bytes()); - command.vmaddr.set(LE, 0); - command.vmsize.set(LE, MACHO_START_MEM_ADDRESS); - command.fileoff.set(LE, 0); - command.filesize.set(LE, 0); - command.maxprot.set(LE, 0); - command.initprot.set(LE, 0); - command.nsects.set(LE, 0); - command.flags.set(LE, 0); -} - -fn write_linkedit_command>(command: &mut SegmentCommand) { - command.cmd.set(LE, LC_SEGMENT_64); - command.cmdsize.set(LE, size_of::() as u32); - command.segname[..SEG_LINKEDIT.len()].copy_from_slice(SEG_LINKEDIT.as_bytes()); - command.vmaddr.set(LE, 0); - command.vmsize.set(LE, 0); - command.fileoff.set(LE, 0); - command.filesize.set(LE, 0); - command.maxprot.set(LE, 0); - command.initprot.set(LE, 0); - command.nsects.set(LE, 0); - command.flags.set(LE, 0); -} - -fn split_segment_command_buffer( - bytes: &mut [u8], - section_count: usize, -) -> Result<(&mut SegmentCommand, &mut [SectionEntry])> { - let (command, rest) = - from_bytes_mut(bytes).map_err(|_| error!("Invalid segment command allocation"))?; - let (sections, rest) = slice_from_bytes_mut(rest, section_count) - .map_err(|_| error!("Invalid segment section allocation"))?; - if !rest.is_empty() { - return Err(error!("Trailing bytes in segment command allocation")); - } - Ok((command, sections)) -} - -fn write_segment_commands>( - layout: &MachOLayout, - buffers: &mut OutputSectionPartMap<&mut [u8]>, +/// Fill GOT entries with target symbol addresses (for non-import symbols). +/// Also registers rebase fixups so dyld can adjust for ASLR. 
+fn write_got_entries( + out: &mut [u8], + layout: &Layout<'_, MachO>, + mappings: &[SegmentMapping], + rebase_fixups: &mut Vec, + bind_fixups: &mut Vec, + imports: &mut Vec, + has_extra_dylibs: bool, ) -> Result { - for (part_id, seg_name, segment_type) in [ - (part_id::TEXT_SEGMENT, SEG_TEXT, SegmentType::Text), - (part_id::DATA_SEGMENT, SEG_DATA, SegmentType::Data), - ] { - let SegmentSectionsInfo { - segment_size, - segment_sections, - } = get_segment_sections(layout, segment_type); - let (segment_cmd, sections) = - split_segment_command_buffer(buffers.get_mut(part_id), segment_sections.len())?; - - debug_assert_eq!(sections.len(), segment_sections.len()); - let prot_flags = layout - .output_sections - .section_flags(part_id.output_section_id()) - .raw(); - - segment_cmd.cmd.set(LE, LC_SEGMENT_64); - segment_cmd.cmdsize.set( - LE, - (size_of::() + size_of::() * segment_sections.len()) - as u32, - ); - segment_cmd.segname[..seg_name.len()].copy_from_slice(seg_name.as_bytes()); - segment_cmd.segname[seg_name.len()..].zero(); - segment_cmd.vmaddr.set(LE, segment_size.mem_offset); - segment_cmd.vmsize.set(LE, segment_size.mem_size); - // TODO: should be likely offset relative to the place after the commands - segment_cmd.fileoff.set(LE, segment_size.file_offset as u64); - segment_cmd.filesize.set(LE, segment_size.file_size as u64); - segment_cmd.maxprot.set(LE, prot_flags); - segment_cmd.initprot.set(LE, prot_flags); - segment_cmd.nsects.set(LE, segment_sections.len() as u32); - segment_cmd.flags.set(LE, 0); - - for (section, (size, section_name, section_flags)) in - sections.iter_mut().zip(segment_sections) - { - let section_name = section_name - .ok_or_else(|| error!("section name must be known"))? 
- .0; - - section.segname[..seg_name.len()].copy_from_slice(seg_name.as_bytes()); - section.segname[seg_name.len()..].zero(); - section.sectname[..section_name.len()].copy_from_slice(section_name); - section.sectname[section_name.len()..].zero(); - section.addr.set(LE, size.mem_offset); - section.size.set(LE, size.mem_size); - section.offset.set(LE, size.file_offset as u32); - // TODO - section.align.set(LE, 0); - section.reloff.set(LE, 0); - section.nreloc.set(LE, 0); - section.flags.set(LE, section_flags.raw()); - section.reserved1.set(LE, 0); - section.reserved2.set(LE, 0); - section.reserved3.set(LE, 0); + use crate::symbol_db::SymbolId; + + for (sym_idx, res) in layout.symbol_resolutions.iter().enumerate() { + let Some(res) = res else { continue }; + if res.format_specific.plt_address.is_some() { + continue; + } // handled by stubs + if let Some(got_vm_addr) = res.format_specific.got_address { + if let Some(file_off) = vm_addr_to_file_offset(got_vm_addr, mappings) { + if file_off + 8 > out.len() { + continue; + } + if res.raw_value != 0 { + // Defined symbol: write value and create rebase fixup for ASLR. + out[file_off..file_off + 8].copy_from_slice(&res.raw_value.to_le_bytes()); + rebase_fixups.push(RebaseFixup { + file_offset: file_off, + target: res.raw_value, + }); + } else { + // Undefined symbol with GOT entry (e.g. personality pointer + // from __eh_frame): create a bind fixup so dyld fills the GOT. 
+ let symbol_id = SymbolId::from_usize(sym_idx); + let name = match layout.symbol_db.symbol_name(symbol_id) { + Ok(n) => n.bytes().to_vec(), + Err(_) => continue, + }; + let import_index = imports.len() as u32; + imports.push(ImportEntry { + name, + lib_ordinal: lib_ordinal_for_symbol(has_extra_dylibs), + weak_import: false, + }); + bind_fixups.push(BindFixup { + file_offset: file_off, + import_index, + }); + } + } } } Ok(()) } -fn write_object<'data, A: Arch>( - object: &ObjectLayout<'data, MachO>, - buffers: &mut OutputSectionPartMap<&mut [u8]>, - layout: &MachOLayout<'data>, +/// Copy an object's section data to the output and apply relocations. +/// Write __eh_frame data with FDE filtering: only include FDEs whose target +/// function is in a loaded section. +fn write_filtered_eh_frame( + out: &mut [u8], + file_offset: usize, + output_addr: u64, + input_data: &[u8], + input_section: &object::macho::Section64, + obj: &ObjectLayout<'_, MachO>, + layout: &Layout<'_, MachO>, + le: object::Endianness, + rebase_fixups: &mut Vec, + bind_fixups: &mut Vec, + imports: &mut Vec, + has_extra_dylibs: bool, ) -> Result { - verbose_timing_phase!("Write object", file_id = object.file_id.as_u32()); + use crate::eh_frame::EhFrameEntryPrefix; + use object::read::macho::Nlist as _; + use object::read::macho::Section as MachOSection; + use std::mem::size_of; + use std::mem::size_of_val; + use zerocopy::FromBytes; + + let relocs = input_section + .relocations(le, obj.object.data) + .unwrap_or(&[]); + + const PREFIX_LEN: usize = size_of::(); + let mut input_pos = 0; + let mut output_pos = 0; + let mut cie_offset_map = std::collections::HashMap::new(); + + // First pass: determine which entries to keep and build a compacted copy. 
+ while input_pos + PREFIX_LEN <= input_data.len() { + let prefix = + EhFrameEntryPrefix::read_from_bytes(&input_data[input_pos..input_pos + PREFIX_LEN]) + .unwrap(); + let size = size_of_val(&prefix.length) + prefix.length as usize; + let next_input = input_pos + size; + if next_input > input_data.len() { + break; + } + + let keep = if prefix.cie_id == 0 { + // CIE: always keep + cie_offset_map.insert(input_pos as u32, output_pos as u32); + true + } else { + // FDE: check if target function section is loaded + let mut loaded = false; + for reloc_raw in relocs { + let reloc = reloc_raw.info(le); + let r_off = reloc.r_address as usize; + if r_off >= input_pos && r_off < next_input { + let is_pc_begin = (r_off - input_pos) == crate::eh_frame::FDE_PC_BEGIN_OFFSET; + if is_pc_begin && reloc.r_extern { + let sym_idx = object::SymbolIndex(reloc.r_symbolnum as usize); + if let Ok(sym) = obj.object.symbols.symbol(sym_idx) { + let n_sect = sym.n_sect(); + if n_sect > 0 { + let sec_idx = n_sect as usize - 1; + loaded = obj + .section_resolutions + .get(sec_idx) + .and_then(|r| r.address()) + .is_some(); + } + } + } + } + } + loaded + }; + + if keep { + let dest = file_offset + output_pos; + if dest + size <= out.len() { + out[dest..dest + size].copy_from_slice(&input_data[input_pos..next_input]); + + // Rewrite CIE pointer in FDEs + if prefix.cie_id != 0 { + let cie_ptr_input = input_pos as u32 + 4; + let input_cie = cie_ptr_input.wrapping_sub(prefix.cie_id); + if let Some(&output_cie) = cie_offset_map.get(&input_cie) { + let new_ptr = output_pos as u32 + 4 - output_cie; + let p = dest + 4; + if p + 4 <= out.len() { + out[p..p + 4].copy_from_slice(&new_ptr.to_le_bytes()); + } + } + } + } + output_pos += size; + } + input_pos = next_input; + } + + // Zero remaining space + let remaining = file_offset + output_pos; + let end = file_offset + input_data.len(); + if remaining < end && end <= out.len() { + out[remaining..end].fill(0); + } - let _span = debug_span!("write_file", 
filename = %object.input).entered(); - let _file_span = layout.args().common().trace_span_for_file(object.file_id); - for sec in &object.sections { - match sec { - SectionSlot::Loaded(sec) => { - write_object_section::(object, layout, sec, buffers)?; + // Second pass: apply relocations to the compacted data. + // Build a mapping from input reloc offsets to output offsets. + // For simplicity, re-scan entries and apply relocs for kept entries. + input_pos = 0; + output_pos = 0; + let mut cie_map2 = std::collections::HashMap::new(); + + while input_pos + PREFIX_LEN <= input_data.len() { + let prefix = + EhFrameEntryPrefix::read_from_bytes(&input_data[input_pos..input_pos + PREFIX_LEN]) + .unwrap(); + let size = size_of_val(&prefix.length) + prefix.length as usize; + let next_input = input_pos + size; + if next_input > input_data.len() { + break; + } + + let keep = if prefix.cie_id == 0 { + cie_map2.insert(input_pos as u32, output_pos as u32); + true + } else { + let mut loaded = false; + for reloc_raw in relocs { + let reloc = reloc_raw.info(le); + let r_off = reloc.r_address as usize; + if r_off >= input_pos && r_off < next_input { + let is_pc = (r_off - input_pos) == crate::eh_frame::FDE_PC_BEGIN_OFFSET; + if is_pc && reloc.r_extern { + let sym_idx = object::SymbolIndex(reloc.r_symbolnum as usize); + if let Ok(sym) = obj.object.symbols.symbol(sym_idx) { + let n = sym.n_sect(); + if n > 0 { + loaded = obj + .section_resolutions + .get(n as usize - 1) + .and_then(|r| r.address()) + .is_some(); + } + } + } + } + } + loaded + }; + + if keep { + // Collect relocs for this entry and apply them at their output positions + let entry_relocs: Vec<_> = relocs + .iter() + .filter(|r| { + let off = r.info(le).r_address as usize; + off >= input_pos && off < next_input + }) + .collect(); + + // Create adjusted relocs with output-relative addresses + let adjusted: Vec> = entry_relocs + .iter() + .map(|r| { + let mut copy = **r; + let info = r.info(le); + let new_addr = 
info.r_address as usize - input_pos + output_pos; + // Reconstruct the raw relocation with adjusted address + // The address is in the first 3 bytes of the first u32 + let _r_word0 = copy.r_word0.get(le); + let new_word0 = new_addr as u32; + copy.r_word0.set(le, new_word0); + copy + }) + .collect(); + + if !adjusted.is_empty() { + apply_relocations( + out, + file_offset, + output_addr, + &adjusted, + obj, + layout, + le, + rebase_fixups, + bind_fixups, + imports, + has_extra_dylibs, + )?; } - _ => (), + output_pos += size; } + input_pos = next_input; } Ok(()) } -fn write_object_section<'data, A: Arch>( - object: &ObjectLayout<'data, MachO>, - layout: &MachOLayout<'data>, - section: &Section, - buffers: &mut OutputSectionPartMap<&mut [u8]>, +fn write_object_sections( + out: &mut [u8], + obj: &ObjectLayout<'_, MachO>, + layout: &Layout<'_, MachO>, + mappings: &[SegmentMapping], + le: object::Endianness, + rebase_fixups: &mut Vec, + bind_fixups: &mut Vec, + imports: &mut Vec, + has_extra_dylibs: bool, + mut write_ranges: Option<&mut Vec<(usize, usize, String)>>, ) -> Result { - write_section_raw(object, layout, section, buffers)?; - Ok(()) + use object::read::macho::Section as MachOSection; - // TODO: process relocations -} - -fn write_section_raw<'out, 'data>( - object: &ObjectLayout<'data, MachO>, - layout: &MachOLayout, - sec: &Section, - buffers: &'out mut OutputSectionPartMap<&mut [u8]>, -) -> Result<&'out mut [u8]> { - if layout - .output_sections - .has_data_in_file(sec.output_section_id()) - { - let section_buffer = buffers.get_mut(sec.output_part_id()); - let allocation_size = sec.capacity(&layout.output_sections) as usize; - if section_buffer.len() < allocation_size { - bail!( - "Insufficient space allocated to section `{}`. Tried to take {} bytes, but only {} remain", - object.object.section_display_name(sec.index), - allocation_size, - section_buffer.len() + // Verify that sections/section_resolutions/object.sections have same length. 
+ if let Some(ref _ranges) = write_ranges { + let loaded = obj.sections.len(); + let resolutions = obj.section_resolutions.len(); + let input = obj.object.sections.len(); + if loaded != resolutions || loaded != input { + crate::bail!( + "validate: section count mismatch for {}: \ + loaded={loaded} resolutions={resolutions} input={input}", + obj.input ); } - let out = section_buffer.split_off_mut(..allocation_size).unwrap(); - let object_section = object.object.section(sec.index)?; + } - let section_size = object.object.section_size(object_section)?; - let (out, padding) = out.split_at_mut(section_size as usize); - object.object.copy_section_data(object_section, out)?; - padding.fill(0); - Ok(out) - } else { - Ok(&mut []) + for (sec_idx, _slot) in obj.sections.iter().enumerate() { + let section_res = &obj.section_resolutions[sec_idx]; + let Some(output_addr) = section_res.address() else { + continue; + }; + let Some(file_offset) = vm_addr_to_file_offset(output_addr, mappings) else { + continue; + }; + + let input_section = match obj.object.sections.get(sec_idx) { + Some(s) => s, + None => continue, + }; + + // Log __const section resolutions for debugging + if let Some(ref _ranges) = write_ranges { + use object::read::macho::Section as _; + let sectname = crate::macho::trim_nul(input_section.sectname()); + let segname = crate::macho::trim_nul(&input_section.segname); + if sectname == b"__const" { + let input_addr = input_section.addr(le); + let input_size = input_section.size(le); + let _ = std::fs::OpenOptions::new().create(true).append(true) + .open("/tmp/wild_const_debug.log") + .and_then(|mut f| { + use std::io::Write; + writeln!(f, "sec[{sec_idx}] {},{}: input={input_addr:#x}+{input_size:#x} → output={output_addr:#x} foff={file_offset:#x}", + String::from_utf8_lossy(segname), String::from_utf8_lossy(sectname)) + }); + } + } + + let sec_type = input_section.flags(le) & 0xFF; + if sec_type == 0x01 || sec_type == 0x0C || sec_type == 0x12 { + continue; + } + + let 
input_offset = input_section.offset(le) as usize; + let input_size = input_section.size(le) as usize; + if input_size == 0 || input_offset == 0 { + continue; + } + + let input_data = match obj.object.data.get(input_offset..input_offset + input_size) { + Some(d) => d, + None => continue, + }; + + // For __eh_frame: filter FDEs, only keeping those for loaded sections. + let sectname = crate::macho::trim_nul(input_section.sectname()); + if sectname == b"__eh_frame" { + write_filtered_eh_frame( + out, + file_offset, + output_addr, + input_data, + input_section, + obj, + layout, + le, + rebase_fixups, + bind_fixups, + imports, + has_extra_dylibs, + )?; + continue; + } + + if file_offset + input_size <= out.len() { + if let Some(ref mut ranges) = write_ranges { + let sectname = crate::macho::trim_nul(input_section.sectname()); + let segname = crate::macho::trim_nul(&input_section.segname); + ranges.push(( + file_offset, + input_size, + format!( + "{},{}", + String::from_utf8_lossy(segname), + String::from_utf8_lossy(sectname) + ), + )); + + // Invariant: verify round-trip — after copy, reading the first + // 8 bytes from the output at the resolved address must match + // the first 8 bytes of the input section data. If they differ, + // another section's data was already at that position. 
+ if input_size >= 8 { + let expected = &input_data[..8]; + let actual = &out[file_offset..file_offset + 8]; + // Only check if the position was previously zero (fresh) + if actual != [0u8; 8] && actual != expected { + crate::bail!( + "validate: section {},{} at foff={file_offset:#x} — \ + output already has data {:02x?} but input starts with {:02x?}", + String::from_utf8_lossy(segname), + String::from_utf8_lossy(sectname), + actual, + expected + ); + } + } + } + out[file_offset..file_offset + input_size].copy_from_slice(input_data); + } + + if let Ok(relocs) = input_section.relocations(le, obj.object.data) { + apply_relocations( + out, + file_offset, + output_addr, + relocs, + obj, + layout, + le, + rebase_fixups, + bind_fixups, + imports, + has_extra_dylibs, + )?; + } } + Ok(()) } -fn write_entry_point_command>( - layout: &MachOLayout, - command: &mut EntryPointCommand, -) { - let SegmentSectionsInfo { segment_size, .. } = get_segment_sections(layout, SegmentType::Text); +/// Apply relocations for a section. +fn apply_relocations( + out: &mut [u8], + section_file_offset: usize, + section_vm_addr: u64, + relocs: &[object::macho::Relocation], + obj: &ObjectLayout<'_, MachO>, + layout: &Layout<'_, MachO>, + le: object::Endianness, + rebase_fixups: &mut Vec, + bind_fixups: &mut Vec, + imports: &mut Vec, + has_extra_dylibs: bool, +) -> Result { + let mut pending_addend: i64 = 0; + let mut pending_subtrahend: Option = None; + + for reloc_raw in relocs { + let reloc = reloc_raw.info(le); + + if reloc.r_type == 10 { + // ARM64_RELOC_ADDEND + pending_addend = reloc.r_symbolnum as i64; + continue; + } + if reloc.r_type == 1 { + // ARM64_RELOC_SUBTRACTOR (part of a pair) + // Store the subtrahend symbol address for the next UNSIGNED reloc. 
+ let sub_addr = if reloc.r_extern { + let sym_idx = object::SymbolIndex(reloc.r_symbolnum as usize); + let sym_id = obj.symbol_id_range.input_to_id(sym_idx); + match layout.merged_symbol_resolution(sym_id) { + Some(r) if r.raw_value != 0 => r.raw_value, + _ => { + // Local temp label without a global resolution. + // Compute from section base + symbol offset. + use object::read::macho::Nlist as _; + let sym = obj.object.symbols.symbol(sym_idx).ok(); + if let Some(sym) = sym { + let n_sect = sym.n_sect(); + if n_sect > 0 { + let sec_idx = n_sect as usize - 1; + let sec_out = obj + .section_resolutions + .get(sec_idx) + .and_then(|r| r.address()) + .unwrap_or(0); + let sec_in = obj + .object + .sections + .get(sec_idx) + .map(|s| s.addr.get(le)) + .unwrap_or(0); + sec_out + sym.n_value(le).wrapping_sub(sec_in) + } else { + 0 + } + } else { + 0 + } + } + } + } else { + let sec_ord = reloc.r_symbolnum as usize; + if sec_ord > 0 { + obj.section_resolutions + .get(sec_ord - 1) + .and_then(|r| r.address()) + .unwrap_or(0) + } else { + 0 + } + }; + pending_subtrahend = Some(sub_addr); + continue; + } + + let addend = pending_addend; + pending_addend = 0; + + let patch_file_offset = section_file_offset + reloc.r_address as usize; + let pc_addr = section_vm_addr + reloc.r_address as u64; + if patch_file_offset + 4 > out.len() { + continue; + } - command.cmd.set(LE, LC_MAIN); - command - .cmdsize - .set(LE, size_of::() as u32); - command.entryoff.set(LE, segment_size.file_offset as u64); - command.stacksize.set(LE, 0); + let (target_addr, got_addr, plt_addr) = if reloc.r_extern { + let sym_idx = object::SymbolIndex(reloc.r_symbolnum as usize); + let sym_id = obj.symbol_id_range.input_to_id(sym_idx); + match layout.merged_symbol_resolution(sym_id) { + Some(res) if res.raw_value != 0 || res.format_specific.plt_address.is_some() => ( + res.raw_value, + res.format_specific.got_address, + res.format_specific.plt_address, + ), + other => { + // Symbol has no global resolution (or 
raw_value=0). + // Try computing from section base + symbol offset + // (handles local labels like GCC_except_table*, ltmp*). + use object::read::macho::Nlist as _; + let fallback = obj.object.symbols.symbol(sym_idx).ok().and_then(|sym| { + let n_sect = sym.n_sect(); + if n_sect == 0 { + // Symbol is undefined (no section). Check if it has a name + // that looks like a TLS init symbol. + return None; + } + let sec_idx = n_sect as usize - 1; + // Try section_resolutions first. + let sec_res_addr = obj + .section_resolutions + .get(sec_idx) + .and_then(|r| r.address()); + if let Some(sec_out) = sec_res_addr { + let sec_in = + obj.object.sections.get(sec_idx).map(|s| s.addr.get(le))?; + let result = sec_out + sym.n_value(le).wrapping_sub(sec_in); + let name = sym.name(le, obj.object.symbols.strings()).unwrap_or(b""); + // For TLS init symbols ($tlv$init), compute a TLS-block- + // relative offset instead of an absolute address. The TLV + // descriptor offset field is read by dyld as an offset into + // the thread-local storage template (tdata + tbss). + if name.ends_with(b"$tlv$init") { + let tdata = layout.section_layouts.get(output_section_id::TDATA); + let tdata_start = tdata.mem_offset; + let tbss = layout.section_layouts.get(output_section_id::TBSS); + use object::read::macho::Section as _; + let sec_type = obj + .object + .sections + .get(sec_idx) + .map(|s| s.flags(le) & 0xFF) + .unwrap_or(0); + let tls_offset = if sec_type == 0x12 { + // S_THREAD_LOCAL_ZEROFILL: offset = tdata_size + offset_in_tbss + let tbss_start = tbss.mem_offset; + tdata.mem_size + (result - tbss_start) + } else { + // S_THREAD_LOCAL_REGULAR: offset = offset_in_tdata + result - tdata_start + }; + return Some(tls_offset); + } + return Some(result); + } + // Section resolution missing — fall back to TDATA/TBSS for TLS. 
+ use object::read::macho::Section as _; + let sec_type = obj + .object + .sections + .get(sec_idx) + .map(|s| s.flags(le) & 0xFF)?; + let sec_in = obj.object.sections.get(sec_idx).map(|s| s.addr.get(le))?; + let sym_offset = sym.n_value(le).wrapping_sub(sec_in); + let tdata = layout.section_layouts.get(output_section_id::TDATA); + let tbss = layout.section_layouts.get(output_section_id::TBSS); + match sec_type { + 0x11 if tdata.mem_size > 0 => { + tracing::warn!( + "TLS fallback: tdata + {sym_offset:#x} -> {:#x}", + tdata.mem_offset + sym_offset + ); + Some(tdata.mem_offset + sym_offset) + } + 0x12 if tbss.mem_size > 0 => { + tracing::warn!( + "TLS fallback: tbss + {sym_offset:#x} -> {:#x}", + tbss.mem_offset + sym_offset + ); + Some(tbss.mem_offset + sym_offset) + } + _ => { + tracing::warn!("TLS fallback MISS: sec_type={sec_type:#x}"); + None + } + } + }); + if let Some(addr) = fallback { + let got = other.and_then(|r| r.format_specific.got_address); + let plt = other.and_then(|r| r.format_specific.plt_address); + (addr, got, plt) + } else if let Some(res) = other { + ( + res.raw_value, + res.format_specific.got_address, + res.format_specific.plt_address, + ) + } else { + continue; + } + } + } + } else { + // Non-extern: r_symbolnum is 1-based section ordinal. + // target = output_section_address + addend + let sec_ord = reloc.r_symbolnum as usize; + if sec_ord == 0 { + continue; + } + let sec_idx = sec_ord - 1; + let output_sec_addr = obj + .section_resolutions + .get(sec_idx) + .and_then(|r| r.address()); + if let Some(addr) = output_sec_addr { + (addr, None, None) + } else { + // Section resolution missing. For TLS sections (__thread_data, + // __thread_bss), fall back to the TDATA/TBSS output section layout. + // Read the in-place value to get the symbol's offset within the + // input section, then compute the output address. 
+ use object::read::macho::Section as _; + let input_sec = obj.object.sections.get(sec_idx); + let sec_type = input_sec.map(|s| s.flags(le) & 0xFF).unwrap_or(0); + let input_sec_base = input_sec.map(|s| s.addr.get(le)).unwrap_or(0); + let tdata = layout.section_layouts.get(output_section_id::TDATA); + let tbss = layout.section_layouts.get(output_section_id::TBSS); + match sec_type { + 0x11 if tdata.mem_size > 0 => { + // Read in-place addend: absolute input address at reloc position + let in_place = if patch_file_offset + 8 <= out.len() { + u64::from_le_bytes( + out[patch_file_offset..patch_file_offset + 8] + .try_into() + .unwrap_or([0; 8]), + ) + } else { + 0 + }; + let sym_offset = in_place.wrapping_sub(input_sec_base); + (tdata.mem_offset + sym_offset, None, None) + } + 0x12 if tbss.mem_size > 0 => { + let in_place = if patch_file_offset + 8 <= out.len() { + u64::from_le_bytes( + out[patch_file_offset..patch_file_offset + 8] + .try_into() + .unwrap_or([0; 8]), + ) + } else { + 0 + }; + let sym_offset = in_place.wrapping_sub(input_sec_base); + (tbss.mem_offset + sym_offset, None, None) + } + _ => continue, + } + } + }; + + let target_addr = (target_addr as i64 + addend) as u64; + + match reloc.r_type { + 2 => { + // ARM64_RELOC_BRANCH26 + let branch_target = plt_addr.unwrap_or(target_addr); + let offset = branch_target.wrapping_sub(pc_addr) as i64; + let imm26 = ((offset >> 2) & 0x03FF_FFFF) as u32; + let insn = read_u32(out, patch_file_offset); + write_u32_at(out, patch_file_offset, (insn & 0xFC00_0000) | imm26); + } + 3 => { + write_adrp(out, patch_file_offset, pc_addr, target_addr); + } + 4 => { + write_pageoff12(out, patch_file_offset, target_addr); + } + 5 => { + // ARM64_RELOC_GOT_LOAD_PAGE21 + if let Some(got) = got_addr { + write_adrp(out, patch_file_offset, pc_addr, got); + } else { + write_adrp(out, patch_file_offset, pc_addr, target_addr); + } + } + 6 => { + // ARM64_RELOC_GOT_LOAD_PAGEOFF12 + if let Some(got) = got_addr { + let page_off = (got & 
0xFFF) as u32; + let insn = read_u32(out, patch_file_offset); + let imm12 = (page_off >> 3) & 0xFFF; + write_u32_at(out, patch_file_offset, (insn & 0xFFC0_03FF) | (imm12 << 10)); + } else { + let page_off = (target_addr & 0xFFF) as u32; + let insn = read_u32(out, patch_file_offset); + let rd = insn & 0x1F; + let rn = (insn >> 5) & 0x1F; + write_u32_at( + out, + patch_file_offset, + 0x9100_0000 | (page_off << 10) | (rn << 5) | rd, + ); + } + } + 8 => { + write_adrp(out, patch_file_offset, pc_addr, target_addr); + } + 9 => { + // ARM64_RELOC_TLVP_LOAD_PAGEOFF12 -> relax to ADD + let page_off = (target_addr & 0xFFF) as u32; + let insn = read_u32(out, patch_file_offset); + let rd = insn & 0x1F; + let rn = (insn >> 5) & 0x1F; + write_u32_at( + out, + patch_file_offset, + 0x9100_0000 | (page_off << 10) | (rn << 5) | rd, + ); + } + 0 if reloc.r_length == 3 => { + // ARM64_RELOC_UNSIGNED 64-bit. + // If preceded by a SUBTRACTOR, compute difference: + // result = target_addr - subtrahend + existing_content + if let Some(sub_addr) = pending_subtrahend.take() { + if patch_file_offset + 8 <= out.len() { + // SUBTRACTOR+UNSIGNED encodes a pcrel difference (e.g. FDE pc_begin, + // LSDA pointer). Always use the direct symbol address, never the GOT + // address — GOT indirection is expressed via POINTER_TO_GOT (type 7). + let existing = i64::from_le_bytes( + out[patch_file_offset..patch_file_offset + 8] + .try_into() + .unwrap(), + ); + let val = target_addr as i64 - sub_addr as i64 + existing; + out[patch_file_offset..patch_file_offset + 8] + .copy_from_slice(&val.to_le_bytes()); + } + } else if patch_file_offset + 8 <= out.len() { + if reloc.r_extern && target_addr == 0 { + // Extern undefined symbol (e.g. 
_tlv_bootstrap): bind fixup + let sym_idx = object::SymbolIndex(reloc.r_symbolnum as usize); + let sym_id = obj.symbol_id_range.input_to_id(sym_idx); + let name = match layout.symbol_db.symbol_name(sym_id) { + Ok(n) => n.bytes().to_vec(), + Err(_) => b"".to_vec(), + }; + let import_index = imports.len() as u32; + imports.push(ImportEntry { + name, + lib_ordinal: lib_ordinal_for_symbol(has_extra_dylibs), + weak_import: false, + }); + bind_fixups.push(BindFixup { + file_offset: patch_file_offset, + import_index, + }); + } else { + // Check if target is in TLS data — write offset, not rebase + let tdata = layout.section_layouts.get(output_section_id::TDATA); + let tbss = layout.section_layouts.get(output_section_id::TBSS); + let in_tdata = tdata.mem_size > 0 + && target_addr >= tdata.mem_offset + && target_addr < tdata.mem_offset + tdata.mem_size; + let in_tbss = tbss.mem_size > 0 + && target_addr >= tbss.mem_offset + && target_addr < tbss.mem_offset + tbss.mem_size; + if in_tdata || in_tbss { + let tls_init_start = tdata.mem_offset; + let tls_init_size = tdata.mem_size; + let tls_offset = if in_tbss { + tls_init_size + target_addr.saturating_sub(tbss.mem_offset) + } else { + target_addr.saturating_sub(tls_init_start) + }; + out[patch_file_offset..patch_file_offset + 8] + .copy_from_slice(&tls_offset.to_le_bytes()); + } else { + rebase_fixups.push(RebaseFixup { + file_offset: patch_file_offset, + target: target_addr, + }); + } + } + } + } + 7 if reloc.r_length == 2 && reloc.r_pcrel => { + // ARM64_RELOC_POINTER_TO_GOT + if let Some(got) = got_addr { + let delta = (got as i64 - pc_addr as i64) as i32; + if patch_file_offset + 4 <= out.len() { + out[patch_file_offset..patch_file_offset + 4] + .copy_from_slice(&delta.to_le_bytes()); + } + } else { + let delta = (target_addr as i64 - pc_addr as i64) as i32; + if patch_file_offset + 4 <= out.len() { + out[patch_file_offset..patch_file_offset + 4] + .copy_from_slice(&delta.to_le_bytes()); + } + } + } + _ => {} + } + } + 
Ok(()) +} + +/// Write full chained fixups header with imports and symbol names. +fn write_chained_fixups_header( + out: &mut [u8], + cf_offset: usize, + all_fixups: &[(usize, u64)], + n_imports: u32, + import_name_offsets: &[u32], + import_ordinals: &[u8], + import_weak: &[bool], + symbols_pool: &[u8], + mappings: &[SegmentMapping], + is_dylib: bool, +) -> Result { + let has_data = mappings.len() > 1 && (mappings[1].vm_end > mappings[1].vm_start); + let base_segs = if is_dylib { 2u32 } else { 3u32 }; + let seg_count = if has_data { base_segs + 1 } else { base_segs }; + let data_seg_idx: usize = if is_dylib { 1 } else { 2 }; + let starts_offset: u32 = 32; + let starts_in_image_size = 4 + 4 * seg_count as usize; + + let (data_seg_file_offset, page_count) = if mappings.len() > 1 { + let m = &mappings[1]; + let mem_size = m.vm_end - m.vm_start; + ( + m.file_offset, + ((mem_size + PAGE_SIZE - 1) / PAGE_SIZE) as u16, + ) + } else { + (0, 0) + }; + + let seg_starts_size = 22 + 2 * page_count as usize; + let seg_starts_offset_in_image = starts_in_image_size as u32; + + let imports_table_offset = starts_offset + starts_in_image_size as u32 + seg_starts_size as u32; + let imports_size = 4 * n_imports; + let symbols_offset = imports_table_offset + imports_size; + + let w = &mut out[cf_offset..]; + + w[0..4].copy_from_slice(&0u32.to_le_bytes()); + w[4..8].copy_from_slice(&starts_offset.to_le_bytes()); + w[8..12].copy_from_slice(&imports_table_offset.to_le_bytes()); + w[12..16].copy_from_slice(&symbols_offset.to_le_bytes()); + w[16..20].copy_from_slice(&n_imports.to_le_bytes()); + w[20..24].copy_from_slice(&1u32.to_le_bytes()); + w[24..28].copy_from_slice(&0u32.to_le_bytes()); + + let si = starts_offset as usize; + w[si..si + 4].copy_from_slice(&seg_count.to_le_bytes()); + for seg in 0..seg_count as usize { + let off: u32 = if seg == data_seg_idx { + seg_starts_offset_in_image + } else { + 0 + }; + w[si + 4 + seg * 4..si + 4 + seg * 4 + 4].copy_from_slice(&off.to_le_bytes()); 
+ } + + let ss = si + seg_starts_offset_in_image as usize; + w[ss..ss + 4].copy_from_slice(&(seg_starts_size as u32).to_le_bytes()); + w[ss + 4..ss + 6].copy_from_slice(&(PAGE_SIZE as u16).to_le_bytes()); + w[ss + 6..ss + 8].copy_from_slice(&6u16.to_le_bytes()); + let image_base = if mappings + .first() + .map_or(false, |m| m.vm_start >= PAGEZERO_SIZE) + { + PAGEZERO_SIZE + } else { + 0 + }; + let seg_offset_val: u64 = if mappings.len() > 1 { + mappings[1].vm_start.wrapping_sub(image_base) + } else { + 0 + }; + w[ss + 8..ss + 16].copy_from_slice(&seg_offset_val.to_le_bytes()); + w[ss + 16..ss + 20].copy_from_slice(&0u32.to_le_bytes()); + w[ss + 20..ss + 22].copy_from_slice(&page_count.to_le_bytes()); + + let mut page_starts = vec![0xFFFFu16; page_count as usize]; + for &(file_off, _) in all_fixups { + if data_seg_file_offset == 0 || (file_off as u64) < data_seg_file_offset { + continue; + } + let offset_in_seg = file_off as u64 - data_seg_file_offset; + let page_idx = (offset_in_seg / PAGE_SIZE) as usize; + let offset_in_page = (offset_in_seg % PAGE_SIZE) as u16; + if page_idx < page_starts.len() && page_starts[page_idx] == 0xFFFF { + page_starts[page_idx] = offset_in_page; + } + } + for (p, &ps) in page_starts.iter().enumerate() { + w[ss + 22 + p * 2..ss + 22 + p * 2 + 2].copy_from_slice(&ps.to_le_bytes()); + } + + let it = imports_table_offset as usize; + for (i, &name_off) in import_name_offsets.iter().enumerate() { + let ordinal = import_ordinals[i] as u32; + let weak_bit = if import_weak.get(i).copied().unwrap_or(false) { + 1u32 << 8 + } else { + 0 + }; + let import_val: u32 = ordinal | weak_bit | ((name_off & 0x7F_FFFF) << 9); + w[it + i * 4..it + i * 4 + 4].copy_from_slice(&import_val.to_le_bytes()); + } + + let sp = symbols_offset as usize; + if sp + symbols_pool.len() <= w.len() { + w[sp..sp + symbols_pool.len()].copy_from_slice(symbols_pool); + } + + Ok(()) +} + +struct SegmentMapping { + vm_start: u64, + vm_end: u64, + file_offset: u64, +} + +fn 
vm_addr_to_file_offset(vm_addr: u64, mappings: &[SegmentMapping]) -> Option { + for m in mappings { + if vm_addr >= m.vm_start && vm_addr < m.vm_end { + return Some((m.file_offset + (vm_addr - m.vm_start)) as usize); + } + } + None +} + +fn write_adrp(out: &mut [u8], offset: usize, pc: u64, target: u64) { + let page_off = (target & !0xFFF).wrapping_sub(pc & !0xFFF) as i64; + let imm = (page_off >> 12) as u32; + let insn = read_u32(out, offset); + write_u32_at( + out, + offset, + (insn & 0x9F00_001F) | ((imm & 0x1F_FFFC) << 3) | ((imm & 0x3) << 29), + ); +} + +fn write_pageoff12(out: &mut [u8], offset: usize, target: u64) { + let page_off = (target & 0xFFF) as u32; + let insn = read_u32(out, offset); + // Determine the access size shift for scaled load/store instructions. + // For integer LDR/STR: bits 31:30 encode the size directly. + // For SIMD/FP LDR/STR (V bit = bit 26): size depends on both + // bits 31:30 and opc bits 23:22. + let shift = if (insn & 0x3B00_0000) == 0x3900_0000 { + let size = (insn >> 30) & 0x3; + let v = (insn >> 26) & 1; + let opc = (insn >> 22) & 0x3; + if v == 1 && opc == 3 && size == 0 { + 4 // 128-bit SIMD (Q register): scale by 16 = 2^4 + } else { + size + } + } else { + 0 + }; + let imm12 = (page_off >> shift) & 0xFFF; + write_u32_at(out, offset, (insn & 0xFFC0_03FF) | (imm12 << 10)); +} + +// ── Compact unwind / __unwind_info generation ────────────────────────────── + +/// A per-function compact unwind entry collected from `__LD,__compact_unwind`. +struct CollectedUnwindEntry { + /// Output VM address of the function. + func_addr: u64, + /// Function size in bytes. + func_size: u32, + /// Compact unwind encoding (ARM64 mode + register mask). + encoding: u32, + /// Personality function GOT address (if any). + personality_got: Option, + /// LSDA VM address (if any). 
+ lsda_addr: Option, +} + +/// Scan all input objects for `__LD,__compact_unwind` sections and collect +/// frame-pointer entries that can be represented directly in `__unwind_info`. +/// Personality entries are handled separately by scanning output `__eh_frame`. +fn collect_compact_unwind_entries(layout: &Layout<'_, MachO>) -> Vec { + use object::read::macho::MachHeader as _; + use object::read::macho::Section as _; + use object::read::macho::Segment as _; + let le = object::Endianness::Little; + let mut entries: Vec = Vec::new(); + + let mut n_objects = 0usize; + let mut n_cu_entries = 0usize; + for group in &layout.group_layouts { + for file_layout in &group.files { + let FileLayout::Object(obj) = file_layout else { + continue; + }; + let _ = n_objects; // suppress unused warning + n_objects += 1; + // Parse raw load commands to reach __LD segment (not in obj.object.sections). + let Ok(header) = + object::macho::MachHeader64::::parse(obj.object.data, 0) + else { + continue; + }; + // Mach-O object files have a single unnamed LC_SEGMENT_64 containing + // ALL sections. Each section has its own segname field. Iterate all + // sections of the single segment to find __LD,__compact_unwind. 
+ let Ok(mut cmds) = header.load_commands(le, obj.object.data, 0) else { + continue; + }; + while let Ok(Some(cmd)) = cmds.next() { + let Ok(Some((seg, seg_data))) = cmd.segment_64() else { + continue; + }; + let Ok(sections) = seg.sections(le, seg_data) else { + continue; + }; + for sec in sections { + let sec_segname = crate::macho::trim_nul(&sec.segname); + let sectname = crate::macho::trim_nul(&sec.sectname); + if sec_segname != b"__LD" || sectname != b"__compact_unwind" { + continue; + } + n_cu_entries += 1; + let sec_off = sec.offset.get(le) as usize; + let sec_size = sec.size.get(le) as usize; + if sec_size == 0 || sec_off == 0 { + continue; + } + let Some(data) = obj.object.data.get(sec_off..sec_off + sec_size) else { + continue; + }; + let relocs = sec.relocations(le, obj.object.data).unwrap_or(&[]); + let n = sec_size / 32; + for i in 0..n { + let base = i * 32; + if base + 32 > data.len() { + break; + } + let func_size = + u32::from_le_bytes(data[base + 8..base + 12].try_into().unwrap()); + let encoding = + u32::from_le_bytes(data[base + 12..base + 16].try_into().unwrap()); + if encoding == 0 { + continue; // no unwind info needed + } + // DWARF mode → handled via __eh_frame FDE scan, skip here. 
+ if (encoding & 0x0F00_0000) == 0x0300_0000 { + continue; + } + let Some(func_addr) = + resolve_compact_unwind_addr(obj, layout, le, relocs, base, data) + else { + continue; + }; + // Extract personality GOT addr (offset 16) and LSDA addr (offset 24) + let personality_got = + resolve_compact_unwind_got_addr(obj, layout, le, relocs, base + 16); + let lsda_addr = + resolve_compact_unwind_addr(obj, layout, le, relocs, base + 24, data) + .and_then(|addr| if addr != 0 { Some(addr) } else { None }); + entries.push(CollectedUnwindEntry { + func_addr, + func_size, + encoding, + personality_got, + lsda_addr, + }); + } + } + } + } + } + + tracing::debug!( + "compact_unwind: {} raw entries, {} plain", + n_cu_entries, + entries.len() + ); + entries.sort_by_key(|e| e.func_addr); + entries.dedup_by_key(|e| e.func_addr); + entries +} + +/// Resolve the VM address stored at `field_offset` within a compact-unwind entry. +/// `field_offset` is the absolute byte offset within the `__compact_unwind` section data. +/// `sec_data` is the raw section bytes (used to read the implicit 8-byte addend for +/// non-extern / section-relative relocations). +fn resolve_compact_unwind_addr( + obj: &ObjectLayout<'_, MachO>, + layout: &Layout<'_, MachO>, + le: object::Endianness, + relocs: &[object::macho::Relocation], + field_offset: usize, + sec_data: &[u8], +) -> Option { + use object::read::macho::Nlist as _; + for r in relocs { + let reloc = r.info(le); + if reloc.r_address as usize != field_offset { + continue; + } + if reloc.r_extern { + let sym_idx = object::SymbolIndex(reloc.r_symbolnum as usize); + let sym_id = obj.symbol_id_range.input_to_id(sym_idx); + if let Some(res) = layout.merged_symbol_resolution(sym_id) { + if res.raw_value != 0 { + return Some(res.raw_value); + } + } + // Fallback: local symbol (compute from section base + symbol value). 
+ let sym = obj.object.symbols.symbol(sym_idx).ok()?; + let n_sect = sym.n_sect(); + if n_sect == 0 { + return None; + } + let sec_idx = n_sect as usize - 1; + let sec_out = obj.section_resolutions.get(sec_idx)?.address()?; + let sec_in = obj.object.sections.get(sec_idx).map(|s| s.addr.get(le))?; + return Some(sec_out + sym.n_value(le).wrapping_sub(sec_in)); + } else { + // Non-extern (section-relative): r_symbolnum is 1-based section ordinal. + let sec_ord = reloc.r_symbolnum as usize; + if sec_ord == 0 { + return None; + } + let sec_idx = sec_ord - 1; + let sec_out = obj.section_resolutions.get(sec_idx)?.address()?; + let sec_in = obj.object.sections.get(sec_idx).map(|s| s.addr.get(le))?; + // Read the 8-byte implicit addend from the field. + let addend = u64::from_le_bytes( + sec_data + .get(field_offset..field_offset + 8)? + .try_into() + .ok()?, + ); + return Some(sec_out + addend.wrapping_sub(sec_in)); + } + } + None +} + +/// Like resolve_compact_unwind_addr, but returns the GOT address for the symbol +/// (needed for personality pointers in __unwind_info). +fn resolve_compact_unwind_got_addr( + obj: &ObjectLayout<'_, MachO>, + layout: &Layout<'_, MachO>, + le: object::Endianness, + relocs: &[object::macho::Relocation], + field_offset: usize, +) -> Option { + for r in relocs { + let reloc = r.info(le); + if reloc.r_address as usize != field_offset { + continue; + } + if reloc.r_extern { + let sym_idx = object::SymbolIndex(reloc.r_symbolnum as usize); + let sym_id = obj.symbol_id_range.input_to_id(sym_idx); + if let Some(res) = layout.merged_symbol_resolution(sym_id) { + if let Some(got) = res.format_specific.got_address { + return Some(got); + } + if res.raw_value != 0 { + return Some(res.raw_value); + } + } + } + break; + } + None +} + +/// Build the binary content of the `__unwind_info` section from collected entries. +/// `text_base` is the VM address of the start of the `__TEXT` segment. 
/// Info extracted from a `__eh_frame` CIE augmentation string.
#[derive(Default, Clone)]
struct CieAugInfo {
    /// Whether the CIE has a personality function ('P' in augstr).
    has_personality: bool,
    /// VM address of the GOT slot for the personality function, or 0.
    pers_got_vm: u64,
    /// Whether FDEs referencing this CIE carry an LSDA pointer ('L' in augstr).
    has_lsda: bool,
    /// Size of the FDE pc_begin / pc_range fields in bytes (from 'R' enc; 0 = unknown/8).
    fde_ptr_size: u8,
    /// Size of the LSDA pointer in FDE augmentation data (from 'L' enc; 0 = unknown/8).
    lsda_ptr_size: u8,
}

/// Per-FDE info extracted from the output `__eh_frame` buffer.
pub(crate) struct EhFrameFdeInfo {
    /// Byte offset of the FDE within the `__eh_frame` section.
    pub section_offset: u32,
    /// VM address of the LSDA for this function, or 0.
    pub lsda_vm: u64,
    /// VM address of the GOT slot for the personality function, or 0.
    pub pers_got_vm: u64,
}

/// Read a ULEB128 value from `data` at `pos`, advancing `pos`.
///
/// Stops after the first byte without the continuation bit or at the end of
/// `data`. Over-long encodings are consumed in full, but payload bits that do
/// not fit into 64 bits are discarded, so the shift never overflows.
fn read_uleb128(data: &[u8], pos: &mut usize) -> u64 {
    let mut val = 0u64;
    let mut shift = 0u32;
    while let Some(&b) = data.get(*pos) {
        *pos += 1;
        if shift < u64::BITS {
            val |= u64::from(b & 0x7F) << shift;
        }
        shift = shift.saturating_add(7);
        if b & 0x80 == 0 {
            break;
        }
    }
    val
}

/// Determine the byte size of an encoded pointer value from a DW_EH_PE encoding byte.
///
/// Only the low nibble (the value format) is inspected. Returns 2, 4 or 8;
/// defaults to 8 (pointer-sized) for variable-length (ULEB128 / SLEB128) and
/// unknown formats.
fn eh_ptr_size(enc: u8) -> u8 {
    match enc & 0x0F {
        0x00 => 8,        // DW_EH_PE_absptr (pointer-sized = 8 on 64-bit)
        0x02 | 0x0A => 2, // DW_EH_PE_udata2 / DW_EH_PE_sdata2
        0x03 | 0x0B => 4, // DW_EH_PE_udata4 / DW_EH_PE_sdata4
        0x04 | 0x0C => 8, // DW_EH_PE_udata8 / DW_EH_PE_sdata8
        // 0x01 (uleb128) and 0x09 (sleb128) have no fixed size.
        _ => 8,
    }
}

/// Read a PC-relative signed value of `size` bytes from `data` at `pos`,
/// apply it relative to `field_vm_addr`, and return the target VM address.
///
/// Returns 0 when the field lies outside `data` or when `size` is neither 4
/// nor 8. The addition wraps instead of overflowing.
fn read_pcrel(data: &[u8], pos: usize, size: usize, field_vm_addr: u64) -> u64 {
    let Some(bytes) = pos.checked_add(size).and_then(|end| data.get(pos..end)) else {
        return 0;
    };
    let delta = match size {
        4 => i64::from(i32::from_le_bytes(bytes.try_into().unwrap_or([0; 4]))),
        8 => i64::from_le_bytes(bytes.try_into().unwrap_or([0; 8])),
        _ => return 0,
    };
    field_vm_addr.wrapping_add(delta as u64)
}

/// Parse a CIE at section offset `cie_pos` and return its augmentation info.
///
/// `data` is the whole `__eh_frame` section and `eh_frame_vm_addr` its VM
/// address (needed to resolve the PC-relative personality pointer).
///
/// Truncated or malformed input never panics: parsing simply stops and the
/// information gathered so far is returned. When the augmentation string is
/// unterminated or lacks 'z', the pointer sizes are left at 0.
fn parse_cie_aug(data: &[u8], cie_pos: usize, eh_frame_vm_addr: u64) -> CieAugInfo {
    let mut info = CieAugInfo::default();
    // Skip: length(4) + cie_id(4) + version(1) = 9 bytes.
    let mut pos = cie_pos.saturating_add(9);
    // Find augmentation string (null-terminated).
    let aug_start = pos;
    while pos < data.len() && data[pos] != 0 {
        pos += 1;
    }
    if pos >= data.len() {
        return info;
    }
    let aug_bytes = &data[aug_start..pos];
    pos += 1; // skip null terminator

    let has_z = aug_bytes.contains(&b'z');
    info.has_lsda = aug_bytes.contains(&b'L');

    // Skip code_alignment (ULEB128), data_alignment (SLEB128), ra_register (ULEB128).
    read_uleb128(data, &mut pos); // code_alignment
    // SLEB128: only the byte count matters, so skip continuation bytes by hand.
    loop {
        if pos >= data.len() {
            return info;
        }
        let b = data[pos];
        pos += 1;
        if b & 0x80 == 0 {
            break;
        }
    }
    read_uleb128(data, &mut pos); // ra_register

    if !has_z {
        return info;
    }
    let aug_data_len = read_uleb128(data, &mut pos) as usize;
    let aug_data_start = pos;
    // Clamp to the buffer so that `data[ap]` below can never go out of bounds,
    // even if the CIE claims more augmentation data than is present.
    let aug_data_end = aug_data_start.saturating_add(aug_data_len).min(data.len());

    // Augmentation data contains per-letter info in augstr order (skipping 'z').
    let mut ap = aug_data_start;
    for &ch in aug_bytes {
        if ap >= aug_data_end {
            break;
        }
        match ch {
            b'P' => {
                let pers_enc = data[ap];
                ap += 1;
                let sz = eh_ptr_size(pers_enc) as usize;
                if ap + sz <= data.len() {
                    // Personality ptr is PC-relative from the field address.
                    let field_vm = eh_frame_vm_addr + ap as u64;
                    let target_vm = read_pcrel(data, ap, sz, field_vm);
                    if target_vm != 0 {
                        info.has_personality = true;
                        info.pers_got_vm = target_vm;
                    }
                }
                ap += sz;
            }
            b'L' => {
                info.lsda_ptr_size = eh_ptr_size(data[ap]);
                ap += 1;
            }
            b'R' => {
                info.fde_ptr_size = eh_ptr_size(data[ap]);
                ap += 1;
            }
            _ => {}
        }
    }

    // Default pointer size = 8 for 64-bit Mach-O.
    if info.fde_ptr_size == 0 {
        info.fde_ptr_size = 8;
    }
    if info.lsda_ptr_size == 0 {
        info.lsda_ptr_size = 8;
    }
    info
}
+fn scan_eh_frame_fde_offsets( + buf: &[u8], + eh_frame_vm_addr: u64, + eh_frame_file_offset: usize, + eh_frame_size: usize, +) -> std::collections::HashMap { + use crate::eh_frame::EhFrameEntryPrefix; + use std::mem::size_of; + use zerocopy::FromBytes; + + let mut map = std::collections::HashMap::new(); + // CIE map: section_offset → CieAugInfo + let mut cie_map: std::collections::HashMap = Default::default(); + + let Some(data) = buf.get(eh_frame_file_offset..eh_frame_file_offset + eh_frame_size) else { + return map; + }; + + const PREFIX_LEN: usize = size_of::(); + let mut pos = 0usize; + + while pos + PREFIX_LEN <= data.len() { + let Ok(prefix) = EhFrameEntryPrefix::read_from_bytes(&data[pos..pos + PREFIX_LEN]) else { + break; + }; + if prefix.length == 0 { + break; + } + let size = 4 + prefix.length as usize; + if pos + size > data.len() { + break; + } + + if prefix.cie_id == 0 { + // CIE: parse augmentation. + let cie_aug = parse_cie_aug(data, pos, eh_frame_vm_addr); + cie_map.insert(pos as u32, cie_aug); + } else { + // FDE: resolve CIE, then extract pc_begin, LSDA. + // cie_id = byte distance from the cie_ptr field to the CIE. + let cie_ptr_field_off = pos + 4; + let cie_off = (cie_ptr_field_off as u64).wrapping_sub(prefix.cie_id as u64) as u32; + let cie_aug = cie_map.get(&cie_off).cloned().unwrap_or_default(); + let ptr_size = cie_aug.fde_ptr_size.max(4) as usize; + + // pc_begin at byte 8, PC-relative signed value of ptr_size bytes. + let pc_begin_field_vm = eh_frame_vm_addr + pos as u64 + 8; + let func_vm = read_pcrel(data, pos + 8, ptr_size, pc_begin_field_vm); + if func_vm == 0 { + pos += size; + continue; + } + + // pc_range at byte 8+ptr_size (absolute, not PC-relative). + // Skip it (we don't use pc_range for __unwind_info). + + // aug_data_length at byte 8 + 2*ptr_size. 
+ let aug_len_pos = pos + 8 + 2 * ptr_size; + let mut ap = aug_len_pos; + let aug_len = read_uleb128(data, &mut ap) as usize; + + // LSDA pointer at start of aug_data (if CIE has 'L'). + let lsda_vm = if cie_aug.has_lsda + && cie_aug.lsda_ptr_size > 0 + && ap + cie_aug.lsda_ptr_size as usize <= data.len() + { + let lsda_field_vm = eh_frame_vm_addr + ap as u64; + read_pcrel(data, ap, cie_aug.lsda_ptr_size as usize, lsda_field_vm) + } else { + 0 + }; + let _ = aug_len; + + map.insert( + func_vm, + EhFrameFdeInfo { + section_offset: pos as u32, + lsda_vm, + pers_got_vm: cie_aug.pers_got_vm, + }, + ); + } + + pos += size; + } + + map +} + +/// Build the binary content of `__unwind_info` from collected compact-unwind entries +/// and FDE info from the output `__eh_frame`. +/// +/// `plain_entries`: ARM64 frame-pointer entries (from __compact_unwind). +/// `fde_map`: func_vm_addr → EhFrameFdeInfo (from scanning output __eh_frame). +/// `text_base`: VM address of the start of `__TEXT`. +/// +/// For each FDE with a personality function, emits a DWARF-mode entry +/// (`UNWIND_HAS_LSDA | pers_idx | UNWIND_ARM64_DWARF | fde_section_offset`). +/// Plain frame-pointer entries are also included. +fn build_unwind_info_section( + plain_entries: &[CollectedUnwindEntry], + fde_map: &std::collections::HashMap, + text_base: u64, + max_bytes: u64, +) -> Vec { + // ARM64 compact-unwind encoding constants. + const UNWIND_ARM64_DWARF: u32 = 0x0300_0000; + + // Build: (func_addr, func_size, encoding) sorted by func_addr. + let mut all_entries: Vec<(u64, u32, u32)> = Vec::new(); + + // Collect unique personality GOT slots (in encounter order). + let mut personalities: Vec = Vec::new(); + + // Emit DWARF-mode entries for FDEs that have a personality function. + // Each such FDE needs an __unwind_info entry so the unwinder can find it. + // + // For DWARF-mode entries the unwinder reads the LSDA from the FDE + // augmentation data in __eh_frame, NOT from the __unwind_info LSDA array. 
+ // So we omit UNWIND_HAS_LSDA and the LSDA array to save space. + for (&func_vm, fde_info) in fde_map { + if fde_info.pers_got_vm == 0 { + continue; + } // no personality → skip + + // Personality index (1-based into the personality array we build). + let pers_idx = if let Some(pos) = personalities + .iter() + .position(|&g| g == fde_info.pers_got_vm) + { + pos + 1 + } else { + personalities.push(fde_info.pers_got_vm); + personalities.len() + }; + + let enc = UNWIND_ARM64_DWARF | fde_info.section_offset | (((pers_idx as u32) & 3) << 28); + all_entries.push((func_vm, 0u32, enc)); + } + + // Also collect personalities from compact_unwind entries. + for e in plain_entries { + if let Some(got) = e.personality_got { + if !personalities.contains(&got) { + personalities.push(got); + } + } + } + + let pers_count = all_entries.len(); + // LSDA descriptors: (func_offset_from_text, lsda_offset_from_text) + let mut lsda_descriptors: Vec<(u32, u32)> = Vec::new(); + for e in plain_entries { + if fde_map + .get(&e.func_addr) + .is_some_and(|f| f.pers_got_vm != 0) + { + continue; + } + let mut enc = e.encoding; + // Set personality index in encoding bits [29:28] + if let Some(got) = e.personality_got { + if let Some(pos) = personalities.iter().position(|&g| g == got) { + let pers_idx = (pos + 1) as u32; + enc = (enc & !(0x3 << 28)) | ((pers_idx & 3) << 28); + } + } + // Set UNWIND_HAS_LSDA flag and record LSDA descriptor + if let Some(lsda) = e.lsda_addr { + enc |= 0x4000_0000; // UNWIND_HAS_LSDA + lsda_descriptors.push(((e.func_addr - text_base) as u32, (lsda - text_base) as u32)); + } + all_entries.push((e.func_addr, e.func_size, enc)); + } + lsda_descriptors.sort_by_key(|d| d.0); + + if all_entries.is_empty() { + return Vec::new(); + } + + all_entries.sort_by_key(|e| e.0); + all_entries.dedup_by_key(|e| e.0); + + // Truncate if the full content would exceed max_bytes. + // Personality entries (pers_count) are critical; trim plain entries first. 
+ let n_pers = personalities.len() as u32; + const ENTRIES_PER_PAGE: usize = 500; + loop { + let np = all_entries.len().div_ceil(ENTRIES_PER_PAGE); + // Estimate: header(28) + pers(n*4) + index((np+1)*12) + LSDA(n*8) + SL pages(np*8 + + // entries*8) + let est = 28 + + (n_pers as usize) * 4 + + (np + 1) * 12 + + lsda_descriptors.len() * 8 + + np * 8 + + all_entries.len() * 8; + if est as u64 <= max_bytes || all_entries.len() <= pers_count { + break; + } + // Remove last plain entry (they're sorted, so the highest address is removed first). + all_entries.pop(); + } + + let num_pages = all_entries.len().div_ceil(ENTRIES_PER_PAGE); + + tracing::debug!( + "compact_unwind: building __unwind_info: {} entries ({} pers), {} personalities", + all_entries.len(), + pers_count, + personalities.len() + ); + + // DWARF-mode entries all have unique encodings (different FDE offsets) so + // common encodings provide no benefit — skip them to save space. + + // Section layout: + // [28] header (7 × u32) + // [P*4] personality array (GOT slot offsets from TEXT base) + // [(N+1)*12] first-level index (N pages + sentinel) + // [page data…] + // + // LSDA array is empty: DWARF-mode entries get LSDA from the FDE augmentation + // data in __eh_frame, so no separate LSDA index is needed. + let ce_off = 28u32; + let pers_off = ce_off; // no common encodings + let pers_bytes = n_pers * 4; + let idx_off = pers_off + pers_bytes; + let idx_bytes = (num_pages as u32 + 1) * 12; + let lsda_off = idx_off + idx_bytes; + let lsda_bytes = lsda_descriptors.len() as u32 * 8; // 8 bytes each: funcOffset + lsdaOffset + let sl_start = lsda_off + lsda_bytes; + + let mut sl_offsets = Vec::with_capacity(num_pages); + let mut cur = sl_start; + for i in 0..num_pages { + sl_offsets.push(cur); + let n = (all_entries.len() - i * ENTRIES_PER_PAGE).min(ENTRIES_PER_PAGE); + cur += 8 + n as u32 * 8; + } + let total = cur as usize; + + let mut out = vec![0u8; total]; + macro_rules! 
wu32 { + ($off:expr, $val:expr) => { + out[$off..$off + 4].copy_from_slice(&($val as u32).to_le_bytes()) + }; + } + macro_rules! wu16 { + ($off:expr, $val:expr) => { + out[$off..$off + 2].copy_from_slice(&($val as u16).to_le_bytes()) + }; + } + + // Header + wu32!(0, 1u32); // version + wu32!(4, ce_off); // commonEncodingsArraySectionOffset + wu32!(8, 0u32); // commonEncodingsArrayCount (none) + wu32!(12, pers_off); // personalityArraySectionOffset + wu32!(16, n_pers); // personalityArrayCount + wu32!(20, idx_off); // indexSectionOffset + wu32!(24, num_pages as u32 + 1); // indexCount (includes sentinel) + + // Personality array: 4-byte offsets from TEXT base to GOT slots. + for (i, &got_vm) in personalities.iter().enumerate() { + let offset_from_text = (got_vm - text_base) as u32; + wu32!(pers_off as usize + i * 4, offset_from_text); + } + + // LSDA descriptors array (8 bytes each: funcOffset + lsdaOffset) + for (i, &(func_off, lsda_off_val)) in lsda_descriptors.iter().enumerate() { + let off = lsda_off as usize + i * 8; + wu32!(off, func_off); + wu32!(off + 4, lsda_off_val); + } + + // First-level index entries + second-level regular pages + for page in 0..num_pages { + let start = page * ENTRIES_PER_PAGE; + let end = (start + ENTRIES_PER_PAGE).min(all_entries.len()); + let page_entries = &all_entries[start..end]; + + let first_fn_off = (page_entries[0].0 - text_base) as u32; + let sl_off = sl_offsets[page] as usize; + + // Index entry (12 bytes) + let ie = idx_off as usize + page * 12; + wu32!(ie, first_fn_off); + wu32!(ie + 4, sl_off as u32); // secondLevelPagesSectionOffset + wu32!(ie + 8, lsda_off); // lsdaIndexArraySectionOffset + + // Regular second-level page header (8 bytes) + wu32!(sl_off, 2u32); // kind = UNWIND_SECOND_LEVEL_REGULAR + wu16!(sl_off + 4, 8u16); // entryPageOffset + wu16!(sl_off + 6, page_entries.len() as u16); // entryCount + + // Entries (8 bytes each: funcOffset u32 + encoding u32) + for (j, &(fa, _, enc)) in 
page_entries.iter().enumerate() { + let eo = sl_off + 8 + j * 8; + wu32!(eo, (fa - text_base) as u32); + wu32!(eo + 4, enc); + } + } + + // Sentinel first-level index entry + let (last_fa, last_fs, _) = *all_entries.last().unwrap(); + let sentinel_fn_off = (last_fa - text_base + last_fs as u64) as u32; + let sie = idx_off as usize + num_pages * 12; + wu32!(sie, sentinel_fn_off); + wu32!(sie + 4, 0u32); // secondLevelPagesSectionOffset = 0 (sentinel) + wu32!(sie + 8, lsda_off + lsda_bytes); // lsdaIndexArraySectionOffset (end) + + out +} + +/// Mach-O section metadata for a given output section ID. +struct MachoSectionInfo { + segname: &'static [u8; 16], + sectname: [u8; 16], + flags: u32, +} + +/// Map an OutputSectionId to Mach-O section name and flags. +/// Returns None for sections that don't need their own section header +/// (e.g. FILE_HEADER, BSS handled specially, etc.). +fn macho_section_info(id: crate::output_section_id::OutputSectionId) -> Option { + use crate::output_section_id; + fn name16(s: &[u8]) -> [u8; 16] { + let mut buf = [0u8; 16]; + let len = s.len().min(16); + buf[..len].copy_from_slice(&s[..len]); + buf + } + static TEXT_SEG: &[u8; 16] = b"__TEXT\0\0\0\0\0\0\0\0\0\0"; + static DATA_SEG: &[u8; 16] = b"__DATA\0\0\0\0\0\0\0\0\0\0"; + + let (segname, sectname, flags) = match id { + output_section_id::TEXT => (TEXT_SEG, name16(b"__text"), 0x8000_0400u32), + output_section_id::PLT_GOT => (TEXT_SEG, name16(b"__stubs"), 0x8000_0408), + output_section_id::GCC_EXCEPT_TABLE => (TEXT_SEG, name16(b"__gcc_except_tab"), 0), + output_section_id::EH_FRAME => (TEXT_SEG, name16(b"__eh_frame"), 0x6800_000B), + output_section_id::RODATA => (TEXT_SEG, name16(b"__cstring"), 0), + output_section_id::COMMENT => (TEXT_SEG, name16(b"__literal"), 0), + output_section_id::DATA_REL_RO => (TEXT_SEG, name16(b"__const"), 0), + output_section_id::DATA => (DATA_SEG, name16(b"__data"), 0), + output_section_id::CSTRING => (DATA_SEG, name16(b"__const"), 0), + 
output_section_id::GOT => (DATA_SEG, name16(b"__got"), 0x06), + output_section_id::PREINIT_ARRAY => (DATA_SEG, name16(b"__thread_vars"), 0x13), + output_section_id::INIT_ARRAY => (DATA_SEG, name16(b"__mod_init_func"), 0x09), + output_section_id::FINI_ARRAY => (DATA_SEG, name16(b"__mod_term_func"), 0x0E), + output_section_id::TDATA => (DATA_SEG, name16(b"__thread_data"), 0x11), + output_section_id::TBSS => (DATA_SEG, name16(b"__thread_bss"), 0x12), + output_section_id::BSS => (DATA_SEG, name16(b"__bss"), 0x01), + _ => return None, + }; + Some(MachoSectionInfo { + segname, + sectname, + flags, + }) +} + +/// Write Mach-O headers. Returns the chained fixups file offset. +fn write_headers( + out: &mut [u8], + offset: usize, + layout: &Layout<'_, MachO>, + mappings: &[SegmentMapping], + chained_fixups_data_size: u32, + unwind_info_vm_addr: u64, + unwind_info_size: u64, +) -> Result> { + let text_vm_start = mappings.first().map_or(PAGEZERO_SIZE, |m| m.vm_start); + let text_vm_end = mappings + .first() + .map_or(PAGEZERO_SIZE + PAGE_SIZE, |m| m.vm_end); + let text_filesize = align_to(text_vm_end - text_vm_start, PAGE_SIZE); + + let has_data = mappings.len() > 1; + let data_vmaddr = mappings.get(1).map_or(0, |m| m.vm_start); + let data_vm_end = mappings + .iter() + .skip(1) + .map(|m| m.vm_end) + .max() + .unwrap_or(data_vmaddr); + let data_vmsize = align_to(data_vm_end - data_vmaddr, PAGE_SIZE); + let data_fileoff = mappings.get(1).map_or(0, |m| m.file_offset); + let data_filesize = if has_data { + align_to( + mappings + .iter() + .skip(1) + .map(|m| m.file_offset + (m.vm_end - m.vm_start)) + .max() + .unwrap() + - data_fileoff, + PAGE_SIZE, + ) + } else { + 0 + }; + + let text_layout = layout.section_layouts.get(output_section_id::TEXT); + let entry_addr = layout.entry_symbol_address()?; + let entry_offset = + vm_addr_to_file_offset(entry_addr, mappings).unwrap_or(text_layout.file_offset); + + let tdata_layout = layout.section_layouts.get(output_section_id::TDATA); + let 
tbss_layout = layout.section_layouts.get(output_section_id::TBSS); + let has_tlv = tdata_layout.mem_size > 0 || tbss_layout.mem_size > 0; + let _has_tvars = has_tlv; + // Scan for .rustc section (proc-macro metadata) before computing cmd sizes + let mut rustc_addr = 0u64; + let mut rustc_size = 0u64; + { + use object::read::macho::Section as _; + let le = object::Endianness::Little; + for group in &layout.group_layouts { + for file_layout in &group.files { + if let FileLayout::Object(obj) = file_layout { + for (sec_idx, _) in obj.sections.iter().enumerate() { + if let Some(s) = obj.object.sections.get(sec_idx) { + let name = crate::macho::trim_nul(s.sectname()); + if name == b".rustc" { + if let Some(addr) = obj.section_resolutions[sec_idx].address() { + if rustc_addr == 0 || addr < rustc_addr { + rustc_addr = addr; + } + rustc_size += s.size(le); + } + } + } + } + } + } + } + } + let has_rustc = rustc_addr > 0 && rustc_size > 0; + + let buf_len = out.len(); + let mut w = Writer { + buf: out, + pos: offset, + }; + let dylinker_cmd_size = align8((12 + DYLD_PATH.len() + 1) as u32); + let dylib_cmd_size = align8((24 + LIBSYSTEM_PATH.len() + 1) as u32); + + let is_dylib = layout.symbol_db.args.is_dylib; + let install_name = if is_dylib { + if let Some(ref name) = layout.symbol_db.args.install_name { + String::from_utf8_lossy(name).into_owned() + } else { + layout + .symbol_db + .args + .output() + .to_string_lossy() + .into_owned() + } + } else { + String::new() + }; + let id_dylib_cmd_size = if is_dylib { + align8(24 + install_name.len() as u32 + 1) + } else { + 0 + }; + + let mut ncmds = 0u32; + let mut cmdsize = 0u32; + let add_cmd = |n: &mut u32, s: &mut u32, size: u32| { + *n += 1; + *s += size; + }; + if !is_dylib { + add_cmd(&mut ncmds, &mut cmdsize, 72); + } // PAGEZERO (exe only) + let rustc_in_text = has_rustc && rustc_addr < text_vm_start + text_filesize; + let has_unwind_info = unwind_info_size > 0; + + // Dynamically collect TEXT and DATA section headers 
from all output sections. + // This replaces the hardcoded section counting. + struct SectionHeader { + segname: [u8; 16], + sectname: [u8; 16], + addr: u64, + size: u64, + offset: u32, + align: u32, + flags: u32, + } + + let mut text_sections: Vec = Vec::new(); + let mut data_sections: Vec = Vec::new(); + + static TEXT_SEG_NAME: [u8; 16] = *b"__TEXT\0\0\0\0\0\0\0\0\0\0"; + static DATA_SEG_NAME: [u8; 16] = *b"__DATA\0\0\0\0\0\0\0\0\0\0"; + + // Enumerate all output sections that have content. + for (sec_id, sec_layout) in layout.section_layouts.iter() { + if sec_layout.mem_size == 0 { + continue; + } + let file_off = vm_addr_to_file_offset(sec_layout.mem_offset, mappings).unwrap_or(0) as u32; + if let Some(info) = macho_section_info(sec_id) { + let hdr = SectionHeader { + segname: *info.segname, + sectname: info.sectname, + addr: sec_layout.mem_offset, + size: sec_layout.mem_size, + offset: file_off, + align: sec_layout.alignment.exponent as u32, + flags: info.flags, + }; + if *info.segname == TEXT_SEG_NAME { + text_sections.push(hdr); + } else { + data_sections.push(hdr); + } + } + } + // Sort by address within each segment. + text_sections.sort_by_key(|s| s.addr); + data_sections.sort_by_key(|s| s.addr); + + // Add special sections: .rustc (if in TEXT), __unwind_info + if rustc_in_text { + let rustc_foff = vm_addr_to_file_offset(rustc_addr, mappings).unwrap_or(0) as u32; + text_sections.push(SectionHeader { + segname: TEXT_SEG_NAME, + sectname: *b".rustc\0\0\0\0\0\0\0\0\0\0", + addr: rustc_addr, + size: rustc_size, + offset: rustc_foff, + align: 0, + flags: 0, + }); + } + if has_unwind_info { + let ui_foff = vm_addr_to_file_offset(unwind_info_vm_addr, mappings).unwrap_or(0) as u32; + text_sections.push(SectionHeader { + segname: TEXT_SEG_NAME, + sectname: *b"__unwind_info\0\0\0", + addr: unwind_info_vm_addr, + size: unwind_info_size, + offset: ui_foff, + align: 2, + flags: 0, + }); + } + // Re-sort TEXT after adding special sections. 
+ text_sections.sort_by_key(|s| s.addr); + + // Add .rustc in DATA if not in TEXT. + if has_rustc && !rustc_in_text { + let rc_addr = rustc_addr.max(data_vmaddr); + let rc_foff = + vm_addr_to_file_offset(rustc_addr, mappings).unwrap_or(data_fileoff as usize) as u32; + data_sections.push(SectionHeader { + segname: DATA_SEG_NAME, + sectname: *b".rustc\0\0\0\0\0\0\0\0\0\0", + addr: rc_addr, + size: rustc_size, + offset: rc_foff, + align: 0, + flags: 0, + }); + data_sections.sort_by_key(|s| s.addr); + } + + // Fix up __thread_data: override type to S_THREAD_LOCAL_REGULAR and extend + // Fix __thread_data flags (set correct Mach-O section type). + for sec in &mut data_sections { + let name = crate::macho::trim_nul(&sec.sectname); + if name == b"__thread_data" { + sec.flags = 0x11; // S_THREAD_LOCAL_REGULAR + } + } + + let text_nsects = text_sections.len() as u32; + add_cmd(&mut ncmds, &mut cmdsize, 72 + 80 * text_nsects); // TEXT + if has_data { + let data_nsects = data_sections.len() as u32; + add_cmd(&mut ncmds, &mut cmdsize, 72 + 80 * data_nsects); + } + // Group empty sections by segment name + let empty_sections = &layout.symbol_db.args.empty_sections; + let mut empty_segs: Vec<(&[u8; 16], Vec<&[u8; 16]>)> = Vec::new(); + for (segname, sectname) in empty_sections { + if let Some(seg) = empty_segs.iter_mut().find(|(s, _)| *s == segname) { + seg.1.push(sectname); + } else { + empty_segs.push((segname, vec![sectname])); + } + } + for (_, sects) in &empty_segs { + add_cmd(&mut ncmds, &mut cmdsize, 72 + 80 * sects.len() as u32); + } + add_cmd(&mut ncmds, &mut cmdsize, 72); // LINKEDIT + let emit_uuid = !layout.symbol_db.args.no_uuid; + if emit_uuid { + add_cmd(&mut ncmds, &mut cmdsize, 24); // LC_UUID + } + if is_dylib { + add_cmd(&mut ncmds, &mut cmdsize, id_dylib_cmd_size); // LC_ID_DYLIB + } else { + add_cmd(&mut ncmds, &mut cmdsize, 24); // LC_MAIN + } + if !is_dylib { + add_cmd(&mut ncmds, &mut cmdsize, dylinker_cmd_size); + } + add_cmd(&mut ncmds, &mut cmdsize, 
dylib_cmd_size); // libSystem + let extra_dylibs = &layout.symbol_db.args.extra_dylibs; + let extra_dylib_sizes: Vec = extra_dylibs + .iter() + .map(|p| align8(24 + p.len() as u32 + 1)) + .collect(); + for &sz in &extra_dylib_sizes { + add_cmd(&mut ncmds, &mut cmdsize, sz); + } + let rpaths = &layout.symbol_db.args.rpaths; + let rpath_sizes: Vec = rpaths + .iter() + .map(|p| align8(12 + p.len() as u32 + 1)) + .collect(); + for &sz in &rpath_sizes { + add_cmd(&mut ncmds, &mut cmdsize, sz); + } + add_cmd(&mut ncmds, &mut cmdsize, 24); // SYMTAB + add_cmd(&mut ncmds, &mut cmdsize, 80); // DYSYMTAB + add_cmd(&mut ncmds, &mut cmdsize, 32); // LC_BUILD_VERSION + add_cmd(&mut ncmds, &mut cmdsize, 16); // LC_DYLD_CHAINED_FIXUPS + add_cmd(&mut ncmds, &mut cmdsize, 16); // LC_DYLD_EXPORTS_TRIE + let emit_func_starts = !layout.symbol_db.args.no_function_starts; + if emit_func_starts { + add_cmd(&mut ncmds, &mut cmdsize, 16); // LC_FUNCTION_STARTS + } + let emit_data_in_code = !layout.symbol_db.args.no_data_in_code; + if emit_data_in_code { + add_cmd(&mut ncmds, &mut cmdsize, 16); // LC_DATA_IN_CODE + } + + let filetype = if is_dylib { 6u32 } else { MH_EXECUTE }; // MH_DYLIB = 6 + w.u32(MH_MAGIC_64); + w.u32(CPU_TYPE_ARM64); + w.u32(CPU_SUBTYPE_ARM64_ALL); + w.u32(filetype); + w.u32(ncmds); + w.u32(cmdsize); + let mut flags = MH_PIE | MH_TWOLEVEL | MH_DYLDLINK; + if has_tlv { + flags |= 0x0080_0000; + } // MH_HAS_TLV_DESCRIPTORS + w.u32(flags); + w.u32(0); + + if !is_dylib { + w.segment(b"__PAGEZERO", 0, PAGEZERO_SIZE, 0, 0, 0, 0, 0); + } + + // __TEXT — include .rustc section if it falls in TEXT range + w.u32(LC_SEGMENT_64); + w.u32(72 + 80 * text_nsects); + w.name16(b"__TEXT"); + w.u64(text_vm_start); + w.u64(text_filesize); + w.u64(0); + w.u64(text_filesize); + w.u32(VM_PROT_READ | VM_PROT_EXECUTE); + w.u32(VM_PROT_READ | VM_PROT_EXECUTE); + w.u32(text_nsects); + w.u32(0); + // Write TEXT section headers. 
+ for sec in &text_sections { + w.buf[w.pos..w.pos + 16].copy_from_slice(&sec.sectname); + w.pos += 16; + w.buf[w.pos..w.pos + 16].copy_from_slice(&sec.segname); + w.pos += 16; + w.u64(sec.addr); + w.u64(sec.size); + w.u32(sec.offset); + w.u32(sec.align); + w.u32(0); // reloff + w.u32(0); // nreloc + w.u32(sec.flags); + w.u32(0); // reserved1 + // reserved2: stub size for S_SYMBOL_STUBS + let reserved2 = if sec.flags & 0xFF == 0x08 { 12u32 } else { 0 }; + w.u32(reserved2); + w.u32(0); // reserved3 + } + + if has_data { + let data_nsects = data_sections.len() as u32; + let data_cmd_size = 72 + 80 * data_nsects; + w.u32(LC_SEGMENT_64); + w.u32(data_cmd_size); + w.name16(b"__DATA"); + w.u64(data_vmaddr); + w.u64(data_vmsize); + w.u64(data_fileoff); + w.u64(data_filesize); + w.u32(VM_PROT_READ | VM_PROT_WRITE); + w.u32(VM_PROT_READ | VM_PROT_WRITE); + w.u32(data_nsects); + w.u32(0); + + // Write DATA section headers. + for sec in &data_sections { + w.buf[w.pos..w.pos + 16].copy_from_slice(&sec.sectname); + w.pos += 16; + w.buf[w.pos..w.pos + 16].copy_from_slice(&sec.segname); + w.pos += 16; + w.u64(sec.addr); + w.u64(sec.size); + w.u32(sec.offset); + w.u32(sec.align); + w.u32(0); // reloff + w.u32(0); // nreloc + w.u32(sec.flags); + w.u32(0); // reserved1 + w.u32(0); // reserved2 + w.u32(0); // reserved3 + } + } + + // Write empty sections (from -add_empty_section) as zero-size segments + for (segname, sects) in &empty_segs { + let n = sects.len() as u32; + w.u32(LC_SEGMENT_64); + w.u32(72 + 80 * n); + w.buf[w.pos..w.pos + 16].copy_from_slice(*segname); + w.pos += 16; + w.u64(0); // vmaddr + w.u64(0); // vmsize + w.u64(0); // fileoff + w.u64(0); // filesize + w.u32(0); // maxprot + w.u32(0); // initprot + w.u32(n); + w.u32(0); // flags + for sectname in sects { + w.buf[w.pos..w.pos + 16].copy_from_slice(*sectname); + w.pos += 16; + w.buf[w.pos..w.pos + 16].copy_from_slice(*segname); + w.pos += 16; + w.u64(0); // addr + w.u64(0); // size + w.u32(0); // offset + 
w.u32(0); // align + w.u32(0); // reloff + w.u32(0); // nreloc + w.u32(0); // flags + w.u32(0); // reserved1 + w.u32(0); // reserved2 + w.u32(0); // reserved3 + } + } + + let (last_file_end, linkedit_vm) = if has_data { + (data_fileoff + data_filesize, data_vmaddr + data_vmsize) + } else { + ( + text_filesize, + align_to(text_vm_start + text_filesize, PAGE_SIZE), + ) + }; + let cf_offset = last_file_end; + let cf_size = chained_fixups_data_size as u64; + + // LINKEDIT vmsize must cover the full content (fixups + symtab + exports). + let linkedit_vmsize = align_to( + (buf_len as u64) + .saturating_sub(last_file_end) + .max(PAGE_SIZE), + PAGE_SIZE, + ); + w.segment( + b"__LINKEDIT", + linkedit_vm, + linkedit_vmsize, + last_file_end, + cf_size, + VM_PROT_READ, + VM_PROT_READ, + 0, + ); + + // LC_UUID = 0x1B + if emit_uuid { + w.u32(0x1B); + w.u32(24); + let uuid_bytes: [u8; 16] = if layout.symbol_db.args.random_uuid { + let mut h = [0u8; 16]; + // Use std::time for a simple source of entropy + let t = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_nanos(); + h[..16].copy_from_slice(&t.to_le_bytes()); + h[6] = (h[6] & 0x0F) | 0x40; // version 4 + h[8] = (h[8] & 0x3F) | 0x80; // variant 1 + h + } else { + // Deterministic UUID from -final_output name or output path + let mut h = [0u8; 16]; + let output_lossy = layout + .symbol_db + .args + .output() + .to_string_lossy() + .into_owned(); + let name = layout + .symbol_db + .args + .final_output + .as_deref() + .unwrap_or(&output_lossy); + for (i, b) in name.bytes().enumerate() { + h[i % 16] ^= b; + } + h[6] = (h[6] & 0x0F) | 0x40; // version 4 + h[8] = (h[8] & 0x3F) | 0x80; // variant 1 + h + }; + w.bytes(&uuid_bytes); + } + + if is_dylib { + // LC_ID_DYLIB = 0x0D + w.u32(0x0D); + w.u32(id_dylib_cmd_size); + w.u32(24); + w.u32(2); + w.u32(0x01_0000); + w.u32(0x01_0000); + w.bytes(install_name.as_bytes()); + w.u8(0); + w.pad8(); + } else { + w.u32(LC_MAIN); + w.u32(24); + 
w.u64(entry_offset as u64); + w.u64(layout.symbol_db.args.stack_size.unwrap_or(0)); + } + + if !is_dylib { + w.u32(LC_LOAD_DYLINKER); + w.u32(dylinker_cmd_size); + w.u32(12); + w.bytes(DYLD_PATH); + w.u8(0); + w.pad8(); + } + + w.u32(LC_LOAD_DYLIB); + w.u32(dylib_cmd_size); + w.u32(24); + w.u32(2); + w.u32(0x01_0000); + w.u32(0x01_0000); + w.bytes(LIBSYSTEM_PATH); + w.u8(0); + w.pad8(); + + for (i, dylib_path) in extra_dylibs.iter().enumerate() { + w.u32(LC_LOAD_DYLIB); + w.u32(extra_dylib_sizes[i]); + w.u32(24); + w.u32(2); + w.u32(0x01_0000); + w.u32(0x01_0000); + w.bytes(dylib_path); + w.u8(0); + w.pad8(); + } + + // LC_RPATH = 0x8000_001C + for (i, rpath) in rpaths.iter().enumerate() { + w.u32(0x8000_001C); + w.u32(rpath_sizes[i]); + w.u32(12); // path offset + w.bytes(rpath); + w.u8(0); + w.pad8(); + } + + w.u32(LC_SYMTAB); + w.u32(24); + w.u32(0); + w.u32(0); + w.u32(0); + w.u32(0); + w.u32(LC_DYSYMTAB); + w.u32(80); + for _ in 0..18 { + w.u32(0); + } + + w.u32(LC_BUILD_VERSION); + w.u32(32); + w.u32(PLATFORM_MACOS); + w.u32(layout.symbol_db.args.minos.unwrap_or(0x000E_0000)); + w.u32(layout.symbol_db.args.sdk_version.unwrap_or(0x000E_0000)); + w.u32(1); + w.u32(3); + w.u32(0x0300_0100); + + w.u32(LC_DYLD_CHAINED_FIXUPS); + w.u32(16); + w.u32(cf_offset as u32); + w.u32(cf_size as u32); + w.u32(LC_DYLD_EXPORTS_TRIE); + w.u32(16); + w.u32(last_file_end as u32); + w.u32(0); + // LC_FUNCTION_STARTS = 0x26 + if emit_func_starts { + w.u32(0x26); + w.u32(16); + w.u32(last_file_end as u32); // offset (patched later) + w.u32(0); // size 0 + } + // LC_DATA_IN_CODE = 0x29 + if emit_data_in_code { + w.u32(0x29); + w.u32(16); + w.u32(last_file_end as u32); // offset (patched later) + w.u32(0); // size 0 + } + + Ok(Some(cf_offset)) +} + +fn read_u32(buf: &[u8], offset: usize) -> u32 { + u32::from_le_bytes(buf[offset..offset + 4].try_into().unwrap()) +} + +fn write_u32_at(buf: &mut [u8], offset: usize, val: u32) { + buf[offset..offset + 
4].copy_from_slice(&val.to_le_bytes());
}

/// Rounds `v` up to the next multiple of 8.
fn align8(v: u32) -> u32 {
    let bumped = v + 7;
    bumped & !7
}

/// Rounds `value` up to a multiple of `alignment` using mask arithmetic.
///
/// Assumes `alignment` is a non-zero power of two; other values are not checked here.
fn align_to(value: u64, alignment: u64) -> u64 {
    let bumped = value + alignment - 1;
    let mask = alignment - 1;
    bumped & !mask
}

/// Sequential writer over a caller-provided byte buffer.
///
/// Each method stores its value at `pos` and then advances `pos` past the
/// bytes it wrote. Writes past the end of `buf` panic via slice indexing.
struct Writer<'a> {
    buf: &'a mut [u8],
    pos: usize,
}

impl Writer<'_> {
    /// Writes a single byte.
    fn u8(&mut self, v: u8) {
        let at = self.pos;
        self.buf[at] = v;
        self.pos = at + 1;
    }

    /// Writes `v` as 4 little-endian bytes.
    fn u32(&mut self, v: u32) {
        self.bytes(&v.to_le_bytes());
    }

    /// Writes `v` as 8 little-endian bytes.
    fn u64(&mut self, v: u64) {
        self.bytes(&v.to_le_bytes());
    }

    /// Writes `name` as a fixed 16-byte field: at most the first 16 bytes of
    /// `name` are kept and any remaining bytes are zero.
    fn name16(&mut self, name: &[u8]) {
        let kept = name.len().min(16);
        let mut field = [0u8; 16];
        field[..kept].copy_from_slice(&name[..kept]);
        self.bytes(&field);
    }

    /// Copies `data` verbatim into the buffer.
    fn bytes(&mut self, data: &[u8]) {
        let end = self.pos + data.len();
        self.buf[self.pos..end].copy_from_slice(data);
        self.pos = end;
    }

    /// Writes zero bytes until `pos` is a multiple of 8.
    fn pad8(&mut self) {
        let target = (self.pos + 7) & !7;
        while self.pos < target {
            self.u8(0);
        }
    }

    /// Writes an `LC_SEGMENT_64` command header for a segment with `nsects`
    /// sections. The section headers themselves are not written here; the
    /// caller is expected to append them.
    fn segment(
        &mut self,
        name: &[u8],
        vmaddr: u64,
        vmsize: u64,
        fileoff: u64,
        filesize: u64,
        maxprot: u32,
        initprot: u32,
        nsects: u32,
    ) {
        // cmd, cmdsize (72-byte segment command plus 80 bytes per section header)
        self.u32(LC_SEGMENT_64);
        self.u32(72 + 80 * nsects);
        // segname
        self.name16(name);
        // Address range and file range of the segment.
        self.u64(vmaddr);
        self.u64(vmsize);
        self.u64(fileoff);
        self.u64(filesize);
        // Protections, section count, and a zero flags word.
        self.u32(maxprot);
        self.u32(initprot);
        self.u32(nsects);
        self.u32(0);
    }
}

/// Write a Mach-O relocatable object file (MH_OBJECT) for partial linking (-r).
fn write_relocatable_object(layout: &Layout<'_, MachO>) -> Result {
    use crate::layout::FileLayout;
    use object::read::macho::Nlist as _;
    use object::read::macho::Section as MachOSec;
    let le = object::Endianness::Little;

    // Phase 1: Collect sections and symbols from all input objects.
    // Each output section aggregates data from matching input sections.
+ struct OutSection { + segname: [u8; 16], + sectname: [u8; 16], + data: Vec, + align: u32, + flags: u32, + relocs: Vec<[u8; 8]>, // raw Mach-O relocation entries + } + + // Symbol entry for the output nlist table. + struct OutSym { + name: Vec, + n_type: u8, + n_sect: u8, // 1-based section ordinal in output, 0 = NO_SECT + n_desc: u16, + n_value: u64, + } + + let mut sections: Vec = Vec::new(); + let mut symbols: Vec = Vec::new(); + + // Map: (segname, sectname) -> index in `sections` + let mut sec_map: std::collections::HashMap<([u8; 16], [u8; 16]), usize> = Default::default(); + + for group in &layout.group_layouts { + for file_layout in &group.files { + let FileLayout::Object(obj) = file_layout else { + continue; + }; + + // Build input symbol index -> output symbol index mapping for this object. + let n_input_syms = obj.object.symbols.len(); + let mut sym_remap: Vec = vec![0; n_input_syms]; + // Also track which input sections map to which output sections. + let n_input_secs = obj.object.sections.len(); + let mut sec_remap: Vec = vec![0; n_input_secs]; // 1-based output ordinal + let mut sec_value_adjust: Vec = vec![0; n_input_secs]; // offset adjustment per input section + + // Process sections: copy data and build section map. 
+ for sec_idx in 0..n_input_secs { + let Some(sec) = obj.object.sections.get(sec_idx) else { + continue; + }; + let sec_segname = sec.segname; + let sec_sectname = sec.sectname; + let trimmed_seg = crate::macho::trim_nul(&sec_segname); + let _trimmed_name = crate::macho::trim_nul(&sec_sectname); + + // Skip __LD,__compact_unwind (linker-private metadata) + if trimmed_seg == b"__LD" { + continue; + } + + let sec_type = sec.flags(le) & 0xFF; + // Skip zerofill (BSS) sections' data + let has_data = sec_type != 0x01 && sec_type != 0x0C; + + let input_offset = sec.offset(le) as usize; + let input_size = sec.size(le) as usize; + + let out_sec_idx = if let Some(&idx) = sec_map.get(&(sec_segname, sec_sectname)) { + idx + } else { + let idx = sections.len(); + sec_map.insert((sec_segname, sec_sectname), idx); + sections.push(OutSection { + segname: sec_segname, + sectname: sec_sectname, + data: Vec::new(), + align: sec.align(le), + flags: sec.flags(le), + relocs: Vec::new(), + }); + idx + }; + sec_remap[sec_idx] = (out_sec_idx + 1) as u8; + + let out_sec = &mut sections[out_sec_idx]; + // Align the output position + let alignment = 1usize << out_sec.align.max(sec.align(le)); + out_sec.align = out_sec.align.max(sec.align(le)); + let padding = (alignment - (out_sec.data.len() % alignment)) % alignment; + out_sec.data.resize(out_sec.data.len() + padding, 0); + let output_offset_in_sec = out_sec.data.len(); + // Record the adjustment: symbols in this input section need their + // value increased by (output_offset_in_sec - input_section_addr). 
+ let input_sec_addr = sec.addr.get(le); + sec_value_adjust[sec_idx] = output_offset_in_sec as u64 - input_sec_addr; + + if has_data && input_size > 0 && input_offset > 0 { + if let Some(data) = obj.object.data.get(input_offset..input_offset + input_size) + { + out_sec.data.extend_from_slice(data); + } else { + out_sec.data.resize(out_sec.data.len() + input_size, 0); + } + } else { + out_sec.data.resize(out_sec.data.len() + input_size, 0); + } + + // Copy and remap relocations (deferred until symbols are mapped) + // For now, store reloc info to process after symbol table is built. + // We'll handle this in a second pass. + } + + // Process symbols: add to output symbol table. + for sym_idx in 0..n_input_syms { + let Ok(sym) = obj.object.symbols.symbol(object::SymbolIndex(sym_idx)) else { + continue; + }; + let n_type = sym.n_type(); + // Skip debug symbols (N_STAB) + if n_type & 0xE0 != 0 { + continue; + } + let name = sym + .name(le, obj.object.symbols.strings()) + .unwrap_or(&[]) + .to_vec(); + // Remap n_sect + let n_sect_in = sym.n_sect(); + let n_sect_out = if n_sect_in > 0 && (n_sect_in as usize - 1) < sec_remap.len() { + sec_remap[n_sect_in as usize - 1] + } else { + 0 + }; + // Adjust n_value for merged section offset + let n_value = if n_sect_in > 0 + && n_sect_out > 0 + && (n_sect_in as usize - 1) < sec_value_adjust.len() + { + sym.n_value(le) + .wrapping_add(sec_value_adjust[n_sect_in as usize - 1]) + } else { + sym.n_value(le) + }; + let out_idx = symbols.len() as u32; + sym_remap[sym_idx] = out_idx; + symbols.push(OutSym { + name, + n_type, + n_sect: n_sect_out, + n_desc: sym.n_desc(le) as u16, + n_value, + }); + } + + // Second pass: copy and remap relocations. 
+ for sec_idx in 0..n_input_secs { + let Some(sec) = obj.object.sections.get(sec_idx) else { + continue; + }; + let trimmed_seg = crate::macho::trim_nul(&sec.segname); + if trimmed_seg == b"__LD" { + continue; + } + let out_sec_ordinal = sec_remap[sec_idx]; + if out_sec_ordinal == 0 { + continue; + } + let out_sec_idx = out_sec_ordinal as usize - 1; + + let relocs = match sec.relocations(le, obj.object.data) { + Ok(r) => r, + Err(_) => continue, + }; + for r in relocs { + let ri = r.info(le); + // Build output relocation with remapped symbol/section index. + let new_symbolnum = if ri.r_extern { + let idx = ri.r_symbolnum as usize; + if idx < sym_remap.len() { + sym_remap[idx] + } else { + ri.r_symbolnum + } + } else { + // Non-extern: r_symbolnum is 1-based section ordinal. + let sec_ord = ri.r_symbolnum as usize; + if sec_ord > 0 + && sec_ord - 1 < sec_remap.len() + && sec_remap[sec_ord - 1] > 0 + { + sec_remap[sec_ord - 1] as u32 + } else { + ri.r_symbolnum + } + }; + // Encode relocation entry (Mach-O ARM64 format): + // word0 = r_address (adjusted for output section offset) + // word1 = packed(r_symbolnum, r_pcrel, r_length, r_extern, r_type) + let addr_adjust = sec_value_adjust[sec_idx] as u32; + let word0 = ri.r_address.wrapping_add(addr_adjust); + let word1: u32 = (new_symbolnum & 0x00FF_FFFF) + | (if ri.r_pcrel { 1 << 24 } else { 0 }) + | ((ri.r_length as u32 & 3) << 25) + | (if ri.r_extern { 1 << 27 } else { 0 }) + | ((ri.r_type as u32 & 0xF) << 28); + let mut entry = [0u8; 8]; + entry[0..4].copy_from_slice(&word0.to_le_bytes()); + entry[4..8].copy_from_slice(&word1.to_le_bytes()); + sections[out_sec_idx].relocs.push(entry); + } + } + } + } + + if sections.is_empty() { + // Nothing to output + let output_path = layout.symbol_db.args.output(); + std::fs::write(output_path.as_ref(), &[]) + .map_err(|e| crate::error!("Failed to write: {e}"))?; + return Ok(()); + } + + // Phase 2: Sort symbols (locals first, then defined externals, then undefined). 
+ let mut local_syms: Vec = Vec::new(); + let mut ext_def_syms: Vec = Vec::new(); + let mut undef_syms: Vec = Vec::new(); + for (i, sym) in symbols.iter().enumerate() { + if sym.name.is_empty() && sym.n_type == 0 { + continue; // skip null symbol + } + let is_ext = (sym.n_type & 0x01) != 0; // N_EXT + let sym_type = sym.n_type & 0x0E; + if !is_ext { + local_syms.push(i); + } else if sym_type == 0 && sym.n_sect == 0 { + // N_UNDF + N_EXT = undefined external + undef_syms.push(i); + } else { + ext_def_syms.push(i); + } + } + let sorted_indices: Vec = local_syms + .iter() + .chain(ext_def_syms.iter()) + .chain(undef_syms.iter()) + .copied() + .collect(); + // Build reverse map: old index -> new index (for relocation fixup) + let mut new_sym_index = vec![0u32; symbols.len()]; + for (new_idx, &old_idx) in sorted_indices.iter().enumerate() { + new_sym_index[old_idx] = new_idx as u32; + } + + // Fixup relocations to use new symbol indices. + for sec in &mut sections { + for entry in &mut sec.relocs { + let word1 = u32::from_le_bytes(entry[4..8].try_into().unwrap()); + let old_symbolnum = word1 & 0x00FF_FFFF; + let is_extern = (word1 >> 27) & 1 != 0; + if is_extern { + let new_num = if (old_symbolnum as usize) < new_sym_index.len() { + new_sym_index[old_symbolnum as usize] + } else { + old_symbolnum + }; + let word1_new = (word1 & 0xFF00_0000) | (new_num & 0x00FF_FFFF); + entry[4..8].copy_from_slice(&word1_new.to_le_bytes()); + } + // Non-extern relocs reference section ordinals, already remapped. + } + } + + // Phase 3: Build string table and nlist entries. 
+ let mut strtab = vec![0u8]; // starts with NUL + let mut nlist_data: Vec = Vec::new(); + for &old_idx in &sorted_indices { + let sym = &symbols[old_idx]; + let strx = strtab.len() as u32; + strtab.extend_from_slice(&sym.name); + strtab.push(0); + // nlist_64: n_strx(4) + n_type(1) + n_sect(1) + n_desc(2) + n_value(8) = 16 + nlist_data.extend_from_slice(&strx.to_le_bytes()); + nlist_data.push(sym.n_type); + nlist_data.push(sym.n_sect); + nlist_data.extend_from_slice(&sym.n_desc.to_le_bytes()); + nlist_data.extend_from_slice(&sym.n_value.to_le_bytes()); + } + + // Phase 4: Compute layout and write output. + let nsects = sections.len() as u32; + let ncmds = 3u32; // LC_SEGMENT_64 + LC_SYMTAB + LC_DYSYMTAB + let seg_cmdsize = 72 + 80 * nsects; + let symtab_cmdsize = 24u32; + let dysymtab_cmdsize = 80u32; + let header_size = 32; // Mach-O 64 header + let total_cmdsize = seg_cmdsize + symtab_cmdsize + dysymtab_cmdsize; + + let mut section_offset = header_size + total_cmdsize; + let mut sec_offsets: Vec = Vec::new(); + for sec in §ions { + // Align section data + let alignment = 1u32 << sec.align; + section_offset = (section_offset + alignment - 1) & !(alignment - 1); + sec_offsets.push(section_offset); + section_offset += sec.data.len() as u32; + } + + // Relocation entries follow section data + let mut reloc_offsets: Vec = Vec::new(); + let mut reloc_offset = section_offset; + for sec in §ions { + reloc_offsets.push(if sec.relocs.is_empty() { + 0 + } else { + reloc_offset + }); + reloc_offset += (sec.relocs.len() * 8) as u32; + } + + // Symbol table follows relocations + let symoff = (reloc_offset + 7) & !7; // 8-byte align + let nsyms = sorted_indices.len() as u32; + let stroff = symoff + nsyms * 16; + let total_size = stroff + strtab.len() as u32; + + let mut buf = vec![0u8; total_size as usize]; + + // Write header + let mut pos = 0usize; + let w = |buf: &mut Vec, pos: &mut usize, val: u32| { + buf[*pos..*pos + 4].copy_from_slice(&val.to_le_bytes()); + *pos += 4; + 
}; + w(&mut buf, &mut pos, MH_MAGIC_64); + w(&mut buf, &mut pos, CPU_TYPE_ARM64); + w(&mut buf, &mut pos, CPU_SUBTYPE_ARM64_ALL); + w(&mut buf, &mut pos, 1); // MH_OBJECT + w(&mut buf, &mut pos, ncmds); + w(&mut buf, &mut pos, total_cmdsize); + w(&mut buf, &mut pos, 0x2000); // MH_SUBSECTIONS_VIA_SYMBOLS + w(&mut buf, &mut pos, 0); // reserved + + // LC_SEGMENT_64 (unnamed, contains all sections) + w(&mut buf, &mut pos, LC_SEGMENT_64); + w(&mut buf, &mut pos, seg_cmdsize); + // segname: empty (16 NUL bytes) + buf[pos..pos + 16].fill(0); + pos += 16; + // vmaddr, vmsize + let seg_vmsize = sections + .iter() + .enumerate() + .map(|(i, s)| sec_offsets[i] as u64 - sec_offsets[0] as u64 + s.data.len() as u64) + .max() + .unwrap_or(0); + buf[pos..pos + 8].copy_from_slice(&0u64.to_le_bytes()); // vmaddr + pos += 8; + buf[pos..pos + 8].copy_from_slice(&seg_vmsize.to_le_bytes()); // vmsize + pos += 8; + buf[pos..pos + 8].copy_from_slice(&(sec_offsets[0] as u64).to_le_bytes()); // fileoff + pos += 8; + buf[pos..pos + 8] + .copy_from_slice(&(section_offset as u64 - sec_offsets[0] as u64).to_le_bytes()); // filesize + pos += 8; + w(&mut buf, &mut pos, 7); // maxprot: rwx + w(&mut buf, &mut pos, 7); // initprot: rwx + w(&mut buf, &mut pos, nsects); + w(&mut buf, &mut pos, 0); // flags + + // Section headers + for (i, sec) in sections.iter().enumerate() { + buf[pos..pos + 16].copy_from_slice(&sec.sectname); + pos += 16; + buf[pos..pos + 16].copy_from_slice(&sec.segname); + pos += 16; + buf[pos..pos + 8] + .copy_from_slice(&((sec_offsets[i] - sec_offsets[0]) as u64).to_le_bytes()); // addr (section-relative) + pos += 8; + buf[pos..pos + 8].copy_from_slice(&(sec.data.len() as u64).to_le_bytes()); // size + pos += 8; + w(&mut buf, &mut pos, sec_offsets[i]); // offset + w(&mut buf, &mut pos, sec.align); // align + w(&mut buf, &mut pos, reloc_offsets[i]); // reloff + w(&mut buf, &mut pos, sec.relocs.len() as u32); // nreloc + w(&mut buf, &mut pos, sec.flags); // flags + w(&mut buf, 
&mut pos, 0); // reserved1 + w(&mut buf, &mut pos, 0); // reserved2 + w(&mut buf, &mut pos, 0); // reserved3 + } + + // LC_SYMTAB + w(&mut buf, &mut pos, LC_SYMTAB); + w(&mut buf, &mut pos, symtab_cmdsize); + w(&mut buf, &mut pos, symoff); + w(&mut buf, &mut pos, nsyms); + w(&mut buf, &mut pos, stroff); + w(&mut buf, &mut pos, strtab.len() as u32); + + // LC_DYSYMTAB + w(&mut buf, &mut pos, LC_DYSYMTAB); + w(&mut buf, &mut pos, dysymtab_cmdsize); + let nlocalsym = local_syms.len() as u32; + let nextdefsym = ext_def_syms.len() as u32; + let nundefsym = undef_syms.len() as u32; + w(&mut buf, &mut pos, 0); // ilocalsym + w(&mut buf, &mut pos, nlocalsym); + w(&mut buf, &mut pos, nlocalsym); // iextdefsym + w(&mut buf, &mut pos, nextdefsym); + w(&mut buf, &mut pos, nlocalsym + nextdefsym); // iundefsym + w(&mut buf, &mut pos, nundefsym); + // Remaining DYSYMTAB fields are all zero + for _ in 0..14 { + w(&mut buf, &mut pos, 0); + } + + // Write section data + for (i, sec) in sections.iter().enumerate() { + let off = sec_offsets[i] as usize; + if off + sec.data.len() <= buf.len() { + buf[off..off + sec.data.len()].copy_from_slice(&sec.data); + } + } + + // Write relocations + for (i, sec) in sections.iter().enumerate() { + if sec.relocs.is_empty() { + continue; + } + let off = reloc_offsets[i] as usize; + for (j, entry) in sec.relocs.iter().enumerate() { + let p = off + j * 8; + if p + 8 <= buf.len() { + buf[p..p + 8].copy_from_slice(entry); + } + } + } + + // Write symbol table + if symoff as usize + nlist_data.len() <= buf.len() { + buf[symoff as usize..symoff as usize + nlist_data.len()].copy_from_slice(&nlist_data); + } + if stroff as usize + strtab.len() <= buf.len() { + buf[stroff as usize..stroff as usize + strtab.len()].copy_from_slice(&strtab); + } + + let output_path = layout.symbol_db.args.output(); + std::fs::write(output_path.as_ref(), &buf) + .map_err(|e| crate::error!("Failed to write: {e}"))?; + + Ok(()) +} + +/// Validate structural invariants of a Mach-O 
output binary. +/// +/// Called when `WILD_VALIDATE_OUTPUT=1` is set. Parses the output back and checks: +/// +/// # Segment invariants +/// - Segment vmaddr is page-aligned (16KB on arm64) +/// - Segment fileoff is page-aligned (when filesize > 0) +/// - Segment file content fits within the file +/// +/// # Section invariants +/// - Section addr is within parent segment [vmaddr, vmaddr+vmsize) +/// - Section file offset is within parent segment [fileoff, fileoff+filesize) +/// - Section addr respects its declared alignment +/// - Sections within a segment do not overlap +/// +/// # Chained fixups invariants +/// - Page start offsets are within a page (< page_size) +fn validate_macho_output(buf: &[u8]) -> Result { + use object::read::macho::MachHeader as _; + use object::read::macho::Section as _; + use object::read::macho::Segment as _; + let le = object::Endianness::Little; + let header = object::macho::MachHeader64::::parse(buf, 0) + .map_err(|e| crate::error!("validate: bad Mach-O header: {e}"))?; + let mut cmds = header + .load_commands(le, buf, 0) + .map_err(|e| crate::error!("validate: bad load commands: {e}"))?; + + let file_len = buf.len() as u64; + + while let Ok(Some(cmd)) = cmds.next() { + if let Ok(Some((seg, seg_data))) = cmd.segment_64() { + let segname = crate::macho::trim_nul(&seg.segname); + let segname_str = String::from_utf8_lossy(segname); + + let vm_addr = seg.vmaddr.get(le); + let vm_size = seg.vmsize.get(le); + let file_off = seg.fileoff.get(le); + let file_size = seg.filesize.get(le); + + // Segment vmaddr page alignment + if vm_addr % PAGE_SIZE != 0 && !segname.is_empty() { + crate::bail!( + "validate: segment {segname_str} vmaddr {vm_addr:#x} not page-aligned" + ); + } + + // Segment fileoff page alignment + if file_size > 0 && file_off % PAGE_SIZE != 0 { + crate::bail!( + "validate: segment {segname_str} fileoff {file_off:#x} not page-aligned" + ); + } + + // Segment fits in file + if file_off + file_size > file_len { + crate::bail!( + 
"validate: segment {segname_str} extends beyond file \ + ({file_off:#x}+{file_size:#x} > {file_len:#x})" + ); + } + + // Section invariants + if let Ok(sections) = seg.sections(le, seg_data) { + let mut prev_end: u64 = 0; + for sec in sections { + let sect_raw = sec.sectname(); + let sect_name = String::from_utf8_lossy(crate::macho::trim_nul(sect_raw)); + + let sec_addr = sec.addr(le); + let sec_size = sec.size(le); + let sec_offset = sec.offset(le) as u64; + let sec_align = sec.align(le); + + // Section addr within segment + if sec_size > 0 + && (sec_addr < vm_addr || sec_addr + sec_size > vm_addr + vm_size) + { + crate::bail!( + "validate: section {segname_str},{sect_name} addr \ + {sec_addr:#x}+{sec_size:#x} outside segment \ + [{vm_addr:#x}..{:#x})", + vm_addr + vm_size + ); + } + + // Section file offset within segment + let sec_type = sec.flags(le) & 0xFF; + let is_zerofill = sec_type == 0x01 || sec_type == 0x0C; + if sec_size > 0 && !is_zerofill && sec_offset > 0 && file_size > 0 { + if sec_offset < file_off || sec_offset + sec_size > file_off + file_size { + crate::bail!( + "validate: section {segname_str},{sect_name} file range \ + [{sec_offset:#x}..{:#x}) outside segment \ + [{file_off:#x}..{:#x})", + sec_offset + sec_size, + file_off + file_size + ); + } + } + + // Section alignment + if sec_size > 0 && sec_align > 0 { + let alignment = 1u64 << sec_align; + if sec_addr % alignment != 0 { + crate::bail!( + "validate: section {segname_str},{sect_name} addr \ + {sec_addr:#x} not aligned to 2^{sec_align} ({alignment})" + ); + } + } + + // No overlap with previous section + if sec_size > 0 && sec_addr > 0 && sec_addr < prev_end { + crate::bail!( + "validate: section {segname_str},{sect_name} at {sec_addr:#x} \ + overlaps previous section ending at {prev_end:#x}" + ); + } + if sec_size > 0 { + prev_end = sec_addr + sec_size; + } + } + } + } + + // Check TLS invariants for __thread_vars descriptors. 
+ if let Ok(Some((seg, seg_data))) = cmd.segment_64() { + if crate::macho::trim_nul(&seg.segname) == b"__DATA" { + if let Ok(sections) = seg.sections(le, seg_data) { + let mut tdata_size = 0u64; + let mut tbss_size = 0u64; + let mut tvars_foff = 0usize; + let mut tvars_count = 0usize; + for sec in sections { + let sec_type = sec.flags(le) & 0xFF; + let size = sec.size(le); + match sec_type { + 0x11 => tdata_size = size, + 0x12 => tbss_size = size, + 0x13 => { + tvars_foff = sec.offset(le) as usize; + tvars_count = size as usize / 24; + } + _ => {} + } + } + let tls_total = tdata_size + tbss_size; + + if tvars_count > 0 && tls_total > 0 { + let mut offsets = Vec::new(); + for i in 0..tvars_count { + let base = tvars_foff + i * 24; + if base + 24 > buf.len() { + break; + } + let key = + u64::from_le_bytes(buf[base + 8..base + 16].try_into().unwrap()); + let offset = + u64::from_le_bytes(buf[base + 16..base + 24].try_into().unwrap()); + + // Invariant: key must be 0 (dyld manages it at runtime) + if key != 0 { + crate::bail!( + "validate: TLV descriptor [{i}] key={key:#x} (must be 0)" + ); + } + + // Invariant: offset must not have fixup encoding + // (high bits in 51-63 must be 0) + if (offset >> 51) != 0 { + crate::bail!( + "validate: TLV descriptor [{i}] offset={offset:#x} \ + has fixup encoding (bits 51+ set)" + ); + } + + // Invariant: offset must be within TLS block + if offset >= tls_total { + crate::bail!( + "validate: TLV descriptor [{i}] offset={offset:#x} \ + exceeds TLS block size {tls_total:#x} \ + (thread_data={tdata_size:#x} + thread_bss={tbss_size:#x})" + ); + } + + offsets.push(offset); + } + + // Invariant: no two TLV descriptors should share the same offset + // (unless both are zero — which indicates a bug but may not crash) + offsets.sort(); + for w in offsets.windows(2) { + if w[0] == w[1] && tvars_count > 1 { + crate::bail!( + "validate: duplicate TLV offset {:#x} — \ + two thread-locals share the same TLS slot", + w[0] + ); + } + } + } + } + } 
+ } + + // Check LC_SYMTAB + if let Ok(Some(symtab)) = cmd.symtab() { + let symoff = symtab.symoff.get(le) as u64; + let nsyms = symtab.nsyms.get(le) as u64; + let stroff = symtab.stroff.get(le) as u64; + let strsize = symtab.strsize.get(le) as u64; + let sym_end = symoff + nsyms * 16; + if sym_end > file_len { + crate::bail!( + "validate: LC_SYMTAB extends beyond file \ + (symoff {symoff:#x} + {nsyms}*16 = {sym_end:#x} > {file_len:#x})" + ); + } + if stroff + strsize > file_len { + crate::bail!( + "validate: LC_SYMTAB strtab extends beyond file \ + (stroff {stroff:#x} + {strsize:#x} > {file_len:#x})" + ); + } + } + } + + // Symbol-section consistency check: every defined symbol's n_value must + // fall within the address range of the section identified by its n_sect. + // This catches layout bugs where a symbol is resolved using the wrong + // section's output address. + { + let mut cmds_sym = header + .load_commands(le, buf, 0) + .map_err(|e| crate::error!("validate: {e}"))?; + // Collect all sections with their address ranges + let mut section_ranges: Vec<(u64, u64)> = Vec::new(); // (addr, addr+size) + while let Ok(Some(cmd)) = cmds_sym.next() { + if let Ok(Some((seg, seg_data))) = cmd.segment_64() { + if let Ok(sections) = seg.sections(le, seg_data) { + for sec in sections { + let addr = sec.addr(le); + let size = sec.size(le); + section_ranges.push((addr, addr + size)); + } + } + } + if let Ok(Some(symtab)) = cmd.symtab() { + let symoff = symtab.symoff.get(le) as usize; + let nsyms = symtab.nsyms.get(le) as usize; + let stroff = symtab.stroff.get(le) as usize; + for i in 0..nsyms { + let sym_off = symoff + i * 16; + if sym_off + 16 > buf.len() { + break; + } + let n_strx = u32::from_le_bytes(buf[sym_off..sym_off + 4].try_into().unwrap()); + let n_type = buf[sym_off + 4]; + let n_sect = buf[sym_off + 5]; + let n_value = + u64::from_le_bytes(buf[sym_off + 8..sym_off + 16].try_into().unwrap()); + + // Only check defined symbols in a section (N_SECT = 0x0e) + if 
(n_type & 0x0e) != 0x0e || n_sect == 0 { + continue; + } + let sec_idx = n_sect as usize - 1; + if sec_idx >= section_ranges.len() { + continue; + } + let (sec_start, sec_end) = section_ranges[sec_idx]; + if n_value < sec_start || n_value > sec_end { + // Get symbol name for the error message + let name = if (n_strx as usize) < buf.len() - stroff { + let name_start = stroff + n_strx as usize; + let name_end = buf[name_start..] + .iter() + .position(|&b| b == 0) + .map(|p| name_start + p) + .unwrap_or(name_start); + String::from_utf8_lossy(&buf[name_start..name_end]).to_string() + } else { + format!("") + }; + crate::bail!( + "validate: symbol '{name}' n_value={n_value:#x} is outside \ + section {sec_idx} range [{sec_start:#x}..{sec_end:#x})" + ); + } + } + } + } + } + + // Global section file-offset overlap check: no two sections should + // write to the same file bytes. This catches bugs where multiple input + // sections map to overlapping parts of the same output section. + { + let mut cmds2 = header + .load_commands(le, buf, 0) + .map_err(|e| crate::error!("validate: bad load commands: {e}"))?; + let mut all_sections: Vec<(u64, u64, String)> = Vec::new(); + while let Ok(Some(cmd)) = cmds2.next() { + if let Ok(Some((seg, seg_data))) = cmd.segment_64() { + let segname = String::from_utf8_lossy(crate::macho::trim_nul(&seg.segname)); + if let Ok(sections) = seg.sections(le, seg_data) { + for sec in sections { + let sectname = + String::from_utf8_lossy(crate::macho::trim_nul(sec.sectname())); + let sec_offset = sec.offset(le) as u64; + let sec_size = sec.size(le); + let sec_type = sec.flags(le) & 0xFF; + // Skip zerofill sections (no file data) + if sec_size > 0 && sec_offset > 0 && sec_type != 0x01 && sec_type != 0x0C { + all_sections.push(( + sec_offset, + sec_size, + format!("{segname},{sectname}"), + )); + } + } + } + } + } + all_sections.sort_by_key(|s| s.0); + for w in all_sections.windows(2) { + let (off1, size1, ref name1) = w[0]; + let (off2, _size2, ref 
name2) = w[1];
+            if off1 + size1 > off2 {
+                crate::bail!(
+                    "validate: section file ranges overlap: \
+                     {name1} [{off1:#x}..{:#x}) and {name2} [{off2:#x}..)",
+                    off1 + size1
+                );
+            }
+        }
+    }
+
+    // Validate chained fixup chains: walk every chain entry and verify
+    // rebase targets are within the image and strides stay within pages.
+    validate_chained_fixups(buf)?;
+
+    Ok(())
+}
+
+/// Walk all chained fixup chains and validate each entry.
+///
+/// `buf` is the complete output file. The function locates the
+/// `LC_DYLD_CHAINED_FIXUPS` payload, then for every segment that has a
+/// "starts" record walks each page's chain and checks that:
+///
+/// * a page start offset lies inside its page,
+/// * every chain entry lies inside the file,
+/// * a bind entry's ordinal is below `imports_count`,
+/// * a non-zero rebase target does not exceed the highest `vmaddr + vmsize`
+///   of any segment,
+/// * following `next` never leaves the current page.
+///
+/// Entries are decoded as 64-bit little-endian values with the bind flag in
+/// bit 63, a 12-bit `next` field at bit 51 counted in 4-byte units, a 24-bit
+/// bind ordinal and a 36-bit rebase target.
+///
+/// Returns `Ok(())` without checking anything when the header or load
+/// commands cannot be parsed, when there is no chained-fixups command, or
+/// when its payload is truncated; those conditions are reported by other
+/// checks. Any violated invariant is returned as an error via `crate::bail!`.
+fn validate_chained_fixups(buf: &[u8]) -> Result {
+    use object::read::macho::MachHeader as _;
+
+    /// Size of `mach_header_64`; the load commands start right after it.
+    const MACH_HEADER_64_SIZE: usize = 32;
+    /// `LC_DYLD_CHAINED_FIXUPS` (0x34 with the `LC_REQ_DYLD` bit set).
+    const LC_DYLD_CHAINED_FIXUPS: u32 = 0x8000_0034;
+    /// Page-start value meaning "this page has no fixups".
+    const PAGE_START_NONE: u16 = 0xFFFF;
+
+    // Little-endian readers. Callers bounds-check before calling.
+    fn le_u16(data: &[u8], at: usize) -> u16 {
+        u16::from_le_bytes(data[at..at + 2].try_into().unwrap())
+    }
+    fn le_u32(data: &[u8], at: usize) -> u32 {
+        u32::from_le_bytes(data[at..at + 4].try_into().unwrap())
+    }
+    fn le_u64(data: &[u8], at: usize) -> u64 {
+        u64::from_le_bytes(data[at..at + 8].try_into().unwrap())
+    }
+
+    let le = object::Endianness::Little;
+    let Ok(header) = object::macho::MachHeader64::<object::Endianness>::parse(buf, 0) else {
+        return Ok(());
+    };
+    let Ok(mut cmds) = header.load_commands(le, buf, 0) else {
+        return Ok(());
+    };
+
+    // Scan load commands manually for the chained fixups offset and size.
+    let mut cf_off = 0u32;
+    let mut cf_size = 0u32;
+    {
+        let ncmds = buf.get(16..20).map_or(0, |_| le_u32(buf, 16)) as usize;
+        let mut off = MACH_HEADER_64_SIZE;
+        for _ in 0..ncmds {
+            if off.saturating_add(8) > buf.len() {
+                break;
+            }
+            let cmd_val = le_u32(buf, off);
+            let cmdsize = le_u32(buf, off + 4) as usize;
+            if cmd_val == LC_DYLD_CHAINED_FIXUPS && off + 16 <= buf.len() {
+                cf_off = le_u32(buf, off + 8);
+                cf_size = le_u32(buf, off + 12);
+            }
+            // A command can never be smaller than its own 8-byte header;
+            // stepping by less would re-read overlapping bytes as commands.
+            if cmdsize < 8 {
+                break;
+            }
+            off = off.saturating_add(cmdsize);
+        }
+    }
+
+    // Record every segment's file offset in load-command order (the starts
+    // table below is indexed by segment) and the highest mapped address.
+    let mut segment_fileoffs: Vec<u64> = Vec::new();
+    let mut image_end = 0u64; // highest vmaddr + vmsize
+    while let Ok(Some(cmd)) = cmds.next() {
+        if let Ok(Some((seg, _))) = cmd.segment_64() {
+            let va = seg.vmaddr.get(le);
+            let vs = seg.vmsize.get(le);
+            // Saturate so a malformed segment cannot overflow the sum.
+            image_end = image_end.max(va.saturating_add(vs));
+            segment_fileoffs.push(seg.fileoff.get(le));
+        }
+    }
+
+    if cf_off == 0 || cf_size == 0 {
+        return Ok(()); // no chained fixups
+    }
+
+    // Add in usize: `cf_off + cf_size` in u32 overflows on malformed input.
+    let cf_start = cf_off as usize;
+    let Some(cf) = cf_start
+        .checked_add(cf_size as usize)
+        .and_then(|cf_end| buf.get(cf_start..cf_end))
+    else {
+        return Ok(());
+    };
+    if cf.len() < 32 {
+        return Ok(());
+    }
+
+    let starts_offset = le_u32(cf, 4) as usize;
+    let imports_count = le_u32(cf, 16);
+
+    if starts_offset + 4 > cf.len() {
+        return Ok(());
+    }
+    let seg_count = le_u32(cf, starts_offset) as usize;
+
+    for s in 0..seg_count {
+        let seg_off_pos = starts_offset + 4 + s * 4;
+        if seg_off_pos + 4 > cf.len() {
+            break;
+        }
+        let seg_off = le_u32(cf, seg_off_pos) as usize;
+        if seg_off == 0 {
+            continue; // this segment has no fixups
+        }
+        let ss = starts_offset + seg_off;
+        if ss + 22 > cf.len() {
+            continue;
+        }
+        let page_size = le_u16(cf, ss + 4) as usize;
+        let page_count = le_u16(cf, ss + 20) as usize;
+
+        if page_size == 0 {
+            continue;
+        }
+
+        // Entry `s` of the starts table describes the s-th segment, so its
+        // pages live at that segment's file offset, not always `__DATA`'s.
+        let Some(&seg_fileoff) = segment_fileoffs.get(s) else {
+            continue;
+        };
+
+        for p in 0..page_count {
+            let ps_pos = ss + 22 + p * 2;
+            if ps_pos + 2 > cf.len() {
+                break;
+            }
+            let ps = le_u16(cf, ps_pos);
+            if ps == PAGE_START_NONE {
+                continue;
+            }
+            if usize::from(ps) >= page_size {
+                crate::bail!(
+                    "validate: chained fixup page start {ps:#x} >= page_size {page_size:#x} \
+                     (seg {s}, page {p})"
+                );
+            }
+
+            // Walk the chain
+            let page_file_off = (seg_fileoff as usize).saturating_add(p * page_size);
+            let mut file_off = page_file_off.saturating_add(usize::from(ps));
+            let mut chain_count = 0u32;
+            loop {
+                if file_off.saturating_add(8) > buf.len() {
+                    crate::bail!(
+                        "validate: fixup chain entry at file offset {file_off:#x} \
+                         beyond file end (seg {s}, page {p}, entry {chain_count})"
+                    );
+                }
+                let val = le_u64(buf, file_off);
+                let bind = (val >> 63) & 1;
+                let next_stride = ((val >> 51) & 0xFFF) as usize;
+
+                if bind != 0 {
+                    let ordinal = (val & 0xFF_FFFF) as u32;
+                    if ordinal >= imports_count {
+                        crate::bail!(
+                            "validate: bind ordinal {ordinal} >= imports_count {imports_count} \
+                             at file offset {file_off:#x} (seg {s}, page {p})"
+                        );
+                    }
+                } else {
+                    let target = val & 0xF_FFFF_FFFF;
+                    if target > 0 && target > image_end {
+                        crate::bail!(
+                            "validate: rebase target {target:#x} beyond image end {image_end:#x} \
+                             at file offset {file_off:#x} (seg {s}, page {p})"
+                        );
+                    }
+                }
+
+                chain_count += 1;
+                if next_stride == 0 {
+                    break; // end of this page's chain
+                }
+
+                let next_off = file_off + next_stride * 4;
+                let next_in_page = next_off - page_file_off;
+                if next_in_page >= page_size {
+                    crate::bail!(
+                        "validate: fixup chain crosses page boundary at file offset \
+                         {file_off:#x}, next at +{} bytes = offset {next_in_page:#x} in page \
+                         (page_size={page_size:#x}, seg {s}, page {p})",
+                        next_stride * 4
+                    );
+                }
+                file_off = next_off;
+            }
+        }
+    }
+
+    Ok(())
 }
diff --git a/libwild/src/output_section_id.rs
b/libwild/src/output_section_id.rs index c622caaef..de2dae369 100644 --- a/libwild/src/output_section_id.rs +++ b/libwild/src/output_section_id.rs @@ -100,11 +100,16 @@ pub(crate) const SYMTAB_SHNDX_LOCAL: OutputSectionId = part_id::SYMTAB_SHNDX_LOCAL.output_section_id(); pub(crate) const SYMTAB_SHNDX_GLOBAL: OutputSectionId = part_id::SYMTAB_SHNDX_GLOBAL.output_section_id(); -// Mach-O specific sections +// Mach-O specific sections (used by the Mach-O writer pipeline) +#[allow(dead_code)] pub(crate) const PAGEZERO_SEGMENT: OutputSectionId = part_id::PAGEZERO_SEGMENT.output_section_id(); +#[allow(dead_code)] pub(crate) const TEXT_SEGMENT: OutputSectionId = part_id::TEXT_SEGMENT.output_section_id(); +#[allow(dead_code)] pub(crate) const DATA_SEGMENT: OutputSectionId = part_id::DATA_SEGMENT.output_section_id(); +#[allow(dead_code)] pub(crate) const ENTRY_POINT: OutputSectionId = part_id::ENTRY_POINT.output_section_id(); +#[allow(dead_code)] pub(crate) const LINK_EDIT_SEGMENT: OutputSectionId = part_id::LINK_EDIT_SEGMENT.output_section_id(); @@ -125,6 +130,7 @@ pub(crate) const GCC_EXCEPT_TABLE: OutputSectionId = OutputSectionId::regular(12 pub(crate) const NOTE_ABI_TAG: OutputSectionId = OutputSectionId::regular(13); pub(crate) const DATA_REL_RO: OutputSectionId = OutputSectionId::regular(14); // Mach-O specific sections +#[allow(dead_code)] pub(crate) const CSTRING: OutputSectionId = OutputSectionId::regular(15); pub(crate) const NUM_BUILT_IN_REGULAR_SECTIONS: usize = 16; diff --git a/libwild/src/platform.rs b/libwild/src/platform.rs index fe5697f9d..9d548a2c8 100644 --- a/libwild/src/platform.rs +++ b/libwild/src/platform.rs @@ -715,6 +715,16 @@ pub(crate) trait ObjectFile<'data>: Sized + Send + Sync + std::fmt::Debug + 'dat index: object::SymbolIndex, ) -> Result>; + /// Returns the symbol's offset within its section. For ELF, st_value is already + /// section-relative. For Mach-O, n_value is absolute so we subtract the section base. 
+ fn symbol_value_in_section( + &self, + symbol: &::SymtabEntry, + _section_index: object::SectionIndex, + ) -> Result { + Ok(symbol.value()) + } + fn symbol_versions(&self) -> &[::SymbolVersionIndex]; fn dynamic_symbol_used( @@ -788,6 +798,15 @@ pub(crate) trait ObjectFile<'data>: Sized + Send + Sync + std::fmt::Debug + 'dat fn section_display_name(&self, index: object::SectionIndex) -> Cow<'data, str>; + /// Returns true if the given symbol is in a common/tentative section (e.g. + /// Mach-O `__common`). Default returns false; Mach-O overrides this. + fn is_symbol_in_common_section( + &self, + _symbol: &::SymtabEntry, + ) -> bool { + false + } + fn dynamic_tag_values(&self) -> Option<::DynamicTagValues<'data>>; fn get_version_names(&self) -> Result<::VersionNames<'data>>; @@ -1095,6 +1114,11 @@ pub(crate) trait Args: std::fmt::Debug + Send + Sync + 'static { fn entry_symbol_name<'a>(&'a self, linker_script_entry: Option<&'a [u8]>) -> &'a [u8]; + /// Whether the user explicitly specified an entry point (e.g. via `-e`). 
+ fn has_explicit_entry(&self) -> bool { + false + } + fn version_script_path(&self) -> Option<&Path> { None } diff --git a/libwild/src/resolution.rs b/libwild/src/resolution.rs index 8df98866f..cf4f0117d 100644 --- a/libwild/src/resolution.rs +++ b/libwild/src/resolution.rs @@ -642,6 +642,7 @@ pub(crate) struct ResolvedCommon<'data, P: Platform> { pub(crate) object: &'data P::File<'data>, pub(crate) file_id: FileId, pub(crate) symbol_id_range: SymbolIdRange, + pub(crate) whole_archive: bool, } #[derive(Debug)] @@ -1000,15 +1001,21 @@ impl<'data, P: Platform> ResolvedCommon<'data, P> { object: &obj.parsed.object, file_id: obj.file_id, symbol_id_range: obj.symbol_id_range, + whole_archive: obj.parsed.modifiers.whole_archive, } } pub(crate) fn symbol_strength(&self, symbol_id: SymbolId) -> SymbolStrength { let local_index = symbol_id.to_input(self.symbol_id_range); let Ok(obj_symbol) = self.object.symbol(local_index) else { - // Errors from this function should have been reported elsewhere. return SymbolStrength::Undefined; }; + // Mach-O __common section symbols are tentative definitions (like ELF + // SHN_COMMON) but appear as N_SECT in the nlist. Check the section + // name to classify them correctly. 
+ if self.object.is_symbol_in_common_section(obj_symbol) { + return SymbolStrength::Common(obj_symbol.size()); + } SymbolStrength::of(obj_symbol) } } @@ -1113,7 +1120,8 @@ fn resolve_section<'data, P: Platform>( let mut unloaded_section; let mut is_debug_info = false; - let mut must_load = input_section.should_retain() || input_section.is_note(); + let mut must_load = + input_section.should_retain() || input_section.is_note() || obj.common.whole_archive; let file_name = if let Some(entry) = &obj.common.input.entry { // For archive members, match against the member name (e.g., "app.o"), diff --git a/libwild/src/symbol_db.rs b/libwild/src/symbol_db.rs index 8d0e9223e..3fb2d6965 100644 --- a/libwild/src/symbol_db.rs +++ b/libwild/src/symbol_db.rs @@ -899,6 +899,10 @@ impl<'data, P: Platform> SymbolDb<'data, P> { self.args.entry_symbol_name(self.entry) } + pub(crate) fn has_explicit_entry(&self) -> bool { + self.args.has_explicit_entry() + } + pub(crate) fn defsym_defined_via_cli_option(&self, symbol_name: &[u8]) -> bool { self.args .defsym() @@ -1015,6 +1019,22 @@ impl<'data, P: Platform> SymbolDb<'data, P> { } } + /// Returns whether the symbol is a weak reference (N_WEAK_REF on Mach-O). 
+ pub(crate) fn is_weak_ref(&self, symbol_id: SymbolId) -> bool { + let file_id = self.file_id_for_symbol(symbol_id); + match &self.groups[file_id.group()] { + Group::Objects(objects) => { + let file = &objects[file_id.file()]; + let local_index = file.symbol_id_range.id_to_input(symbol_id); + file.parsed + .object + .symbol(local_index) + .is_ok_and(|sym| sym.is_weak()) + } + _ => false, + } + } + pub(crate) fn warning(&self, message: impl Into) { self.args.warning(message); } diff --git a/linker-diff/src/utils.rs b/linker-diff/src/utils.rs index 8be62a248..9d8976631 100644 --- a/linker-diff/src/utils.rs +++ b/linker-diff/src/utils.rs @@ -22,8 +22,19 @@ pub fn decode_insn_with_objdump(insn: &[u8], address: u64, arch: ArchKind) -> Re let objdump = objdump_bin_candidates .iter() - .find(|bin| which::which(bin).is_ok()) - .unwrap(); + .find(|bin| { + if which::which(bin).is_ok() { + // macOS ships llvm-objdump as "objdump" which doesn't support -b binary. + // Only accept objdump if it supports the -b flag (GNU objdump). + if **bin == "objdump" && cfg!(target_os = "macos") { + return false; + } + true + } else { + false + } + }) + .context("No suitable objdump found")?; let command = Command::new(objdump) .arg("-b") @@ -61,10 +72,12 @@ fn test_align_up() { || std::env::var("WILD_TEST_CROSS") .is_ok_and(|v| v == "all" || v.split(',').any(|a| a == "aarch64")) { - assert_eq!( - decode_insn_with_objdump(&[0xe3, 0x93, 0x44, 0xa9], 0x1000, ArchKind::Aarch64).unwrap(), - "ldp\tx3, x4, [sp, #72]" - ); + // Skip if no suitable (GNU) objdump is available (e.g. macOS ships llvm-objdump). 
+ if let Ok(result) = + decode_insn_with_objdump(&[0xe3, 0x93, 0x44, 0xa9], 0x1000, ArchKind::Aarch64) + { + assert_eq!(result, "ldp\tx3, x4, [sp, #72]"); + } } if cfg!(target_arch = "riscv64") diff --git a/linker-utils/src/elf.rs b/linker-utils/src/elf.rs index 6da02541f..9a1d21c00 100644 --- a/linker-utils/src/elf.rs +++ b/linker-utils/src/elf.rs @@ -1390,7 +1390,8 @@ impl fmt::Display for RelocationSize { } impl RelocationSize { - pub(crate) const fn bit_mask_aarch64( + #[must_use] + pub const fn bit_mask_aarch64( bit_start: u32, bit_end: u32, instruction: AArch64Instruction, diff --git a/main b/main new file mode 100755 index 000000000..b6e3ee291 Binary files /dev/null and b/main differ diff --git a/tests/macho_tests.sh b/tests/macho_tests.sh new file mode 100755 index 000000000..80c88ab65 --- /dev/null +++ b/tests/macho_tests.sh @@ -0,0 +1,446 @@ +#!/bin/bash +# Integration tests for macOS Mach-O linking. +# Run from the repo root: bash tests/macho_tests.sh +set -euo pipefail + +WILD="$(cd "$(dirname "${1:-./target/debug/wild}")" && pwd)/$(basename "${1:-./target/debug/wild}")" +TMPDIR=$(mktemp -d) +PASS=0 +FAIL=0 + +cleanup() { rm -rf "$TMPDIR"; } +trap cleanup EXIT + +pass() { PASS=$((PASS + 1)); echo " PASS: $1"; } +fail() { FAIL=$((FAIL + 1)); echo " FAIL: $1"; } + +check_exit() { + local binary="$1" expected="$2" name="$3" + # wild now auto-signs binaries, no manual codesign needed + set +e + "$binary" + local got=$? 
+ set -e + if [ "$got" -eq "$expected" ]; then + pass "$name (exit=$got)" + else + fail "$name (expected exit=$expected, got exit=$got)" + fi +} + +echo "=== Wild macOS Mach-O Tests ===" +echo "Linker: $WILD" +echo "" + +# --- Test 1: Single .o, return constant --- +echo "Test 1: Single object file, return 42" +cat > "$TMPDIR/t1.c" << 'EOF' +int main() { return 42; } +EOF +clang -c "$TMPDIR/t1.c" -o "$TMPDIR/t1.o" +"$WILD" "$TMPDIR/t1.o" -o "$TMPDIR/t1" +check_exit "$TMPDIR/t1" 42 "single-obj-return-42" + +# --- Test 2: Two .o files with cross-object call --- +echo "Test 2: Two object files, cross-object function call" +cat > "$TMPDIR/t2_add.c" << 'EOF' +int add(int a, int b) { return a + b; } +EOF +cat > "$TMPDIR/t2_main.c" << 'EOF' +int add(int a, int b); +int main() { return add(30, 12); } +EOF +clang -c "$TMPDIR/t2_add.c" -o "$TMPDIR/t2_add.o" +clang -c "$TMPDIR/t2_main.c" -o "$TMPDIR/t2_main.o" +"$WILD" "$TMPDIR/t2_main.o" "$TMPDIR/t2_add.o" -o "$TMPDIR/t2" +check_exit "$TMPDIR/t2" 42 "two-objs-cross-call" + +# --- Test 3: Three .o files --- +echo "Test 3: Three object files" +cat > "$TMPDIR/t3_a.c" << 'EOF' +int mul(int a, int b) { return a * b; } +EOF +cat > "$TMPDIR/t3_b.c" << 'EOF' +int mul(int a, int b); +int square(int x) { return mul(x, x); } +EOF +cat > "$TMPDIR/t3_main.c" << 'EOF' +int square(int x); +int main() { return square(5) - 25 + 7; } +EOF +clang -c "$TMPDIR/t3_a.c" -o "$TMPDIR/t3_a.o" +clang -c "$TMPDIR/t3_b.c" -o "$TMPDIR/t3_b.o" +clang -c "$TMPDIR/t3_main.c" -o "$TMPDIR/t3_main.o" +"$WILD" "$TMPDIR/t3_main.o" "$TMPDIR/t3_b.o" "$TMPDIR/t3_a.o" -o "$TMPDIR/t3" +check_exit "$TMPDIR/t3" 7 "three-objs-chain-calls" + +# --- Test 4: Global variable (data section) --- +echo "Test 4: Global variable access" +cat > "$TMPDIR/t4_data.c" << 'EOF' +int value = 42; +EOF +cat > "$TMPDIR/t4_main.c" << 'EOF' +extern int value; +int main() { return value; } +EOF +clang -c "$TMPDIR/t4_data.c" -o "$TMPDIR/t4_data.o" +clang -c "$TMPDIR/t4_main.c" -o 
"$TMPDIR/t4_main.o" +"$WILD" "$TMPDIR/t4_main.o" "$TMPDIR/t4_data.o" -o "$TMPDIR/t4" +check_exit "$TMPDIR/t4" 42 "global-variable-extern" + +# --- Test 4b: Static variable --- +echo "Test 4b: Static variable access" +cat > "$TMPDIR/t4b.c" << 'EOF' +static int value = 42; +int main() { return value; } +EOF +clang -c "$TMPDIR/t4b.c" -o "$TMPDIR/t4b.o" +"$WILD" "$TMPDIR/t4b.o" -o "$TMPDIR/t4b" +check_exit "$TMPDIR/t4b" 42 "global-variable-static" + +# --- Test 4c: Static archive (.a) --- +echo "Test 4c: Static archive linking" +cat > "$TMPDIR/t4c_add.c" << 'EOF' +int add(int a, int b) { return a + b; } +EOF +cat > "$TMPDIR/t4c_mul.c" << 'EOF' +int mul(int a, int b) { return a * b; } +EOF +cat > "$TMPDIR/t4c_main.c" << 'EOF' +int add(int a, int b); +int mul(int a, int b); +int main() { return add(mul(6, 7), 0); } +EOF +clang -c "$TMPDIR/t4c_add.c" -o "$TMPDIR/t4c_add.o" +clang -c "$TMPDIR/t4c_mul.c" -o "$TMPDIR/t4c_mul.o" +clang -c "$TMPDIR/t4c_main.c" -o "$TMPDIR/t4c_main.o" +ar rcs "$TMPDIR/t4c_lib.a" "$TMPDIR/t4c_add.o" "$TMPDIR/t4c_mul.o" +"$WILD" "$TMPDIR/t4c_main.o" "$TMPDIR/t4c_lib.a" -o "$TMPDIR/t4c" +check_exit "$TMPDIR/t4c" 42 "static-archive" + +# --- Test 4d: Dynamic symbol (printf) --- +echo "Test 4d: Dynamic symbol call (printf)" +cat > "$TMPDIR/t4d.c" << 'EOF' +#include +int main() { + printf("hello wild\n"); + return 7; +} +EOF +clang -c "$TMPDIR/t4d.c" -o "$TMPDIR/t4d.o" +"$WILD" "$TMPDIR/t4d.o" -o "$TMPDIR/t4d" +check_exit "$TMPDIR/t4d" 7 "dynamic-symbol-printf" + +# --- Test 4e: clang drop-in linker --- +echo "Test 4e: clang -fuse-ld=wild" +cat > "$TMPDIR/t4e.c" << 'EOF' +#include +void greet(const char *name) { printf("Hello, %s!\n", name); } +EOF +cat > "$TMPDIR/t4e_main.c" << 'EOF' +void greet(const char *name); +int main() { greet("wild"); return 3; } +EOF +if clang -fuse-ld="$WILD" "$TMPDIR/t4e.c" "$TMPDIR/t4e_main.c" -o "$TMPDIR/t4e" 2>/dev/null; then + check_exit "$TMPDIR/t4e" 3 "clang-drop-in-linker" +else + fail "clang-drop-in-linker (link 
failed)" +fi + +# --- Test 4f: Function pointer table (rebase fixups) --- +echo "Test 4f: Function pointer table with rebases" +cat > "$TMPDIR/t4f.c" << 'EOF' +#include +typedef int (*fn_t)(void); +int f0(void) { return 10; } +int f1(void) { return 20; } +int f2(void) { return 12; } +fn_t table[] = { f0, f1, f2 }; +int main() { + int sum = 0; + for (int i = 0; i < 3; i++) sum += table[i](); + return sum; +} +EOF +clang -c "$TMPDIR/t4f.c" -o "$TMPDIR/t4f.o" +"$WILD" "$TMPDIR/t4f.o" -o "$TMPDIR/t4f" +check_exit "$TMPDIR/t4f" 42 "function-pointer-rebase" + +# --- Test 4g: Rust no_std --- +echo "Test 4g: Rust no_std program" +cat > "$TMPDIR/t4g.rs" << 'EOF' +#![no_std] +#![no_main] +#[no_mangle] +pub extern "C" fn main() -> i32 { 42 } +#[panic_handler] +fn panic(_: &core::panic::PanicInfo) -> ! { loop {} } +EOF +if rustc "$TMPDIR/t4g.rs" --emit=obj --target=aarch64-apple-darwin -C panic=abort -o "$TMPDIR/t4g.o" 2>/dev/null; then + "$WILD" "$TMPDIR/t4g.o" -o "$TMPDIR/t4g" + check_exit "$TMPDIR/t4g" 42 "rust-no-std" +else + echo " SKIP: rust-no-std (rustc not available)" +fi + +# --- Test 4h: Non-extern relocations (section-ordinal) --- +echo "Test 4h: Non-extern relocations" +cat > "$TMPDIR/t4h.c" << 'EOF' +static int helper(int x) { return x * 2; } +static int other(int x) { return x + 1; } +int main() { + int (*fns[])(int) = { helper, other }; + return fns[0](20) + fns[1](0); +} +EOF +clang -c "$TMPDIR/t4h.c" -o "$TMPDIR/t4h.o" +"$WILD" "$TMPDIR/t4h.o" -o "$TMPDIR/t4h" +check_exit "$TMPDIR/t4h" 41 "non-extern-relocs" + +# --- Test 4i: C TLS variable --- +echo "Test 4i: C thread-local variable" +cat > "$TMPDIR/t4i.c" << 'EOF' +__thread int x = 42; +int main() { return x; } +EOF +clang -c "$TMPDIR/t4i.c" -o "$TMPDIR/t4i.o" +"$WILD" "$TMPDIR/t4i.o" -o "$TMPDIR/t4i" +check_exit "$TMPDIR/t4i" 42 "c-tls-variable" + +# --- Test 4j: Multi-TLS across objects --- +echo "Test 4j: Multi-TLS across objects" +cat > "$TMPDIR/t4j_a.c" << 'EOF' +__thread int a = 10; +__thread int b = 
20; +int get_tls_sum(void) { return a + b; } +EOF +cat > "$TMPDIR/t4j_b.c" << 'EOF' +int get_tls_sum(void); +int main() { return get_tls_sum() + 12; } +EOF +clang -c "$TMPDIR/t4j_a.c" -o "$TMPDIR/t4j_a.o" +clang -c "$TMPDIR/t4j_b.c" -o "$TMPDIR/t4j_b.o" +"$WILD" "$TMPDIR/t4j_a.o" "$TMPDIR/t4j_b.o" -o "$TMPDIR/t4j" +check_exit "$TMPDIR/t4j" 42 "multi-tls" + +# --- Test 4k: vtable + printf in archive (no TLS) --- +echo "Test 4k: Archive with vtable and printf" +cat > "$TMPDIR/t4k_lib.c" << 'EOF' +typedef int (*op_t)(int); +static int double_it(int x) { return x * 2; } +static int add_one(int x) { return x + 1; } +const op_t ops[] = { double_it, add_one }; +int apply_op(int i, int x) { return ops[i](x); } +EOF +cat > "$TMPDIR/t4k_main.c" << 'EOF' +#include +int apply_op(int i, int x); +int main() { + int result = apply_op(0, 10) + apply_op(1, 0); + printf("result=%d\n", result); + return result - 21 + 42; +} +EOF +clang -c "$TMPDIR/t4k_lib.c" -o "$TMPDIR/t4k_lib.o" +clang -c "$TMPDIR/t4k_main.c" -o "$TMPDIR/t4k_main.o" +ar rcs "$TMPDIR/t4k.a" "$TMPDIR/t4k_lib.o" +"$WILD" "$TMPDIR/t4k_main.o" "$TMPDIR/t4k.a" -o "$TMPDIR/t4k" +check_exit "$TMPDIR/t4k" 42 "archive-vtable-printf" + +# --- Test 4l: TLS + vtable + archive + printf --- +echo "Test 4l: Complex archive with TLS and vtable" +cat > "$TMPDIR/t4k_lib.c" << 'EOF' +#include +__thread int counter = 0; +typedef int (*op_t)(int); +static int double_it(int x) { return x * 2; } +static int add_one(int x) { return x + 1; } +const op_t ops[] = { double_it, add_one }; +int apply_op(int i, int x) { counter++; return ops[i](x); } +int get_counter(void) { return counter; } +EOF +cat > "$TMPDIR/t4k_main.c" << 'EOF' +#include +int apply_op(int i, int x); +int get_counter(void); +int main() { + int result = apply_op(0, 10) + apply_op(1, 0) + get_counter(); + printf("result=%d\n", result); + return result - 23 + 42; +} +EOF +clang -c "$TMPDIR/t4k_lib.c" -o "$TMPDIR/t4k_lib.o" +clang -c "$TMPDIR/t4k_main.c" -o "$TMPDIR/t4k_main.o" 
+ar rcs "$TMPDIR/t4k.a" "$TMPDIR/t4k_lib.o" +"$WILD" "$TMPDIR/t4k_main.o" "$TMPDIR/t4k.a" -o "$TMPDIR/t4k" +check_exit "$TMPDIR/t4k" 42 "complex-archive-tls-vtable" + +# --- Test 4m: Trait-like vtable dispatch with TLS + archive --- +echo "Test 4m: Trait dispatch with vtable, TLS, malloc" +cat > "$TMPDIR/t4m_lib.c" << 'EOF' +#include +__thread int depth = 0; +typedef struct { void (*drop)(void*); int (*call)(void*, int); } Vtable; +typedef struct { const Vtable *vtable; int value; } TraitObj; +static void a_drop(void *s) { depth++; } +static int a_call(void *s, int x) { depth++; return ((TraitObj*)s)->value + x; } +static const Vtable A_VT = { a_drop, a_call }; +static void m_drop(void *s) { depth++; } +static int m_call(void *s, int x) { depth++; return ((TraitObj*)s)->value * x; } +static const Vtable M_VT = { m_drop, m_call }; +TraitObj *make_adder(int v) { TraitObj *o=malloc(sizeof(*o)); o->vtable=&A_VT; o->value=v; return o; } +TraitObj *make_mul(int v) { TraitObj *o=malloc(sizeof(*o)); o->vtable=&M_VT; o->value=v; return o; } +int call_trait(TraitObj *o, int x) { return o->vtable->call(o, x); } +void drop_trait(TraitObj *o) { o->vtable->drop(o); free(o); } +int get_depth(void) { return depth; } +EOF +cat > "$TMPDIR/t4m_main.c" << 'EOF' +#include +typedef struct TraitObj TraitObj; +TraitObj *make_adder(int v); TraitObj *make_mul(int v); +int call_trait(TraitObj *o, int x); void drop_trait(TraitObj *o); int get_depth(void); +int main() { + TraitObj *a = make_adder(10), *m = make_mul(3); + int r = call_trait(a,5) + call_trait(m,7); + drop_trait(a); drop_trait(m); + printf("r=%d d=%d\n", r, get_depth()); + return r + get_depth() + 2; +} +EOF +clang -c "$TMPDIR/t4m_lib.c" -o "$TMPDIR/t4m_lib.o" +clang -c "$TMPDIR/t4m_main.c" -o "$TMPDIR/t4m_main.o" +ar rcs "$TMPDIR/t4m.a" "$TMPDIR/t4m_lib.o" +"$WILD" "$TMPDIR/t4m_main.o" "$TMPDIR/t4m.a" -o "$TMPDIR/t4m" +check_exit "$TMPDIR/t4m" 42 "trait-dispatch-tls-vtable" + +# --- Test 4n: Rust std links --- +echo "Test 4n: 
Rust std links" +cat > "$TMPDIR/t4i.rs" << 'EOF' +fn add(a: i32, b: i32) -> i32 { a + b } +fn main() { + let result = add(30, 12); + std::process::exit(result); +} +EOF +if rustc "$TMPDIR/t4i.rs" -Clinker=clang "-Clink-arg=-fuse-ld=$WILD" -o "$TMPDIR/t4i" 2>/dev/null; then + if [ -f "$TMPDIR/t4i" ] && file "$TMPDIR/t4i" | grep -q "Mach-O 64-bit executable arm64"; then + pass "rust-std-links" + else + fail "rust-std-links" + fi +else + echo " SKIP: rust-std-links (rustc not available or link failed)" +fi + +# --- Test 4j: Rust hello world runs --- +echo "Test 4o: Rust hello world runs" +cat > "$TMPDIR/t4k.rs" << 'EOF' +fn main() { + println!("Hello from wild!"); + std::process::exit(42); +} +EOF +if rustc "$TMPDIR/t4k.rs" -Clinker=clang "-Clink-arg=-fuse-ld=$WILD" -o "$TMPDIR/t4k" 2>/dev/null; then + check_exit "$TMPDIR/t4k" 42 "rust-hello-world" +else + echo " SKIP: rust-hello-world (rustc not available or link failed)" +fi + +# --- Test 4o2: Rust dylib with complex std (HashMap, Vec, format) --- +echo "Test 4o2: Rust dylib with complex std usage" +cat > "$TMPDIR/t4o2.rs" << 'EOF' +use std::collections::HashMap; +#[no_mangle] +pub extern "C" fn complex_test() -> i32 { + let mut map = HashMap::new(); + map.insert("hello".to_string(), 10); + map.insert("world".to_string(), 32); + let sum: i32 = map.values().sum(); + let msg = format!("sum={}", sum); + if msg.contains("42") { sum } else { -1 } +} +EOF +if rustc "$TMPDIR/t4o2.rs" --crate-type dylib -Clinker=clang "-Clink-arg=-fuse-ld=$WILD" -o "$TMPDIR/t4o2.dylib" 2>/dev/null; then + # Test via dlopen + cat > "$TMPDIR/t4o2_test.c" << 'LOADEOF' +#include +#include +int main() { + void *h = dlopen("DYLIB_PATH", RTLD_NOW); + if (!h) { fprintf(stderr, "dlopen: %s\n", dlerror()); return 1; } + int (*fn)(void) = dlsym(h, "complex_test"); + if (!fn) { fprintf(stderr, "dlsym: %s\n", dlerror()); dlclose(h); return 1; } + int r = fn(); + dlclose(h); + return r == 42 ? 
42 : 1; +} +LOADEOF + sed -i '' "s|DYLIB_PATH|$TMPDIR/t4o2.dylib|" "$TMPDIR/t4o2_test.c" + clang "$TMPDIR/t4o2_test.c" -o "$TMPDIR/t4o2_test" + check_exit "$TMPDIR/t4o2_test" 42 "rust-dylib-complex-std" +else + echo " SKIP: rust-dylib-complex-std (rustc not available or link failed)" +fi + +# --- Test 4p: Rust proc-macro (requires dylib .rustc section) --- +echo "Test 4p: Rust proc-macro crate" +PROC_DIR="$TMPDIR/procmacro" +mkdir -p "$PROC_DIR/my_macro/src" "$PROC_DIR/my_app/src" +cat > "$PROC_DIR/Cargo.toml" << 'EOF' +[workspace] +members = ["my_macro", "my_app"] +resolver = "2" +EOF +cat > "$PROC_DIR/my_macro/Cargo.toml" << 'EOF' +[package] +name = "my_macro" +version = "0.1.0" +edition = "2021" +[lib] +proc-macro = true +EOF +cat > "$PROC_DIR/my_macro/src/lib.rs" << 'EOF' +extern crate proc_macro; +use proc_macro::TokenStream; +#[proc_macro] +pub fn answer(_input: TokenStream) -> TokenStream { "42i32".parse().unwrap() } +EOF +cat > "$PROC_DIR/my_app/Cargo.toml" << 'EOF' +[package] +name = "my_app" +version = "0.1.0" +edition = "2021" +[dependencies] +my_macro = { path = "../my_macro" } +EOF +cat > "$PROC_DIR/my_app/src/main.rs" << 'EOF' +fn main() { let v: i32 = my_macro::answer!(); std::process::exit(v); } +EOF +if cd "$PROC_DIR" && RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=$WILD" cargo build 2>/dev/null; then + check_exit "$PROC_DIR/target/debug/my_app" 42 "rust-proc-macro" +else + fail "rust-proc-macro" +fi +cd "$TMPDIR" + +# --- Test 5: Output flag --- +echo "Test 5: -o flag" +clang -c "$TMPDIR/t1.c" -o "$TMPDIR/t5.o" +"$WILD" "$TMPDIR/t5.o" -o "$TMPDIR/t5_out" +if [ -f "$TMPDIR/t5_out" ]; then + pass "output-flag" +else + fail "output-flag" +fi + +# --- Test 6: Valid Mach-O structure --- +echo "Test 6: Valid Mach-O structure" +if file "$TMPDIR/t1" | grep -q "Mach-O 64-bit executable arm64"; then + pass "valid-macho-structure" +else + fail "valid-macho-structure" +fi + +echo "" +echo "=== Results: $PASS passed, $FAIL failed ===" +[ "$FAIL" -eq 0 ] diff 
--git a/wild/Cargo.toml b/wild/Cargo.toml index 4c2303199..f5b5c7636 100644 --- a/wild/Cargo.toml +++ b/wild/Cargo.toml @@ -17,6 +17,21 @@ name = "integration_tests" path = "tests/integration_tests.rs" harness = false +[[test]] +name = "macho_integration_tests" +path = "tests/macho_integration_tests.rs" +harness = false + +[[test]] +name = "lld_macho_tests" +path = "tests/lld_macho_tests.rs" +harness = false + +[[test]] +name = "sold_macho_tests" +path = "tests/sold_macho_tests.rs" +harness = false + [dependencies] libwild = { path = "../libwild", version = "0.8.0" } diff --git a/wild/tests/integration_tests.rs b/wild/tests/integration_tests.rs index c385a212f..665d5df73 100644 --- a/wild/tests/integration_tests.rs +++ b/wild/tests/integration_tests.rs @@ -3912,6 +3912,14 @@ fn run_integration_test( mut config: Config, test_config: &TestConfig, ) -> Result { + // ELF tests require a Linux toolchain (GNU ld, ELF-compatible compiler). + // On macOS, the system linker is ld64 which doesn't support ELF flags. + if cfg!(target_os = "macos") && config.platform == PlatformKind::Elf { + return Ok(libtest_mimic::Completion::ignored_with( + "ELF tests require Linux toolchain", + )); + } + setup_symlink(); let linkers = available_linkers()?; diff --git a/wild/tests/lld-macho/LICENSE.TXT b/wild/tests/lld-macho/LICENSE.TXT new file mode 100644 index 000000000..cba22f66a --- /dev/null +++ b/wild/tests/lld-macho/LICENSE.TXT @@ -0,0 +1,278 @@ +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. 
+ +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + +============================================================================== +Software from third parties included in the LLVM Project: +============================================================================== +The LLVM Project contains third party software which is under different license +terms. All such code will be identified clearly using at least one of two +mechanisms: +1) It will be in a separate directory tree with its own `LICENSE.txt` or + `LICENSE` file at the top containing the specific license and restrictions + which apply to that software, or +2) It will contain specific license and restriction terms at the top of every + file. + +============================================================================== +Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2011-2019 by the contributors listed in CREDITS.TXT +All rights reserved. 
+ +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. diff --git a/wild/tests/lld-macho/README.md b/wild/tests/lld-macho/README.md new file mode 100644 index 000000000..d4eeab7c0 --- /dev/null +++ b/wild/tests/lld-macho/README.md @@ -0,0 +1,50 @@ +# lld MachO Test Suite + +These tests are adapted from LLVM lld's MachO linker test suite. + +## Source + + +Upstream directory: [llvm-project/lld/test/MachO](https://github.com/llvm/llvm-project/tree/main/lld/test/MachO) + +## License + +Apache License v2.0 with LLVM Exceptions — see [LICENSE.TXT](LICENSE.TXT).
+ +## Format + +Tests use LLVM's LIT format: + +- `# RUN:` directives show how to assemble and link +- `# CHECK:` directives show expected output +- `# REQUIRES: aarch64` means the test needs ARM64 support +- `split-file %s %t` splits the file at `#---` markers + +## Usage with Wild + +To run a test manually: + +```sh +# Assemble (strip RUN/CHECK comments first) +grep -v '^#' test.s > clean.s +clang -c -target arm64-apple-macos clean.s -o test.o + +# Link with Wild +wild test.o -dylib -arch arm64 -lSystem -o test.dylib + +# Verify with objdump +objdump --macho -d test.dylib +``` + +## Cherry-picking new tests + +To add tests from upstream lld: + +```sh +# Sparse checkout the lld tests +git clone --depth 1 --filter=blob:none --sparse \ + https://github.com/llvm/llvm-project.git /tmp/llvm +cd /tmp/llvm && git sparse-checkout set lld/test/MachO + +# Copy desired tests +cp /tmp/llvm/lld/test/MachO/new-test.s wild/tests/lld-macho/ +``` diff --git a/wild/tests/lld-macho/abs-symbols.s b/wild/tests/lld-macho/abs-symbols.s new file mode 100644 index 000000000..5c106e5b9 --- /dev/null +++ b/wild/tests/lld-macho/abs-symbols.s @@ -0,0 +1,23 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: %lld -lSystem %t.o -o %t +# RUN: llvm-objdump --macho --syms --exports-trie %t | FileCheck %s + +# CHECK-LABEL: SYMBOL TABLE: +# CHECK-DAG: 000000000000dead g *ABS* _foo +# CHECK-DAG: 000000000000beef g *ABS* _weakfoo +# CHECK-DAG: 000000000000cafe l *ABS* _localfoo + +# CHECK-LABEL: Exports trie: +# CHECK-DAG: 0x0000DEAD _foo [absolute] +# CHECK-DAG: 0x0000BEEF _weakfoo [absolute] + +.globl _foo, _weakfoo, _main +.weak_definition _weakfoo +_foo = 0xdead +_weakfoo = 0xbeef +_localfoo = 0xcafe + +.text +_main: + ret diff --git a/wild/tests/lld-macho/adhoc-codesign-hash.s b/wild/tests/lld-macho/adhoc-codesign-hash.s new file mode 100644 index 000000000..977ca43cf --- /dev/null +++ b/wild/tests/lld-macho/adhoc-codesign-hash.s @@ -0,0 +1,23 @@ +# 
REQUIRES: x86, aarch64 +# RUN: rm -rf %t; mkdir -p %t + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o %t/empty-arm64-macos.o %s +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-iossimulator -o %t/empty-arm64-iossimulator.o %s +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/empty-x86_64-macos.o %s + +# RUN: %lld -arch arm64 -dylib -adhoc_codesign -o %t/empty-arm64-macos.dylib %t/empty-arm64-macos.o +# RUN: %lld -arch arm64 -dylib -adhoc_codesign -o %t/empty-arm64-iossimulator.dylib %t/empty-arm64-iossimulator.o +# RUN: %lld -arch x86_64 -dylib -adhoc_codesign -o %t/empty-x86_64-macos.dylib %t/empty-x86_64-macos.o + +# RUN: obj2yaml %t/empty-arm64-macos.dylib | FileCheck %s -D#DATA_OFFSET=16432 -D#DATA_SIZE=304 +# RUN: obj2yaml %t/empty-arm64-iossimulator.dylib | FileCheck %s -D#DATA_OFFSET=16432 -D#DATA_SIZE=304 +# RUN: obj2yaml %t/empty-x86_64-macos.dylib | FileCheck %s -D#DATA_OFFSET=4144 -D#DATA_SIZE=208 + +# CHECK: - cmd: LC_CODE_SIGNATURE +# CHECK-NEXT: cmdsize: 16 +# CHECK-NEXT: dataoff: [[#DATA_OFFSET]] +# CHECK-NEXT: datasize: [[#DATA_SIZE]] + +# RUN: %python %p/Inputs/code-signature-check.py %t/empty-arm64-macos.dylib 16432 304 0 16432 +# RUN: %python %p/Inputs/code-signature-check.py %t/empty-arm64-iossimulator.dylib 16432 304 0 16432 +# RUN: %python %p/Inputs/code-signature-check.py %t/empty-x86_64-macos.dylib 4144 208 0 4144 diff --git a/wild/tests/lld-macho/adhoc-codesign.s b/wild/tests/lld-macho/adhoc-codesign.s new file mode 100644 index 000000000..8e422ca2c --- /dev/null +++ b/wild/tests/lld-macho/adhoc-codesign.s @@ -0,0 +1,112 @@ +# REQUIRES: x86, aarch64 + +# RUN: rm -rf %t; split-file %s %t + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o %t/main-arm64-macos.o %t/main.s +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-iossimulator -o %t/main-arm64-sim.o %t/main.s +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/main-x86_64-macos.o %t/main.s +# RUN: llvm-mc -filetype=obj 
-triple=arm64-apple-macos -o %t/foo-arm64-macos.o %t/foo.s +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-iossimulator -o %t/foo-arm64-sim.o %t/foo.s +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/foo-x86_64-macos.o %t/foo.s + +# Exhaustive test for: +# (x86_64-macos, arm64-macos, arm64-ios-simulator) x (default, -adhoc_codesign, -no_adhoc-codesign) x (execute, dylib, bundle) + +# RUN: %lld -lSystem -arch x86_64 -execute -o %t/out %t/main-x86_64-macos.o +# RUN: llvm-objdump --macho --all-headers %t/out | FileCheck --check-prefix=NO-ADHOC %s +# RUN: %lld -arch x86_64 -dylib -o %t/out %t/foo-x86_64-macos.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=NO-ADHOC %s +# RUN: %lld -arch x86_64 -bundle -o %t/out %t/foo-x86_64-macos.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=NO-ADHOC %s + +# RUN: %lld -lSystem -arch x86_64 -execute -adhoc_codesign -o %t/out %t/main-x86_64-macos.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=ADHOC %s +# RUN: %lld -arch x86_64 -dylib -adhoc_codesign -o %t/out %t/foo-x86_64-macos.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=ADHOC %s +# RUN: %lld -arch x86_64 -bundle -adhoc_codesign -o %t/out %t/foo-x86_64-macos.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=ADHOC %s + +# RUN: %lld -lSystem -arch x86_64 -execute -no_adhoc_codesign -o %t/out %t/main-x86_64-macos.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=NO-ADHOC %s +# RUN: %lld -arch x86_64 -dylib -no_adhoc_codesign -o %t/out %t/foo-x86_64-macos.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=NO-ADHOC %s +# RUN: %lld -arch x86_64 -bundle -no_adhoc_codesign -o %t/out %t/foo-x86_64-macos.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=NO-ADHOC %s + + +# RUN: %lld -lSystem -arch arm64 -execute -o %t/out %t/main-arm64-macos.o +# 
RUN: llvm-objdump --macho --all-headers %t/out | FileCheck --check-prefix=ADHOC %s +# RUN: %lld -arch arm64 -dylib -o %t/out %t/foo-arm64-macos.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=ADHOC %s +# RUN: %lld -arch arm64 -bundle -o %t/out %t/foo-arm64-macos.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=ADHOC %s + +# RUN: %lld -lSystem -arch arm64 -execute -adhoc_codesign -o %t/out %t/main-arm64-macos.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=ADHOC %s +# RUN: %lld -arch arm64 -dylib -adhoc_codesign -o %t/out %t/foo-arm64-macos.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=ADHOC %s +# RUN: %lld -arch arm64 -bundle -adhoc_codesign -o %t/out %t/foo-arm64-macos.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=ADHOC %s + +# RUN: %lld -lSystem -arch arm64 -execute -no_adhoc_codesign -o %t/out %t/main-arm64-macos.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=NO-ADHOC %s +# RUN: %lld -arch arm64 -dylib -no_adhoc_codesign -o %t/out %t/foo-arm64-macos.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=NO-ADHOC %s +# RUN: %lld -arch arm64 -bundle -no_adhoc_codesign -o %t/out %t/foo-arm64-macos.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=NO-ADHOC %s + + +# RUN: %no-arg-lld -arch arm64 -platform_version ios-simulator 14.0 15.0 -execute -o %t/out %t/main-arm64-sim.o -syslibroot %S/Inputs/iPhoneSimulator.sdk -lSystem +# RUN: llvm-objdump --macho --all-headers %t/out | FileCheck --check-prefix=ADHOC %s +# RUN: %no-arg-lld -arch arm64 -platform_version ios-simulator 14.0 15.0 -dylib -o %t/out %t/foo-arm64-sim.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=ADHOC %s +# RUN: %no-arg-lld -arch arm64 -platform_version ios-simulator 14.0 15.0 -bundle -o %t/out %t/foo-arm64-sim.o +# RUN: llvm-objdump 
--macho --all-headers %t/out| FileCheck --check-prefix=ADHOC %s + +# RUN: %no-arg-lld -arch arm64 -platform_version ios-simulator 14.0 15.0 -execute -adhoc_codesign -o %t/out %t/main-arm64-sim.o -syslibroot %S/Inputs/iPhoneSimulator.sdk -lSystem +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=ADHOC %s +# RUN: %no-arg-lld -arch arm64 -platform_version ios-simulator 14.0 15.0 -dylib -adhoc_codesign -o %t/out %t/foo-arm64-sim.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=ADHOC %s +# RUN: %no-arg-lld -arch arm64 -platform_version ios-simulator 14.0 15.0 -bundle -adhoc_codesign -o %t/out %t/foo-arm64-sim.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=ADHOC %s + +# RUN: %no-arg-lld -lSystem -arch arm64 -platform_version ios-simulator 14.0 15.0 -execute -no_adhoc_codesign -o %t/out %t/main-arm64-sim.o -syslibroot %S/Inputs/iPhoneSimulator.sdk +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=NO-ADHOC %s +# RUN: %no-arg-lld -arch arm64 -platform_version ios-simulator 14.0 15.0 -dylib -no_adhoc_codesign -o %t/out %t/foo-arm64-sim.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=NO-ADHOC %s +# RUN: %no-arg-lld -arch arm64 -platform_version ios-simulator 14.0 15.0 -bundle -no_adhoc_codesign -o %t/out %t/foo-arm64-sim.o +# RUN: llvm-objdump --macho --all-headers %t/out| FileCheck --check-prefix=NO-ADHOC %s + +# RUN: %lld -arch x86_64 -dylib -o %t/out_installname.dylib -install_name @rpath/MyInstallName %t/foo-x86_64-macos.o -adhoc_codesign +# RUN: %lld -arch x86_64 -dylib -o %t/out_no_installname.dylib %t/foo-x86_64-macos.o -adhoc_codesign + +## Smoke check to verify the dataoff and datasize value before using them with code-signature-check.py +# RUN: llvm-objdump --macho --all-headers %t/out_installname.dylib | FileCheck %s --check-prefix CS-ID-PRE -D#DATA_OFFSET=4176 -D#DATA_SIZE=192 +# RUN: llvm-objdump --macho --all-headers 
%t/out_no_installname.dylib | FileCheck %s --check-prefix CS-ID-PRE -D#DATA_OFFSET=4176 -D#DATA_SIZE=208 + +## Verify that the 'Identifier' (aka 'Code Directory ID') field are set to the install-name, if available. +# RUN: %python %p/Inputs/code-signature-check.py %t/out_installname.dylib 4176 192 0 4176 | FileCheck %s --check-prefix CS-ID-INSTALL +# RUN: %python %p/Inputs/code-signature-check.py %t/out_no_installname.dylib 4176 208 0 4176 | FileCheck %s --check-prefix CS-ID-NO-INSTALL + +# ADHOC: cmd LC_CODE_SIGNATURE +# ADHOC-NEXT: cmdsize 16 + +# NO-ADHOC-NOT: cmd LC_CODE_SIGNATURE + +# CS-ID-PRE: cmd LC_CODE_SIGNATURE +# CS-ID-PRE-NEXT: cmdsize 16 +# CS-ID-PRE-NEXT: dataoff [[#DATA_OFFSET]] +# CS-ID-PRE-NEXT: datasize [[#DATA_SIZE]] + +# CS-ID-INSTALL: Code Directory ID: MyInstallName +# CS-ID-NO-INSTALL: Code Directory ID: out_no_installname.dylib + +#--- foo.s +.globl _foo +_foo: + ret + +#--- main.s +.globl _main +_main: + ret diff --git a/wild/tests/lld-macho/application-extension.s b/wild/tests/lld-macho/application-extension.s new file mode 100644 index 000000000..ddd16f33f --- /dev/null +++ b/wild/tests/lld-macho/application-extension.s @@ -0,0 +1,115 @@ +# REQUIRES: aarch64 + +## --no-leading-lines is needed for .tbd files. +# RUN: rm -rf %t; split-file --no-leading-lines %s %t + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o %t/foo.o %t/foo.s +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o %t/bar.o %t/bar.s + +## MH_APP_EXTENSION_SAFE is only set on dylibs, and only if requested. 
+# RUN: %lld -arch arm64 -dylib -o %t/foo.dylib %t/foo.o +# RUN: llvm-otool -hv %t/foo.dylib | FileCheck --check-prefix=NOAPPEXT %s +# RUN: %lld -arch arm64 -dylib -o %t/foo-appext.dylib %t/foo.o \ +# RUN: -application_extension +# RUN: llvm-otool -hv %t/foo-appext.dylib | FileCheck --check-prefix=APPEXT %s +# RUN: %lld -arch arm64 -dylib -o %t/foo-noappext.dylib %t/foo.o \ +# RUN: -application_extension -no_application_extension +# RUN: llvm-otool -hv %t/foo-noappext.dylib \ +# RUN: | FileCheck --check-prefix=NOAPPEXT %s +# RUN: %lld -arch arm64 -bundle -o %t/foo.so %t/foo.o \ +# RUN: -application_extension +# RUN: llvm-otool -hv %t/foo.so | FileCheck --check-prefix=NOAPPEXT %s + +# APPEXT: APP_EXTENSION_SAFE +# NOAPPEXT-NOT: APP_EXTENSION_SAFE + +## The warning is emitted for all target types. +# RUN: %lld -arch arm64 -dylib -o %t/bar.dylib %t/bar.o \ +# RUN: -application_extension %t/foo-appext.dylib +# RUN: %lld -arch arm64 -dylib -o %t/bar.dylib %t/bar.o \ +# RUN: -application_extension -L %t -ltbd-appext +# RUN: not %lld -arch arm64 -dylib -o %t/bar.dylib %t/bar.o \ +# RUN: -application_extension %t/foo-noappext.dylib \ +# RUN: 2>&1 | FileCheck --check-prefix=WARN %s +# RUN: not %lld -arch arm64 -dylib -o %t/bar.dylib %t/bar.o \ +# RUN: -application_extension -L %t -ltbd-noappext \ +# RUN: 2>&1 | FileCheck --check-prefix=WARN %s +# RUN: not %lld -arch arm64 -bundle -o %t/bar.so %t/bar.o \ +# RUN: -application_extension %t/foo-noappext.dylib \ +# RUN: 2>&1 | FileCheck --check-prefix=WARN %s +# RUN: not %lld -arch arm64 -bundle -o %t/bar.so %t/bar.o \ +# RUN: -application_extension -L %t -ltbd-noappext \ +# RUN: 2>&1 | FileCheck --check-prefix=WARN %s + +# WARN: using '-application_extension' with unsafe dylib: + +## Test we warn on dylibs loaded indirectly via reexports. 
+# RUN: not %lld -arch arm64 -dylib -o %t/bar.dylib %t/bar.o \ +# RUN: -application_extension -L %t -lbaz-noappext-reexport \ +# RUN: -u _baz 2>&1 | FileCheck --check-prefix=WARN %s + +#--- foo.s +.globl _foo +.p2align 2 +_foo: + ret + +#--- libtbd-appext.tbd +--- !tapi-tbd +tbd-version: 4 +targets: [ arm64-macos ] +uuids: + - target: arm64-macos + value: 2E994C7F-3F03-3A07-879C-55690D22BEDA +install-name: '/usr/lib/libtbd-appext.dylib' +exports: + - targets: [ arm64-macos ] + symbols: [ _foo ] +... + +#--- libtbd-noappext.tbd +--- !tapi-tbd +tbd-version: 4 +targets: [ arm64-macos ] +flags: [ not_app_extension_safe ] +uuids: + - target: arm64-macos + value: 2E994C7F-3F03-3A07-879C-55690D22BEDA +install-name: '/usr/lib/libtbd-noappext.dylib' +exports: + - targets: [ arm64-macos ] + symbols: [ _foo ] +... + +#--- bar.s +.globl _bar +.p2align 2 +_bar: + ret + +#--- libbaz-noappext-reexport.tbd +--- !tapi-tbd +tbd-version: 4 +targets: [ arm64-macos ] +uuids: + - target: arm64-macos + value: 00000000-0000-0000-0000-000000000001 +install-name: '/usr/lib/libbaz.dylib' +reexported-libraries: + - targets: [ arm64-macos ] + libraries: [ '/usr/lib/libbaz-noappext-reexported.dylib'] +--- !tapi-tbd +tbd-version: 4 +targets: [ arm64-macos ] +flags: [ not_app_extension_safe ] +uuids: + - target: arm64-macos + value: 00000000-0000-0000-0000-000000000003 +install-name: '/usr/lib/libbaz-noappext-reexported.dylib' +parent-umbrella: + - targets: [ arm64-macos ] + umbrella: baz +exports: + - targets: [ arm64-macos ] + symbols: [ _baz ] +... 
diff --git a/wild/tests/lld-macho/archive.s b/wild/tests/lld-macho/archive.s new file mode 100644 index 000000000..c324be0ac --- /dev/null +++ b/wild/tests/lld-macho/archive.s @@ -0,0 +1,66 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/2.s -o %t/2.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/3.s -o %t/3.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/4.s -o %t/4.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/main.s -o %t/main.o + +# RUN: llvm-ar rcs %t/test.a %t/2.o %t/3.o %t/4.o +# RUN: %lld %t/main.o %t/test.a -o %t/test.out + +## TODO: Run llvm-nm -p to validate symbol order +# RUN: llvm-nm %t/test.out | FileCheck %s +# CHECK: T _bar +# CHECK: T _boo +# CHECK: T _main + +## Linking with the archive first in the command line shouldn't change anything +# RUN: %lld %t/test.a %t/main.o -o %t/test.out +# RUN: llvm-nm %t/test.out | FileCheck %s --check-prefix ARCHIVE-FIRST +# ARCHIVE-FIRST: T _bar +# ARCHIVE-FIRST: T _boo +# ARCHIVE-FIRST: T _main + +# RUN: llvm-nm %t/test.out | FileCheck %s --check-prefix VISIBLE +# VISIBLE-NOT: T _undefined +# VISIBLE-NOT: T _unused + +# RUN: %lld %t/test.a %t/main.o -o %t/all-load -noall_load -all_load +# RUN: llvm-nm %t/all-load | FileCheck %s --check-prefix ALL-LOAD +# ALL-LOAD: T _bar +# ALL-LOAD: T _boo +# ALL-LOAD: T _main +# ALL-LOAD: T _unused + +# RUN: %lld %t/test.a %t/main.o -o %t/no-all-load -all_load -noall_load +# RUN: llvm-nm %t/no-all-load | FileCheck %s --check-prefix NO-ALL-LOAD +# RUN: %lld %t/test.a %t/main.o -o %t/no-all-load-only -noall_load +# RUN: llvm-nm %t/no-all-load-only | FileCheck %s --check-prefix NO-ALL-LOAD +# NO-ALL-LOAD-NOT: T _unused + +## Multiple archives defining the same symbols aren't an issue, due to lazy +## loading +# RUN: cp %t/test.a %t/test2.a +# RUN: %lld %t/test.a %t/test2.a %t/main.o -o /dev/null + +#--- 2.s +.globl _boo +_boo: + ret + +#--- 3.s +.globl _bar +_bar: 
+ ret + +#--- 4.s +.globl _undefined, _unused +_unused: + ret + +#--- main.s +.globl _main +_main: + callq _boo + callq _bar + ret diff --git a/wild/tests/lld-macho/arm64-32-dtrace.s b/wild/tests/lld-macho/arm64-32-dtrace.s new file mode 100644 index 000000000..26c91bd28 --- /dev/null +++ b/wild/tests/lld-macho/arm64-32-dtrace.s @@ -0,0 +1,23 @@ +# REQUIRES: aarch64 +# RUN: rm -rf %t; split-file %s %t + +# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %t/arm64-32-dtrace.s -o %t/arm64-32-dtrace.o +# RUN: %lld-watchos -arch arm64_32 -o %t/arm64-32-dtrace %t/arm64-32-dtrace.o + +## If references of dtrace symbols are handled by lld, their relocation should be replaced with the following instructions +# RUN: llvm-objdump --macho -D %t/arm64-32-dtrace | FileCheck %s --check-prefix=CHECK + +# CHECK: 00 00 80 d2 mov x0, #0 + +# CHECK: 1f 20 03 d5 nop + +#--- arm64-32-dtrace.s + .globl _main +_main: + bl ___dtrace_isenabled$Foo$added$v1 + .reference ___dtrace_typedefs$Foo$v2 + bl ___dtrace_probe$Foo$added$v1$696e74 + .reference ___dtrace_stability$Foo$v1$1_1_0_1_1_0_1_1_0_1_1_0_1_1_0 + ret + +.subsections_via_symbols diff --git a/wild/tests/lld-macho/arm64-32-reloc-got-load.s b/wild/tests/lld-macho/arm64-32-reloc-got-load.s new file mode 100644 index 000000000..85431b11e --- /dev/null +++ b/wild/tests/lld-macho/arm64-32-reloc-got-load.s @@ -0,0 +1,49 @@ +# REQUIRES: aarch64 + +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-darwin %t/main.s -o %t/main.o +# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-darwin %t/foobar.s -o %t/foobar.o + +# RUN: %lld-watchos -lSystem -arch arm64_32 -o %t/static %t/main.o %t/foobar.o +# RUN: llvm-objdump --no-print-imm-hex --macho -d --no-show-raw-insn --syms %t/static | FileCheck %s --check-prefix=STATIC + +# RUN: %lld-watchos -lSystem -arch arm64_32 -dylib -o %t/libfoo.dylib %t/foobar.o +# RUN: %lld-watchos -lSystem -arch arm64_32 -o %t/main %t/main.o %t/libfoo.dylib +# RUN: 
llvm-objdump --no-print-imm-hex --macho -d --no-show-raw-insn --section-headers %t/main | FileCheck %s --check-prefix=DYLIB + +# STATIC-LABEL: _main: +# STATIC-NEXT: adrp x8, [[#]] ; 0x[[#%x,PAGE:]] +# STATIC-NEXT: add x8, x8, #[[#%u,FOO_OFF:]] +# STATIC-NEXT: adrp x8, [[#]] ; 0x[[#PAGE]] +# STATIC-NEXT: add x8, x8, #[[#%u,BAR_OFF:]] +# STATIC-NEXT: ret + +# STATIC-LABEL: SYMBOL TABLE: +# STATIC-DAG: {{0*}}[[#%x,PAGE+FOO_OFF]] g F __TEXT,__text _foo +# STATIC-DAG: {{0*}}[[#%x,PAGE+BAR_OFF]] g F __TEXT,__text _bar + +# DYLIB-LABEL: _main: +# DYLIB-NEXT: adrp x8, [[#]] ; 0x[[#%x,GOT:]] +# DYLIB-NEXT: ldr w8, [x8, #4] +# DYLIB-NEXT: adrp x8, [[#]] ; 0x[[#GOT]] +# DYLIB-NEXT: ldr w8, [x8] +# DYLIB-NEXT: ret +# DYLIB-EMPTY: +# DYLIB-NEXT: Sections: +# DYLIB-NEXT: Idx Name Size VMA Type +# DYLIB: [[#]] __got 00000008 [[#%.8x,GOT]] DATA + +#--- main.s +.globl _main, _foo, _bar +.p2align 2 +_main: + adrp x8, _foo@GOTPAGE + ldr w8, [x8, _foo@GOTPAGEOFF] + adrp x8, _bar@GOTPAGE + ldr w8, [x8, _bar@GOTPAGEOFF] + ret + +#--- foobar.s +.globl _foo, _bar +_foo: +_bar: diff --git a/wild/tests/lld-macho/arm64-branch-addend-stubs.s b/wild/tests/lld-macho/arm64-branch-addend-stubs.s new file mode 100644 index 000000000..f1301f580 --- /dev/null +++ b/wild/tests/lld-macho/arm64-branch-addend-stubs.s @@ -0,0 +1,80 @@ +# REQUIRES: aarch64 + +## Test that branch relocations with non-zero addends correctly target the +## actual function address, not the stub address. When a symbol is accessed +## via both a regular call (goes through stub) and a branch with addend +## (targeting an interior point), the addend must be applied to the real +## function VA, not the stub VA. +## +## This test uses -flat_namespace on a dylib, which makes locally-defined +## symbols interposable and thus accessible via stubs. This creates the +## scenario where a function is both defined locally AND in stubs. 
+ +# RUN: rm -rf %t; mkdir -p %t +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/test.o +# RUN: %lld -arch arm64 -dylib -lSystem -flat_namespace %t/test.o -o %t/test.dylib + +# RUN: llvm-objdump --no-print-imm-hex --macho -d %t/test.dylib | FileCheck %s + +## With -flat_namespace, _target_func is interposable so regular calls go +## through stubs. But the branch with addend must go to the actual function +## address + addend, not stub + addend. +## +## Note: This means `bl _target_func` and `bl _target_func+16` could target +## different functions if interposition occurs at runtime. This is intentional: +## branching to an interior point implies reliance on the original function's +## layout, which an interposed replacement wouldn't preserve. There's no +## meaningful way to "interpose" an interior offset, so we target the original. + +## _target_func layout: +## offset 0: nop +## offset 4: nop +## offset 8: nop +## offset 12: nop +## offset 16: mov w0, #42 <- this is what _target_func+16 should reach +## offset 20: ret + +## Verify _target_func layout and capture the address of the mov instruction +## (which is at _target_func + 16) +# CHECK-LABEL: _target_func: +# CHECK: nop +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: [[#%x,INTERIOR:]]:{{.*}}mov w0, #42 +# CHECK-NEXT: ret + +## Verify the caller structure: +## - First bl goes to stub (marked with "symbol stub for:") +## - Second bl goes to [[INTERIOR]] (the _target_func+16 address captured above) +## +## The key assertion: the second bl MUST target _target_func+16 (INTERIOR), +## NOT stub+16. If the bug exists, it would target stub+16 which would be +## garbage (pointing past the stub section). 
+# CHECK-LABEL: _caller: +# CHECK: bl {{.*}} symbol stub for: _target_func +# CHECK-NEXT: bl 0x[[#INTERIOR]] +# CHECK-NEXT: ret + +.text +.globl _target_func, _caller +.p2align 2 + +_target_func: + ## 4 nops = 16 bytes to offset 0x10 + nop + nop + nop + nop + ## This is at _target_func + 16 + mov w0, #42 + ret + +_caller: + ## Regular call to _target_func - goes through stub due to -flat_namespace + bl _target_func + ## Branch with addend - must go to actual function + 16, not stub + 16 + bl _target_func + 16 + ret + +.subsections_via_symbols diff --git a/wild/tests/lld-macho/arm64-dtrace.s b/wild/tests/lld-macho/arm64-dtrace.s new file mode 100644 index 000000000..36854195e --- /dev/null +++ b/wild/tests/lld-macho/arm64-dtrace.s @@ -0,0 +1,23 @@ +# REQUIRES: aarch64 +# RUN: rm -rf %t; split-file %s %t + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/arm64-dtrace.s -o %t/arm64-dtrace.o +# RUN: %lld -arch arm64 -o %t/arm64-dtrace %t/arm64-dtrace.o + +## If references of dtrace symbols are handled by lld, their relocation should be replaced with the following instructions +# RUN: llvm-objdump --macho -D %t/arm64-dtrace | FileCheck %s --check-prefix=CHECK + +# CHECK: 00 00 80 d2 mov x0, #0 + +# CHECK: 1f 20 03 d5 nop + +#--- arm64-dtrace.s + .globl _main +_main: + bl ___dtrace_isenabled$Foo$added$v1 + .reference ___dtrace_typedefs$Foo$v2 + bl ___dtrace_probe$Foo$added$v1$696e74 + .reference ___dtrace_stability$Foo$v1$1_1_0_1_1_0_1_1_0_1_1_0_1_1_0 + ret + +.subsections_via_symbols diff --git a/wild/tests/lld-macho/arm64-objc-stubs-dead.s b/wild/tests/lld-macho/arm64-objc-stubs-dead.s new file mode 100644 index 000000000..5dcb171c1 --- /dev/null +++ b/wild/tests/lld-macho/arm64-objc-stubs-dead.s @@ -0,0 +1,27 @@ +# REQUIRES: aarch64 + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o + +# RUN: %lld -arch arm64 -lSystem -U _objc_msgSend -o %t.out %t.o +# RUN: llvm-nm %t.out | FileCheck %s +# RUN: %lld -arch arm64 -lSystem -U _objc_msgSend 
-dead_strip -o %t.out %t.o +# RUN: llvm-nm %t.out | FileCheck %s --check-prefix=DEAD + +# CHECK: _foo +# CHECK: _objc_msgSend$length + +# DEAD-NOT: _foo +# DEAD-NOT: _objc_msgSend$length + +.section __TEXT,__text + +.globl _foo +_foo: + bl _objc_msgSend$length + ret + +.globl _main +_main: + ret + +.subsections_via_symbols diff --git a/wild/tests/lld-macho/arm64-objc-stubs-dyn.s b/wild/tests/lld-macho/arm64-objc-stubs-dyn.s new file mode 100644 index 000000000..9358fc5b3 --- /dev/null +++ b/wild/tests/lld-macho/arm64-objc-stubs-dyn.s @@ -0,0 +1,76 @@ +# REQUIRES: aarch64 + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o +# RUN: %lld -arch arm64 -lSystem -U _objc_msgSend -o %t.out %t.o +# RUN: llvm-otool -vs __TEXT __objc_stubs %t.out | FileCheck %s +# RUN: %lld -arch arm64 -lSystem -U _objc_msgSend -o %t.out %t.o -dead_strip +# RUN: llvm-otool -vs __TEXT __objc_stubs %t.out | FileCheck %s +# RUN: %lld -arch arm64 -lSystem -U _objc_msgSend -o %t.out %t.o -objc_stubs_fast +# RUN: llvm-otool -vs __TEXT __objc_stubs %t.out | FileCheck %s +# RUN: %lld -arch arm64 -lSystem -U _objc_msgSend -o %t.out %t.o -objc_stubs_small +# RUN: llvm-otool -vs __TEXT __stubs %t.out | FileCheck %s --check-prefix=STUB +# RUN: llvm-otool -vs __TEXT __objc_stubs %t.out | FileCheck %s --check-prefix=SMALL + +# Unlike arm64-objc-stubs.s, in this test, _objc_msgSend is not defined, +# which usually binds with libobjc.dylib. +# 1. -objc_stubs_fast: No change as it uses GOT. +# 2. -objc_stubs_small: Create a (shared) stub to invoke _objc_msgSend, to minimize the size. 
+ +# CHECK: Contents of (__TEXT,__objc_stubs) section + +# CHECK-NEXT: _objc_msgSend$foo: +# CHECK-NEXT: adrp x1, 8 ; 0x100008000 +# CHECK-NEXT: ldr x1, [x1, #0x10] +# CHECK-NEXT: adrp x16, 4 ; 0x100004000 +# CHECK-NEXT: ldr x16, [x16] +# CHECK-NEXT: br x16 +# CHECK-NEXT: brk #0x1 +# CHECK-NEXT: brk #0x1 +# CHECK-NEXT: brk #0x1 + +# CHECK-NEXT: _objc_msgSend$length: +# CHECK-NEXT: adrp x1, 8 ; 0x100008000 +# CHECK-NEXT: ldr x1, [x1, #0x18] +# CHECK-NEXT: adrp x16, 4 ; 0x100004000 +# CHECK-NEXT: ldr x16, [x16] +# CHECK-NEXT: br x16 +# CHECK-NEXT: brk #0x1 +# CHECK-NEXT: brk #0x1 +# CHECK-NEXT: brk #0x1 + +# CHECK-EMPTY: + +# STUB: Contents of (__TEXT,__stubs) section +# STUB-NEXT: adrp x16, 8 ; 0x100008000 +# STUB-NEXT: ldr x16, [x16] +# STUB-NEXT: br x16 + +# SMALL: Contents of (__TEXT,__objc_stubs) section +# SMALL-NEXT: _objc_msgSend$foo: +# SMALL-NEXT: adrp x1, 8 ; 0x100008000 +# SMALL-NEXT: ldr x1, [x1, #0x18] +# SMALL-NEXT: b +# SMALL-NEXT: _objc_msgSend$length: +# SMALL-NEXT: adrp x1, 8 ; 0x100008000 +# SMALL-NEXT: ldr x1, [x1, #0x20] +# SMALL-NEXT: b + +.section __TEXT,__objc_methname,cstring_literals +lselref1: + .asciz "foo" +lselref2: + .asciz "bar" + +.section __DATA,__objc_selrefs,literal_pointers,no_dead_strip +.p2align 3 +.quad lselref1 +.quad lselref2 + +.text + +.globl _main +_main: + bl _objc_msgSend$length + bl _objc_msgSend$foo + bl _objc_msgSend$foo + ret diff --git a/wild/tests/lld-macho/arm64-objc-stubs-fix.s b/wild/tests/lld-macho/arm64-objc-stubs-fix.s new file mode 100644 index 000000000..0dbec361f --- /dev/null +++ b/wild/tests/lld-macho/arm64-objc-stubs-fix.s @@ -0,0 +1,34 @@ +# REQUIRES: aarch64 + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o +# RUN: %lld -arch arm64 -lSystem -fixup_chains -o %t.out %t.o +# RUN: llvm-otool -vs __TEXT __objc_stubs %t.out | FileCheck %s --check-prefix=CHECK --check-prefix=FIRST + +# Prepend a dummy entry to check if the address for _objc_msgSend is valid. 
+# RUN: %lld -arch arm64 -lSystem -fixup_chains -e _dummy -U _dummy -o %t.out %t.o +# RUN: llvm-otool -vs __TEXT __objc_stubs %t.out | FileCheck %s --check-prefix=CHECK --check-prefix=SECOND + +# CHECK: Contents of (__TEXT,__objc_stubs) section + +# CHECK-NEXT: _objc_msgSend$foo: +# CHECK-NEXT: adrp x1, 8 ; 0x100008000 +# CHECK-NEXT: ldr x1, [x1] +# CHECK-NEXT: adrp x16, 4 ; 0x100004000 +# FIRST-NEXT: ldr x16, [x16] +# SECOND-NEXT:ldr x16, [x16, #0x8] +# CHECK-NEXT: br x16 +# CHECK-NEXT: brk #0x1 +# CHECK-NEXT: brk #0x1 +# CHECK-NEXT: brk #0x1 + +# CHECK-EMPTY: + +.text +.globl _objc_msgSend +_objc_msgSend: + ret + +.globl _main +_main: + bl _objc_msgSend$foo + ret diff --git a/wild/tests/lld-macho/arm64-objc-stubs.s b/wild/tests/lld-macho/arm64-objc-stubs.s new file mode 100644 index 000000000..da25b1292 --- /dev/null +++ b/wild/tests/lld-macho/arm64-objc-stubs.s @@ -0,0 +1,92 @@ +# REQUIRES: aarch64 + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o +# RUN: %lld -arch arm64 -lSystem -o %t.out %t.o -U _external_func +# RUN: llvm-otool -vs __TEXT __objc_stubs %t.out | FileCheck %s +# RUN: %lld -arch arm64 -lSystem -o %t.out %t.o -dead_strip -U _external_func +# RUN: llvm-otool -vs __TEXT __objc_stubs %t.out | FileCheck %s +# RUN: %lld -arch arm64 -lSystem -o %t.out %t.o -objc_stubs_fast -U _external_func +# RUN: llvm-otool -vs __TEXT __objc_stubs %t.out | FileCheck %s +# RUN: llvm-otool -l %t.out | FileCheck %s --check-prefix=FASTALIGN +# RUN: %lld -arch arm64 -lSystem -o %t.out %t.o -objc_stubs_small -U _external_func +# RUN: llvm-otool -vs __TEXT __objc_stubs %t.out | FileCheck %s --check-prefix=SMALL +# RUN: llvm-otool -l %t.out | FileCheck %s --check-prefix=SMALLALIGN +# RUN: llvm-objdump --section-headers %t.out | FileCheck %s --check-prefix=SECTIONS + +# CHECK: Contents of (__TEXT,__objc_stubs) section + +# CHECK-NEXT: _objc_msgSend$foo: +# CHECK-NEXT: adrp x1, 8 ; 0x100008000 +# CHECK-NEXT: ldr x1, [x1, #0x18] +# CHECK-NEXT: adrp x16, 4 ; 
0x100004000 +# CHECK-NEXT: ldr x16, [x16] +# CHECK-NEXT: br x16 +# CHECK-NEXT: brk #0x1 +# CHECK-NEXT: brk #0x1 +# CHECK-NEXT: brk #0x1 + +# CHECK-NEXT: _objc_msgSend$length: +# CHECK-NEXT: adrp x1, 8 ; 0x100008000 +# CHECK-NEXT: ldr x1, [x1, #0x20] +# CHECK-NEXT: adrp x16, 4 ; 0x100004000 +# CHECK-NEXT: ldr x16, [x16] +# CHECK-NEXT: br x16 +# CHECK-NEXT: brk #0x1 +# CHECK-NEXT: brk #0x1 +# CHECK-NEXT: brk #0x1 + +# CHECK-EMPTY: + +# FASTALIGN: sectname __objc_stubs +# FASTALIGN-NEXT: segname __TEXT +# FASTALIGN-NEXT: addr +# FASTALIGN-NEXT: size +# FASTALIGN-NEXT: offset +# FASTALIGN-NEXT: align 2^5 (32) + +# SMALL: _objc_msgSend$foo: +# SMALL-NEXT: adrp x1, 8 ; 0x100008000 +# SMALL-NEXT: ldr x1, [x1, #0x18] +# SMALL-NEXT: b + +# SMALL-NEXT: _objc_msgSend$length: +# SMALL-NEXT: adrp x1, 8 ; 0x100008000 +# SMALL-NEXT: ldr x1, [x1, #0x20] +# SMALL-NEXT: b + +# SMALLALIGN: sectname __objc_stubs +# SMALLALIGN-NEXT: segname __TEXT +# SMALLALIGN-NEXT: addr +# SMALLALIGN-NEXT: size +# SMALLALIGN-NEXT: offset +# SMALLALIGN-NEXT: align 2^2 (4) + +## Check correct section ordering +# SECTIONS: Sections: +# SECTIONS: __text +# SECTIONS: __stubs +# SECTIONS: __objc_stubs + +.section __TEXT,__objc_methname,cstring_literals +lselref1: + .asciz "foo" +lselref2: + .asciz "bar" + +.section __DATA,__objc_selrefs,literal_pointers,no_dead_strip +.p2align 3 +.quad lselref1 +.quad lselref2 + +.text +.globl _objc_msgSend +_objc_msgSend: + ret + +.globl _main +_main: + bl _objc_msgSend$length + bl _objc_msgSend$foo + bl _objc_msgSend$foo + bl _external_func + ret diff --git a/wild/tests/lld-macho/arm64-reloc-got-load.s b/wild/tests/lld-macho/arm64-reloc-got-load.s new file mode 100644 index 000000000..0186d41dd --- /dev/null +++ b/wild/tests/lld-macho/arm64-reloc-got-load.s @@ -0,0 +1,51 @@ +# REQUIRES: aarch64 + +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/main.s -o %t/main.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin 
%t/foobar.s -o %t/foobar.o + +# RUN: %lld -lSystem -arch arm64 -o %t/static %t/main.o %t/foobar.o +# RUN: llvm-objdump --no-print-imm-hex --macho -d --no-show-raw-insn --syms %t/static | FileCheck %s --check-prefix=STATIC + +# RUN: %lld -lSystem -arch arm64 -dylib -o %t/libfoo.dylib %t/foobar.o +# RUN: %lld -lSystem -arch arm64 -o %t/main %t/main.o %t/libfoo.dylib +# RUN: llvm-objdump --no-print-imm-hex --macho -d --no-show-raw-insn --section-headers %t/main | FileCheck %s --check-prefix=DYLIB + +# STATIC-LABEL: _main: +# STATIC-NEXT: adrp x8, [[#]] ; 0x[[#%x,PAGE:]] +# STATIC-NEXT: add x8, x8, #[[#%u,FOO_OFF:]] +# STATIC-NEXT: adrp x8, [[#]] ; 0x[[#PAGE]] +# STATIC-NEXT: add x8, x8, #[[#%u,BAR_OFF:]] +# STATIC-NEXT: ret + +# STATIC-LABEL: SYMBOL TABLE: +# STATIC-DAG: {{0*}}[[#%x,PAGE+FOO_OFF]] g F __TEXT,__text _foo +# STATIC-DAG: {{0*}}[[#%x,PAGE+BAR_OFF]] g F __TEXT,__text _bar + +# DYLIB-LABEL: _main: +# DYLIB-NEXT: adrp x8, [[#]] ; 0x[[#%x,GOT:]] +# DYLIB-NEXT: ldr x8, [x8, #8] ; literal pool symbol address: _foo +# DYLIB-NEXT: adrp x8, [[#]] ; 0x[[#GOT]] +# DYLIB-NEXT: ldr x8, [x8] ; literal pool symbol address: _bar +# DYLIB-NEXT: ret +# DYLIB-EMPTY: +# DYLIB-NEXT: Sections: +# DYLIB-NEXT: Idx Name Size VMA Type +# DYLIB: [[#]] __got 00000010 {{0*}}[[#GOT]] DATA + +#--- main.s +.globl _main, _foo, _bar +.p2align 2 +_main: + adrp x8, _foo@GOTPAGE + ldr x8, [x8, _foo@GOTPAGEOFF] + adrp x8, _bar@GOTPAGE + ldr x8, [x8, _bar@GOTPAGEOFF] + ret + +#--- foobar.s +.globl _foo, _bar +_foo: + .space 0 +_bar: + .space 0 diff --git a/wild/tests/lld-macho/arm64-reloc-pointer-to-got.s b/wild/tests/lld-macho/arm64-reloc-pointer-to-got.s new file mode 100644 index 000000000..2551d7125 --- /dev/null +++ b/wild/tests/lld-macho/arm64-reloc-pointer-to-got.s @@ -0,0 +1,36 @@ +# REQUIRES: aarch64 + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o +# RUN: %lld -lSystem -arch arm64 -o %t %t.o +# RUN: llvm-objdump --macho -d --full-contents --section-headers %t | 
FileCheck %s + +## FIXME: Even though we have reserved a GOT slot for _foo due to +## POINTER_TO_GOT, we should still be able to relax this GOT_LOAD reference to +## it. +# CHECK: _main: +# CHECK-NEXT: adrp x8, [[#]] ; 0x100004000 +# CHECK-NEXT: ldr x8, [x8] +# CHECK-NEXT: ret + +# CHECK: Idx Name Size VMA Type +# CHECK: [[#]] __got 00000008 0000000100004000 DATA +# CHECK: [[#]] __data 00000004 0000000100008000 DATA + +## The relocated data should contain the difference between the addresses of +## __data and __got in little-endian form. +# CHECK: Contents of section __DATA,__data: +# CHECK-NEXT: 100008000 00c0ffff + +.globl _main, _foo +.p2align 2 +_main: + adrp x8, _foo@GOTPAGE + ldr x8, [x8, _foo@GOTPAGEOFF] + ret + +.p2align 2 +_foo: + ret + +.data +.long _foo@GOT - . diff --git a/wild/tests/lld-macho/arm64-reloc-tlv-load.s b/wild/tests/lld-macho/arm64-reloc-tlv-load.s new file mode 100644 index 000000000..c525f2db1 --- /dev/null +++ b/wild/tests/lld-macho/arm64-reloc-tlv-load.s @@ -0,0 +1,63 @@ +# REQUIRES: aarch64 + +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/main.s -o %t/main.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/foobar.s -o %t/foobar.o + +# RUN: %lld -lSystem -arch arm64 -o %t/static %t/main.o %t/foobar.o +# RUN: llvm-objdump --no-print-imm-hex --macho -d --no-show-raw-insn --syms %t/static | FileCheck %s --check-prefix=STATIC + +# RUN: %lld -lSystem -arch arm64 -dylib -o %t/libfoo.dylib %t/foobar.o +# RUN: %lld -lSystem -arch arm64 -o %t/main %t/main.o %t/libfoo.dylib +# RUN: llvm-objdump --no-print-imm-hex --macho -d --no-show-raw-insn --section-headers %t/main | FileCheck %s --check-prefix=DYLIB + +# STATIC-LABEL: _main: +# STATIC-NEXT: adrp x8, [[#]] ; 0x[[#%x,PAGE:]] +# STATIC-NEXT: add x8, x8, #[[#%u,FOO_OFF:]] +# STATIC-NEXT: adrp x8, [[#]] ; 0x[[#PAGE]] +# STATIC-NEXT: add x8, x8, #[[#%u,BAR_OFF:]] +# STATIC-NEXT: ret + +# STATIC-LABEL: SYMBOL TABLE: +# STATIC-DAG: 
{{0*}}[[#%x,PAGE+FOO_OFF]] g O __DATA,__thread_vars _foo +# STATIC-DAG: {{0*}}[[#%x,PAGE+BAR_OFF]] g O __DATA,__thread_vars _bar + +# DYLIB-LABEL: _main: +# DYLIB-NEXT: adrp x8, [[#]] ; 0x[[#%x,TLV:]] +# DYLIB-NEXT: ldr x8, [x8, #8] ; literal pool symbol address: _foo +# DYLIB-NEXT: adrp x8, [[#]] ; 0x[[#TLV]] +# DYLIB-NEXT: ldr x8, [x8] ; literal pool symbol address: _bar +# DYLIB-NEXT: ret +# DYLIB-EMPTY: +# DYLIB-NEXT: Sections: +# DYLIB-NEXT: Idx Name Size VMA Type +# DYLIB: [[#]] __thread_ptrs 00000010 {{0*}}[[#TLV]] DATA + +#--- main.s +.globl _main, _foo, _bar +.p2align 2 +_main: + adrp x8, _foo@TLVPPAGE + ldr x8, [x8, _foo@TLVPPAGEOFF] + adrp x8, _bar@TLVPPAGE + ldr x8, [x8, _bar@TLVPPAGEOFF] + ret + +#--- foobar.s +.globl _foo, _bar + +.section __DATA,__thread_data,thread_local_regular +_foo$tlv$init: + .long 123 +_bar$tlv$init: + .long 123 + +.section __DATA,__thread_vars,thread_local_variables +_foo: + .quad __tlv_bootstrap + .quad 0 + .quad _foo$tlv$init +_bar: + .quad __tlv_bootstrap + .quad 0 + .quad _bar$tlv$init diff --git a/wild/tests/lld-macho/arm64-relocs.s b/wild/tests/lld-macho/arm64-relocs.s new file mode 100644 index 000000000..4bd0f6b8a --- /dev/null +++ b/wild/tests/lld-macho/arm64-relocs.s @@ -0,0 +1,70 @@ +# REQUIRES: aarch64 +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o +# RUN: %lld -dylib -arch arm64 -lSystem -o %t %t.o +# RUN: llvm-objdump --syms %t > %t.objdump +# RUN: llvm-objdump --no-print-imm-hex --macho -d --section=__const %t >> %t.objdump +# RUN: FileCheck %s < %t.objdump + +# CHECK-LABEL: SYMBOL TABLE: +# CHECK-DAG: [[#%x,PTR_1:]] l O __DATA_CONST,__const _ptr_1 +# CHECK-DAG: [[#%x,PTR_2:]] l O __DATA_CONST,__const _ptr_2 +# CHECK-DAG: [[#%x,BAR:]] g F __TEXT,__text _bar +# CHECK-DAG: [[#%x,BAZ:]] g O __DATA,__data _baz + +# CHECK-LABEL: _foo: +## BRANCH26 relocations are 4-byte aligned, so 123 is truncated to 120 +# CHECK-NEXT: bl 0x[[#BAR+120]] +## PAGE21 relocations are aligned to 4096 bytes +# 
CHECK-NEXT: adrp x2, [[#]] ; 0x[[#BAZ+4096-128]] +# CHECK-NEXT: ldr x2, [x2, #128] +# CHECK-NEXT: adrp x3, 8 ; 0x8000 +# CHECK-NEXT: ldr q0, [x3, #144] +# CHECK-NEXT: ret + +# CHECK-LABEL: Contents of (__DATA_CONST,__const) section +# CHECK: [[#PTR_1]] {{0*}}[[#BAZ]] 00000000 00000000 00000000 +# CHECK: [[#PTR_2]] {{0*}}[[#BAZ+123]] 00000000 00000000 00000000 + +.text +.globl _foo, _bar, _baz, _quux +.p2align 2 +_foo: + ## Generates ARM64_RELOC_BRANCH26 and ARM64_RELOC_ADDEND + bl _bar + 123 + ## Generates ARM64_RELOC_PAGE21 and ADDEND + adrp x2, _baz@PAGE + 4097 + ## Generates ARM64_RELOC_PAGEOFF12 + ldr x2, [x2, _baz@PAGEOFF] + + ## Generates ARM64_RELOC_PAGE21 + adrp x3, _quux@PAGE + ## Generates ARM64_RELOC_PAGEOFF12 with internal slide 4 + ldr q0, [x3, _quux@PAGEOFF] + ret + +.p2align 2 +_bar: + ret + +.data +.space 128 +_baz: +.space 1 + +.p2align 4 +_quux: +.quad 0 +.quad 80 + +.section __DATA_CONST,__const +## These generate ARM64_RELOC_UNSIGNED symbol relocations. llvm-mc seems to +## generate UNSIGNED section relocations only for compact unwind sections, so +## those relocations are being tested in compact-unwind.s. 
+_ptr_1: + .quad _baz + .space 8 +_ptr_2: + .quad _baz + 123 + .space 8 + +.subsections_via_symbols diff --git a/wild/tests/lld-macho/arm64-stubs.s b/wild/tests/lld-macho/arm64-stubs.s new file mode 100644 index 000000000..55e0f0613 --- /dev/null +++ b/wild/tests/lld-macho/arm64-stubs.s @@ -0,0 +1,65 @@ +# REQUIRES: aarch64 + +## Test arm64 stubs +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/foo.s -o %t/foo.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/bar.s -o %t/bar.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/test.s -o %t/test.o +# RUN: %lld -arch arm64 -dylib -install_name @executable_path/libfoo.dylib %t/foo.o -o %t/libfoo.dylib +# RUN: %lld -arch arm64 -dylib -install_name @executable_path/libbar.dylib %t/bar.o -o %t/libbar.dylib +# RUN: %lld -arch arm64 -lSystem %t/libfoo.dylib %t/libbar.dylib %t/test.o -o %t/test -no_fixup_chains +# RUN: llvm-objdump --no-print-imm-hex --macho -d --no-show-raw-insn --section="__TEXT,__stubs" --section="__TEXT,__stub_helper" %t/test | FileCheck %s -DREG=x16 + +## Test arm64_32 stubs +# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %t/foo.s -o %t/foo32.o +# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %t/bar.s -o %t/bar32.o +# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %t/test.s -o %t/test32.o +# RUN: %lld-watchos -dylib -install_name @executable_path/libfoo.dylib %t/foo32.o -o %t/libfoo32.dylib +# RUN: %lld-watchos -dylib -install_name @executable_path/libbar.dylib %t/bar32.o -o %t/libbar32.dylib +# RUN: %lld-watchos -lSystem %t/libfoo32.dylib %t/libbar32.dylib %t/test32.o -o %t/test32 -no_fixup_chains +# RUN: llvm-objdump --no-print-imm-hex --macho -d --no-show-raw-insn --section="__TEXT,__stubs" --section="__TEXT,__stub_helper" %t/test32 | FileCheck %s -DREG=w16 + +# CHECK: _main: +# CHECK-NEXT: bl 0x[[#%x,FOO:]] ; symbol stub for: _foo +# CHECK-NEXT: bl 0x[[#%x,BAR:]] ; symbol stub for: _bar +# 
CHECK-NEXT: ret + +# CHECK-LABEL: Contents of (__TEXT,__stubs) section +# CHECK-NEXT: [[#BAR]]: adrp x16 +# CHECK-NEXT: ldr [[REG]], [x16{{.*}}] ; literal pool symbol address: _bar +# CHECK-NEXT: br x16 +# CHECK-NEXT: [[#FOO]]: adrp x16 +# CHECK-NEXT: ldr [[REG]], [x16{{.*}}] ; literal pool symbol address: _foo +# CHECK-NEXT: br x16 + +# CHECK-LABEL: Contents of (__TEXT,__stub_helper) section +# CHECK-NEXT: [[#%x,HELPER_HEADER:]]: adrp x17 +# CHECK-NEXT: add x17, x17 +# CHECK-NEXT: stp x16, x17, [sp, #-16]! +# CHECK-NEXT: adrp x16 +# CHECK-NEXT: ldr [[REG]], [x16] ; literal pool symbol address: dyld_stub_binder +# CHECK-NEXT: br x16 +# CHECK-NEXT: ldr w16, 0x[[#%x,BAR_BIND_OFF_ADDR:]] +# CHECK-NEXT: b 0x[[#HELPER_HEADER]] +# CHECK-NEXT: [[#BAR_BIND_OFF_ADDR]]: udf #0 +# CHECK-NEXT: ldr w16, 0x[[#%x,FOO_BIND_OFF_ADDR:]] +# CHECK-NEXT: b 0x[[#HELPER_HEADER]] +# CHECK-NEXT: [[#FOO_BIND_OFF_ADDR]]: udf #11 + +#--- foo.s +.globl _foo +_foo: + +#--- bar.s +.globl _bar +_bar: + +#--- test.s +.text +.globl _main + +.p2align 2 +_main: + bl _foo + bl _bar + ret diff --git a/wild/tests/lld-macho/arm64-thunk-for-alignment.s b/wild/tests/lld-macho/arm64-thunk-for-alignment.s new file mode 100644 index 000000000..f497b81f7 --- /dev/null +++ b/wild/tests/lld-macho/arm64-thunk-for-alignment.s @@ -0,0 +1,44 @@ +# REQUIRES: aarch64 +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/foo.s -o %t/foo.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/bar.s -o %t/bar.o +# RUN: %lld -dylib -arch arm64 -lSystem -o %t/out %t/foo.o %t/bar.o + +# RUN: llvm-objdump --macho --syms %t/out | FileCheck %s +# CHECK: _bar.thunk.0 + +## Regression test for PR59259. Previously, we neglected to check section +## alignments when deciding when to create thunks. + +## If we ignore alignment, the total size of _spacer1 + _spacer2 below is just +## under the limit at which we attempt to insert thunks between the spacers. 
+## However, with alignment accounted for, their total size ends up being +## 0x8000000, which is just above the max forward branch range, making thunk +## insertion necessary. Thus, not accounting for alignment led to an error. + +#--- foo.s + +_foo: + b _bar + +## Size of a `b` instruction. +.equ callSize, 4 +## Refer to `slop` in TextOutputSection::finalize(). +.equ slopSize, 12 * 256 + +_spacer1: + .space 0x4000000 - slopSize - 2 * callSize - 1 + +.subsections_via_symbols + +#--- bar.s +.globl _bar + +.p2align 14 +_spacer2: + .space 0x4000000 + +_bar: + ret + +.subsections_via_symbols diff --git a/wild/tests/lld-macho/arm64-thunk-icf-body-dedup.s b/wild/tests/lld-macho/arm64-thunk-icf-body-dedup.s new file mode 100644 index 000000000..06d1065c2 --- /dev/null +++ b/wild/tests/lld-macho/arm64-thunk-icf-body-dedup.s @@ -0,0 +1,80 @@ +# REQUIRES: aarch64 + +# RUN: rm -rf %t; mkdir %t +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/input.o + +## Verify that ICF body-folded symbols share a single branch-extension thunk +## rather than each getting its own. +# RUN: %lld -arch arm64 -lSystem -o %t/icf-all %t/input.o --icf=all -map %t/icf-all.map +# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/icf-all | FileCheck %s --check-prefix=BODY +# RUN: FileCheck %s --input-file %t/icf-all.map --check-prefix=BODY-MAP + +## Both calls in _main branch to the same thunk address. +# BODY-LABEL: <_main>: +# BODY: bl 0x[[#%x, THUNK:]] <_target_a.thunk.0> +# BODY-NEXT: bl 0x[[#%x, THUNK]] <_target_a.thunk.0> + +## Only one thunk should be created for the folded functions. +# BODY-MAP: .thunk.0 +# BODY-MAP-NOT: .thunk.0 + +## Verify that safe_thunks ICF still creates separate branch-extension thunks +## when needed. +# RUN: %lld -arch arm64 -lSystem -o %t/icf-safe %t/input.o --icf=safe_thunks +# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/icf-safe | FileCheck %s --check-prefix=SAFE + +## Each call gets its own branch-extension thunk. 
+# SAFE-LABEL: <_main>: +# SAFE: bl 0x[[#%x, THUNK_A:]] <_target_a.thunk.0> +# SAFE-NEXT: bl 0x[[#%x, THUNK_B:]] <_target_b.thunk.0> + +.subsections_via_symbols + +.addrsig +.addrsig_sym _target_a +.addrsig_sym _target_b + +.text + +## A unique function placed before _target_a so that the assembler's automatic +## ltmp0 symbol lands here to make the test more readable. +.globl _unique_func +.p2align 2 +_unique_func: + mov w0, #1 + ret + +.globl _target_a +.p2align 2 +_target_a: + mov w0, #42 + ret + +.globl _target_b +.p2align 2 +_target_b: + mov w0, #42 + ret + +.globl _spacer +.p2align 2 +_spacer: + .space 0x8000000 + ret + +.globl _main +.p2align 2 +_main: + bl _target_a + bl _target_b + ret + +## With safe_thunks, _target_b's ICF thunk is a synthetic section appended +## after all regular inputs. This spacer pushes it out of branch range from +## _main so it also needs a branch-extension thunk. +.globl _spacer2 +.p2align 2 +_spacer2: + .space 0x8000000 + mov w0, #0 + ret diff --git a/wild/tests/lld-macho/arm64-thunk-starvation.s b/wild/tests/lld-macho/arm64-thunk-starvation.s new file mode 100644 index 000000000..9e2b54f84 --- /dev/null +++ b/wild/tests/lld-macho/arm64-thunk-starvation.s @@ -0,0 +1,57 @@ +# REQUIRES: aarch64 +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o +# RUN: %lld -arch arm64 -lSystem -o %t.out %t.o + +## Regression test for PR51578. + +.subsections_via_symbols + +.globl _f1, _f2, _f3, _f4, _f5, _f6 +.p2align 2 +_f1: b _fn1 +_f2: b _fn2 +_f3: b _fn3 +_f4: b _fn4 +_f5: b _fn5 +_f6: b _fn6 +## 6 * 4 = 24 bytes for branches +## Currently leaves 12 bytes for one thunk, so 36 bytes. +## Uses < instead of <=, so 40 bytes. + +.global _spacer1, _spacer2 +## 0x8000000 is 128 MiB, one more than the forward branch limit, +## distributed over two functions since our thunk insertion algorithm +## can't deal with a single function that's 128 MiB. 
+## We leave just enough room so that the old thunking algorithm finalized +## both spacers when processing _f1 (24 bytes for the 4 bytes code for each +## of the 6 _f functions, 12 bytes for one thunk, 4 bytes because the forward +## branch range is 128 Mib - 4 bytes, and another 4 bytes because the algorithm +## uses `<` instead of `<=`, for a total of 44 bytes slop.) Of the slop, 20 +## bytes are actually room for thunks. +## _fn1-_fn6 aren't finalized because then there wouldn't be room for a thunk. +## But when a thunk is inserted to jump from _f1 to _fn1, that needs 12 bytes +## but _f2 is only 4 bytes later, so after _f1 there are only +## 20-(12-4) = 12 bytes left, after _f2 only 12-(12-4) 4 bytes, and after +## _f3 there's no more room for thunks and we can't make progress. +## The fix is to leave room for many more thunks. +## The same construction as this test case can defeat that too with enough +## consecutive jumps, but in practice there aren't hundreds of consecutive +## jump instructions. + +_spacer1: +.space 0x4000000 +_spacer2: +.space 0x4000000 - 44 + +.globl _fn1, _fn2, _fn3, _fn4, _fn5, _fn6 +.p2align 2 +_fn1: ret +_fn2: ret +_fn3: ret +_fn4: ret +_fn5: ret +_fn6: ret + +.globl _main +_main: + ret diff --git a/wild/tests/lld-macho/arm64-thunk-visibility.s b/wild/tests/lld-macho/arm64-thunk-visibility.s new file mode 100644 index 000000000..5fa195f8b --- /dev/null +++ b/wild/tests/lld-macho/arm64-thunk-visibility.s @@ -0,0 +1,83 @@ +# REQUIRES: aarch64 + +# foo.s and bar.s both contain TU-local symbols (think static function) +# with the same name, and both need a thunk. This tests that ld64.lld doesn't +# create a duplicate symbol for the two functions. + +# Test this both when the TU-local symbol is the branch source or target, +# and for both forward and backwards jumps. 
+ +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/foo.s -o %t/foo.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/bar.s -o %t/bar.o +# RUN: %lld -arch arm64 -lSystem -o %t.out %t/foo.o %t/bar.o + +#--- foo.s + +.subsections_via_symbols + +# Note: No .globl, since these are TU-local symbols. +.p2align 2 +_early_jumping_local_fn: b _some_late_external +.p2align 2 +_early_landing_local_fn: ret + +.globl _some_early_external +.p2align 2 +_some_early_external: b _late_landing_local_fn + +## 0x8000000 is 128 MiB, one more than the forward branch limit. +## Distribute that over two functions since our thunk insertion algorithm +## can't deal with a single function that's 128 MiB. +.global _spacer1, _spacer2 +_spacer1: +.space 0x4000000 +_spacer2: +.space 0x4000000 + +# Note: No .globl, since these are TU-local symbols. +.p2align 2 +_late_jumping_local_fn: b _some_early_external +.p2align 2 +_late_landing_local_fn: ret + +.globl _some_late_external +.p2align 2 +_some_late_external: b _early_landing_local_fn + +#--- bar.s + +.subsections_via_symbols + +# Note: No .globl, since these are TU-local symbols. +.p2align 2 +_early_jumping_local_fn: b _some_other_late_external +.p2align 2 +_early_landing_local_fn: ret + +.globl _some_other_early_external +.p2align 2 +_some_other_early_external: b _late_landing_local_fn + +## 0x8000000 is 128 MiB, one more than the forward branch limit. +## Distribute that over two functions since our thunk insertion algorithm +## can't deal with a single function that's 128 MiB. +.global _other_spacer1, _other_spacer1 +_spacer1: +.space 0x4000000 +_spacer2: +.space 0x4000000 + +# Note: No .globl, since these are TU-local symbols. 
+.p2align 2 +_late_jumping_local_fn: b _some_other_early_external +.p2align 2 +_late_landing_local_fn: ret + +.globl _some_other_late_external +.p2align 2 +_some_other_late_external: b _early_landing_local_fn + +.globl _main +_main: + ret diff --git a/wild/tests/lld-macho/arm64-thunks.s b/wild/tests/lld-macho/arm64-thunks.s new file mode 100644 index 000000000..cd3842895 --- /dev/null +++ b/wild/tests/lld-macho/arm64-thunks.s @@ -0,0 +1,419 @@ +# REQUIRES: aarch64 + +## Check for the following: +## (1) address match between thunk definitions and call destinations +## (2) address match between thunk page+offset computations and function +## definitions +## (3) a second thunk is created when the first one goes out of range +## (4) early calls to a dylib stub use a thunk, and later calls the stub +## directly +## (5) Thunks are created for all sections in the text segment with branches. +## (6) Thunks are in the linker map file. +## Notes: +## 0x4000000 = 64 Mi = half the magnitude of the forward-branch range + +# RUN: rm -rf %t; mkdir %t +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/input.o +## Use --icf=safe_thunks to test that branch extension algo is compatible +## with safe_thunks ICF. 
+# RUN: %lld -arch arm64 -dead_strip -lSystem -U _extern_sym -map %t/thunk.map -o %t/thunk %t/input.o --icf=safe_thunks +# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t/thunk | FileCheck %s +# RUN: llvm-objdump --macho --section-headers %t/thunk > %t/headers.txt +# RUN: llvm-otool -vs __DATA __objc_selrefs %t/thunk >> %t/headers.txt +# RUN: llvm-otool -vs __TEXT __objc_stubs %t/thunk >> %t/headers.txt +# RUN: FileCheck %s --check-prefix=OBJC < %t/headers.txt + +# RUN: FileCheck %s --input-file %t/thunk.map --check-prefix=MAP + +# OBJC: Sections: +# OBJC: __text +# OBJC-NEXT: __lcxx_override +# OBJC-NEXT: __stubs +# OBJC-NEXT: __stub_helper +# OBJC-NEXT: __objc_stubs + +# OBJC: Contents of (__DATA,__objc_selrefs) section +# OBJC-NEXT: {{[0-9a-f]*}} __TEXT:__objc_methname:foo +# OBJC-NEXT: {{[0-9a-f]*}} __TEXT:__objc_methname:bar + +# OBJC: Contents of (__TEXT,__objc_stubs) section +# OBJC: _objc_msgSend$bar: +# OBJC: _objc_msgSend$foo: + +# MAP: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_low_addr +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _a +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _b +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _d.thunk.0 +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _e.thunk.0 +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _f.thunk.0 +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _g.thunk.0 +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _h.thunk.0 +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} ___nan.thunk.0 +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _d +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _e +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _f +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _g +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _a.thunk.0 +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _b.thunk.0 +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _h +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _main +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_high_addr +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _c.thunk.0 +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _d.thunk.1 +# MAP-NEXT: 
0x{{[[:xdigit:]]+}} {{.*}} _e.thunk.1 +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _f.thunk.1 +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _fold_func_low_addr.thunk.0 +# MAP-NEXT: 0x{{[[:xdigit:]]+}} {{.*}} _z + + +# CHECK: Disassembly of section __TEXT,__text: + +# CHECK: [[#%.13x, A_PAGE:]][[#%.3x, A_OFFSET:]] <_a>: +# CHECK: bl 0x[[#%x, A:]] <_a> +# CHECK: bl 0x[[#%x, B:]] <_b> +# CHECK: bl 0x[[#%x, C:]] <_c> +# CHECK: bl 0x[[#%x, D_THUNK_0:]] <_d.thunk.0> +# CHECK: bl 0x[[#%x, E_THUNK_0:]] <_e.thunk.0> +# CHECK: bl 0x[[#%x, F_THUNK_0:]] <_f.thunk.0> +# CHECK: bl 0x[[#%x, G_THUNK_0:]] <_g.thunk.0> +# CHECK: bl 0x[[#%x, H_THUNK_0:]] <_h.thunk.0> +# CHECK: bl 0x[[#%x, NAN_THUNK_0:]] <___nan.thunk.0> + +# CHECK: [[#%.13x, B_PAGE:]][[#%.3x, B_OFFSET:]] <_b>: +# CHECK: bl 0x[[#%x, A]] <_a> +# CHECK: bl 0x[[#%x, B]] <_b> +# CHECK: bl 0x[[#%x, C]] <_c> +# CHECK: bl 0x[[#%x, D_THUNK_0]] <_d.thunk.0> +# CHECK: bl 0x[[#%x, E_THUNK_0]] <_e.thunk.0> +# CHECK: bl 0x[[#%x, F_THUNK_0]] <_f.thunk.0> +# CHECK: bl 0x[[#%x, G_THUNK_0]] <_g.thunk.0> +# CHECK: bl 0x[[#%x, H_THUNK_0]] <_h.thunk.0> +# CHECK: bl 0x[[#%x, NAN_THUNK_0]] <___nan.thunk.0> + +# CHECK: [[#%.13x, C_PAGE:]][[#%.3x, C_OFFSET:]] <_c>: +# CHECK: bl 0x[[#%x, A]] <_a> +# CHECK: bl 0x[[#%x, B]] <_b> +# CHECK: bl 0x[[#%x, C]] <_c> +# CHECK: bl 0x[[#%x, D:]] <_d> +# CHECK: bl 0x[[#%x, E:]] <_e> +# CHECK: bl 0x[[#%x, F_THUNK_0]] <_f.thunk.0> +# CHECK: bl 0x[[#%x, G_THUNK_0]] <_g.thunk.0> +# CHECK: bl 0x[[#%x, H_THUNK_0]] <_h.thunk.0> +# CHECK: bl 0x[[#%x, NAN_THUNK_0]] <___nan.thunk.0> + +# CHECK: [[#%x, D_THUNK_0]] <_d.thunk.0>: +# CHECK: adrp x16, 0x[[#%x, D_PAGE:]] +# CHECK: add x16, x16, #[[#D_OFFSET:]] + +# CHECK: [[#%x, E_THUNK_0]] <_e.thunk.0>: +# CHECK: adrp x16, 0x[[#%x, E_PAGE:]] +# CHECK: add x16, x16, #[[#E_OFFSET:]] + +# CHECK: [[#%x, F_THUNK_0]] <_f.thunk.0>: +# CHECK: adrp x16, 0x[[#%x, F_PAGE:]] +# CHECK: add x16, x16, #[[#F_OFFSET:]] + +# CHECK: [[#%x, G_THUNK_0]] <_g.thunk.0>: +# CHECK: adrp x16, 0x[[#%x, 
G_PAGE:]] +# CHECK: add x16, x16, #[[#G_OFFSET:]] + +# CHECK: [[#%x, H_THUNK_0]] <_h.thunk.0>: +# CHECK: adrp x16, 0x[[#%x, H_PAGE:]] +# CHECK: add x16, x16, #[[#H_OFFSET:]] + +# CHECK: [[#%x, NAN_THUNK_0]] <___nan.thunk.0>: +# CHECK: adrp x16, 0x[[#%x, NAN_PAGE:]] +# CHECK: add x16, x16, #[[#NAN_OFFSET:]] + +# CHECK: [[#%x, D_PAGE + D_OFFSET]] <_d>: +# CHECK: bl 0x[[#%x, A]] <_a> +# CHECK: bl 0x[[#%x, B]] <_b> +# CHECK: bl 0x[[#%x, C]] <_c> +# CHECK: bl 0x[[#%x, D]] <_d> +# CHECK: bl 0x[[#%x, E]] <_e> +# CHECK: bl 0x[[#%x, F_THUNK_0]] <_f.thunk.0> +# CHECK: bl 0x[[#%x, G_THUNK_0]] <_g.thunk.0> +# CHECK: bl 0x[[#%x, H_THUNK_0]] <_h.thunk.0> +# CHECK: bl 0x[[#%x, NAN_THUNK_0]] <___nan.thunk.0> + +# CHECK: [[#%x, E_PAGE + E_OFFSET]] <_e>: +# CHECK: bl 0x[[#%x, A_THUNK_0:]] <_a.thunk.0> +# CHECK: bl 0x[[#%x, B_THUNK_0:]] <_b.thunk.0> +# CHECK: bl 0x[[#%x, C]] <_c> +# CHECK: bl 0x[[#%x, D]] <_d> +# CHECK: bl 0x[[#%x, E]] <_e> +# CHECK: bl 0x[[#%x, F:]] <_f> +# CHECK: bl 0x[[#%x, G:]] <_g> +# CHECK: bl 0x[[#%x, H_THUNK_0]] <_h.thunk.0> +# CHECK: bl 0x[[#%x, NAN_THUNK_0]] <___nan.thunk.0> + +# CHECK: [[#%x, F_PAGE + F_OFFSET]] <_f>: +# CHECK: bl 0x[[#%x, A_THUNK_0]] <_a.thunk.0> +# CHECK: bl 0x[[#%x, B_THUNK_0]] <_b.thunk.0> +# CHECK: bl 0x[[#%x, C]] <_c> +# CHECK: bl 0x[[#%x, D]] <_d> +# CHECK: bl 0x[[#%x, E]] <_e> +# CHECK: bl 0x[[#%x, F]] <_f> +# CHECK: bl 0x[[#%x, G]] <_g> +# CHECK: bl 0x[[#%x, H_THUNK_0]] <_h.thunk.0> +# CHECK: bl 0x[[#%x, NAN_THUNK_0]] <___nan.thunk.0> + +# CHECK: [[#%x, G_PAGE + G_OFFSET]] <_g>: +# CHECK: bl 0x[[#%x, A_THUNK_0]] <_a.thunk.0> +# CHECK: bl 0x[[#%x, B_THUNK_0]] <_b.thunk.0> +# CHECK: bl 0x[[#%x, C_THUNK_0:]] <_c.thunk.0> +# CHECK: bl 0x[[#%x, D_THUNK_1:]] <_d.thunk.1> +# CHECK: bl 0x[[#%x, E]] <_e> +# CHECK: bl 0x[[#%x, F]] <_f> +# CHECK: bl 0x[[#%x, G]] <_g> +# CHECK: bl 0x[[#%x, H:]] <_h> +# CHECK: bl 0x[[#%x, STUBS:]] + +# CHECK: [[#%x, A_THUNK_0]] <_a.thunk.0>: +# CHECK: adrp x16, 0x[[#%x, A_PAGE]]000 +# CHECK: add x16, x16, 
#[[#%d, A_OFFSET]] + +# CHECK: [[#%x, B_THUNK_0]] <_b.thunk.0>: +# CHECK: adrp x16, 0x[[#%x, B_PAGE]]000 +# CHECK: add x16, x16, #[[#%d, B_OFFSET]] + +# CHECK: [[#%x, H_PAGE + H_OFFSET]] <_h>: +# CHECK: bl 0x[[#%x, A_THUNK_0]] <_a.thunk.0> +# CHECK: bl 0x[[#%x, B_THUNK_0]] <_b.thunk.0> +# CHECK: bl 0x[[#%x, C_THUNK_0]] <_c.thunk.0> +# CHECK: bl 0x[[#%x, D_THUNK_1]] <_d.thunk.1> +# CHECK: bl 0x[[#%x, E]] <_e> +# CHECK: bl 0x[[#%x, F]] <_f> +# CHECK: bl 0x[[#%x, G]] <_g> +# CHECK: bl 0x[[#%x, H]] <_h> +# CHECK: bl 0x[[#%x, STUBS]] + +# CHECK: <_main>: +# CHECK: bl 0x[[#%x, A_THUNK_0]] <_a.thunk.0> +# CHECK: bl 0x[[#%x, B_THUNK_0]] <_b.thunk.0> +# CHECK: bl 0x[[#%x, C_THUNK_0]] <_c.thunk.0> +# CHECK: bl 0x[[#%x, D_THUNK_1]] <_d.thunk.1> +# CHECK: bl 0x[[#%x, E_THUNK_1:]] <_e.thunk.1> +# CHECK: bl 0x[[#%x, F_THUNK_1:]] <_f.thunk.1> +# CHECK: bl 0x[[#%x, G]] <_g> +# CHECK: bl 0x[[#%x, H]] <_h> +# CHECK: bl 0x[[#%x, STUBS]] + +# CHECK: [[#%x, C_THUNK_0]] <_c.thunk.0>: +# CHECK: adrp x16, 0x[[#%x, C_PAGE]]000 +# CHECK: add x16, x16, #[[#%d, C_OFFSET]] + +# CHECK: [[#%x, D_THUNK_1]] <_d.thunk.1>: +# CHECK: adrp x16, 0x[[#%x, D_PAGE]] +# CHECK: add x16, x16, #[[#D_OFFSET]] + +# CHECK: [[#%x, E_THUNK_1]] <_e.thunk.1>: +# CHECK: adrp x16, 0x[[#%x, E_PAGE]] +# CHECK: add x16, x16, #[[#E_OFFSET]] + +# CHECK: [[#%x, F_THUNK_1]] <_f.thunk.1>: +# CHECK: adrp x16, 0x[[#%x, F_PAGE]] +# CHECK: add x16, x16, #[[#F_OFFSET]] + +# CHECK: Disassembly of section __TEXT,__lcxx_override: +# CHECK: <_z>: +# CHECK: bl 0x[[#%x, A_THUNK_0]] <_a.thunk.0> + +# CHECK: Disassembly of section __TEXT,__stubs: + +# CHECK: [[#%x, NAN_PAGE + NAN_OFFSET]] <__stubs>: + +.section __TEXT,__objc_methname,cstring_literals +lselref1: + .asciz "foo" +lselref2: + .asciz "bar" + +.section __DATA,__objc_selrefs,literal_pointers,no_dead_strip +.p2align 3 +.quad lselref1 +.quad lselref2 + +.text +.globl _objc_msgSend +_objc_msgSend: + ret + +.subsections_via_symbols + +.addrsig +.addrsig_sym _fold_func_low_addr 
+.addrsig_sym _fold_func_high_addr + +.text + +.globl _fold_func_low_addr +.p2align 2 +_fold_func_low_addr: + add x0, x0, x0 + add x1, x0, x1 + add x2, x0, x2 + ret + +.globl _a +.p2align 2 +_a: + bl _a + bl _b + bl _c + bl _d + bl _e + bl _f + bl _g + bl _h + bl ___nan + ret + +.globl _b +.p2align 2 +_b: + bl _a + bl _b + bl _c + bl _d + bl _e + bl _f + bl _g + bl _h + bl ___nan + .space 0x4000000-0x3c + ret + +.globl _c +.p2align 2 +_c: + bl _a + bl _b + bl _c + bl _d + bl _e + bl _f + bl _g + bl _h + bl ___nan + ret + +.globl _d +.p2align 2 +_d: + bl _a + bl _b + bl _c + bl _d + bl _e + bl _f + bl _g + bl _h + bl ___nan + .space 0x4000000-0x38 + ret + +.globl _e +.p2align 2 +_e: + bl _a + bl _b + bl _c + bl _d + bl _e + bl _f + bl _g + bl _h + bl ___nan + ret + +.globl _f +.p2align 2 +_f: + bl _a + bl _b + bl _c + bl _d + bl _e + bl _f + bl _g + bl _h + bl ___nan + .space 0x4000000-0x34 + ret + +.globl _g +.p2align 2 +_g: + bl _a + bl _b + bl _c + bl _d + bl _e + bl _f + bl _g + bl _h + bl ___nan + ret + +.globl _h +.p2align 2 +_h: + bl _a + bl _b + bl _c + bl _d + bl _e + bl _f + bl _g + bl _h + bl ___nan + .space 0x4000000-0x30 + ret + +.globl _main +.p2align 2 +_main: + bl _a + bl _b + bl _c + bl _d + bl _e + bl _f + bl _g + bl _h + bl _fold_func_low_addr + bl _fold_func_high_addr + bl ___nan + bl _objc_msgSend$foo + bl _objc_msgSend$bar + ret + +.globl _fold_func_high_addr +.p2align 2 +_fold_func_high_addr: + add x0, x0, x0 + add x1, x0, x1 + add x2, x0, x2 + ret + + +.section __TEXT,__cstring + # The .space below has to be composed of non-zero characters. 
Otherwise, the + # linker will create a symbol for every '0' in the section, leading to + # dramatic memory usage and a huge linker map file + .space 0x4000000, 'A' + .byte 0 + + +.section __TEXT,__lcxx_override,regular,pure_instructions + +.globl _z +.no_dead_strip _z +.p2align 2 +_z: + bl _a + ## Ensure calling into stubs works + bl _extern_sym + ret diff --git a/wild/tests/lld-macho/bind-opcodes.s b/wild/tests/lld-macho/bind-opcodes.s new file mode 100644 index 000000000..cf294f2f7 --- /dev/null +++ b/wild/tests/lld-macho/bind-opcodes.s @@ -0,0 +1,186 @@ +# REQUIRES: x86, aarch64 +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin --defsym PTR64=0 %t/test.s -o %t/test.o +# RUN: %lld -O2 -dylib %t/foo.o -o %t/libfoo.dylib +# RUN: %lld -O2 -lSystem %t/test.o %t/libfoo.dylib -o %t/test-x86_64 + +## Test (64-bit): +## 1/ We emit exactly one BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM per symbol. +## 2/ Combine BIND_OPCODE_DO_BIND and BIND_OPCODE_ADD_ADDR_ULEB pairs. +## 3/ Compact BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB +## 4/ Use BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED if possible. 
+# RUN: obj2yaml %t/test-x86_64 | FileCheck %s + +# CHECK: BindOpcodes: +# CHECK-NEXT: Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# CHECK-NEXT: Imm: 0 +# CHECK-NEXT: Symbol: _foo +# CHECK-NEXT: Opcode: BIND_OPCODE_SET_TYPE_IMM +# CHECK-NEXT: Imm: 1 +# CHECK-NEXT: Symbol: '' +# CHECK-NEXT: Opcode: BIND_OPCODE_SET_DYLIB_ORDINAL_IMM +# CHECK-NEXT: Imm: 2 +# CHECK-NEXT: Symbol: '' +# CHECK-NEXT: Opcode: BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB +# CHECK-NEXT: Imm: 2 +# CHECK-NEXT: ULEBExtraData: [ 0x0 ] +# CHECK-NEXT: Symbol: '' +# CHECK-NEXT: Opcode: BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB +# CHECK-NEXT: Imm: 0 +# CHECK-NEXT: ULEBExtraData: [ 0x2, 0x8 ] +# CHECK-NEXT: Symbol: '' +# CHECK-NEXT: Opcode: BIND_OPCODE_SET_ADDEND_SLEB +# CHECK-NEXT: Imm: 0 +# CHECK-NEXT: SLEBExtraData: [ 1 ] +# CHECK-NEXT: Symbol: '' +# CHECK-NEXT: Opcode: BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB +# CHECK-NEXT: Imm: 0 +# CHECK-NEXT: ULEBExtraData: [ 0x1008 ] +# CHECK-NEXT: Symbol: '' +# CHECK-NEXT: Opcode: BIND_OPCODE_SET_ADDEND_SLEB +# CHECK-NEXT: Imm: 0 +# CHECK-NEXT: SLEBExtraData: [ 0 ] +# CHECK-NEXT: Symbol: '' +# CHECK-NEXT: Opcode: BIND_OPCODE_DO_BIND +# CHECK-NEXT: Imm: 0 +# CHECK-NEXT: Symbol: '' +# CHECK-NEXT: Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# CHECK-NEXT: Imm: 0 +# CHECK-NEXT: Symbol: _bar +# CHECK-NEXT: Opcode: BIND_OPCODE_SET_TYPE_IMM +# CHECK-NEXT: Imm: 1 +# CHECK-NEXT: Symbol: '' +# CHECK-NEXT: Opcode: BIND_OPCODE_ADD_ADDR_ULEB +# CHECK-NEXT: Imm: 0 +# CHECK-NEXT: ULEBExtraData: [ 0xFFFFFFFFFFFFEFD0 ] +# CHECK-NEXT: Symbol: '' +# CHECK-NEXT: Opcode: BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED +# CHECK-NEXT: Imm: 1 +# CHECK-NEXT: Symbol: '' +# CHECK-NEXT: Opcode: BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB +# CHECK-NEXT: Imm: 0 +# CHECK-NEXT: ULEBExtraData: [ 0x1008 ] +# CHECK-NEXT: Symbol: '' +# CHECK-NEXT: Opcode: BIND_OPCODE_DO_BIND +# CHECK-NEXT: Imm: 0 +# CHECK-NEXT: Symbol: '' +# CHECK-NEXT: Opcode: BIND_OPCODE_DONE +# CHECK-NEXT: Imm: 0 +# CHECK-NEXT: Symbol: '' 
+ +# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-darwin %t/foo.s -o %t/foo.o +# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-darwin --defsym PTR32=0 %t/test.s -o %t/test.o +# RUN: %lld-watchos -O2 -dylib %t/foo.o -o %t/libfoo.dylib +# RUN: %lld-watchos -O2 -dylib %t/test.o %t/libfoo.dylib -o %t/libtest-arm64_32.dylib + +## Test (32-bit): +## 1/ We emit exactly one BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM per symbol. +## 2/ Combine BIND_OPCODE_DO_BIND and BIND_OPCODE_ADD_ADDR_ULEB pairs. +## 3/ Compact BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB +## 4/ Use BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED if possible. +# RUN: obj2yaml %t/libtest-arm64_32.dylib | FileCheck %s --check-prefix=CHECK32 + +# CHECK32: BindOpcodes: +# CHECK32-NEXT: Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# CHECK32-NEXT: Imm: 0 +# CHECK32-NEXT: Symbol: _foo +# CHECK32-NEXT: Opcode: BIND_OPCODE_SET_TYPE_IMM +# CHECK32-NEXT: Imm: 1 +# CHECK32-NEXT: Symbol: '' +# CHECK32-NEXT: Opcode: BIND_OPCODE_SET_DYLIB_ORDINAL_IMM +# CHECK32-NEXT: Imm: 2 +# CHECK32-NEXT: Symbol: '' +# CHECK32-NEXT: Opcode: BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB +# CHECK32-NEXT: Imm: 1 +# CHECK32-NEXT: ULEBExtraData: [ 0x0 ] +# CHECK32-NEXT: Symbol: '' +# CHECK32-NEXT: Opcode: BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB +# CHECK32-NEXT: Imm: 0 +# CHECK32-NEXT: ULEBExtraData: [ 0x2, 0x4 ] +# CHECK32-NEXT: Symbol: '' +# CHECK32-NEXT: Opcode: BIND_OPCODE_SET_ADDEND_SLEB +# CHECK32-NEXT: Imm: 0 +# CHECK32-NEXT: SLEBExtraData: [ 1 ] +# CHECK32-NEXT: Symbol: '' +# CHECK32-NEXT: Opcode: BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB +# CHECK32-NEXT: Imm: 0 +# CHECK32-NEXT: ULEBExtraData: [ 0x1004 ] +# CHECK32-NEXT: Symbol: '' +# CHECK32-NEXT: Opcode: BIND_OPCODE_SET_ADDEND_SLEB +# CHECK32-NEXT: Imm: 0 +# CHECK32-NEXT: SLEBExtraData: [ 0 ] +# CHECK32-NEXT: Symbol: '' +# CHECK32-NEXT: Opcode: BIND_OPCODE_DO_BIND +# CHECK32-NEXT: Imm: 0 +# CHECK32-NEXT: Symbol: '' +# CHECK32-NEXT: Opcode: BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM +# 
CHECK32-NEXT: Imm: 0 +# CHECK32-NEXT: Symbol: _bar +# CHECK32-NEXT: Opcode: BIND_OPCODE_SET_TYPE_IMM +# CHECK32-NEXT: Imm: 1 +# CHECK32-NEXT: Symbol: '' +# CHECK32-NEXT: Opcode: BIND_OPCODE_ADD_ADDR_ULEB +# CHECK32-NEXT: Imm: 0 +# CHECK32-NEXT: ULEBExtraData: [ 0xFFFFFFFFFFFFEFE8 ] +# CHECK32-NEXT: Symbol: '' +# CHECK32-NEXT: Opcode: BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED +# CHECK32-NEXT: Imm: 1 +# CHECK32-NEXT: Symbol: '' +# CHECK32-NEXT: Opcode: BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB +# CHECK32-NEXT: Imm: 0 +# CHECK32-NEXT: ULEBExtraData: [ 0x1004 ] +# CHECK32-NEXT: Symbol: '' +# CHECK32-NEXT: Opcode: BIND_OPCODE_DO_BIND +# CHECK32-NEXT: Imm: 0 +# CHECK32-NEXT: Symbol: '' +# CHECK32-NEXT: Opcode: BIND_OPCODE_DONE +# CHECK32-NEXT: Imm: 0 +# CHECK32-NEXT: Symbol: '' + +# RUN: llvm-objdump --macho --bind %t/test-x86_64 | FileCheck %s -D#PTR=8 --check-prefix=BIND +# RUN: llvm-objdump --macho --bind %t/libtest-arm64_32.dylib | FileCheck %s -D#PTR=4 --check-prefix=BIND +# BIND: Bind table: +# BIND-NEXT: segment section address type addend dylib symbol +# BIND-NEXT: __DATA __data 0x[[#%X,DATA:]] pointer 0 libfoo _foo +# BIND-NEXT: __DATA __data 0x[[#%.8X,DATA + mul(PTR, 2)]] pointer 0 libfoo _foo +# BIND-NEXT: __DATA __data 0x[[#%.8X,DATA + mul(PTR, 4)]] pointer 1 libfoo _foo +# BIND-NEXT: __DATA __data 0x[[#%.8X,DATA + 4096 + mul(PTR, 6)]] pointer 0 libfoo _foo +# BIND-NEXT: __DATA __data 0x[[#%.8X,DATA + PTR]] pointer 0 libfoo _bar +# BIND-NEXT: __DATA __data 0x[[#%.8X,DATA + mul(PTR, 3)]] pointer 0 libfoo _bar +# BIND-NEXT: __DATA __data 0x[[#%.8X,DATA + 4096 + mul(PTR, 5)]] pointer 0 libfoo _bar +# BIND-EMPTY: + +#--- foo.s +.globl _foo, _bar +_foo: + .space 4 +_bar: + .space 4 + +#--- test.s +.ifdef PTR64 +.macro ptr val + .quad \val +.endm +.endif + +.ifdef PTR32 +.macro ptr val + .int \val +.endm +.endif + +.data +ptr _foo +ptr _bar +ptr _foo +ptr _bar +ptr _foo+1 +.zero 0x1000 +ptr _bar +ptr _foo + +.globl _main +.text +_main: diff --git 
a/wild/tests/lld-macho/bp-section-orderer-stress.s b/wild/tests/lld-macho/bp-section-orderer-stress.s new file mode 100644 index 000000000..0bfd99eb3 --- /dev/null +++ b/wild/tests/lld-macho/bp-section-orderer-stress.s @@ -0,0 +1,108 @@ +# REQUIRES: aarch64 + +# Generate a large test case and check that the output is deterministic. + +# RUN: %python %s %t.s %t.proftext + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t.s -o %t.o +# RUN: llvm-profdata merge %t.proftext -o %t.profdata + +# RUN: %no-fatal-warnings-lld -arch arm64 -lSystem -e _main --icf=all -o - %t.o --irpgo-profile-sort=%t.profdata --compression-sort-startup-functions --compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order1.txt +# RUN: %no-fatal-warnings-lld -arch arm64 -lSystem -e _main --icf=all -o - %t.o --irpgo-profile-sort=%t.profdata --compression-sort-startup-functions --compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order2.txt +# RUN: diff %t.order1.txt %t.order2.txt + +# RUN: %lld -arch arm64 -lSystem -e _main --icf=all -o - %t.o --irpgo-profile=%t.profdata --bp-startup-sort=function --bp-compression-sort-startup-functions --bp-compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order1.txt +# RUN: %lld -arch arm64 -lSystem -e _main --icf=all -o - %t.o --irpgo-profile=%t.profdata --bp-startup-sort=function --bp-compression-sort-startup-functions --bp-compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - > %t.order2.txt +# RUN: diff %t.order1.txt %t.order2.txt +import random +import sys + +assembly_filepath = sys.argv[1] +proftext_filepath = sys.argv[2] + +random.seed(1234) +num_functions = 1000 +num_data = 100 +num_traces = 10 + +function_names = [f"f{n}" for n in range(num_functions)] +data_names = [f"d{n}" for n in range(num_data)] +profiled_functions = function_names[: int(num_functions / 2)] + +function_contents = [ + f""" +{name}: + add w0, w0, #{i % 4096} + add w1, w1, #{i % 10} 
+ add w2, w0, #{i % 20} + adrp x3, {name}@PAGE + ret +""" + for i, name in enumerate(function_names) +] + +data_contents = [ + f""" +{name}: + .ascii "s{i % 2}-{i % 3}-{i % 5}" + .xword {name} +""" + for i, name in enumerate(data_names) +] + +trace_contents = [ + f""" +# Weight +1 +{", ".join(random.sample(profiled_functions, len(profiled_functions)))} +""" + for i in range(num_traces) +] + +profile_contents = [ + f""" +{name} +# Func Hash: +{i} +# Num Counters: +1 +# Counter Values: +1 +""" + for i, name in enumerate(profiled_functions) +] + +with open(assembly_filepath, "w") as f: + f.write( + f""" +.text +.globl _main + +_main: + ret + +{"".join(function_contents)} + +.data +{"".join(data_contents)} + +.subsections_via_symbols +""" + ) + +with open(proftext_filepath, "w") as f: + f.write( + f""" +:ir +:temporal_prof_traces + +# Num Traces +{num_traces} +# Trace Stream Size: +{num_traces} + +{"".join(trace_contents)} + +{"".join(profile_contents)} +""" + ) diff --git a/wild/tests/lld-macho/bp-section-orderer.s b/wild/tests/lld-macho/bp-section-orderer.s new file mode 100644 index 000000000..d7de90d6c --- /dev/null +++ b/wild/tests/lld-macho/bp-section-orderer.s @@ -0,0 +1,186 @@ +# REQUIRES: aarch64 + +# RUN: rm -rf %t && split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o +# RUN: llvm-profdata merge %t/a.proftext -o %t/a.profdata + +# RUN: %no-fatal-warnings-lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile-sort=%t/a.profdata --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=STARTUP +# RUN: %no-fatal-warnings-lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile-sort=%t/a.profdata --verbose-bp-section-orderer --icf=all --compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP-ICF + +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile %t/a.profdata --bp-startup-sort=function --verbose-bp-section-orderer 2>&1 | FileCheck %s 
--check-prefix=STARTUP +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --irpgo-profile=%t/a.profdata --bp-startup-sort=function --verbose-bp-section-orderer --icf=all --bp-compression-sort=none 2>&1 | FileCheck %s --check-prefix=STARTUP-ICF +# STARTUP: Ordered 5 sections ([[#]] bytes) using balanced partitioning +# STARTUP-ICF: Ordered 4 sections ([[#]] bytes) using balanced partitioning + +# Check that orderfiles take precedence over BP +# RUN: %no-fatal-warnings-lld -arch arm64 -lSystem -e _main -o - %t/a.o -order_file %t/a.orderfile --irpgo-profile-sort=%t/a.profdata | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE +# RUN: %lld -arch arm64 -lSystem -e _main -o - %t/a.o -order_file %t/a.orderfile --compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE + +# RUN: %lld -arch arm64 -lSystem -e _main -o - %t/a.o -order_file %t/a.orderfile --irpgo-profile=%t/a.profdata --bp-startup-sort=function | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE +# RUN: %lld -arch arm64 -lSystem -e _main -o - %t/a.o -order_file %t/a.orderfile --bp-compression-sort=both | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=ORDERFILE + +# Functions +# ORDERFILE: A +# ORDERFILE: F +# ORDERFILE: E +# ORDERFILE: D +# ORDERFILE-DAG: _main +# ORDERFILE-DAG: _B +# ORDERFILE-DAG: l_C +# ORDERFILE-DAG: merged1.Tgm +# ORDERFILE-DAG: merged2.Tgm + +# Data +# ORDERFILE: s3 +# ORDERFILE: r3 +# ORDERFILE: r2 +# ORDERFILE-DAG: s1 +# ORDERFILE-DAG: s2 +# ORDERFILE-DAG: r1 +# ORDERFILE-DAG: r4 + +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA +# 
RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH +# RUN: %no-fatal-warnings-lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --compression-sort=both --irpgo-profile-sort=%t/a.profdata 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH + +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-FUNC +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=function --icf=all 2>&1 | FileCheck %s --check-prefix=COMPRESSION-ICF-FUNC +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=data 2>&1 | FileCheck %s --check-prefix=COMPRESSION-DATA +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=both 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o --verbose-bp-section-orderer --bp-compression-sort=both --irpgo-profile=%t/a.profdata --bp-startup-sort=function 2>&1 | FileCheck %s --check-prefix=COMPRESSION-BOTH + +# COMPRESSION-FUNC: Ordered 9 sections ([[#]] bytes) using balanced partitioning +# COMPRESSION-ICF-FUNC: Ordered 7 sections ([[#]] bytes) using balanced partitioning +# COMPRESSION-DATA: Ordered 7 sections ([[#]] bytes) using balanced partitioning +# COMPRESSION-BOTH: Ordered 16 sections ([[#]] bytes) using balanced partitioning + +#--- a.s +.text +.globl _main, A, _B, l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222 + +_main: + ret +A: + ret +_B: + add w0, w0, #1 + bl A + ret +l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222: + add w0, w0, #2 + bl A + ret +D: + add w0, w0, #2 + bl _B + ret +E: + add w0, w0, #2 + bl 
l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222 + ret +F: + add w0, w0, #3 + bl l_C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222 + ret +merged1.Tgm: + add w0, w0, #101 + ret +merged2.Tgm: + add w0, w0, #101 + ret + +.data +s1: + .ascii "hello world" +s2: + .ascii "i am a string" +s3: + .ascii "this is s3" +r1: + .quad s1 +r2: + .quad r1 +r3: + .quad r2 +r4: + .quad s2 + +# cstrings are ignored by runBalancedPartitioning() +.cstring +cstr: + .asciz "this is cstr" + +.bss +bss0: + .zero 10 + +.subsections_via_symbols + +#--- a.proftext +:ir +:temporal_prof_traces +# Num Traces +1 +# Trace Stream Size: +1 +# Weight +1 +A, B, C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666, merged1, merged2 + +A +# Func Hash: +1111 +# Num Counters: +1 +# Counter Values: +1 + +B +# Func Hash: +2222 +# Num Counters: +1 +# Counter Values: +1 + +C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666 +# Func Hash: +3333 +# Num Counters: +1 +# Counter Values: +1 + +D +# Func Hash: +4444 +# Num Counters: +1 +# Counter Values: +1 + +merged1 +# Func Hash: +5555 +# Num Counters: +1 +# Counter Values: +1 + +merged2 +# Func Hash: +6666 +# Num Counters: +1 +# Counter Values: +1 + +#--- a.orderfile +A +F +E +D +s3 +r3 +r2 diff --git a/wild/tests/lld-macho/bss.s b/wild/tests/lld-macho/bss.s new file mode 100644 index 000000000..d773e6762 --- /dev/null +++ b/wild/tests/lld-macho/bss.s @@ -0,0 +1,125 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: %lld -o %t %t.o +# RUN: llvm-readobj --section-headers --macho-segment %t | FileCheck %s + +## Check that __bss takes up zero file size, is at file offset zero, and appears +## at the end of its segment. Also check that __tbss is placed immediately +## before it. +## Zerofill sections in other segments (i.e. not __DATA) should also be placed +## at the end. 
+ +# CHECK: Index: 1 +# CHECK-NEXT: Name: __data +# CHECK-NEXT: Segment: __DATA +# CHECK-NEXT: Address: +# CHECK-NEXT: Size: 0x8 +# CHECK-NEXT: Offset: 4096 +# CHECK-NEXT: Alignment: 0 +# CHECK-NEXT: RelocationOffset: 0x0 +# CHECK-NEXT: RelocationCount: 0 +# CHECK-NEXT: Type: Regular (0x0) +# CHECK-NEXT: Attributes [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Reserved1: 0x0 +# CHECK-NEXT: Reserved2: 0x0 +# CHECK-NEXT: Reserved3: 0x0 + +# CHECK: Index: 2 +# CHECK-NEXT: Name: __thread_bss +# CHECK-NEXT: Segment: __DATA +# CHECK-NEXT: Address: +# CHECK-NEXT: Size: 0x4 +# CHECK-NEXT: Offset: 0 +# CHECK-NEXT: Alignment: 0 +# CHECK-NEXT: RelocationOffset: 0x0 +# CHECK-NEXT: RelocationCount: 0 +# CHECK-NEXT: Type: ThreadLocalZerofill (0x12) +# CHECK-NEXT: Attributes [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Reserved1: 0x0 +# CHECK-NEXT: Reserved2: 0x0 +# CHECK-NEXT: Reserved3: 0x0 + +# CHECK: Index: 3 +# CHECK-NEXT: Name: __bss +# CHECK-NEXT: Segment: __DATA +# CHECK-NEXT: Address: +# CHECK-NEXT: Size: 0x10000 +# CHECK-NEXT: Offset: 0 +# CHECK-NEXT: Alignment: 0 +# CHECK-NEXT: RelocationOffset: 0x0 +# CHECK-NEXT: RelocationCount: 0 +# CHECK-NEXT: Type: ZeroFill (0x1) +# CHECK-NEXT: Attributes [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Reserved1: 0x0 +# CHECK-NEXT: Reserved2: 0x0 +# CHECK-NEXT: Reserved3: 0x0 + +# CHECK: Index: 4 +# CHECK-NEXT: Name: foo +# CHECK-NEXT: Segment: FOO +# CHECK-NEXT: Address: +# CHECK-NEXT: Size: 0x8 +# CHECK-NEXT: Offset: 8192 +# CHECK-NEXT: Alignment: 0 +# CHECK-NEXT: RelocationOffset: 0x0 +# CHECK-NEXT: RelocationCount: 0 +# CHECK-NEXT: Type: Regular (0x0) +# CHECK-NEXT: Attributes [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Reserved1: 0x0 +# CHECK-NEXT: Reserved2: 0x0 +# CHECK-NEXT: Reserved3: 0x0 + +# CHECK: Index: 5 +# CHECK-NEXT: Name: bss +# CHECK-NEXT: Segment: FOO +# CHECK-NEXT: Address: +# CHECK-NEXT: Size: 0x8 +# CHECK-NEXT: Offset: 0 +# CHECK-NEXT: Alignment: 0 +# CHECK-NEXT: RelocationOffset: 0x0 +# CHECK-NEXT: RelocationCount: 0 +# 
CHECK-NEXT: Type: ZeroFill (0x1) +# CHECK-NEXT: Attributes [ (0x0) +# CHECK-NEXT: ] +# CHECK-NEXT: Reserved1: 0x0 +# CHECK-NEXT: Reserved2: 0x0 +# CHECK-NEXT: Reserved3: 0x0 + +# CHECK: Name: __DATA +# CHECK-NEXT: Size: +# CHECK-NEXT: vmaddr: +# CHECK-NEXT: vmsize: 0x11000 +# CHECK-NEXT: fileoff: +# CHECK-NEXT: filesize: 4096 + +# CHECK: Name: FOO +# CHECK-NEXT: Size: +# CHECK-NEXT: vmaddr: +# CHECK-NEXT: vmsize: 0x9000 +# CHECK-NEXT: fileoff: +# CHECK-NEXT: filesize: 4096 + +.globl _main + +.text +_main: + movq $0, %rax + retq + +.bss +.zero 0x8000 + +.tbss _foo, 4 +.zero 0x8000 + +.data +.quad 0x1234 + +.zerofill FOO,bss,_zero_foo,0x8000 + +.section FOO,foo +.quad 123 diff --git a/wild/tests/lld-macho/cgdata-generate-merge.s b/wild/tests/lld-macho/cgdata-generate-merge.s new file mode 100644 index 000000000..4b6d4a5d8 --- /dev/null +++ b/wild/tests/lld-macho/cgdata-generate-merge.s @@ -0,0 +1,89 @@ +# UNSUPPORTED: system-windows +# REQUIRES: aarch64 + +# RUN: rm -rf %t; split-file %s %t + +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. 
+# RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata +# RUN: echo -n "s/<RAW_BYTES>/" > %t/raw-1-sed.txt +# RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' >> %t/raw-1-sed.txt +# RUN: echo "/g" >> %t/raw-1-sed.txt +# RUN: sed -f %t/raw-1-sed.txt %t/merge-template.s > %t/merge-1.s +# RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata +# RUN: echo -n "s/<RAW_BYTES>/" > %t/raw-2-sed.txt +# RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' >> %t/raw-2-sed.txt +# RUN: echo "/g" >> %t/raw-2-sed.txt +# RUN: sed -f %t/raw-2-sed.txt %t/merge-template.s > %t/merge-2.s + +# RUN: llvm-mc -filetype obj -triple arm64-apple-darwin %t/merge-1.s -o %t/merge-1.o +# RUN: llvm-mc -filetype obj -triple arm64-apple-darwin %t/merge-2.s -o %t/merge-2.o +# RUN: llvm-mc -filetype obj -triple arm64-apple-darwin %t/main.s -o %t/main.o + +# This checks if the codegen data from the linker is identical to the merged codegen data +# from each object file, which is obtained using the llvm-cgdata tool. +# RUN: %no-arg-lld -dylib -arch arm64 -platform_version ios 14.0 15.0 -o %t/out \ +# RUN: %t/merge-1.o %t/merge-2.o %t/main.o --codegen-data-generate-path=%t/out-cgdata +# RUN: llvm-cgdata --merge %t/merge-1.o %t/merge-2.o %t/main.o -o %t/merge-cgdata +# RUN: diff %t/out-cgdata %t/merge-cgdata + +# Merge order doesn't matter in the yaml format. `main.o` is dropped due to missing __llvm_merge. +# RUN: llvm-cgdata --merge %t/merge-2.o %t/merge-1.o -o %t/merge-cgdata-shuffle +# RUN: llvm-cgdata --convert %t/out-cgdata -o %t/out-cgdata.yaml +# RUN: llvm-cgdata --convert %t/merge-cgdata-shuffle -o %t/merge-cgdata-shuffle.yaml +# RUN: diff %t/out-cgdata.yaml %t/merge-cgdata-shuffle.yaml + +# We can also generate the merged codegen data from the executable that is not dead-stripped. 
+# RUN: llvm-objdump -h %t/out| FileCheck %s +# CHECK: __llvm_merge +# RUN: llvm-cgdata --merge %t/out -o %t/merge-cgdata-exe +# RUN: diff %t/merge-cgdata-exe %t/merge-cgdata + +# Dead-strip will remove __llvm_merge sections from the final executable. +# But the codegen data is still correctly produced from the linker. +# RUN: %no-arg-lld -dylib -arch arm64 -platform_version ios 14.0 15.0 -o %t/out-strip \ +# RUN: %t/merge-1.o %t/merge-2.o %t/main.o -dead_strip --codegen-data-generate-path=%t/out-cgdata-strip +# RUN: llvm-cgdata --merge %t/merge-1.o %t/merge-2.o %t/main.o -o %t/merge-cgdata-strip +# RUN: diff %t/out-cgdata-strip %t/merge-cgdata-strip +# RUN: diff %t/out-cgdata-strip %t/merge-cgdata + +# Ensure no __llvm_merge section remains in the executable. +# RUN: llvm-objdump -h %t/out-strip | FileCheck %s --check-prefix=STRIP +# STRIP-NOT: __llvm_merge + +#--- raw-1.cgtext +:stable_function_map +--- +- Hash: 123 + FunctionName: f1 + ModuleName: 'foo.bc' + InstCount: 7 + IndexOperandHashes: + - InstIndex: 3 + OpndIndex: 0 + OpndHash: 456 +... + +#--- raw-2.cgtext +:stable_function_map +--- +- Hash: 123 + FunctionName: f2 + ModuleName: 'goo.bc' + InstCount: 7 + IndexOperandHashes: + - InstIndex: 3 + OpndIndex: 0 + OpndHash: 789 +... + +#--- merge-template.s +.section __DATA,__llvm_merge +_data: +.byte <RAW_BYTES> + +#--- main.s +.globl _main + +.text +_main: + ret diff --git a/wild/tests/lld-macho/cgdata-generate.s b/wild/tests/lld-macho/cgdata-generate.s new file mode 100644 index 000000000..63efc81cd --- /dev/null +++ b/wild/tests/lld-macho/cgdata-generate.s @@ -0,0 +1,93 @@ +# UNSUPPORTED: system-windows +# REQUIRES: aarch64 + +# RUN: rm -rf %t; split-file %s %t + +# Synthesize raw cgdata without the header (32 byte) from the indexed cgdata. 
+# RUN: llvm-cgdata --convert --format binary %t/raw-1.cgtext -o %t/raw-1.cgdata +# RUN: echo -n "s/<RAW_BYTES>/" > %t/raw-1-sed.txt +# RUN: od -t x1 -j 32 -An %t/raw-1.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' >> %t/raw-1-sed.txt +# RUN: echo "/g" >> %t/raw-1-sed.txt +# RUN: sed -f %t/raw-1-sed.txt %t/merge-template.s > %t/merge-1.s +# RUN: llvm-cgdata --convert --format binary %t/raw-2.cgtext -o %t/raw-2.cgdata +# RUN: echo -n "s/<RAW_BYTES>/" > %t/raw-2-sed.txt +# RUN: od -t x1 -j 32 -An %t/raw-2.cgdata | tr -d '\n\r\t' | sed 's/[ ][ ]*/ /g; s/^[ ]*//; s/[ ]*$//; s/[ ]/,0x/g; s/^/0x/' >> %t/raw-2-sed.txt +# RUN: echo "/g" >> %t/raw-2-sed.txt +# RUN: sed -f %t/raw-2-sed.txt %t/merge-template.s > %t/merge-2.s + +# RUN: llvm-mc -filetype obj -triple arm64-apple-darwin %t/merge-1.s -o %t/merge-1.o +# RUN: llvm-mc -filetype obj -triple arm64-apple-darwin %t/merge-2.s -o %t/merge-2.o +# RUN: llvm-mc -filetype obj -triple arm64-apple-darwin %t/main.s -o %t/main.o + +# This checks if the codegen data from the linker is identical to the merged codegen data +# from each object file, which is obtained using the llvm-cgdata tool. +# RUN: %no-arg-lld -dylib -arch arm64 -platform_version ios 14.0 15.0 -o %t/out \ +# RUN: %t/merge-1.o %t/merge-2.o %t/main.o --codegen-data-generate-path=%t/out-cgdata +# RUN: llvm-cgdata --merge %t/merge-1.o %t/merge-2.o %t/main.o -o %t/merge-cgdata +# RUN: diff %t/out-cgdata %t/merge-cgdata + +# Merge order doesn't matter. `main.o` is dropped due to missing __llvm_outline. +# RUN: llvm-cgdata --merge %t/merge-2.o %t/merge-1.o -o %t/merge-cgdata-shuffle +# RUN: diff %t/out-cgdata %t/merge-cgdata-shuffle + +# We can also generate the merged codegen data from the executable that is not dead-stripped.
+# RUN: llvm-objdump -h %t/out| FileCheck %s +# CHECK: __llvm_outline +# RUN: llvm-cgdata --merge %t/out -o %t/merge-cgdata-exe +# RUN: diff %t/merge-cgdata-exe %t/merge-cgdata + +# Dead-strip will remove __llvm_outline sections from the final executable. +# But the codegen data is still correctly produced from the linker. +# RUN: %no-arg-lld -dylib -arch arm64 -platform_version ios 14.0 15.0 -o %t/out-strip \ +# RUN: %t/merge-1.o %t/merge-2.o %t/main.o -dead_strip --codegen-data-generate-path=%t/out-cgdata-strip +# RUN: llvm-cgdata --merge %t/merge-1.o %t/merge-2.o %t/main.o -o %t/merge-cgdata-strip +# RUN: diff %t/out-cgdata-strip %t/merge-cgdata-strip +# RUN: diff %t/out-cgdata-strip %t/merge-cgdata + +# Ensure no __llvm_outline section remains in the executable. +# RUN: llvm-objdump -h %t/out-strip | FileCheck %s --check-prefix=STRIP +# STRIP-NOT: __llvm_outline + +#--- raw-1.cgtext +:outlined_hash_tree +0: + Hash: 0x0 + Terminals: 0 + SuccessorIds: [ 1 ] +1: + Hash: 0x1 + Terminals: 0 + SuccessorIds: [ 2 ] +2: + Hash: 0x2 + Terminals: 4 + SuccessorIds: [ ] +... + +#--- raw-2.cgtext +:outlined_hash_tree +0: + Hash: 0x0 + Terminals: 0 + SuccessorIds: [ 1 ] +1: + Hash: 0x1 + Terminals: 0 + SuccessorIds: [ 2 ] +2: + Hash: 0x3 + Terminals: 5 + SuccessorIds: [ ] +...
+ +#--- merge-template.s +.section __DATA,__llvm_outline +_data: +.byte <RAW_BYTES> + +#--- main.s +.globl _main + +.text +_main: + ret diff --git a/wild/tests/lld-macho/compact-unwind.s b/wild/tests/lld-macho/compact-unwind.s new file mode 100644 index 000000000..6516f7162 --- /dev/null +++ b/wild/tests/lld-macho/compact-unwind.s @@ -0,0 +1,184 @@ +# REQUIRES: x86, aarch64 +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 -emit-compact-unwind-non-canonical=true %t/my-personality.s -o %t/x86_64-my-personality.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 -emit-compact-unwind-non-canonical=true %t/main.s -o %t/x86_64-main.o +# RUN: %lld -arch x86_64 -lSystem -lc++ %t/x86_64-my-personality.o %t/x86_64-main.o -o %t/x86_64-personality-first +# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/x86_64-personality-first | FileCheck %s --check-prefixes=FIRST,CHECK -D#%x,BASE=0x100000000 -DSEG=__TEXT +# RUN: %lld -dead_strip -arch x86_64 -lSystem -lc++ %t/x86_64-main.o %t/x86_64-my-personality.o -o %t/x86_64-personality-second +# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/x86_64-personality-second | FileCheck %s --check-prefixes=SECOND,CHECK -D#%x,BASE=0x100000000 -DSEG=__TEXT + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin19.0.0 -emit-compact-unwind-non-canonical=true %t/my-personality.s -o %t/arm64-my-personality.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin19.0.0 -emit-compact-unwind-non-canonical=true %t/main.s -o %t/arm64-main.o +# RUN: %lld -arch arm64 -lSystem -lc++ %t/arm64-my-personality.o %t/arm64-main.o -o %t/arm64-personality-first +# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/arm64-personality-first | FileCheck %s --check-prefixes=FIRST,CHECK -D#%x,BASE=0x100000000 -DSEG=__TEXT +# RUN: %lld -dead_strip -arch arm64 -lSystem -lc++ %t/arm64-main.o %t/arm64-my-personality.o -o
%t/arm64-personality-second +# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/arm64-personality-second | FileCheck %s --check-prefixes=SECOND,CHECK -D#%x,BASE=0x100000000 -DSEG=__TEXT + +# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos -emit-compact-unwind-non-canonical=true %t/my-personality.s -o %t/arm64-32-my-personality.o +# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos -emit-compact-unwind-non-canonical=true %t/main.s -o %t/arm64-32-main.o +# RUN: %lld-watchos -lSystem -lc++ %t/arm64-32-my-personality.o %t/arm64-32-main.o -o %t/arm64-32-personality-first +# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/arm64-32-personality-first | FileCheck %s --check-prefixes=FIRST,CHECK -D#%x,BASE=0x4000 -DSEG=__TEXT +# RUN: %lld-watchos -dead_strip -lSystem -lc++ %t/arm64-32-main.o %t/arm64-32-my-personality.o -o %t/arm64-32-personality-second +# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/arm64-32-personality-second | FileCheck %s --check-prefixes=SECOND,CHECK -D#%x,BASE=0x4000 -DSEG=__TEXT + +# RUN: %lld -arch x86_64 -rename_section __TEXT __gcc_except_tab __RODATA __gcc_except_tab -lSystem -lc++ %t/x86_64-my-personality.o %t/x86_64-main.o -o %t/x86_64-personality-first +# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/x86_64-personality-first | FileCheck %s --check-prefixes=FIRST,CHECK -D#%x,BASE=0x100000000 -DSEG=__RODATA +# RUN: %lld -dead_strip -arch x86_64 -rename_section __TEXT __gcc_except_tab __RODATA __gcc_except_tab -lSystem -lc++ %t/x86_64-main.o %t/x86_64-my-personality.o -o %t/x86_64-personality-second +# RUN: llvm-objdump --macho --unwind-info --syms --indirect-symbols --rebase %t/x86_64-personality-second | FileCheck %s --check-prefixes=SECOND,CHECK -D#%x,BASE=0x100000000 -DSEG=__RODATA + +# FIRST: Indirect symbols for (__DATA_CONST,__got) +# FIRST-NEXT: address index name +# FIRST-DAG: 
0x[[#%x,GXX_PERSONALITY:]] [[#]] ___gxx_personality_v0 +# FIRST-DAG: 0x[[#%x,MY_PERSONALITY:]] LOCAL + +# SECOND: Indirect symbols for (__DATA_CONST,__got) +# SECOND-NEXT: address index name +# SECOND-DAG: 0x[[#%x,GXX_PERSONALITY:]] [[#]] ___gxx_personality_v0 +# SECOND-DAG: 0x[[#%x,MY_PERSONALITY:]] LOCAL + +# CHECK: SYMBOL TABLE: +# CHECK-DAG: [[#%x,MAIN:]] g F __TEXT,__text _main +# CHECK-DAG: [[#%x,QUUX:]] g F __TEXT,__text _quux +# CHECK-DAG: [[#%x,FOO:]] l F __TEXT,__text _foo +# CHECK-DAG: [[#%x,BAZ:]] l F __TEXT,__text _baz +# CHECK-DAG: [[#%x,EXCEPTION0:]] g O [[SEG]],__gcc_except_tab _exception0 +# CHECK-DAG: [[#%x,EXCEPTION1:]] g O [[SEG]],__gcc_except_tab _exception1 + +# CHECK: Contents of __unwind_info section: +# CHECK: Personality functions: (count = 2) +# CHECK-DAG: personality[{{[0-9]+}}]: 0x{{0*}}[[#MY_PERSONALITY-BASE]] +# CHECK-DAG: personality[{{[0-9]+}}]: 0x{{0*}}[[#GXX_PERSONALITY-BASE]] +# CHECK: Top level indices: (count = 2) +# CHECK-DAG: [0]: function offset={{.*}}, 2nd level page offset=0x[[#%x,PAGEOFF:]], +# CHECK-DAG: [1]: function offset={{.*}}, 2nd level page offset=0x00000000, +# CHECK: LSDA descriptors: +# CHECK-DAG: function offset=0x[[#%.8x,FOO-BASE]], LSDA offset=0x[[#%.8x,EXCEPTION0-BASE]] +# CHECK-DAG: function offset=0x[[#%.8x,MAIN-BASE]], LSDA offset=0x[[#%.8x,EXCEPTION1-BASE]] +# CHECK: Second level indices: +# CHECK-NEXT: Second level index[0]: offset in section=0x[[#%.8x,PAGEOFF]] +# CHECK-DAG: function offset=0x[[#%.8x,MAIN-BASE]], encoding +# CHECK-DAG: function offset=0x[[#%.8x,FOO-BASE]], encoding +# CHECK-DAG: function offset=0x[[#%.8x,BAZ-BASE]], encoding +# CHECK-DAG: function offset=0x[[#%.8x,QUUX-BASE]], encoding{{.*}}=0x00000000 + +## Check that we do not add rebase opcodes to the compact unwind section. 
+# CHECK: Rebase table: +# CHECK-NEXT: segment section address type +# CHECK-NEXT: __DATA_CONST __got 0x{{[0-9A-F]*}} pointer +# CHECK-NOT: __TEXT + +## Check that we don't create an __unwind_info section if no unwind info +## remains after dead-stripping. +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 \ +# RUN: %t/empty-after-dead-strip.s -o %t/x86_64-empty-after-dead-strip.o +# RUN: %lld -dylib -dead_strip -arch x86_64 -lSystem \ +# RUN: %t/x86_64-empty-after-dead-strip.o -o %t/x86_64-empty-after-strip.dylib +# RUN: llvm-objdump --macho --unwind-info %t/x86_64-empty-after-strip.dylib | \ +# RUN: FileCheck %s --check-prefixes=NOUNWIND --allow-empty +# NOUNWIND-NOT: Contents of __unwind_info section: + +#--- my-personality.s +.globl _my_personality, _exception0 +.text +.p2align 2 +.no_dead_strip _foo +_foo: + .cfi_startproc +## This will generate a section relocation. + .cfi_personality 155, _my_personality + .cfi_lsda 16, _exception0 + .cfi_def_cfa_offset 16 + ret + .cfi_endproc + +.p2align 2 +.no_dead_strip _bar +_bar: + .cfi_startproc +## Check that we dedup references to the same statically-linked personality. + .cfi_personality 155, _my_personality + .cfi_lsda 16, _exception0 + .cfi_def_cfa_offset 16 + ret + .cfi_endproc + +.data +.p2align 2 +## We put this personality in `__data` to test if we correctly handle +## personality symbols whose output addresses occur after that of the +## `__unwind_info` section. +_my_personality: + ret + +.section __TEXT,__gcc_except_tab +_exception0: + .space 1 + +.subsections_via_symbols + +#--- main.s +.globl _main, _quux, _my_personality, _exception1 + +.text +.p2align 2 +_main: + .cfi_startproc + .cfi_personality 155, ___gxx_personality_v0 + .cfi_lsda 16, _exception1 + .cfi_def_cfa_offset 16 + ret + .cfi_endproc + +## _quux has no unwind information. 
+## (In real life, it'd be part of a separate TU that was built with +## -fno-exceptions, while the previous and next TU might be Objective-C++ +## which has unwind info for Objective-C). +.p2align 2 +.no_dead_strip _quux +_quux: + ret + +.globl _abs +.no_dead_strip _abs +_abs = 4 + +.p2align 2 +.no_dead_strip _baz +_baz: + .cfi_startproc +## This will generate a symbol relocation. Check that we reuse the personality +## referenced by the section relocation in my_personality.s. + .cfi_personality 155, _my_personality + .cfi_lsda 16, _exception1 + .cfi_def_cfa_offset 16 + ret + .cfi_endproc + +.globl _stripped +_stripped: + .cfi_startproc + .cfi_personality 155, ___gxx_personality_v0 + .cfi_lsda 16, _exception1 + .cfi_def_cfa_offset 16 + ret + .cfi_endproc + + +.section __TEXT,__gcc_except_tab +_exception1: + .space 1 + +.subsections_via_symbols + +#--- empty-after-dead-strip.s +.text + +## Local symbol with unwind info. +## The symbol is removed by -dead_strip. +_foo : + .cfi_startproc + .cfi_def_cfa_offset 16 + retq + .cfi_endproc + +.subsections_via_symbols diff --git a/wild/tests/lld-macho/compression-order-sections.s b/wild/tests/lld-macho/compression-order-sections.s new file mode 100644 index 000000000..40ceaf7cc --- /dev/null +++ b/wild/tests/lld-macho/compression-order-sections.s @@ -0,0 +1,112 @@ +# REQUIRES: aarch64 + +# RUN: rm -rf %t && split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/b.s -o %t/b.o + +## Wildcard glob: all sections go to a single group +# RUN: %lld -arch arm64 -e _main -o %t/a.out %t/a.o %t/b.o \ +# RUN: --bp-compression-sort-section="*" \ +# RUN: --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=WILDCARD + +# WILDCARD: Sections for compression: 7 +# WILDCARD: Compression groups: 1 +# WILDCARD: *: 7 sections + +## Two globs: sections are grouped by the winning glob +# RUN: %lld -arch arm64 -e _main -o %t/a.out %t/a.o 
%t/b.o \ +# RUN: --bp-compression-sort-section="__DATA*" \ +# RUN: --bp-compression-sort-section="__TEXT*" \ +# RUN: --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=TWO-GLOBS + +## Deprecated --bp-compression-sort=both still works +# RUN: %lld -arch arm64 -e _main -o %t/a.out %t/a.o %t/b.o \ +# RUN: --bp-compression-sort=both \ +# RUN: --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=LEGACY-BOTH + +## Deprecated function/data modes still use the legacy buckets. +# RUN: %lld -arch arm64 -e _main -o %t/a.out %t/a.o %t/b.o \ +# RUN: --bp-compression-sort=function \ +# RUN: --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=LEGACY-FUNCTION +# RUN: %lld -arch arm64 -e _main -o %t/a.out %t/a.o %t/b.o \ +# RUN: --bp-compression-sort=data \ +# RUN: --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=LEGACY-DATA + +# TWO-GLOBS: Sections for compression: 7 +# TWO-GLOBS: Compression groups: 2 +# TWO-GLOBS: __DATA*: 6 sections +# TWO-GLOBS: __TEXT*: 1 sections + +# LEGACY-BOTH: Sections for compression: 7 +# LEGACY-BOTH: Compression groups: 2 +# LEGACY-BOTH: legacy:function: 1 sections +# LEGACY-BOTH: legacy:data: 6 sections + +# LEGACY-FUNCTION: Sections for compression: 1 +# LEGACY-FUNCTION: Compression groups: 1 +# LEGACY-FUNCTION: legacy:function: 1 sections + +# LEGACY-DATA: Sections for compression: 6 +# LEGACY-DATA: Compression groups: 1 +# LEGACY-DATA: legacy:data: 6 sections + +## Single glob matching only TEXT +# RUN: %lld -arch arm64 -e _main -o %t/a.out %t/a.o %t/b.o \ +# RUN: --bp-compression-sort-section="__TEXT*" \ +# RUN: --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=TEXT + +# TEXT: Sections for compression: 1 +# TEXT: Compression groups: 1 +# TEXT: __TEXT*: 1 sections + +## Exact section name glob +# RUN: %lld -arch arm64 -e _main -o %t/a.out %t/a.o %t/b.o \ +# RUN: --bp-compression-sort-section="__DATA__custom" \ +# RUN: --verbose-bp-section-orderer 2>&1 | FileCheck %s 
--check-prefix=DATA + +# DATA: Sections for compression: 2 +# DATA: Compression groups: 1 +# DATA: __DATA__custom: 2 sections + +## Match priority: explicit match_priority wins +# RUN: %lld -arch arm64 -e _main -o %t/match.out %t/a.o %t/b.o \ +# RUN: --bp-compression-sort-section="__DATA*" \ +# RUN: --bp-compression-sort-section="__DATA__custom=0=1" \ +# RUN: --verbose-bp-section-orderer 2>&1 | FileCheck %s --check-prefix=MATCH + +# MATCH: Compression groups: 2 +# MATCH: __DATA*: 4 sections +# MATCH: __DATA__custom: 2 sections + +#--- a.s + .text + .globl _main +_main: + ret + + .data +data_01: + .ascii "data_01" +data_02: + .ascii "data_02" +data_03: + .ascii "data_03" + + .section __DATA,__custom +custom_06: + .ascii "custom_06" +custom_07: + .ascii "custom_07" + + .bss +bss0: + .zero 10 + +.subsections_via_symbols + +#--- b.s + .data +data_11: + .ascii "data_11" + +.subsections_via_symbols diff --git a/wild/tests/lld-macho/cstring-tailmerge-objc.s b/wild/tests/lld-macho/cstring-tailmerge-objc.s new file mode 100644 index 000000000..46b2bbf9d --- /dev/null +++ b/wild/tests/lld-macho/cstring-tailmerge-objc.s @@ -0,0 +1,144 @@ +; REQUIRES: aarch64 +; RUN: rm -rf %t && split-file %s %t + +; Test that ObjC method names are tail merged and +; ObjCSelRefsHelper::makeSelRef() still works correctly + +; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o +; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/a.o -o %t/a +; RUN: llvm-objdump --macho --section="__TEXT,__objc_methname" %t/a | FileCheck %s --implicit-check-not=error + +; RUN: %lld -dylib -arch arm64 --no-tail-merge-strings %t/a.o -o %t/nomerge +; RUN: llvm-objdump --macho --section="__TEXT,__objc_methname" %t/nomerge | FileCheck %s --check-prefixes=CHECK,NOMERGE --implicit-check-not=error + +; CHECK: withBar:error: +; NOMERGE: error: + +;--- a.mm +__attribute__((objc_root_class)) +@interface Foo +- (void)withBar:(int)bar error:(int)error; +- (void)error:(int)error; +@end + 
+@implementation Foo +- (void)withBar:(int)bar error:(int)error {} +- (void)error:(int)error {} +@end + +void *_objc_empty_cache; +void *_objc_empty_vtable; +;--- gen +clang -Oz -target arm64-apple-darwin a.mm -S -o - +;--- a.s + .build_version macos, 11, 0 + .section __TEXT,__text,regular,pure_instructions + .p2align 2 ; -- Begin function -[Foo withBar:error:] +"-[Foo withBar:error:]": ; @"\01-[Foo withBar:error:]" + .cfi_startproc +; %bb.0: + ret + .cfi_endproc + ; -- End function + .p2align 2 ; -- Begin function -[Foo error:] +"-[Foo error:]": ; @"\01-[Foo error:]" + .cfi_startproc +; %bb.0: + ret + .cfi_endproc + ; -- End function + .globl __objc_empty_vtable ; @_objc_empty_vtable +.zerofill __DATA,__common,__objc_empty_vtable,8,3 + .section __DATA,__objc_data + .globl _OBJC_CLASS_$_Foo ; @"OBJC_CLASS_$_Foo" + .p2align 3, 0x0 +_OBJC_CLASS_$_Foo: + .quad _OBJC_METACLASS_$_Foo + .quad 0 + .quad __objc_empty_cache + .quad __objc_empty_vtable + .quad __OBJC_CLASS_RO_$_Foo + + .globl _OBJC_METACLASS_$_Foo ; @"OBJC_METACLASS_$_Foo" + .p2align 3, 0x0 +_OBJC_METACLASS_$_Foo: + .quad _OBJC_METACLASS_$_Foo + .quad _OBJC_CLASS_$_Foo + .quad __objc_empty_cache + .quad __objc_empty_vtable + .quad __OBJC_METACLASS_RO_$_Foo + + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_: ; @OBJC_CLASS_NAME_ + .asciz "Foo" + + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_METACLASS_RO_$_Foo" +__OBJC_METACLASS_RO_$_Foo: + .long 3 ; 0x3 + .long 40 ; 0x28 + .long 40 ; 0x28 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .quad 0 + + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_: ; @OBJC_METH_VAR_NAME_ + .asciz "withBar:error:" + + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_: ; @OBJC_METH_VAR_TYPE_ + .asciz "v24@0:8i16i20" + + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_.1: ; @OBJC_METH_VAR_NAME_.1 + .asciz "error:" + + .section 
__TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_.2: ; @OBJC_METH_VAR_TYPE_.2 + .asciz "v20@0:8i16" + + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_INSTANCE_METHODS_Foo" +__OBJC_$_INSTANCE_METHODS_Foo: + .long 24 ; 0x18 + .long 2 ; 0x2 + .quad l_OBJC_METH_VAR_NAME_ + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[Foo withBar:error:]" + .quad l_OBJC_METH_VAR_NAME_.1 + .quad l_OBJC_METH_VAR_TYPE_.2 + .quad "-[Foo error:]" + + .p2align 3, 0x0 ; @"_OBJC_CLASS_RO_$_Foo" +__OBJC_CLASS_RO_$_Foo: + .long 2 ; 0x2 + .long 0 ; 0x0 + .long 0 ; 0x0 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad __OBJC_$_INSTANCE_METHODS_Foo + .quad 0 + .quad 0 + .quad 0 + .quad 0 + + .globl __objc_empty_cache ; @_objc_empty_cache +.zerofill __DATA,__common,__objc_empty_cache,8,3 + .section __DATA,__objc_classlist,regular,no_dead_strip + .p2align 3, 0x0 ; @"OBJC_LABEL_CLASS_$" +l_OBJC_LABEL_CLASS_$: + .quad _OBJC_CLASS_$_Foo + + .section __DATA,__objc_imageinfo,regular,no_dead_strip +L_OBJC_IMAGE_INFO: + .long 0 + .long 64 + +.subsections_via_symbols diff --git a/wild/tests/lld-macho/cstring-tailmerge.s b/wild/tests/lld-macho/cstring-tailmerge.s new file mode 100644 index 000000000..740f971eb --- /dev/null +++ b/wild/tests/lld-macho/cstring-tailmerge.s @@ -0,0 +1,85 @@ +; REQUIRES: aarch64 +; RUN: rm -rf %t && split-file %s %t + +; RUN: sed "s/<ALIGN>/0/g" %t/align.s.template > %t/align-1.s +; RUN: sed "s/<ALIGN>/1/g" %t/align.s.template > %t/align-2.s +; RUN: sed "s/<ALIGN>/2/g" %t/align.s.template > %t/align-4.s + +; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/first.s -o %t/first.o +; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-1.s -o %t/align-1.o +; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-2.s -o %t/align-2.o +; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/align-4.s -o %t/align-4.o + +; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-1.o -o %t/align-1 +; RUN: llvm-objdump --macho
--section="__TEXT,__cstring" --syms %t/align-1 | FileCheck %s --check-prefixes=CHECK,ALIGN1 + +; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-2.o -o %t/align-2 +; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-2 | FileCheck %s --check-prefixes=CHECK,ALIGN2 + +; RUN: %lld -dylib -arch arm64 --tail-merge-strings %t/first.o %t/align-4.o -o %t/align-4 +; RUN: llvm-objdump --macho --section="__TEXT,__cstring" --syms %t/align-4 | FileCheck %s --check-prefixes=CHECK,ALIGN4 + +; CHECK: Contents of (__TEXT,__cstring) section +; CHECK: [[#%.16x,START:]] get awkward offset{{$}} + +; ALIGN1: [[#%.16x,START+19]] myotherlongstr{{$}} +; ALIGN1: [[#%.16x,START+19+15]] otherstr{{$}} + +; ALIGN2: [[#%.16x,START+20]] myotherlongstr{{$}} +; ALIGN2: [[#%.16x,START+20+16]] longstr{{$}} +; ALIGN2: [[#%.16x,START+20+16+8]] otherstr{{$}} +; ALIGN2: [[#%.16x,START+20+16+8+10]] str{{$}} + +; ALIGN4: [[#%.16x,START+20]] myotherlongstr{{$}} +; ALIGN4: [[#%.16x,START+20+16]] otherlongstr{{$}} +; ALIGN4: [[#%.16x,START+20+16+16]] longstr{{$}} +; ALIGN4: [[#%.16x,START+20+16+16+8]] otherstr{{$}} +; ALIGN4: [[#%.16x,START+20+16+16+8+12]] str{{$}} + +; CHECK: SYMBOL TABLE: + +; ALIGN1: [[#%.16x,START+19]] l O __TEXT,__cstring _myotherlongstr +; ALIGN1: [[#%.16x,START+21]] l O __TEXT,__cstring _otherlongstr +; ALIGN1: [[#%.16x,START+26]] l O __TEXT,__cstring _longstr +; ALIGN1: [[#%.16x,START+34]] l O __TEXT,__cstring _otherstr +; ALIGN1: [[#%.16x,START+39]] l O __TEXT,__cstring _str + +; ALIGN2: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr +; ALIGN2: [[#%.16x,START+20+2]] l O __TEXT,__cstring _otherlongstr +; ALIGN2: [[#%.16x,START+20+16]] l O __TEXT,__cstring _longstr +; ALIGN2: [[#%.16x,START+20+16+8]] l O __TEXT,__cstring _otherstr +; ALIGN2: [[#%.16x,START+20+16+8+10]] l O __TEXT,__cstring _str + +; ALIGN4: [[#%.16x,START+20]] l O __TEXT,__cstring _myotherlongstr +; ALIGN4: [[#%.16x,START+20+16]] l O __TEXT,__cstring _otherlongstr +;
ALIGN4: [[#%.16x,START+20+16+16]] l O __TEXT,__cstring _longstr +; ALIGN4: [[#%.16x,START+20+16+16+8]] l O __TEXT,__cstring _otherstr +; ALIGN4: [[#%.16x,START+20+16+16+8+12]] l O __TEXT,__cstring _str + +;--- first.s +.cstring +.p2align 2 +.asciz "get awkward offset" ; length = 19 + +;--- align.s.template +.cstring + +.p2align <ALIGN> + _myotherlongstr: +.asciz "myotherlongstr" ; length = 15 + +.p2align <ALIGN> + _otherlongstr: +.asciz "otherlongstr" ; length = 13, tail offset = 2 + +.p2align <ALIGN> + _longstr: +.asciz "longstr" ; length = 8, tail offset = 7 + +.p2align <ALIGN> + _otherstr: +.asciz "otherstr" ; length = 9 + +.p2align <ALIGN> + _str: +.asciz "str" ; length = 4, tail offset = 5 diff --git a/wild/tests/lld-macho/dead-strip.s b/wild/tests/lld-macho/dead-strip.s new file mode 100644 index 000000000..d107dad53 --- /dev/null +++ b/wild/tests/lld-macho/dead-strip.s @@ -0,0 +1,1014 @@ +# REQUIRES: x86, llvm-64-bits + +# RUN: rm -rf %t; split-file %s %t + +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/basics.s -o %t/basics.o + +## Check that .private_extern symbols are marked as local in the symbol table +## and aren't in the export trie.
+# RUN: %lld -lSystem -dead_strip -map %t/map -u _ref_private_extern_u \ +# RUN: %t/basics.o -o %t/basics +# RUN: llvm-objdump --syms --section-headers %t/basics | \ +# RUN: FileCheck --check-prefix=EXEC --implicit-check-not _unref %s +# RUN: llvm-objdump --macho --section=__DATA,__ref_section \ +# RUN: --exports-trie --indirect-symbols %t/basics | \ +# RUN: FileCheck --check-prefix=EXECDATA --implicit-check-not _unref %s +# RUN: llvm-otool -l %t/basics | grep -q 'segname __PAGEZERO' +# EXEC-LABEL: Sections: +# EXEC-LABEL: Name +# EXEC-NEXT: __text +# EXEC-NEXT: __got +# EXEC-NEXT: __ref_section +# EXEC-NEXT: __common +# EXEC-LABEL: SYMBOL TABLE: +# EXEC-DAG: l {{.*}} _ref_data +# EXEC-DAG: l {{.*}} _ref_local +# EXEC-DAG: l {{.*}} _ref_from_no_dead_strip_globl +# EXEC-DAG: l {{.*}} _no_dead_strip_local +# EXEC-DAG: l {{.*}} _ref_from_no_dead_strip_local +# EXEC-DAG: l {{.*}} _ref_private_extern_u +# EXEC-DAG: l {{.*}} _main +# EXEC-DAG: l {{.*}} _ref_private_extern +# EXEC-DAG: g {{.*}} _no_dead_strip_globl +# EXEC-DAG: g {{.*}} _ref_com +# EXEC-DAG: g {{.*}} __mh_execute_header +# EXECDATA-LABEL: Indirect symbols +# EXECDATA-NEXT: name +# EXECDATA-NEXT: LOCAL +# EXECDATA-LABEL: Contents of (__DATA,__ref_section) section +# EXECDATA-NEXT: 04 00 00 00 00 00 00 00 05 00 00 00 00 00 00 00 +# EXECDATA-LABEL: Exports trie: +# EXECDATA-DAG: _ref_com +# EXECDATA-DAG: _no_dead_strip_globl +# EXECDATA-DAG: __mh_execute_header + +## Check that dead stripped symbols get listed properly. +# RUN: FileCheck --check-prefix=MAP %s < %t/map + +# MAP: _main +# MAP-LABEL: Dead Stripped Symbols +# MAP-DAG: <<dead>> 0x00000001 [ 2] _unref_com +# MAP-DAG: <<dead>> 0x00000008 [ 2] _unref_data +# MAP-DAG: <<dead>> 0x00000006 [ 2] _unref_extern +# MAP-DAG: <<dead>> 0x00000001 [ 2] _unref_local +# MAP-DAG: <<dead>> 0x00000007 [ 2] _unref_private_extern +# MAP-DAG: <<dead>> 0x00000008 [ 2] l_unref_data + +## Run dead stripping on code without any dead symbols.
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/no-dead-symbols.s -o %t/no-dead-symbols.o +# RUN: %lld -lSystem -dead_strip -map %t/no-dead-symbols-map \ +# RUN: %t/no-dead-symbols.o -o %t/no-dead-symbols +## Mark the end of the file with a string. +# RUN: FileCheck --check-prefix=NODEADSYMBOLS %s < %t/no-dead-symbols-map + +# NODEADSYMBOLS-LABEL: # Symbols: +# NODEADSYMBOLS-NEXT: # Address Size File Name +# NODEADSYMBOLS-NEXT: _main +# NODEADSYMBOLS-LABEL: # Dead Stripped Symbols: +# NODEADSYMBOLS-NEXT: # Size File Name +# NODEADSYMBOLS-EMPTY: + +# RUN: %lld -dylib -dead_strip -u _ref_private_extern_u %t/basics.o -o %t/basics.dylib +# RUN: llvm-objdump --syms %t/basics.dylib | \ +# RUN: FileCheck --check-prefix=DYLIB --implicit-check-not _unref %s +# RUN: %lld -bundle -dead_strip -u _ref_private_extern_u %t/basics.o -o %t/basics.dylib +# RUN: llvm-objdump --syms %t/basics.dylib | \ +# RUN: FileCheck --check-prefix=DYLIB --implicit-check-not _unref %s +# DYLIB-LABEL: SYMBOL TABLE: +# DYLIB-DAG: l {{.*}} _ref_data +# DYLIB-DAG: l {{.*}} _ref_local +# DYLIB-DAG: l {{.*}} _ref_from_no_dead_strip_globl +# DYLIB-DAG: l {{.*}} _no_dead_strip_local +# DYLIB-DAG: l {{.*}} _ref_from_no_dead_strip_local +# DYLIB-DAG: l {{.*}} _ref_private_extern_u +# DYLIB-DAG: l {{.*}} _ref_private_extern +# DYLIB-DAG: g {{.*}} _ref_com +# DYLIB-DAG: g {{.*}} _unref_com +# DYLIB-DAG: g {{.*}} _unref_extern +# DYLIB-DAG: g {{.*}} _no_dead_strip_globl + +## Extern symbols aren't stripped from executables with -export_dynamic +# RUN: %lld -lSystem -dead_strip -export_dynamic -u _ref_private_extern_u \ +# RUN: %t/basics.o -o %t/basics-export-dyn +# RUN: llvm-objdump --syms --section-headers %t/basics-export-dyn | \ +# RUN: FileCheck --check-prefix=EXECDYN %s +# EXECDYN-LABEL: Sections: +# EXECDYN-LABEL: Name +# EXECDYN-NEXT: __text +# EXECDYN-NEXT: __got +# EXECDYN-NEXT: __ref_section +# EXECDYN-NEXT: __common +# EXECDYN-LABEL: SYMBOL TABLE: +# EXECDYN-DAG: l {{.*}} 
_ref_data +# EXECDYN-DAG: l {{.*}} _ref_local +# EXECDYN-DAG: l {{.*}} _ref_from_no_dead_strip_globl +# EXECDYN-DAG: l {{.*}} _no_dead_strip_local +# EXECDYN-DAG: l {{.*}} _ref_from_no_dead_strip_local +# EXECDYN-DAG: l {{.*}} _ref_private_extern_u +# EXECDYN-DAG: l {{.*}} _main +# EXECDYN-DAG: l {{.*}} _ref_private_extern +# EXECDYN-DAG: g {{.*}} _ref_com +# EXECDYN-DAG: g {{.*}} _unref_com +# EXECDYN-DAG: g {{.*}} _unref_extern +# EXECDYN-DAG: g {{.*}} _no_dead_strip_globl +# EXECDYN-DAG: g {{.*}} __mh_execute_header + +## Absolute symbol handling. +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/abs.s -o %t/abs.o +# RUN: %lld -lSystem -dead_strip %t/abs.o -o %t/abs +# RUN: llvm-objdump --macho --syms --exports-trie %t/abs | \ +# RUN: FileCheck --check-prefix=ABS %s +#ABS-LABEL: SYMBOL TABLE: +#ABS-NEXT: g {{.*}} _main +#ABS-NEXT: g *ABS* _abs1 +#ABS-NEXT: g {{.*}} __mh_execute_header +#ABS-LABEL: Exports trie: +#ABS-NEXT: __mh_execute_header +#ABS-NEXT: _main +#ABS-NEXT: _abs1 [absolute] + +## Check that symbols from -exported_symbol(s_list) are preserved. +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/exported-symbol.s -o %t/exported-symbol.o +# RUN: %lld -lSystem -dead_strip -exported_symbol _my_exported_symbol \ +# RUN: %t/exported-symbol.o -o %t/exported-symbol +# RUN: llvm-objdump --syms %t/exported-symbol | \ +# RUN: FileCheck --check-prefix=EXPORTEDSYMBOL --implicit-check-not _unref %s +# EXPORTEDSYMBOL-LABEL: SYMBOL TABLE: +# EXPORTEDSYMBOL-NEXT: l {{.*}} _main +# EXPORTEDSYMBOL-NEXT: l {{.*}} __mh_execute_header +# EXPORTEDSYMBOL-NEXT: g {{.*}} _my_exported_symbol + +## Check that mod_init_funcs and mod_term_funcs are not stripped. 
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/mod-funcs.s -o %t/mod-funcs.o +# RUN: %lld -lSystem -dead_strip %t/mod-funcs.o -o %t/mod-funcs +# RUN: llvm-objdump --syms %t/mod-funcs | \ +# RUN: FileCheck --check-prefix=MODFUNCS --implicit-check-not _unref %s +# MODFUNCS-LABEL: SYMBOL TABLE: +# MODFUNCS-NEXT: l {{.*}} _ref_from_init +# MODFUNCS-NEXT: l {{.*}} _ref_init +# MODFUNCS-NEXT: l {{.*}} _ref_from_term +# MODFUNCS-NEXT: l {{.*}} _ref_term +# MODFUNCS-NEXT: g {{.*}} _main +# MODFUNCS-NEXT: g {{.*}} __mh_execute_header + +## Check that DylibSymbols in dead subsections are stripped: They should +## not be in the import table and should have no import stubs. +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/dylib.s -o %t/dylib.o +# RUN: %lld -dylib -dead_strip %t/dylib.o -o %t/dylib.dylib +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/strip-dylib-ref.s -o %t/strip-dylib-ref.o +# RUN: %lld -lSystem -dead_strip %t/strip-dylib-ref.o %t/dylib.dylib \ +# RUN: -o %t/strip-dylib-ref -U _ref_undef_fun -U _unref_undef_fun +# RUN: llvm-objdump --syms --bind --lazy-bind --weak-bind %t/strip-dylib-ref | \ +# RUN: FileCheck --check-prefix=STRIPDYLIB --implicit-check-not _unref %s +# STRIPDYLIB: SYMBOL TABLE: +# STRIPDYLIB-NEXT: l {{.*}} __dyld_private +# STRIPDYLIB-NEXT: g {{.*}} _main +# STRIPDYLIB-NEXT: g {{.*}} __mh_execute_header +# STRIPDYLIB-NEXT: *UND* dyld_stub_binder +# STRIPDYLIB-NEXT: *UND* _ref_dylib_fun +# STRIPDYLIB-NEXT: *UND* _ref_undef_fun +# STRIPDYLIB: Bind table: +# STRIPDYLIB: Lazy bind table: +# STRIPDYLIB: __DATA __la_symbol_ptr {{.*}} flat-namespace _ref_undef_fun +# STRIPDYLIB: __DATA __la_symbol_ptr {{.*}} dylib _ref_dylib_fun +# STRIPDYLIB: Weak bind table: +## Stubs smoke check: There should be two stubs entries, not four, but we +## don't verify that they belong to _ref_undef_fun and _ref_dylib_fun. 
+# RUN: llvm-objdump -d --section=__stubs --section=__stub_helper \ +# RUN: %t/strip-dylib-ref |FileCheck --check-prefix=STUBS %s +# STUBS-LABEL: <__stubs>: +# STUBS-NEXT: jmpq +# STUBS-NEXT: jmpq +# STUBS-NOT: jmpq +# STUBS-LABEL: <__stub_helper>: +# STUBS: pushq $0 +# STUBS: jmp +# STUBS: jmp +# STUBS-NOT: jmp +## An undefined symbol referenced from a dead-stripped function shouldn't +## produce a diagnostic: +# RUN: %lld -lSystem -dead_strip %t/strip-dylib-ref.o %t/dylib.dylib \ +# RUN: -o %t/strip-dylib-ref -U _ref_undef_fun + +## Check that referenced undefs are kept with -undefined dynamic_lookup. +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/ref-undef.s -o %t/ref-undef.o +# RUN: %lld -lSystem -dead_strip %t/ref-undef.o \ +# RUN: -o %t/ref-undef -undefined dynamic_lookup +# RUN: llvm-objdump --syms --lazy-bind %t/ref-undef | \ +# RUN: FileCheck --check-prefix=STRIPDYNLOOKUP %s +# STRIPDYNLOOKUP: SYMBOL TABLE: +# STRIPDYNLOOKUP: *UND* _ref_undef_fun +# STRIPDYNLOOKUP: Lazy bind table: +# STRIPDYNLOOKUP: __DATA __la_symbol_ptr {{.*}} flat-namespace _ref_undef_fun + +## S_ATTR_LIVE_SUPPORT tests. 
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/live-support.s -o %t/live-support.o +# RUN: %lld -lSystem -dead_strip %t/live-support.o %t/dylib.dylib \ +# RUN: -U _ref_undef_fun -U _unref_undef_fun -o %t/live-support +# RUN: llvm-objdump --syms %t/live-support | \ +# RUN: FileCheck --check-prefix=LIVESUPP --implicit-check-not _unref %s +# LIVESUPP-LABEL: SYMBOL TABLE: +# LIVESUPP-NEXT: l {{.*}} _ref_ls_fun_fw +# LIVESUPP-NEXT: l {{.*}} _ref_ls_fun_bw +# LIVESUPP-NEXT: l {{.*}} _ref_ls_dylib_fun +# LIVESUPP-NEXT: l {{.*}} _ref_ls_undef_fun +# LIVESUPP-NEXT: l {{.*}} __dyld_private +# LIVESUPP-NEXT: g {{.*}} _main +# LIVESUPP-NEXT: g {{.*}} _bar +# LIVESUPP-NEXT: g {{.*}} _foo +# LIVESUPP-NEXT: g {{.*}} __mh_execute_header +# LIVESUPP-NEXT: *UND* dyld_stub_binder +# LIVESUPP-NEXT: *UND* _ref_dylib_fun +# LIVESUPP-NEXT: *UND* _ref_undef_fun + +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/live-support-iterations.s -o %t/live-support-iterations.o +# RUN: %lld -lSystem -dead_strip %t/live-support-iterations.o \ +# RUN: -o %t/live-support-iterations +# RUN: llvm-objdump --syms %t/live-support-iterations | \ +# RUN: FileCheck --check-prefix=LIVESUPP2 --implicit-check-not _unref %s +# LIVESUPP2-LABEL: SYMBOL TABLE: +# LIVESUPP2-NEXT: l {{.*}} _bar +# LIVESUPP2-NEXT: l {{.*}} _foo_refd +# LIVESUPP2-NEXT: l {{.*}} _bar_refd +# LIVESUPP2-NEXT: l {{.*}} _baz +# LIVESUPP2-NEXT: l {{.*}} _baz_refd +# LIVESUPP2-NEXT: l {{.*}} _foo +# LIVESUPP2-NEXT: g {{.*}} _main +# LIVESUPP2-NEXT: g {{.*}} __mh_execute_header + +## Dead stripping should not remove the __TEXT,__unwind_info +## and __TEXT,__gcc_except_tab functions, but it should still +## remove the unreferenced function __Z5unref. +## The reference to ___gxx_personality_v0 should also not be +## stripped. +## (Need to use darwin19.0.0 to make -mc emit __LD,__compact_unwind.) 
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin19.0.0 \ +# RUN: %t/unwind.s -o %t/unwind.o +# RUN: %lld -lc++ -lSystem -dead_strip %t/unwind.o -o %t/unwind +# RUN: llvm-objdump --syms %t/unwind | \ +# RUN: FileCheck --check-prefix=UNWIND --implicit-check-not unref %s +# RUN: llvm-otool -l %t/unwind | FileCheck --check-prefix=UNWINDSECT %s +# UNWINDSECT-DAG: sectname __unwind_info +# UNWINDSECT-DAG: sectname __gcc_except_tab +# UNWIND-LABEL: SYMBOL TABLE: +# UNWIND-NEXT: l O __TEXT,__gcc_except_tab GCC_except_table1 +# UNWIND-NEXT: l O __DATA,__data __dyld_private +# UNWIND-NEXT: g F __TEXT,__text _main +# UNWIND-NEXT: g F __TEXT,__text __mh_execute_header +# UNWIND-NEXT: *UND* dyld_stub_binder +# UNWIND-NEXT: *UND* __ZTIi +# UNWIND-NEXT: *UND* ___cxa_allocate_exception +# UNWIND-NEXT: *UND* ___cxa_begin_catch +# UNWIND-NEXT: *UND* ___cxa_end_catch +# UNWIND-NEXT: *UND* ___cxa_throw +# UNWIND-NEXT: *UND* ___gxx_personality_v0 +# UNWIND-NOT: GCC_except_table0 + +## If a dead stripped function has a strong ref to a dylib symbol but +## a live function only a weak ref, the dylib is still not a WEAK_DYLIB. +## This matches ld64. +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/weak-ref.s -o %t/weak-ref.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/strong-dead-ref.s -o %t/strong-dead-ref.o +# RUN: %lld -lSystem -dead_strip %t/weak-ref.o %t/strong-dead-ref.o \ +# RUN: %t/dylib.dylib -o %t/weak-ref +# RUN: llvm-otool -l %t/weak-ref | FileCheck -DDIR=%t --check-prefix=WEAK %s +# WEAK: cmd LC_LOAD_DYLIB +# WEAK-NEXT: cmdsize +# WEAK-NEXT: name /usr/lib/libSystem.dylib +# WEAK: cmd LC_LOAD_DYLIB +# WEAK-NEXT: cmdsize +# WEAK-NEXT: name [[DIR]]/dylib.dylib + +## A strong symbol that would override a weak import does not emit the +## "this overrides a weak import" opcode if it is dead-stripped. 
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/weak-dylib.s -o %t/weak-dylib.o +# RUN: %lld -dylib -dead_strip %t/weak-dylib.o -o %t/weak-dylib.dylib +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/dead-weak-override.s -o %t/dead-weak-override.o +# RUN: %lld -dead_strip %t/dead-weak-override.o %t/weak-dylib.dylib \ +# RUN: -o %t/dead-weak-override +# RUN: llvm-objdump --macho --weak-bind --private-header \ +# RUN: %t/dead-weak-override | FileCheck --check-prefix=DEADWEAK %s +# DEADWEAK-NOT: WEAK_DEFINES +# DEADWEAK: Weak bind table: +# DEADWEAK: segment section address type addend symbol +# DEADWEAK-NOT: strong _weak_in_dylib + +## Stripped symbols should not be in the debug info stabs entries. +# RUN: llvm-mc -g -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/debug.s -o %t/debug.o +# RUN: %lld -lSystem -dead_strip %t/debug.o -o %t/debug +# RUN: dsymutil -s %t/debug | FileCheck --check-prefix=EXECSTABS %s +# EXECSTABS-NOT: N_FUN {{.*}} '_unref' +# EXECSTABS: N_FUN {{.*}} '_main' +# EXECSTABS-NOT: N_FUN {{.*}} '_unref' + +# RUN: llvm-mc -g -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/literals.s -o %t/literals.o +# RUN: %lld -dylib -dead_strip %t/literals.o -o %t/literals +# RUN: llvm-objdump --macho --section="__TEXT,__cstring" --section="__DATA,str_ptrs" \ +# RUN: --section="__TEXT,__literals" %t/literals | FileCheck %s --check-prefix=LIT +# LIT: Contents of (__TEXT,__cstring) section +# LIT-NEXT: foobar +# LIT-NEXT: Contents of (__DATA,str_ptrs) section +# LIT-NEXT: __TEXT:__cstring:bar +# LIT-NEXT: __TEXT:__cstring:bar +# LIT-NEXT: Contents of (__TEXT,__literals) section +# LIT-NEXT: ef be ad de {{$}} + +## Ensure that addrsig metadata does not keep unreferenced functions alive. 
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/addrsig.s -o %t/addrsig.o +# RUN: %lld -lSystem -dead_strip --icf=safe %t/addrsig.o -o %t/addrsig +# RUN: llvm-objdump --syms %t/addrsig | \ +# RUN: FileCheck --check-prefix=ADDSIG --implicit-check-not _addrsig %s +# ADDSIG-LABEL: SYMBOL TABLE: +# ADDSIG-NEXT: g F __TEXT,__text _main +# ADDSIG-NEXT: g F __TEXT,__text __mh_execute_header +# ADDSIG-NEXT: *UND* dyld_stub_binder + +## Duplicate symbols that will be dead stripped later should not fail when using +## the --dead-strip-duplicates flag. +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/duplicate1.s -o %t/duplicate1.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos \ +# RUN: %t/duplicate2.s -o %t/duplicate2.o +# RUN: %lld -lSystem -dead_strip --dead-strip-duplicates -map %t/stripped-duplicate-map \ +# RUN: %t/duplicate1.o %t/duplicate2.o -o %t/duplicate +# RUN: llvm-objdump --syms %t/duplicate | FileCheck %s --check-prefix=DUP +# DUP-LABEL: SYMBOL TABLE: +# DUP-NEXT: g F __TEXT,__text _main +# DUP-NEXT: g F __TEXT,__text __mh_execute_header +# DUP-NEXT: *UND* dyld_stub_binder + +## Check that the duplicate dead stripped symbols get listed properly. +# RUN: FileCheck --check-prefix=DUPMAP %s < %t/stripped-duplicate-map +# DUPMAP: _main +# DUPMAP-LABEL: Dead Stripped Symbols +# DUPMAP: <<dead>> 0x00000001 [ 3] _foo + +#--- duplicate1.s +.text +.globl _main, _foo +_foo: + retq + +_main: + retq + +.subsections_via_symbols + +#--- duplicate2.s +.text +.globl _foo +_foo: + retq + +.subsections_via_symbols + +#--- basics.s +.comm _ref_com, 1 +.comm _unref_com, 1 + +.section __DATA,__unref_section +_unref_data: + .quad 4 + +l_unref_data: + .quad 5 + +## Referenced by no_dead_strip == S_ATTR_NO_DEAD_STRIP +.section __DATA,__ref_section,regular,no_dead_strip + +## Referenced because in no_dead_strip section. 
+_ref_data: + .quad 4 + +## This is a local symbol so it's not in the symbol table, but +## it is still in the section data. +l_ref_data: + .quad 5 + +.text + +# Exported symbols should not be stripped from dylibs +# or bundles, but they should be stripped from executables. +.globl _unref_extern +_unref_extern: + callq _ref_local + retq + +# Unreferenced local symbols should be stripped. +_unref_local: + retq + +# Same for unreferenced private externs. +.globl _unref_private_extern +.private_extern _unref_private_extern +_unref_private_extern: + # This shouldn't create an indirect symbol since it's + # a reference from a dead function. + movb _unref_com@GOTPCREL(%rip), %al + retq + +# Referenced local symbols should not be stripped. +_ref_local: + callq _ref_private_extern + retq + +# Same for referenced private externs. +# This one is referenced by a relocation. +.globl _ref_private_extern +.private_extern _ref_private_extern +_ref_private_extern: + retq + +# This one is referenced by a -u flag. +.globl _ref_private_extern_u +.private_extern _ref_private_extern_u +_ref_private_extern_u: + retq + +# Entry point should not be stripped for executables, even if hidden. +# For shared libraries this is stripped since it's just a regular hidden +# symbol there. +.globl _main +.private_extern _main +_main: + movb _ref_com@GOTPCREL(%rip), %al + callq _ref_local + retq + +# Things marked no_dead_strip should not be stripped either. +# (clang emits this e.g. for `__attribute__((used))` globals.) +# Both for .globl symbols... +.globl _no_dead_strip_globl +.no_dead_strip _no_dead_strip_globl +_no_dead_strip_globl: + callq _ref_from_no_dead_strip_globl + retq +_ref_from_no_dead_strip_globl: + retq + +# ...and for locals. 
+.no_dead_strip _no_dead_strip_local +_no_dead_strip_local: + callq _ref_from_no_dead_strip_local + retq +_ref_from_no_dead_strip_local: + retq + +.subsections_via_symbols + +#--- exported-symbol.s +.text + +.globl _unref_symbol +_unref_symbol: + retq + +.globl _my_exported_symbol +_my_exported_symbol: + retq + +.globl _main +_main: + retq + +.subsections_via_symbols + +#--- abs.s +.globl _abs1, _abs2, _abs3 + +.no_dead_strip _abs1 +_abs1 = 1 +_abs2 = 2 +_abs3 = 3 + +.section __DATA,__foo,regular,no_dead_strip +# Absolute symbols are not in a section, so the no_dead_strip +# on the section above has no effect. +.globl _abs4 +_abs4 = 4 + +.text +.globl _main +_main: + # This is relaxed away, so there's no relocation here and + # _abs3 isn't in the exported symbol table. + mov _abs3, %rax + retq + +.subsections_via_symbols + +#--- mod-funcs.s +## Roughly based on `clang -O2 -S` output for `struct A { A(); ~A(); }; A a;` +## for mod_init_funcs. mod_term_funcs then similar to that. +.section __TEXT,__StaticInit,regular,pure_instructions + +__unref: + retq + +_ref_from_init: + retq + +_ref_init: + callq _ref_from_init + retq + +_ref_from_term: + retq + +_ref_term: + callq _ref_from_term + retq + +.globl _main +_main: + retq + +.section __DATA,__mod_init_func,mod_init_funcs +.quad _ref_init + +.section __DATA,__mod_term_func,mod_term_funcs +.quad _ref_term + +.subsections_via_symbols + +#--- dylib.s +.text + +.globl _ref_dylib_fun +_ref_dylib_fun: + retq + +.globl _unref_dylib_fun +_unref_dylib_fun: + retq + +.subsections_via_symbols + +#--- strip-dylib-ref.s +.text + +_unref: + callq _ref_dylib_fun + callq _unref_dylib_fun + callq _ref_undef_fun + callq _unref_undef_fun + retq + +.globl _main +_main: + callq _ref_dylib_fun + callq _ref_undef_fun + retq + +.subsections_via_symbols + +#--- live-support.s +## In practice, live_support is used for instruction profiling +## data and asan. (Also for __eh_frame, but that needs special handling +## in the linker anyways.) 
+## This test isn't based on anything happening in real code though. +.section __TEXT,__ref_ls_fw,regular,live_support +_ref_ls_fun_fw: + # This is called by _main and is kept alive by normal + # forward liveness propagation. The live_support attribute + # does nothing in this case. + retq + +.section __TEXT,__unref_ls_fw,regular,live_support +_unref_ls_fun_fw: + retq + +.section __TEXT,__ref_ls_bw,regular,live_support +_ref_ls_fun_bw: + # This _calls_ something that's alive but isn't referenced itself. This is + # kept alive only due to this being in a live_support section. + callq _foo + + # _bar on the other hand is kept alive since it's called from here. + callq _bar + retq + +## Kept alive by a live symbol from a dynamic library. +_ref_ls_dylib_fun: + callq _ref_dylib_fun + retq + +## Kept alive by a live undefined symbol. +_ref_ls_undef_fun: + callq _ref_undef_fun + retq + +## All symbols in this live_support section reference dead symbols +## and are hence dead themselves. +.section __TEXT,__unref_ls_bw,regular,live_support +_unref_ls_fun_bw: + callq _unref + retq + +_unref_ls_dylib_fun_bw: + callq _unref_dylib_fun + retq + +_unref_ls_undef_fun_bw: + callq _unref_undef_fun + retq + +.text +.globl _unref +_unref: + retq + +.globl _bar +_bar: + retq + +.globl _foo +_foo: + callq _ref_ls_fun_fw + retq + +.globl _main +_main: + callq _ref_ls_fun_fw + callq _foo + callq _ref_dylib_fun + callq _ref_undef_fun + retq + +.subsections_via_symbols + +#--- live-support-iterations.s +.section __TEXT,_ls,regular,live_support + +## This is a live_support subsection that only becomes +## live after _foo below is processed. This means the algorithm of +## 1. mark things reachable from gc roots live +## 2. go through live sections and mark the ones live pointing to +## live symbols or sections +## needs more than one iteration, since _bar won't be live when step 2 +## runs for the first time. 
+## (ld64 gets this wrong -- it has different output based on if _bar is +## before _foo or after it.) +_bar: + callq _foo_refd + callq _bar_refd + retq + +## Same here. This is maybe more interesting since it references a live_support +## symbol instead of a "normal" symbol. +_baz: + callq _foo_refd + callq _baz_refd + retq + +_foo: + callq _main + callq _foo_refd + retq + +## Test no_dead_strip on a symbol in a live_support section. +## ld64 ignores this, but that doesn't look intentional. So lld honors it. +.no_dead_strip +_quux: + retq + + +.text +.globl _main +_main: + movq $0, %rax + retq + +_foo_refd: + retq + +_bar_refd: + retq + +_baz_refd: + retq + +.subsections_via_symbols + +#--- unwind.s +## This is the output of `clang -O2 -S throw.cc` where throw.cc +## looks like this: +## int unref() { +## try { +## throw 0; +## } catch (int i) { +## return i + 1; +## } +## } +## int main() { +## try { +## throw 0; +## } catch (int i) { +## return i; +## } +## } +.section __TEXT,__text,regular,pure_instructions +.globl __Z5unrefv ## -- Begin function _Z5unrefv +.p2align 4, 0x90 +__Z5unrefv: ## @_Z5unrefv +Lfunc_begin0: + .cfi_startproc + .cfi_personality 155, ___gxx_personality_v0 + .cfi_lsda 16, Lexception0 +## %bb.0: + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + subq $16, %rsp + movl $4, %edi + callq ___cxa_allocate_exception + movl $0, (%rax) +Ltmp0: + movq __ZTIi@GOTPCREL(%rip), %rsi + movq %rax, %rdi + xorl %edx, %edx + callq ___cxa_throw +Ltmp1: +## %bb.1: + ud2 +LBB0_2: +Ltmp2: + leaq -4(%rbp), %rcx + movq %rax, %rdi + movl %edx, %esi + movq %rcx, %rdx + callq __Z5unrefv.cold.1 + movl -4(%rbp), %eax + addq $16, %rsp + popq %rbp + retq +Lfunc_end0: + .cfi_endproc + .section __TEXT,__gcc_except_tab + .p2align 2 +GCC_except_table0: +Lexception0: + .byte 255 ## @LPStart Encoding = omit + .byte 155 ## @TType Encoding = indirect pcrel sdata4 + .uleb128 Lttbase0-Lttbaseref0 +Lttbaseref0: + .byte 1 ## 
Call site Encoding = uleb128 + .uleb128 Lcst_end0-Lcst_begin0 +Lcst_begin0: + .uleb128 Lfunc_begin0-Lfunc_begin0 ## >> Call Site 1 << + .uleb128 Ltmp0-Lfunc_begin0 ## Call between Lfunc_begin0 and Ltmp0 + .byte 0 ## has no landing pad + .byte 0 ## On action: cleanup + .uleb128 Ltmp0-Lfunc_begin0 ## >> Call Site 2 << + .uleb128 Ltmp1-Ltmp0 ## Call between Ltmp0 and Ltmp1 + .uleb128 Ltmp2-Lfunc_begin0 ## jumps to Ltmp2 + .byte 1 ## On action: 1 + .uleb128 Ltmp1-Lfunc_begin0 ## >> Call Site 3 << + .uleb128 Lfunc_end0-Ltmp1 ## Call between Ltmp1 and Lfunc_end0 + .byte 0 ## has no landing pad + .byte 0 ## On action: cleanup +Lcst_end0: + .byte 1 ## >> Action Record 1 << + ## Catch TypeInfo 1 + .byte 0 ## No further actions + .p2align 2 + ## >> Catch TypeInfos << + .long __ZTIi@GOTPCREL+4 ## TypeInfo 1 +Lttbase0: + .p2align 2 + ## -- End function + .section __TEXT,__text,regular,pure_instructions + .globl _main ## -- Begin function main + .p2align 4, 0x90 +_main: ## @main +Lfunc_begin1: + .cfi_startproc + .cfi_personality 155, ___gxx_personality_v0 + .cfi_lsda 16, Lexception1 +## %bb.0: + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + pushq %rbx + pushq %rax + .cfi_offset %rbx, -24 + movl $4, %edi + callq ___cxa_allocate_exception + movl $0, (%rax) +Ltmp3: + movq __ZTIi@GOTPCREL(%rip), %rsi + movq %rax, %rdi + xorl %edx, %edx + callq ___cxa_throw +Ltmp4: +## %bb.1: + ud2 +LBB1_2: +Ltmp5: + movq %rax, %rdi + callq ___cxa_begin_catch + movl (%rax), %ebx + callq ___cxa_end_catch + movl %ebx, %eax + addq $8, %rsp + popq %rbx + popq %rbp + retq +Lfunc_end1: + .cfi_endproc + .section __TEXT,__gcc_except_tab + .p2align 2 +GCC_except_table1: +Lexception1: + .byte 255 ## @LPStart Encoding = omit + .byte 155 ## @TType Encoding = indirect pcrel sdata4 + .uleb128 Lttbase1-Lttbaseref1 +Lttbaseref1: + .byte 1 ## Call site Encoding = uleb128 + .uleb128 Lcst_end1-Lcst_begin1 +Lcst_begin1: + .uleb128 Lfunc_begin1-Lfunc_begin1 
## >> Call Site 1 << + .uleb128 Ltmp3-Lfunc_begin1 ## Call between Lfunc_begin1 and Ltmp3 + .byte 0 ## has no landing pad + .byte 0 ## On action: cleanup + .uleb128 Ltmp3-Lfunc_begin1 ## >> Call Site 2 << + .uleb128 Ltmp4-Ltmp3 ## Call between Ltmp3 and Ltmp4 + .uleb128 Ltmp5-Lfunc_begin1 ## jumps to Ltmp5 + .byte 1 ## On action: 1 + .uleb128 Ltmp4-Lfunc_begin1 ## >> Call Site 3 << + .uleb128 Lfunc_end1-Ltmp4 ## Call between Ltmp4 and Lfunc_end1 + .byte 0 ## has no landing pad + .byte 0 ## On action: cleanup +Lcst_end1: + .byte 1 ## >> Action Record 1 << + ## Catch TypeInfo 1 + .byte 0 ## No further actions + .p2align 2 + ## >> Catch TypeInfos << + .long __ZTIi@GOTPCREL+4 ## TypeInfo 1 +Lttbase1: + .p2align 2 + ## -- End function + .section __TEXT,__text,regular,pure_instructions + .p2align 4, 0x90 ## -- Begin function _Z5unrefv.cold.1 +__Z5unrefv.cold.1: ## @_Z5unrefv.cold.1 + .cfi_startproc +## %bb.0: + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + pushq %rbx + pushq %rax + .cfi_offset %rbx, -24 + movq %rdx, %rbx + callq ___cxa_begin_catch + movl (%rax), %eax + incl %eax + movl %eax, (%rbx) + addq $8, %rsp + popq %rbx + popq %rbp + jmp ___cxa_end_catch ## TAILCALL + .cfi_endproc + ## -- End function +.subsections_via_symbols + +#--- weak-ref.s +.text +.weak_reference _ref_dylib_fun +.globl _main +_main: + callq _ref_dylib_fun + retq + +.subsections_via_symbols + +#--- strong-dead-ref.s +.text +.globl _unref_dylib_fun +_unref: + callq _unref_dylib_fun + retq + +.subsections_via_symbols + +#--- weak-dylib.s +.text +.globl _weak_in_dylib +.weak_definition _weak_in_dylib +_weak_in_dylib: + retq + +.subsections_via_symbols + +#--- dead-weak-override.s + +## Overrides the _weak_in_dylib symbol in weak-dylib, but is dead stripped. 
+.text + +#.no_dead_strip _weak_in_dylib +.globl _weak_in_dylib +_weak_in_dylib: + retq + +.globl _main +_main: + retq + +.subsections_via_symbols + +#--- debug.s +.text +.globl _unref +_unref: + retq + +.globl _main +_main: + retq + +.subsections_via_symbols + +#--- no-dead-symbols.s +.text +.globl _main +_main: + retq + +#--- literals.s +.cstring +_unref_foo: + .ascii "foo" +_bar: +Lbar: + .asciz "bar" +_unref_baz: + .asciz "baz" + +.literal4 +.p2align 2 +L._foo4: + .long 0xdeadbeef +L._bar4: + .long 0xdeadbeef +L._unref: + .long 0xfeedface + +.section __DATA,str_ptrs,literal_pointers +.globl _data +_data: + .quad _bar + .quad Lbar + +## The output binary has these integer literals put into a section that isn't +## marked with a S_*BYTE_LITERALS flag, so we don't mark word_ptrs with the +## S_LITERAL_POINTERS flag in order not to confuse llvm-objdump. +.section __DATA,word_ptrs +.globl _more_data +_more_data: + .quad L._foo4 + .quad L._bar4 + +.subsections_via_symbols + +#--- ref-undef.s +.globl _main +_main: + callq _ref_undef_fun +.subsections_via_symbols + +#--- addrsig.s +.globl _main, _addrsig +_main: + retq + +_addrsig: + retq + +.subsections_via_symbols + +.addrsig +.addrsig_sym _addrsig diff --git a/wild/tests/lld-macho/dwarf-no-compile-unit.s b/wild/tests/lld-macho/dwarf-no-compile-unit.s new file mode 100644 index 000000000..ced2467ca --- /dev/null +++ b/wild/tests/lld-macho/dwarf-no-compile-unit.s @@ -0,0 +1,15 @@ +# REQUIRES: aarch64 + +## Check that LLD does not crash if it encounters DWARF sections +## without __debug_info compile unit DIEs being present. 
+ +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o +# RUN: %lld -arch arm64 %t.o -o /dev/null + +.text +.globl _main +_main: + ret + +.section __DWARF,__debug_abbrev,regular,debug + .byte 0 diff --git a/wild/tests/lld-macho/dyld-stub-binder.s b/wild/tests/lld-macho/dyld-stub-binder.s new file mode 100644 index 000000000..170fe8abd --- /dev/null +++ b/wild/tests/lld-macho/dyld-stub-binder.s @@ -0,0 +1,66 @@ +# REQUIRES: aarch64 +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/foo.s -o %t/foo.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/bar.s -o %t/bar.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/test.s -o %t/test.o + +## Dylibs that don't do lazy dynamic calls don't need dyld_stub_binder. +# RUN: %no-lsystem-lld -arch arm64 -dylib %t/foo.o -o %t/libfoo.dylib +# RUN: llvm-nm -m %t/libfoo.dylib | FileCheck --check-prefix=NOSTUB %s + +## Binaries that don't do lazy dynamic calls but are linked against +## libSystem.dylib get a reference to dyld_stub_binder even if it's +## not needed. +# RUN: %lld -arch arm64 -lSystem -dylib %t/foo.o -o %t/libfoo.dylib +# RUN: llvm-nm -m %t/libfoo.dylib | FileCheck --check-prefix=STUB %s + +## Dylibs that do lazy dynamic calls do need dyld_stub_binder. +# RUN: not %no-lsystem-lld -arch arm64 -dylib %t/bar.o %t/libfoo.dylib \ +# RUN: -o %t/libbar.dylib -no_fixup_chains 2>&1 | \ +# RUN: FileCheck --check-prefix=MISSINGSTUB %s +# RUN: %lld -arch arm64 -lSystem -dylib %t/bar.o %t/libfoo.dylib \ +# RUN: -o %t/libbar.dylib -no_fixup_chains +# RUN: llvm-nm -m %t/libbar.dylib | FileCheck --check-prefix=STUB %s + +## As do executables. 
+# RUN: not %no-lsystem-lld -arch arm64 %t/libfoo.dylib %t/libbar.dylib %t/test.o \ +# RUN: -o %t/test -no_fixup_chains 2>&1 | FileCheck --check-prefix=MISSINGSTUB %s +# RUN: %lld -arch arm64 -lSystem %t/libfoo.dylib %t/libbar.dylib %t/test.o \ +# RUN: -o %t/test -no_fixup_chains +# RUN: llvm-nm -m %t/test | FileCheck --check-prefix=STUB %s + +## Test dynamic lookup of dyld_stub_binder. +# RUN: %no-lsystem-lld -arch arm64 %t/libfoo.dylib %t/libbar.dylib %t/test.o \ +# RUN: -o %t/test -undefined dynamic_lookup -no_fixup_chains +# RUN: llvm-nm -m %t/test | FileCheck --check-prefix=DYNSTUB %s +# RUN: %no-lsystem-lld -arch arm64 %t/libfoo.dylib %t/libbar.dylib %t/test.o \ +# RUN: -o %t/test -U dyld_stub_binder -no_fixup_chains +# RUN: llvm-nm -m %t/test | FileCheck --check-prefix=DYNSTUB %s + +# MISSINGSTUB: error: undefined symbol: dyld_stub_binder +# MISSINGSTUB-NEXT: >>> referenced by lazy binding (normally in libSystem.dylib) + +# NOSTUB-NOT: dyld_stub_binder +# STUB: (undefined) external dyld_stub_binder (from libSystem) +# DYNSTUB: (undefined) external dyld_stub_binder (dynamically looked up) + +#--- foo.s +.globl _foo +_foo: + +#--- bar.s +.text +.globl _bar +_bar: + bl _foo + ret + +#--- test.s +.text +.globl _main + +.p2align 2 +_main: + bl _foo + bl _bar + ret diff --git a/wild/tests/lld-macho/eh-frame-dead-strip.s b/wild/tests/lld-macho/eh-frame-dead-strip.s new file mode 100644 index 000000000..c9eb8c167 --- /dev/null +++ b/wild/tests/lld-macho/eh-frame-dead-strip.s @@ -0,0 +1,46 @@ +# REQUIRES: x86, aarch64 + +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos11.0 %t/strong.s -o %t/strong_x86_64.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos11.0 %t/weak.s -o %t/weak_x86_64.o +# RUN: %lld -dylib -dead_strip %t/strong_x86_64.o %t/weak_x86_64.o -o %t/libstrongweak_x86_64.dylib +# RUN: llvm-dwarfdump --eh-frame %t/libstrongweak_x86_64.dylib | FileCheck --check-prefixes CHECK,X86_64 %s +# RUN: %lld -dylib 
-dead_strip %t/weak_x86_64.o %t/strong_x86_64.o -o %t/libweakstrong_x86_64.dylib +# RUN: llvm-dwarfdump --eh-frame %t/libweakstrong_x86_64.dylib | FileCheck --check-prefixes CHECK,X86_64 %s + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos11.0 %t/strong.s -o %t/strong_arm64.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos11.0 %t/weak.s -o %t/weak_arm64.o +# RUN: %lld -arch arm64 -dylib -dead_strip %t/strong_arm64.o %t/weak_arm64.o -o %t/libstrongweak_arm64.dylib +# RUN: llvm-dwarfdump --eh-frame %t/libstrongweak_arm64.dylib | FileCheck --check-prefixes CHECK,ARM64 %s +# RUN: %lld -arch arm64 -dylib -dead_strip %t/weak_arm64.o %t/strong_arm64.o -o %t/libweakstrong_arm64.dylib +# RUN: llvm-dwarfdump --eh-frame %t/libweakstrong_arm64.dylib | FileCheck --check-prefixes CHECK,ARM64 %s + +## Verify that unneeded FDEs (and their CIEs) are dead-stripped even if they +## point to a live symbol (e.g. because we had multiple weak definitions). + +# CHECK: .eh_frame contents: +# X86_64: 00000000 00000014 00000000 CIE +# X86_64: 00000018 0000001c 0000001c FDE cie=00000000 +# ARM64: 00000000 00000010 00000000 CIE +# ARM64: 00000014 00000018 00000018 FDE cie=00000000 +# CHECK-NOT: CIE +# CHECK-NOT: FDE + +#--- strong.s +.globl _fun +_fun: + .cfi_startproc + ## cfi_escape cannot be encoded in compact unwind + .cfi_escape 0 + ret + .cfi_endproc + +#--- weak.s +.globl _fun +.weak_definition _fun +_fun: + .cfi_startproc + ## cfi_escape cannot be encoded in compact unwind + .cfi_escape 0 + ret + .cfi_endproc diff --git a/wild/tests/lld-macho/eh-frame.s b/wild/tests/lld-macho/eh-frame.s new file mode 100644 index 000000000..64fd364c8 --- /dev/null +++ b/wild/tests/lld-macho/eh-frame.s @@ -0,0 +1,172 @@ +# REQUIRES: x86, aarch64 +# RUN: rm -rf %t; mkdir %t + +# RUN: llvm-mc -emit-compact-unwind-non-canonical=true -filetype=obj -triple=x86_64-apple-macos10.15 %s -o %t/eh-frame-x86_64.o +# RUN: %lld -lSystem -lc++ %t/eh-frame-x86_64.o -o %t/eh-frame-x86_64 +# RUN: 
llvm-objdump --macho --syms --indirect-symbols --unwind-info \ +# RUN: --dwarf=frames %t/eh-frame-x86_64 | FileCheck %s -D#BASE=0x100000000 -D#DWARF_ENC=4 +# RUN: llvm-nm -m %t/eh-frame-x86_64 | FileCheck %s --check-prefix NO-EH-SYMS +# RUN: llvm-readobj --section-headers %t/eh-frame-x86_64 | FileCheck %s --check-prefix=ALIGN -D#ALIGN=3 + +## Test that we correctly handle the output of `ld -r`, which emits EH frames +## using subtractor relocations instead of implicitly encoding the offsets. +## In order to keep this test cross-platform, we check in ld64's output rather +## than invoking ld64 directly. NOTE: whenever this test is updated, the +## checked-in copy of `ld -r`'s output should be updated too! +# COM: ld -r %t/eh-frame-x86_64.o -o %S/Inputs/eh-frame-x86_64-r.o +# RUN: %lld -lSystem -lc++ %S/Inputs/eh-frame-x86_64-r.o -o %t/eh-frame-x86_64-r +# RUN: llvm-objdump --macho --syms --indirect-symbols --unwind-info \ +# RUN: --dwarf=frames %t/eh-frame-x86_64-r | FileCheck %s -D#BASE=0x100000000 -D#DWARF_ENC=4 +# RUN: llvm-nm -m %t/eh-frame-x86_64-r | FileCheck %s --check-prefix NO-EH-SYMS +# RUN: llvm-readobj --section-headers %t/eh-frame-x86_64-r | FileCheck %s --check-prefix=ALIGN -D#ALIGN=3 + +# RUN: llvm-mc -filetype=obj -emit-compact-unwind-non-canonical=true -triple=arm64-apple-macos11.0 %s -o %t/eh-frame-arm64.o +# RUN: %lld -arch arm64 -lSystem -lc++ %t/eh-frame-arm64.o -o %t/eh-frame-arm64 +# RUN: llvm-objdump --macho --syms --indirect-symbols --unwind-info \ +# RUN: --dwarf=frames %t/eh-frame-arm64 | FileCheck %s -D#BASE=0x100000000 -D#DWARF_ENC=3 +# RUN: llvm-nm -m %t/eh-frame-arm64 | FileCheck %s --check-prefix NO-EH-SYMS + +# COM: ld -r %t/eh-frame-arm64.o -o %S/Inputs/eh-frame-arm64-r.o +# RUN: %lld -arch arm64 -lSystem -lc++ %S/Inputs/eh-frame-arm64-r.o -o %t/eh-frame-arm64-r +# RUN: llvm-objdump --macho --syms --indirect-symbols --unwind-info \ +# RUN: --dwarf=frames %t/eh-frame-arm64-r | FileCheck %s -D#BASE=0x100000000 -D#DWARF_ENC=3 +# RUN: 
llvm-nm -m %t/eh-frame-arm64-r | FileCheck %s --check-prefix NO-EH-SYMS + +# ALIGN: Name: __eh_frame +# ALIGN-NEXT: Segment: __TEXT +# ALIGN-NEXT: Address: +# ALIGN-NEXT: Size: +# ALIGN-NEXT: Offset: +# ALIGN-NEXT: Alignment: [[#ALIGN]] + +# NO-EH-SYMS-NOT: __eh_frame + +# CHECK: Indirect symbols for (__DATA_CONST,__got) 2 entries +# CHECK: address index name +# CHECK: 0x[[#%x,GXX_PERSONALITY_GOT:]] {{.*}} ___gxx_personality_v0 +# CHECK: 0x[[#%x,MY_PERSONALITY_GOT:]] +# CHECK: SYMBOL TABLE: +# CHECK-DAG: [[#%x,F:]] l F __TEXT,__text _f +# CHECK-DAG: [[#%x,NO_UNWIND:]] l F __TEXT,__text _no_unwind +# CHECK-DAG: [[#%x,G:]] l F __TEXT,__text _g +# CHECK-DAG: [[#%x,H:]] l F __TEXT,__text _h +# CHECK-DAG: [[#%x,EXCEPT0:]] l O __TEXT,__gcc_except_tab GCC_except_table0 +# CHECK-DAG: [[#%x,EXCEPT1:]] l O __TEXT,__gcc_except_tab GCC_except_table1 +# CHECK-DAG: [[#%x,EXCEPT2:]] l O __TEXT,custom_except custom_except_table2 +# CHECK-DAG: [[#%x,MY_PERSONALITY:]] g F __TEXT,__text _my_personality +# CHECK: Contents of __unwind_info section: +# CHECK: Version: 0x1 +# CHECK: Number of personality functions in array: 0x1 +# CHECK: Number of indices in array: 0x2 +# CHECK: Personality functions: (count = 1) +# CHECK: personality[1]: 0x[[#%.8x,GXX_PERSONALITY_GOT - BASE]] +# CHECK: LSDA descriptors: +# CHECK: [0]: function offset=0x[[#%.8x,F - BASE]], LSDA offset=0x[[#%.8x,EXCEPT0 - BASE]] +# CHECK: [1]: function offset=0x[[#%.8x,G - BASE]], LSDA offset=0x[[#%.8x,EXCEPT1 - BASE]] +# CHECK: [2]: function offset=0x[[#%.8x,H - BASE]], LSDA offset=0x[[#%.8x,EXCEPT2 - BASE]] +# CHECK: Second level indices: +# CHECK: Second level index[0]: +# CHECK [0]: function offset=0x[[#%.8x,F - BASE]], encoding[{{.*}}]=0x52{{.*}} +# CHECK [1]: function offset=0x[[#%.8x,NO_UNWIND - BASE]], encoding[{{.*}}]=0x00000000 +# CHECK: [2]: function offset=0x[[#%.8x,G - BASE]], encoding[{{.*}}]=0x0[[#%x,DWARF_ENC]][[#%.6x, G_DWARF_OFF:]] +# CHECK: [3]: function offset=0x[[#%.8x,H - BASE]], 
encoding[{{.*}}]=0x0[[#%x,DWARF_ENC]][[#%.6x, H_DWARF_OFF:]] +# CHECK: [4]: function offset=0x[[#%.8x,MY_PERSONALITY - BASE]], encoding[{{.*}}]=0x00000000 + +# CHECK: .debug_frame contents: +# CHECK: .eh_frame contents: + +# CHECK: [[#%.8x,CIE1_OFF:]] {{.*}} CIE +# CHECK: Format: DWARF32 +# CHECK: Version: 1 +# CHECK: Augmentation: "zPLR" +# CHECK: Code alignment factor: 1 +# CHECK: Data alignment factor: -8 +# CHECK: Return address column: +# CHECK: Personality Address: [[#%.16x,GXX_PERSONALITY_GOT]] +# CHECK: Augmentation data: 9B {{(([[:xdigit:]]{2} ){4})}}10 10 + +# CHECK: [[#%.8x,G_DWARF_OFF]] {{.*}} [[#%.8x,G_DWARF_OFF + 4 - CIE1_OFF]] FDE cie=[[#CIE1_OFF]] pc=[[#%x,G]] +# CHECK: Format: DWARF32 +# CHECK: LSDA Address: [[#%.16x,EXCEPT1]] +# CHECK: DW_CFA_def_cfa_offset: +8 +# CHECK: 0x[[#%x,G]]: + +# CHECK: [[#%.8x,CIE2_OFF:]] {{.*}} CIE +# CHECK: Format: DWARF32 +# CHECK: Version: 1 +# CHECK: Augmentation: "zPLR" +# CHECK: Code alignment factor: 1 +# CHECK: Data alignment factor: -8 +# CHECK: Return address column: +# CHECK: Personality Address: [[#%.16x,MY_PERSONALITY_GOT]] +# CHECK: Augmentation data: 9B {{(([[:xdigit:]]{2} ){4})}}10 10 + +# CHECK: [[#%.8x,H_DWARF_OFF]] {{.*}} [[#%.8x,H_DWARF_OFF + 4 - CIE2_OFF]] FDE cie=[[#CIE2_OFF]] pc=[[#%x,H]] +# CHECK: Format: DWARF32 +# CHECK: LSDA Address: [[#%.16x,EXCEPT2]] +# CHECK: DW_CFA_def_cfa_offset: +8 +# CHECK: 0x[[#%x,H]]: + +.globl _my_personality, _main + +.text +## _f's unwind info can be encoded with compact unwind, so we shouldn't see an +## FDE entry for it in the output file. 
+.p2align 2 +_f: + .cfi_startproc + .cfi_personality 155, ___gxx_personality_v0 + .cfi_lsda 16, Lexception0 + .cfi_def_cfa_offset 8 + ret + .cfi_endproc + +.p2align 2 +_no_unwind: + ret + +.p2align 2 +_g: + .cfi_startproc + .cfi_personality 155, ___gxx_personality_v0 + .cfi_lsda 16, Lexception1 + .cfi_def_cfa_offset 8 + ## cfi_escape cannot be encoded in compact unwind, so we must keep _g's FDE + .cfi_escape 0x2e, 0x10 + ret + .cfi_endproc + +.p2align 2 +_h: + .cfi_startproc + .cfi_personality 155, _my_personality + .cfi_lsda 16, Lexception2 + .cfi_def_cfa_offset 8 + ## cfi_escape cannot be encoded in compact unwind, so we must keep _h's FDE + .cfi_escape 0x2e, 0x10 + ret + .cfi_endproc + +.p2align 2 +_my_personality: + ret + +.p2align 2 +_main: + ret + +.section __TEXT,__gcc_except_tab +GCC_except_table0: +Lexception0: + .byte 255 + +GCC_except_table1: +Lexception1: + .byte 255 + +.section __TEXT,custom_except +custom_except_table2: +Lexception2: + .byte 255 + +.subsections_via_symbols diff --git a/wild/tests/lld-macho/encryption-info.s b/wild/tests/lld-macho/encryption-info.s new file mode 100644 index 000000000..fc97d0f88 --- /dev/null +++ b/wild/tests/lld-macho/encryption-info.s @@ -0,0 +1,35 @@ +# REQUIRES: aarch64, x86 +# RUN: rm -rf %t; mkdir -p %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o +# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %s -o %t/watchos-test.o + +# RUN: %lld -lSystem -o %t/test %t/test.o +# RUN: llvm-objdump --macho --all-headers %t/test | FileCheck %s --check-prefix=NO-ENCRYPTION -DSUFFIX=_64 + +# RUN: %lld -lSystem -encryptable -o %t/test %t/test.o +# RUN: llvm-objdump --macho --all-headers %t/test | FileCheck %s --check-prefix=ENCRYPTION -DSUFFIX=_64 -D#PAGE_SIZE=4096 + +# RUN: %lld-watchos -lSystem -o %t/watchos-test %t/watchos-test.o +# RUN: llvm-objdump --macho --all-headers %t/watchos-test | FileCheck %s --check-prefix=ENCRYPTION -DSUFFIX= -D#PAGE_SIZE=16384 + +# RUN: %lld-watchos -lSystem 
-no_encryption -o %t/watchos-test %t/watchos-test.o +# RUN: llvm-objdump --macho --all-headers %t/watchos-test | FileCheck %s --check-prefix=NO-ENCRYPTION -DSUFFIX= + +# ENCRYPTION: segname __TEXT +# ENCRYPTION-NEXT: vmaddr +# ENCRYPTION-NEXT: vmsize +# ENCRYPTION-NEXT: fileoff 0 +# ENCRYPTION-NEXT: filesize [[#TEXT_SIZE:]] + +# ENCRYPTION: cmd LC_ENCRYPTION_INFO[[SUFFIX]]{{$}} +# ENCRYPTION-NEXT: cmdsize +# ENCRYPTION-NEXT: cryptoff [[#PAGE_SIZE]] +# ENCRYPTION-NEXT: cryptsize [[#TEXT_SIZE - PAGE_SIZE]] +# ENCRYPTION-NEXT: cryptid 0 + +# NO-ENCRYPTION-NOT: LC_ENCRYPTION_INFO[[SUFFIX]]{{$}} + +.globl _main +.p2align 2 +_main: + ret diff --git a/wild/tests/lld-macho/fat-arch.s b/wild/tests/lld-macho/fat-arch.s new file mode 100644 index 000000000..59b82cd90 --- /dev/null +++ b/wild/tests/lld-macho/fat-arch.s @@ -0,0 +1,45 @@ +# REQUIRES: x86,aarch64 +## FIXME: The tests doesn't run on windows right now because of llvm-mc (can't produce triple=arm64-apple-macos11.0) +# UNSUPPORTED: system-windows + +# RUN: llvm-mc -filetype=obj -triple=i386-apple-darwin %s -o %t.i386.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.x86_64.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos11.0 %s -o %t.arm64.o + +# RUN: llvm-lipo %t.i386.o %t.x86_64.o -create -o %t.fat.o +# RUN: %lld -o /dev/null %t.fat.o +# RUN: llvm-lipo %t.i386.o -create -o %t.noarch.o +# RUN: not %no-fatal-warnings-lld -o /dev/null %t.noarch.o 2>&1 | \ +# RUN: FileCheck %s -DFILE=%t.noarch.o +# CHECK: warning: [[FILE]]: ignoring file because it is universal (i386) but does not contain the x86_64 architecture + +# RUN: not %lld -arch arm64 -o /dev/null %t.fat.o 2>&1 | \ +# RUN: FileCheck --check-prefix=CHECK-FAT %s -DFILE=%t.fat.o +# CHECK-FAT: error: [[FILE]]: ignoring file because it is universal (i386,x86_64) but does not contain the arm64 architecture + +## Validates that we read the cpu-subtype correctly from a fat exec. 
+# RUN: %lld -o %t.x86_64.out %t.x86_64.o +# RUN: %lld -arch arm64 -o %t.arm64.out %t.arm64.o +# RUN: llvm-lipo %t.x86_64.out %t.arm64.out -create -o %t.fat.exec.out +# RUN: %lld -arch x86_64 %t.x86_64.o -bundle_loader %t.fat.exec.out -bundle -o %t.fat.bundle + +# RUN: llvm-otool -h %t.fat.bundle > %t.bundle_header.txt +# RUN: llvm-otool -f %t.fat.exec.out >> %t.bundle_header.txt +# RUN: cat %t.bundle_header.txt | FileCheck %s --check-prefix=CPU-SUB + +# CPU-SUB: magic cputype cpusubtype caps filetype ncmds sizeofcmds flags +# CPU-SUB-NEXT: 0xfeedfacf 16777223 3 0x{{.+}} {{.+}} {{.+}} {{.+}} {{.+}} + +# CPU-SUB: Fat headers +# CPU-SUB: nfat_arch 2 +# CPU-SUB: architecture 0 +# CPU-SUB-NEXT: cputype 16777223 +# CPU-SUB-NEXT: cpusubtype 3 +# CPU-SUB: architecture 1 +# CPU-SUB-NEXT: cputype 16777228 +# CPU-SUB-NEXT: cpusubtype 0 + +.text +.global _main +_main: + ret diff --git a/wild/tests/lld-macho/header.s b/wild/tests/lld-macho/header.s new file mode 100644 index 000000000..e7ddf9456 --- /dev/null +++ b/wild/tests/lld-macho/header.s @@ -0,0 +1,28 @@ +# REQUIRES: x86, aarch64 +# RUN: rm -rf %t && mkdir -p %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/x86-64-test.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t/arm64-test.o +# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %s -o %t/arm64-32-test.o +# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %s -o %t/arm64-32-test.o + +# RUN: %lld -lSystem -arch x86_64 -o %t/x86-64-executable %t/x86-64-test.o +# RUN: %lld -lSystem -arch arm64 -o %t/arm64-executable %t/arm64-test.o +# RUN: %lld-watchos -lSystem -o %t/arm64-32-executable %t/arm64-32-test.o + +# RUN: %lld -arch x86_64 -dylib -o %t/x86-64-dylib %t/x86-64-test.o + +## NOTE: recent versions of ld64 don't emit LIB64 for x86-64-executable, maybe we should follow suit +# RUN: llvm-objdump --macho --private-header %t/x86-64-executable | FileCheck %s --check-prefix=EXEC -DCPU=X86_64 -DSUBTYPE=ALL -DCAPS=LIB64 
+# RUN: llvm-objdump --macho --private-header %t/arm64-executable | FileCheck %s --check-prefix=EXEC -DCPU=ARM64 -DSUBTYPE=ALL -DCAPS=0x00 +# RUN: llvm-objdump --macho --private-header %t/arm64-32-executable | FileCheck %s --check-prefix=EXEC -DCPU=ARM64_32 -DSUBTYPE=V8 -DCAPS=0x00 + +# RUN: llvm-objdump --macho --private-header %t/x86-64-dylib | FileCheck %s --check-prefix=DYLIB -DCPU=X86_64 -DSUBTYPE=ALL -DCAPS=0x00 + +# EXEC: magic cputype cpusubtype caps filetype {{.*}} flags +# EXEC-NEXT: MH_MAGIC{{(_64)?}} [[CPU]] [[SUBTYPE]] [[CAPS]] EXECUTE {{.*}} NOUNDEFS DYLDLINK TWOLEVEL PIE{{$}} + +# DYLIB: magic cputype cpusubtype caps filetype {{.*}} flags +# DYLIB-NEXT: MH_MAGIC_64{{(_64)?}} [[CPU]] [[SUBTYPE]] [[CAPS]] DYLIB {{.*}} NOUNDEFS DYLDLINK TWOLEVEL NO_REEXPORTED_DYLIBS{{$}} + +.globl _main +_main: diff --git a/wild/tests/lld-macho/icf-arm64.s b/wild/tests/lld-macho/icf-arm64.s new file mode 100644 index 000000000..7d74af8ce --- /dev/null +++ b/wild/tests/lld-macho/icf-arm64.s @@ -0,0 +1,109 @@ +# REQUIRES: aarch64 +# RUN: rm -rf %t; split-file %s %t + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin19.0.0 %t/main.s -o %t/main.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin19.0.0 %t/f2.s -o %t/f2.o +# RUN: %lld -arch arm64 -lSystem --icf=all -o %t/main %t/main.o %t/f2.o +# RUN: llvm-objdump -d --syms --print-imm-hex %t/main | FileCheck %s + +# CHECK-LABEL: SYMBOL TABLE: +# CHECK: [[#%x,F1_REF:]] g F __TEXT,__text _f1 +# CHECK: [[#%x,F1_REF:]] g F __TEXT,__text _f2 + +# CHECK-LABEL: Disassembly of section __TEXT,__text: +# CHECK: <_main>: +# CHECK: bl 0x[[#%x,F1_REF:]] +# CHECK: bl 0x[[#%x,F1_REF:]] + +#--- main.s + +.subsections_via_symbols + +.literal16 +.p2align 3 +L_align16: +.quad 0xffffffffffffffff +.short 0xaaaa +.short 0xaaaa +.space 4, 0xaa + +.literal8 +.p2align 3 +L_align8: +.quad 0xeeeeeeeeeeeeeeee + +.literal4 +.p2align 2 +L_align4: +.short 0xbbbb +.short 0xbbbb + + +.text +.p2align 2 + +.globl _main, _f1, _f2 + +## Test that 
loading from __literalN sections at non-literal boundaries +## doesn't confuse ICF. This function should be folded with the identical +## _f2 in f2 (which uses literals of the same value in a different isec). +_f1: + adrp x9, L_align16@PAGE + 4 + add x9, x9, L_align16@PAGEOFF + 4 + ldr x10, [x9] + + adrp x9, L_align8@PAGE + 4 + add x9, x9, L_align8@PAGEOFF + 4 + ldr w11, [x9] + + adrp x9, L_align4@PAGE + 2 + add x9, x9, L_align4@PAGEOFF + 2 + ldrh w12, [x9] + + ret + +_main: + bl _f1 + bl _f2 + +#--- f2.s + +.subsections_via_symbols + +.literal16 +.p2align 3 +L_align16: +.quad 0xffffffffffffffff +.short 0xaaaa +.short 0xaaaa +.space 4, 170 + +.literal8 +.p2align 3 +L_align8: +.quad 0xeeeeeeeeeeeeeeee + +.literal4 +.p2align 2 +L_align4: +.short 0xbbbb +.short 0xbbbb + +.text +.p2align 2 + +.globl _f2 +_f2: + adrp x9, L_align16@PAGE + 4 + add x9, x9, L_align16@PAGEOFF + 4 + ldr x10, [x9] + + adrp x9, L_align8@PAGE + 4 + add x9, x9, L_align8@PAGEOFF + 4 + ldr w11, [x9] + + adrp x9, L_align4@PAGE + 2 + add x9, x9, L_align4@PAGEOFF + 2 + ldrh w12, [x9] + + ret diff --git a/wild/tests/lld-macho/icf-safe-missing-addrsig.s b/wild/tests/lld-macho/icf-safe-missing-addrsig.s new file mode 100644 index 000000000..d289fee47 --- /dev/null +++ b/wild/tests/lld-macho/icf-safe-missing-addrsig.s @@ -0,0 +1,112 @@ +# REQUIRES: aarch64 + +# RUN: rm -rf %t && split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/with-addrsig.s -o %t/with-addrsig.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/without-addrsig.s -o %t/without-addrsig.o +# RUN: %lld -arch arm64 -lSystem --icf=safe -dylib -map %t/with-addrsig-safe.map -o %t/with-addrsig.dylib %t/with-addrsig.o +# RUN: %lld -arch arm64 -lSystem --icf=safe -dylib -map %t/without-addrsig-safe.map -o %t/without-addrsig.dylib %t/without-addrsig.o +# RUN: %lld -arch arm64 -lSystem --icf=safe_thunks -dylib -map %t/with-addrsig-safe-thunks.map -o %t/with-addrsig-thunks.dylib %t/with-addrsig.o +# RUN: %lld -arch 
arm64 -lSystem --icf=safe_thunks -dylib -map %t/without-addrsig-safe-thunks.map -o %t/without-addrsig-thunks.dylib %t/without-addrsig.o +# RUN: FileCheck %s --check-prefix=ADDRSIG-SAFE < %t/with-addrsig-safe.map +# RUN: FileCheck %s --check-prefix=NO-ADDRSIG-SAFE < %t/without-addrsig-safe.map +# RUN: FileCheck %s --check-prefix=ADDRSIG-SAFE-THUNKS < %t/with-addrsig-safe-thunks.map +# RUN: FileCheck %s --check-prefix=NO-ADDRSIG-SAFE-THUNKS < %t/without-addrsig-safe-thunks.map + +## Input has addrsig section: _g1 and _g2 are address-significant, so _g2 is +## thunk-folded in safe_thunks ICF and remains untouched in safe ICF. +## _f2 is always body-folded into _f1 regardless of ICF level. + +# ADDRSIG-SAFE: 0x00000008 [ 2] _f1 +# ADDRSIG-SAFE-NEXT: 0x00000000 [ 2] _f2 +# ADDRSIG-SAFE: 0x00000008 [ 2] _g1 +# ADDRSIG-SAFE-NEXT: 0x00000008 [ 2] _g2 + +# ADDRSIG-SAFE-THUNKS: 0x00000008 [ 2] _f1 +# ADDRSIG-SAFE-THUNKS-NEXT: 0x00000000 [ 2] _f2 +# ADDRSIG-SAFE-THUNKS: 0x00000008 [ 2] _g1 +# ADDRSIG-SAFE-THUNKS: 0x00000004 [ 2] _g2 + +## Input does not have addrsig section: everything is address-significant, so +## no folding happened in safe ICF, and _f2, _g2 are thunk-folded into _f1, _g1 +## respectively. 
+ +# NO-ADDRSIG-SAFE: 0x00000008 [ 2] _f1 +# NO-ADDRSIG-SAFE-NEXT: 0x00000008 [ 2] _f2 +# NO-ADDRSIG-SAFE-NEXT: 0x00000008 [ 2] _g1 +# NO-ADDRSIG-SAFE-NEXT: 0x00000008 [ 2] _g2 + +# NO-ADDRSIG-SAFE-THUNKS: 0x00000008 [ 2] _f1 +# NO-ADDRSIG-SAFE-THUNKS-NEXT: 0x00000008 [ 2] _g1 +# NO-ADDRSIG-SAFE-THUNKS: 0x00000004 [ 2] _g2 +# NO-ADDRSIG-SAFE-THUNKS-NEXT: 0x00000004 [ 2] _f2 + +#--- with-addrsig.s +.subsections_via_symbols +.text +.p2align 2 + +.globl _f1 +_f1: + mov w0, #0 + ret + +.globl _f2 +_f2: + mov w0, #0 + ret + +.globl _g1 +_g1: + mov w0, #1 + ret + +.globl _g2 +_g2: + mov w0, #1 + ret + +.globl _call_all +_call_all: + bl _f1 + bl _f2 + bl _g1 + bl _g2 + ret + +.addrsig +.addrsig_sym _call_all +.addrsig_sym _g1 +.addrsig_sym _g2 + +#--- without-addrsig.s +.subsections_via_symbols +.text +.p2align 2 + +.globl _f1 +_f1: + mov w0, #0 + ret + +.globl _f2 +_f2: + mov w0, #0 + ret + +.globl _g1 +_g1: + mov w0, #1 + ret + +.globl _g2 +_g2: + mov w0, #1 + ret + +.globl _call_all +_call_all: + bl _f1 + bl _f2 + bl _g1 + bl _g2 + ret diff --git a/wild/tests/lld-macho/ignore-incompat-arch.s b/wild/tests/lld-macho/ignore-incompat-arch.s new file mode 100644 index 000000000..676b5e56f --- /dev/null +++ b/wild/tests/lld-macho/ignore-incompat-arch.s @@ -0,0 +1,72 @@ +# REQUIRES: x86, aarch64 +## Test that LLD correctly ignored archives with incompatible architecture without crashing. 
+ +# RUN: rm -rf %t; split-file %s %t + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos %t/callee.s -o %t/callee_arm64.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %t/callee.s -o %t/callee_x86_64.o + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos %t/caller.s -o %t/caller_arm64.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %t/caller.s -o %t/caller_x86_64.o + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos %t/main.s -o %t/main_arm64.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %t/main.s -o %t/main_x86_64.o + +# RUN: llvm-ar rc %t/libcallee_arm64.a %t/callee_arm64.o +# RUN: llvm-ar r %t/libcallee_x86.a %t/callee_x86_64.o + +# RUN: llvm-ar r %t/libcaller_arm64.a %t/caller_arm64.o +# RUN: llvm-ar r %t/libcaller_x86.a %t/caller_x86_64.o + +## Symbol from the arm64 archive should be ignored even tho it appears before the x86 archive. +# RUN: %no-fatal-warnings-lld -map %t/x86_a.map -arch x86_64 %t/main_x86_64.o %t/libcallee_arm64.a %t/libcallee_x86.a %t/libcaller_x86.a -o %t/x86_a.out 2>&1 \ +# RUN: | FileCheck -check-prefix=X86-WARNING %s + +# RUN: %no-fatal-warnings-lld -map %t/x86_b.map -arch x86_64 %t/main_x86_64.o %t/libcallee_x86.a %t/libcallee_arm64.a %t/libcaller_x86.a -o %t/x86_b.out 2>&1 \ +# RUN: | FileCheck -check-prefix=X86-WARNING %s + +# RUN: %no-fatal-warnings-lld -map %t/arm64_a.map -arch arm64 %t/main_arm64.o %t/libcallee_x86.a %t/libcallee_arm64.a %t/libcaller_arm64.a -o %t/arm64_a.out 2>&1 \ +# RUN: | FileCheck -check-prefix=ARM64-WARNING %s + +# RUN: %no-fatal-warnings-lld -map %t/arm64_b.map -arch arm64 %t/main_arm64.o %t/libcallee_arm64.a %t/libcallee_x86.a %t/libcaller_arm64.a -o %t/arm64_b.out 2>&1 \ +# RUN: | FileCheck -check-prefix=ARM64-WARNING %s + +## Verify that the output doesn't take any symbol from the in-compat archive +# RUN: FileCheck --check-prefix=SYM-X86 %s --input-file=%t/x86_a.map +# RUN: FileCheck --check-prefix=SYM-X86 %s --input-file=%t/x86_b.map + +# RUN: 
FileCheck --check-prefix=SYM-ARM64 %s --input-file=%t/arm64_a.map +# RUN: FileCheck --check-prefix=SYM-ARM64 %s --input-file=%t/arm64_b.map + + +# X86-WARNING: libcallee_arm64.a has architecture arm64 which is incompatible with target architecture x86_64 + +# ARM64-WARNING: libcallee_x86.a has architecture x86_64 which is incompatible with target architecture arm64 + +# SYM-X86-NOT: libcallee_arm64.a +# SYM-X86: {{.+}}main_x86_64.o +# SYM-X86: {{.+}}libcallee_x86.a(callee_x86_64.o) +# SYM-X86: {{.+}}libcaller_x86.a(caller_x86_64.o) + +# SYM-ARM64-NOT: libcallee_x86.a +# SYM-ARM64: {{.+}}main_arm64.o +# SYM-ARM64: {{.+}}libcallee_arm64.a(callee_arm64.o) +# SYM-ARM64: {{.+}}libcaller_arm64.a(caller_arm64.o) + + +#--- callee.s +.globl _callee +_callee: + ret + +#--- caller.s +.globl _caller +_caller: + .quad _callee + ret + +#--- main.s +.globl _main +_main: + .quad _caller + ret diff --git a/wild/tests/lld-macho/local-alias-to-weak.s b/wild/tests/lld-macho/local-alias-to-weak.s new file mode 100644 index 000000000..feb4c0a2e --- /dev/null +++ b/wild/tests/lld-macho/local-alias-to-weak.s @@ -0,0 +1,149 @@ +# REQUIRES: x86 +## This test checks that when we coalesce weak definitions, their local symbol +## aliases defs don't cause the coalesced data to be retained. This was +## motivated by MC's aarch64 backend which automatically creates `ltmp` +## symbols at the start of each .text section. These symbols are frequently +## aliases of other symbols created by clang or other inputs to MC. I've chosen +## to explicitly create them here since we can then reference those symbols for +## a more complete test. +## +## Not retaining the data matters for more than just size -- we have a use case +## that depends on proper data coalescing to emit a valid file format. We also +## need this behavior to properly deduplicate the __objc_protolist section; +## failure to do this can result in dyld crashing on iOS 13. 
+## +## Finally, ld64 does all this regardless of whether .subsections_via_symbols is +## specified. We don't. But again, given how rare the lack of that directive is +## (I've only seen it from hand-written assembly inputs), I don't think we need +## to worry about it. + +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/weak-then-local.s -o %t/weak-then-local.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/local-then-weak.s -o %t/local-then-weak.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/no-subsections.s -o %t/no-subsections.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/no-dead-strip.s -o %t/no-dead-strip.o + +# RUN: %lld -lSystem -dylib %t/weak-then-local.o %t/local-then-weak.o -o %t/test1 +# RUN: llvm-objdump --macho --syms --section="__DATA,__data" --weak-bind %t/test1 | FileCheck %s +# RUN: %lld -lSystem -dylib %t/local-then-weak.o %t/weak-then-local.o -o %t/test2 +# RUN: llvm-objdump --macho --syms --section="__DATA,__data" --weak-bind %t/test2 | FileCheck %s + +## Check that we only have one copy of 0x123 in the data, not two. +# CHECK: Contents of (__DATA,__data) section +# CHECK-NEXT: 0000000000001000 23 01 00 00 00 00 00 00 00 10 00 00 00 00 00 00 {{$}} +# CHECK-NEXT: 0000000000001010 00 10 00 00 00 00 00 00 {{$}} +# CHECK-EMPTY: +# CHECK-NEXT: SYMBOL TABLE: +# CHECK-NEXT: 0000000000001000 l O __DATA,__data _alias +# CHECK-NEXT: 0000000000001008 l O __DATA,__data _ref +# CHECK-NEXT: 0000000000001000 l O __DATA,__data _alias +# CHECK-NEXT: 0000000000001010 l O __DATA,__data _ref +# CHECK-NEXT: 0000000000001000 w O __DATA,__data _weak +# CHECK-NEXT: 0000000000000000 *UND* dyld_stub_binder +# CHECK-EMPTY: +## Even though the references were to the non-weak `_alias` symbols, ld64 still +## emits weak binds as if they were the `_weak` symbol itself. We do not. I +## don't know of any programs that rely on this behavior, so I'm just +## documenting it here. 
+# CHECK-NEXT: Weak bind table: +# CHECK-NEXT: segment section address type addend symbol +# CHECK-EMPTY: + +# RUN: %lld -lSystem -dylib %t/local-then-weak.o %t/no-subsections.o -o %t/sub-nosub +# RUN: llvm-objdump --macho --syms --section="__DATA,__data" %t/sub-nosub | FileCheck %s --check-prefix SUB-NOSUB + +## This test case demonstrates a shortcoming of LLD: If .subsections_via_symbols +## isn't enabled, we don't elide the contents of coalesced weak symbols if they +## are part of a section that has other non-coalesced symbols. In contrast, LD64 +## does elide the contents. +# SUB-NOSUB: Contents of (__DATA,__data) section +# SUB-NOSUB-NEXT: 0000000000001000 23 01 00 00 00 00 00 00 00 10 00 00 00 00 00 00 +# SUB-NOSUB-NEXT: 0000000000001010 00 00 00 00 00 00 00 00 23 01 00 00 00 00 00 00 +# SUB-NOSUB-EMPTY: +# SUB-NOSUB-NEXT: SYMBOL TABLE: +# SUB-NOSUB-NEXT: 0000000000001000 l O __DATA,__data _alias +# SUB-NOSUB-NEXT: 0000000000001008 l O __DATA,__data _ref +# SUB-NOSUB-NEXT: 0000000000001010 l O __DATA,__data _zeros +# SUB-NOSUB-NEXT: 0000000000001000 l O __DATA,__data _alias +# SUB-NOSUB-NEXT: 0000000000001000 w O __DATA,__data _weak +# SUB-NOSUB-NEXT: 0000000000000000 *UND* dyld_stub_binder + +# RUN: %lld -lSystem -dylib %t/no-subsections.o %t/local-then-weak.o -o %t/nosub-sub +# RUN: llvm-objdump --macho --syms --section="__DATA,__data" %t/nosub-sub | FileCheck %s --check-prefix NOSUB-SUB + +# NOSUB-SUB: Contents of (__DATA,__data) section +# NOSUB-SUB-NEXT: 0000000000001000 00 00 00 00 00 00 00 00 23 01 00 00 00 00 00 00 +# NOSUB-SUB-NEXT: 0000000000001010 08 10 00 00 00 00 00 00 {{$}} +# NOSUB-SUB-EMPTY: +# NOSUB-SUB-NEXT: SYMBOL TABLE: +# NOSUB-SUB-NEXT: 0000000000001000 l O __DATA,__data _zeros +# NOSUB-SUB-NEXT: 0000000000001008 l O __DATA,__data _alias +# NOSUB-SUB-NEXT: 0000000000001008 l O __DATA,__data _alias +# NOSUB-SUB-NEXT: 0000000000001010 l O __DATA,__data _ref +# NOSUB-SUB-NEXT: 0000000000001008 w O __DATA,__data _weak +# NOSUB-SUB-NEXT: 
0000000000000000 *UND* dyld_stub_binder + +## Verify that we don't drop any flags that the aliases have (such as +## .no_dead_strip). This is a regression test. We previously had subsections +## that were mistakenly stripped. + +# RUN: %lld -lSystem -dead_strip %t/no-dead-strip.o -o %t/no-dead-strip +# RUN: llvm-objdump --macho --section-headers %t/no-dead-strip | FileCheck %s \ +# RUN: --check-prefix=NO-DEAD-STRIP +# NO-DEAD-STRIP: __data 00000010 + +#--- weak-then-local.s +.globl _weak +.weak_definition _weak +.data +_weak: +_alias: + .quad 0x123 + +_ref: + .quad _alias + +.subsections_via_symbols + +#--- local-then-weak.s +.globl _weak +.weak_definition _weak +.data +_alias: +_weak: + .quad 0x123 + +_ref: + .quad _alias + +.subsections_via_symbols + +#--- no-subsections.s +.globl _weak +.weak_definition _weak +.data +_zeros: +.space 8 + +_weak: +_alias: + .quad 0x123 + +#--- no-dead-strip.s +.globl _main + +_main: + ret + +.data +.no_dead_strip l_foo, l_bar + +_foo: +l_foo: + .quad 0x123 + +l_bar: +_bar: + .quad 0x123 + +.subsections_via_symbols diff --git a/wild/tests/lld-macho/loh-adrp-add-ldr.s b/wild/tests/lld-macho/loh-adrp-add-ldr.s new file mode 100644 index 000000000..efab90531 --- /dev/null +++ b/wild/tests/lld-macho/loh-adrp-add-ldr.s @@ -0,0 +1,185 @@ +# REQUIRES: aarch64 + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o +# RUN: %lld -arch arm64 %t.o -o %t +# RUN: llvm-objdump --no-print-imm-hex -d --macho %t | FileCheck %s + +## This is mostly a copy of loh-adrp-ldr-got-ldr.s's `local.s` test, except that Adrp+Ldr+Ldr +## triples have been changed to Adrp+Add+Ldr. The performed optimization is the same. 
+.text +.align 2 +.globl _main +_main: + +### Transformation to a literal LDR +## Basic case +L1: adrp x0, _close@PAGE +L2: add x1, x0, _close@PAGEOFF +L3: ldr x2, [x1] +# CHECK-LABEL: _main: +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: ldr x2 + +## Load with offset +L4: adrp x0, _close@PAGE +L5: add x1, x0, _close@PAGEOFF +L6: ldr x2, [x1, #8] +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: ldr x2 + +## 32 bit load +L7: adrp x0, _close@PAGE +L8: add x1, x0, _close@PAGEOFF +L9: ldr w1, [x1] +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: ldr w1, _close + +## Floating point +L10: adrp x0, _close@PAGE +L11: add x1, x0, _close@PAGEOFF +L12: ldr s1, [x1] +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: ldr s1, _close + +L13: adrp x0, _close@PAGE +L14: add x1, x0, _close@PAGEOFF +L15: ldr d1, [x1, #8] +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: ldr d1, _close8 + +L16: adrp x0, _close@PAGE +L17: add x1, x0, _close@PAGEOFF +L18: ldr q0, [x1] +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: ldr q0, _close + + +### Transformation to ADR+LDR +## 1 byte floating point load +L19: adrp x0, _close@PAGE +L20: add x1, x0, _close@PAGEOFF +L21: ldr b2, [x1] +# CHECK-NEXT: adr x1 +# CHECK-NEXT: nop +# CHECK-NEXT: ldr b2, [x1] + +## 1 byte GPR load, zero extend +L22: adrp x0, _close@PAGE +L23: add x1, x0, _close@PAGEOFF +L24: ldrb w2, [x1] +# CHECK-NEXT: adr x1 +# CHECK-NEXT: nop +# CHECK-NEXT: ldrb w2, [x1] + +## 1 byte GPR load, sign extend +L25: adrp x0, _close@PAGE +L26: add x1, x0, _close@PAGEOFF +L27: ldrsb x2, [x1] +# CHECK-NEXT: adr x1 +# CHECK-NEXT: nop +# CHECK-NEXT: ldrsb x2, [x1] + +## Unaligned +L28: adrp x0, _unaligned@PAGE +L29: add x1, x0, _unaligned@PAGEOFF +L30: ldr x2, [x1] +# CHECK-NEXT: adr x1 +# CHECK-NEXT: nop +# CHECK-NEXT: ldr x2, [x1] + + +### Transformation to ADRP + immediate LDR +## Basic test: target is far +L31: adrp x0, _far@PAGE +L32: add x1, x0, _far@PAGEOFF +L33: ldr x2, [x1] +# CHECK-NEXT: adrp x0 +# CHECK-NEXT: 
nop +# CHECK-NEXT: ldr x2 + +## With offset +L34: adrp x0, _far@PAGE +L35: add x1, x0, _far@PAGEOFF +L36: ldr x2, [x1, #8] +# CHECK-NEXT: adrp x0 +# CHECK-NEXT: nop +# CHECK-NEXT: ldr x2 + +### No changes +## Far and unaligned +L37: adrp x0, _far_unaligned@PAGE +L38: add x1, x0, _far_unaligned@PAGEOFF +L39: ldr x2, [x1] +# CHECK-NEXT: adrp x0 +# CHECK-NEXT: add x1, x0 +# CHECK-NEXT: ldr x2, [x1] + +## Far with large offset (_far_offset@PAGE + #255 > 4095) +L40: adrp x0, _far_offset@PAGE +L41: add x1, x0, _far_offset@PAGEOFF +L42: ldrb w2, [x1, #255] +# CHECK-NEXT: adrp x0 +# CHECK-NEXT: add x1, x0 +# CHECK-NEXT: ldrb w2, [x1, #255] + +### Invalid inputs; the instructions should be left untouched. +## Registers don't match +L43: adrp x0, _far@PAGE +L44: add x1, x0, _far@PAGEOFF +L45: ldr x2, [x2] +# CHECK-NEXT: adrp x0 +# CHECK-NEXT: add x1, x0 +# CHECK-NEXT: ldr x2, [x2] + +## Targets don't match +L46: adrp x0, _close@PAGE +L47: add x1, x0, _close8@PAGEOFF +L48: ldr x2, [x1] +# CHECK-NEXT: adrp x0 +# CHECK-NEXT: add x1, x0 +# CHECK-NEXT: ldr x2, [x1] + +.data +.align 4 + .quad 0 +_close: + .quad 0 +_close8: + .quad 0 + .byte 0 +_unaligned: + .quad 0 + +.space 1048576 +.align 12 + .quad 0 +_far: + .quad 0 + .byte 0 +_far_unaligned: + .quad 0 +.space 4000 +_far_offset: + .byte 0 + +.loh AdrpAddLdr L1, L2, L3 +.loh AdrpAddLdr L4, L5, L6 +.loh AdrpAddLdr L7, L8, L9 +.loh AdrpAddLdr L10, L11, L12 +.loh AdrpAddLdr L13, L14, L15 +.loh AdrpAddLdr L16, L17, L18 +.loh AdrpAddLdr L19, L20, L21 +.loh AdrpAddLdr L22, L23, L24 +.loh AdrpAddLdr L25, L26, L27 +.loh AdrpAddLdr L28, L29, L30 +.loh AdrpAddLdr L31, L32, L33 +.loh AdrpAddLdr L34, L35, L36 +.loh AdrpAddLdr L37, L38, L39 +.loh AdrpAddLdr L40, L41, L42 +.loh AdrpAddLdr L43, L44, L45 diff --git a/wild/tests/lld-macho/loh-adrp-add.s b/wild/tests/lld-macho/loh-adrp-add.s new file mode 100644 index 000000000..6026be8d4 --- /dev/null +++ b/wild/tests/lld-macho/loh-adrp-add.s @@ -0,0 +1,90 @@ +# REQUIRES: aarch64 + +# RUN: 
llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o +# RUN: %lld -arch arm64 %t.o -o %t +# RUN: llvm-objdump -d --macho %t | FileCheck %s + +# CHECK-LABEL: _main: +## Out of range, before +# CHECK-NEXT: adrp x0 +# CHECK-NEXT: add x0, x0 +## In range, before +# CHECK-NEXT: adr x1 +# CHECK-NEXT: nop +## Registers don't match (invalid input) +# CHECK-NEXT: adrp x2 +# CHECK-NEXT: add x0 +## Not an adrp instruction (invalid input) +# CHECK-NEXT: nop +# CHECK-NEXT: add x4 +## In range, after +# CHECK-NEXT: adr x5 +# CHECK-NEXT: nop +## In range, add's destination register is not the same as its source +# CHECK-NEXT: adr x7 +# CHECK-NEXT: nop +## Valid, non-adjacent instructions - start +# CHECK-NEXT: adr x8 +## Out of range, after +# CHECK-NEXT: adrp x9 +# CHECK-NEXT: add x9, x9 +## Valid, non-adjacent instructions - end +# CHECK-NEXT: nop + +.text +.align 2 +_before_far: + .space 1048576 + +_before_near: + nop + +.globl _main +_main: +L1: + adrp x0, _before_far@PAGE +L2: + add x0, x0, _before_far@PAGEOFF +L3: + adrp x1, _before_near@PAGE +L4: + add x1, x1, _before_near@PAGEOFF +L5: + adrp x2, _before_near@PAGE +L6: + add x0, x0, _before_near@PAGEOFF +L9: + nop +L10: + add x4, x4, _after_near@PAGEOFF +L11: + adrp x5, _after_near@PAGE +L12: + add x5, x5, _after_near@PAGEOFF +L13: + adrp x6, _after_near@PAGE +L14: + add x7, x6, _after_near@PAGEOFF +L15: + adrp x8, _after_near@PAGE +L16: + adrp x9, _after_far@PAGE +L17: + add x9, x9, _after_far@PAGEOFF +L18: + add x8, x8, _after_near@PAGEOFF + +_after_near: + .space 1048576 + +_after_far: + nop + +.loh AdrpAdd L1, L2 +.loh AdrpAdd L3, L4 +.loh AdrpAdd L5, L6 +.loh AdrpAdd L9, L10 +.loh AdrpAdd L11, L12 +.loh AdrpAdd L13, L14 +.loh AdrpAdd L15, L18 +.loh AdrpAdd L16, L17 diff --git a/wild/tests/lld-macho/loh-adrp-adrp.s b/wild/tests/lld-macho/loh-adrp-adrp.s new file mode 100644 index 000000000..55d6a614f --- /dev/null +++ b/wild/tests/lld-macho/loh-adrp-adrp.s @@ -0,0 +1,72 @@ +# REQUIRES: aarch64 + +# RUN: llvm-mc 
-filetype=obj -triple=arm64-apple-darwin %s -o %t.o +# RUN: %lld -arch arm64 %t.o -o %t +# RUN: llvm-objdump -d --macho %t | FileCheck %s + +# CHECK-LABEL: _main: +## Valid +# CHECK-NEXT: adrp x0 +# CHECK-NEXT: nop +## Mismatched registers +# CHECK-NEXT: adrp x1 +# CHECK-NEXT: adrp x2 +## Not on the same page +# CHECK-NEXT: adrp x3 +# CHECK-NEXT: adrp x3 +## Not an adrp instruction (invalid) +# CHECK-NEXT: nop +# CHECK-NEXT: adrp x4 +## Other relaxations take precedence over AdrpAdrp +# CHECK-NEXT: adr x6 +# CHECK-NEXT: nop +# CHECK-NEXT: adr x6 +# CHECK-NEXT: nop + +.text +.align 2 + +.globl _main +_main: +L1: + adrp x0, _foo@PAGE +L2: + adrp x0, _bar@PAGE +L3: + adrp x1, _foo@PAGE +L4: + adrp x2, _bar@PAGE +L5: + adrp x3, _foo@PAGE +L6: + adrp x3, _baz@PAGE +L7: + nop +L8: + adrp x4, _baz@PAGE +L9: + adrp x5, _foo@PAGE +L10: + add x6, x5, _foo@PAGEOFF +L11: + adrp x5, _bar@PAGE +L12: + add x6, x5, _bar@PAGEOFF + +.data +.align 12 +_foo: + .byte 0 +_bar: + .byte 0 +.space 4094 +_baz: + .byte 0 + +.loh AdrpAdrp L1, L2 +.loh AdrpAdrp L3, L4 +.loh AdrpAdrp L5, L6 +.loh AdrpAdrp L7, L8 +.loh AdrpAdrp L9, L11 +.loh AdrpAdd L9, L10 +.loh AdrpAdd L11, L12 diff --git a/wild/tests/lld-macho/loh-adrp-ldr-got-ldr.s b/wild/tests/lld-macho/loh-adrp-ldr-got-ldr.s new file mode 100644 index 000000000..1905666f8 --- /dev/null +++ b/wild/tests/lld-macho/loh-adrp-ldr-got-ldr.s @@ -0,0 +1,263 @@ +# REQUIRES: aarch64 + +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/lib.s -o %t/lib.o +# RUN: %lld -arch arm64 -dylib -o %t/lib.dylib %t/lib.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/external.s -o %t/near-got.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/external.s -defsym=PADDING=1 -o %t/far-got.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/local.s -o %t/local.o +# RUN: %lld -arch arm64 %t/near-got.o %t/lib.dylib -o %t/NearGot +# RUN: %lld -arch arm64 %t/far-got.o %t/lib.dylib -o %t/FarGot 
+# RUN: %lld -arch arm64 %t/local.o -o %t/Local +# RUN: llvm-objdump --no-print-imm-hex -d --macho %t/NearGot | FileCheck %s -check-prefix=NEAR-GOT +# RUN: llvm-objdump --no-print-imm-hex -d --macho %t/FarGot | FileCheck %s -check-prefix=FAR-GOT +# RUN: llvm-objdump --no-print-imm-hex -d --macho %t/Local | FileCheck %s -check-prefix=LOCAL + +#--- external.s +.text +.align 2 +.globl _main +_main: + +## Basic test +L1: adrp x0, _external@GOTPAGE +L2: ldr x1, [x0, _external@GOTPAGEOFF] +L3: ldr x2, [x1] +# NEAR-GOT-LABEL: _main: +# NEAR-GOT-NEXT: nop +# NEAR-GOT-NEXT: ldr x1, #{{.*}} ; literal pool symbol address: _external +# NEAR-GOT-NEXT: ldr x2, [x1] +# FAR-GOT-LABEL: _main: +# FAR-GOT-NEXT: adrp x0 +# FAR-GOT-NEXT: ldr x1 +# FAR-GOT-NEXT: ldr x2, [x1] + +## The second load has an offset +L4: adrp x0, _external@GOTPAGE +L5: ldr x1, [x0, _external@GOTPAGEOFF] +L6: ldr q2, [x1, #16] +# NEAR-GOT-NEXT: nop +# NEAR-GOT-NEXT: ldr x1, #{{.*}} ; literal pool symbol address: _external +# NEAR-GOT-NEXT: ldr q2, [x1, #16] +# FAR-GOT-NEXT: adrp x0 +# FAR-GOT-NEXT: ldr x1 +# FAR-GOT-NEXT: ldr q2, [x1, #16] + +### Tests for invalid inputs +.ifndef PADDING +## Registers don't match +L7: adrp x0, _external@GOTPAGE +L8: ldr x1, [x1, _external@GOTPAGEOFF] +L9: ldr x2, [x1] +# NEAR-GOT-NEXT: adrp x0 +# NEAR-GOT-NEXT: ldr x1 +# NEAR-GOT-NEXT: ldr x2, [x1] + +## Registers don't match +L10: adrp x0, _external@GOTPAGE +L11: ldr x1, [x0, _external@GOTPAGEOFF] +L12: ldr x2, [x0] +# NEAR-GOT-NEXT: adrp x0 +# NEAR-GOT-NEXT: ldr x1 +# NEAR-GOT-NEXT: ldr x2, [x0] + +## Not an LDR (immediate) +L13: adrp x0, _external@GOTPAGE +L14: ldr x1, 0 +L15: ldr x2, [x1] +# NEAR-GOT-NEXT: adrp x0 +# NEAR-GOT-NEXT: ldr x1 +# NEAR-GOT-NEXT: ldr x2, [x1] + +.loh AdrpLdrGotLdr L7, L8, L9 +.loh AdrpLdrGotLdr L10, L11, L12 +.loh AdrpLdrGotLdr L13, L14, L15 +.endif + +.loh AdrpLdrGotLdr L1, L2, L3 +.loh AdrpLdrGotLdr L4, L5, L6 + +.ifdef PADDING +.space 1048576 +.endif +.data + + +#--- lib.s +.data +.align 4 
+.globl _external +_external: + .zero 32 + +#--- local.s +.text +.align 2 +.globl _main +_main: + +### Transformation to a literal LDR +## Basic case +L1: adrp x0, _close@GOTPAGE +L2: ldr x1, [x0, _close@GOTPAGEOFF] +L3: ldr x2, [x1] +# LOCAL-LABEL: _main: +# LOCAL-NEXT: nop +# LOCAL-NEXT: nop +# LOCAL-NEXT: ldr x2 + +## Load with offset +L4: adrp x0, _close@GOTPAGE +L5: ldr x1, [x0, _close@GOTPAGEOFF] +L6: ldr x2, [x1, #8] +# LOCAL-NEXT: nop +# LOCAL-NEXT: nop +# LOCAL-NEXT: ldr x2 + +## 32 bit load +L7: adrp x0, _close@GOTPAGE +L8: ldr x1, [x0, _close@GOTPAGEOFF] +L9: ldr w1, [x1] +# LOCAL-NEXT: nop +# LOCAL-NEXT: nop +# LOCAL-NEXT: ldr w1, _close + +## Floating point +L10: adrp x0, _close@GOTPAGE +L11: ldr x1, [x0, _close@GOTPAGEOFF] +L12: ldr s1, [x1] +# LOCAL-NEXT: nop +# LOCAL-NEXT: nop +# LOCAL-NEXT: ldr s1, _close + +L13: adrp x0, _close@GOTPAGE +L14: ldr x1, [x0, _close@GOTPAGEOFF] +L15: ldr d1, [x1, #8] +# LOCAL-NEXT: nop +# LOCAL-NEXT: nop +# LOCAL-NEXT: ldr d1, _close8 + +L16: adrp x0, _close@GOTPAGE +L17: ldr x1, [x0, _close@GOTPAGEOFF] +L18: ldr q0, [x1] +# LOCAL-NEXT: nop +# LOCAL-NEXT: nop +# LOCAL-NEXT: ldr q0, _close + + +### Transformation to ADR+LDR +## 1 byte floating point load +L19: adrp x0, _close@GOTPAGE +L20: ldr x1, [x0, _close@GOTPAGEOFF] +L21: ldr b2, [x1] +# LOCAL-NEXT: adr x1 +# LOCAL-NEXT: nop +# LOCAL-NEXT: ldr b2, [x1] + +## 1 byte GPR load, zero extend +L22: adrp x0, _close@GOTPAGE +L23: ldr x1, [x0, _close@GOTPAGEOFF] +L24: ldrb w2, [x1] +# LOCAL-NEXT: adr x1 +# LOCAL-NEXT: nop +# LOCAL-NEXT: ldrb w2, [x1] + +## 1 byte GPR load, sign extend +L25: adrp x0, _close@GOTPAGE +L26: ldr x1, [x0, _close@GOTPAGEOFF] +L27: ldrsb x2, [x1] +# LOCAL-NEXT: adr x1 +# LOCAL-NEXT: nop +# LOCAL-NEXT: ldrsb x2, [x1] + +## Unaligned +L28: adrp x0, _unaligned@GOTPAGE +L29: ldr x1, [x0, _unaligned@GOTPAGEOFF] +L30: ldr x2, [x1] +# LOCAL-NEXT: adr x1 +# LOCAL-NEXT: nop +# LOCAL-NEXT: ldr x2, [x1] + + +### Transformation to ADRP + immediate LDR +## 
Basic test: target is far +L31: adrp x0, _far@GOTPAGE +L32: ldr x1, [x0, _far@GOTPAGEOFF] +L33: ldr x2, [x1] +# LOCAL-NEXT: adrp x0 +# LOCAL-NEXT: nop +# LOCAL-NEXT: ldr x2 + +## With offset +L34: adrp x0, _far@GOTPAGE +L35: ldr x1, [x0, _far@GOTPAGEOFF] +L36: ldr x2, [x1, #8] +# LOCAL-NEXT: adrp x0 +# LOCAL-NEXT: nop +# LOCAL-NEXT: ldr x2 + +### No changes other than GOT relaxation +## Far and unaligned +L37: adrp x0, _far_unaligned@GOTPAGE +L38: ldr x1, [x0, _far_unaligned@GOTPAGEOFF] +L39: ldr x2, [x1] +# LOCAL-NEXT: adrp x0 +# LOCAL-NEXT: add x1, x0 +# LOCAL-NEXT: ldr x2, [x1] + +## Far with large offset (_far_offset@GOTPAGEOFF + #255 > 4095) +L40: adrp x0, _far_offset@GOTPAGE +L41: ldr x1, [x0, _far_offset@GOTPAGEOFF] +L42: ldrb w2, [x1, #255] +# LOCAL-NEXT: adrp x0 +# LOCAL-NEXT: add x1, x0 +# LOCAL-NEXT: ldrb w2, [x1, #255] + +### Tests for invalid inputs, only GOT relaxation should happen +## Registers don't match +L43: adrp x0, _far@GOTPAGE +L44: ldr x1, [x0, _far@GOTPAGEOFF] +L45: ldr x2, [x2] +# LOCAL-NEXT: adrp x0 +# LOCAL-NEXT: add x1, x0 +# LOCAL-NEXT: ldr x2, [x2] + +.data +.align 4 + .quad 0 +_close: + .quad 0 +_close8: + .quad 0 + .byte 0 +_unaligned: + .quad 0 + +.space 1048576 +.align 12 + .quad 0 +_far: + .quad 0 + .byte 0 +_far_unaligned: + .quad 0 +.space 4000 +_far_offset: + .byte 0 + + +.loh AdrpLdrGotLdr L1, L2, L3 +.loh AdrpLdrGotLdr L4, L5, L6 +.loh AdrpLdrGotLdr L7, L8, L9 +.loh AdrpLdrGotLdr L10, L11, L12 +.loh AdrpLdrGotLdr L13, L14, L15 +.loh AdrpLdrGotLdr L16, L17, L18 +.loh AdrpLdrGotLdr L19, L20, L21 +.loh AdrpLdrGotLdr L22, L23, L24 +.loh AdrpLdrGotLdr L25, L26, L27 +.loh AdrpLdrGotLdr L28, L29, L30 +.loh AdrpLdrGotLdr L31, L32, L33 +.loh AdrpLdrGotLdr L34, L35, L36 +.loh AdrpLdrGotLdr L37, L38, L39 +.loh AdrpLdrGotLdr L40, L41, L42 +.loh AdrpLdrGotLdr L43, L44, L45 diff --git a/wild/tests/lld-macho/loh-adrp-ldr-got.s b/wild/tests/lld-macho/loh-adrp-ldr-got.s new file mode 100644 index 000000000..5363a1167 --- /dev/null +++ 
b/wild/tests/lld-macho/loh-adrp-ldr-got.s @@ -0,0 +1,35 @@ +# REQUIRES: aarch64 + +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/obj.s -o %t/obj.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/dylib.s -o %t/dylib.o +# RUN: %lld -arch arm64 -dylib -o %t/libdylib.dylib %t/dylib.o +# RUN: %lld -arch arm64 %t/obj.o %t/libdylib.dylib -o %t/AdrpLdrGot +# RUN: llvm-objdump -d --macho %t/AdrpLdrGot | FileCheck %s + +#--- obj.s +.text +.globl _main +# CHECK-LABEL: _main: +_main: +## The referenced symbol is local +L1: adrp x0, _local@GOTPAGE +L2: ldr x0, [x0, _local@GOTPAGEOFF] +# CHECK-NEXT: adr x0 +# CHECK-NEXT: nop + +## The referenced symbol is in a dylib +L3: adrp x1, _external@GOTPAGE +L4: ldr x1, [x1, _external@GOTPAGEOFF] +# CHECK-NEXT: nop +# CHECK-NEXT: ldr x1 + +_local: + nop + +.loh AdrpLdrGot L1, L2 +.loh AdrpLdrGot L3, L4 + +#--- dylib.s +.globl _external +_external: diff --git a/wild/tests/lld-macho/loh-adrp-ldr.s b/wild/tests/lld-macho/loh-adrp-ldr.s new file mode 100644 index 000000000..956632ae9 --- /dev/null +++ b/wild/tests/lld-macho/loh-adrp-ldr.s @@ -0,0 +1,133 @@ +# REQUIRES: aarch64 + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o +# RUN: %lld -arch arm64 %t.o -o %t +# RUN: llvm-objdump --no-print-imm-hex -d --macho %t | FileCheck %s + +.text +.align 2 +_before_far: + .space 1048576 + +.align 2 +_before_near: + .quad 0 + +.globl _main +# CHECK-LABEL: _main: +_main: +## Out of range, before +L1: adrp x0, _before_far@PAGE +L2: ldr x0, [x0, _before_far@PAGEOFF] +# CHECK-NEXT: adrp x0 +# CHECK-NEXT: ldr x0 + +## In range, before +L3: adrp x1, _before_near@PAGE +L4: ldr x1, [x1, _before_near@PAGEOFF] +# CHECK-NEXT: nop +# CHECK-NEXT: ldr x1, #-20 + +## Registers don't match (invalid input) +L5: adrp x2, _before_near@PAGE +L6: ldr x3, [x3, _before_near@PAGEOFF] +# CHECK-NEXT: adrp x2 +# CHECK-NEXT: ldr x3 + +## Not an adrp instruction +L9: udf 0 +L10: ldr x5, [x5, 
_after_near@PAGEOFF] +# CHECK-NEXT: udf +# CHECK-NEXT: ldr x5 + +## Not an ldr with an immediate offset +L11: adrp x6, _after_near@PAGE +L12: ldr x6, 0 +# CHECK-NEXT: adrp x6 +# CHECK-NEXT: ldr x6, #0 + +## Byte load, unsupported +L15: adrp x8, _after_near@PAGE +L16: ldr b8, [x8, _after_near@PAGEOFF] +# CHECK-NEXT: adrp x8 +# CHECK-NEXT: ldr b8 + +## Halfword load, unsupported +L17: adrp x9, _after_near@PAGE +L18: ldr h9, [x9, _after_near@PAGEOFF] +# CHECK-NEXT: adrp x9 +# CHECK-NEXT: ldr h9 + +## Word load +L19: adrp x10, _after_near@PAGE +L20: ldr w10, [x10, _after_near@PAGEOFF] +# CHECK-NEXT: nop +# CHECK-NEXT: ldr w10, _after_near + +## With addend +L21: adrp x11, _after_near@PAGE + 8 +L22: ldr x11, [x11, _after_near@PAGEOFF + 8] +# CHECK-NEXT: nop +# CHECK-NEXT: ldr x11 + +## Signed 32-bit read from 8-bit value, unsupported +L23: adrp x12, _after_near@PAGE +L24: ldrsb w12, [x12, _after_near@PAGEOFF] +# CHECK-NEXT: adrp x12 +# CHECK-NEXT: ldrsb w12 + +## 64-bit load from signed 32-bit value +L25: adrp x13, _after_near@PAGE +L26: ldrsw x13, [x13, _after_near@PAGEOFF] +# CHECK-NEXT: nop +# CHECK-NEXT: ldrsw x13, _after_near + +## Single precision FP read +L27: adrp x14, _after_near@PAGE +L28: ldr s0, [x14, _after_near@PAGEOFF] +# CHECK-NEXT: nop +# CHECK-NEXT: ldr s0, _after_near + +## Double precision FP read +L29: adrp x15, _after_near@PAGE +L30: ldr d0, [x15, _after_near@PAGEOFF] +# CHECK-NEXT: nop +# CHECK-NEXT: ldr d0, _after_near + +## Quad precision FP read +L31: adrp x16, _after_near@PAGE +L32: ldr q0, [x16, _after_near@PAGEOFF] +# CHECK-NEXT: nop +# CHECK-NEXT: ldr q0, _after_near + +## Out of range, after +L33: adrp x17, _after_far@PAGE +L34: ldr x17, [x17, _after_far@PAGEOFF] +# CHECK-NEXT: adrp x17 +# CHECK-NEXT: ldr x17 + +.data +.align 4 +_after_near: + .quad 0 + .quad 0 +.space 1048576 + +_after_far: + .quad 0 + +.loh AdrpLdr L1, L2 +.loh AdrpLdr L3, L4 +.loh AdrpLdr L5, L6 +.loh AdrpLdr L9, L10 +.loh AdrpLdr L11, L12 +.loh AdrpLdr L15, L16 +.loh
AdrpLdr L17, L18 +.loh AdrpLdr L19, L20 +.loh AdrpLdr L21, L22 +.loh AdrpLdr L23, L24 +.loh AdrpLdr L25, L26 +.loh AdrpLdr L27, L28 +.loh AdrpLdr L29, L30 +.loh AdrpLdr L31, L32 +.loh AdrpLdr L33, L34 diff --git a/wild/tests/lld-macho/loh-arm64-32.s b/wild/tests/lld-macho/loh-arm64-32.s new file mode 100644 index 000000000..906d0e1ce --- /dev/null +++ b/wild/tests/lld-macho/loh-arm64-32.s @@ -0,0 +1,64 @@ +# REQUIRES: aarch64 + +# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %s -o %t.o +# RUN: %lld-watchos -U _external %t.o -o %t +# RUN: llvm-objdump -d --macho %t | FileCheck %s + +.text +.align 2 +.globl _foo +_foo: + ret +.globl _bar +_bar: + ret + +.globl _main +_main: +# CHECK-LABEL: _main: + +L1: adrp x0, _foo@PAGE +L2: add x0, x0, _foo@PAGEOFF +# CHECK-NEXT: adr x0 +# CHECK-NEXT: nop + +L3: adrp x0, _ptr@PAGE +L4: add x1, x0, _ptr@PAGEOFF +L5: ldr x2, [x1] +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: ldr x2 + +L6: adrp x0, _foo@PAGE +L7: adrp x0, _bar@PAGE +# CHECK-NEXT: adrp x0 +# CHECK-NEXT: nop + +L8: adrp x0, _ptr@PAGE +L9: ldr x0, [x0, _ptr@PAGEOFF] +# CHECK-NEXT: nop +# CHECK-NEXT: ldr x0 + +L10: adrp x0, _ptr@PAGE +L11: ldr w0, [x0, _ptr@PAGEOFF] +# CHECK-NEXT: nop +# CHECK-NEXT: ldr w0, _ptr + +L12: adrp x0, _external@PAGE +L13: ldr w1, [x0, _external@PAGEOFF] +L14: ldr x2, [x1] +# CHECK-NEXT: nop +# CHECK-NEXT: ldr w1, 0x{{.*}} +# CHECK-NEXT: ldr x2, [x1] + +.data +.align 4 +_ptr: + .quad 0 + +.loh AdrpAdd L1, L2 +.loh AdrpAddLdr L3, L4, L5 +.loh AdrpAdrp L6, L7 +.loh AdrpLdr L8, L9 +.loh AdrpLdrGot L10, L11 +.loh AdrpLdrGotLdr L12, L13, L14 diff --git a/wild/tests/lld-macho/loh-parsing.s b/wild/tests/lld-macho/loh-parsing.s new file mode 100644 index 000000000..aad1af359 --- /dev/null +++ b/wild/tests/lld-macho/loh-parsing.s @@ -0,0 +1,24 @@ +# REQUIRES: aarch64 + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.o +# RUN: %lld -arch arm64 -dylib %t.o -o /dev/null + +## Check that we parse the LOH & match it to 
its referent sections correctly, +## even when there are other subsections that don't get parsed as regular +## sections. (We would previously segfault.) +## __debug_info is one such section that gets special-case handling. + +.text +_foo: + +.section __DWARF,__debug_info,regular,debug + +## __StaticInit occurs after __debug_info in the input object file, so the +## LOH-matching code will have to "walk" past __debug_info while searching for +## __StaticInit. Thus this verifies that we can skip past __debug_info +## correctly. +.section __TEXT,__StaticInit +L1: adrp x1, _foo@PAGE +L2: ldr x1, [x1, _foo@PAGEOFF] + +.loh AdrpLdr L1, L2 diff --git a/wild/tests/lld-macho/no-pie.s b/wild/tests/lld-macho/no-pie.s new file mode 100644 index 000000000..c51e2b3f9 --- /dev/null +++ b/wild/tests/lld-macho/no-pie.s @@ -0,0 +1,17 @@ +# REQUIRES: aarch64, x86 + +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.x86_64.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t.arm64.o +# RUN: llvm-mc -filetype=obj -triple=arm64e-apple-darwin %s -o %t.arm64e.o +# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %s -o %t.arm64_32.o + +# RUN: %lld -arch x86_64 -lSystem -no_pie -o %t %t.x86_64.o +# RUN: not %lld -arch arm64 -lSystem -no_pie -o %t %t.arm64.o 2>&1 | FileCheck %s +# RUN: not %lld -arch arm64e -lSystem -no_pie -o %t %t.arm64e.o 2>&1 | FileCheck %s +# RUN: not %lld-watchos -arch arm64_32 -lSystem -no_pie -o %t %t.arm64_32.o 2>&1 | FileCheck %s + +# CHECK: error: -no_pie ignored for arm64 + +.globl _main +_main: + ret diff --git a/wild/tests/lld-macho/objc-category-merging-complete-test.s b/wild/tests/lld-macho/objc-category-merging-complete-test.s new file mode 100644 index 000000000..3befd683e --- /dev/null +++ b/wild/tests/lld-macho/objc-category-merging-complete-test.s @@ -0,0 +1,1023 @@ +# REQUIRES: aarch64 +# RUN: rm -rf %t; split-file %s %t && cd %t + +############ Test merging multiple categories into a single category ############ 
+## Create a dylib to link against (a64_file1.dylib) and merge categories in the main binary (a64_file2_merge.exe) +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o a64_file1.o a64_file1.s +# RUN: %lld -arch arm64 a64_file1.o -o a64_file1.dylib -dylib + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o a64_file2.o a64_file2.s +# RUN: %lld -no_objc_relative_method_lists -arch arm64 -o a64_file2_no_merge.exe a64_file1.dylib a64_file2.o +# RUN: %lld -no_objc_relative_method_lists -arch arm64 -o a64_file2_no_merge_v2.exe a64_file1.dylib a64_file2.o -no_objc_category_merging +# RUN: %lld -no_objc_relative_method_lists -arch arm64 -o a64_file2_no_merge_v3.exe a64_file1.dylib a64_file2.o -objc_category_merging -no_objc_category_merging +# RUN: %lld -no_objc_relative_method_lists -arch arm64 -o a64_file2_merge.exe -objc_category_merging a64_file1.dylib a64_file2.o + +# RUN: llvm-objdump --objc-meta-data --macho a64_file2_no_merge.exe | FileCheck %s --check-prefixes=NO_MERGE_CATS +# RUN: llvm-objdump --objc-meta-data --macho a64_file2_no_merge_v2.exe | FileCheck %s --check-prefixes=NO_MERGE_CATS +# RUN: llvm-objdump --objc-meta-data --macho a64_file2_no_merge_v3.exe | FileCheck %s --check-prefixes=NO_MERGE_CATS +# RUN: llvm-objdump --objc-meta-data --macho a64_file2_merge.exe | FileCheck %s --check-prefixes=MERGE_CATS + +############ Test merging multiple categories into the base class ############ +# RUN: %lld -no_objc_relative_method_lists -arch arm64 -o a64_file2_merge_into_class.exe -objc_category_merging a64_file1.o a64_file2.o +# RUN: llvm-objdump --objc-meta-data --macho a64_file2_merge_into_class.exe | FileCheck %s --check-prefixes=MERGE_CATS_CLS + + +MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass(Category02|Category03) +MERGE_CATS-NEXT: name {{.*}} Category02|Category03 +MERGE_CATS: instanceMethods +MERGE_CATS-NEXT: entsize 24 +MERGE_CATS-NEXT: count 4 +MERGE_CATS-NEXT: name {{.*}} class02InstanceMethod +MERGE_CATS-NEXT: types {{.*}} v16@0:8
+MERGE_CATS-NEXT: imp -[MyBaseClass(Category02) class02InstanceMethod] +MERGE_CATS-NEXT: name {{.*}} myProtocol02Method +MERGE_CATS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS-NEXT: imp -[MyBaseClass(Category02) myProtocol02Method] +MERGE_CATS-NEXT: name {{.*}} class03InstanceMethod +MERGE_CATS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS-NEXT: imp -[MyBaseClass(Category03) class03InstanceMethod] +MERGE_CATS-NEXT: name {{.*}} myProtocol03Method +MERGE_CATS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS-NEXT: imp -[MyBaseClass(Category03) myProtocol03Method] +MERGE_CATS-NEXT: classMethods {{.*}} +MERGE_CATS-NEXT: entsize 24 +MERGE_CATS-NEXT: count 4 +MERGE_CATS-NEXT: name {{.*}} class02ClassMethod +MERGE_CATS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS-NEXT: imp +[MyBaseClass(Category02) class02ClassMethod] +MERGE_CATS-NEXT: name {{.*}} MyProtocol02Prop +MERGE_CATS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS-NEXT: imp +[MyBaseClass(Category02) MyProtocol02Prop] +MERGE_CATS-NEXT: name {{.*}} class03ClassMethod +MERGE_CATS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS-NEXT: imp +[MyBaseClass(Category03) class03ClassMethod] +MERGE_CATS-NEXT: name {{.*}} MyProtocol03Prop +MERGE_CATS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS-NEXT: imp +[MyBaseClass(Category03) MyProtocol03Prop] +MERGE_CATS-NEXT: protocols +MERGE_CATS-NEXT: count 2 +MERGE_CATS-NEXT: list[0] {{.*}} (struct protocol_t *) +MERGE_CATS-NEXT: isa 0x0 +MERGE_CATS-NEXT: name {{.*}} MyProtocol02 +MERGE_CATS-NEXT: protocols 0x0 +MERGE_CATS-NEXT: instanceMethods +MERGE_CATS-NEXT: entsize 24 +MERGE_CATS-NEXT: count 2 +MERGE_CATS-NEXT: name {{.*}} myProtocol02Method +MERGE_CATS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS-NEXT: imp 0x0 +MERGE_CATS-NEXT: name {{.*}} MyProtocol02Prop +MERGE_CATS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS-NEXT: imp 0x0 +MERGE_CATS-NEXT: classMethods +MERGE_CATS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS-NEXT: optionalClassMethods 0x0 +MERGE_CATS-NEXT: instanceProperties {{.*}} +MERGE_CATS-NEXT: list[1] {{.*}} +MERGE_CATS-NEXT: isa 
0x0 +MERGE_CATS-NEXT: name {{.*}} MyProtocol03 +MERGE_CATS-NEXT: protocols 0x0 +MERGE_CATS-NEXT: instanceMethods +MERGE_CATS-NEXT: entsize 24 +MERGE_CATS-NEXT: count 2 +MERGE_CATS-NEXT: name {{.*}} myProtocol03Method +MERGE_CATS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS-NEXT: imp 0x0 +MERGE_CATS-NEXT: name {{.*}} MyProtocol03Prop +MERGE_CATS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS-NEXT: imp 0x0 +MERGE_CATS-NEXT: classMethods 0x0 +MERGE_CATS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS-NEXT: optionalClassMethods 0x0 +MERGE_CATS-NEXT: instanceProperties {{.*}} +MERGE_CATS-NEXT: instanceProperties +MERGE_CATS-NEXT: entsize 16 +MERGE_CATS-NEXT: count 2 +MERGE_CATS-NEXT: name {{.*}} MyProtocol02Prop +MERGE_CATS-NEXT: attributes {{.*}} Ti,R,D +MERGE_CATS-NEXT: name {{.*}} MyProtocol03Prop +MERGE_CATS-NEXT: attributes {{.*}} Ti,R,D +MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass_$_Category04 + + +NO_MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass(Category02|Category03) +NO_MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass_$_Category02 +NO_MERGE_CATS: instanceMethods +NO_MERGE_CATS-NEXT: 24 +NO_MERGE_CATS-NEXT: 2 +NO_MERGE_CATS: classMethods +NO_MERGE_CATS-NEXT: 24 +NO_MERGE_CATS-NEXT: 2 + + +MERGE_CATS_CLS: _OBJC_CLASS_$_MyBaseClass +MERGE_CATS_CLS-NEXT: isa {{.*}} _OBJC_METACLASS_$_MyBaseClass +MERGE_CATS_CLS-NEXT: superclass 0x0 +MERGE_CATS_CLS-NEXT: cache {{.*}} __objc_empty_cache +MERGE_CATS_CLS-NEXT: vtable 0x0 +MERGE_CATS_CLS-NEXT: data {{.*}} (struct class_ro_t *) +MERGE_CATS_CLS-NEXT: flags 0x2 RO_ROOT +MERGE_CATS_CLS-NEXT: instanceStart 0 +MERGE_CATS_CLS-NEXT: instanceSize 4 +MERGE_CATS_CLS-NEXT: reserved 0x0 +MERGE_CATS_CLS-NEXT: ivarLayout 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyBaseClass +MERGE_CATS_CLS-NEXT: baseMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 8 +MERGE_CATS_CLS-NEXT: name {{.*}} class02InstanceMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass(Category02) 
class02InstanceMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol02Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass(Category02) myProtocol02Method] +MERGE_CATS_CLS-NEXT: name {{.*}} class03InstanceMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass(Category03) class03InstanceMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol03Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass(Category03) myProtocol03Method] +MERGE_CATS_CLS-NEXT: name {{.*}} baseInstanceMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass baseInstanceMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol01Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass myProtocol01Method] +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass MyProtocol01Prop] +MERGE_CATS_CLS-NEXT: name {{.*}} setMyProtocol01Prop: +MERGE_CATS_CLS-NEXT: types {{.*}} v20@0:8i16 +MERGE_CATS_CLS-NEXT: imp -[MyBaseClass setMyProtocol01Prop:] +MERGE_CATS_CLS-NEXT: baseProtocols {{.*}} +MERGE_CATS_CLS-NEXT: count 3 +MERGE_CATS_CLS-NEXT: list[0] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 2 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol02Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} 
+MERGE_CATS_CLS-NEXT: list[1] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 2 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol03Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: list[2] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 3 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol01Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} setMyProtocol01Prop: +MERGE_CATS_CLS-NEXT: types {{.*}} v20@0:8i16 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: ivars {{.*}} +MERGE_CATS_CLS-NEXT: entsize 32 +MERGE_CATS_CLS-NEXT: count 1 +MERGE_CATS_CLS-NEXT: offset {{.*}} 0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01Prop +MERGE_CATS_CLS-NEXT: type {{.*}} i +MERGE_CATS_CLS-NEXT: alignment 2 +MERGE_CATS_CLS-NEXT: size 4 +MERGE_CATS_CLS-NEXT: weakIvarLayout 0x0 
+MERGE_CATS_CLS-NEXT: baseProperties {{.*}} +MERGE_CATS_CLS-NEXT: entsize 16 +MERGE_CATS_CLS-NEXT: count 3 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02Prop +MERGE_CATS_CLS-NEXT: attributes {{.*}} Ti,R,D +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03Prop +MERGE_CATS_CLS-NEXT: attributes {{.*}} Ti,R,D +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01Prop +MERGE_CATS_CLS-NEXT: attributes {{.*}} Ti,N,VMyProtocol01Prop +MERGE_CATS_CLS-NEXT: Meta Class +MERGE_CATS_CLS-NEXT: isa {{.*}} _OBJC_METACLASS_$_MyBaseClass +MERGE_CATS_CLS-NEXT: superclass {{.*}} _OBJC_CLASS_$_MyBaseClass +MERGE_CATS_CLS-NEXT: cache {{.*}} __objc_empty_cache +MERGE_CATS_CLS-NEXT: vtable 0x0 +MERGE_CATS_CLS-NEXT: data {{.*}} (struct class_ro_t *) +MERGE_CATS_CLS-NEXT: flags 0x3 RO_META RO_ROOT +MERGE_CATS_CLS-NEXT: instanceStart 40 +MERGE_CATS_CLS-NEXT: instanceSize 40 +MERGE_CATS_CLS-NEXT: reserved 0x0 +MERGE_CATS_CLS-NEXT: ivarLayout 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyBaseClass +MERGE_CATS_CLS-NEXT: baseMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 5 +MERGE_CATS_CLS-NEXT: name {{.*}} class02ClassMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass(Category02) class02ClassMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass(Category02) MyProtocol02Prop] +MERGE_CATS_CLS-NEXT: name {{.*}} class03ClassMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass(Category03) class03ClassMethod] +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass(Category03) MyProtocol03Prop] +MERGE_CATS_CLS-NEXT: name {{.*}} baseClassMethod +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp +[MyBaseClass baseClassMethod] +MERGE_CATS_CLS-NEXT: baseProtocols {{.*}} +MERGE_CATS_CLS-NEXT: count 3 
+MERGE_CATS_CLS-NEXT: list[0] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 2 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol02Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol02Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: list[1] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 2 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol03Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol03Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: list[2] {{.*}} (struct protocol_t *) +MERGE_CATS_CLS-NEXT: isa 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} MyProtocol01 +MERGE_CATS_CLS-NEXT: protocols 0x0 +MERGE_CATS_CLS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +MERGE_CATS_CLS-NEXT: entsize 24 +MERGE_CATS_CLS-NEXT: count 3 +MERGE_CATS_CLS-NEXT: name {{.*}} myProtocol01Method +MERGE_CATS_CLS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} 
MyProtocol01Prop +MERGE_CATS_CLS-NEXT: types {{.*}} i16@0:8 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: name {{.*}} setMyProtocol01Prop: +MERGE_CATS_CLS-NEXT: types {{.*}} v20@0:8i16 +MERGE_CATS_CLS-NEXT: imp 0x0 +MERGE_CATS_CLS-NEXT: classMethods 0x0 (struct method_list_t *) +MERGE_CATS_CLS-NEXT: optionalInstanceMethods 0x0 +MERGE_CATS_CLS-NEXT: optionalClassMethods 0x0 +MERGE_CATS_CLS-NEXT: instanceProperties {{.*}} +MERGE_CATS_CLS-NEXT: ivars 0x0 +MERGE_CATS_CLS-NEXT: weakIvarLayout 0x0 +MERGE_CATS_CLS-NEXT: baseProperties 0x0 +MERGE_CATS_CLS: __OBJC_$_CATEGORY_MyBaseClass_$_Category04 + + +#--- a64_file1.s + +## @protocol MyProtocol01 +## - (void)myProtocol01Method; +## @property (nonatomic) int MyProtocol01Prop; +## @end +## +## __attribute__((objc_root_class)) +## @interface MyBaseClass +## - (void)baseInstanceMethod; +## - (void)myProtocol01Method; +## + (void)baseClassMethod; +## @end +## +## @implementation MyBaseClass +## @synthesize MyProtocol01Prop; +## - (void)baseInstanceMethod {} +## - (void)myProtocol01Method {} +## + (void)baseClassMethod {} +## @end +## +## void *_objc_empty_cache; + + .section __TEXT,__text,regular,pure_instructions + .p2align 2 ; -- Begin function -[MyBaseClass baseInstanceMethod] +"-[MyBaseClass baseInstanceMethod]": ; @"\01-[MyBaseClass baseInstanceMethod]" + .cfi_startproc +; %bb.0: ; %entry + ret + .cfi_endproc + ; -- End function + .p2align 2 ; -- Begin function -[MyBaseClass myProtocol01Method] +"-[MyBaseClass myProtocol01Method]": ; @"\01-[MyBaseClass myProtocol01Method]" + .cfi_startproc +; %bb.0: ; %entry + ret + .cfi_endproc + ; -- End function + .p2align 2 ; -- Begin function +[MyBaseClass baseClassMethod] +"+[MyBaseClass baseClassMethod]": ; @"\01+[MyBaseClass baseClassMethod]" + .cfi_startproc +; %bb.0: ; %entry + ret + .cfi_endproc + ; -- End function + .p2align 2 ; -- Begin function -[MyBaseClass MyProtocol01Prop] +"-[MyBaseClass MyProtocol01Prop]": ; @"\01-[MyBaseClass MyProtocol01Prop]" + .cfi_startproc 
+; %bb.0: ; %entry +Lloh0: + adrp x8, _OBJC_IVAR_$_MyBaseClass.MyProtocol01Prop@PAGE +Lloh1: + ldrsw x8, [x8, _OBJC_IVAR_$_MyBaseClass.MyProtocol01Prop@PAGEOFF] + ldr w0, [x0, x8] + ret + .loh AdrpLdr Lloh0, Lloh1 + .cfi_endproc + ; -- End function + .p2align 2 ; -- Begin function -[MyBaseClass setMyProtocol01Prop:] +"-[MyBaseClass setMyProtocol01Prop:]": ; @"\01-[MyBaseClass setMyProtocol01Prop:]" + .cfi_startproc +; %bb.0: ; %entry +Lloh2: + adrp x8, _OBJC_IVAR_$_MyBaseClass.MyProtocol01Prop@PAGE +Lloh3: + ldrsw x8, [x8, _OBJC_IVAR_$_MyBaseClass.MyProtocol01Prop@PAGEOFF] + str w2, [x0, x8] + ret + .loh AdrpLdr Lloh2, Lloh3 + .cfi_endproc + ; -- End function + .private_extern _OBJC_IVAR_$_MyBaseClass.MyProtocol01Prop ; @"OBJC_IVAR_$_MyBaseClass.MyProtocol01Prop" + .section __DATA,__objc_ivar + .globl _OBJC_IVAR_$_MyBaseClass.MyProtocol01Prop + .p2align 2, 0x0 +_OBJC_IVAR_$_MyBaseClass.MyProtocol01Prop: + .long 0 ; 0x0 + .section __DATA,__objc_data + .globl _OBJC_CLASS_$_MyBaseClass ; @"OBJC_CLASS_$_MyBaseClass" + .p2align 3, 0x0 +_OBJC_CLASS_$_MyBaseClass: + .quad _OBJC_METACLASS_$_MyBaseClass + .quad 0 + .quad __objc_empty_cache + .quad 0 + .quad __OBJC_CLASS_RO_$_MyBaseClass + .globl _OBJC_METACLASS_$_MyBaseClass ; @"OBJC_METACLASS_$_MyBaseClass" + .p2align 3, 0x0 +_OBJC_METACLASS_$_MyBaseClass: + .quad _OBJC_METACLASS_$_MyBaseClass + .quad _OBJC_CLASS_$_MyBaseClass + .quad __objc_empty_cache + .quad 0 + .quad __OBJC_METACLASS_RO_$_MyBaseClass + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_: ; @OBJC_CLASS_NAME_ + .asciz "MyBaseClass" + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_: ; @OBJC_METH_VAR_NAME_ + .asciz "baseClassMethod" + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_: ; @OBJC_METH_VAR_TYPE_ + .asciz "v16@0:8" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_CLASS_METHODS_MyBaseClass" +__OBJC_$_CLASS_METHODS_MyBaseClass: + .long 24 ; 0x18 + .long 1 ; 0x1 + .quad 
l_OBJC_METH_VAR_NAME_ + .quad l_OBJC_METH_VAR_TYPE_ + .quad "+[MyBaseClass baseClassMethod]" + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_.1: ; @OBJC_CLASS_NAME_.1 + .asciz "MyProtocol01" + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_.2: ; @OBJC_METH_VAR_NAME_.2 + .asciz "myProtocol01Method" +l_OBJC_METH_VAR_NAME_.3: ; @OBJC_METH_VAR_NAME_.3 + .asciz "MyProtocol01Prop" + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_.4: ; @OBJC_METH_VAR_TYPE_.4 + .asciz "i16@0:8" + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_.5: ; @OBJC_METH_VAR_NAME_.5 + .asciz "setMyProtocol01Prop:" + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_.6: ; @OBJC_METH_VAR_TYPE_.6 + .asciz "v20@0:8i16" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_PROTOCOL_INSTANCE_METHODS_MyProtocol01" +__OBJC_$_PROTOCOL_INSTANCE_METHODS_MyProtocol01: + .long 24 ; 0x18 + .long 3 ; 0x3 + .quad l_OBJC_METH_VAR_NAME_.2 + .quad l_OBJC_METH_VAR_TYPE_ + .quad 0 + .quad l_OBJC_METH_VAR_NAME_.3 + .quad l_OBJC_METH_VAR_TYPE_.4 + .quad 0 + .quad l_OBJC_METH_VAR_NAME_.5 + .quad l_OBJC_METH_VAR_TYPE_.6 + .quad 0 + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_PROP_NAME_ATTR_: ; @OBJC_PROP_NAME_ATTR_ + .asciz "MyProtocol01Prop" +l_OBJC_PROP_NAME_ATTR_.7: ; @OBJC_PROP_NAME_ATTR_.7 + .asciz "Ti,N" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_PROP_LIST_MyProtocol01" +__OBJC_$_PROP_LIST_MyProtocol01: + .long 16 ; 0x10 + .long 1 ; 0x1 + .quad l_OBJC_PROP_NAME_ATTR_ + .quad l_OBJC_PROP_NAME_ATTR_.7 + .p2align 3, 0x0 ; @"_OBJC_$_PROTOCOL_METHOD_TYPES_MyProtocol01" +__OBJC_$_PROTOCOL_METHOD_TYPES_MyProtocol01: + .quad l_OBJC_METH_VAR_TYPE_ + .quad l_OBJC_METH_VAR_TYPE_.4 + .quad l_OBJC_METH_VAR_TYPE_.6 + .private_extern __OBJC_PROTOCOL_$_MyProtocol01 ; @"_OBJC_PROTOCOL_$_MyProtocol01" + .section __DATA,__data + .globl __OBJC_PROTOCOL_$_MyProtocol01 + .weak_definition 
__OBJC_PROTOCOL_$_MyProtocol01 + .p2align 3, 0x0 +__OBJC_PROTOCOL_$_MyProtocol01: + .quad 0 + .quad l_OBJC_CLASS_NAME_.1 + .quad 0 + .quad __OBJC_$_PROTOCOL_INSTANCE_METHODS_MyProtocol01 + .quad 0 + .quad 0 + .quad 0 + .quad __OBJC_$_PROP_LIST_MyProtocol01 + .long 96 ; 0x60 + .long 0 ; 0x0 + .quad __OBJC_$_PROTOCOL_METHOD_TYPES_MyProtocol01 + .quad 0 + .quad 0 + .private_extern __OBJC_LABEL_PROTOCOL_$_MyProtocol01 ; @"_OBJC_LABEL_PROTOCOL_$_MyProtocol01" + .section __DATA,__objc_protolist,coalesced,no_dead_strip + .globl __OBJC_LABEL_PROTOCOL_$_MyProtocol01 + .weak_definition __OBJC_LABEL_PROTOCOL_$_MyProtocol01 + .p2align 3, 0x0 +__OBJC_LABEL_PROTOCOL_$_MyProtocol01: + .quad __OBJC_PROTOCOL_$_MyProtocol01 + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_CLASS_PROTOCOLS_$_MyBaseClass" +__OBJC_CLASS_PROTOCOLS_$_MyBaseClass: + .quad 1 ; 0x1 + .quad __OBJC_PROTOCOL_$_MyProtocol01 + .quad 0 + .p2align 3, 0x0 ; @"_OBJC_METACLASS_RO_$_MyBaseClass" +__OBJC_METACLASS_RO_$_MyBaseClass: + .long 3 ; 0x3 + .long 40 ; 0x28 + .long 40 ; 0x28 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad __OBJC_$_CLASS_METHODS_MyBaseClass + .quad __OBJC_CLASS_PROTOCOLS_$_MyBaseClass + .quad 0 + .quad 0 + .quad 0 + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_.8: ; @OBJC_METH_VAR_NAME_.8 + .asciz "baseInstanceMethod" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_INSTANCE_METHODS_MyBaseClass" +__OBJC_$_INSTANCE_METHODS_MyBaseClass: + .long 24 ; 0x18 + .long 4 ; 0x4 + .quad l_OBJC_METH_VAR_NAME_.8 + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyBaseClass baseInstanceMethod]" + .quad l_OBJC_METH_VAR_NAME_.2 + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyBaseClass myProtocol01Method]" + .quad l_OBJC_METH_VAR_NAME_.3 + .quad l_OBJC_METH_VAR_TYPE_.4 + .quad "-[MyBaseClass MyProtocol01Prop]" + .quad l_OBJC_METH_VAR_NAME_.5 + .quad l_OBJC_METH_VAR_TYPE_.6 + .quad "-[MyBaseClass setMyProtocol01Prop:]" + .section __TEXT,__objc_methtype,cstring_literals 
+l_OBJC_METH_VAR_TYPE_.9: ; @OBJC_METH_VAR_TYPE_.9 + .asciz "i" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_INSTANCE_VARIABLES_MyBaseClass" +__OBJC_$_INSTANCE_VARIABLES_MyBaseClass: + .long 32 ; 0x20 + .long 1 ; 0x1 + .quad _OBJC_IVAR_$_MyBaseClass.MyProtocol01Prop + .quad l_OBJC_METH_VAR_NAME_.3 + .quad l_OBJC_METH_VAR_TYPE_.9 + .long 2 ; 0x2 + .long 4 ; 0x4 + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_PROP_NAME_ATTR_.10: ; @OBJC_PROP_NAME_ATTR_.10 + .asciz "Ti,N,VMyProtocol01Prop" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_PROP_LIST_MyBaseClass" +__OBJC_$_PROP_LIST_MyBaseClass: + .long 16 ; 0x10 + .long 1 ; 0x1 + .quad l_OBJC_PROP_NAME_ATTR_ + .quad l_OBJC_PROP_NAME_ATTR_.10 + .p2align 3, 0x0 ; @"_OBJC_CLASS_RO_$_MyBaseClass" +__OBJC_CLASS_RO_$_MyBaseClass: + .long 2 ; 0x2 + .long 0 ; 0x0 + .long 4 ; 0x4 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad __OBJC_$_INSTANCE_METHODS_MyBaseClass + .quad __OBJC_CLASS_PROTOCOLS_$_MyBaseClass + .quad __OBJC_$_INSTANCE_VARIABLES_MyBaseClass + .quad 0 + .quad __OBJC_$_PROP_LIST_MyBaseClass + .globl __objc_empty_cache ; @_objc_empty_cache +.zerofill __DATA,__common,__objc_empty_cache,8,3 + .section __DATA,__objc_classlist,regular,no_dead_strip + .p2align 3, 0x0 ; @"OBJC_LABEL_CLASS_$" +l_OBJC_LABEL_CLASS_$: + .quad _OBJC_CLASS_$_MyBaseClass + .no_dead_strip __OBJC_LABEL_PROTOCOL_$_MyProtocol01 + .no_dead_strip __OBJC_PROTOCOL_$_MyProtocol01 + .section __DATA,__objc_imageinfo,regular,no_dead_strip +L_OBJC_IMAGE_INFO: + .long 0 + .long 96 +.subsections_via_symbols + + +#--- a64_file2.s + +## @protocol MyProtocol01 +## - (void)myProtocol01Method; +## @end +## +## @protocol MyProtocol02 +## - (void)myProtocol02Method; +## @property(readonly) int MyProtocol02Prop; +## @end +## +## @protocol MyProtocol03 +## - (void)myProtocol03Method; +## @property(readonly) int MyProtocol03Prop; +## @end +## +## +## __attribute__((objc_root_class)) +## @interface MyBaseClass +## - 
(void)baseInstanceMethod; +## - (void)myProtocol01Method; +## + (void)baseClassMethod; +## @end +## +## +## +## @interface MyBaseClass(Category02) +## - (void)class02InstanceMethod; +## - (void)myProtocol02Method; +## + (void)class02ClassMethod; +## + (int)MyProtocol02Prop; +## @end +## +## @implementation MyBaseClass(Category02) +## - (void)class02InstanceMethod {} +## - (void)myProtocol02Method {} +## + (void)class02ClassMethod {} +## + (int)MyProtocol02Prop { return 0;} +## @dynamic MyProtocol02Prop; +## @end +## +## @interface MyBaseClass(Category03) +## - (void)class03InstanceMethod; +## - (void)myProtocol03Method; +## + (void)class03ClassMethod; +## + (int)MyProtocol03Prop; +## @end +## +## @implementation MyBaseClass(Category03) +## - (void)class03InstanceMethod {} +## - (void)myProtocol03Method {} +## + (void)class03ClassMethod {} +## + (int)MyProtocol03Prop { return 0;} +## @dynamic MyProtocol03Prop; +## @end +## +## // This category shouldn't be merged +## @interface MyBaseClass(Category04) +## + (void)load; +## @end +## +## @implementation MyBaseClass(Category04) +## + (void)load {} +## @end +## +## int main() { +## return 0; +## } + + + .section __TEXT,__text,regular,pure_instructions + .p2align 2 ; -- Begin function -[MyBaseClass(Category02) class02InstanceMethod] +"-[MyBaseClass(Category02) class02InstanceMethod]": ; @"\01-[MyBaseClass(Category02) class02InstanceMethod]" + .cfi_startproc +; %bb.0: ; %entry + ret + .cfi_endproc + ; -- End function + .p2align 2 ; -- Begin function -[MyBaseClass(Category02) myProtocol02Method] +"-[MyBaseClass(Category02) myProtocol02Method]": ; @"\01-[MyBaseClass(Category02) myProtocol02Method]" + .cfi_startproc +; %bb.0: ; %entry + ret + .cfi_endproc + ; -- End function + .p2align 2 ; -- Begin function +[MyBaseClass(Category02) class02ClassMethod] +"+[MyBaseClass(Category02) class02ClassMethod]": ; @"\01+[MyBaseClass(Category02) class02ClassMethod]" + .cfi_startproc +; %bb.0: ; %entry + ret + .cfi_endproc + ; -- End 
function + .p2align 2 ; -- Begin function +[MyBaseClass(Category02) MyProtocol02Prop] +"+[MyBaseClass(Category02) MyProtocol02Prop]": ; @"\01+[MyBaseClass(Category02) MyProtocol02Prop]" + .cfi_startproc +; %bb.0: ; %entry + b _OUTLINED_FUNCTION_0 + .cfi_endproc + ; -- End function + .p2align 2 ; -- Begin function -[MyBaseClass(Category03) class03InstanceMethod] +"-[MyBaseClass(Category03) class03InstanceMethod]": ; @"\01-[MyBaseClass(Category03) class03InstanceMethod]" + .cfi_startproc +; %bb.0: ; %entry + ret + .cfi_endproc + ; -- End function + .p2align 2 ; -- Begin function -[MyBaseClass(Category03) myProtocol03Method] +"-[MyBaseClass(Category03) myProtocol03Method]": ; @"\01-[MyBaseClass(Category03) myProtocol03Method]" + .cfi_startproc +; %bb.0: ; %entry + ret + .cfi_endproc + ; -- End function + .p2align 2 ; -- Begin function +[MyBaseClass(Category03) class03ClassMethod] +"+[MyBaseClass(Category03) class03ClassMethod]": ; @"\01+[MyBaseClass(Category03) class03ClassMethod]" + .cfi_startproc +; %bb.0: ; %entry + ret + .cfi_endproc + ; -- End function + .p2align 2 ; -- Begin function +[MyBaseClass(Category03) MyProtocol03Prop] +"+[MyBaseClass(Category03) MyProtocol03Prop]": ; @"\01+[MyBaseClass(Category03) MyProtocol03Prop]" + .cfi_startproc +; %bb.0: ; %entry + b _OUTLINED_FUNCTION_0 + .cfi_endproc + ; -- End function + .p2align 2 +"+[MyBaseClass(Category04) load]": + .cfi_startproc +; %bb.0: + ret + .cfi_endproc + .globl _main ; -- Begin function main + .p2align 2 +_main: ; @main + .cfi_startproc +; %bb.0: ; %entry + b _OUTLINED_FUNCTION_0 + .cfi_endproc + ; -- End function + .p2align 2 ; -- Begin function OUTLINED_FUNCTION_0 +_OUTLINED_FUNCTION_0: ; @OUTLINED_FUNCTION_0 Tail Call + .cfi_startproc +; %bb.0: + mov w0, #0 + ret + .cfi_endproc + ; -- End function + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_: ; @OBJC_CLASS_NAME_ + .asciz "Category02" + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_: ; 
@OBJC_METH_VAR_NAME_ + .asciz "class02InstanceMethod" + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_: ; @OBJC_METH_VAR_TYPE_ + .asciz "v16@0:8" + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_.1: ; @OBJC_METH_VAR_NAME_.1 + .asciz "myProtocol02Method" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_Category02" +__OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_Category02: + .long 24 ; 0x18 + .long 2 ; 0x2 + .quad l_OBJC_METH_VAR_NAME_ + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyBaseClass(Category02) class02InstanceMethod]" + .quad l_OBJC_METH_VAR_NAME_.1 + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyBaseClass(Category02) myProtocol02Method]" + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_.2: ; @OBJC_METH_VAR_NAME_.2 + .asciz "class02ClassMethod" +l_OBJC_METH_VAR_NAME_.3: ; @OBJC_METH_VAR_NAME_.3 + .asciz "MyProtocol02Prop" + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_.4: ; @OBJC_METH_VAR_TYPE_.4 + .asciz "i16@0:8" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_CATEGORY_CLASS_METHODS_MyBaseClass_$_Category02" +__OBJC_$_CATEGORY_CLASS_METHODS_MyBaseClass_$_Category02: + .long 24 ; 0x18 + .long 2 ; 0x2 + .quad l_OBJC_METH_VAR_NAME_.2 + .quad l_OBJC_METH_VAR_TYPE_ + .quad "+[MyBaseClass(Category02) class02ClassMethod]" + .quad l_OBJC_METH_VAR_NAME_.3 + .quad l_OBJC_METH_VAR_TYPE_.4 + .quad "+[MyBaseClass(Category02) MyProtocol02Prop]" + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_.5: ; @OBJC_CLASS_NAME_.5 + .asciz "MyProtocol02" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_PROTOCOL_INSTANCE_METHODS_MyProtocol02" +__OBJC_$_PROTOCOL_INSTANCE_METHODS_MyProtocol02: + .long 24 ; 0x18 + .long 2 ; 0x2 + .quad l_OBJC_METH_VAR_NAME_.1 + .quad l_OBJC_METH_VAR_TYPE_ + .quad 0 + .quad l_OBJC_METH_VAR_NAME_.3 + .quad l_OBJC_METH_VAR_TYPE_.4 + .quad 0 + .section 
__TEXT,__objc_methname,cstring_literals +l_OBJC_PROP_NAME_ATTR_: ; @OBJC_PROP_NAME_ATTR_ + .asciz "MyProtocol02Prop" +l_OBJC_PROP_NAME_ATTR_.6: ; @OBJC_PROP_NAME_ATTR_.6 + .asciz "Ti,R" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_PROP_LIST_MyProtocol02" +__OBJC_$_PROP_LIST_MyProtocol02: + .long 16 ; 0x10 + .long 1 ; 0x1 + .quad l_OBJC_PROP_NAME_ATTR_ + .quad l_OBJC_PROP_NAME_ATTR_.6 + .p2align 3, 0x0 ; @"_OBJC_$_PROTOCOL_METHOD_TYPES_MyProtocol02" +__OBJC_$_PROTOCOL_METHOD_TYPES_MyProtocol02: + .quad l_OBJC_METH_VAR_TYPE_ + .quad l_OBJC_METH_VAR_TYPE_.4 + .private_extern __OBJC_PROTOCOL_$_MyProtocol02 ; @"_OBJC_PROTOCOL_$_MyProtocol02" + .section __DATA,__data + .globl __OBJC_PROTOCOL_$_MyProtocol02 + .weak_definition __OBJC_PROTOCOL_$_MyProtocol02 + .p2align 3, 0x0 +__OBJC_PROTOCOL_$_MyProtocol02: + .quad 0 + .quad l_OBJC_CLASS_NAME_.5 + .quad 0 + .quad __OBJC_$_PROTOCOL_INSTANCE_METHODS_MyProtocol02 + .quad 0 + .quad 0 + .quad 0 + .quad __OBJC_$_PROP_LIST_MyProtocol02 + .long 96 ; 0x60 + .long 0 ; 0x0 + .quad __OBJC_$_PROTOCOL_METHOD_TYPES_MyProtocol02 + .quad 0 + .quad 0 + .private_extern __OBJC_LABEL_PROTOCOL_$_MyProtocol02 ; @"_OBJC_LABEL_PROTOCOL_$_MyProtocol02" + .section __DATA,__objc_protolist,coalesced,no_dead_strip + .globl __OBJC_LABEL_PROTOCOL_$_MyProtocol02 + .weak_definition __OBJC_LABEL_PROTOCOL_$_MyProtocol02 + .p2align 3, 0x0 +__OBJC_LABEL_PROTOCOL_$_MyProtocol02: + .quad __OBJC_PROTOCOL_$_MyProtocol02 + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_CATEGORY_PROTOCOLS_$_MyBaseClass_$_Category02" +__OBJC_CATEGORY_PROTOCOLS_$_MyBaseClass_$_Category02: + .quad 1 ; 0x1 + .quad __OBJC_PROTOCOL_$_MyProtocol02 + .quad 0 + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_PROP_NAME_ATTR_.7: ; @OBJC_PROP_NAME_ATTR_.7 + .asciz "Ti,R,D" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_PROP_LIST_MyBaseClass_$_Category02" +__OBJC_$_PROP_LIST_MyBaseClass_$_Category02: + .long 16 ; 0x10 + .long 1 ; 0x1 + .quad 
l_OBJC_PROP_NAME_ATTR_ + .quad l_OBJC_PROP_NAME_ATTR_.7 + .p2align 3, 0x0 ; @"_OBJC_$_CATEGORY_MyBaseClass_$_Category02" +__OBJC_$_CATEGORY_MyBaseClass_$_Category02: + .quad l_OBJC_CLASS_NAME_ + .quad _OBJC_CLASS_$_MyBaseClass + .quad __OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_Category02 + .quad __OBJC_$_CATEGORY_CLASS_METHODS_MyBaseClass_$_Category02 + .quad __OBJC_CATEGORY_PROTOCOLS_$_MyBaseClass_$_Category02 + .quad __OBJC_$_PROP_LIST_MyBaseClass_$_Category02 + .quad 0 + .long 64 ; 0x40 + .space 4 + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_.8: ; @OBJC_CLASS_NAME_.8 + .asciz "Category03" + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_.9: ; @OBJC_METH_VAR_NAME_.9 + .asciz "class03InstanceMethod" +l_OBJC_METH_VAR_NAME_.10: ; @OBJC_METH_VAR_NAME_.10 + .asciz "myProtocol03Method" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_Category03" +__OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_Category03: + .long 24 ; 0x18 + .long 2 ; 0x2 + .quad l_OBJC_METH_VAR_NAME_.9 + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyBaseClass(Category03) class03InstanceMethod]" + .quad l_OBJC_METH_VAR_NAME_.10 + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyBaseClass(Category03) myProtocol03Method]" + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_.11: ; @OBJC_METH_VAR_NAME_.11 + .asciz "class03ClassMethod" +l_OBJC_METH_VAR_NAME_.12: ; @OBJC_METH_VAR_NAME_.12 + .asciz "MyProtocol03Prop" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_CATEGORY_CLASS_METHODS_MyBaseClass_$_Category03" +__OBJC_$_CATEGORY_CLASS_METHODS_MyBaseClass_$_Category03: + .long 24 ; 0x18 + .long 2 ; 0x2 + .quad l_OBJC_METH_VAR_NAME_.11 + .quad l_OBJC_METH_VAR_TYPE_ + .quad "+[MyBaseClass(Category03) class03ClassMethod]" + .quad l_OBJC_METH_VAR_NAME_.12 + .quad l_OBJC_METH_VAR_TYPE_.4 + .quad "+[MyBaseClass(Category03) MyProtocol03Prop]" + .section 
__TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_.13: ; @OBJC_CLASS_NAME_.13 + .asciz "MyProtocol03" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_PROTOCOL_INSTANCE_METHODS_MyProtocol03" +__OBJC_$_PROTOCOL_INSTANCE_METHODS_MyProtocol03: + .long 24 ; 0x18 + .long 2 ; 0x2 + .quad l_OBJC_METH_VAR_NAME_.10 + .quad l_OBJC_METH_VAR_TYPE_ + .quad 0 + .quad l_OBJC_METH_VAR_NAME_.12 + .quad l_OBJC_METH_VAR_TYPE_.4 + .quad 0 + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_PROP_NAME_ATTR_.14: ; @OBJC_PROP_NAME_ATTR_.14 + .asciz "MyProtocol03Prop" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_PROP_LIST_MyProtocol03" +__OBJC_$_PROP_LIST_MyProtocol03: + .long 16 ; 0x10 + .long 1 ; 0x1 + .quad l_OBJC_PROP_NAME_ATTR_.14 + .quad l_OBJC_PROP_NAME_ATTR_.6 + .p2align 3, 0x0 ; @"_OBJC_$_PROTOCOL_METHOD_TYPES_MyProtocol03" +__OBJC_$_PROTOCOL_METHOD_TYPES_MyProtocol03: + .quad l_OBJC_METH_VAR_TYPE_ + .quad l_OBJC_METH_VAR_TYPE_.4 + .private_extern __OBJC_PROTOCOL_$_MyProtocol03 ; @"_OBJC_PROTOCOL_$_MyProtocol03" + .section __DATA,__data + .globl __OBJC_PROTOCOL_$_MyProtocol03 + .weak_definition __OBJC_PROTOCOL_$_MyProtocol03 + .p2align 3, 0x0 +__OBJC_PROTOCOL_$_MyProtocol03: + .quad 0 + .quad l_OBJC_CLASS_NAME_.13 + .quad 0 + .quad __OBJC_$_PROTOCOL_INSTANCE_METHODS_MyProtocol03 + .quad 0 + .quad 0 + .quad 0 + .quad __OBJC_$_PROP_LIST_MyProtocol03 + .long 96 ; 0x60 + .long 0 ; 0x0 + .quad __OBJC_$_PROTOCOL_METHOD_TYPES_MyProtocol03 + .quad 0 + .quad 0 + .private_extern __OBJC_LABEL_PROTOCOL_$_MyProtocol03 ; @"_OBJC_LABEL_PROTOCOL_$_MyProtocol03" + .section __DATA,__objc_protolist,coalesced,no_dead_strip + .globl __OBJC_LABEL_PROTOCOL_$_MyProtocol03 + .weak_definition __OBJC_LABEL_PROTOCOL_$_MyProtocol03 + .p2align 3, 0x0 +__OBJC_LABEL_PROTOCOL_$_MyProtocol03: + .quad __OBJC_PROTOCOL_$_MyProtocol03 + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_CATEGORY_PROTOCOLS_$_MyBaseClass_$_Category03" 
+__OBJC_CATEGORY_PROTOCOLS_$_MyBaseClass_$_Category03: + .quad 1 ; 0x1 + .quad __OBJC_PROTOCOL_$_MyProtocol03 + .quad 0 + .p2align 3, 0x0 ; @"_OBJC_$_PROP_LIST_MyBaseClass_$_Category03" +__OBJC_$_PROP_LIST_MyBaseClass_$_Category03: + .long 16 ; 0x10 + .long 1 ; 0x1 + .quad l_OBJC_PROP_NAME_ATTR_.14 + .quad l_OBJC_PROP_NAME_ATTR_.7 + .p2align 3, 0x0 ; @"_OBJC_$_CATEGORY_MyBaseClass_$_Category03" +__OBJC_$_CATEGORY_MyBaseClass_$_Category03: + .quad l_OBJC_CLASS_NAME_.8 + .quad _OBJC_CLASS_$_MyBaseClass + .quad __OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_Category03 + .quad __OBJC_$_CATEGORY_CLASS_METHODS_MyBaseClass_$_Category03 + .quad __OBJC_CATEGORY_PROTOCOLS_$_MyBaseClass_$_Category03 + .quad __OBJC_$_PROP_LIST_MyBaseClass_$_Category03 + .quad 0 + .long 64 ; 0x40 + .space 4 + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_.15: + .asciz "Category04" + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_.16: + .asciz "load" + .section __DATA,__objc_const + .p2align 3, 0x0 +__OBJC_$_CATEGORY_CLASS_METHODS_MyBaseClass_$_Category04: + .long 24 + .long 1 + .quad l_OBJC_METH_VAR_NAME_.16 + .quad l_OBJC_METH_VAR_TYPE_ + .quad "+[MyBaseClass(Category04) load]" + .p2align 3, 0x0 +__OBJC_$_CATEGORY_MyBaseClass_$_Category04: + .quad l_OBJC_CLASS_NAME_.15 + .quad _OBJC_CLASS_$_MyBaseClass + .quad 0 + .quad __OBJC_$_CATEGORY_CLASS_METHODS_MyBaseClass_$_Category04 + .quad 0 + .quad 0 + .quad 0 + .long 64 + .space 4 + .section __DATA,__objc_catlist,regular,no_dead_strip + .p2align 3, 0x0 ; @"OBJC_LABEL_CATEGORY_$" +l_OBJC_LABEL_CATEGORY_$: + .quad __OBJC_$_CATEGORY_MyBaseClass_$_Category02 + .quad __OBJC_$_CATEGORY_MyBaseClass_$_Category03 + .quad __OBJC_$_CATEGORY_MyBaseClass_$_Category04 + .section __DATA,__objc_nlcatlist,regular,no_dead_strip + .p2align 3, 0x0 +l_OBJC_LABEL_NONLAZY_CATEGORY_$: + .quad __OBJC_$_CATEGORY_MyBaseClass_$_Category04 + + .no_dead_strip __OBJC_LABEL_PROTOCOL_$_MyProtocol02 + .no_dead_strip 
__OBJC_LABEL_PROTOCOL_$_MyProtocol03 + .no_dead_strip __OBJC_PROTOCOL_$_MyProtocol02 + .no_dead_strip __OBJC_PROTOCOL_$_MyProtocol03 + .section __DATA,__objc_imageinfo,regular,no_dead_strip +L_OBJC_IMAGE_INFO: + .long 0 + .long 96 +.subsections_via_symbols diff --git a/wild/tests/lld-macho/objc-category-merging-erase-objc-name-test.s b/wild/tests/lld-macho/objc-category-merging-erase-objc-name-test.s new file mode 100644 index 000000000..bc0b27d19 --- /dev/null +++ b/wild/tests/lld-macho/objc-category-merging-erase-objc-name-test.s @@ -0,0 +1,308 @@ +; REQUIRES: aarch64 + +; Here we test that if we defined a protocol MyTestProtocol and also a category MyTestProtocol +; then when merging the category into the base class (and deleting the category), we don't +; delete the 'MyTestProtocol' name + +; RUN: mkdir -p %t.dir + +; RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o %t.dir/erase-objc-name.o %s +; RUN: %lld -no_objc_relative_method_lists -arch arm64 -dylib -o %t.dir/erase-objc-name.dylib %t.dir/erase-objc-name.o -objc_category_merging +; RUN: llvm-objdump --objc-meta-data --macho %t.dir/erase-objc-name.dylib | FileCheck %s --check-prefixes=MERGE_CATS + +; === Check merge categories enabled === +; Check that the original categories are not there +; MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category01 +; MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category02 + +; Check that we get the expected output - most importantly that the protocol is named `MyTestProtocol` +; MERGE_CATS: Contents of (__DATA_CONST,__objc_classlist) section +; MERGE_CATS-NEXT: _OBJC_CLASS_$_MyBaseClass +; MERGE_CATS-NEXT: isa {{.*}} _OBJC_METACLASS_$_MyBaseClass +; MERGE_CATS-NEXT: superclass {{.*}} +; MERGE_CATS-NEXT: cache {{.*}} +; MERGE_CATS-NEXT: vtable {{.*}} +; MERGE_CATS-NEXT: data {{.*}} (struct class_ro_t *) +; MERGE_CATS-NEXT: flags {{.*}} RO_ROOT +; MERGE_CATS-NEXT: instanceStart 0 +; MERGE_CATS-NEXT: instanceSize 0 +; MERGE_CATS-NEXT: reserved {{.*}} +; 
MERGE_CATS-NEXT: ivarLayout {{.*}} +; MERGE_CATS-NEXT: name {{.*}} MyBaseClass +; MERGE_CATS-NEXT: baseMethods {{.*}} (struct method_list_t *) +; MERGE_CATS-NEXT: entsize 24 +; MERGE_CATS-NEXT: count 2 +; MERGE_CATS-NEXT: name {{.*}} getValue +; MERGE_CATS-NEXT: types {{.*}} i16@0:8 +; MERGE_CATS-NEXT: imp -[MyBaseClass(MyTestProtocol) getValue] +; MERGE_CATS-NEXT: name {{.*}} baseInstanceMethod +; MERGE_CATS-NEXT: types {{.*}} v16@0:8 +; MERGE_CATS-NEXT: imp -[MyBaseClass baseInstanceMethod] +; MERGE_CATS-NEXT: baseProtocols {{.*}} +; MERGE_CATS-NEXT: count 1 +; MERGE_CATS-NEXT: list[0] {{.*}} (struct protocol_t *) +; MERGE_CATS-NEXT: isa {{.*}} +; MERGE_CATS-NEXT: name {{.*}} MyTestProtocol +; MERGE_CATS-NEXT: protocols {{.*}} +; MERGE_CATS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +; MERGE_CATS-NEXT: entsize 24 +; MERGE_CATS-NEXT: count 1 +; MERGE_CATS-NEXT: name {{.*}} getValue +; MERGE_CATS-NEXT: types {{.*}} i16@0:8 +; MERGE_CATS-NEXT: imp {{.*}} +; MERGE_CATS-NEXT: classMethods {{.*}} (struct method_list_t *) +; MERGE_CATS-NEXT: optionalInstanceMethods {{.*}} +; MERGE_CATS-NEXT: optionalClassMethods {{.*}} +; MERGE_CATS-NEXT: instanceProperties {{.*}} +; MERGE_CATS-NEXT: ivars {{.*}} +; MERGE_CATS-NEXT: weakIvarLayout {{.*}} +; MERGE_CATS-NEXT: baseProperties {{.*}} +; MERGE_CATS-NEXT: Meta Class +; MERGE_CATS-NEXT: isa {{.*}} _OBJC_METACLASS_$_MyBaseClass +; MERGE_CATS-NEXT: superclass {{.*}} _OBJC_CLASS_$_MyBaseClass +; MERGE_CATS-NEXT: cache {{.*}} +; MERGE_CATS-NEXT: vtable {{.*}} +; MERGE_CATS-NEXT: data {{.*}} (struct class_ro_t *) +; MERGE_CATS-NEXT: flags {{.*}} RO_META RO_ROOT +; MERGE_CATS-NEXT: instanceStart 40 +; MERGE_CATS-NEXT: instanceSize 40 +; MERGE_CATS-NEXT: reserved {{.*}} +; MERGE_CATS-NEXT: ivarLayout {{.*}} +; MERGE_CATS-NEXT: name {{.*}} MyBaseClass +; MERGE_CATS-NEXT: baseMethods {{.*}} (struct method_list_t *) +; MERGE_CATS-NEXT: baseProtocols {{.*}} +; MERGE_CATS-NEXT: count 1 +; MERGE_CATS-NEXT: list[0] {{.*}} (struct 
protocol_t *) +; MERGE_CATS-NEXT: isa {{.*}} +; MERGE_CATS-NEXT: name {{.*}} MyTestProtocol +; MERGE_CATS-NEXT: protocols {{.*}} +; MERGE_CATS-NEXT: instanceMethods {{.*}} (struct method_list_t *) +; MERGE_CATS-NEXT: entsize 24 +; MERGE_CATS-NEXT: count 1 +; MERGE_CATS-NEXT: name {{.*}} getValue +; MERGE_CATS-NEXT: types {{.*}} i16@0:8 +; MERGE_CATS-NEXT: imp {{.*}} +; MERGE_CATS-NEXT: classMethods {{.*}} (struct method_list_t *) +; MERGE_CATS-NEXT: optionalInstanceMethods {{.*}} +; MERGE_CATS-NEXT: optionalClassMethods {{.*}} +; MERGE_CATS-NEXT: instanceProperties {{.*}} +; MERGE_CATS-NEXT: ivars {{.*}} +; MERGE_CATS-NEXT: weakIvarLayout {{.*}} +; MERGE_CATS-NEXT: baseProperties {{.*}} +; MERGE_CATS-NEXT: Contents of (__DATA_CONST,__objc_protolist) section +; MERGE_CATS-NEXT: {{.*}} {{.*}} __OBJC_PROTOCOL_$_MyTestProtocol +; MERGE_CATS-NEXT: Contents of (__DATA_CONST,__objc_imageinfo) section +; MERGE_CATS-NEXT: version 0 +; MERGE_CATS-NEXT: flags {{.*}} OBJC_IMAGE_HAS_CATEGORY_CLASS_PROPERTIES + + +; ================== repro.sh ==================== +; # Write the Objective-C code to a file +; cat << EOF > MyClass.m +; @protocol MyTestProtocol +; - (int)getValue; +; @end +; +; __attribute__((objc_root_class)) +; @interface MyBaseClass +; - (void)baseInstanceMethod; +; @end +; +; @implementation MyBaseClass +; - (void)baseInstanceMethod {} +; @end +; +; @interface MyBaseClass (MyTestProtocol) +; @end +; +; @implementation MyBaseClass (MyTestProtocol) +; +; - (int)getValue { +; return 0x30; +; } +; +; @end +; EOF +; +; # Compile the Objective-C file to assembly +; xcrun clang -S -arch arm64 MyClass.m -o MyClass.s +; ============================================== + + + .section __TEXT,__text,regular,pure_instructions + .p2align 2 ; -- Begin function -[MyBaseClass baseInstanceMethod] +"-[MyBaseClass baseInstanceMethod]": ; @"\01-[MyBaseClass baseInstanceMethod]" + .cfi_startproc +; %bb.0: + sub sp, sp, #16 + .cfi_def_cfa_offset 16 + str x0, [sp, #8] + str x1, [sp] + 
add sp, sp, #16 + ret + .cfi_endproc + ; -- End function + .p2align 2 ; -- Begin function -[MyBaseClass(MyTestProtocol) getValue] +"-[MyBaseClass(MyTestProtocol) getValue]": ; @"\01-[MyBaseClass(MyTestProtocol) getValue]" + .cfi_startproc +; %bb.0: + sub sp, sp, #16 + .cfi_def_cfa_offset 16 + str x0, [sp, #8] + str x1, [sp] + mov w0, #48 ; =0x30 + add sp, sp, #16 + ret + .cfi_endproc + ; -- End function + .section __DATA,__objc_data + .globl _OBJC_CLASS_$_MyBaseClass ; @"OBJC_CLASS_$_MyBaseClass" + .p2align 3, 0x0 +_OBJC_CLASS_$_MyBaseClass: + .quad _OBJC_METACLASS_$_MyBaseClass + .quad 0 + .quad __objc_empty_cache + .quad 0 + .quad __OBJC_CLASS_RO_$_MyBaseClass + .globl _OBJC_METACLASS_$_MyBaseClass ; @"OBJC_METACLASS_$_MyBaseClass" + .p2align 3, 0x0 +_OBJC_METACLASS_$_MyBaseClass: + .quad _OBJC_METACLASS_$_MyBaseClass + .quad _OBJC_CLASS_$_MyBaseClass + .quad __objc_empty_cache + .quad 0 + .quad __OBJC_METACLASS_RO_$_MyBaseClass + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_: ; @OBJC_CLASS_NAME_ + .asciz "MyBaseClass" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_METACLASS_RO_$_MyBaseClass" +__OBJC_METACLASS_RO_$_MyBaseClass: + .long 131 ; 0x83 + .long 40 ; 0x28 + .long 40 ; 0x28 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_: ; @OBJC_METH_VAR_NAME_ + .asciz "baseInstanceMethod" + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_: ; @OBJC_METH_VAR_TYPE_ + .asciz "v16@0:8" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_INSTANCE_METHODS_MyBaseClass" +__OBJC_$_INSTANCE_METHODS_MyBaseClass: + .long 24 ; 0x18 + .long 1 ; 0x1 + .quad l_OBJC_METH_VAR_NAME_ + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyBaseClass baseInstanceMethod]" + .p2align 3, 0x0 ; @"_OBJC_CLASS_RO_$_MyBaseClass" +__OBJC_CLASS_RO_$_MyBaseClass: + .long 130 ; 0x82 + .long 0 ; 0x0 + .long 0 ; 0x0 + .space 4 + 
.quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad __OBJC_$_INSTANCE_METHODS_MyBaseClass + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_.1: ; @OBJC_CLASS_NAME_.1 + .asciz "MyTestProtocol" + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_.2: ; @OBJC_METH_VAR_NAME_.2 + .asciz "getValue" + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_.3: ; @OBJC_METH_VAR_TYPE_.3 + .asciz "i16@0:8" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_MyTestProtocol" +__OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_MyTestProtocol: + .long 24 ; 0x18 + .long 1 ; 0x1 + .quad l_OBJC_METH_VAR_NAME_.2 + .quad l_OBJC_METH_VAR_TYPE_.3 + .quad "-[MyBaseClass(MyTestProtocol) getValue]" + .p2align 3, 0x0 ; @"_OBJC_$_PROTOCOL_INSTANCE_METHODS_MyTestProtocol" +__OBJC_$_PROTOCOL_INSTANCE_METHODS_MyTestProtocol: + .long 24 ; 0x18 + .long 1 ; 0x1 + .quad l_OBJC_METH_VAR_NAME_.2 + .quad l_OBJC_METH_VAR_TYPE_.3 + .quad 0 + .p2align 3, 0x0 ; @"_OBJC_$_PROTOCOL_METHOD_TYPES_MyTestProtocol" +__OBJC_$_PROTOCOL_METHOD_TYPES_MyTestProtocol: + .quad l_OBJC_METH_VAR_TYPE_.3 + .private_extern __OBJC_PROTOCOL_$_MyTestProtocol ; @"_OBJC_PROTOCOL_$_MyTestProtocol" + .section __DATA,__data + .globl __OBJC_PROTOCOL_$_MyTestProtocol + .weak_definition __OBJC_PROTOCOL_$_MyTestProtocol + .p2align 3, 0x0 +__OBJC_PROTOCOL_$_MyTestProtocol: + .quad 0 + .quad l_OBJC_CLASS_NAME_.1 + .quad 0 + .quad __OBJC_$_PROTOCOL_INSTANCE_METHODS_MyTestProtocol + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .long 96 ; 0x60 + .long 0 ; 0x0 + .quad __OBJC_$_PROTOCOL_METHOD_TYPES_MyTestProtocol + .quad 0 + .quad 0 + .private_extern __OBJC_LABEL_PROTOCOL_$_MyTestProtocol ; @"_OBJC_LABEL_PROTOCOL_$_MyTestProtocol" + .section __DATA,__objc_protolist,coalesced,no_dead_strip + .globl __OBJC_LABEL_PROTOCOL_$_MyTestProtocol + .weak_definition __OBJC_LABEL_PROTOCOL_$_MyTestProtocol + .p2align 3, 0x0 
+__OBJC_LABEL_PROTOCOL_$_MyTestProtocol: + .quad __OBJC_PROTOCOL_$_MyTestProtocol + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_CATEGORY_PROTOCOLS_$_MyBaseClass_$_MyTestProtocol" +__OBJC_CATEGORY_PROTOCOLS_$_MyBaseClass_$_MyTestProtocol: + .quad 1 ; 0x1 + .quad __OBJC_PROTOCOL_$_MyTestProtocol + .quad 0 + .p2align 3, 0x0 ; @"_OBJC_$_CATEGORY_MyBaseClass_$_MyTestProtocol" +__OBJC_$_CATEGORY_MyBaseClass_$_MyTestProtocol: + .quad l_OBJC_CLASS_NAME_.1 + .quad _OBJC_CLASS_$_MyBaseClass + .quad __OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_MyTestProtocol + .quad 0 + .quad __OBJC_CATEGORY_PROTOCOLS_$_MyBaseClass_$_MyTestProtocol + .quad 0 + .quad 0 + .long 64 ; 0x40 + .space 4 + .section __DATA,__objc_classlist,regular,no_dead_strip + .p2align 3, 0x0 ; @"OBJC_LABEL_CLASS_$" +l_OBJC_LABEL_CLASS_$: + .quad _OBJC_CLASS_$_MyBaseClass + .section __DATA,__objc_catlist,regular,no_dead_strip + .p2align 3, 0x0 ; @"OBJC_LABEL_CATEGORY_$" +l_OBJC_LABEL_CATEGORY_$: + .quad __OBJC_$_CATEGORY_MyBaseClass_$_MyTestProtocol + .no_dead_strip __OBJC_PROTOCOL_$_MyTestProtocol + .no_dead_strip __OBJC_LABEL_PROTOCOL_$_MyTestProtocol + .section __DATA,__objc_imageinfo,regular,no_dead_strip +L_OBJC_IMAGE_INFO: + .long 0 + .long 64 + +__objc_empty_cache: +_$sBOWV: + .quad 0 + +.subsections_via_symbols diff --git a/wild/tests/lld-macho/objc-category-merging-minimal.s b/wild/tests/lld-macho/objc-category-merging-minimal.s new file mode 100644 index 000000000..d4d5933aa --- /dev/null +++ b/wild/tests/lld-macho/objc-category-merging-minimal.s @@ -0,0 +1,387 @@ +# REQUIRES: aarch64 +# UNSUPPORTED: system-windows +# due to awk usage + +# RUN: rm -rf %t; split-file %s %t && cd %t + +############ Test merging multiple categories into a single category ############ +## Create a dylib with a fake base class to link against in when merging between categories +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o a64_fakedylib.o a64_fakedylib.s +# RUN: %lld -arch arm64 a64_fakedylib.o -o 
a64_fakedylib.dylib -dylib + +## Create our main testing dylib - linking against the fake dylib above +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o merge_cat_minimal.o merge_cat_minimal.s +# RUN: %lld -arch arm64 -dylib -o merge_cat_minimal_no_merge.dylib a64_fakedylib.dylib merge_cat_minimal.o +# RUN: %lld -arch arm64 -dylib -o merge_cat_minimal_merge.dylib -objc_category_merging a64_fakedylib.dylib merge_cat_minimal.o + +## Now verify that the flag caused category merging to happen appropriately +# RUN: llvm-objdump --objc-meta-data --macho merge_cat_minimal_no_merge.dylib | FileCheck %s --check-prefixes=NO_MERGE_CATS +# RUN: llvm-objdump --objc-meta-data --macho merge_cat_minimal_merge.dylib | FileCheck %s --check-prefixes=MERGE_CATS + +############ Test merging multiple categories into the base class ############ +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o merge_base_class_minimal.o merge_base_class_minimal.s +# RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_yes_merge.dylib -objc_category_merging merge_base_class_minimal.o merge_cat_minimal.o +# RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_no_merge.dylib merge_base_class_minimal.o merge_cat_minimal.o + +# RUN: llvm-objdump --objc-meta-data --macho merge_base_class_minimal_no_merge.dylib | FileCheck %s --check-prefixes=NO_MERGE_INTO_BASE +# RUN: llvm-objdump --objc-meta-data --macho merge_base_class_minimal_yes_merge.dylib | FileCheck %s --check-prefixes=YES_MERGE_INTO_BASE + +############ Test merging swift category into the base class ############ +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o MyBaseClassSwiftExtension.o MyBaseClassSwiftExtension.s +# RUN: %lld -no_objc_relative_method_lists -arch arm64 -dylib -o merge_base_class_swift_minimal_yes_merge.dylib -objc_category_merging MyBaseClassSwiftExtension.o merge_base_class_minimal.o +# RUN: llvm-objdump --objc-meta-data --macho merge_base_class_swift_minimal_yes_merge.dylib | FileCheck %s
--check-prefixes=YES_MERGE_INTO_BASE_SWIFT + +############ Test merging skipped due to invalid category name ############ +# Modify __OBJC_$_CATEGORY_MyBaseClass_$_Category01's name to point to L_OBJC_IMAGE_INFO+3 +# RUN: awk '/^__OBJC_\$_CATEGORY_MyBaseClass_\$_Category01:/ { print; getline; sub(/^[ \t]*\.quad[ \t]+l_OBJC_CLASS_NAME_$/, "\t.quad\tL_OBJC_IMAGE_INFO+3"); print; next } { print }' merge_cat_minimal.s > merge_cat_minimal_bad_name.s + +# Assemble the modified source +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o merge_cat_minimal_bad_name.o merge_cat_minimal_bad_name.s + +# Run lld and check for the specific warning +# RUN: %no-fatal-warnings-lld -arch arm64 -dylib -objc_category_merging -o merge_cat_minimal_merge.dylib a64_fakedylib.dylib merge_cat_minimal_bad_name.o 2>&1 | FileCheck %s --check-prefix=MERGE_WARNING + +# Check that lld emitted the warning about skipping category merging +MERGE_WARNING: warning: ObjC category merging skipped for class symbol' _OBJC_CLASS_$_MyBaseClass' + +#### Check merge categories enabled ### +# Check that the original categories are not there +MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category01 +MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category02 + +# Check that the merged category is there, in the correct format +MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass(Category01|Category02) +MERGE_CATS-NEXT: name {{.*}} Category01|Category02 +MERGE_CATS: instanceMethods +MERGE_CATS-NEXT: entsize 12 (relative) +MERGE_CATS-NEXT: count 2 +MERGE_CATS-NEXT: name {{.*}} cat01_InstanceMethod +MERGE_CATS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS-NEXT: imp {{.*}} -[MyBaseClass(Category01) cat01_InstanceMethod] +MERGE_CATS-NEXT: name {{.*}} cat02_InstanceMethod +MERGE_CATS-NEXT: types {{.*}} v16@0:8 +MERGE_CATS-NEXT: imp {{.*}} -[MyBaseClass(Category02) cat02_InstanceMethod] +MERGE_CATS-NEXT: classMethods 0x0 +MERGE_CATS-NEXT: protocols 0x0 +MERGE_CATS-NEXT: instanceProperties 0x0 + +#### Check merge categories
disabled ### +# Check that the merged category is not there +NO_MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass(Category01|Category02) + +# Check that the original categories are there +NO_MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass_$_Category01 +NO_MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass_$_Category02 + + +#### Check merge categories into base class is disabled #### +NO_MERGE_INTO_BASE: __OBJC_$_CATEGORY_MyBaseClass_$_Category01 +NO_MERGE_INTO_BASE: __OBJC_$_CATEGORY_MyBaseClass_$_Category02 + +#### Check merge categories into base class is enabled and categories are merged into base class #### +YES_MERGE_INTO_BASE-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category01 +YES_MERGE_INTO_BASE-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category02 + +YES_MERGE_INTO_BASE: _OBJC_CLASS_$_MyBaseClass +YES_MERGE_INTO_BASE-NEXT: _OBJC_METACLASS_$_MyBaseClass +YES_MERGE_INTO_BASE: baseMethods +YES_MERGE_INTO_BASE-NEXT: entsize 12 (relative) +YES_MERGE_INTO_BASE-NEXT: count 3 +YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat01_InstanceMethod +YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8 +YES_MERGE_INTO_BASE-NEXT: imp {{.*}} -[MyBaseClass(Category01) cat01_InstanceMethod] +YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat02_InstanceMethod +YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8 +YES_MERGE_INTO_BASE-NEXT: imp {{.*}} -[MyBaseClass(Category02) cat02_InstanceMethod] +YES_MERGE_INTO_BASE-NEXT: name {{.*}} baseInstanceMethod +YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8 +YES_MERGE_INTO_BASE-NEXT: imp {{.*}} -[MyBaseClass baseInstanceMethod] + + +#### Check merge swift category into base class ### +YES_MERGE_INTO_BASE_SWIFT: _OBJC_CLASS_$_MyBaseClass +YES_MERGE_INTO_BASE_SWIFT-NEXT: _OBJC_METACLASS_$_MyBaseClass +YES_MERGE_INTO_BASE_SWIFT: baseMethods +YES_MERGE_INTO_BASE_SWIFT-NEXT: entsize 24 +YES_MERGE_INTO_BASE_SWIFT-NEXT: count 2 +YES_MERGE_INTO_BASE_SWIFT-NEXT: name {{.*}} swiftMethod +YES_MERGE_INTO_BASE_SWIFT-NEXT: types {{.*}} v16@0:8 +YES_MERGE_INTO_BASE_SWIFT-NEXT: imp
_$sSo11MyBaseClassC0abC14SwiftExtensionE11swiftMethodyyFTo +YES_MERGE_INTO_BASE_SWIFT-NEXT: name {{.*}} baseInstanceMethod +YES_MERGE_INTO_BASE_SWIFT-NEXT: types {{.*}} v16@0:8 +YES_MERGE_INTO_BASE_SWIFT-NEXT: imp -[MyBaseClass baseInstanceMethod] + + +#--- a64_fakedylib.s + + .section __DATA,__objc_data + .globl _OBJC_CLASS_$_MyBaseClass +_OBJC_CLASS_$_MyBaseClass: + .quad 0 + +#--- merge_cat_minimal.s + +; ================== Generated from ObjC: ================== +; __attribute__((objc_root_class)) +; @interface MyBaseClass +; - (void)baseInstanceMethod; +; @end +; +; @interface MyBaseClass(Category01) +; - (void)cat01_InstanceMethod; +; @end +; +; @implementation MyBaseClass(Category01) +; - (void)cat01_InstanceMethod {} +; @end +; +; @interface MyBaseClass(Category02) +; - (void)cat02_InstanceMethod; +; @end +; +; @implementation MyBaseClass(Category02) +; - (void)cat02_InstanceMethod {} +; @end +; ================== Generated from ObjC: ================== + + .section __TEXT,__text,regular,pure_instructions + .p2align 2 ; -- Begin function -[MyBaseClass(Category01) cat01_InstanceMethod] +"-[MyBaseClass(Category01) cat01_InstanceMethod]": ; @"\01-[MyBaseClass(Category01) cat01_InstanceMethod]" + .cfi_startproc + ret + .cfi_endproc + ; -- End function + .p2align 2 ; -- Begin function -[MyBaseClass(Category02) cat02_InstanceMethod] +"-[MyBaseClass(Category02) cat02_InstanceMethod]": ; @"\01-[MyBaseClass(Category02) cat02_InstanceMethod]" + .cfi_startproc + ret + .cfi_endproc + ; -- End function + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_: ; @OBJC_CLASS_NAME_ + .asciz "Category01" + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_: ; @OBJC_METH_VAR_NAME_ + .asciz "cat01_InstanceMethod" + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_: ; @OBJC_METH_VAR_TYPE_ + .asciz "v16@0:8" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_Category01" 
+__OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_Category01: + .long 24 ; 0x18 + .long 1 ; 0x1 + .quad l_OBJC_METH_VAR_NAME_ + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyBaseClass(Category01) cat01_InstanceMethod]" + .p2align 3, 0x0 ; @"_OBJC_$_CATEGORY_MyBaseClass_$_Category01" +__OBJC_$_CATEGORY_MyBaseClass_$_Category01: + .quad l_OBJC_CLASS_NAME_ + .quad _OBJC_CLASS_$_MyBaseClass + .quad __OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_Category01 + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .long 64 ; 0x40 + .space 4 + .section __DATA,__objc_const +l_OBJC_CLASS_NAME_.1: ; @OBJC_CLASS_NAME_.1 + .asciz "Category02" + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_.2: ; @OBJC_METH_VAR_NAME_.2 + .asciz "cat02_InstanceMethod" + .section __DATA,__objc_const + .p2align 3, 0x0 ; @"_OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_Category02" +__OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_Category02: + .long 24 ; 0x18 + .long 1 ; 0x1 + .quad l_OBJC_METH_VAR_NAME_.2 + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyBaseClass(Category02) cat02_InstanceMethod]" + .p2align 3, 0x0 ; @"_OBJC_$_CATEGORY_MyBaseClass_$_Category02" +__OBJC_$_CATEGORY_MyBaseClass_$_Category02: + .quad l_OBJC_CLASS_NAME_.1 + .quad _OBJC_CLASS_$_MyBaseClass + .quad __OBJC_$_CATEGORY_INSTANCE_METHODS_MyBaseClass_$_Category02 + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .long 64 ; 0x40 + .space 4 + .section __DATA,__objc_catlist,regular,no_dead_strip + .p2align 3, 0x0 ; @"OBJC_LABEL_CATEGORY_$" +l_OBJC_LABEL_CATEGORY_$: + .quad __OBJC_$_CATEGORY_MyBaseClass_$_Category01 + .quad __OBJC_$_CATEGORY_MyBaseClass_$_Category02 + .section __DATA,__objc_imageinfo,regular,no_dead_strip +L_OBJC_IMAGE_INFO: + .long 0 + .long 96 +.subsections_via_symbols + +.addrsig +.addrsig_sym __OBJC_$_CATEGORY_MyBaseClass_$_Category01 + +#--- merge_base_class_minimal.s +; clang -c merge_base_class_minimal.mm -O3 -target arm64-apple-macos -arch arm64 -S -o merge_base_class_minimal.s +; ================== Generated 
from ObjC: ================== +; __attribute__((objc_root_class)) +; @interface MyBaseClass +; - (void)baseInstanceMethod; +; @end +; +; @implementation MyBaseClass +; - (void)baseInstanceMethod {} +; @end +; ================== Generated from ObjC ================== + .section __TEXT,__text,regular,pure_instructions + .build_version macos, 11, 0 + .p2align 2 +"-[MyBaseClass baseInstanceMethod]": + .cfi_startproc +; %bb.0: + ret + .cfi_endproc + .section __DATA,__objc_data + .globl _OBJC_CLASS_$_MyBaseClass + .p2align 3, 0x0 +_OBJC_CLASS_$_MyBaseClass: + .quad _OBJC_METACLASS_$_MyBaseClass + .quad 0 + .quad 0 + .quad 0 + .quad __OBJC_CLASS_RO_$_MyBaseClass + .globl _OBJC_METACLASS_$_MyBaseClass + .p2align 3, 0x0 +_OBJC_METACLASS_$_MyBaseClass: + .quad _OBJC_METACLASS_$_MyBaseClass + .quad _OBJC_CLASS_$_MyBaseClass + .quad 0 + .quad 0 + .quad __OBJC_METACLASS_RO_$_MyBaseClass + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_: + .asciz "MyBaseClass" + .section __DATA,__objc_const + .p2align 3, 0x0 +__OBJC_METACLASS_RO_$_MyBaseClass: + .long 3 + .long 40 + .long 40 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_: + .asciz "baseInstanceMethod" + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_: + .asciz "v16@0:8" + .section __DATA,__objc_const + .p2align 3, 0x0 +__OBJC_$_INSTANCE_METHODS_MyBaseClass: + .long 24 + .long 1 + .quad l_OBJC_METH_VAR_NAME_ + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyBaseClass baseInstanceMethod]" + .p2align 3, 0x0 +__OBJC_CLASS_RO_$_MyBaseClass: + .long 2 + .long 0 + .long 0 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad __OBJC_$_INSTANCE_METHODS_MyBaseClass + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .section __DATA,__objc_classlist,regular,no_dead_strip + .p2align 3, 0x0 +l_OBJC_LABEL_CLASS_$: + .quad _OBJC_CLASS_$_MyBaseClass + .section 
__DATA,__objc_imageinfo,regular,no_dead_strip +L_OBJC_IMAGE_INFO: + .long 0 + .long 64 +.subsections_via_symbols + + +#--- MyBaseClassSwiftExtension.s +; xcrun -sdk macosx swiftc -emit-assembly MyBaseClassSwiftExtension.swift -import-objc-header YourProject-Bridging-Header.h -o MyBaseClassSwiftExtension.s +; ================== Generated from Swift: ================== +; import Foundation +; extension MyBaseClass { +; @objc func swiftMethod() { +; } +; } +; ================== Generated from Swift =================== + .private_extern _$sSo11MyBaseClassC0abC14SwiftExtensionE11swiftMethodyyF + .globl _$sSo11MyBaseClassC0abC14SwiftExtensionE11swiftMethodyyF + .p2align 2 +_$sSo11MyBaseClassC0abC14SwiftExtensionE11swiftMethodyyF: + .cfi_startproc + mov w0, #0 + ret + .cfi_endproc + + .p2align 2 +_$sSo11MyBaseClassC0abC14SwiftExtensionE11swiftMethodyyFTo: + .cfi_startproc + mov w0, #0 + ret + .cfi_endproc + + .section __TEXT,__cstring,cstring_literals + .p2align 4, 0x0 +l_.str.25.MyBaseClassSwiftExtension: + .asciz "MyBaseClassSwiftExtension" + + .section __TEXT,__objc_methname,cstring_literals +"L_selector_data(swiftMethod)": + .asciz "swiftMethod" + + .section __TEXT,__cstring,cstring_literals +"l_.str.7.v16@0:8": + .asciz "v16@0:8" + + .section __DATA,__objc_data + .p2align 3, 0x0 +__CATEGORY_INSTANCE_METHODS_MyBaseClass_$_MyBaseClassSwiftExtension: + .long 24 + .long 1 + .quad "L_selector_data(swiftMethod)" + .quad "l_.str.7.v16@0:8" + .quad _$sSo11MyBaseClassC0abC14SwiftExtensionE11swiftMethodyyFTo + + .section __DATA,__objc_const + .p2align 3, 0x0 +__CATEGORY_MyBaseClass_$_MyBaseClassSwiftExtension: + .quad l_.str.25.MyBaseClassSwiftExtension + .quad _OBJC_CLASS_$_MyBaseClass + .quad __CATEGORY_INSTANCE_METHODS_MyBaseClass_$_MyBaseClassSwiftExtension + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .long 60 + .space 4 + + .section __DATA,__objc_catlist,regular,no_dead_strip + .p2align 3, 0x0 +_objc_categories: + .quad __CATEGORY_MyBaseClass_$_MyBaseClassSwiftExtension + + 
.no_dead_strip _main + .no_dead_strip l_entry_point + +.subsections_via_symbols diff --git a/wild/tests/lld-macho/objc-category-merging-swift-class-ext.s b/wild/tests/lld-macho/objc-category-merging-swift-class-ext.s new file mode 100644 index 000000000..f6461e13a --- /dev/null +++ b/wild/tests/lld-macho/objc-category-merging-swift-class-ext.s @@ -0,0 +1,442 @@ +# REQUIRES: aarch64 +# RUN: rm -rf %t; mkdir %t && cd %t + +############ Test swift category merging into @objc class, with protocol ############ +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o cat_swift.o %s +# RUN: %lld -arch arm64 -dylib -o cat_swift.dylib cat_swift.o -objc_category_merging +# RUN: llvm-objdump --objc-meta-data --macho cat_swift.dylib | FileCheck %s --check-prefixes=CHECK-MERGE + + +; CHECK-MERGE: Contents of (__DATA_CONST,__objc_classlist) section + +; CHECK-MERGE-NEXT: [[#%x,]] 0x[[#%x,]] _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass +; CHECK-MERGE-NEXT: isa 0x[[#%x,]] _OBJC_METACLASS_$__TtC11MyTestClass11MyTestClass +; CHECK-MERGE-NEXT: superclass 0x0 +; CHECK-MERGE-NEXT: cache 0x0 +; CHECK-MERGE-NEXT: vtable 0x0 +; CHECK-MERGE-NEXT: data 0x[[#%x,]] (struct class_ro_t *) Swift class +; CHECK-MERGE-NEXT: flags 0x80 +; CHECK-MERGE-NEXT: instanceStart 8 +; CHECK-MERGE-NEXT: instanceSize 8 +; CHECK-MERGE-NEXT: reserved 0x0 +; CHECK-MERGE-NEXT: ivarLayout 0x0 +; CHECK-MERGE-NEXT: name 0x[[#%x,]] _TtC11MyTestClass11MyTestClass +; CHECK-MERGE-NEXT: baseMethods 0x[[#%x,]] (struct method_list_t *) +; CHECK-MERGE-NEXT: entsize 24 +; CHECK-MERGE-NEXT: count 1 +; CHECK-MERGE-NEXT: name 0x[[#%x,]] init +; CHECK-MERGE-NEXT: types 0x[[#%x,]] @16@0:8 +; CHECK-MERGE-NEXT: imp _$s11MyTestClassAACABycfcTo +; CHECK-MERGE-NEXT: baseProtocols 0x0 +; CHECK-MERGE-NEXT: ivars 0x0 +; CHECK-MERGE-NEXT: weakIvarLayout 0x0 +; CHECK-MERGE-NEXT: baseProperties 0x0 +; CHECK-MERGE-NEXT: Meta Class +; CHECK-MERGE-NEXT: isa 0x0 +; CHECK-MERGE-NEXT: superclass 0x0 +; CHECK-MERGE-NEXT: cache 0x0 +; 
CHECK-MERGE-NEXT: vtable 0x0 +; CHECK-MERGE-NEXT: data 0x[[#%x,]] (struct class_ro_t *) +; CHECK-MERGE-NEXT: flags 0x81 RO_META +; CHECK-MERGE-NEXT: instanceStart 40 +; CHECK-MERGE-NEXT: instanceSize 40 +; CHECK-MERGE-NEXT: reserved 0x0 +; CHECK-MERGE-NEXT: ivarLayout 0x0 +; CHECK-MERGE-NEXT: name 0x[[#%x,]] _TtC11MyTestClass11MyTestClass +; CHECK-MERGE-NEXT: baseMethods 0x0 (struct method_list_t *) +; CHECK-MERGE-NEXT: baseProtocols 0x0 +; CHECK-MERGE-NEXT: ivars 0x0 +; CHECK-MERGE-NEXT: weakIvarLayout 0x0 +; CHECK-MERGE-NEXT: baseProperties 0x0 + + +; ================== Generated from Swift: ================== +;; > xcrun swiftc --version +;; swift-driver version: 1.109.2 Apple Swift version 6.0 (swiftlang-6.0.0.3.300 clang-1600.0.20.10) +;; > xcrun swiftc -S MyTestClass.swift -o MyTestClass.s +;; +; import Foundation +; +; protocol MyProtocol { +; func protocolMethod() +; } +; +; @objc class MyTestClass: NSObject, MyProtocol { +; func protocolMethod() { +; } +; } +; +; extension MyTestClass { +; public func extensionMethod() { +; } +; } +; ================== Generated from Swift: ================== + + + .section __TEXT,__text,regular,pure_instructions + .build_version macos, 11, 0 sdk_version 10, 0 + .globl _main + .p2align 2 +_main: + .cfi_startproc + mov w0, #0 + ret + .cfi_endproc + + .private_extern _$s11MyTestClassAAC14protocolMethodyyF + .globl _$s11MyTestClassAAC14protocolMethodyyF + .p2align 2 +_$s11MyTestClassAAC14protocolMethodyyF: + .cfi_startproc + ret + .cfi_endproc + + .private_extern _$s11MyTestClassAACABycfC + .globl _$s11MyTestClassAACABycfC + .p2align 2 +_$s11MyTestClassAACABycfC: + .cfi_startproc + ret + .cfi_endproc + + .private_extern _$s11MyTestClassAACABycfc + .globl _$s11MyTestClassAACABycfc + .p2align 2 +_$s11MyTestClassAACABycfc: + .cfi_startproc + ret + .cfi_endproc + + .private_extern _$s11MyTestClassAACMa + .globl _$s11MyTestClassAACMa + .p2align 2 +_$s11MyTestClassAACMa: + ret + + .p2align 2 +_$s11MyTestClassAACABycfcTo: + 
.cfi_startproc + ret + .cfi_endproc + + .private_extern _$s11MyTestClassAACfD + .globl _$s11MyTestClassAACfD + .p2align 2 +_$s11MyTestClassAACfD: + .cfi_startproc + ret + .cfi_endproc + + .p2align 2 +_$s11MyTestClassAACAA0A8ProtocolA2aCP14protocolMethodyyFTW: + .cfi_startproc + ret + .cfi_endproc + + .private_extern _$s11MyTestClassAAC15extensionMethodyyF + .globl _$s11MyTestClassAAC15extensionMethodyyF + .p2align 2 +_$s11MyTestClassAAC15extensionMethodyyF: + .cfi_startproc + ret + .cfi_endproc + + .section __TEXT,__objc_methname,cstring_literals +"L_selector_data(init)": + .asciz "init" + + .section __DATA,__objc_selrefs,literal_pointers,no_dead_strip + .p2align 3, 0x0 +"L_selector(init)": + .quad "L_selector_data(init)" + + .section __TEXT,__objc_methname,cstring_literals +"L_selector_data(dealloc)": + .asciz "dealloc" + + .section __DATA,__objc_selrefs,literal_pointers,no_dead_strip + .p2align 3, 0x0 +"L_selector(dealloc)": + .quad "L_selector_data(dealloc)" + + .private_extern _$s11MyTestClassAACAA0A8ProtocolAAMc + .section __TEXT,__const + .globl _$s11MyTestClassAACAA0A8ProtocolAAMc + .p2align 2, 0x0 +_$s11MyTestClassAACAA0A8ProtocolAAMc: + .long _$s11MyTestClass0A8ProtocolMp-_$s11MyTestClassAACAA0A8ProtocolAAMc + .long (_$s11MyTestClassAACMn-_$s11MyTestClassAACAA0A8ProtocolAAMc)-4 + .long (_$s11MyTestClassAACAA0A8ProtocolAAWP-_$s11MyTestClassAACAA0A8ProtocolAAMc)-8 + .long 0 + + .private_extern _$s11MyTestClassAACAA0A8ProtocolAAWP + .section __DATA,__const + .globl _$s11MyTestClassAACAA0A8ProtocolAAWP + .p2align 3, 0x0 +_$s11MyTestClassAACAA0A8ProtocolAAWP: + .quad _$s11MyTestClassAACAA0A8ProtocolAAMc + .quad _$s11MyTestClassAACAA0A8ProtocolA2aCP14protocolMethodyyFTW + + .section __TEXT,__swift5_entry,regular,no_dead_strip + .p2align 2, 0x0 +l_entry_point: + .long _main-l_entry_point + .long 0 + + .private_extern "_symbolic $s11MyTestClass0A8ProtocolP" + .section __TEXT,__swift5_typeref + .globl "_symbolic $s11MyTestClass0A8ProtocolP" + .weak_definition 
"_symbolic $s11MyTestClass0A8ProtocolP" + .p2align 1, 0x0 +"_symbolic $s11MyTestClass0A8ProtocolP": + .ascii "$s11MyTestClass0A8ProtocolP" + .byte 0 + + .section __TEXT,__swift5_fieldmd + .p2align 2, 0x0 +_$s11MyTestClass0A8Protocol_pMF: + .long "_symbolic $s11MyTestClass0A8ProtocolP"-_$s11MyTestClass0A8Protocol_pMF + .long 0 + .short 4 + .short 12 + .long 0 + + .section __TEXT,__const +l_.str.11.MyTestClass: + .asciz "MyTestClass" + + .private_extern _$s11MyTestClassMXM + .section __TEXT,__constg_swiftt + .globl _$s11MyTestClassMXM + .weak_definition _$s11MyTestClassMXM + .p2align 2, 0x0 +_$s11MyTestClassMXM: + .long 0 + .long 0 + .long (l_.str.11.MyTestClass-_$s11MyTestClassMXM)-8 + + .section __TEXT,__const +l_.str.10.MyProtocol: + .asciz "MyProtocol" + + .private_extern _$s11MyTestClass0A8ProtocolMp + .section __TEXT,__constg_swiftt + .globl _$s11MyTestClass0A8ProtocolMp + .p2align 2, 0x0 +_$s11MyTestClass0A8ProtocolMp: + .long 65603 + .long (_$s11MyTestClassMXM-_$s11MyTestClass0A8ProtocolMp)-4 + .long (l_.str.10.MyProtocol-_$s11MyTestClass0A8ProtocolMp)-8 + .long 0 + .long 1 + .long 0 + .long 17 + .long 0 + + .private_extern _OBJC_METACLASS_$__TtC11MyTestClass11MyTestClass + .section __DATA,__data + .globl _OBJC_METACLASS_$__TtC11MyTestClass11MyTestClass + .p2align 3, 0x0 +_OBJC_METACLASS_$__TtC11MyTestClass11MyTestClass: + .quad _OBJC_METACLASS_$_NSObject + .quad _OBJC_METACLASS_$_NSObject + .quad __objc_empty_cache + .quad 0 + .quad __METACLASS_DATA__TtC11MyTestClass11MyTestClass + + .section __TEXT,__cstring,cstring_literals + .p2align 4, 0x0 +l_.str.30._TtC11MyTestClass11MyTestClass: + .asciz "_TtC11MyTestClass11MyTestClass" + + .section __DATA,__objc_const + .p2align 3, 0x0 +__METACLASS_DATA__TtC11MyTestClass11MyTestClass: + .long 129 + .long 40 + .long 40 + .long 0 + .quad 0 + .quad l_.str.30._TtC11MyTestClass11MyTestClass + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .quad 0 + + .section __TEXT,__cstring,cstring_literals +"l_.str.7.@16@0:8": + .asciz 
"@16@0:8" + + .section __DATA,__objc_data + .p2align 3, 0x0 +__INSTANCE_METHODS__TtC11MyTestClass11MyTestClass: + .long 24 + .long 1 + .quad "L_selector_data(init)" + .quad "l_.str.7.@16@0:8" + .quad _$s11MyTestClassAACABycfcTo + + .p2align 3, 0x0 +__DATA__TtC11MyTestClass11MyTestClass: + .long 128 + .long 8 + .long 8 + .long 0 + .quad 0 + .quad l_.str.30._TtC11MyTestClass11MyTestClass + .quad __INSTANCE_METHODS__TtC11MyTestClass11MyTestClass + .quad 0 + .quad 0 + .quad 0 + .quad 0 + + .private_extern "_symbolic So8NSObjectC" + .section __TEXT,__swift5_typeref + .globl "_symbolic So8NSObjectC" + .weak_definition "_symbolic So8NSObjectC" + .p2align 1, 0x0 +"_symbolic So8NSObjectC": + .ascii "So8NSObjectC" + .byte 0 + + .private_extern _$s11MyTestClassAACMn + .section __TEXT,__constg_swiftt + .globl _$s11MyTestClassAACMn + .p2align 2, 0x0 +_$s11MyTestClassAACMn: + .long 2147483728 + .long (_$s11MyTestClassMXM-_$s11MyTestClassAACMn)-4 + .long (l_.str.11.MyTestClass-_$s11MyTestClassAACMn)-8 + .long (_$s11MyTestClassAACMa-_$s11MyTestClassAACMn)-12 + .long (_$s11MyTestClassAACMF-_$s11MyTestClassAACMn)-16 + .long ("_symbolic So8NSObjectC"-_$s11MyTestClassAACMn)-20 + .long 3 + .long 11 + .long 1 + .long 0 + .long 10 + .long 10 + .long 1 + .long 16 + .long (_$s11MyTestClassAAC14protocolMethodyyF-_$s11MyTestClassAACMn)-56 + + .section __DATA,__objc_data + .p2align 3, 0x0 +_$s11MyTestClassAACMf: + .quad 0 + .quad _$s11MyTestClassAACfD + .quad _$sBOWV + .quad _OBJC_METACLASS_$__TtC11MyTestClass11MyTestClass + .quad _OBJC_CLASS_$_NSObject + .quad __objc_empty_cache + .quad 0 + .quad __DATA__TtC11MyTestClass11MyTestClass+2 + .long 0 + .long 0 + .long 8 + .short 7 + .short 0 + .long 112 + .long 24 + .quad _$s11MyTestClassAACMn + .quad 0 + .quad _$s11MyTestClassAAC14protocolMethodyyF + + .private_extern "_symbolic _____ 11MyTestClassAAC" + .section __TEXT,__swift5_typeref + .globl "_symbolic _____ 11MyTestClassAAC" + .weak_definition "_symbolic _____ 11MyTestClassAAC" + .p2align 
1, 0x0 +"_symbolic _____ 11MyTestClassAAC": + .byte 1 + .long (_$s11MyTestClassAACMn-"_symbolic _____ 11MyTestClassAAC")-1 + .byte 0 + + .section __TEXT,__swift5_fieldmd + .p2align 2, 0x0 +_$s11MyTestClassAACMF: + .long "_symbolic _____ 11MyTestClassAAC"-_$s11MyTestClassAACMF + .long ("_symbolic So8NSObjectC"-_$s11MyTestClassAACMF)-4 + .short 7 + .short 12 + .long 0 + + .section __TEXT,__swift5_protos + .p2align 2, 0x0 +l_$s11MyTestClass0A8ProtocolHr: + .long _$s11MyTestClass0A8ProtocolMp-l_$s11MyTestClass0A8ProtocolHr + + .section __TEXT,__swift5_proto + .p2align 2, 0x0 +l_$s11MyTestClassAACAA0A8ProtocolAAHc: + .long _$s11MyTestClassAACAA0A8ProtocolAAMc-l_$s11MyTestClassAACAA0A8ProtocolAAHc + + .section __TEXT,__swift5_types + .p2align 2, 0x0 +l_$s11MyTestClassAACHn: + .long _$s11MyTestClassAACMn-l_$s11MyTestClassAACHn + + .private_extern ___swift_reflection_version + .section __TEXT,__const + .globl ___swift_reflection_version + .weak_definition ___swift_reflection_version + .p2align 1, 0x0 +___swift_reflection_version: + .short 3 + + .section __DATA,__objc_classlist,regular,no_dead_strip + .p2align 3, 0x0 +_objc_classes_$s11MyTestClassAACN: + .quad _$s11MyTestClassAACN + + .no_dead_strip _main + .no_dead_strip l_entry_point + .no_dead_strip _$s11MyTestClass0A8Protocol_pMF + .no_dead_strip _$s11MyTestClassAACMF + .no_dead_strip __swift_FORCE_LOAD_$_swiftFoundation_$_MyTestClass + .no_dead_strip __swift_FORCE_LOAD_$_swiftDarwin_$_MyTestClass + .no_dead_strip __swift_FORCE_LOAD_$_swiftObjectiveC_$_MyTestClass + .no_dead_strip __swift_FORCE_LOAD_$_swiftCoreFoundation_$_MyTestClass + .no_dead_strip __swift_FORCE_LOAD_$_swiftDispatch_$_MyTestClass + .no_dead_strip __swift_FORCE_LOAD_$_swiftXPC_$_MyTestClass + .no_dead_strip __swift_FORCE_LOAD_$_swiftIOKit_$_MyTestClass + .no_dead_strip l_$s11MyTestClass0A8ProtocolHr + .no_dead_strip l_$s11MyTestClassAACAA0A8ProtocolAAHc + .no_dead_strip l_$s11MyTestClassAACHn + .no_dead_strip ___swift_reflection_version + 
.no_dead_strip _objc_classes_$s11MyTestClassAACN + .section __DATA,__objc_imageinfo,regular,no_dead_strip +L_OBJC_IMAGE_INFO: + .long 0 + .long 100665152 + + .globl _$s11MyTestClass0A8ProtocolTL + .private_extern _$s11MyTestClass0A8ProtocolTL + .alt_entry _$s11MyTestClass0A8ProtocolTL +.set _$s11MyTestClass0A8ProtocolTL, (_$s11MyTestClass0A8ProtocolMp+24)-8 + .globl _$s11MyTestClassAAC14protocolMethodyyFTq + .private_extern _$s11MyTestClassAAC14protocolMethodyyFTq + .alt_entry _$s11MyTestClassAAC14protocolMethodyyFTq +.set _$s11MyTestClassAAC14protocolMethodyyFTq, _$s11MyTestClassAACMn+52 + .globl _$s11MyTestClassAACN + .private_extern _$s11MyTestClassAACN + .alt_entry _$s11MyTestClassAACN +.set _$s11MyTestClassAACN, _$s11MyTestClassAACMf+24 + .globl _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass + .private_extern _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass +.set _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass, _$s11MyTestClassAACN + .weak_reference __swift_FORCE_LOAD_$_swiftFoundation + .weak_reference __swift_FORCE_LOAD_$_swiftDarwin + .weak_reference __swift_FORCE_LOAD_$_swiftObjectiveC + .weak_reference __swift_FORCE_LOAD_$_swiftCoreFoundation + .weak_reference __swift_FORCE_LOAD_$_swiftDispatch + .weak_reference __swift_FORCE_LOAD_$_swiftXPC + .weak_reference __swift_FORCE_LOAD_$_swiftIOKit +.subsections_via_symbols + +_OBJC_CLASS_$_NSObject: +_OBJC_METACLASS_$_NSObject: +__objc_empty_cache: +_$sBOWV: + .quad 0 diff --git a/wild/tests/lld-macho/objc-category-merging-swift.s b/wild/tests/lld-macho/objc-category-merging-swift.s new file mode 100644 index 000000000..7a084d931 --- /dev/null +++ b/wild/tests/lld-macho/objc-category-merging-swift.s @@ -0,0 +1,410 @@ +# REQUIRES: aarch64 +# RUN: rm -rf %t; mkdir %t && cd %t + +############ Test merging multiple categories into a single category ############ +## Apply category merging to swiftc code just make sure we can handle addends +## and don't erase category names for swift -- in order to not crash +# RUN: llvm-mc 
-filetype=obj -triple=arm64-apple-macos -o cat_swift.o %s +# RUN: %lld -arch arm64 -dylib -o cat_swift.dylib cat_swift.o -objc_category_merging -no_objc_relative_method_lists +# RUN: llvm-objdump --objc-meta-data --macho cat_swift.dylib | FileCheck %s --check-prefixes=CHECK-MERGE + +; CHECK-MERGE: Contents of (__DATA_CONST,__objc_classlist) section +; CHECK-MERGE-NEXT: _$s11SimpleClassAACN +; CHECK-MERGE-NEXT: isa {{.+}} _OBJC_METACLASS_$__TtC11SimpleClass11SimpleClass +; CHECK-MERGE-NEXT: superclass 0x0 +; CHECK-MERGE-NEXT: cache 0x0 +; CHECK-MERGE-NEXT: vtable 0x0 +; CHECK-MERGE-NEXT: data {{.+}} (struct class_ro_t *) Swift class +; CHECK-MERGE-NEXT: flags 0x80 +; CHECK-MERGE-NEXT: instanceStart 8 +; CHECK-MERGE-NEXT: instanceSize 8 +; CHECK-MERGE-NEXT: reserved 0x0 +; CHECK-MERGE-NEXT: ivarLayout 0x0 +; CHECK-MERGE-NEXT: name {{.+}} _TtC11SimpleClass11SimpleClass +; CHECK-MERGE-NEXT: baseMethods {{.+}} (struct method_list_t *) +; CHECK-MERGE-NEXT: entsize 24 +; CHECK-MERGE-NEXT: count 3 +; CHECK-MERGE-NEXT: name {{.+}} categoryInstanceMethod +; CHECK-MERGE-NEXT: types {{.+}} q16@0:8 +; CHECK-MERGE-NEXT: imp _$s11SimpleClassAAC22categoryInstanceMethodSiyFTo +; CHECK-MERGE-NEXT: name {{.+}} baseClassInstanceMethod +; CHECK-MERGE-NEXT: types {{.+}} i16@0:8 +; CHECK-MERGE-NEXT: imp _$s11SimpleClassAAC04baseB14InstanceMethods5Int32VyFTo +; CHECK-MERGE-NEXT: name {{.+}} init +; CHECK-MERGE-NEXT: types {{.+}} @16@0:8 +; CHECK-MERGE-NEXT: imp _$s11SimpleClassAACABycfcTo +; CHECK-MERGE-NEXT: baseProtocols 0x0 +; CHECK-MERGE-NEXT: ivars 0x0 +; CHECK-MERGE-NEXT: weakIvarLayout 0x0 +; CHECK-MERGE-NEXT: baseProperties 0x0 +; CHECK-MERGE-NEXT: Meta Class +; CHECK-MERGE-NEXT: isa 0x0 +; CHECK-MERGE-NEXT: superclass 0x0 +; CHECK-MERGE-NEXT: cache 0x0 +; CHECK-MERGE-NEXT: vtable 0x0 +; CHECK-MERGE-NEXT: data {{.+}} (struct class_ro_t *) +; CHECK-MERGE-NEXT: flags 0x81 RO_META +; CHECK-MERGE-NEXT: instanceStart 40 +; CHECK-MERGE-NEXT: instanceSize 40 +; CHECK-MERGE-NEXT: reserved 
0x0 +; CHECK-MERGE-NEXT: ivarLayout 0x0 +; CHECK-MERGE-NEXT: name {{.+}} _TtC11SimpleClass11SimpleClass +; CHECK-MERGE-NEXT: baseMethods 0x0 (struct method_list_t *) +; CHECK-MERGE-NEXT: baseProtocols 0x0 +; CHECK-MERGE-NEXT: ivars 0x0 +; CHECK-MERGE-NEXT: weakIvarLayout 0x0 +; CHECK-MERGE-NEXT: baseProperties 0x0 +; CHECK-MERGE-NEXT: Contents of (__DATA_CONST,__objc_imageinfo) section +; CHECK-MERGE-NEXT: version 0 +; CHECK-MERGE-NEXT: flags 0x740 OBJC_IMAGE_HAS_CATEGORY_CLASS_PROPERTIES Swift 5 or later + +; ================== Generated from Swift: ================== +;; > xcrun swiftc --version +;; swift-driver version: 1.109.2 Apple Swift version 6.0 (swiftlang-6.0.0.3.300 clang-1600.0.20.10) +;; > xcrun swiftc -S SimpleClass.swift -o SimpleClass.s +; import Foundation +; @objc class SimpleClass: NSObject { +; @objc func baseClassInstanceMethod() -> Int32 { +; return 2 +; } +; } +; extension SimpleClass { +; @objc func categoryInstanceMethod() -> Int { +; return 3 +; } +; } + +; ================== Generated from Swift: ================== + .section __TEXT,__text,regular,pure_instructions + .build_version macos, 11, 0 sdk_version 12, 0 + .globl _main + .p2align 2 +_main: + .cfi_startproc + mov w0, #0 + ret + .cfi_endproc + + .private_extern _$s11SimpleClassAAC04baseB14InstanceMethods5Int32VyF + .globl _$s11SimpleClassAAC04baseB14InstanceMethods5Int32VyF + .p2align 2 +_$s11SimpleClassAAC04baseB14InstanceMethods5Int32VyF: + .cfi_startproc + ret + .cfi_endproc + + .p2align 2 +_$s11SimpleClassAAC04baseB14InstanceMethods5Int32VyFTo: + .cfi_startproc + ret + .cfi_endproc + + .private_extern _$s11SimpleClassAACABycfC + .globl _$s11SimpleClassAACABycfC + .p2align 2 +_$s11SimpleClassAACABycfC: + .cfi_startproc + ret + .cfi_endproc + + .private_extern _$s11SimpleClassAACABycfc + .globl _$s11SimpleClassAACABycfc + .p2align 2 +_$s11SimpleClassAACABycfc: + .cfi_startproc + ret + .cfi_endproc + + .private_extern _$s11SimpleClassAACMa + .globl _$s11SimpleClassAACMa + .p2align 
2 +_$s11SimpleClassAACMa: + ret + + .p2align 2 +_$s11SimpleClassAACABycfcTo: + .cfi_startproc + ret + .cfi_endproc + + .private_extern _$s11SimpleClassAACfD + .globl _$s11SimpleClassAACfD + .p2align 2 +_$s11SimpleClassAACfD: + .cfi_startproc + ret + .cfi_endproc + + .private_extern _$s11SimpleClassAAC22categoryInstanceMethodSiyF + .globl _$s11SimpleClassAAC22categoryInstanceMethodSiyF + .p2align 2 +_$s11SimpleClassAAC22categoryInstanceMethodSiyF: + .cfi_startproc + ret + .cfi_endproc + + .p2align 2 +_$s11SimpleClassAAC22categoryInstanceMethodSiyFTo: + .cfi_startproc + ret + .cfi_endproc + + .section __TEXT,__objc_methname,cstring_literals +"L_selector_data(init)": + .asciz "init" + + .section __DATA,__objc_selrefs,literal_pointers,no_dead_strip + .p2align 3, 0x0 +"L_selector(init)": + .quad "L_selector_data(init)" + + .section __TEXT,__objc_methname,cstring_literals +"L_selector_data(dealloc)": + .asciz "dealloc" + + .section __DATA,__objc_selrefs,literal_pointers,no_dead_strip + .p2align 3, 0x0 +"L_selector(dealloc)": + .quad "L_selector_data(dealloc)" + + .section __TEXT,__swift5_entry,regular,no_dead_strip + .p2align 2, 0x0 +l_entry_point: + .long _main-l_entry_point + .long 0 + + .private_extern _OBJC_METACLASS_$__TtC11SimpleClass11SimpleClass + .section __DATA,__data + .globl _OBJC_METACLASS_$__TtC11SimpleClass11SimpleClass + .p2align 3, 0x0 +_OBJC_METACLASS_$__TtC11SimpleClass11SimpleClass: + .quad _OBJC_METACLASS_$_NSObject + .quad _OBJC_METACLASS_$_NSObject + .quad __objc_empty_cache + .quad 0 + .quad __METACLASS_DATA__TtC11SimpleClass11SimpleClass + + .section __TEXT,__cstring,cstring_literals + .p2align 4, 0x0 +l_.str.30._TtC11SimpleClass11SimpleClass: + .asciz "_TtC11SimpleClass11SimpleClass" + + .section __DATA,__objc_const + .p2align 3, 0x0 +__METACLASS_DATA__TtC11SimpleClass11SimpleClass: + .long 129 + .long 40 + .long 40 + .long 0 + .quad 0 + .quad l_.str.30._TtC11SimpleClass11SimpleClass + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .quad 0 + + .section 
__TEXT,__objc_methname,cstring_literals +"L_selector_data(baseClassInstanceMethod)": + .asciz "baseClassInstanceMethod" + + .section __TEXT,__cstring,cstring_literals +"l_.str.7.i16@0:8": + .asciz "i16@0:8" + +"l_.str.7.@16@0:8": + .asciz "@16@0:8" + + .section __DATA,__objc_data + .p2align 3, 0x0 +__INSTANCE_METHODS__TtC11SimpleClass11SimpleClass: + .long 24 + .long 2 + .quad "L_selector_data(baseClassInstanceMethod)" + .quad "l_.str.7.i16@0:8" + .quad _$s11SimpleClassAAC04baseB14InstanceMethods5Int32VyFTo + .quad "L_selector_data(init)" + .quad "l_.str.7.@16@0:8" + .quad _$s11SimpleClassAACABycfcTo + + .p2align 3, 0x0 +__DATA__TtC11SimpleClass11SimpleClass: + .long 128 + .long 8 + .long 8 + .long 0 + .quad 0 + .quad l_.str.30._TtC11SimpleClass11SimpleClass + .quad __INSTANCE_METHODS__TtC11SimpleClass11SimpleClass + .quad 0 + .quad 0 + .quad 0 + .quad 0 + + .section __TEXT,__const +l_.str.11.SimpleClass: + .asciz "SimpleClass" + + .private_extern _$s11SimpleClassMXM + .section __TEXT,__constg_swiftt + .globl _$s11SimpleClassMXM + .weak_definition _$s11SimpleClassMXM + .p2align 2, 0x0 +_$s11SimpleClassMXM: + .long 0 + .long 0 + .long (l_.str.11.SimpleClass-_$s11SimpleClassMXM)-8 + + .private_extern "_symbolic So8NSObjectC" + .section __TEXT,__swift5_typeref + .globl "_symbolic So8NSObjectC" + .weak_definition "_symbolic So8NSObjectC" + .p2align 1, 0x0 +"_symbolic So8NSObjectC": + .ascii "So8NSObjectC" + .byte 0 + + .private_extern _$s11SimpleClassAACMn + .section __TEXT,__constg_swiftt + .globl _$s11SimpleClassAACMn + .p2align 2, 0x0 +_$s11SimpleClassAACMn: + .long 2147483728 + .long (_$s11SimpleClassMXM-_$s11SimpleClassAACMn)-4 + .long (l_.str.11.SimpleClass-_$s11SimpleClassAACMn)-8 + .long (_$s11SimpleClassAACMa-_$s11SimpleClassAACMn)-12 + .long (_$s11SimpleClassAACMF-_$s11SimpleClassAACMn)-16 + .long ("_symbolic So8NSObjectC"-_$s11SimpleClassAACMn)-20 + .long 3 + .long 11 + .long 1 + .long 0 + .long 10 + .long 10 + .long 1 + .long 16 + .long 
(_$s11SimpleClassAAC04baseB14InstanceMethods5Int32VyF-_$s11SimpleClassAACMn)-56 + + .section __DATA,__objc_data + .p2align 3, 0x0 +_$s11SimpleClassAACMf: + .quad 0 + .quad _$s11SimpleClassAACfD + .quad _$sBOWV + .quad _OBJC_METACLASS_$__TtC11SimpleClass11SimpleClass + .quad _OBJC_CLASS_$_NSObject + .quad __objc_empty_cache + .quad 0 + .quad __DATA__TtC11SimpleClass11SimpleClass+2 + .long 0 + .long 0 + .long 8 + .short 7 + .short 0 + .long 112 + .long 24 + .quad _$s11SimpleClassAACMn + .quad 0 + .quad _$s11SimpleClassAAC04baseB14InstanceMethods5Int32VyF + + .private_extern "_symbolic _____ 11SimpleClassAAC" + .section __TEXT,__swift5_typeref + .globl "_symbolic _____ 11SimpleClassAAC" + .weak_definition "_symbolic _____ 11SimpleClassAAC" + .p2align 1, 0x0 +"_symbolic _____ 11SimpleClassAAC": + .byte 1 + .long (_$s11SimpleClassAACMn-"_symbolic _____ 11SimpleClassAAC")-1 + .byte 0 + + .section __TEXT,__swift5_fieldmd + .p2align 2, 0x0 +_$s11SimpleClassAACMF: + .long "_symbolic _____ 11SimpleClassAAC"-_$s11SimpleClassAACMF + .long ("_symbolic So8NSObjectC"-_$s11SimpleClassAACMF)-4 + .short 7 + .short 12 + .long 0 + + .section __TEXT,__objc_methname,cstring_literals +"L_selector_data(categoryInstanceMethod)": + .asciz "categoryInstanceMethod" + + .section __TEXT,__cstring,cstring_literals +"l_.str.7.q16@0:8": + .asciz "q16@0:8" + + .section __DATA,__objc_data + .p2align 3, 0x0 +__CATEGORY_INSTANCE_METHODS__TtC11SimpleClass11SimpleClass_$_SimpleClass: + .long 24 + .long 1 + .quad "L_selector_data(categoryInstanceMethod)" + .quad "l_.str.7.q16@0:8" + .quad _$s11SimpleClassAAC22categoryInstanceMethodSiyFTo + + .section __DATA,__objc_const + .p2align 3, 0x0 +__CATEGORY__TtC11SimpleClass11SimpleClass_$_SimpleClass: + .quad l_.str.11.SimpleClass + .quad _$s11SimpleClassAACMf+24 + .quad __CATEGORY_INSTANCE_METHODS__TtC11SimpleClass11SimpleClass_$_SimpleClass + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .long 60 + .space 4 + + .section __TEXT,__swift5_types + .p2align 2, 0x0 
+l_$s11SimpleClassAACHn: + .long _$s11SimpleClassAACMn-l_$s11SimpleClassAACHn + + .private_extern ___swift_reflection_version + .section __TEXT,__const + .globl ___swift_reflection_version + .weak_definition ___swift_reflection_version + .p2align 1, 0x0 +___swift_reflection_version: + .short 3 + + .section __DATA,__objc_classlist,regular,no_dead_strip + .p2align 3, 0x0 +_objc_classes_$s11SimpleClassAACN: + .quad _$s11SimpleClassAACN + + .section __DATA,__objc_catlist,regular,no_dead_strip + .p2align 3, 0x0 +_objc_categories: + .quad __CATEGORY__TtC11SimpleClass11SimpleClass_$_SimpleClass + + .no_dead_strip _main + .no_dead_strip l_entry_point + .no_dead_strip _$s11SimpleClassAACMF + .no_dead_strip l_$s11SimpleClassAACHn + .no_dead_strip ___swift_reflection_version + .no_dead_strip _objc_classes_$s11SimpleClassAACN + .no_dead_strip _objc_categories + .section __DATA,__objc_imageinfo,regular,no_dead_strip +L_OBJC_IMAGE_INFO: + .long 0 + .long 100665152 + + .globl _$s11SimpleClassAAC04baseB14InstanceMethods5Int32VyFTq + .private_extern _$s11SimpleClassAAC04baseB14InstanceMethods5Int32VyFTq + .alt_entry _$s11SimpleClassAAC04baseB14InstanceMethods5Int32VyFTq +.set _$s11SimpleClassAAC04baseB14InstanceMethods5Int32VyFTq, _$s11SimpleClassAACMn+52 + .globl _$s11SimpleClassAACN + .private_extern _$s11SimpleClassAACN + .alt_entry _$s11SimpleClassAACN +.set _$s11SimpleClassAACN, _$s11SimpleClassAACMf+24 + .globl _OBJC_CLASS_$__TtC11SimpleClass11SimpleClass + .private_extern _OBJC_CLASS_$__TtC11SimpleClass11SimpleClass +.subsections_via_symbols + +_OBJC_CLASS_$_NSObject: +_OBJC_METACLASS_$_NSObject: +__objc_empty_cache: +_$sBOWV: + .quad 0 diff --git a/wild/tests/lld-macho/objc-methname.s b/wild/tests/lld-macho/objc-methname.s new file mode 100644 index 000000000..3d0647297 --- /dev/null +++ b/wild/tests/lld-macho/objc-methname.s @@ -0,0 +1,44 @@ +# REQUIRES: aarch64 +# RUN: rm -rf %t; split-file %s %t + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/strings.s -o 
%t/strings.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/main.s -o %t/main.o + +# RUN: %lld -arch arm64 -lSystem -o %t.out %t/strings.o %t/main.o --no-deduplicate-strings + +# RUN: llvm-otool -vs __TEXT __cstring %t.out | FileCheck %s --check-prefix=CSTRING +# RUN: llvm-otool -vs __TEXT __objc_methname %t.out | FileCheck %s --check-prefix=METHNAME + +# RUN: %lld -arch arm64 -lSystem -o %t/duplicates %t/strings.o %t/strings.o %t/main.o + +# RUN: llvm-otool -vs __TEXT __cstring %t/duplicates | FileCheck %s --check-prefix=CSTRING +# RUN: llvm-otool -vs __TEXT __objc_methname %t/duplicates | FileCheck %s --check-prefix=METHNAME + +# CSTRING: Contents of (__TEXT,__cstring) section +# CSTRING-NEXT: existing-cstring +# CSTRING-EMPTY: + +# METHNAME: Contents of (__TEXT,__objc_methname) section +# METHNAME-NEXT: existing_methname +# METHNAME-NEXT: synthetic_methname +# METHNAME-EMPTY: + +#--- strings.s +.cstring +.p2align 2 + .asciz "existing-cstring" + +.section __TEXT,__objc_methname,cstring_literals + .asciz "existing_methname" + +#--- main.s +.text +.globl _objc_msgSend +_objc_msgSend: + ret + +.globl _main +_main: + bl _objc_msgSend$existing_methname + bl _objc_msgSend$synthetic_methname + ret diff --git a/wild/tests/lld-macho/objc-relative-method-lists-simple.s b/wild/tests/lld-macho/objc-relative-method-lists-simple.s new file mode 100644 index 000000000..c8646f596 --- /dev/null +++ b/wild/tests/lld-macho/objc-relative-method-lists-simple.s @@ -0,0 +1,258 @@ +# REQUIRES: aarch64 +# UNSUPPORTED: target=arm{{.*}}-unknown-linux-gnueabihf +# RUN: rm -rf %t; split-file %s %t && cd %t + +## Compile a64_rel_dylib.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos10.15 -o a64_rel_dylib.o a64_simple_class.s + +## Test arm64 + relative method lists +# RUN: %no-lsystem-lld a64_rel_dylib.o -o a64_rel_dylib.dylib -map a64_rel_dylib.map -dylib -arch arm64 +# RUN: llvm-objdump --macho --objc-meta-data a64_rel_dylib.dylib | FileCheck %s --check-prefix=CHK_REL 
+ +## Test arm64 + relative method lists + dead-strip +# RUN: %no-lsystem-lld a64_rel_dylib.o -o a64_rel_dylib.dylib -map a64_rel_dylib.map -dylib -arch arm64 -dead_strip +# RUN: llvm-objdump --macho --objc-meta-data a64_rel_dylib.dylib | FileCheck %s --check-prefix=CHK_REL + +## Test arm64 + traditional method lists (no relative offsets) +# RUN: %no-lsystem-lld a64_rel_dylib.o -o a64_rel_dylib.dylib -map a64_rel_dylib.map -dylib -arch arm64 -no_objc_relative_method_lists +# RUN: llvm-objdump --macho --objc-meta-data a64_rel_dylib.dylib | FileCheck %s --check-prefix=CHK_NO_REL + +## Test arm64 + relative method lists by explicitly adding `-objc_relative_method_lists`. +# RUN: %lld a64_rel_dylib.o -o a64_rel_dylib.dylib -map a64_rel_dylib.map -dylib -arch arm64 -platform_version macOS 10.15 10.15 -objc_relative_method_lists +# RUN: llvm-objdump --macho --objc-meta-data a64_rel_dylib.dylib | FileCheck %s --check-prefix=CHK_REL + +## Test arm64 + no relative method lists by default. +# RUN: %lld a64_rel_dylib.o -o a64_rel_dylib.dylib -map a64_rel_dylib.map -dylib -arch arm64 -platform_version macOS 10.15 10.15 +# RUN: llvm-objdump --macho --objc-meta-data a64_rel_dylib.dylib | FileCheck %s --check-prefix=CHK_NO_REL + + +CHK_REL: Contents of (__DATA_CONST,__objc_classlist) section +CHK_REL-NEXT: _OBJC_CLASS_$_MyClass +CHK_REL: baseMethods +CHK_REL-NEXT: entsize 12 (relative) +CHK_REL-NEXT: count 3 +CHK_REL-NEXT: name 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) instance_method_00 +CHK_REL-NEXT: types 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) v16@0:8 +CHK_REL-NEXT: imp 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) -[MyClass instance_method_00] +CHK_REL-NEXT: name 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) instance_method_01 +CHK_REL-NEXT: types 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) v16@0:8 +CHK_REL-NEXT: imp 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) -[MyClass instance_method_01] +CHK_REL-NEXT: name 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) instance_method_02 +CHK_REL-NEXT: types 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) v16@0:8 
+CHK_REL-NEXT: imp 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) -[MyClass instance_method_02] + +CHK_REL: Meta Class +CHK_REL-NEXT: isa 0x{{[0-9a-f]*}} _OBJC_METACLASS_$_MyClass +CHK_REL: baseMethods 0x{{[0-9a-f]*}} (struct method_list_t *) +CHK_REL-NEXT: entsize 12 (relative) +CHK_REL-NEXT: count 3 +CHK_REL-NEXT: name 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) class_method_00 +CHK_REL-NEXT: types 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) v16@0:8 +CHK_REL-NEXT: imp 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) +[MyClass class_method_00] +CHK_REL-NEXT: name 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) class_method_01 +CHK_REL-NEXT: types 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) v16@0:8 +CHK_REL-NEXT: imp 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) +[MyClass class_method_01] +CHK_REL-NEXT: name 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) class_method_02 +CHK_REL-NEXT: types 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) v16@0:8 +CHK_REL-NEXT: imp 0x{{[0-9a-f]*}} (0x{{[0-9a-f]*}}) +[MyClass class_method_02] + + +CHK_NO_REL-NOT: (relative) + +CHK_NO_REL: Contents of (__DATA_CONST,__objc_classlist) section +CHK_NO_REL-NEXT: _OBJC_CLASS_$_MyClass + +CHK_NO_REL: baseMethods 0x{{[0-9a-f]*}} (struct method_list_t *) +CHK_NO_REL-NEXT: entsize 24 +CHK_NO_REL-NEXT: count 3 +CHK_NO_REL-NEXT: name 0x{{[0-9a-f]*}} instance_method_00 +CHK_NO_REL-NEXT: types 0x{{[0-9a-f]*}} v16@0:8 +CHK_NO_REL-NEXT: imp -[MyClass instance_method_00] +CHK_NO_REL-NEXT: name 0x{{[0-9a-f]*}} instance_method_01 +CHK_NO_REL-NEXT: types 0x{{[0-9a-f]*}} v16@0:8 +CHK_NO_REL-NEXT: imp -[MyClass instance_method_01] +CHK_NO_REL-NEXT: name 0x{{[0-9a-f]*}} instance_method_02 +CHK_NO_REL-NEXT: types 0x{{[0-9a-f]*}} v16@0:8 +CHK_NO_REL-NEXT: imp -[MyClass instance_method_02] + + +CHK_NO_REL: Meta Class +CHK_NO_REL-NEXT: _OBJC_METACLASS_$_MyClass + +CHK_NO_REL: baseMethods 0x{{[0-9a-f]*}} (struct method_list_t *) +CHK_NO_REL-NEXT: entsize 24 +CHK_NO_REL-NEXT: count 3 +CHK_NO_REL-NEXT: name 0x{{[0-9a-f]*}} class_method_00 +CHK_NO_REL-NEXT: types 0x{{[0-9a-f]*}} v16@0:8 +CHK_NO_REL-NEXT: imp +[MyClass 
class_method_00] +CHK_NO_REL-NEXT: name 0x{{[0-9a-f]*}} class_method_01 +CHK_NO_REL-NEXT: types 0x{{[0-9a-f]*}} v16@0:8 +CHK_NO_REL-NEXT: imp +[MyClass class_method_01] +CHK_NO_REL-NEXT: name 0x{{[0-9a-f]*}} class_method_02 +CHK_NO_REL-NEXT: types 0x{{[0-9a-f]*}} v16@0:8 +CHK_NO_REL-NEXT: imp +[MyClass class_method_02] + + +######################## Generate a64_simple_class.s ######################### +# clang -c simple_class.mm -s -o a64_simple_class.s -target arm64-apple-macos -arch arm64 -Oz + +######################## simple_class.mm ######################## +# __attribute__((objc_root_class)) +# @interface MyClass +# - (void)instance_method_00; +# - (void)instance_method_01; +# - (void)instance_method_02; +# + (void)class_method_00; +# + (void)class_method_01; +# + (void)class_method_02; +# @end +# +# @implementation MyClass +# - (void)instance_method_00 {} +# - (void)instance_method_01 {} +# - (void)instance_method_02 {} +# + (void)class_method_00 {} +# + (void)class_method_01 {} +# + (void)class_method_02 {} +# @end +# +# void *_objc_empty_cache; +# void *_objc_empty_vtable; +# + +#--- objc-macros.s +.macro .objc_selector_def name + .p2align 2 +"\name": + .cfi_startproc + ret + .cfi_endproc +.endm + +#--- a64_simple_class.s +.include "objc-macros.s" + +.section __TEXT,__text,regular,pure_instructions +.build_version macos, 10, 15 + +.objc_selector_def "-[MyClass instance_method_00]" +.objc_selector_def "-[MyClass instance_method_01]" +.objc_selector_def "-[MyClass instance_method_02]" + +.objc_selector_def "+[MyClass class_method_00]" +.objc_selector_def "+[MyClass class_method_01]" +.objc_selector_def "+[MyClass class_method_02]" + +.globl __objc_empty_vtable +.zerofill __DATA,__common,__objc_empty_vtable,8,3 +.section __DATA,__objc_data +.globl _OBJC_CLASS_$_MyClass +.p2align 3, 0x0 + +_OBJC_CLASS_$_MyClass: + .quad _OBJC_METACLASS_$_MyClass + .quad 0 + .quad __objc_empty_cache + .quad __objc_empty_vtable + .quad __OBJC_CLASS_RO_$_MyClass + .globl 
_OBJC_METACLASS_$_MyClass + .p2align 3, 0x0 + +_OBJC_METACLASS_$_MyClass: + .quad _OBJC_METACLASS_$_MyClass + .quad _OBJC_CLASS_$_MyClass + .quad __objc_empty_cache + .quad __objc_empty_vtable + .quad __OBJC_METACLASS_RO_$_MyClass + + .section __TEXT,__objc_classname,cstring_literals +l_OBJC_CLASS_NAME_: + .asciz "MyClass" + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_: + .asciz "class_method_00" + .section __TEXT,__objc_methtype,cstring_literals +l_OBJC_METH_VAR_TYPE_: + .asciz "v16@0:8" + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_.1: + .asciz "class_method_01" +l_OBJC_METH_VAR_NAME_.2: + .asciz "class_method_02" + .section __DATA,__objc_const + .p2align 3, 0x0 +__OBJC_$_CLASS_METHODS_MyClass: + .long 24 + .long 3 + .quad l_OBJC_METH_VAR_NAME_ + .quad l_OBJC_METH_VAR_TYPE_ + .quad "+[MyClass class_method_00]" + .quad l_OBJC_METH_VAR_NAME_.1 + .quad l_OBJC_METH_VAR_TYPE_ + .quad "+[MyClass class_method_01]" + .quad l_OBJC_METH_VAR_NAME_.2 + .quad l_OBJC_METH_VAR_TYPE_ + .quad "+[MyClass class_method_02]" + .p2align 3, 0x0 + +__OBJC_METACLASS_RO_$_MyClass: + .long 3 + .long 40 + .long 40 + .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad __OBJC_$_CLASS_METHODS_MyClass + .quad 0 + .quad 0 + .quad 0 + .quad 0 + + .section __TEXT,__objc_methname,cstring_literals +l_OBJC_METH_VAR_NAME_.3: + .asciz "instance_method_00" +l_OBJC_METH_VAR_NAME_.4: + .asciz "instance_method_01" +l_OBJC_METH_VAR_NAME_.5: + .asciz "instance_method_02" + + .section __DATA,__objc_const + .p2align 3, 0x0 +__OBJC_$_INSTANCE_METHODS_MyClass: + .long 24 + .long 3 + .quad l_OBJC_METH_VAR_NAME_.3 + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyClass instance_method_00]" + .quad l_OBJC_METH_VAR_NAME_.4 + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyClass instance_method_01]" + .quad l_OBJC_METH_VAR_NAME_.5 + .quad l_OBJC_METH_VAR_TYPE_ + .quad "-[MyClass instance_method_02]" + .p2align 3, 0x0 + +__OBJC_CLASS_RO_$_MyClass: + .long 2 + .long 0 + .long 0 
+ .space 4 + .quad 0 + .quad l_OBJC_CLASS_NAME_ + .quad __OBJC_$_INSTANCE_METHODS_MyClass + .quad 0 + .quad 0 + .quad 0 + .quad 0 + .globl __objc_empty_cache + +.zerofill __DATA,__common,__objc_empty_cache,8,3 + .section __DATA,__objc_classlist,regular,no_dead_strip + .p2align 3, 0x0 +l_OBJC_LABEL_CLASS_$: + .quad _OBJC_CLASS_$_MyClass + .section __DATA,__objc_imageinfo,regular,no_dead_strip +L_OBJC_IMAGE_INFO: + .long 0 + .long 64 +.subsections_via_symbols diff --git a/wild/tests/lld-macho/objc-selrefs.s b/wild/tests/lld-macho/objc-selrefs.s new file mode 100644 index 000000000..eebe7c647 --- /dev/null +++ b/wild/tests/lld-macho/objc-selrefs.s @@ -0,0 +1,81 @@ +# REQUIRES: aarch64 +# RUN: rm -rf %t; split-file %s %t + +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/explicit-selrefs-1.s -o %t/explicit-selrefs-1.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/explicit-selrefs-2.s -o %t/explicit-selrefs-2.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/implicit-selrefs.s -o %t/implicit-selrefs.o + +# RUN: %lld -dylib -arch arm64 -lSystem -o %t/explicit-only-no-icf \ +# RUN: %t/explicit-selrefs-1.o %t/explicit-selrefs-2.o -no_fixup_chains +# RUN: llvm-otool -vs __DATA __objc_selrefs %t/explicit-only-no-icf | \ +# RUN: FileCheck %s --check-prefix=EXPLICIT-NO-ICF + +## NOTE: ld64 always dedups the selrefs unconditionally, but we only do it when +## ICF is enabled. 
+# RUN: %lld -dylib -arch arm64 -lSystem -o %t/explicit-only-with-icf \ +# RUN: %t/explicit-selrefs-1.o %t/explicit-selrefs-2.o -no_fixup_chains +# RUN: llvm-otool -vs __DATA __objc_selrefs %t/explicit-only-with-icf \ +# RUN: | FileCheck %s --check-prefix=EXPLICIT-WITH-ICF + +# SELREFS: Contents of (__DATA,__objc_selrefs) section +# SELREFS-NEXT: __TEXT:__objc_methname:foo +# SELREFS-NEXT: __TEXT:__objc_methname:bar +# SELREFS-NEXT: __TEXT:__objc_methname:foo +# SELREFS-NEXT: __TEXT:__objc_methname:length +# SELREFS-EMPTY: + +# RUN: %lld -dylib -arch arm64 -lSystem --icf=all -o %t/explicit-and-implicit \ +# RUN: %t/explicit-selrefs-1.o %t/explicit-selrefs-2.o %t/implicit-selrefs.o \ +# RUN: -no_fixup_chains +# RUN: llvm-otool -vs __DATA __objc_selrefs %t/explicit-and-implicit \ +# RUN: | FileCheck %s --check-prefix=EXPLICIT-AND-IMPLICIT + +# EXPLICIT-NO-ICF: Contents of (__DATA,__objc_selrefs) section +# EXPLICIT-NO-ICF-NEXT: __TEXT:__objc_methname:foo +# EXPLICIT-NO-ICF-NEXT: __TEXT:__objc_methname:bar +# EXPLICIT-NO-ICF-NEXT: __TEXT:__objc_methname:bar +# EXPLICIT-NO-ICF-NEXT: __TEXT:__objc_methname:foo + +# EXPLICIT-WITH-ICF: Contents of (__DATA,__objc_selrefs) section +# EXPLICIT-WITH-ICF-NEXT: __TEXT:__objc_methname:foo +# EXPLICIT-WITH-ICF-NEXT: __TEXT:__objc_methname:bar + +# EXPLICIT-AND-IMPLICIT: Contents of (__DATA,__objc_selrefs) section +# EXPLICIT-AND-IMPLICIT-NEXT: __TEXT:__objc_methname:foo +# EXPLICIT-AND-IMPLICIT-NEXT: __TEXT:__objc_methname:bar +# EXPLICIT-AND-IMPLICIT-NEXT: __TEXT:__objc_methname:length + +#--- explicit-selrefs-1.s +.section __TEXT,__objc_methname,cstring_literals +lselref1: + .asciz "foo" +lselref2: + .asciz "bar" + +.section __DATA,__objc_selrefs,literal_pointers,no_dead_strip +.p2align 3 + .quad lselref1 + .quad lselref2 + .quad lselref2 + +#--- explicit-selrefs-2.s +.section __TEXT,__objc_methname,cstring_literals +lselref1: + .asciz "foo" + +.section __DATA,__objc_selrefs,literal_pointers,no_dead_strip +.p2align 3 + .quad 
lselref1 + +#--- implicit-selrefs.s +.text +.globl _objc_msgSend +.p2align 2 +_objc_msgSend: + ret + +.p2align 2 +_sender: + bl _objc_msgSend$length + bl _objc_msgSend$foo + ret diff --git a/wild/tests/lld-macho/order-file-cstring-tailmerge.s b/wild/tests/lld-macho/order-file-cstring-tailmerge.s new file mode 100644 index 000000000..20a4d162c --- /dev/null +++ b/wild/tests/lld-macho/order-file-cstring-tailmerge.s @@ -0,0 +1,56 @@ +; REQUIRES: aarch64 +; RUN: rm -rf %t && split-file %s %t + +; RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o +; RUN: %lld -dylib -arch arm64 --no-tail-merge-strings -order_file %t/orderfile.txt %t/a.o -o - | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s +; RUN: %lld -dylib -arch arm64 --tail-merge-strings -order_file %t/orderfile.txt %t/a.o -o - | llvm-nm --numeric-sort --format=just-symbols - | FileCheck %s --check-prefix=MERGED + +; CHECK: _str2 +; CHECK: _str1 +; CHECK: _superstr2 +; CHECK: _superstr3 +; CHECK: _superstr1 +; CHECK: _str3 + +; str1 has a higher priority than superstr1, so str1 must be ordered before +; str3, even though superstr1 is before superstr3 in the orderfile. + +; MERGED: _superstr2 +; MERGED: _str2 +; MERGED: _superstr1 +; MERGED: _str1 +; MERGED: _superstr3 +; MERGED: _str3 + +;--- a.s +.cstring + _superstr1: +.asciz "superstr1" + _str1: +.asciz "str1" + _superstr2: +.asciz "superstr2" + _str2: +.asciz "str2" + _superstr3: +.asciz "superstr3" + _str3: +.asciz "str3" + +; TODO: We could use update_test_body.py to generate the hashes for the +; orderfile. Unfortunately, it seems that LLVM has a different hash +; implementation than the xxh64sum tool. See +; DeduplicatedCStringSection::getStringOffset() for hash details. 
+; +; while IFS="" read -r line; do +; echo -n $line | xxh64sum | awk '{printf "CSTR;%010d", and(strtonum("0x"$1), 0x7FFFFFFF)}' +; echo " # $line" +; done < orderfile.txt.template + +;--- orderfile.txt +CSTR;1236462241 # str2 +CSTR;1526669509 # str1 +CSTR;1563550684 # superstr2 +CSTR;1044337806 # superstr3 +CSTR;262417687 # superstr1 +CSTR;717161398 # str3 diff --git a/wild/tests/lld-macho/order-file-cstring.s b/wild/tests/lld-macho/order-file-cstring.s new file mode 100644 index 000000000..ca3c32bb1 --- /dev/null +++ b/wild/tests/lld-macho/order-file-cstring.s @@ -0,0 +1,230 @@ +# REQUIRES: aarch64 + +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/test.s -o %t/test.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/more-cstrings.s -o %t/more-cstrings.o + +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/test-0 %t/test.o %t/more-cstrings.o +# RUN: llvm-nm --numeric-sort --format=just-symbols %t/test-0 | FileCheck %s --check-prefix=ORIGIN_SYM +# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/test-0 | FileCheck %s --check-prefix=ORIGIN_SEC + +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/test-1 %t/test.o %t/more-cstrings.o -order_file %t/ord-1 +# RUN: llvm-nm --numeric-sort --format=just-symbols %t/test-1 | FileCheck %s --check-prefix=ONE_SYM +# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/test-1 | FileCheck %s --check-prefix=ONE_SEC + +# RUN: %lld --no-deduplicate-strings -arch arm64 -lSystem -e _main -o %t/test-1-dup %t/test.o %t/more-cstrings.o -order_file %t/ord-1 +# RUN: llvm-nm --numeric-sort --format=just-symbols %t/test-1-dup | FileCheck %s --check-prefix=ONE_SYM +# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/test-1-dup | FileCheck %s --check-prefix=ONE_SEC + +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/test-2 %t/test.o %t/more-cstrings.o -order_file %t/ord-2 +# RUN: llvm-nm --numeric-sort --format=just-symbols %t/test-2 | FileCheck %s --check-prefix=TWO_SYM 
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/test-2 | FileCheck %s --check-prefix=TWO_SEC + +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/test-3 %t/test.o %t/more-cstrings.o -order_file %t/ord-3 +# RUN: llvm-nm --numeric-sort --format=just-symbols %t/test-3 | FileCheck %s --check-prefix=THREE_SYM +# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/test-3 | FileCheck %s --check-prefix=THREE_SEC + +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/test-4 %t/test.o %t/more-cstrings.o -order_file %t/ord-4 +# RUN: llvm-nm --numeric-sort --format=just-symbols %t/test-4 | FileCheck %s --check-prefix=FOUR_SYM +# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/test-4 | FileCheck %s --check-prefix=FOUR_SEC +# RUN: llvm-readobj --string-dump=__cstring %t/test-4 | FileCheck %s --check-prefix=FOUR_SEC_ESCAPE + +# We expect: +# 1) Covered cstring symbols to be reordered +# 2) the rest of the cstring symbols remain in the original relative order within the cstring section + +# ORIGIN_SYM: _local_foo1 +# ORIGIN_SYM: _globl_foo2 +# ORIGIN_SYM: _local_foo2 +# ORIGIN_SYM: _bar +# ORIGIN_SYM: _baz +# ORIGIN_SYM: _baz_dup +# ORIGIN_SYM: _bar2 +# ORIGIN_SYM: _globl_foo3 + +# ORIGIN_SEC: foo1 +# ORIGIN_SEC: foo2 +# ORIGIN_SEC: bar +# ORIGIN_SEC: baz +# ORIGIN_SEC: bar2 +# ORIGIN_SEC: foo3 + +# original order, but only parital covered +#--- ord-1 +#foo2 +CSTR;1433942677 +#bar +CSTR;0x2032D362 +#bar2 +CSTR;1496286555 +#foo3 +CSTR;0x501BCC31 + +# ONE_SYM-DAG: _globl_foo2 +# ONE_SYM-DAG: _local_foo2 +# ONE_SYM: _bar +# ONE_SYM: _bar2 +# ONE_SYM: _globl_foo3 +# ONE_SYM: _local_foo1 +# ONE_SYM: _baz +# ONE_SYM: _baz_dup + +# ONE_SEC: foo2 +# ONE_SEC: bar +# ONE_SEC: bar2 +# ONE_SEC: foo3 +# ONE_SEC: foo1 +# ONE_SEC: baz + + +# TWO_SYM: _globl_foo2 +# TWO_SYM: _local_foo2 +# TWO_SYM: _local_foo1 +# TWO_SYM: _baz +# TWO_SYM: _baz_dup +# TWO_SYM: _bar +# TWO_SYM: _bar2 +# TWO_SYM: _globl_foo3 + +# TWO_SEC: foo2 +# TWO_SEC: foo1 +# TWO_SEC: baz +# TWO_SEC: bar 
+# TWO_SEC: bar2 +# TWO_SEC: foo3 + + +# THREE_SYM: _local_foo1 +# THREE_SYM: _baz +# THREE_SYM: _baz_dup +# THREE_SYM: _bar +# THREE_SYM: _bar2 +# THREE_SYM: _globl_foo2 +# THREE_SYM: _local_foo2 +# THREE_SYM: _globl_foo3 + +# THREE_SEC: foo1 +# THREE_SEC: baz +# THREE_SEC: bar +# THREE_SEC: bar2 +# THREE_SEC: foo2 +# THREE_SEC: foo3 + + +# FOUR_SYM: _local_escape_white_space +# FOUR_SYM: _globl_foo2 +# FOUR_SYM: _local_foo2 +# FOUR_SYM: _local_escape +# FOUR_SYM: _globl_foo3 +# FOUR_SYM: _bar +# FOUR_SYM: _local_foo1 +# FOUR_SYM: _baz +# FOUR_SYM: _baz_dup +# FOUR_SYM: _bar2 + +# FOUR_SEC: \t\n +# FOUR_SEC: foo2 +# FOUR_SEC: @\"NSDictionary\" +# FOUR_SEC: foo3 +# FOUR_SEC: bar +# FOUR_SEC: foo1 +# FOUR_SEC: baz +# FOUR_SEC: bar2 + +# FOUR_SEC_ESCAPE: .. +# FOUR_SEC_ESCAPE: foo2 +# FOUR_SEC_ESCAPE: @"NSDictionary" +# FOUR_SEC_ESCAPE: foo3 +# FOUR_SEC_ESCAPE: bar +# FOUR_SEC_ESCAPE: foo1 +# FOUR_SEC_ESCAPE: baz +# FOUR_SEC_ESCAPE: bar2 + + +# change order, parital covered +#--- ord-2 +#foo2 +CSTR;1433942677 +#foo1 +CSTR;1663475769 +#baz +CSTR;862947621 +#bar +CSTR;540201826 +#bar2 +CSTR;1496286555 + +# change order, parital covered, with mismatches, duplicates +#--- ord-3 +foo2222 +CSTR;0x11111111 +#bar (mismatched cpu and file name) +fakeCPU:fake-file-name.o:CSTR;540201826 +#not a hash +CSTR;xxx +#foo1 +CSTR;1663475769 +#baz +CSTR;862947621 +#bar +CSTR;540201826 +#bar2 +CSTR;1496286555 +#baz +CSTR;862947621 + +# test escape strings +#--- ord-4 +#\t\n +CSTR;1035903177 +#foo2 +CSTR;0x55783A95 +#@\"NSDictionary\" +CSTR;1202669430 +#foo3 +CSTR;1343999025 +#bar +CSTR;0x2032D362 + + +#--- test.s +.text +.globl _main + +_main: + ret + +.cstring +.p2align 2 +_local_foo1: + .asciz "foo1" +_local_foo2: + .asciz "foo2" +L_.foo1_dup: + .asciz "foo1" +L_.foo2_dup: + .asciz "foo2" +_local_escape: + .asciz "@\"NSDictionary\"" +_local_escape_white_space: + .asciz "\t\n" + +_bar: + .asciz "bar" +_baz: + .asciz "baz" +_bar2: + .asciz "bar2" +_baz_dup: + .asciz "baz" + 
+.subsections_via_symbols + +#--- more-cstrings.s +.globl _globl_foo1, _globl_foo3 +.cstring +.p2align 4 +_globl_foo3: + .asciz "foo3" +_globl_foo2: + .asciz "foo2" diff --git a/wild/tests/lld-macho/order-file-strip-hashes.s b/wild/tests/lld-macho/order-file-strip-hashes.s new file mode 100644 index 000000000..f843e607a --- /dev/null +++ b/wild/tests/lld-macho/order-file-strip-hashes.s @@ -0,0 +1,96 @@ +# REQUIRES: aarch64 + +# RUN: rm -rf %t && split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/a.s -o %t/a.o + +# RUN: %lld -arch arm64 -lSystem -e _main -o %t/a.out %t/a.o -order_file %t/ord-1 +# RUN: llvm-nm --numeric-sort --format=just-symbols %t/a.out | FileCheck %s + +#--- a.s +.text +.globl _main, A, _B, C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222 + +_main: + ret +A: + ret +F: + add w0, w0, #3 + bl C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222 + ret +C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222: + add w0, w0, #2 + bl A + ret +D: + add w0, w0, #2 + bl B + ret +B: + add w0, w0, #1 + bl A + ret +E: + add w0, w0, #2 + bl C.__uniq.111111111111111111111111111111111111111.llvm.2222222222222222222 + ret + +.section __DATA,__objc_const +# test multiple symbols at the same address, which will be alphabetic sorted based symbol names +_OBJC_$_CATEGORY_CLASS_METHODS_Foo_$_Cat2: + .quad 789 + +_OBJC_$_CATEGORY_SOME_$_FOLDED: +_OBJC_$_CATEGORY_Foo_$_Cat1: +_ALPHABETIC_SORT_FIRST: + .quad 123 + +_OBJC_$_CATEGORY_Foo_$_Cat2: + .quad 222 + +_OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1: + .quad 456 + +.section __DATA,__objc_data +_OBJC_CLASS_$_Foo: + .quad 123 + +_OBJC_CLASS_$_Bar.llvm.1234: + .quad 456 + +_OBJC_CLASS_$_Baz: + .quad 789 + +_OBJC_CLASS_$_Baz2: + .quad 999 + +.section __DATA,__objc_classrefs +.quad _OBJC_CLASS_$_Foo +.quad _OBJC_CLASS_$_Bar.llvm.1234 +.quad _OBJC_CLASS_$_Baz + +.subsections_via_symbols + + +#--- ord-1 +# change order, parital covered +A 
+B +C.__uniq.555555555555555555555555555555555555555.llvm.6666666666666666666 +_OBJC_CLASS_$_Baz +_OBJC_CLASS_$_Bar.__uniq.12345 +_OBJC_CLASS_$_Foo.__uniq.123.llvm.123456789 +_OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1 +_OBJC_$_CATEGORY_Foo_$_Cat1.llvm.1234567 + +# .text +# CHECK: A +# CHECK: B +# CHECK: C +# .section __DATA,__objc_const +# CHECK: _OBJC_$_CATEGORY_INSTANCE_METHODS_Foo_$_Cat1 +# CHECK: _OBJC_$_CATEGORY_Foo_$_Cat1 +# .section __DATA,__objc_data +# CHECK: _OBJC_CLASS_$_Baz +# CHECK: _OBJC_CLASS_$_Bar +# CHECK: _OBJC_CLASS_$_Foo diff --git a/wild/tests/lld-macho/order-file.s b/wild/tests/lld-macho/order-file.s new file mode 100644 index 000000000..e0ca735ab --- /dev/null +++ b/wild/tests/lld-macho/order-file.s @@ -0,0 +1,188 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/foo.s -o %t/foo.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/abs.s -o %t/abs.o +# RUN: llvm-ar rcs %t/foo.a %t/foo.o + +# FOO-FIRST: <_bar>: +# FOO-FIRST: <_main>: + +# FOO-SECOND: <_main>: +# FOO-SECOND: <_bar>: + +# RUN: %lld -lSystem -o %t/test-1 %t/test.o %t/foo.o -order_file %t/ord-1 +# RUN: llvm-objdump -d %t/test-1 | FileCheck %s --check-prefix=FOO-FIRST +## Output should be the same regardless of the command-line order of object files +# RUN: %lld -lSystem -o %t/test-1 %t/foo.o %t/test.o -order_file %t/ord-1 +# RUN: llvm-objdump -d %t/test-1 | FileCheck %s --check-prefix=FOO-FIRST + +# RUN: %lld -lSystem -o %t/test-2 %t/test.o %t/foo.o -order_file %t/ord-2 +# RUN: llvm-objdump -d %t/test-2 | FileCheck %s --check-prefix=FOO-SECOND +# RUN: %lld -lSystem -o %t/test-2 %t/foo.o %t/test.o -order_file %t/ord-2 +# RUN: llvm-objdump -d %t/test-2 | FileCheck %s --check-prefix=FOO-SECOND + +# RUN: %lld -lSystem -o %t/test-file-match %t/test.o %t/foo.o -order_file %t/ord-file-match +# RUN: llvm-objdump -d 
%t/test-file-match | FileCheck %s --check-prefix=FOO-FIRST +## Output should be the same regardless of the command-line order of object files +# RUN: %lld -lSystem -o %t/test-file-match %t/foo.o %t/test.o -order_file %t/ord-file-match +# RUN: llvm-objdump -d %t/test-file-match | FileCheck %s --check-prefix=FOO-FIRST + +# RUN: %lld -lSystem -o %t/test-file-nomatch %t/test.o %t/foo.o -order_file %t/ord-file-nomatch +# RUN: llvm-objdump -d %t/test-file-nomatch | FileCheck %s --check-prefix=FOO-SECOND +# RUN: %lld -lSystem -o %t/test-file-nomatch %t/foo.o %t/test.o -order_file %t/ord-file-nomatch +# RUN: llvm-objdump -d %t/test-file-nomatch | FileCheck %s --check-prefix=FOO-SECOND + +# RUN: %lld -lSystem -o %t/test-arch-match %t/test.o %t/foo.o -order_file %t/ord-arch-match +# RUN: llvm-objdump -d %t/test-arch-match | FileCheck %s --check-prefix=FOO-FIRST +# RUN: %lld -lSystem -o %t/test-arch-match %t/foo.o %t/test.o -order_file %t/ord-arch-match +# RUN: llvm-objdump -d %t/test-arch-match | FileCheck %s --check-prefix=FOO-FIRST + +# RUN: %lld -lSystem -o %t/test-arch-nomatch %t/test.o %t/foo.o -order_file %t/ord-arch-nomatch +# RUN: llvm-objdump -d %t/test-arch-nomatch | FileCheck %s --check-prefix=FOO-SECOND +# RUN: %lld -lSystem -o %t/test-arch-nomatch %t/foo.o %t/test.o -order_file %t/ord-arch-nomatch +# RUN: llvm-objdump -d %t/test-arch-nomatch | FileCheck %s --check-prefix=FOO-SECOND + +# RUN: %lld -lSystem -o %t/test-arch-match %t/test.o %t/foo.o -order_file %t/ord-arch-match +# RUN: llvm-objdump -d %t/test-arch-match | FileCheck %s --check-prefix=FOO-FIRST +# RUN: %lld -lSystem -o %t/test-arch-match %t/foo.o %t/test.o -order_file %t/ord-arch-match +# RUN: llvm-objdump -d %t/test-arch-match | FileCheck %s --check-prefix=FOO-FIRST + +## Test archives + +# RUN: %lld -lSystem -o %t/test-archive-1 %t/test.o %t/foo.a -order_file %t/ord-1 +# RUN: llvm-objdump -d %t/test-archive-1 | FileCheck %s --check-prefix=FOO-FIRST +# RUN: %lld -lSystem -o %t/test-archive-1 
%t/foo.a %t/test.o -order_file %t/ord-1 +# RUN: llvm-objdump -d %t/test-archive-1 | FileCheck %s --check-prefix=FOO-FIRST + +# RUN: %lld -lSystem -o %t/test-archive-file-no-match %t/test.o %t/foo.a -order_file %t/ord-file-match +# RUN: llvm-objdump -d %t/test-archive-file-no-match | FileCheck %s --check-prefix=FOO-SECOND +# RUN: %lld -lSystem -o %t/test-archive %t/foo.a %t/test.o -order_file %t/ord-file-match +# RUN: llvm-objdump -d %t/test-archive-file-no-match | FileCheck %s --check-prefix=FOO-SECOND + +# RUN: %lld -lSystem -o %t/test-archive-1 %t/test.o %t/foo.a -order_file %t/ord-archive-match +# RUN: llvm-objdump -d %t/test-archive-1 | FileCheck %s --check-prefix=FOO-FIRST +# RUN: %lld -lSystem -o %t/test-archive-1 %t/foo.a %t/test.o -order_file %t/ord-archive-match +# RUN: llvm-objdump -d %t/test-archive-1 | FileCheck %s --check-prefix=FOO-FIRST + +# RUN: %lld -lSystem -o %t/test-archive-file-no-match %t/test.o %t/foo.a -order_file %t/ord-file-nomatch +# RUN: llvm-objdump -d %t/test-archive-file-no-match | FileCheck %s --check-prefix=FOO-SECOND +# RUN: %lld -lSystem -o %t/test-archive %t/foo.a %t/test.o -order_file %t/ord-file-nomatch +# RUN: llvm-objdump -d %t/test-archive-file-no-match | FileCheck %s --check-prefix=FOO-SECOND + +## The following tests check that if an address is matched by multiple order +## file entries, it should always use the lowest-ordered match. 
+ +# RUN: %lld -lSystem -o %t/test-1 %t/test.o %t/foo.o -order_file %t/ord-multiple-1 +# RUN: llvm-objdump -d %t/test-1 | FileCheck %s --check-prefix=FOO-FIRST +# RUN: %lld -lSystem -o %t/test-1 %t/foo.o %t/test.o -order_file %t/ord-multiple-1 +# RUN: llvm-objdump -d %t/test-1 | FileCheck %s --check-prefix=FOO-FIRST + +# RUN: %lld -lSystem -o %t/test-2 %t/test.o %t/foo.o -order_file %t/ord-multiple-2 +# RUN: llvm-objdump -d %t/test-2 | FileCheck %s --check-prefix=FOO-FIRST +# RUN: %lld -lSystem -o %t/test-2 %t/foo.o %t/test.o -order_file %t/ord-multiple-2 +# RUN: llvm-objdump -d %t/test-2 | FileCheck %s --check-prefix=FOO-FIRST + +# RUN: %lld -lSystem -o %t/test-3 %t/test.o %t/foo.o -order_file %t/ord-multiple-3 +# RUN: llvm-objdump -d %t/test-3 | FileCheck %s --check-prefix=FOO-FIRST +# RUN: %lld -lSystem -o %t/test-3 %t/foo.o %t/test.o -order_file %t/ord-multiple-3 +# RUN: llvm-objdump -d %t/test-3 | FileCheck %s --check-prefix=FOO-FIRST + +# RUN: %lld -lSystem -o %t/test-4 %t/test.o %t/foo.o -order_file %t/ord-multiple-4 +# RUN: llvm-objdump -d %t/test-4 | FileCheck %s --check-prefix=FOO-FIRST +# RUN: %lld -lSystem -o %t/test-4 %t/foo.o %t/test.o -order_file %t/ord-multiple-4 +# RUN: llvm-objdump -d %t/test-4 | FileCheck %s --check-prefix=FOO-FIRST + +## -[Foo doFoo:andBar:] and _bar both point to the same location. When both +## symbols appear in an order file, the location in question should be ordered +## according to the lowest-ordered symbol that references it. + +# RUN: %lld -lSystem -o %t/test-alias %t/test.o %t/foo.o -order_file %t/ord-alias +# RUN: llvm-objdump -d %t/test-alias | FileCheck %s --check-prefix=FOO-FIRST +# RUN: %lld -lSystem -o %t/test-alias %t/foo.o %t/test.o -order_file %t/ord-alias +# RUN: llvm-objdump -d %t/test-alias | FileCheck %s --check-prefix=FOO-FIRST + +## Absolute in symbols in order files make no sense. Just ignore them. 
+# RUN: %lld -lSystem -dylib -o %t/test-abs %t/abs.o -order_file %t/ord-abs + +#--- ord-1 +-[Foo doFoo:andBar:] # just a comment +_main # another comment + +#--- ord-2 +_main # just a comment +-[Foo doFoo:andBar:] # another comment + +#--- ord-file-match +foo.o:-[Foo doFoo:andBar:] +_main + +#--- ord-archive-match +foo.a(foo.o):-[Foo doFoo:andBar:] +_main + +#--- ord-file-nomatch +bar.o:-[Foo doFoo:andBar:] +_main +-[Foo doFoo:andBar:] + +#--- ord-arch-match +x86_64:-[Foo doFoo:andBar:] +_main + +#--- ord-arch-nomatch +arm64:-[Foo doFoo:andBar:] +_main +-[Foo doFoo:andBar:] + +#--- ord-arch-file-match +x86_64:bar.o:-[Foo doFoo:andBar:] +_main + +#--- ord-multiple-1 +-[Foo doFoo:andBar:] +_main +foo.o:-[Foo doFoo:andBar:] + +#--- ord-multiple-2 +foo.o:-[Foo doFoo:andBar:] +_main +-[Foo doFoo:andBar:] + +#--- ord-multiple-3 +-[Foo doFoo:andBar:] +_main +-[Foo doFoo:andBar:] + +#--- ord-multiple-4 +foo.o:-[Foo doFoo:andBar:] +_main +foo.o:-[Foo doFoo:andBar:] + +#--- ord-alias +_bar +_main +-[Foo doFoo:andBar:] + +#--- ord-abs +_abs + +#--- foo.s +.globl "-[Foo doFoo:andBar:]" +"-[Foo doFoo:andBar:]": +_bar: + ret + +#--- test.s +.globl _main + +_main: + callq "-[Foo doFoo:andBar:]" + ret + +.section __DWARF,__debug_aranges,regular,debug +ltmp1: + .byte 0 + +#--- abs.s +_abs = 42 diff --git a/wild/tests/lld-macho/pagezero.s b/wild/tests/lld-macho/pagezero.s new file mode 100644 index 000000000..684249f65 --- /dev/null +++ b/wild/tests/lld-macho/pagezero.s @@ -0,0 +1,37 @@ +# REQUIRES: x86, aarch64 +# RUN: rm -rf %t; mkdir %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/x86_64.o +# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-darwin %s -o %t/arm64_32.o + +# RUN: %lld -lSystem -arch x86_64 -o %t/x86_64 %t/x86_64.o -pagezero_size 100000 +# RUN: llvm-readobj --macho-segment %t/x86_64 | FileCheck %s -D#VMSIZE=0x100000 -D#SIZE=72 + +# RUN: %lld-watchos -lSystem -arch arm64_32 -o %t/arm64_32 %t/arm64_32.o -pagezero_size 100000 +# RUN: llvm-readobj 
--macho-segment %t/arm64_32 | FileCheck %s -D#VMSIZE=0x100000 -D#SIZE=56 + +# RUN: %lld -lSystem -arch x86_64 -o %t/zero %t/x86_64.o -pagezero_size 0 +# RUN: llvm-readobj --macho-segment %t/zero | FileCheck %s --check-prefix=CHECK-ZERO -D#VMSIZE=0x1000 -D#SIZE=152 + +# RUN: %no-fatal-warnings-lld -lSystem -arch x86_64 -o %t/x86_64-misalign %t/x86_64.o -pagezero_size 1001 2>&1 | FileCheck %s --check-prefix=LINK -D#SIZE=0x1000 +# RUN: llvm-readobj --macho-segment %t/x86_64-misalign | FileCheck %s -D#VMSIZE=0x1000 -D#SIZE=72 + +# RUN: %no-fatal-warnings-lld-watchos -lSystem -arch arm64_32 -o %t/arm64_32-misalign-4K %t/arm64_32.o -pagezero_size 1001 2>&1 | FileCheck %s --check-prefix=LINK -D#SIZE=0x0 +# RUN: llvm-readobj --macho-segment %t/arm64_32-misalign-4K | FileCheck %s --check-prefix=CHECK-ZERO -D#VMSIZE=0x4000 -D#SIZE=124 + +# RUN: %no-fatal-warnings-lld-watchos -lSystem -arch arm64_32 -o %t/arm64_32-misalign-16K %t/arm64_32.o -pagezero_size 4001 2>&1 | FileCheck %s --check-prefix=LINK -D#SIZE=0x4000 +# RUN: llvm-readobj --macho-segment %t/arm64_32-misalign-16K | FileCheck %s -D#VMSIZE=0x4000 -D#SIZE=56 + +# LINK: warning: __PAGEZERO size is not page aligned, rounding down to 0x[[#%x,SIZE]] + +# CHECK: Name: __PAGEZERO +# CHECK-NEXT: Size: [[#%d,SIZE]] +# CHECK-NEXT: vmaddr: 0x0 +# CHECK-NEXT: vmsize: 0x[[#%x,VMSIZE]] + +# CHECK-ZERO: Name: __TEXT +# CHECK-ZERO-NEXT: Size: [[#%d,SIZE]] +# CHECK-ZERO-NEXT: vmaddr: 0x0 +# CHECK-ZERO-NEXT: vmsize: 0x[[#%x,VMSIZE]] + +.globl _main +_main: diff --git a/wild/tests/lld-macho/reexport-with-symlink.s b/wild/tests/lld-macho/reexport-with-symlink.s new file mode 100644 index 000000000..c9cde5bc4 --- /dev/null +++ b/wild/tests/lld-macho/reexport-with-symlink.s @@ -0,0 +1,75 @@ +# REQUIRES: aarch64 +# UNSUPPORTED: system-windows +# RUN: rm -rf %t; split-file %s %t +# RUN: ln -s Versions/A/Developer %t/Developer/Library/Frameworks/Developer.framework/ +# RUN: llvm-mc -filetype obj -triple arm64-apple-macos11.0 %t/test.s -o 
%t/test.o +# RUN: %lld -arch arm64 -platform_version macos 11.0 11.0 -o %t/test -framework Developer -F %t/Developer/Library/Frameworks -L %t/Developer/usr/lib %t/test.o -t | FileCheck %s + +# CHECK: {{.*}}/Developer/Library/Frameworks/Developer.framework/Developer +# CHECK: {{.*}}/Developer/usr/lib/libDeveloperSupport.tbd(@rpath/libDeveloperSupport.dylib) +# CHECK-NOT: {{.*}}/Developer/Library/Frameworks/Developer.framework/Versions/A/Developer + +#--- Developer/Library/Frameworks/Developer.framework/Versions/A/Developer +{ + "tapi_tbd_version": 5, + "main_library": { + "target_info": [ + { + "target": "arm64-macos" + } + ], + "install_names": [ + { + "name": "@rpath/Developer.framework/Developer" + } + ], + "exported_symbols": [ + { + "text": { + "global": ["_funcPublic"] + } + } + ] + } +} +#--- Developer/usr/lib/libDeveloperSupport.tbd +{ + "tapi_tbd_version": 5, + "main_library": { + "target_info": [ + { + "target": "arm64-macos" + } + ], + "install_names": [ + { + "name": "@rpath/libDeveloperSupport.dylib" + } + ], + "reexported_libraries": [ + { + "names": [ + "@rpath/Developer.framework/Versions/A/Developer" + ] + } + ], + "exported_symbols": [ + { + "text": { + "global": ["_funcSupport"] + } + } + ] + } +} +#--- test.s +.text +.globl _main +.linker_option "-lDeveloperSupport" + +_main: + ret + +.data + .quad _funcPublic + .quad _funcSupport diff --git a/wild/tests/lld-macho/reexport-without-rpath.s b/wild/tests/lld-macho/reexport-without-rpath.s new file mode 100644 index 000000000..a204c140c --- /dev/null +++ b/wild/tests/lld-macho/reexport-without-rpath.s @@ -0,0 +1,121 @@ +# REQUIRES: aarch64 +# Windows does not support rpath +# UNSUPPORTED: system-windows +# RUN: rm -rf %t; split-file %s %t +# RUN: ln -s Versions/A/Developer %t/Developer/Library/Frameworks/Developer.framework/ +# RUN: ln -s Versions/A/DeveloperCore %t/Developer/Library/PrivateFrameworks/DeveloperCore.framework/ +# RUN: llvm-mc -filetype obj -triple arm64-apple-macos11.0 %t/test.s -o 
%t/test.o +# RUN: %lld -arch arm64 -platform_version macos 11.0 11.0 -o %t/test -framework Developer -F %t/Developer/Library/Frameworks -L %t/Developer/usr/lib %t/test.o +# RUN: llvm-objdump --bind --no-show-raw-insn -d %t/test | FileCheck %s +# CHECK: Bind table: +# CHECK-DAG: __DATA __data {{.*}} pointer 0 Developer _funcPublic +# CHECK-DAG: __DATA __data {{.*}} pointer 0 Developer _funcCore +# CHECK-DAG: __DATA __data {{.*}} pointer 0 libDeveloperSupport _funcSupport + +#--- Developer/Library/Frameworks/Developer.framework/Versions/A/Developer +{ + "tapi_tbd_version": 5, + "main_library": { + "target_info": [ + { + "target": "arm64-macos" + } + ], + "install_names": [ + { + "name": "@rpath/Developer.framework/Versions/A/Developer" + } + ], + "rpaths": [ + { + "paths": [ + "@loader_path/../../../../PrivateFrameworks/" + ] + } + ], + "reexported_libraries": [ + { + "names": [ + "@rpath/DeveloperCore.framework/Versions/A/DeveloperCore" + ] + } + ], + "exported_symbols": [ + { + "text": { + "global": ["_funcPublic"] + } + } + ] + } +} +#--- Developer/Library/PrivateFrameworks/DeveloperCore.framework/Versions/A/DeveloperCore +{ + "tapi_tbd_version": 5, + "main_library": { + "target_info": [ + { + "target": "arm64-macos" + } + ], + "install_names": [ + { + "name": "@rpath/DeveloperCore.framework/Versions/A/DeveloperCore" + } + ], + "allowable_clients": [ + { + "clients": ["Developer"] + } + ], + "exported_symbols": [ + { + "text": { + "global": ["_funcCore"] + } + } + ] + } +} +#--- Developer/usr/lib/libDeveloperSupport.tbd +{ + "tapi_tbd_version": 5, + "main_library": { + "target_info": [ + { + "target": "arm64-macos" + } + ], + "install_names": [ + { + "name": "@rpath/libDeveloperSupport.dylib" + } + ], + "reexported_libraries": [ + { + "names": [ + "@rpath/Developer.framework/Versions/A/Developer" + ] + } + ], + "exported_symbols": [ + { + "text": { + "global": ["_funcSupport"] + } + } + ] + } +} +#--- test.s +.text +.globl _main +.linker_option 
"-lDeveloperSupport" + +_main: + ret + +.data + .quad _funcPublic + .quad _funcCore + .quad _funcSupport diff --git a/wild/tests/lld-macho/reloc-subtractor.s b/wild/tests/lld-macho/reloc-subtractor.s new file mode 100644 index 000000000..215593c22 --- /dev/null +++ b/wild/tests/lld-macho/reloc-subtractor.s @@ -0,0 +1,74 @@ +# REQUIRES: x86, aarch64 +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/x86_64.o +# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %t/test.s -o %t/arm64.o +# RUN: %lld -lSystem %t/x86_64.o -o %t/x86_64 -order_file %t/order-file +# RUN: llvm-objdump --syms --full-contents --rebase %t/x86_64 | FileCheck %s +# RUN: %lld -arch arm64 -lSystem %t/arm64.o -o %t/arm64 -order_file %t/order-file +# RUN: llvm-objdump --syms --full-contents --rebase %t/arm64 | FileCheck %s + +# CHECK-LABEL: SYMBOL TABLE: +# CHECK: {{0*}}[[#%x, SUB1ADDR:]] l {{.*}} __DATA,bar _sub1 +# CHECK: {{0*}}[[#%x, SUB2ADDR:]] l {{.*}} __DATA,bar _sub2 +# CHECK: {{0*}}[[#%x, SUB3ADDR:]] l {{.*}} __DATA,bar _sub3 +# CHECK: {{0*}}[[#%x, SUB4ADDR:]] l {{.*}} __DATA,bar _sub4 +# CHECK: {{0*}}[[#%x, SUB5ADDR:]] l {{.*}} __DATA,bar _sub5 +# CHECK-LABEL: Contents of section __DATA,bar: +# CHECK: [[#SUB1ADDR]] 10000000 +# CHECK-NEXT: [[#SUB2ADDR]] f2ffffff +# CHECK-NEXT: [[#SUB3ADDR]] 14000000 00000000 +# CHECK-NEXT: [[#SUB4ADDR]] f6ffffff ffffffff +# CHECK-NEXT: [[#SUB5ADDR]] f1ffffff ffffffff +# CHECK: Rebase table: +# CHECK-NEXT: segment section address type +# CHECK-EMPTY: + +#--- test.s + +.globl _main, _subtrahend_1, _subtrahend_2, _minuend1, _minuend2 + +.section __DATA,foo + .space 16 +L_.minuend: + .space 16 + +.section __DATA,bar +_sub1: + .long _minuend_1 - _subtrahend_1 + .space 12 +_sub2: + .long _minuend_2 - _subtrahend_2 + 2 + .space 12 +_sub3: + .quad _minuend_1 - _subtrahend_1 + 4 + .space 8 +_sub4: + .quad _minuend_2 - _subtrahend_2 + 6 + .space 8 +_sub5: + .quad L_.minuend - _subtrahend_1 + 1 + .space 8 + 
+_minuend_1: + .space 16 +_minuend_2: + .space 16 +_subtrahend_1: + .space 16 +_subtrahend_2: + .space 16 + +.text +.p2align 2 +_main: + ret + +.subsections_via_symbols + +#--- order-file +## Reorder the symbols to make sure that the addends are being associated with +## the minuend (and not the subtrahend) relocation. +_subtrahend_1 +_minuend_1 +_minuend_2 +_subtrahend_2 diff --git a/wild/tests/lld-macho/section-order.s b/wild/tests/lld-macho/section-order.s new file mode 100644 index 000000000..7a0b6f799 --- /dev/null +++ b/wild/tests/lld-macho/section-order.s @@ -0,0 +1,58 @@ +# REQUIRES: x86 +## Check that section ordering follows from input file ordering. +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/1.s -o %t/1.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/2.s -o %t/2.o +# RUN: %lld -dylib %t/1.o %t/2.o -o %t/12 +# RUN: %lld -dylib %t/2.o %t/1.o -o %t/21 +# RUN: %lld -dylib %t/2.o %t/1.o -o %t/synth-section-order \ +# RUN: -add_empty_section __TEXT __objc_stubs \ +# RUN: -add_empty_section __TEXT __init_offsets \ +# RUN: -add_empty_section __TEXT __stubs \ +# RUN: -add_empty_section __TEXT __stub_helper \ +# RUN: -add_empty_section __TEXT __unwind_info \ +# RUN: -add_empty_section __TEXT __eh_frame \ +# RUN: -add_empty_section __DATA __objc_selrefs +# RUN: llvm-objdump --macho --section-headers %t/12 | FileCheck %s --check-prefix=CHECK-12 +# RUN: llvm-objdump --macho --section-headers %t/21 | FileCheck %s --check-prefix=CHECK-21 +# RUN: llvm-objdump --macho --section-headers %t/synth-section-order | FileCheck %s --check-prefix=CHECK-SYNTHETIC-ORDER + +# CHECK-12: __text +# CHECK-12-NEXT: foo +# CHECK-12-NEXT: bar +# CHECK-12-NEXT: __cstring + +# CHECK-21: __text +## `foo` always sorts next to `__text` since it's a code section +## and needs to be adjacent for arm64 thunk calculations +# CHECK-21-NEXT: foo +# CHECK-21-NEXT: __cstring +# CHECK-21-NEXT: bar + +# CHECK-SYNTHETIC-ORDER: __text +# 
CHECK-SYNTHETIC-ORDER-NEXT: foo +# CHECK-SYNTHETIC-ORDER-NEXT: __stubs +# CHECK-SYNTHETIC-ORDER-NEXT: __stub_helper +# CHECK-SYNTHETIC-ORDER-NEXT: __objc_stubs +# CHECK-SYNTHETIC-ORDER-NEXT: __init_offsets +# CHECK-SYNTHETIC-ORDER-NEXT: __cstring +# CHECK-SYNTHETIC-ORDER-NEXT: bar +# CHECK-SYNTHETIC-ORDER-NEXT: __unwind_info +# CHECK-SYNTHETIC-ORDER-NEXT: __eh_frame +# CHECK-SYNTHETIC-ORDER-NEXT: __objc_selrefs + +#--- 1.s +.section __TEXT,foo + .space 1 +.section __TEXT,bar + .space 1 +.cstring + .asciz "" + +#--- 2.s +.cstring + .asciz "" +.section __TEXT,bar + .space 1 +.section __TEXT,foo,regular,pure_instructions + .space 1 diff --git a/wild/tests/lld-macho/segments.s b/wild/tests/lld-macho/segments.s new file mode 100644 index 000000000..b167813d4 --- /dev/null +++ b/wild/tests/lld-macho/segments.s @@ -0,0 +1,73 @@ +# REQUIRES: x86, aarch64 +# RUN: rm -rf %t; mkdir -p %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/x86_64.o +# RUN: llvm-mc -filetype=obj -triple=arm64_32-apple-watchos %s -o %t/arm64-32.o +# RUN: %lld -o %t/x86_64 %t/x86_64.o +# RUN: %lld-watchos -o %t/arm64_32 %t/arm64-32.o + +# RUN: llvm-readobj --macho-segment %t/x86_64 > %t/x86_64.out +# RUN: echo "Total file size" >> %t/x86_64.out +# RUN: wc -c %t/x86_64 >> %t/x86_64.out +# RUN: FileCheck %s -DSUFFIX=_64 -DPAGEZERO_SIZE=0x100000000 -DTEXT_ADDR=0x100000000 < %t/x86_64.out + +# RUN: llvm-readobj --macho-segment %t/arm64_32 > %t/arm64-32.out +# RUN: echo "Total file size" >> %t/arm64-32.out +# RUN: wc -c %t/arm64_32 >> %t/arm64-32.out +# RUN: FileCheck %s -DSUFFIX= -DPAGEZERO_SIZE=0x4000 -DTEXT_ADDR=0x4000 < %t/arm64-32.out + +## These two segments must always be present at the start of an executable. 
+# CHECK-NOT: Segment { +# CHECK: Segment { +# CHECK-NEXT: Cmd: LC_SEGMENT[[SUFFIX]]{{$}} +# CHECK-NEXT: Name: __PAGEZERO +# CHECK-NEXT: Size: +# CHECK-NEXT: vmaddr: 0x0 +# CHECK-NEXT: vmsize: [[PAGEZERO_SIZE]] +# CHECK-NEXT: fileoff: 0 +# CHECK-NEXT: filesize: 0 +## The kernel won't execute a binary with the wrong protections for __PAGEZERO. +# CHECK-NEXT: maxprot: --- +# CHECK-NEXT: initprot: --- +# CHECK-NEXT: nsects: 0 +# CHECK-NEXT: flags: 0x0 +# CHECK-NEXT: } +# CHECK-NEXT: Segment { +# CHECK-NEXT: Cmd: LC_SEGMENT[[SUFFIX]]{{$}} +# CHECK-NEXT: Name: __TEXT +# CHECK-NEXT: Size: +# CHECK-NEXT: vmaddr: [[TEXT_ADDR]] +# CHECK-NEXT: vmsize: +## dyld3 assumes that the __TEXT segment starts from the file header +# CHECK-NEXT: fileoff: 0 +# CHECK-NEXT: filesize: +# CHECK-NEXT: maxprot: r-x +# CHECK-NEXT: initprot: r-x +# CHECK-NEXT: nsects: 1 +# CHECK-NEXT: flags: 0x0 +# CHECK-NEXT: } + +## Check that we handle max-length names correctly. +# CHECK: Cmd: LC_SEGMENT[[SUFFIX]]{{$}} +# CHECK-NEXT: Name: maxlen_16ch_name + +## This segment must always be present at the end of an executable, and cover +## its last byte. +# CHECK: Name: __LINKEDIT +# CHECK-NEXT: Size: +# CHECK-NEXT: vmaddr: +# CHECK-NEXT: vmsize: +# CHECK-NEXT: fileoff: [[#%u, LINKEDIT_OFF:]] +# CHECK-NEXT: filesize: [[#%u, LINKEDIT_SIZE:]] +# CHECK-NEXT: maxprot: r-- +# CHECK-NEXT: initprot: r-- +# CHECK-NOT: Cmd: LC_SEGMENT[[SUFFIX]]{{$}} + +# CHECK-LABEL: Total file size +# CHECK-NEXT: [[#%u, LINKEDIT_OFF + LINKEDIT_SIZE]] + +.text +.global _main +_main: + ret + +.section maxlen_16ch_name,foo diff --git a/wild/tests/lld-macho/skip-platform-checks.s b/wild/tests/lld-macho/skip-platform-checks.s new file mode 100644 index 000000000..bcd82d59d --- /dev/null +++ b/wild/tests/lld-macho/skip-platform-checks.s @@ -0,0 +1,12 @@ +# REQUIRES: x86, aarch64 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-iossimulator %s -o %t.o +## This should succeed even though libsystem_kernel.dylib has a mismatched platform. 
+# RUN: %no-arg-lld -lSystem -arch x86_64 -platform_version ios-simulator 14.0 15.0 \ +# RUN: -syslibroot %S/Inputs/iPhoneSimulator.sdk %t.o -o %t +# RUN: llvm-objdump --macho --bind %t | FileCheck %s +# CHECK: __DATA_CONST __got 0x100001000 pointer 0 libSystem dyld_stub_binder + +.globl _main +_main: + callq ___fsync + ret diff --git a/wild/tests/lld-macho/tapi-link-by-arch.s b/wild/tests/lld-macho/tapi-link-by-arch.s new file mode 100644 index 000000000..d78b2ea83 --- /dev/null +++ b/wild/tests/lld-macho/tapi-link-by-arch.s @@ -0,0 +1,19 @@ +# REQUIRES: x86, aarch64 + +# RUN: mkdir -p %t +# RUN: llvm-mc -filetype obj -triple arm64-apple-ios14.4 %s -o %t/arm64-ios.o +# RUN: not %no-arg-lld -dylib -arch arm64 -platform_version ios 14.4 15.0 -o /dev/null \ +# RUN: -lSystem %S/Inputs/libStubLink.tbd %t/arm64-ios.o 2>&1 | FileCheck %s + +# RUN: llvm-mc -filetype obj -triple x86_64-apple-iossimulator14.4 %s -o %t/x86_64-sim.o +# RUN: not %no-arg-lld -dylib -arch x86_64 -platform_version ios-simulator 14.4 15.0 -o /dev/null \ +# RUN: -lSystem %S/Inputs/libStubLink.tbd %t/x86_64-sim.o 2>&1 | FileCheck %s + +# RUN: llvm-mc -filetype obj -triple arm64-apple-iossimulator14.4 %s -o %t/arm64-sim.o +# RUN: %no-arg-lld -dylib -arch arm64 -platform_version ios-simulator 14.4 15.0 -o \ +# RUN: /dev/null %S/Inputs/libStubLink.tbd %t/arm64-sim.o + +# CHECK: error: undefined symbol: _arm64_sim_only + +.data +.quad _arm64_sim_only diff --git a/wild/tests/lld-macho/tapi-rpath.s b/wild/tests/lld-macho/tapi-rpath.s new file mode 100644 index 000000000..23187e797 --- /dev/null +++ b/wild/tests/lld-macho/tapi-rpath.s @@ -0,0 +1,89 @@ +# REQUIRES: aarch64 +# Windows does not support rpath +# UNSUPPORTED: system-windows +# RUN: rm -rf %t; split-file %s %t +# RUN: ln -s Versions/A/Developer %t/Developer/Library/Frameworks/Developer.framework/ +# RUN: ln -s Versions/A/DeveloperCore %t/Developer/Library/PrivateFrameworks/DeveloperCore.framework/ +# RUN: llvm-mc -filetype obj -triple 
arm64-apple-macos11.0 %t/test.s -o %t/test.o +# RUN: %lld -arch arm64 -platform_version macos 11.0 11.0 -o %t/test -framework Developer -F %t/Developer/Library/Frameworks %t/test.o + +# RUN: llvm-objdump --bind --no-show-raw-insn -d %t/test | FileCheck %s +# CHECK: Bind table: +# CHECK-DAG: __DATA __data {{.*}} pointer 0 Developer _funcPublic +# CHECK-DAG: __DATA __data {{.*}} pointer 0 Developer _funcCore + +#--- Developer/Library/Frameworks/Developer.framework/Versions/A/Developer +{ + "tapi_tbd_version": 5, + "main_library": { + "target_info": [ + { + "target": "arm64-macos" + } + ], + "install_names": [ + { + "name": "@rpath/Developer.framework/Versions/A/Developer" + } + ], + "rpaths": [ + { + "paths": [ + "@loader_path/../../../../PrivateFrameworks/" + ] + } + ], + "reexported_libraries": [ + { + "names": [ + "@rpath/DeveloperCore.framework/Versions/A/DeveloperCore" + ] + } + ], + "exported_symbols": [ + { + "text": { + "global": ["_funcPublic"] + } + } + ] + } +} +#--- Developer/Library/PrivateFrameworks/DeveloperCore.framework/Versions/A/DeveloperCore +{ + "tapi_tbd_version": 5, + "main_library": { + "target_info": [ + { + "target": "arm64-macos" + } + ], + "install_names": [ + { + "name": "@rpath/DeveloperCore.framework/Versions/A/DeveloperCore" + } + ], + "allowable_clients": [ + { + "clients": ["Developer"] + } + ], + "exported_symbols": [ + { + "text": { + "global": ["_funcCore"] + } + } + ] + } +} +#--- test.s +.text +.globl _main + +_main: + ret + +.data + .quad _funcPublic + .quad _funcCore diff --git a/wild/tests/lld-macho/tlv.s b/wild/tests/lld-macho/tlv.s new file mode 100644 index 000000000..e71fe7698 --- /dev/null +++ b/wild/tests/lld-macho/tlv.s @@ -0,0 +1,132 @@ +# REQUIRES: x86 +# RUN: rm -rf %t; split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/regular.s -o %t/regular.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/tbss.s -o %t/tbss.o + +# RUN: %lld -lSystem -no_pie -o %t/regular-no-pie %t/regular.o 
+# RUN: llvm-otool -hv %t/regular-no-pie | FileCheck %s --check-prefix=HEADER +# RUN: llvm-objdump -d --bind --rebase %t/regular-no-pie | FileCheck %s --check-prefixes=REG,LINKEDIT +# RUN: llvm-objdump --macho --section=__DATA,__thread_vars %t/regular-no-pie | \ +# RUN: FileCheck %s --check-prefix=REG-TLVP + +# RUN: %lld -lSystem %t/regular.o -o %t/regular-pie +# RUN: llvm-otool -hv %t/regular-pie | FileCheck %s --check-prefix=HEADER +# RUN: llvm-objdump -d --bind --rebase %t/regular-pie | FileCheck %s --check-prefixes=REG,LINKEDIT +# RUN: llvm-objdump --macho --section=__DATA,__thread_vars %t/regular-pie | \ +# RUN: FileCheck %s --check-prefix=REG-TLVP + +# RUN: %lld -lSystem %t/tbss.o -o %t/tbss -e _f +# RUN: llvm-objdump -d --bind --rebase %t/tbss | FileCheck %s --check-prefixes=TBSS,LINKEDIT +# RUN: llvm-objdump --macho --section=__DATA,__thread_vars %t/tbss | \ +# RUN: FileCheck %s --check-prefix=TBSS-TLVP + +# RUN: %lld -lSystem %t/regular.o %t/tbss.o -o %t/regular-and-tbss +# RUN: llvm-objdump -d --bind --rebase %t/regular-and-tbss | FileCheck %s --check-prefixes=REG,TBSS,LINKEDIT +# RUN: llvm-objdump --macho --section=__DATA,__thread_vars %t/regular-and-tbss | \ +# RUN: FileCheck %s --check-prefix=REG-TBSS-TLVP +# RUN: llvm-objdump --section-headers %t/regular-and-tbss | FileCheck %s --check-prefix=SECTIONS + +## Check that we always put __thread_bss immediately after __thread_data, +## regardless of the order of the input files. 
+# RUN: %lld -lSystem %t/tbss.o %t/regular.o -o %t/regular-and-tbss +# RUN: llvm-objdump --section-headers %t/regular-and-tbss | FileCheck %s --check-prefix=SECTIONS + +# HEADER: MH_HAS_TLV_DESCRIPTORS + +# REG: <_main>: +# REG-NEXT: leaq {{.*}}(%rip), %rax ## {{.*}} <_foo> +# REG-NEXT: leaq {{.*}}(%rip), %rax ## {{.*}} <_bar> +# REG-NEXT: retq + +# TBSS: <_f>: +# TBSS-NEXT: leaq {{.*}}(%rip), %rax ## {{.*}} <_baz> +# TBSS-NEXT: leaq {{.*}}(%rip), %rax ## {{.*}} <_qux> +# TBSS-NEXT: leaq {{.*}}(%rip), %rax ## {{.*}} <_hoge> +# TBSS-NEXT: retq + +# REG-TLVP: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +# REG-TLVP-NEXT: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +# REG-TLVP-NEXT: 00 00 00 00 00 00 00 00 08 00 00 00 00 00 00 00 + +# TBSS-TLVP: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +# TBSS-TLVP-NEXT: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +# TBSS-TLVP-NEXT: 00 00 00 00 00 00 00 00 08 00 00 00 00 00 00 00 + +# REG-TBSS-TLVP: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +# REG-TBSS-TLVP-NEXT: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +# REG-TBSS-TLVP-NEXT: 00 00 00 00 00 00 00 00 10 00 00 00 00 00 00 00 +# REG-TBSS-TLVP-NEXT: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +# REG-TBSS-TLVP-NEXT: 20 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +# REG-TBSS-TLVP-NEXT: 00 00 00 00 00 00 00 00 28 00 00 00 00 00 00 00 +# REG-TBSS-TLVP-NEXT: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +# REG-TBSS-TLVP-NEXT: 30 00 00 00 00 00 00 00 + +## Make sure we don't emit rebase opcodes for relocations in __thread_vars. +# LINKEDIT: Rebase table: +# LINKEDIT-NEXT: segment section address type +# LINKEDIT-EMPTY: +# LINKEDIT-NEXT: Bind table: +# LINKEDIT: __DATA __thread_vars 0x{{[0-9a-f]*}} pointer 0 libSystem __tlv_bootstrap +# LINKEDIT: __DATA __thread_vars 0x{{[0-9a-f]*}} pointer 0 libSystem __tlv_bootstrap + +## Make sure we have an odd number of tlv vars, and that the __thread_vars +## section starts 16-bytes aligned. 
This is the setup required for __thread_data +## not to be automatically 16-bytes aligned, ensuring the linker does its +## expected job of aligning _hoge$tlv$init. +# SECTIONS: __thread_vars {{[0-9]+}}8 {{[0-9]+}}0 +# SECTIONS: __thread_data +# SECTIONS: more_thread_data +# SECTIONS-NEXT: __thread_bss + +#--- regular.s +.globl _main +_main: + mov _foo@TLVP(%rip), %rax + mov _bar@TLVP(%rip), %rax + ret + +.section __DATA,__thread_data,thread_local_regular +_foo$tlv$init: + .quad 123 + +.section __DATA,more_thread_data,thread_local_regular +_bar$tlv$init: + .quad 123 + +.section __DATA,__thread_vars,thread_local_variables +.globl _foo, _bar +_foo: + .quad __tlv_bootstrap + .quad 0 + .quad _foo$tlv$init +_bar: + .quad __tlv_bootstrap + .quad 0 + .quad _bar$tlv$init + +#--- tbss.s + +.globl _f +_f: + mov _baz@TLVP(%rip), %rax + mov _qux@TLVP(%rip), %rax + mov _hoge@TLVP(%rip), %rax + ret + +.tbss _baz$tlv$init, 8, 3 +.tbss _qux$tlv$init, 8, 3 +.tbss _hoge$tlv$init, 16, 4 + +.section __DATA,__thread_vars,thread_local_variables +_baz: + .quad __tlv_bootstrap + .quad 0 + .quad _baz$tlv$init +_qux: + .quad __tlv_bootstrap + .quad 0 + .quad _qux$tlv$init +_hoge: + .quad __tlv_bootstrap + .quad 0 + .quad _hoge$tlv$init diff --git a/wild/tests/lld_macho_tests.rs b/wild/tests/lld_macho_tests.rs new file mode 100644 index 000000000..01fe06f8b --- /dev/null +++ b/wild/tests/lld_macho_tests.rs @@ -0,0 +1,136 @@ +//! Test runner for lld MachO assembly tests. +//! +//! These tests are adapted from LLVM lld's MachO test suite +//! (Apache License 2.0 with LLVM Exceptions). +//! +//! Each test assembles a .s file, links with Wild, and verifies +//! the output binary is structurally valid and codesigns cleanly. 
+ +use std::path::Path; +use std::path::PathBuf; +use std::process::Command; + +fn wild_binary_path() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_wild")) +} + +fn lld_tests_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/lld-macho") +} + +fn collect_tests(tests: &mut Vec) { + let wild_bin = wild_binary_path(); + let test_dir = lld_tests_dir(); + + for entry in std::fs::read_dir(&test_dir).unwrap() { + let entry = entry.unwrap(); + let path = entry.path(); + if path.extension().map_or(true, |e| e != "s") { + continue; + } + let content = std::fs::read_to_string(&path).unwrap(); + + // Skip tests that need split-file (multi-file tests) + if content.contains("split-file") { + continue; + } + + // Only run aarch64/arm64 tests + if content.contains("REQUIRES: x86") || content.contains("REQUIRES: i386") { + continue; + } + + // Extract linker flags from RUN lines + let is_dylib = content.contains("-dylib"); + + let test_name = path.file_stem().unwrap().to_string_lossy().to_string(); + let wild = wild_bin.clone(); + let test_path = path.clone(); + + tests.push( + libtest_mimic::Trial::test(format!("lld-macho/{test_name}"), move || { + run_lld_test(&wild, &test_path, is_dylib).map_err(Into::into) + }) + .with_ignored_flag( + // Known failures — ignore until fixed + test_name == "objc-category-merging-erase-objc-name-test", + ), + ); + } +} + +fn run_lld_test(wild_bin: &Path, test_path: &Path, is_dylib: bool) -> Result<(), String> { + let build_dir = std::env::temp_dir().join("wild-lld-tests"); + std::fs::create_dir_all(&build_dir).map_err(|e| format!("mkdir: {e}"))?; + + let stem = test_path.file_stem().unwrap().to_string_lossy(); + let obj_path = build_dir.join(format!("{stem}.o")); + let out_path = build_dir.join(format!("{stem}.out")); + + // Strip comment lines and assemble + let content = std::fs::read_to_string(test_path).map_err(|e| format!("read: {e}"))?; + let clean: String = content + .lines() + .filter(|l| !l.starts_with('#')) + 
.collect::>() + .join("\n"); + let clean_path = build_dir.join(format!("{stem}.clean.s")); + std::fs::write(&clean_path, &clean).map_err(|e| format!("write: {e}"))?; + + // Assemble + let asm = Command::new("clang") + .args(["-c", "-target", "arm64-apple-macos"]) + .arg(&clean_path) + .arg("-o") + .arg(&obj_path) + .output() + .map_err(|e| format!("clang: {e}"))?; + if !asm.status.success() { + let stderr = String::from_utf8_lossy(&asm.stderr); + // Some tests have intentional assembly errors + if stderr.contains("error:") { + return Ok(()); // Skip tests with asm errors + } + return Err(format!("Assembly failed:\n{stderr}")); + } + + // Link with Wild + let mut cmd = Command::new(wild_bin); + cmd.arg(&obj_path); + if is_dylib { + cmd.arg("-dylib"); + } + cmd.args(["-arch", "arm64", "-lSystem", "-o"]) + .arg(&out_path) + .env("WILD_VALIDATE_OUTPUT", "1"); + + let link = cmd.output().map_err(|e| format!("wild: {e}"))?; + if !link.status.success() { + let stderr = String::from_utf8_lossy(&link.stderr); + // Check if test expects a link error + if content.contains("error:") || content.contains("not-allowed") { + return Ok(()); // Expected failure + } + return Err(format!("Link failed:\n{stderr}")); + } + + // Verify output is valid: codesign check + let verify = Command::new("codesign") + .args(["-vv"]) + .arg(&out_path) + .output() + .map_err(|e| format!("codesign: {e}"))?; + if !verify.status.success() { + let stderr = String::from_utf8_lossy(&verify.stderr); + return Err(format!("Codesign verification failed:\n{stderr}")); + } + + Ok(()) +} + +fn main() { + let mut tests = Vec::new(); + collect_tests(&mut tests); + let args = libtest_mimic::Arguments::from_args(); + libtest_mimic::run(&args, tests).exit(); +} diff --git a/wild/tests/macho_integration_tests.rs b/wild/tests/macho_integration_tests.rs new file mode 100644 index 000000000..badcb6d5a --- /dev/null +++ b/wild/tests/macho_integration_tests.rs @@ -0,0 +1,660 @@ +//! 
Integration tests for macOS Mach-O linking. +//! +//! Mirrors the structure of the ELF integration tests (`integration_tests.rs`) but for Mach-O. +//! Test sources live in `tests/sources/macho/{test_name}/{test_name}.{c,cc,rs}`. +//! +//! Supported directives (in `//#Directive:Args` format): +//! +//! Object:{filename} Extra object file to compile and link. +//! CompArgs:... Extra compiler flags. +//! LinkArgs:... Extra linker flags. +//! ExpectError:{regex} Link must fail; stderr must match regex. +//! RunEnabled:{bool} Whether to execute the output (default: true). +//! Contains:{string} Output binary must contain this string. +//! DoesNotContain:{string} Output binary must NOT contain this string. + +use std::path::Path; +use std::path::PathBuf; +use std::process::Command; + +fn main() -> Result<(), Box> { + // Only run on macOS. + if cfg!(not(target_os = "macos")) { + eprintln!("Mach-O integration tests only run on macOS — skipping."); + let args = libtest_mimic::Arguments::from_args(); + let _ = libtest_mimic::run(&args, Vec::new()); + return Ok(()); + } + + let args = libtest_mimic::Arguments::from_args(); + let mut tests = Vec::new(); + collect_tests(&mut tests)?; + let _ = libtest_mimic::run(&args, tests).exit_code(); + Ok(()) +} + +// --------------------------------------------------------------------------- +// Test collection +// --------------------------------------------------------------------------- + +fn collect_tests(tests: &mut Vec) -> Result<(), Box> { + let wild_bin = wild_binary_path(); + let src_root = macho_sources_dir(); + + for entry in std::fs::read_dir(&src_root)? 
{ + let entry = entry?; + let dir = entry.path(); + if !dir.is_dir() { + continue; + } + let test_name = dir.file_name().unwrap().to_string_lossy().to_string(); + + // Find primary source: {test_name}.{c,cc,rs} + let primary = identify_primary_source(&dir, &test_name); + let Some(primary) = primary else { continue }; + + let config = parse_config(&dir, &primary)?; + let wild = wild_bin.clone(); + + let arch = if cfg!(target_arch = "aarch64") { + "aarch64" + } else { + "x86_64" + }; + let ignored = config.ignore_reason.is_some(); + tests.push( + libtest_mimic::Trial::test(format!("macho/{arch}/{test_name}"), move || { + run_test(&wild, &dir, &test_name, &primary, &config).map_err(Into::into) + }) + .with_ignored_flag(ignored), + ); + } + Ok(()) +} + +fn identify_primary_source(dir: &Path, test_name: &str) -> Option { + for ext in &["c", "cc", "rs"] { + let p = dir.join(format!("{test_name}.{ext}")); + if p.exists() { + return Some(p); + } + } + None +} + +// --------------------------------------------------------------------------- +// Config parsing +// --------------------------------------------------------------------------- + +#[derive(Default)] +struct TestConfig { + extra_objects: Vec, + /// Archives to create: (archive_name, vec of source files). + archives: Vec<(String, Vec)>, + /// Shared libraries to build: source file names. + shared_libs: Vec, + /// Groups of sources to partial-link with -r. 
+ relocatables: Vec>, + comp_args: Vec, + link_args: Vec, + expect_error: Option, + run_enabled: bool, + use_clang_driver: bool, + contains: Vec, + does_not_contain: Vec, + expect_syms: Vec, + no_syms: Vec, + ignore_reason: Option, +} + +fn parse_config(test_dir: &Path, primary: &Path) -> Result> { + let mut cfg = TestConfig { + run_enabled: true, + ..Default::default() + }; + + let src = std::fs::read_to_string(primary)?; + for line in src.lines() { + let Some(directive) = line.strip_prefix("//#") else { + continue; + }; + let (key, value) = match directive.split_once(':') { + Some((k, v)) => (k, v), + None => (directive, ""), + }; + match key { + "Object" => cfg.extra_objects.push(value.to_string()), + // Archive:libfoo.a:src1.c,src2.c + "Archive" => { + let parts: Vec<&str> = value.splitn(2, ':').collect(); + let (name, sources) = if parts.len() == 2 { + ( + parts[0].to_string(), + parts[1].split(',').map(|s| s.trim().to_string()).collect(), + ) + } else { + // Archive:src.c — auto-name the archive + let src = value.trim().to_string(); + let stem = src + .strip_suffix(".c") + .or(src.strip_suffix(".cc")) + .unwrap_or(&src); + (format!("{stem}.a"), vec![src]) + }; + cfg.archives.push((name, sources)); + } + "Shared" => cfg.shared_libs.push(value.trim().to_string()), + "Relocatable" => { + let sources: Vec = value.split(',').map(|s| s.trim().to_string()).collect(); + cfg.relocatables.push(sources); + } + "CompArgs" => cfg.comp_args.extend(shell_words(value)), + "LinkArgs" => cfg.link_args.extend(shell_words(value)), + "ExpectError" => cfg.expect_error = Some(value.to_string()), + "RunEnabled" => cfg.run_enabled = value.trim() != "false", + "LinkerDriver" if value.trim().starts_with("clang") => cfg.use_clang_driver = true, + "Contains" => cfg.contains.push(value.to_string()), + "DoesNotContain" => cfg.does_not_contain.push(value.to_string()), + "ExpectSym" => cfg + .expect_syms + .push(value.split_whitespace().next().unwrap_or(value).to_string()), + "NoSym" => 
cfg.no_syms.push(value.trim().to_string()), + "Ignore" => cfg.ignore_reason = Some(value.to_string()), + _ => {} // Ignore unknown directives for forward-compatibility. + } + } + + // Also parse directives from extra object files (they might have CompArgs etc.) + for obj_name in &cfg.extra_objects { + let obj_path = test_dir.join(obj_name); + if obj_path.exists() { + let obj_src = std::fs::read_to_string(&obj_path)?; + for line in obj_src.lines() { + if let Some(directive) = line.strip_prefix("//#") { + if let Some(("CompArgs", v)) = directive.split_once(':').map(|(k, v)| (k, v)) { + // CompArgs in extra objects only apply to that object — ignored here. + let _ = v; + } + } + } + } + } + + Ok(cfg) +} + +fn shell_words(s: &str) -> Vec { + s.split_whitespace().map(|w| w.to_string()).collect() +} + +// --------------------------------------------------------------------------- +// Test execution +// --------------------------------------------------------------------------- + +fn run_test( + wild_bin: &Path, + test_dir: &Path, + test_name: &str, + primary: &Path, + config: &TestConfig, +) -> Result<(), String> { + let build_dir = build_dir(test_name); + std::fs::create_dir_all(&build_dir).map_err(|e| format!("mkdir: {e}"))?; + + // Compile all source files. + let mut objects = Vec::new(); + let is_cpp = primary.extension().map_or(false, |e| e == "cc"); + + let is_rust = primary.extension().map_or(false, |e| e == "rs"); + + if is_rust { + // Rust files: compile + link via rustc with wild as linker. 
+ let output = build_dir.join(test_name); + let mut cmd = Command::new("rustc"); + cmd.arg(primary) + .arg("-o") + .arg(&output) + .arg("-Clinker=clang") + .arg(format!("-Clink-arg=-fuse-ld={}", wild_bin.display())); + for arg in &config.link_args { + cmd.arg(format!("-Clink-arg={arg}")); + } + let result = cmd.output().map_err(|e| format!("rustc: {e}"))?; + if !result.status.success() { + let stderr = String::from_utf8_lossy(&result.stderr); + if let Some(ref pattern) = config.expect_error { + if stderr.contains(pattern) { + return Ok(()); + } + return Err(format!( + "Expected error matching '{pattern}', got:\n{stderr}" + )); + } + return Err(format!("rustc failed:\n{stderr}")); + } + if config.run_enabled { + let run = Command::new(&output) + .output() + .map_err(|e| format!("run: {e}"))?; + let code = run.status.code().unwrap_or(-1); + if code != 42 { + return Err(format!("Expected exit code 42, got {code}")); + } + } + return Ok(()); + } + + compile_source(primary, &build_dir, &config.comp_args, is_cpp)?; + objects.push(object_path(&build_dir, primary)); + + for obj_name in &config.extra_objects { + let src = test_dir.join(obj_name); + if !src.exists() { + // Non-existent object path — pass directly to linker (for ExpectError tests). + objects.push(PathBuf::from(obj_name)); + continue; + } + let extra_cpp = src.extension().map_or(false, |e| e == "cc"); + compile_source(&src, &build_dir, &config.comp_args, extra_cpp)?; + objects.push(object_path(&build_dir, &src)); + } + + // Build archives from source files. 
+ for (archive_name, sources) in &config.archives { + let mut member_objs = Vec::new(); + for src_name in sources { + let src = test_dir.join(src_name); + let src_cpp = src.extension().map_or(false, |e| e == "cc"); + compile_source(&src, &build_dir, &config.comp_args, src_cpp)?; + member_objs.push(object_path(&build_dir, &src)); + } + let archive_path = build_dir.join(archive_name); + let mut ar_cmd = Command::new("ar"); + ar_cmd.arg("rcs").arg(&archive_path); + for obj in &member_objs { + ar_cmd.arg(obj); + } + let ar_result = ar_cmd.output().map_err(|e| format!("ar: {e}"))?; + if !ar_result.status.success() { + return Err(format!( + "ar failed: {}", + String::from_utf8_lossy(&ar_result.stderr) + )); + } + objects.push(archive_path); + } + + // Build partial-linked relocatables with -r. + for (group_idx, sources) in config.relocatables.iter().enumerate() { + let mut member_objs = Vec::new(); + for src_name in sources { + let src = test_dir.join(src_name); + let src_cpp = src.extension().map_or(false, |e| e == "cc"); + compile_source(&src, &build_dir, &config.comp_args, src_cpp)?; + member_objs.push(object_path(&build_dir, &src)); + } + let reloc_path = build_dir.join(format!("relocatable{group_idx}.o")); + let mut reloc_cmd = Command::new(&wild_bin); + reloc_cmd.arg("-r"); + for obj in &member_objs { + reloc_cmd.arg(obj); + } + reloc_cmd.arg("-o").arg(&reloc_path); + let result = reloc_cmd.output().map_err(|e| format!("wild -r: {e}"))?; + if !result.status.success() { + let stderr = String::from_utf8_lossy(&result.stderr); + return Err(format!("Partial link (-r) failed:\n{stderr}")); + } + objects.push(reloc_path); + } + + // Build shared libraries (dylibs) and add -L/-l flags. 
+ let mut extra_link_args: Vec = Vec::new(); + for lib_src_name in &config.shared_libs { + let src = test_dir.join(lib_src_name); + let stem = src.file_stem().unwrap().to_string_lossy().to_string(); + let dylib_path = build_dir.join(format!("lib{stem}.dylib")); + let src_cpp = src.extension().map_or(false, |e| e == "cc"); + let compiler = if src_cpp { "clang++" } else { "clang" }; + let mut dylib_cmd = Command::new(compiler); + dylib_cmd + .arg("-dynamiclib") + .arg(format!("-fuse-ld={}", wild_bin.display())) + .arg(&src) + .arg("-o") + .arg(&dylib_path) + .arg(format!("-Wl,-install_name,@rpath/lib{stem}.dylib")); + for arg in &config.comp_args { + dylib_cmd.arg(arg); + } + let result = dylib_cmd + .output() + .map_err(|e| format!("dylib build: {e}"))?; + if !result.status.success() { + let stderr = String::from_utf8_lossy(&result.stderr); + return Err(format!( + "Failed to build dylib from {lib_src_name}:\n{stderr}" + )); + } + extra_link_args.push(format!("-L{}", build_dir.display())); + extra_link_args.push(format!("-l{stem}")); + extra_link_args.push(format!("-Wl,-rpath,{}", build_dir.display())); + } + + // Link with wild. + let output = build_dir.join(test_name); + let mut cmd = if config.use_clang_driver { + // Use clang as driver (passes -syslibroot, -L paths, etc.) + let compiler = if is_cpp { "clang++" } else { "clang" }; + let mut c = Command::new(compiler); + c.arg(format!("-fuse-ld={}", wild_bin.display())); + for obj in &objects { + c.arg(obj); + } + c.arg("-o").arg(&output); + for arg in &config.link_args { + c.arg(arg); + } + for arg in &extra_link_args { + c.arg(arg); + } + c + } else { + let mut c = Command::new(wild_bin); + for obj in &objects { + c.arg(obj); + } + c.arg("-o").arg(&output); + for arg in &config.link_args { + c.arg(arg); + } + for arg in &extra_link_args { + c.arg(arg); + } + c + }; + + // Enable output validation in Wild itself. 
+ cmd.env("WILD_VALIDATE_OUTPUT", "1"); + + let link_result = cmd.output().map_err(|e| format!("wild: {e}"))?; + + // Check for expected errors. + if let Some(ref pattern) = config.expect_error { + if link_result.status.success() { + return Err(format!( + "Expected link failure matching '{pattern}', but link succeeded" + )); + } + let stderr = String::from_utf8_lossy(&link_result.stderr); + if !stderr.contains(pattern) { + return Err(format!( + "Expected error matching '{pattern}', got:\n{stderr}" + )); + } + return Ok(()); + } + + if !link_result.status.success() { + let stderr = String::from_utf8_lossy(&link_result.stderr); + return Err(format!("Link failed:\n{stderr}")); + } + + // Verify Mach-O structural invariants. + let binary = std::fs::read(&output).map_err(|e| format!("read output: {e}"))?; + verify_macho_invariants(&binary, &output)?; + + // Binary content checks. + for needle in &config.contains { + if !binary_contains(&binary, needle.as_bytes()) { + return Err(format!("Output binary does not contain '{needle}'")); + } + } + for needle in &config.does_not_contain { + if binary_contains(&binary, needle.as_bytes()) { + return Err(format!("Output binary unexpectedly contains '{needle}'")); + } + } + + // Symbol checks. + if !config.expect_syms.is_empty() || !config.no_syms.is_empty() { + use object::read::Object as _; + use object::read::ObjectSymbol as _; + let obj_file = object::File::parse(&*binary) + .map_err(|e| format!("Failed to parse output binary: {e}"))?; + let sym_names: Vec<&str> = obj_file.symbols().filter_map(|s| s.name().ok()).collect(); + + for expected in &config.expect_syms { + // Mach-O adds a leading underscore to C symbols. 
+ let with_underscore = format!("_{expected}"); + if !sym_names + .iter() + .any(|n| *n == expected.as_str() || *n == with_underscore) + { + return Err(format!("Expected symbol `{expected}` not found in output")); + } + } + for absent in &config.no_syms { + let with_underscore = format!("_{absent}"); + if sym_names + .iter() + .any(|n| *n == absent.as_str() || *n == with_underscore) + { + return Err(format!("Symbol `{absent}` should not be in output")); + } + } + } + + // Run the binary and check exit code. + if config.run_enabled { + let run = Command::new(&output) + .output() + .map_err(|e| format!("run: {e}"))?; + let code = run.status.code().unwrap_or(-1); + if code != 42 { + return Err(format!("Expected exit code 42, got {code}")); + } + } + + Ok(()) +} + +/// Verify structural invariants of a Mach-O binary. +/// +/// These invariants must hold for dyld to load the binary correctly: +/// - All segments must be page-aligned (16KB on arm64). +/// - Section addresses must be within their parent segment's [vmaddr, vmaddr+vmsize). +/// - Section file offsets must be within [segment.fileoff, segment.fileoff+segment.filesize). +/// - Sections within a segment must not overlap. +/// - LC_SYMTAB offsets must be within the file. +/// - Chained fixup page starts must reference offsets within a page (< page_size). 
+///
+/// NOTE: only the segment and section invariants are enforced by the code below; the
+/// LC_SYMTAB and chained-fixup items in the list above are not checked by this function.
+///
+/// Returns `Err` with a message prefixed by `path` for the first violated invariant, or when
+/// the header, a load command, or a segment's section table cannot be parsed.
+fn verify_macho_invariants(binary: &[u8], path: &std::path::Path) -> Result<(), String> {
+    use object::read::macho::MachHeader as _;
+    use object::read::macho::Section as _;
+    use object::read::macho::Segment as _;
+
+    /// Page size every segment is validated against (16KB, the arm64 value).
+    const PAGE_SIZE: u64 = 0x4000;
+    /// The low byte of a section's `flags` field holds the section type.
+    const SECTION_TYPE_MASK: u32 = 0xFF;
+    /// Section types treated as zero-fill, i.e. exempt from the file-range check.
+    const S_ZEROFILL: u32 = 0x01;
+    const S_GB_ZEROFILL: u32 = 0x0C;
+
+    /// Decodes a fixed-width, NUL-padded name field; names that are not UTF-8 become "".
+    fn padded_name(raw: &[u8]) -> &str {
+        let len = raw.iter().position(|&b| b == 0).unwrap_or(raw.len());
+        std::str::from_utf8(&raw[..len]).unwrap_or("")
+    }
+
+    let le = object::Endianness::Little;
+    let header = object::macho::MachHeader64::<object::Endianness>::parse(binary, 0)
+        .map_err(|e| format!("{}: failed to parse Mach-O header: {e}", path.display()))?;
+    let mut cmds = header
+        .load_commands(le, binary, 0)
+        .map_err(|e| format!("{}: bad load commands: {e}", path.display()))?;
+
+    let file_len = binary.len() as u64;
+
+    loop {
+        // A load command that fails to parse is a validation failure, not the end of the list.
+        let cmd = match cmds.next() {
+            Ok(Some(cmd)) => cmd,
+            Ok(None) => break,
+            Err(e) => return Err(format!("{}: bad load command: {e}", path.display())),
+        };
+        let (seg, seg_data) = match cmd.segment_64() {
+            Ok(Some(found)) => found,
+            // Anything that is not reported as a 64-bit segment is not inspected.
+            Ok(None) => continue,
+            Err(e) => return Err(format!("{}: bad segment command: {e}", path.display())),
+        };
+
+        let segname = padded_name(&seg.segname);
+        let vm_addr = seg.vmaddr.get(le);
+        let vm_size = seg.vmsize.get(le);
+        let file_off = seg.fileoff.get(le);
+        let file_size = seg.filesize.get(le);
+
+        // Invariant: segment vmaddr must be page-aligned (segments with an empty name are
+        // exempt).
+        if vm_addr % PAGE_SIZE != 0 && !segname.is_empty() {
+            return Err(format!(
+                "{}: segment {segname} vmaddr {vm_addr:#x} is not page-aligned",
+                path.display()
+            ));
+        }
+
+        // Invariant: segment file offset must be page-aligned. Segments without file
+        // content (filesize == 0, e.g. __PAGEZERO) are exempt.
+        if file_size > 0 && file_off % PAGE_SIZE != 0 {
+            return Err(format!(
+                "{}: segment {segname} fileoff {file_off:#x} is not page-aligned",
+                path.display()
+            ));
+        }
+
+        // Invariant: segment file content must fit in the file. The header fields are
+        // untrusted, so an overflowing end offset counts as "beyond the file" as well.
+        let seg_file_end = match file_off.checked_add(file_size) {
+            Some(end) if end <= file_len => end,
+            _ => {
+                return Err(format!(
+                    "{}: segment {segname} extends beyond file \
+                     (fileoff {file_off:#x} + filesize {file_size:#x} > file len {file_len:#x})",
+                    path.display()
+                ));
+            }
+        };
+
+        // End of the segment's address range; a range that wraps around is never valid.
+        let seg_vm_end = vm_addr.checked_add(vm_size).ok_or_else(|| {
+            format!(
+                "{}: segment {segname} vmaddr {vm_addr:#x} + vmsize {vm_size:#x} overflows",
+                path.display()
+            )
+        })?;
+
+        // Check sections within this segment.
+        let sections = seg.sections(le, seg_data).map_err(|e| {
+            format!(
+                "{}: segment {segname}: bad section headers: {e}",
+                path.display()
+            )
+        })?;
+
+        let mut prev_end: u64 = 0;
+        for sec in sections {
+            let sect_name = padded_name(sec.sectname());
+            let sec_addr = sec.addr(le);
+            let sec_size = sec.size(le);
+            let sec_offset = u64::from(sec.offset(le));
+            let sec_align = sec.align(le);
+            // `None` means addr + size overflowed, which can never lie inside the segment.
+            let sec_vm_end = sec_addr.checked_add(sec_size);
+
+            // Invariant: section address must be within the segment.
+            if sec_size > 0
+                && (sec_addr < vm_addr || sec_vm_end.map_or(true, |end| end > seg_vm_end))
+            {
+                return Err(format!(
+                    "{}: section {segname},{sect_name} addr {sec_addr:#x}+{sec_size:#x} \
+                     outside segment [{vm_addr:#x}..{:#x})",
+                    path.display(),
+                    seg_vm_end
+                ));
+            }
+
+            // Invariant: section file offset must be within the segment. Zero-fill
+            // sections and sections with a zero file offset are skipped.
+            let sec_type = sec.flags(le) & SECTION_TYPE_MASK;
+            let is_zerofill = sec_type == S_ZEROFILL || sec_type == S_GB_ZEROFILL;
+            if sec_size > 0 && !is_zerofill && sec_offset > 0 {
+                let sec_file_end = sec_offset.checked_add(sec_size);
+                if sec_offset < file_off || sec_file_end.map_or(true, |end| end > seg_file_end) {
+                    return Err(format!(
+                        "{}: section {segname},{sect_name} file range \
+                         [{sec_offset:#x}..{:#x}) outside segment [{file_off:#x}..{:#x})",
+                        path.display(),
+                        sec_offset.saturating_add(sec_size),
+                        seg_file_end
+                    ));
+                }
+            }
+
+            // Invariant: section must respect its alignment (stored as a power-of-two
+            // exponent, so an exponent of 64 or more cannot be represented).
+            if sec_size > 0 && sec_align > 0 {
+                let alignment = 1u64.checked_shl(sec_align).ok_or_else(|| {
+                    format!(
+                        "{}: section {segname},{sect_name} alignment exponent {sec_align} \
+                         is out of range",
+                        path.display()
+                    )
+                })?;
+                if sec_addr % alignment != 0 {
+                    return Err(format!(
+                        "{}: section {segname},{sect_name} addr {sec_addr:#x} \
+                         not aligned to 2^{sec_align} ({alignment})",
+                        path.display()
+                    ));
+                }
+            }
+
+            // Invariant: sections must not overlap (within the same segment).
+            if sec_addr > 0 && sec_addr < prev_end {
+                return Err(format!(
+                    "{}: section {segname},{sect_name} at {sec_addr:#x} \
+                     overlaps previous section ending at {prev_end:#x}",
+                    path.display()
+                ));
+            }
+            if sec_size > 0 {
+                // Overflow was rejected by the bounds check above, so this is `Some`.
+                prev_end = sec_vm_end.unwrap_or(u64::MAX);
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Compiles `src` into an object file inside `build_dir`.
+///
+/// Runs `clang++` when `is_cpp` is set and `clang` otherwise, passing `-c <src> -o <object>`
+/// followed by `extra_args`. The object location is the one `object_path` computes. On a
+/// non-zero exit status the compiler's stderr is included in the returned error.
+fn compile_source(
+    src: &Path,
+    build_dir: &Path,
+    extra_args: &[String],
+    is_cpp: bool,
+) -> Result<(), String> {
+    let out = object_path(build_dir, src);
+    let compiler = if is_cpp { "clang++" } else { "clang" };
+
+    let mut cmd = Command::new(compiler);
+    cmd.arg("-c").arg(src).arg("-o").arg(&out).args(extra_args);
+
+    let result = cmd.output().map_err(|e| format!("{compiler}: {e}"))?;
+    if !result.status.success() {
+        let stderr = String::from_utf8_lossy(&result.stderr);
+        return Err(format!(
+            "Compilation of {} failed:\n{stderr}",
+            src.display()
+        ));
+    }
+    Ok(())
+}
+
+/// Returns `<build_dir>/<stem>.o` for the source file `src`.
+///
+/// Only the file stem is used, so two sources that share a stem map to the same object
+/// path. Panics if `src` has no file name component.
+fn object_path(build_dir: &Path, src: &Path) -> PathBuf {
+    let stem = src.file_stem().unwrap().to_string_lossy();
+    build_dir.join(format!("{stem}.o"))
+}
+
+/// Returns true when `needle` occurs as a contiguous byte sequence in `haystack`.
+///
+/// An empty needle is reported as contained.
+fn binary_contains(haystack: &[u8], needle: &[u8]) -> bool {
+    // `slice::windows` panics when asked for zero-sized windows, so handle the empty
+    // needle before calling it.
+    needle.is_empty()
+        || haystack
+            .windows(needle.len())
+            .any(|window| window == needle)
+}
+
+// ---------------------------------------------------------------------------
+// Paths
+// ---------------------------------------------------------------------------
+
+/// Locates the `wild` binary to test.
+///
+/// Looks two directories above the running test executable first and falls back to
+/// `target/debug/wild` next to the crate's parent directory. The result is canonicalized
+/// when possible; if canonicalization fails the uncanonicalized path is returned.
+fn wild_binary_path() -> PathBuf {
+    let mut path = std::env::current_exe().expect("current_exe");
+    path.pop(); // remove test binary name
+    path.pop(); // remove `deps/`
+    path.push("wild");
+    if !path.exists() {
+        path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+            .parent()
+            .unwrap()
+            .join("target/debug/wild");
+    }
+    // clang -fuse-ld= requires an absolute path.
+    std::fs::canonicalize(&path).unwrap_or(path)
+}
+
+/// Directory holding the Mach-O test sources (`tests/sources/macho` inside this crate).
+fn macho_sources_dir() -> PathBuf {
+    PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/sources/macho")
+}
+
+/// Per-test build directory: `target/macho-test-build/<test_name>` under the crate's parent
+/// directory. The directory is not created here.
+fn build_dir(test_name: &str) -> PathBuf {
+    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+        .parent()
+        .unwrap()
+        .join(format!("target/macho-test-build/{test_name}"))
+}
diff --git a/wild/tests/sold-macho/LICENSE.md b/wild/tests/sold-macho/LICENSE.md
new file mode 100644
index 000000000..ae2ecfbef
--- /dev/null
+++ b/wild/tests/sold-macho/LICENSE.md
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Rui Ueyama
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/wild/tests/sold-macho/README.md b/wild/tests/sold-macho/README.md
new file mode 100644
index 000000000..df3a00b3e
--- /dev/null
+++ b/wild/tests/sold-macho/README.md
@@ -0,0 +1,33 @@
+# sold Mach-O Test Suite
+
+Tests adapted from the [sold](https://github.com/bluewhalesystems/sold)
+Mach-O linker by Rui Ueyama (Blue Whale Systems).
+ +## Source + + + +## License + +MIT License (Copyright 2023 Rui Ueyama) -- see [LICENSE.md](LICENSE.md). + +## Format + +Each test is a bash script that: + +1. Compiles C/C++ source via heredocs using `$CC` +2. Links with `$CC --ld-path=./ld64` (the test runner symlinks Wild as `ld64`) +3. Runs the output binary and verifies behavior (usually via `grep -q`) + +The `common.inc` file sets up `$CC`, `$CXX`, trap handlers, and `$t` (temp dir). + +## Running + +```sh +cargo test --test sold_macho_tests +``` + +## Note + +The sold repository is archived and no longer maintained. This is a +complete snapshot of its Mach-O test suite as of the final commit. diff --git a/wild/tests/sold-macho/S.sh b/wild/tests/sold-macho/S.sh new file mode 100755 index 000000000..56db433de --- /dev/null +++ b/wild/tests/sold-macho/S.sh @@ -0,0 +1,17 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/a.c +#include +void hello() { printf("Hello world\n"); } +int main(){ hello(); } +EOF + +$CC -o $t/a.o -c -g $t/a.c + +$CC --ld-path=./ld64 -o $t/exe1 $t/a.o -g +nm -pa $t/exe1 | grep -qw OSO + +$CC --ld-path=./ld64 -o $t/exe2 $t/a.o -g -Wl,-S +nm -pa $t/exe2 > $t/log2 +! grep -qw OSO $t/log2 || false diff --git a/wild/tests/sold-macho/U.sh b/wild/tests/sold-macho/U.sh new file mode 100755 index 000000000..7303ea70d --- /dev/null +++ b/wild/tests/sold-macho/U.sh @@ -0,0 +1,10 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log 2>&1 +grep -q 'library not found: -lSystem' $t/log diff --git a/wild/tests/sold-macho/add-ast-path.sh b/wild/tests/sold-macho/add-ast-path.sh new file mode 100755 index 000000000..d8a286958 --- /dev/null +++ b/wild/tests/sold-macho/add-ast-path.sh @@ -0,0 +1,12 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < + +int main() { + printf("Hello world\n"); +} +EOF + +$CC --ld-path=./ld64 -B. -o $t/exe1 $t/a.o -Wl,-adhoc_codesign +otool -l $t/exe1 | grep -q LC_CODE_SIGNATURE +$t/exe1 | grep -Fq 'Hello world' + +$CC --ld-path=./ld64 -B. 
-o $t/exe2 $t/a.o -Wl,-no_adhoc_codesign +otool -l $t/exe2 > $t/log2 +! grep -q LC_CODE_SIGNATURE $t/log2 || false +grep -q LC_UUID $t/log2 +! grep -q 'uuid 00000000-0000-0000-0000-000000000000' $t/log2 || false diff --git a/wild/tests/sold-macho/all-load.sh b/wild/tests/sold-macho/all-load.sh new file mode 100755 index 000000000..cc737129f --- /dev/null +++ b/wild/tests/sold-macho/all-load.sh @@ -0,0 +1,21 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/a.tbd +--- !tapi-tbd +tbd-version: 4 +targets: [ x86_64-macos, arm64-macos ] +uuids: + - target: x86_64-macos + value: 00000000-0000-0000-0000-000000000000 + - target: arm64-macos + value: 00000000-0000-0000-0000-000000000000 +install-name: '/usr/frameworks/SomeFramework.framework/SomeFramework' +current-version: 0000 +compatibility-version: 150 +flags: [ not_app_extension_safe ] +exports: + - targets: [ x86_64-macos, arm64-macos ] + symbols: [ _foo ] +... +EOF + +cat <& $t/log1 +! grep -q 'application extension' $t/log1 || false + +$CC --ld-path=./ld64 -o $t/exe1 $t/b.o $t/a.tbd -Wl,-application_extension >& $t/log2 +grep -q 'application extension' $t/log2 diff --git a/wild/tests/sold-macho/application-extension2.sh b/wild/tests/sold-macho/application-extension2.sh new file mode 100755 index 000000000..c374a787b --- /dev/null +++ b/wild/tests/sold-macho/application-extension2.sh @@ -0,0 +1,17 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& $t/log + +grep -q 'not safe for use in application extensions' $t/log diff --git a/wild/tests/sold-macho/archive.sh b/wild/tests/sold-macho/archive.sh new file mode 100755 index 000000000..8d0284b43 --- /dev/null +++ b/wild/tests/sold-macho/archive.sh @@ -0,0 +1,31 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < + +void hello() { + printf("Hello world\n"); +} +EOF + +cat < + +char msg[] = "Hello world\n"; +char *p = msg; + +int main() { + printf("%s", p); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o +$t/exe | grep -q 'Hello world' diff --git a/wild/tests/sold-macho/basic.sh b/wild/tests/sold-macho/basic.sh new file mode 100755 index 000000000..f47ad19b1 --- /dev/null +++ b/wild/tests/sold-macho/basic.sh @@ -0,0 +1,11 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +void hello() { + printf("Hello world\n"); +} +EOF + +cat < $t/log +! grep -q _hello $t/log || false diff --git a/wild/tests/sold-macho/bss.sh b/wild/tests/sold-macho/bss.sh new file mode 100755 index 000000000..2971cc33a --- /dev/null +++ b/wild/tests/sold-macho/bss.sh @@ -0,0 +1,16 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < + +static int foo[100]; + +int main() { + foo[1] = 5; + printf("%d %d %p\n", foo[0], foo[1], foo); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o +$t/exe | grep -q '^0 5 ' diff --git a/wild/tests/sold-macho/bundle.sh b/wild/tests/sold-macho/bundle.sh new file mode 100755 index 000000000..c96a95a8b --- /dev/null +++ b/wild/tests/sold-macho/bundle.sh @@ -0,0 +1,12 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +void hello() { + printf("Hello world\n"); +} +EOF + +$CC --ld-path=./ld64 -o $t/bundle $t/a.o -Wl,-bundle +file -b $t/bundle | grep -qi bundle diff --git a/wild/tests/sold-macho/comdat.sh b/wild/tests/sold-macho/comdat.sh new file mode 100755 index 000000000..e1658a66f --- /dev/null +++ b/wild/tests/sold-macho/comdat.sh @@ -0,0 +1,28 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < +struct T { + T() { std::cout << "foo "; } +}; +T x; +EOF + +cat < +struct T { + T() { std::cout << "foo "; } +}; +T y; +EOF + +cat < +int main() { + std::cout << "bar\n"; +} +EOF + +$CXX --ld-path=./ld64 -o $t/exe $t/a.o $t/b.o $t/c.o +$t/exe | grep -q '^foo foo bar$' diff --git a/wild/tests/sold-macho/common-alignment.sh b/wild/tests/sold-macho/common-alignment.sh new file mode 100755 index 000000000..f324ad2c1 --- /dev/null +++ b/wild/tests/sold-macho/common-alignment.sh @@ -0,0 +1,22 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +#include + +extern int foo; +extern int bar; + +int main() { + printf("%lu %lu\n", (uintptr_t)&foo % 4, (uintptr_t)&bar % 4096); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o $t/b.o +$t/exe | grep -q '^0 0$' diff --git a/wild/tests/sold-macho/common.inc b/wild/tests/sold-macho/common.inc new file mode 100644 index 000000000..d079120a9 --- /dev/null +++ b/wild/tests/sold-macho/common.inc @@ -0,0 +1,40 @@ +# -*- mode: sh -*- + +# Make sure all commands print out messages in English +export LC_ALL=C + +ARCH="${ARCH:-$(uname -m)}" +CC="cc -arch $ARCH" +CXX="c++ -arch $ARCH" + +# Common functions +test_cflags() { + echo 'int main() {}' | $CC "$@" -o /dev/null -xc - >& /dev/null +} + +skip() { + echo skipped + trap - EXIT + exit 0 +} + +on_error() { + code=$? + echo "command failed: $1: $BASH_COMMAND" + trap - EXIT + exit $code +} + +on_exit() { + echo OK + exit 0 +} + +trap 'on_error $LINENO' ERR +trap on_exit EXIT + +# Print out the startup message +testname=$(basename "$0" .sh) +echo -n "Testing $testname ... " +t=out/test/macho/$ARCH/$testname +mkdir -p $t diff --git a/wild/tests/sold-macho/common.sh b/wild/tests/sold-macho/common.sh new file mode 100755 index 000000000..b0aaec557 --- /dev/null +++ b/wild/tests/sold-macho/common.sh @@ -0,0 +1,27 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < + +extern int foo; +extern int bar; +static int baz[10000]; + +int main() { + printf("%d %d %d\n", foo, bar, baz[0]); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o $t/b.o $t/c.o +$t/exe | grep -q '^0 5 0$' diff --git a/wild/tests/sold-macho/cstring.sh b/wild/tests/sold-macho/cstring.sh new file mode 100755 index 000000000..71b555cdc --- /dev/null +++ b/wild/tests/sold-macho/cstring.sh @@ -0,0 +1,21 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < + +extern const char *x; +const char *y = "Hello world\n"; +const char *z = "Howdy world\n"; + +int main() { + printf("%d %d\n", x == y, y == z); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o $t/b.o +$t/exe | grep -q '^1 0$' diff --git a/wild/tests/sold-macho/data-in-code-info.sh b/wild/tests/sold-macho/data-in-code-info.sh new file mode 100755 index 000000000..48a2f494f --- /dev/null +++ b/wild/tests/sold-macho/data-in-code-info.sh @@ -0,0 +1,16 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log3 +! grep -q DATA_IN_CODE $t/log3 || false diff --git a/wild/tests/sold-macho/data-reloc.sh b/wild/tests/sold-macho/data-reloc.sh new file mode 100755 index 000000000..be3495987 --- /dev/null +++ b/wild/tests/sold-macho/data-reloc.sh @@ -0,0 +1,23 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < + +int a = 5; +int *b = &a; + +void print() { + printf("%d %d\n", a, *b); +} +EOF + +$CC --ld-path=./ld64 -shared -o $t/b.dylib $t/a.o + +cat < +void hello() { + printf("Hello world\n"); +} +EOF + +cat < $t/log +! grep -q libfoo.dylib $t/log || false diff --git a/wild/tests/sold-macho/dead-strip-dylibs2.sh b/wild/tests/sold-macho/dead-strip-dylibs2.sh new file mode 100755 index 000000000..ede38fec8 --- /dev/null +++ b/wild/tests/sold-macho/dead-strip-dylibs2.sh @@ -0,0 +1,26 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +mkdir -p $t/Foo.framework + +cat < +void hello() { + printf("Hello world\n"); +} +EOF + +cat < +int main() { + printf("Hello world\n"); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o -Wl,-F$t -Wl,-framework,Foo +otool -l $t/exe | grep -A3 'cmd LC_LOAD_DYLIB' | grep -Fq Foo.framework/Foo + +$CC --ld-path=./ld64 -o $t/exe $t/a.o -Wl,-F$t -Wl,-framework,Foo \ + -Wl,-dead_strip_dylibs +otool -l $t/exe | grep -A3 'cmd LC_LOAD_DYLIB' >& $t/log +! grep -Fq Foo.framework/Foo $t/log || false diff --git a/wild/tests/sold-macho/dead-strip-dylibs3.sh b/wild/tests/sold-macho/dead-strip-dylibs3.sh new file mode 100755 index 000000000..6d46e9a15 --- /dev/null +++ b/wild/tests/sold-macho/dead-strip-dylibs3.sh @@ -0,0 +1,47 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat > $t/libfoo.tbd < +int main() { printf("Hello world\n"); } +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o -L$t -Wl,-lfoo +objdump --macho --dylibs-used $t/exe > $t/log +grep -q libfoo.dylib $t/log +! grep -q libbar.dylib $t/log || false diff --git a/wild/tests/sold-macho/dead-strip.sh b/wild/tests/sold-macho/dead-strip.sh new file mode 100755 index 000000000..4e5bfbe8b --- /dev/null +++ b/wild/tests/sold-macho/dead-strip.sh @@ -0,0 +1,27 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < + +char msg1[] = "Hello world"; +char msg2[] = "Howdy world"; + +void hello() { + printf("%s\n", msg1); +} + +void howdy() { + printf("%s\n", msg2); +} + +int main() { + hello(); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o -Wl,-dead_strip +$t/exe | grep -q 'Hello world' +otool -tVj $t/exe > $t/log +grep -q 'hello:' $t/log +! grep -q 'howdy:' $t/log || false diff --git a/wild/tests/sold-macho/debuginfo.sh b/wild/tests/sold-macho/debuginfo.sh new file mode 100755 index 000000000..56031b480 --- /dev/null +++ b/wild/tests/sold-macho/debuginfo.sh @@ -0,0 +1,28 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < $t/a.c +#include +extern char *msg; +void hello() { printf("Hello world\n"); } +EOF + +$CC -o $t/a.o -c -g $t/a.c + +cat < $t/b.c +char *msg = "Hello world\n"; +void hello(); +int main() { hello(); } +EOF + +$CC -o $t/b.o -c -g $t/b.c + +rm -f $t/c.a +ar cru $t/c.a $t/b.o + +$CC --ld-path=./ld64 -o $t/exe $t/a.o $t/c.a -g + +$t/exe | grep -q 'Hello world' + +lldb -o 'b main' -o run -o list -o quit $t/exe | \ + grep -Eq '^-> 3\s+int main\(\) { hello\(\); }' diff --git a/wild/tests/sold-macho/dependency-info.sh b/wild/tests/sold-macho/dependency-info.sh new file mode 100755 index 000000000..4469c47e2 --- /dev/null +++ b/wild/tests/sold-macho/dependency-info.sh @@ -0,0 +1,12 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +#include + +int main(int argc, char **argv) { + Dl_info info; + + if (!dladdr((char *)main + 4, &info)) { + printf("dladdr failed\n"); + return 1; + } + + printf("fname=%s fbase=%p sname=%s saddr=%p\n", + info.dli_fname, info.dli_fbase, info.dli_sname, info.dli_saddr); + return 0; +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o +$t/exe | grep -q sname=main diff --git a/wild/tests/sold-macho/duplicate-error.sh b/wild/tests/sold-macho/duplicate-error.sh new file mode 100755 index 000000000..713de1be4 --- /dev/null +++ b/wild/tests/sold-macho/duplicate-error.sh @@ -0,0 +1,14 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log || false +grep -q 'duplicate symbol: .*/b.o: .*/a.o: _hello' $t/log diff --git a/wild/tests/sold-macho/dylib.sh b/wild/tests/sold-macho/dylib.sh new file mode 100755 index 000000000..2b40a7cfd --- /dev/null +++ b/wild/tests/sold-macho/dylib.sh @@ -0,0 +1,27 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < +char world[] = "world"; + +char *hello() { + return "Hello"; +} +EOF + +$CC --ld-path=./ld64 -o $t/b.dylib -shared $t/a.o + +cat < + +char *hello(); +extern char world[]; + +int main() { + printf("%s %s\n", hello(), world); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/c.o $t/b.dylib +$t/exe | grep -q 'Hello world' diff --git a/wild/tests/sold-macho/eh-frame.sh b/wild/tests/sold-macho/eh-frame.sh new file mode 100755 index 000000000..5e45bb7e6 --- /dev/null +++ b/wild/tests/sold-macho/eh-frame.sh @@ -0,0 +1,18 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +[ $CXX -xc -femit-dwarf-unwind=always /dev/null 2> /dev/null ] || skip + +cat < + +int hello() { + printf("Hello world\n"); + return 0; +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o -Wl,-e,_hello +$t/exe | grep -q 'Hello world' + +! $CC --ld-path=./ld64 -o $t/exe $t/a.o -Wl,-e,no_such_symbol 2> $t/log || false +grep -q 'undefined entry point symbol: no_such_symbol' $t/log diff --git a/wild/tests/sold-macho/exception-in-static-initializer.sh b/wild/tests/sold-macho/exception-in-static-initializer.sh new file mode 100755 index 000000000..0f3b1b26b --- /dev/null +++ b/wild/tests/sold-macho/exception-in-static-initializer.sh @@ -0,0 +1,26 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < + +class Error : public std::exception { +public: + const char *what() const noexcept override { + return "ERROR STRING"; + } +}; + +static int foo() { + throw Error(); + return 1; +} + +static inline int bar = foo(); + +int main() {} +EOF + +$CXX --ld-path=./ld64 -o $t/exe $t/a.o +( set +e; $t/exe; true ) >& $t/log +grep -q 'terminating with uncaught exception of type Error: ERROR STRING' $t/log diff --git a/wild/tests/sold-macho/exception.sh b/wild/tests/sold-macho/exception.sh new file mode 100755 index 000000000..452d3c79e --- /dev/null +++ b/wild/tests/sold-macho/exception.sh @@ -0,0 +1,16 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < + +void hello() { + printf("Hello world\n"); +} + +int main() { + hello(); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe1 $t/a.o -flto -Wl,-no_fixup_chains +$t/exe1 | grep -q 'Hello world' +nm -g $t/exe1 > $t/log1 +! grep -q _hello $t/log1 || false + +$CC --ld-path=./ld64 -o $t/exe2 $t/a.o -flto -Wl,-no_fixup_chains -Wl,-export_dynamic +$t/exe2 | grep -q 'Hello world' +nm -g $t/exe2 > $t/log2 +grep -q _hello $t/log2 diff --git a/wild/tests/sold-macho/exported-symbols-list.sh b/wild/tests/sold-macho/exported-symbols-list.sh new file mode 100755 index 000000000..bd74bc956 --- /dev/null +++ b/wild/tests/sold-macho/exported-symbols-list.sh @@ -0,0 +1,43 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/list +_foo +_a* +EOF + +$CC --ld-path=./ld64 -shared -o $t/c.dylib $t/a.o + +objdump --macho --exports-trie $t/c.dylib > $t/log1 +grep -q _foo $t/log1 +! grep -q _bar $t/log1 || false +grep -q _baz $t/log1 +grep -q _abc $t/log1 +grep -q _xyz $t/log1 + +$CC --ld-path=./ld64 -shared -o $t/d.dylib $t/a.o \ + -Wl,-exported_symbols_list,$t/list + +objdump --macho --exports-trie $t/d.dylib > $t/log2 +grep -q _foo $t/log2 +! grep -q _bar $t/log2 || false +! grep -q _baz $t/log2 || false +grep -q _abc $t/log2 +! grep -q _xyz $t/log2 || false + +$CC --ld-path=./ld64 -shared -o $t/e.dylib $t/a.o -Wl,-exported_symbol,_foo + +objdump --macho --exports-trie $t/e.dylib > $t/log3 +grep -q _foo $t/log3 +! grep -q _bar $t/log3 || false +! grep -q _baz $t/log3 || false +! grep -q _abc $t/log3 || false +! grep -q _xyz $t/log3 || false diff --git a/wild/tests/sold-macho/filepath.sh b/wild/tests/sold-macho/filepath.sh new file mode 100755 index 000000000..cebf53b29 --- /dev/null +++ b/wild/tests/sold-macho/filepath.sh @@ -0,0 +1,20 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < +void hello() { printf("Hello world\n"); } +EOF + +cat < $t/filelist +$t/a.o +$t/b.o +EOF + +$CC --ld-path=./ld64 -o $t/exe -Wl,-filelist,$t/filelist +$t/exe | grep -q 'Hello world' diff --git a/wild/tests/sold-macho/filepath2.sh b/wild/tests/sold-macho/filepath2.sh new file mode 100755 index 000000000..bfdd89b95 --- /dev/null +++ b/wild/tests/sold-macho/filepath2.sh @@ -0,0 +1,20 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +void hello() { printf("Hello world\n"); } +EOF + +cat < $t/filelist +a.o +b.o +EOF + +$CC --ld-path=./ld64 -o $t/exe -Xlinker -filelist -Xlinker $t/filelist,$t +$t/exe | grep -q 'Hello world' diff --git a/wild/tests/sold-macho/fixup-chains-addend.sh b/wild/tests/sold-macho/fixup-chains-addend.sh new file mode 100755 index 000000000..3b07848eb --- /dev/null +++ b/wild/tests/sold-macho/fixup-chains-addend.sh @@ -0,0 +1,28 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < + +extern int arr[5]; + +int *p1 = arr + (1 << 10); + +int main() { + printf("%d %d\n", arr[0], *(p1 - (1 << 10))); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe1 $t/c.o $t/b.dylib -Wl,-fixup_chains +$t/exe1 +$t/exe1 | grep -q '^1 1$' + +$CC --ld-path=./ld64 -o $t/exe2 $t/c.o $t/b.dylib -Wl,-no_fixup_chains +$t/exe2 +$t/exe2 | grep -q '^1 1$' diff --git a/wild/tests/sold-macho/fixup-chains-addend64.sh b/wild/tests/sold-macho/fixup-chains-addend64.sh new file mode 100755 index 000000000..64099d256 --- /dev/null +++ b/wild/tests/sold-macho/fixup-chains-addend64.sh @@ -0,0 +1,26 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < + +extern int arr[5]; + +int *p1 = arr + (1LL << 40); + +int main() { + printf("%d %d\n", arr[0], *(p1 - (1LL << 40))); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe1 $t/c.o $t/b.dylib -Wl,-fixup_chains +$t/exe1 | grep -q '^1 1$' + +$CC --ld-path=./ld64 -o $t/exe2 $t/c.o $t/b.dylib -Wl,-no_fixup_chains +$t/exe2 | grep -q '^1 1$' diff --git a/wild/tests/sold-macho/fixup-chains-os-version.sh b/wild/tests/sold-macho/fixup-chains-os-version.sh new file mode 100755 index 000000000..3b1fef868 --- /dev/null +++ b/wild/tests/sold-macho/fixup-chains-os-version.sh @@ -0,0 +1,16 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +int main() { + printf("Hello world\n"); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe1 $t/a.o -Wl,-platform_version,macos,11,11 +otool -l $t/exe1 > $t/log1 +! grep -q LC_DYLD_CHAINED_FIXUPS $t/log1 || false + +$CC --ld-path=./ld64 -o $t/exe2 $t/a.o -Wl,-platform_version,macos,13,13 +otool -l $t/exe2 | grep -q LC_DYLD_CHAINED_FIXUPS diff --git a/wild/tests/sold-macho/fixup-chains-unaligned-error.sh b/wild/tests/sold-macho/fixup-chains-unaligned-error.sh new file mode 100755 index 000000000..fdc213a92 --- /dev/null +++ b/wild/tests/sold-macho/fixup-chains-unaligned-error.sh @@ -0,0 +1,19 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& $t/log +grep -Fq '/a.o(__DATA,__data): unaligned base relocation' $t/log diff --git a/wild/tests/sold-macho/fixup-chains.sh b/wild/tests/sold-macho/fixup-chains.sh new file mode 100755 index 000000000..a25efa023 --- /dev/null +++ b/wild/tests/sold-macho/fixup-chains.sh @@ -0,0 +1,18 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < +void hello() { printf("Hello world\n"); } +EOF + +cat < +void hello() { printf("Hello world\n"); } +void foo() { hello(); } +EOF + +$CC --ld-path=./ld64 -shared -o $t/b.dylib $t/a.o -Wl,-flat_namespace + +objdump --macho --bind --lazy-bind $t/b.dylib | grep -Eq 'flat-namespace\s+_hello' +objdump --macho --bind --lazy-bind $t/b.dylib | grep -Eq 'flat-namespace\s+_printf' + +cat < +void hello() { printf("interposed\n"); } +EOF + +$CC --ld-path=./ld64 -shared -o $t/d.dylib $t/c.o + +cat < +void foo(); +int main() { foo(); } +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/e.o $t/d.dylib $t/b.dylib +$t/exe | grep -q interposed diff --git a/wild/tests/sold-macho/force-load.sh b/wild/tests/sold-macho/force-load.sh new file mode 100755 index 000000000..eb3b9d8a2 --- /dev/null +++ b/wild/tests/sold-macho/force-load.sh @@ -0,0 +1,26 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +grep -q 'D _foo$' $t/log +! grep -q 'D _bar$' $t/log || false diff --git a/wild/tests/sold-macho/framework.sh b/wild/tests/sold-macho/framework.sh new file mode 100755 index 000000000..d31801466 --- /dev/null +++ b/wild/tests/sold-macho/framework.sh @@ -0,0 +1,21 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +mkdir -p $t/Foo.framework + +cat < +void hello() { + printf("Hello world\n"); +} +EOF + +cat < +void hello() { + printf("Hello world\n"); +} +EOF + +cat < + +int main() { + printf("Hello"); + puts(" world"); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o +$t/exe | grep -q 'Hello world' diff --git a/wild/tests/sold-macho/hello3.sh b/wild/tests/sold-macho/hello3.sh new file mode 100755 index 000000000..7e418a623 --- /dev/null +++ b/wild/tests/sold-macho/hello3.sh @@ -0,0 +1,14 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < + +int main() { + printf("Hello"); + fprintf(stdout, " world\n"); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o +$t/exe | grep -q 'Hello world' diff --git a/wild/tests/sold-macho/hello4.sh b/wild/tests/sold-macho/hello4.sh new file mode 100755 index 000000000..41df723eb --- /dev/null +++ b/wild/tests/sold-macho/hello4.sh @@ -0,0 +1,16 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < + +int main() { + printf("Hello"); + fprintf(stdout, " world\n"); + fprintf(stderr, "Hello stderr\n"); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o +$t/exe 2> /dev/null | grep -q 'Hello world' +$t/exe 2>&1 > /dev/null | grep -q 'Hello stderr' diff --git a/wild/tests/sold-macho/hello5.sh b/wild/tests/sold-macho/hello5.sh new file mode 100755 index 000000000..1e98ae12e --- /dev/null +++ b/wild/tests/sold-macho/hello5.sh @@ -0,0 +1,19 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < + +extern char msg[]; + +int main() { + printf("%s\n", msg); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o $t/b.o +$t/exe | grep -q 'Hello world' diff --git a/wild/tests/sold-macho/hidden-l.sh b/wild/tests/sold-macho/hidden-l.sh new file mode 100755 index 000000000..d9271f7cf --- /dev/null +++ b/wild/tests/sold-macho/hidden-l.sh @@ -0,0 +1,33 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +grep -q ' _foo$' $t/log +! grep -q ' _bar$' $t/log || false +grep -q ' _baz$' $t/log diff --git a/wild/tests/sold-macho/indirect-symtab.sh b/wild/tests/sold-macho/indirect-symtab.sh new file mode 100755 index 000000000..93e7e9730 --- /dev/null +++ b/wild/tests/sold-macho/indirect-symtab.sh @@ -0,0 +1,10 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < +int main() { printf("Hello world\n"); } +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o +objdump --macho --indirect-symbols $t/exe | grep -q _printf diff --git a/wild/tests/sold-macho/init-offsets-fixup-chains.sh b/wild/tests/sold-macho/init-offsets-fixup-chains.sh new file mode 100755 index 000000000..30c413325 --- /dev/null +++ b/wild/tests/sold-macho/init-offsets-fixup-chains.sh @@ -0,0 +1,17 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +int foo() { std::cout << "foo "; return 3; } +int x = foo(); +int main() {} +EOF + +# -fixup_chains implies -init_offsets +$CXX --ld-path=./ld64 -o $t/exe1 $t/a.o -Wl,-no_fixup_chains +objdump -h $t/exe1 > $t/log1 +! grep -q __init_offsets $t/log1 || false + +$CXX --ld-path=./ld64 -o $t/exe2 $t/a.o -Wl,-fixup_chains +objdump -h $t/exe2 | grep -q __init_offsets diff --git a/wild/tests/sold-macho/init-offsets.sh b/wild/tests/sold-macho/init-offsets.sh new file mode 100755 index 000000000..3a56da107 --- /dev/null +++ b/wild/tests/sold-macho/init-offsets.sh @@ -0,0 +1,24 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +int foo() { std::cout << "foo "; return 3; } + +int x = foo(); +EOF + +cat < + +int bar() { std::cout << "bar "; return 5; } +int y = bar(); + +int main() { + std::cout << "main\n"; +} +EOF + +$CXX --ld-path=./ld64 -o $t/exe $t/a.o $t/b.o -Wl,-init_offsets +objdump -h $t/exe | grep -Eq '__init_offsets\s+00000008\s' +$t/exe | grep -q 'foo bar main' diff --git a/wild/tests/sold-macho/install-name-executable-path.sh b/wild/tests/sold-macho/install-name-executable-path.sh new file mode 100755 index 000000000..e6925caed --- /dev/null +++ b/wild/tests/sold-macho/install-name-executable-path.sh @@ -0,0 +1,30 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < + +void *foo() { + return printf; +} + +int main() { + printf("Hello world\n"); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o +$t/exe | grep -q 'Hello world' + +objdump --macho --bind $t/exe | grep -q _printf + +objdump --macho --lazy-bind $t/exe > $t/log +! grep -q _printf $t/log || false diff --git a/wild/tests/sold-macho/lc-build-version.sh b/wild/tests/sold-macho/lc-build-version.sh new file mode 100755 index 000000000..ccec93f66 --- /dev/null +++ b/wild/tests/sold-macho/lc-build-version.sh @@ -0,0 +1,9 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +int main() {} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o diff --git a/wild/tests/sold-macho/lib1.sh b/wild/tests/sold-macho/lib1.sh new file mode 100755 index 000000000..9971181ef --- /dev/null +++ b/wild/tests/sold-macho/lib1.sh @@ -0,0 +1,16 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +void hello() { + printf("Hello world\n"); +} +EOF + +cat < + +#include +#include + +static id exception_processor(id exception) { + unw_context_t context; + unw_getcontext(&context); + + unw_cursor_t cursor; + unw_init_local(&cursor, &context); + + do { + unw_proc_info_t frame_info; + if (unw_get_proc_info(&cursor, &frame_info) != UNW_ESUCCESS) { + NSLog(@"unw_get_proc_info failed"); + continue; + } + + char proc_name[64] = ""; + unw_word_t offset; + unw_get_proc_name(&cursor, proc_name, sizeof(proc_name), &offset); + + NSLog(@"proc_name=%s has_handler=%d", proc_name, frame_info.handler != 0); + } while (unw_step(&cursor) > 0); + + return exception; +} + +void throw_exception() { + [NSException raise:@"foo" format:@"bar"]; +} + +int main(int argc, char **argv) { + objc_setExceptionPreprocessor(&exception_processor); + @try { + throw_exception(); + } @catch (id exception) { + NSLog(@"caught an exception"); + } + return 0; +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o -framework Foundation +$t/exe 2>&1 | grep -q 'proc_name=objc_exception_throw has_handler=0' +$t/exe 2>&1 | 
grep -q 'proc_name=main has_handler=1' diff --git a/wild/tests/sold-macho/linker-optimization-hints.sh b/wild/tests/sold-macho/linker-optimization-hints.sh new file mode 100755 index 000000000..5361796e6 --- /dev/null +++ b/wild/tests/sold-macho/linker-optimization-hints.sh @@ -0,0 +1,38 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < + +char x1 = -1; +short x2 = -1; +int x3 = -1; +long x4 = -1; +int x5[] = {0, 1, 2, 3}; +long x6[] = {0, 1, 2, 3}; + +void hello() { + printf("Hello world "); +} +EOF + +cat < + +void hello(); + +extern char x1; +extern short x2; +extern int x3; +extern long x4; +extern int x5[]; +extern long x6[]; + +int main() { + hello(); + printf("%d %d %d %ld %d %ld\n", x1, x2, x3, x4, x5[2], x6[3]); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o $t/b.o +$t/exe | grep -q 'Hello world -1 -1 -1 -1 2 3' diff --git a/wild/tests/sold-macho/literals.sh b/wild/tests/sold-macho/literals.sh new file mode 100755 index 000000000..1058dd3ef --- /dev/null +++ b/wild/tests/sold-macho/literals.sh @@ -0,0 +1,14 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +int main() { printf("Hello world\n"); } +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o +$t/exe | grep -q 'Hello world' diff --git a/wild/tests/sold-macho/lto-dead-strip-dylibs.sh b/wild/tests/sold-macho/lto-dead-strip-dylibs.sh new file mode 100755 index 000000000..651cbc224 --- /dev/null +++ b/wild/tests/sold-macho/lto-dead-strip-dylibs.sh @@ -0,0 +1,12 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +int main() { + std::cout << "Hello world\n"; +} +EOF + +$CXX --ld-path=./ld64 -o $t/exe $t/a.o -flto -dead_strip_dylibs +$t/exe | grep -q 'Hello world' diff --git a/wild/tests/sold-macho/lto.sh b/wild/tests/sold-macho/lto.sh new file mode 100755 index 000000000..db6772284 --- /dev/null +++ b/wild/tests/sold-macho/lto.sh @@ -0,0 +1,12 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < +int main() { + printf("Hello world\n"); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o -flto +$t/exe | grep -q 'Hello world' diff --git a/wild/tests/sold-macho/macos-version-min.sh b/wild/tests/sold-macho/macos-version-min.sh new file mode 100755 index 000000000..5a150fdb0 --- /dev/null +++ b/wild/tests/sold-macho/macos-version-min.sh @@ -0,0 +1,11 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +grep -q 'platform 1' $t/log +grep -q 'minos 10.9' $t/log diff --git a/wild/tests/sold-macho/map.sh b/wild/tests/sold-macho/map.sh new file mode 100755 index 000000000..fd67ac68a --- /dev/null +++ b/wild/tests/sold-macho/map.sh @@ -0,0 +1,24 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +void hello() { + printf("Hello world\n"); +} +EOF + +cat < +void hello() { printf("Hello world\n"); } +EOF + +$CC --ld-path=./ld64 -shared -o $t/b.dylib $t/a.o +$CC --ld-path=./ld64 -shared -o $t/c.dylib $t/a.o -Wl,-mark_dead_strippable_dylib + +cat < $t/log2 +! grep -Fq c.dylib $t/log2 || false diff --git a/wild/tests/sold-macho/merge-scope.sh b/wild/tests/sold-macho/merge-scope.sh new file mode 100755 index 000000000..deab197f4 --- /dev/null +++ b/wild/tests/sold-macho/merge-scope.sh @@ -0,0 +1,25 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log || false +grep -q 'undefined symbol: .*\.o: _foo' $t/log diff --git a/wild/tests/sold-macho/needed-framework.sh b/wild/tests/sold-macho/needed-framework.sh new file mode 100755 index 000000000..c4aa24b89 --- /dev/null +++ b/wild/tests/sold-macho/needed-framework.sh @@ -0,0 +1,27 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +mkdir -p $t/Foo.framework + +cat < +void hello() { + printf("Hello world\n"); +} +EOF + +cat < +int main() { + printf("Hello world\n"); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o -Wl,-F$t -Wl,-needed_framework,Foo \ + -Wl,-dead_strip_dylibs +otool -l $t/exe | grep -A3 'cmd LC_LOAD_DYLIB' | grep -Fq Foo.framework/Foo + +$CC --ld-path=./ld64 -o $t/exe $t/a.o -Wl,-F$t -Wl,-framework,Foo \ + -Wl,-dead_strip_dylibs +otool -l $t/exe | grep -A3 'cmd LC_LOAD_DYLIB' >& $t/log +! grep -Fq Foo.framework/Foo $t/log || false diff --git a/wild/tests/sold-macho/needed-l.sh b/wild/tests/sold-macho/needed-l.sh new file mode 100755 index 000000000..1e00333f7 --- /dev/null +++ b/wild/tests/sold-macho/needed-l.sh @@ -0,0 +1,18 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +void hello() { + printf("Hello world\n"); +} +EOF + +cat < $t/log +! grep -q LC_FUNCTION_STARTS $t/log || false diff --git a/wild/tests/sold-macho/objc-selector.sh b/wild/tests/sold-macho/objc-selector.sh new file mode 100755 index 000000000..74585aa5f --- /dev/null +++ b/wild/tests/sold-macho/objc-selector.sh @@ -0,0 +1,13 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +int main() { + NSProcessInfo *info = [NSProcessInfo processInfo]; + NSLog(@"processName: %@", [info processName]); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o -framework Foundation -Wl,-ObjC +$t/exe 2>&1 | grep -Fq 'processName: exe' diff --git a/wild/tests/sold-macho/objc.sh b/wild/tests/sold-macho/objc.sh new file mode 100755 index 000000000..477000e57 --- /dev/null +++ b/wild/tests/sold-macho/objc.sh @@ -0,0 +1,22 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < +@interface MyClass : NSObject +@end +@implementation MyClass +@end +EOF + +ar rcs $t/b.a $t/a.o + +cat < $t/log 2>&1 +grep -q _OBJC_CLASS_ $t/log diff --git a/wild/tests/sold-macho/object-path-lto.sh b/wild/tests/sold-macho/object-path-lto.sh new file mode 100755 index 000000000..958b144b4 --- /dev/null +++ b/wild/tests/sold-macho/object-path-lto.sh @@ -0,0 +1,13 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +int main() { + printf("Hello world\n"); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o -flto -Wl,-object_path_lto,$t/obj +$t/exe | grep -q 'Hello world' +otool -l $t/obj > /dev/null diff --git a/wild/tests/sold-macho/order-file.sh b/wild/tests/sold-macho/order-file.sh new file mode 100755 index 000000000..cf1edcd62 --- /dev/null +++ b/wild/tests/sold-macho/order-file.sh @@ -0,0 +1,32 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < + +int main(); + +void print() { + printf("%d\n", (char *)print < (char *)main); +} + +int main() { + print(); +} +EOF + +cat < $t/order1 +_print +_main +EOF + +cat < $t/order2 +_main +_print +EOF + +$CC --ld-path=./ld64 -o $t/exe1 $t/a.o -Wl,-order_file,$t/order1 +$t/exe1 | grep -q '^1$' + +$CC --ld-path=./ld64 -o $t/exe2 $t/a.o -Wl,-order_file,$t/order2 +$t/exe2 | grep -q '^0$' diff --git a/wild/tests/sold-macho/oso-prefix.sh b/wild/tests/sold-macho/oso-prefix.sh new file mode 100755 index 000000000..ec66cbf0c --- /dev/null +++ b/wild/tests/sold-macho/oso-prefix.sh @@ -0,0 +1,18 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +int main() { + printf("Hello world\n"); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe1 $t/a.o -g +nm -pa $t/exe1 | grep -q 'OSO /' + +$CC --ld-path=./ld64 -o $t/exe2 $t/a.o -g -Wl,-oso_prefix,. 
+nm -pa $t/exe2 | grep -Eq 'OSO out' + +$CC --ld-path=./ld64 -o $t/exe3 $t/a.o -g -Wl,-oso_prefix,"`pwd`/" +nm -pa $t/exe3 | grep -Eq 'OSO out' diff --git a/wild/tests/sold-macho/pagezero-size.sh b/wild/tests/sold-macho/pagezero-size.sh new file mode 100755 index 000000000..3dff195ff --- /dev/null +++ b/wild/tests/sold-macho/pagezero-size.sh @@ -0,0 +1,22 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +[ "`uname -p`" = arm ] && { echo skipped; exit; } + +cat < +int main() { + printf("Hello world\n"); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o + +otool -l $t/exe | grep -A5 'segname __PAGEZERO' | \ + grep -q 'vmsize 0x0000000100000000' + +$CC --ld-path=./ld64 -o $t/exe $t/a.o -Wl,-pagezero_size,0x10000 +$t/exe | grep -q 'Hello world' + +otool -l $t/exe | grep -A5 'segname __PAGEZERO' | \ + grep -q 'vmsize 0x0000000000010000' diff --git a/wild/tests/sold-macho/pagezero-size2.sh b/wild/tests/sold-macho/pagezero-size2.sh new file mode 100755 index 000000000..2388dd879 --- /dev/null +++ b/wild/tests/sold-macho/pagezero-size2.sh @@ -0,0 +1,12 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +void hello() { + printf("Hello world\n"); +} +EOF + +! $CC --ld-path=./ld64 -shared -o $t/b.dylib $t/a.o -Wl,-pagezero_size,0x1000 >& $t/log +grep -Fq ' -pagezero_size option can only be used when linking a main executable' $t/log diff --git a/wild/tests/sold-macho/pagezero-size3.sh b/wild/tests/sold-macho/pagezero-size3.sh new file mode 100755 index 000000000..6e80dc14f --- /dev/null +++ b/wild/tests/sold-macho/pagezero-size3.sh @@ -0,0 +1,13 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +void hello() { + printf("Hello world\n"); +} +EOF + +$CC --ld-path=./ld64 -shared -o $t/b.dylib $t/a.o +otool -l $t/b.dylib > $t/log +! 
grep -q 'segname __PAGEZERO' $t/log || false diff --git a/wild/tests/sold-macho/platform-version.sh b/wild/tests/sold-macho/platform-version.sh new file mode 100755 index 000000000..7d4f619d0 --- /dev/null +++ b/wild/tests/sold-macho/platform-version.sh @@ -0,0 +1,12 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +grep -Fq 'minos 13.5' $t/log +grep -Fq 'sdk 12.0' $t/log diff --git a/wild/tests/sold-macho/print-dependencies.sh b/wild/tests/sold-macho/print-dependencies.sh new file mode 100755 index 000000000..3f7e7d3ec --- /dev/null +++ b/wild/tests/sold-macho/print-dependencies.sh @@ -0,0 +1,21 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +void hello() { + printf("Hello world\n"); +} +EOF + +cat < $t/log + +grep -Eq '/a\.o\t.*libSystem\S+\tu\t_printf' $t/log +grep -Eq '/b\.o\t.*a.o\tu\t_hello' $t/log diff --git a/wild/tests/sold-macho/private-extern.sh b/wild/tests/sold-macho/private-extern.sh new file mode 100755 index 000000000..ec65ae680 --- /dev/null +++ b/wild/tests/sold-macho/private-extern.sh @@ -0,0 +1,12 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log +grep -q _foo $t/log +! grep -q _bar $t/log || false diff --git a/wild/tests/sold-macho/private-symbols.sh b/wild/tests/sold-macho/private-symbols.sh new file mode 100755 index 000000000..bb636ac67 --- /dev/null +++ b/wild/tests/sold-macho/private-symbols.sh @@ -0,0 +1,12 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +void hello() { printf("Hello world\n"); } +int main() { hello(); } +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o +objdump --macho --syms $t/exe > $t/log +! grep ' ltmp' $t/log || false diff --git a/wild/tests/sold-macho/reexport-l.sh b/wild/tests/sold-macho/reexport-l.sh new file mode 100755 index 000000000..40c89254d --- /dev/null +++ b/wild/tests/sold-macho/reexport-l.sh @@ -0,0 +1,38 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < +int main() { + printf("Hello world\n"); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o +cp $t/exe $t/exe1 + +$CC --ld-path=./ld64 -o $t/exe $t/a.o +cp $t/exe $t/exe2 + +diff $t/exe1 $t/exe2 diff --git a/wild/tests/sold-macho/reproducible.sh b/wild/tests/sold-macho/reproducible.sh new file mode 100644 index 000000000..34bada6af --- /dev/null +++ b/wild/tests/sold-macho/reproducible.sh @@ -0,0 +1,9 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/rsp +./ld64 @$t/rsp | grep -q Usage diff --git a/wild/tests/sold-macho/rpath.sh b/wild/tests/sold-macho/rpath.sh new file mode 100755 index 000000000..fe7626a09 --- /dev/null +++ b/wild/tests/sold-macho/rpath.sh @@ -0,0 +1,12 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log + +grep -A3 'cmd LC_RPATH' $t/log | grep -q 'path foo' +grep -A3 'cmd LC_RPATH' $t/log | grep -q 'path @bar' diff --git a/wild/tests/sold-macho/search-dylibs-first.sh b/wild/tests/sold-macho/search-dylibs-first.sh new file mode 100755 index 000000000..2c0682e0b --- /dev/null +++ b/wild/tests/sold-macho/search-dylibs-first.sh @@ -0,0 +1,35 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +void say() { + printf("Hello\n"); +} +EOF + +cat < +void say() { + printf("Howdy\n"); +} +EOF + +cat < +void say() { + printf("Hello\n"); +} +EOF + +cat < +void say() { + printf("Howdy\n"); +} +EOF + +cat < $t/contents + +$CC --ld-path=./ld64 -o $t/exe $t/a.o -Wl,-sectcreate,__TEXT,__foo,$t/contents + +otool -l $t/exe | grep -A3 'sectname __foo' > $t/log +grep -q 'segname __TEXT' $t/log +grep -q 'segname __TEXT' $t/log +grep -q 'size 0x0*7$' $t/log diff --git a/wild/tests/sold-macho/stack-size.sh b/wild/tests/sold-macho/stack-size.sh new file mode 100755 index 000000000..85cd1a582 --- /dev/null +++ b/wild/tests/sold-macho/stack-size.sh @@ -0,0 +1,12 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < +#include + +extern char a __asm("section$start$__TEXT$__text"); +extern char b __asm("section$end$__TEXT$__text"); + +extern char c __asm("section$start$__TEXT$__foo"); +extern char d __asm("section$end$__TEXT$__foo"); + +extern char e __asm("section$start$__FOO$__foo"); +extern char f __asm("section$end$__FOO$__foo"); + +extern char g __asm("segment$start$__TEXT"); +extern char h __asm("segment$end$__TEXT"); + +int main() { + printf("%p %p %p %p %p %p %p %p\n", &a, &b, &c, &d, &e, &f, &g, &h); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o +$t/exe > /dev/null diff --git a/wild/tests/sold-macho/strip.sh b/wild/tests/sold-macho/strip.sh new file mode 100755 index 000000000..f9e002c62 --- /dev/null +++ b/wild/tests/sold-macho/strip.sh @@ -0,0 +1,13 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +int main() { + printf("Hello world\n"); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o +strip $t/exe +$t/exe | grep -q 'Hello world' diff --git a/wild/tests/sold-macho/subsections-via-symbols.sh b/wild/tests/sold-macho/subsections-via-symbols.sh new file mode 100755 index 000000000..3f4617519 --- /dev/null +++ b/wild/tests/sold-macho/subsections-via-symbols.sh @@ -0,0 +1,39 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < + +void fn1(); +void fn2(); +void fn3(); +void fn4(); + +int main() { + printf("%lu %lu\n", (char *)fn2 - (char *)fn1, (char *)fn4 - (char *)fn3); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o $t/b.o $t/c.o +$t/exe | grep -q '^16 1$' diff --git a/wild/tests/sold-macho/syslibroot.sh b/wild/tests/sold-macho/syslibroot.sh new file mode 100755 index 000000000..6b5fde43e --- /dev/null +++ b/wild/tests/sold-macho/syslibroot.sh @@ -0,0 +1,16 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +mkdir -p $t/foo/bar + +cat < $t/libfoo.tbd <<'EOF' +--- !tapi-tbd +tbd-version: 4 +targets: [ x86_64-macos, arm64-macos ] +uuids: + - target: x86_64-macos + value: 00000000-0000-0000-0000-000000000000 + - target: arm64-macos + value: 00000000-0000-0000-0000-000000000000 +install-name: '/foo' +current-version: 0 +compatibility-version: 0 +exports: + - targets: [ x86_64-macos, arm64-macos ] + symbols: [ '$ld$add$os14.0$_foo' ] +... +EOF + +cat <& /dev/null || false + +$CC --ld-path=./ld64 -shared -o $t/b.dylib $t/libfoo.tbd $t/a.o \ + -Wl,-platform_version,macos,14.0,13.0 >& /dev/null diff --git a/wild/tests/sold-macho/tbd-hide.sh b/wild/tests/sold-macho/tbd-hide.sh new file mode 100755 index 000000000..652350872 --- /dev/null +++ b/wild/tests/sold-macho/tbd-hide.sh @@ -0,0 +1,31 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat > $t/libfoo.tbd <<'EOF' +--- !tapi-tbd +tbd-version: 4 +targets: [ x86_64-macos, arm64-macos ] +uuids: + - target: x86_64-macos + value: 00000000-0000-0000-0000-000000000000 + - target: arm64-macos + value: 00000000-0000-0000-0000-000000000000 +install-name: '/foo' +current-version: 0 +compatibility-version: 0 +exports: + - targets: [ x86_64-macos, arm64-macos ] + symbols: [ '$ld$hide$os25.0$_foo', _foo ] +... +EOF + +cat <& /dev/null + +! $CC --ld-path=./ld64 -o $t/exe $t/libfoo.tbd $t/a.o \ + -Wl,-platform_version,macos,25.0,21.0 >& /dev/null || false diff --git a/wild/tests/sold-macho/tbd-install-name.sh b/wild/tests/sold-macho/tbd-install-name.sh new file mode 100755 index 000000000..c841fbf8f --- /dev/null +++ b/wild/tests/sold-macho/tbd-install-name.sh @@ -0,0 +1,35 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat > $t/libfoo.tbd <<'EOF' +--- !tapi-tbd +tbd-version: 4 +targets: [ x86_64-macos, arm64-macos ] +uuids: + - target: x86_64-macos + value: 00000000-0000-0000-0000-000000000000 + - target: arm64-macos + value: 00000000-0000-0000-0000-000000000000 +install-name: '/foo' +current-version: 0 +compatibility-version: 0 +exports: + - targets: [ x86_64-macos, arm64-macos ] + symbols: [ '$ld$install_name$os25.0$/bar', _foo ] +... +EOF + +cat <& /dev/null + +otool -L $t/exe1 | grep -q /foo + +$CC --ld-path=./ld64 -o $t/exe2 $t/libfoo.tbd $t/a.o \ + -Wl,-platform_version,macos,25.0,21.0 >& /dev/null + +otool -L $t/exe2 | grep -q /bar diff --git a/wild/tests/sold-macho/tbd-previous.sh b/wild/tests/sold-macho/tbd-previous.sh new file mode 100755 index 000000000..81c229b58 --- /dev/null +++ b/wild/tests/sold-macho/tbd-previous.sh @@ -0,0 +1,35 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat > $t/libfoo.tbd <<'EOF' +--- !tapi-tbd +tbd-version: 4 +targets: [ x86_64-macos, arm64-macos ] +uuids: + - target: x86_64-macos + value: 00000000-0000-0000-0000-000000000000 + - target: arm64-macos + value: 00000000-0000-0000-0000-000000000000 +install-name: '/foo' +current-version: 0 +compatibility-version: 0 +exports: + - targets: [ x86_64-macos, arm64-macos ] + symbols: [ '$ld$previous$/bar$$1$10.0$15.0$$', _foo ] +... +EOF + +cat < /dev/null + +otool -L $t/b.dylib | grep -q /foo + +$CC --ld-path=./ld64 -shared -o $t/b.dylib $t/libfoo.tbd $t/a.o \ + -Wl,-platform_version,macos,14.0,14.0 2> /dev/null + +otool -L $t/b.dylib | grep -q /bar diff --git a/wild/tests/sold-macho/tbd-reexport.sh b/wild/tests/sold-macho/tbd-reexport.sh new file mode 100755 index 000000000..c3aaae6b7 --- /dev/null +++ b/wild/tests/sold-macho/tbd-reexport.sh @@ -0,0 +1,54 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +mkdir -p $t/libs/SomeFramework.framework/ + +cat > $t/libs/SomeFramework.framework/SomeFramework.tbd < $t/libs/SomeFramework.framework/SomeFramework.tbd < + +extern _Thread_local int foo; +extern _Thread_local int bar; + +int main() { + printf("%d %d\n", foo, bar); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/c.o $t/b.dylib +$t/exe | grep -q '^0 5$' diff --git a/wild/tests/sold-macho/tls-mismatch.sh b/wild/tests/sold-macho/tls-mismatch.sh new file mode 100755 index 000000000..5582d7a65 --- /dev/null +++ b/wild/tests/sold-macho/tls-mismatch.sh @@ -0,0 +1,19 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& $t/log1 +grep -Fq 'illegal thread local variable reference to regular symbol `_a`' $t/log1 + +! $CC --ld-path=./ld64 -o $t/exe2 $t/a.o $t/c.o >& $t/log2 +grep -Fq 'illegal thread local variable reference to regular symbol `_a`' $t/log2 diff --git a/wild/tests/sold-macho/tls-mismatch2.sh b/wild/tests/sold-macho/tls-mismatch2.sh new file mode 100755 index 000000000..c28bf36b1 --- /dev/null +++ b/wild/tests/sold-macho/tls-mismatch2.sh @@ -0,0 +1,19 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& $t/log1 +grep -Fq 'illegal thread local variable reference to regular symbol `_a`' $t/log1 + +! $CC --ld-path=./ld64 -o $t/exe2 $t/a.o $t/c.o >& $t/log2 +grep -Fq 'illegal thread local variable reference to regular symbol `_a`' $t/log2 diff --git a/wild/tests/sold-macho/tls.sh b/wild/tests/sold-macho/tls.sh new file mode 100755 index 000000000..39869417a --- /dev/null +++ b/wild/tests/sold-macho/tls.sh @@ -0,0 +1,22 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +cat < + +int a = 3; +extern _Thread_local int b; +extern _Thread_local int c; + +int main() { + printf("%d %d %d\n", a, b, c); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.dylib $t/b.o +$t/exe | grep -q '^3 0 5$' diff --git a/wild/tests/sold-macho/tls2.sh b/wild/tests/sold-macho/tls2.sh new file mode 100755 index 000000000..3af412d3a --- /dev/null +++ b/wild/tests/sold-macho/tls2.sh @@ -0,0 +1,22 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +# For some reason, this test fails only on GitHub CI. +[ "$GITHUB_ACTIONS" = true ] && { echo skipped; exit; } + +cat < + +_Thread_local int a; +static _Thread_local int b = 5; +static _Thread_local int *c; + +int main() { + b = 5; + c = &b; + printf("%d %d %d\n", a, b, *c); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o +$t/exe | grep -q '^0 5 5$' diff --git a/wild/tests/sold-macho/umbrella.sh b/wild/tests/sold-macho/umbrella.sh new file mode 100755 index 000000000..b8a23b713 --- /dev/null +++ b/wild/tests/sold-macho/umbrella.sh @@ -0,0 +1,9 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/log1 +! grep -q _foo $t/log1 || false + +$CC --ld-path=./ld64 -o $t/exe2 $t/b.a $t/c.o -Wl,-u,_foo +nm $t/exe2 > $t/log2 +grep -q _foo $t/log2 diff --git a/wild/tests/sold-macho/undefined.sh b/wild/tests/sold-macho/undefined.sh new file mode 100755 index 000000000..6403b6aeb --- /dev/null +++ b/wild/tests/sold-macho/undefined.sh @@ -0,0 +1,10 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < $t/list +_foo +_a* +EOF + +$CC --ld-path=./ld64 -shared -o $t/c.dylib $t/a.o + +objdump --macho --exports-trie $t/c.dylib > $t/log1 +grep -q _foo $t/log1 +! grep -q _bar $t/log1 || false +grep -q _baz $t/log1 +grep -q _abc $t/log1 +grep -q _xyz $t/log1 + +$CC --ld-path=./ld64 -shared -o $t/d.dylib $t/a.o \ + -Wl,-unexported_symbols_list,$t/list + +objdump --macho --exports-trie $t/d.dylib > $t/log2 +! grep -q _foo $t/log2 || false +! grep -q _bar $t/log2 || false +grep -q _baz $t/log2 || false +! 
grep -q _abc $t/log2 || false +grep -q _xyz $t/log2 + +$CC --ld-path=./ld64 -shared -o $t/e.dylib $t/a.o -Wl,-unexported_symbol,_foo + +objdump --macho --exports-trie $t/e.dylib > $t/log3 +! grep -q _foo $t/log3 || false +! grep -q _bar $t/log3 || false +grep -q _baz $t/log3 +grep -q _abc $t/log3 +grep -q _xyz $t/log3 diff --git a/wild/tests/sold-macho/universal.sh b/wild/tests/sold-macho/universal.sh new file mode 100755 index 000000000..1021ff9de --- /dev/null +++ b/wild/tests/sold-macho/universal.sh @@ -0,0 +1,21 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +void hello() { + printf("Hello world\n"); +} +EOF + +lipo $t/a.o -create -output $t/fat.o + +cat < $t/b.tbd < +int main() { + printf("Hello world\n"); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe1 $t/a.o -Wl,-final_output,exe1 +otool -l $t/exe1 | grep -q LC_UUID + +$CC --ld-path=./ld64 -o $t/exe2 $t/a.o -Wl,-final_output,exe1 +otool -l $t/exe2 | grep -q LC_UUID + +diff -q $t/exe1 $t/exe2 > /dev/null + +$CC --ld-path=./ld64 -o $t/exe3 $t/a.o -Wl,-no_uuid +otool -l $t/exe3 > $t/log3 +! grep -q LC_UUID $t/log3 || false + +$CC --ld-path=./ld64 -o $t/exe4 $t/a.o -Wl,-random_uuid +otool -l $t/exe4 | grep -q LC_UUID diff --git a/wild/tests/sold-macho/uuid2.sh b/wild/tests/sold-macho/uuid2.sh new file mode 100755 index 000000000..08e8ca01c --- /dev/null +++ b/wild/tests/sold-macho/uuid2.sh @@ -0,0 +1,15 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < + +int main() { + printf("Hello world\n"); +} +EOF + +$CC --ld-path=./ld64 -B. -o $t/exe1 $t/a.o -Wl,-adhoc_codesign +$CC --ld-path=./ld64 -B. -o $t/exe2 $t/a.o -Wl,-adhoc_codesign + +[ "$(otool -l $t/exe1 | grep 'uuid ')" != "$(otool -l $t/exe2 | grep 'uuid ')" ] diff --git a/wild/tests/sold-macho/version.sh b/wild/tests/sold-macho/version.sh new file mode 100755 index 000000000..19a3ffcab --- /dev/null +++ b/wild/tests/sold-macho/version.sh @@ -0,0 +1,15 @@ +#!/bin/bash +. 
$(dirname $0)/common.inc + +./ld64 -v | grep -q '[ms]old' + +cat < + +int main() { + printf("Hello world\n"); +} +EOF + +$CC --ld-path=./ld64 -Wl,-v -o $t/exe $t/a.o | grep -q '[ms]old' +$t/exe | grep -q 'Hello world' diff --git a/wild/tests/sold-macho/w.sh b/wild/tests/sold-macho/w.sh new file mode 100755 index 000000000..bc954fe3f --- /dev/null +++ b/wild/tests/sold-macho/w.sh @@ -0,0 +1,22 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat <& $t/log1 + +grep -q warning $t/log1 + +$CC --ld-path=./ld64 -shared -o $t/d.so $t/b.so $t/c.o \ + -Wl,-application_extension -Wl,-w >& $t/log2 + +! grep -q warning $t/log2 || false diff --git a/wild/tests/sold-macho/weak-def-archive.sh b/wild/tests/sold-macho/weak-def-archive.sh new file mode 100755 index 000000000..8e69dc41a --- /dev/null +++ b/wild/tests/sold-macho/weak-def-archive.sh @@ -0,0 +1,39 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < + +int foo() __attribute__((weak)); +int foo() { return 42; } + +int main() { + printf("foo=%d\n", foo()); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe1 $t/b.a $t/c.o +$t/exe1 | grep -q '^foo=42$' + +cat < + +int foo() __attribute__((weak)); +int foo() { return 42; } +int bar(); + +int main() { + printf("foo=%d bar=%d\n", foo(), bar()); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe2 $t/b.a $t/d.o +$t/exe2 | grep -q '^foo=3 bar=5$' diff --git a/wild/tests/sold-macho/weak-def-dylib.sh b/wild/tests/sold-macho/weak-def-dylib.sh new file mode 100755 index 000000000..6f97b1c2f --- /dev/null +++ b/wild/tests/sold-macho/weak-def-dylib.sh @@ -0,0 +1,29 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < + +int foo() __attribute((weak)); + +int main() { + printf("%d\n", foo ? 
foo() : 42); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/b.dylib $t/c.o +$t/exe | grep -q '^3$' + +$CC -c -o $t/d.o -xc /dev/null +$CC --ld-path=./ld64 -shared -o $t/b.dylib $t/d.o +$t/exe | grep -q '^42$' diff --git a/wild/tests/sold-macho/weak-def-ref.sh b/wild/tests/sold-macho/weak-def-ref.sh new file mode 100755 index 000000000..135bce1a2 --- /dev/null +++ b/wild/tests/sold-macho/weak-def-ref.sh @@ -0,0 +1,18 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < + +struct Foo { + Foo() { std::cout << "foo\n"; } +}; + +Foo x; + +int main() {} +EOF + +$CXX --ld-path=./ld64 -o $t/exe $t/a.o +objdump --macho --exports-trie $t/exe > $t/log +! grep -q __ZN3FooC1Ev $t/log || false diff --git a/wild/tests/sold-macho/weak-def.sh b/wild/tests/sold-macho/weak-def.sh new file mode 100755 index 000000000..0eb62b15c --- /dev/null +++ b/wild/tests/sold-macho/weak-def.sh @@ -0,0 +1,26 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < + +int foo() __attribute__((weak)); + +int foo() { + return 3; +} + +int main() { + printf("%d\n", foo()); +} +EOF + +cat < +void hello() { + printf("Hello world\n"); +} +EOF + +cat < +void hello() __attribute__((weak_import)); + +int main() { + if (hello) + hello(); + else + printf("hello is missing\n"); +} +EOF + +$CC --ld-path=./ld64 -o $t/exe $t/a.o -L$t -Wl,-weak-lfoo +$t/exe | grep -q 'Hello world' + +rm $t/libfoo.dylib +$t/exe | grep -q 'hello is missing' diff --git a/wild/tests/sold-macho/weak-undef.sh b/wild/tests/sold-macho/weak-undef.sh new file mode 100755 index 000000000..e174c57b8 --- /dev/null +++ b/wild/tests/sold-macho/weak-undef.sh @@ -0,0 +1,20 @@ +#!/bin/bash +. $(dirname $0)/common.inc + +cat < +int foo() __attribute__((weak)); +int main() { + printf("%d\n", foo ? 
foo() : 5); +} +EOF + +cat < $t/a.c +#include +static void hello() { printf("Hello world\n"); } +int main(){ hello(); } +EOF + +$CC -o $t/a.o -c $t/a.c + +$CC --ld-path=./ld64 -o $t/exe1 $t/a.o +nm $t/exe1 | grep -qw _hello + +$CC --ld-path=./ld64 -o $t/exe2 $t/a.o -Wl,-x +nm $t/exe2 > $t/log2 +! grep -qw _hello $t/log2 || false diff --git a/wild/tests/sold_macho_tests.rs b/wild/tests/sold_macho_tests.rs new file mode 100644 index 000000000..7cee77f39 --- /dev/null +++ b/wild/tests/sold_macho_tests.rs @@ -0,0 +1,207 @@ +//! Test runner for sold (bluewhalesystems/sold) Mach-O shell tests. +//! +//! These tests are adapted from the sold linker's Mach-O test suite (MIT License). +//! +//! Each test is a bash script that compiles C/C++ code, links with the linker +//! under test (via `--ld-path=./ld64`), and verifies the output. + +use std::path::Path; +use std::path::PathBuf; +use std::process::Command; + +fn wild_binary_path() -> PathBuf { + PathBuf::from(env!("CARGO_BIN_EXE_wild")) +} + +fn sold_tests_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/sold-macho") +} + +fn collect_tests(tests: &mut Vec) { + let wild_bin = wild_binary_path(); + let test_dir = sold_tests_dir(); + + // Create a working directory with ld64 symlink + let work_dir = std::env::temp_dir().join("wild-sold-tests"); + std::fs::create_dir_all(&work_dir).unwrap(); + let ld64_link = work_dir.join("ld64"); + let _ = std::fs::remove_file(&ld64_link); + std::os::unix::fs::symlink(&wild_bin, &ld64_link).unwrap(); + + for entry in std::fs::read_dir(&test_dir).unwrap() { + let entry = entry.unwrap(); + let path = entry.path(); + if path.extension().map_or(true, |e| e != "sh") { + continue; + } + + let test_name = path.file_stem().unwrap().to_string_lossy().to_string(); + let test_path = path.clone(); + let wd = work_dir.clone(); + + let ignored = should_ignore(&test_name); + + tests.push( + libtest_mimic::Trial::test(format!("sold-macho/{test_name}"), move || { + 
run_sold_test(&test_path, &wd).map_err(Into::into) + }) + .with_ignored_flag(ignored), + ); + } +} + +fn should_ignore(name: &str) -> bool { + // Tests that don't use --ld-path (invoke ./ld64 directly without cc) + const DIRECT_LD64: &[&str] = &[]; + + // Tests that use flags/features Wild doesn't support yet + const UNSUPPORTED_FLAGS: &[&str] = &[ + "flat-namespace", // -flat_namespace + "undefined", // -undefined warning + "U", // -U (dynamic lookup) + "umbrella", // -umbrella + "application-extension", // -application_extension + "application-extension2", // -application_extension + "exported-symbols-list", // -exported_symbols_list + "unexported-symbols-list", // -unexported_symbols_list + "export-dynamic", // -export_dynamic + "merge-scope", // visibility merging + "hidden-l", // -hidden-l + "needed-l", // -needed-l + "needed-framework", // -needed_framework + "weak-l", // -weak-l + "reexport-l", // -reexport-l + "reexport-library", // -reexport_library + // install-name now passes (-install_name support) + "install-name-executable-path", // @executable_path + "install-name-loader-path", // @loader_path + "install-name-rpath", // @rpath + // rpath now passes (-rpath → LC_RPATH) + "search-paths-first", // -search_paths_first + "search-dylibs-first", // -search_dylibs_first + "sectcreate", // -sectcreate + "order-file", // -order_file + // stack-size now passes + "map", // -map + "dependency-info", // -dependency_info + "print-dependencies", // -print_dependency_info + // macos-version-min now passes + // platform-version now passes + "S", // -S (strip debug) + // strip now passes (LINKEDIT packing + linker-signed codesign) + // no-function-starts now passes + // data-in-code-info now passes + "subsections-via-symbols", // -subsections_via_symbols + "add-ast-path", // -add_ast_path + // add-empty-section now passes + "pagezero-size2", // -pagezero_size variations + "oso-prefix", // -oso_prefix + "start-stop-symbol", // __start_/__stop_ sections + "framework", // 
-framework (non-system) + ]; + + // Tests requiring LTO + const LTO: &[&str] = &["lto", "lto-dead-strip-dylibs", "object-path-lto"]; + + // Tests that need linking against a .dylib (Wild can't yet consume dylib inputs) + const NEEDS_DYLIB_INPUT: &[&str] = &[ + "dylib", // creates then links against dylib + "tls-dylib", // TLS across dylibs + "data-reloc", // links dylib + object + "fixup-chains-addend", // links dylib + object + "fixup-chains-addend64", // links dylib + object + "weak-def-dylib", // weak defs from dylib + "mark-dead-strippable-dylib", // links against dylib + ]; + + // Validation/correctness bugs in Wild to fix + const WILD_BUGS: &[&str] = &[ + "tls", // TLV descriptor offset validation + "tls-mismatch", // TLS type mismatch errors + "tls-mismatch2", // TLS type mismatch errors + "cstring", // cstring dedup/merging + "duplicate-error", // duplicate symbol error format + "missing-error", // undefined symbol error format + "undef", // undefined symbol handling + "fixup-chains-unaligned-error", // unaligned fixup error + "exception-in-static-initializer", // init func exceptions + "indirect-symtab", // indirect symbol table + "init-offsets", // __mod_init_func offsets + "init-offsets-fixup-chains", // init offsets + fixup chains + "literals", // literal section merging + "libunwind", // libunwind integration + "objc-selector", // ObjC selector refs + "debuginfo", // debug info pass-through + ]; + + // x86_64-specific tests + const X86_ONLY: &[&str] = &[]; + + // Tests that invoke ld64 directly (not through cc --ld-path) + const NO_LD_PATH: &[&str] = &[]; + + // .tbd parsing features not yet supported + const TBD: &[&str] = &[ + "tbd", + "tbd-add", + "tbd-hide", + "tbd-install-name", + "tbd-previous", + "tbd-reexport", + "unkown-tbd-target", + ]; + + // Load command / output format checks + const OUTPUT_FORMAT: &[&str] = &[ + "lc-build-version", // LC_BUILD_VERSION tool field + // uuid now passes (-final_output, -no_uuid, -random_uuid) + // uuid2 now 
passes + "version", // -current_version / -compatibility_version + "w", // -w (suppress warnings) + "Z", // -Z (no default search paths) + // adhoc-codesign now passes (linker-signed + no_adhoc_codesign flag) + "dead-strip-dylibs", // -dead_strip_dylibs + "dead-strip-dylibs2", // -dead_strip_dylibs + ]; + + DIRECT_LD64.contains(&name) + || UNSUPPORTED_FLAGS.contains(&name) + || LTO.contains(&name) + || WILD_BUGS.contains(&name) + || X86_ONLY.contains(&name) + || NO_LD_PATH.contains(&name) + || NEEDS_DYLIB_INPUT.contains(&name) + || TBD.contains(&name) + || OUTPUT_FORMAT.contains(&name) +} + +fn run_sold_test(test_path: &Path, work_dir: &Path) -> Result<(), String> { + let output = Command::new("bash") + .arg(test_path) + .current_dir(work_dir) + .env("WILD_VALIDATE_OUTPUT", "1") + .output() + .map_err(|e| format!("bash: {e}"))?; + + if !output.status.success() { + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + let mut msg = format!("Test failed with status {}\n", output.status); + if !stdout.is_empty() { + msg.push_str(&format!("stdout:\n{stdout}\n")); + } + if !stderr.is_empty() { + msg.push_str(&format!("stderr:\n{stderr}\n")); + } + return Err(msg); + } + + Ok(()) +} + +fn main() { + let mut tests = Vec::new(); + collect_tests(&mut tests); + let args = libtest_mimic::Arguments::from_args(); + libtest_mimic::run(&args, tests).exit(); +} diff --git a/wild/tests/sources/macho/absolute-symbol/absolute-symbol.c b/wild/tests/sources/macho/absolute-symbol/absolute-symbol.c new file mode 100644 index 000000000..e21e06d5d --- /dev/null +++ b/wild/tests/sources/macho/absolute-symbol/absolute-symbol.c @@ -0,0 +1,6 @@ +//#RunEnabled:false +//#ExpectSym:abs_sym +// Tests that absolute symbols (defined via assembly .set) are preserved. 
+__asm__(".globl _abs_sym\n.set _abs_sym, 0xCAFE"); + +int main() { return 42; } diff --git a/wild/tests/sources/macho/alignment/alignment.c b/wild/tests/sources/macho/alignment/alignment.c new file mode 100644 index 000000000..e87654c78 --- /dev/null +++ b/wild/tests/sources/macho/alignment/alignment.c @@ -0,0 +1,10 @@ +// Test that the linker respects large alignment requirements. +struct __attribute__((aligned(16384))) S { + int x; +}; +struct S obj = {.x = 42}; + +int main() { + if ((unsigned long long)&obj & 0x3FFF) return 1; + return obj.x; +} diff --git a/wild/tests/sources/macho/archive-activation/archive-activation.c b/wild/tests/sources/macho/archive-activation/archive-activation.c new file mode 100644 index 000000000..c03b79b26 --- /dev/null +++ b/wild/tests/sources/macho/archive-activation/archive-activation.c @@ -0,0 +1,5 @@ +//#Archive:lib.a:archive-activation1.c + +// Tests that archive members are pulled in when referenced. +int get_value(void); +int main() { return get_value(); } diff --git a/wild/tests/sources/macho/archive-activation/archive-activation1.c b/wild/tests/sources/macho/archive-activation/archive-activation1.c new file mode 100644 index 000000000..2888439f1 --- /dev/null +++ b/wild/tests/sources/macho/archive-activation/archive-activation1.c @@ -0,0 +1 @@ +int get_value(void) { return 42; } diff --git a/wild/tests/sources/macho/backtrace-test/backtrace-test.rs b/wild/tests/sources/macho/backtrace-test/backtrace-test.rs new file mode 100644 index 000000000..30bc97bb0 --- /dev/null +++ b/wild/tests/sources/macho/backtrace-test/backtrace-test.rs @@ -0,0 +1,14 @@ +//#LinkerDriver:clang + +fn inner() -> String { + let bt = std::backtrace::Backtrace::force_capture(); + format!("{bt}") +} + +fn main() { + let bt = inner(); + if bt.contains("inner") { + std::process::exit(42); + } + std::process::exit(1); +} diff --git a/wild/tests/sources/macho/bss/bss.c b/wild/tests/sources/macho/bss/bss.c new file mode 100644 index 000000000..cc8071e77 --- 
/dev/null +++ b/wild/tests/sources/macho/bss/bss.c @@ -0,0 +1,9 @@ +// Test that uninitialised globals are zero-filled (BSS). +int uninit_global; +static int uninit_static; + +int main() { + if (uninit_global != 0) return 1; + if (uninit_static != 0) return 2; + return 42; +} diff --git a/wild/tests/sources/macho/common-symbol/common-symbol.c b/wild/tests/sources/macho/common-symbol/common-symbol.c new file mode 100644 index 000000000..95e8cacc2 --- /dev/null +++ b/wild/tests/sources/macho/common-symbol/common-symbol.c @@ -0,0 +1,6 @@ +//#Object:common-symbol1.c + +// Test that tentative (common) definitions from multiple objects merge +// correctly. +int shared_var; +int main() { return shared_var == 0 ? 42 : 1; } diff --git a/wild/tests/sources/macho/common-symbol/common-symbol1.c b/wild/tests/sources/macho/common-symbol/common-symbol1.c new file mode 100644 index 000000000..db7a202cc --- /dev/null +++ b/wild/tests/sources/macho/common-symbol/common-symbol1.c @@ -0,0 +1,2 @@ +// Another tentative definition of the same symbol. +int shared_var; diff --git a/wild/tests/sources/macho/const-data/const-data.c b/wild/tests/sources/macho/const-data/const-data.c new file mode 100644 index 000000000..f6912c820 --- /dev/null +++ b/wild/tests/sources/macho/const-data/const-data.c @@ -0,0 +1,9 @@ +// Tests that __const section data is correctly placed and accessible. +//#ExpectSym:table + +static const int table[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; +int main() { + int sum = 0; + for (int i = 0; i < 10; i++) sum += table[i]; + return sum == 55 ? 42 : 1; +} diff --git a/wild/tests/sources/macho/constructors/constructors.c b/wild/tests/sources/macho/constructors/constructors.c new file mode 100644 index 000000000..0fa294ff0 --- /dev/null +++ b/wild/tests/sources/macho/constructors/constructors.c @@ -0,0 +1,7 @@ +//#LinkerDriver:clang +// Test that __attribute__((constructor)) functions run before main. 
+static int init_val = 0; + +__attribute__((constructor)) void my_init(void) { init_val = 42; } + +int main() { return init_val; } diff --git a/wild/tests/sources/macho/cpp-basic/cpp-basic.cc b/wild/tests/sources/macho/cpp-basic/cpp-basic.cc new file mode 100644 index 000000000..6272407e6 --- /dev/null +++ b/wild/tests/sources/macho/cpp-basic/cpp-basic.cc @@ -0,0 +1,19 @@ +//#CompArgs:-std=c++17 +//#LinkerDriver:clang++ +//#LinkArgs:-lc++ + +// Test basic C++ linking: virtual dispatch, new/delete. +struct Base { + virtual int value() { return 1; } + virtual ~Base() = default; +}; + +struct Derived : Base { + int value() override { return 42; } +}; + +int main() { + Derived d; + Base* b = &d; + return b->value(); +} diff --git a/wild/tests/sources/macho/cpp-string/cpp-string.cc b/wild/tests/sources/macho/cpp-string/cpp-string.cc new file mode 100644 index 000000000..5ff09cf4e --- /dev/null +++ b/wild/tests/sources/macho/cpp-string/cpp-string.cc @@ -0,0 +1,12 @@ +//#LinkerDriver:clang++ +//#LinkArgs:-lc++ +//#CompArgs:-std=c++17 + +// Tests C++ std::string and basic stdlib linking. +#include + +int main() { + std::string s = "hello"; + s += " world"; + return s.length() == 11 ? 
42 : 1; +} diff --git a/wild/tests/sources/macho/cross-object-call/cross-object-call.c b/wild/tests/sources/macho/cross-object-call/cross-object-call.c new file mode 100644 index 000000000..15b9ac229 --- /dev/null +++ b/wild/tests/sources/macho/cross-object-call/cross-object-call.c @@ -0,0 +1,4 @@ +//#Object:cross-object-call1.c + +int add(int, int); +int main() { return add(30, 12); } diff --git a/wild/tests/sources/macho/cross-object-call/cross-object-call1.c b/wild/tests/sources/macho/cross-object-call/cross-object-call1.c new file mode 100644 index 000000000..1f7644834 --- /dev/null +++ b/wild/tests/sources/macho/cross-object-call/cross-object-call1.c @@ -0,0 +1 @@ +int add(int a, int b) { return a + b; } diff --git a/wild/tests/sources/macho/cstring-data/cstring-data.c b/wild/tests/sources/macho/cstring-data/cstring-data.c new file mode 100644 index 000000000..45c96a0d3 --- /dev/null +++ b/wild/tests/sources/macho/cstring-data/cstring-data.c @@ -0,0 +1,8 @@ +// Tests that __cstring literals are accessible and correctly merged. +#include <string.h> + +int main() { + const char* a = "hello"; + const char* b = "world"; + return (strlen(a) == 5 && strlen(b) == 5) ? 42 : 1; +} diff --git a/wild/tests/sources/macho/custom-section/custom-section.c b/wild/tests/sources/macho/custom-section/custom-section.c new file mode 100644 index 000000000..d621eb0cf --- /dev/null +++ b/wild/tests/sources/macho/custom-section/custom-section.c @@ -0,0 +1,10 @@ +//#Object:custom-section1.c + +// Tests that data placed in custom sections via __attribute__((section)) +// is correctly linked and accessible at runtime.
+ +extern int get_custom_value(void); + +static int my_data __attribute__((used, section("__DATA,__custom"))) = 30; + +int main() { return my_data + get_custom_value(); } diff --git a/wild/tests/sources/macho/custom-section/custom-section1.c b/wild/tests/sources/macho/custom-section/custom-section1.c new file mode 100644 index 000000000..4d9da3c67 --- /dev/null +++ b/wild/tests/sources/macho/custom-section/custom-section1.c @@ -0,0 +1,3 @@ +static int other __attribute__((used, section("__DATA,__custom"))) = 12; + +int get_custom_value(void) { return other; } diff --git a/wild/tests/sources/macho/data-pointers/data-pointers.c b/wild/tests/sources/macho/data-pointers/data-pointers.c new file mode 100644 index 000000000..271ce29c7 --- /dev/null +++ b/wild/tests/sources/macho/data-pointers/data-pointers.c @@ -0,0 +1,17 @@ +//#Object:data-pointers1.c + +// Tests that data pointers (function pointers and data addresses) in the +// DATA section are correctly rebased for ASLR. + +extern int values[4]; +extern int (*get_fn(void))(void); + +int main() { + // Check data array values + if (values[0] != 10) return 1; + if (values[1] != 20) return 2; + + // Check function pointer from another object + int (*fn)(void) = get_fn(); + return fn(); +} diff --git a/wild/tests/sources/macho/data-pointers/data-pointers1.c b/wild/tests/sources/macho/data-pointers/data-pointers1.c new file mode 100644 index 000000000..9eef489d4 --- /dev/null +++ b/wild/tests/sources/macho/data-pointers/data-pointers1.c @@ -0,0 +1,8 @@ +int values[4] = {10, 20, 30, 40}; + +static int return_42(void) { return 42; } + +// Function pointer in the data section — requires rebase fixup. 
+static int (*fn_ptr)(void) = return_42; + +int (*get_fn(void))(void) { return fn_ptr; } diff --git a/wild/tests/sources/macho/data/data.c b/wild/tests/sources/macho/data/data.c new file mode 100644 index 000000000..a9216cae6 --- /dev/null +++ b/wild/tests/sources/macho/data/data.c @@ -0,0 +1,8 @@ +static char data1[] = "Hello"; +char data2[] = "World"; + +int main() { + if (data1[0] != 'H') return 1; + if (data2[0] != 'W') return 2; + return 42; +} diff --git a/wild/tests/sources/macho/duplicate-strong-error/dup1.c b/wild/tests/sources/macho/duplicate-strong-error/dup1.c new file mode 100644 index 000000000..3167837f0 --- /dev/null +++ b/wild/tests/sources/macho/duplicate-strong-error/dup1.c @@ -0,0 +1 @@ +int foo(void) { return 2; } diff --git a/wild/tests/sources/macho/duplicate-strong-error/duplicate-strong-error.c b/wild/tests/sources/macho/duplicate-strong-error/duplicate-strong-error.c new file mode 100644 index 000000000..f6c029498 --- /dev/null +++ b/wild/tests/sources/macho/duplicate-strong-error/duplicate-strong-error.c @@ -0,0 +1,5 @@ +//#Object:dup1.c +//#ExpectError:Duplicate + +int foo(void) { return 1; } +int main() { return foo(); } diff --git a/wild/tests/sources/macho/entry-arg/entry-arg.c b/wild/tests/sources/macho/entry-arg/entry-arg.c new file mode 100644 index 000000000..e4cdbf253 --- /dev/null +++ b/wild/tests/sources/macho/entry-arg/entry-arg.c @@ -0,0 +1,5 @@ +//#LinkArgs:-e _custom_entry +//#RunEnabled:false + +// Tests that -e flag sets a custom entry point. 
+void custom_entry(void) {} diff --git a/wild/tests/sources/macho/exception/exception.cc b/wild/tests/sources/macho/exception/exception.cc new file mode 100644 index 000000000..d9c31461b --- /dev/null +++ b/wild/tests/sources/macho/exception/exception.cc @@ -0,0 +1,14 @@ +//#LinkerDriver:clang++ +//#LinkArgs:-lc++ +//#CompArgs:-std=c++17 + +#include <stdexcept> + +int main() { + try { + throw std::runtime_error("test"); + } catch (const std::runtime_error& e) { + return 42; + } + return 1; +} diff --git a/wild/tests/sources/macho/force-undefined/force-undefined.c b/wild/tests/sources/macho/force-undefined/force-undefined.c new file mode 100644 index 000000000..f6490ee1c --- /dev/null +++ b/wild/tests/sources/macho/force-undefined/force-undefined.c @@ -0,0 +1,10 @@ +//#Archive:lib.a:force-undefined1.c +//#LinkArgs:-u _forced_sym + +// Tests -u flag: forces _forced_sym to be treated as undefined, +// which triggers loading the archive member that defines it. +// Nothing in the main object references forced_sym directly, but main() calls +// get_value() from the same member, so that call alone also loads the member.
+extern int forced_sym; +extern int get_value(void); +int main() { return get_value(); } diff --git a/wild/tests/sources/macho/force-undefined/force-undefined1.c b/wild/tests/sources/macho/force-undefined/force-undefined1.c new file mode 100644 index 000000000..c26cf1100 --- /dev/null +++ b/wild/tests/sources/macho/force-undefined/force-undefined1.c @@ -0,0 +1,2 @@ +int forced_sym = 100; +int get_value(void) { return 42; } diff --git a/wild/tests/sources/macho/global-definitions/global-definitions.c b/wild/tests/sources/macho/global-definitions/global-definitions.c new file mode 100644 index 000000000..0912e4d65 --- /dev/null +++ b/wild/tests/sources/macho/global-definitions/global-definitions.c @@ -0,0 +1,4 @@ +//#Object:global-definitions1.c + +extern int g1, g2, g3; +int main() { return g1 + g2 + g3; } diff --git a/wild/tests/sources/macho/global-definitions/global-definitions1.c b/wild/tests/sources/macho/global-definitions/global-definitions1.c new file mode 100644 index 000000000..4d4b8cc45 --- /dev/null +++ b/wild/tests/sources/macho/global-definitions/global-definitions1.c @@ -0,0 +1 @@ +int g1 = 10, g2 = 20, g3 = 12; diff --git a/wild/tests/sources/macho/got-ref-to-local/got-ref-to-local.c b/wild/tests/sources/macho/got-ref-to-local/got-ref-to-local.c new file mode 100644 index 000000000..d29157530 --- /dev/null +++ b/wild/tests/sources/macho/got-ref-to-local/got-ref-to-local.c @@ -0,0 +1,14 @@ +// Tests that GOT references to local (static) functions work correctly. +// The compiler may generate GOT-indirect references for function pointers. + +static int local_fn1(void) { return 20; } +static int local_fn2(void) { return 22; } + +typedef int (*fnptr)(void); + +// Force GOT-indirect references by taking addresses in a volatile context. 
+int main() { + volatile fnptr f1 = local_fn1; + volatile fnptr f2 = local_fn2; + return f1() + f2(); +} diff --git a/wild/tests/sources/macho/hidden-ref/hidden-ref.c b/wild/tests/sources/macho/hidden-ref/hidden-ref.c new file mode 100644 index 000000000..a15fed414 --- /dev/null +++ b/wild/tests/sources/macho/hidden-ref/hidden-ref.c @@ -0,0 +1,8 @@ +//#Object:hidden-ref1.c + +// Tests that hidden visibility references resolve correctly. +// hidden-ref1.c defines foo() with default visibility. +// This file references it with hidden visibility. +__attribute__((visibility("hidden"))) int foo(void); + +int main() { return foo(); } diff --git a/wild/tests/sources/macho/hidden-ref/hidden-ref1.c b/wild/tests/sources/macho/hidden-ref/hidden-ref1.c new file mode 100644 index 000000000..464a23056 --- /dev/null +++ b/wild/tests/sources/macho/hidden-ref/hidden-ref1.c @@ -0,0 +1 @@ +int foo(void) { return 42; } diff --git a/wild/tests/sources/macho/init-order/init-order.c b/wild/tests/sources/macho/init-order/init-order.c new file mode 100644 index 000000000..219bd53ac --- /dev/null +++ b/wild/tests/sources/macho/init-order/init-order.c @@ -0,0 +1,11 @@ +//#LinkerDriver:clang + +static int order = 0; +static int first_val = 0, second_val = 0; + +__attribute__((constructor(101))) void first(void) { first_val = ++order; } +__attribute__((constructor(102))) void second(void) { second_val = ++order; } + +int main() { + return (first_val == 1 && second_val == 2) ? 
42 : first_val * 10 + second_val; +} diff --git a/wild/tests/sources/macho/input-does-not-exist/input-does-not-exist.c b/wild/tests/sources/macho/input-does-not-exist/input-does-not-exist.c new file mode 100644 index 000000000..f7ac7aaec --- /dev/null +++ b/wild/tests/sources/macho/input-does-not-exist/input-does-not-exist.c @@ -0,0 +1,4 @@ +//#Object:/does/not/exist.o +//#ExpectError:/does/not/exist.o + +int main() { return 0; } diff --git a/wild/tests/sources/macho/local-symbol-refs/local-symbol-refs.c b/wild/tests/sources/macho/local-symbol-refs/local-symbol-refs.c new file mode 100644 index 000000000..a28077e2c --- /dev/null +++ b/wild/tests/sources/macho/local-symbol-refs/local-symbol-refs.c @@ -0,0 +1,5 @@ +//#Object:local-symbol-refs1.c + +static int local_val = 42; +int* get_local_ptr(void); +int main() { return *get_local_ptr() == local_val ? local_val : 1; } diff --git a/wild/tests/sources/macho/local-symbol-refs/local-symbol-refs1.c b/wild/tests/sources/macho/local-symbol-refs/local-symbol-refs1.c new file mode 100644 index 000000000..f1c3060c2 --- /dev/null +++ b/wild/tests/sources/macho/local-symbol-refs/local-symbol-refs1.c @@ -0,0 +1,2 @@ +static int other_val = 42; +int* get_local_ptr(void) { return &other_val; } diff --git a/wild/tests/sources/macho/mixed-sections/mixed-sections.c b/wild/tests/sources/macho/mixed-sections/mixed-sections.c new file mode 100644 index 000000000..6418a2f6b --- /dev/null +++ b/wild/tests/sources/macho/mixed-sections/mixed-sections.c @@ -0,0 +1,26 @@ +// Tests correct handling of multiple section types together: +// __text, __const, __cstring, __data, __bss, __got. +// This exercises the section header generation for the DATA segment. 
+//#LinkerDriver:clang +//#Object:mixed-sections1.c + +#include <string.h> + +extern int mutable_val; +extern const int readonly_table[]; +void bump(void); +const char* get_name(void); + +int main() { + // __data: mutable global + bump(); + if (mutable_val != 11) return 1; + + // __const: read-only table + if (readonly_table[0] + readonly_table[3] != 104) return 2; + + // __cstring: string literal from another TU + if (strcmp(get_name(), "hello") != 0) return 3; + + return 42; +} diff --git a/wild/tests/sources/macho/mixed-sections/mixed-sections1.c b/wild/tests/sources/macho/mixed-sections/mixed-sections1.c new file mode 100644 index 000000000..282ae70c1 --- /dev/null +++ b/wild/tests/sources/macho/mixed-sections/mixed-sections1.c @@ -0,0 +1,4 @@ +int mutable_val = 10; +const int readonly_table[] = {1, 2, 3, 103}; +void bump(void) { mutable_val++; } +const char* get_name(void) { return "hello"; } diff --git a/wild/tests/sources/macho/mutable-globals/mutable-globals.c b/wild/tests/sources/macho/mutable-globals/mutable-globals.c new file mode 100644 index 000000000..bc5e78858 --- /dev/null +++ b/wild/tests/sources/macho/mutable-globals/mutable-globals.c @@ -0,0 +1,12 @@ +// Tests that mutable global data (__data section) works correctly. +//#Object:mutable-globals1.c + +extern int counter; +void increment(void); + +int main() { + increment(); + increment(); + increment(); + return counter == 3 ?
42 : 1; +} diff --git a/wild/tests/sources/macho/mutable-globals/mutable-globals1.c b/wild/tests/sources/macho/mutable-globals/mutable-globals1.c new file mode 100644 index 000000000..a7d87a92f --- /dev/null +++ b/wild/tests/sources/macho/mutable-globals/mutable-globals1.c @@ -0,0 +1,2 @@ +int counter = 0; +void increment(void) { counter++; } diff --git a/wild/tests/sources/macho/relocatables/relocatables.c b/wild/tests/sources/macho/relocatables/relocatables.c new file mode 100644 index 000000000..e0a659d53 --- /dev/null +++ b/wild/tests/sources/macho/relocatables/relocatables.c @@ -0,0 +1,10 @@ +//#Relocatable:relocatables1.c,relocatables2.c + +// Tests -r (partial link / relocatable output). +// Link relocatables1.c and relocatables2.c into a single .o via -r, +// then link that combined .o into the final executable. + +int add(int, int); +int multiply(int, int); + +int main() { return add(30, 12) == 42 && multiply(6, 7) == 42 ? 42 : 1; } diff --git a/wild/tests/sources/macho/relocatables/relocatables1.c b/wild/tests/sources/macho/relocatables/relocatables1.c new file mode 100644 index 000000000..1f7644834 --- /dev/null +++ b/wild/tests/sources/macho/relocatables/relocatables1.c @@ -0,0 +1 @@ +int add(int a, int b) { return a + b; } diff --git a/wild/tests/sources/macho/relocatables/relocatables2.c b/wild/tests/sources/macho/relocatables/relocatables2.c new file mode 100644 index 000000000..20119afa7 --- /dev/null +++ b/wild/tests/sources/macho/relocatables/relocatables2.c @@ -0,0 +1 @@ +int multiply(int a, int b) { return a * b; } diff --git a/wild/tests/sources/macho/rust-build-script-sim/rust-build-script-sim.rs b/wild/tests/sources/macho/rust-build-script-sim/rust-build-script-sim.rs new file mode 100644 index 000000000..b470567d4 --- /dev/null +++ b/wild/tests/sources/macho/rust-build-script-sim/rust-build-script-sim.rs @@ -0,0 +1,64 @@ +//#LinkerDriver:clang + +// Simulates what proc-macro2's build script does: run a subprocess, +// capture output, parse 
strings, write to files. This exercises +// __const vtables, __data globals, __cstring literals, and GOT +// entries together under realistic conditions. + +use std::collections::HashMap; +use std::io::Write; +use std::process::Command; + +fn probe_rustc_version() -> Option { + let output = Command::new("rustc") + .arg("--version") + .output() + .ok()?; + let stdout = String::from_utf8(output.stdout).ok()?; + // Parse "rustc 1.XX.Y (...)" + let version = stdout.split(' ').nth(1)?; + let minor = version.split('.').nth(1)?; + minor.parse().ok() +} + +fn build_feature_map(version: u32) -> HashMap { + let mut features = HashMap::new(); + features.insert("proc_macro".to_string(), version >= 30); + features.insert("span_locations".to_string(), version >= 45); + features.insert("literal_c_string".to_string(), version >= 77); + features.insert("source_text".to_string(), version >= 80); + features.insert("is_available".to_string(), version >= 71); + features +} + +fn write_output(features: &HashMap) -> std::io::Result<()> { + let mut buf = Vec::new(); + for (name, enabled) in features { + if *enabled { + writeln!(buf, "cargo:rustc-cfg={name}")?; + } + } + // Just verify we can produce output, don't actually write to cargo + assert!(!buf.is_empty()); + Ok(()) +} + +fn main() { + let version = probe_rustc_version().unwrap_or(0); + assert!(version > 50, "rustc version too old: {version}"); + + let features = build_feature_map(version); + assert!(features.len() == 5); + assert!(*features.get("proc_macro").unwrap()); + + write_output(&features).expect("write failed"); + + // Exercise format strings and dynamic allocation + let msgs: Vec = (0..100) + .map(|i| format!("cargo:rustc-check-cfg=cfg(feature_{i})")) + .collect(); + assert_eq!(msgs.len(), 100); + assert!(msgs[42].contains("feature_42")); + + std::process::exit(42); +} diff --git a/wild/tests/sources/macho/rust-format-strings/rust-format-strings.rs b/wild/tests/sources/macho/rust-format-strings/rust-format-strings.rs new 
file mode 100644 index 000000000..243d02a48 --- /dev/null +++ b/wild/tests/sources/macho/rust-format-strings/rust-format-strings.rs @@ -0,0 +1,31 @@ +//#LinkerDriver:clang + +// Tests that Rust format strings and string constants are correctly linked +// when combined with thread-local storage. This exercises __const vtables, +// __cstring data, and __thread_vars alignment together. +// The proc-macro2 build script crashes because __thread_vars descriptors +// end up at a non-8-byte-aligned address. + +use std::process::Command; +use std::ffi::OsString; +use std::env; + +fn rustc_minor_version() -> Option { + let rustc: OsString = env::var_os("RUSTC").unwrap_or_else(|| "rustc".into()); + let output = Command::new(rustc).arg("--version").output().ok()?; + let version = std::str::from_utf8(&output.stdout).ok()?; + let mut pieces = version.split('.'); + if pieces.next() != Some("rustc 1") { + return None; + } + pieces.next()?.parse().ok() +} + +fn main() { + let version = rustc_minor_version().unwrap_or(0); + if version > 50 { + let msg = format!("rustc version: 1.{version}"); + assert!(msg.contains("rustc version:"), "format! 
corrupted: {msg:?}"); + } + std::process::exit(42); +} diff --git a/wild/tests/sources/macho/rust-integration/rust-integration.rs b/wild/tests/sources/macho/rust-integration/rust-integration.rs new file mode 100644 index 000000000..4c11cf1c6 --- /dev/null +++ b/wild/tests/sources/macho/rust-integration/rust-integration.rs @@ -0,0 +1,7 @@ +//#LinkerDriver:clang + +fn add(a: i32, b: i32) -> i32 { a + b } + +fn main() { + std::process::exit(add(30, 12)); +} diff --git a/wild/tests/sources/macho/rust-large-data/rust-large-data.rs b/wild/tests/sources/macho/rust-large-data/rust-large-data.rs new file mode 100644 index 000000000..f05647ce1 --- /dev/null +++ b/wild/tests/sources/macho/rust-large-data/rust-large-data.rs @@ -0,0 +1,44 @@ +//#LinkerDriver:clang + +// Tests linking with large __data section (many vtables, string constants, +// and data pointers). This exercises chained fixup rebase entries across +// multiple pages of the DATA segment. + +use std::collections::HashMap; +use std::io::Write; + +fn build_map() -> HashMap> { + let mut map = HashMap::new(); + for i in 0..50 { + let key = format!("key_{i:04}"); + let val: Vec = (0..100).map(|j| ((i * 7 + j * 3) % 256) as u8).collect(); + map.insert(key, val); + } + map +} + +fn format_output(map: &HashMap>) -> Vec { + let mut buf = Vec::new(); + let mut keys: Vec<&String> = map.keys().collect(); + keys.sort(); + for key in keys { + let val = &map[key]; + writeln!(buf, "{}: {} bytes, sum={}", key, val.len(), + val.iter().map(|&b| b as u64).sum::()).unwrap(); + } + buf +} + +fn main() { + let map = build_map(); + assert_eq!(map.len(), 50); + + let output = format_output(&map); + assert!(output.len() > 1000); + + // Verify specific entries + assert!(map.contains_key("key_0042")); + assert_eq!(map["key_0000"].len(), 100); + + std::process::exit(42); +} diff --git a/wild/tests/sources/macho/rust-panic-unwind/rust-panic-unwind.rs b/wild/tests/sources/macho/rust-panic-unwind/rust-panic-unwind.rs new file mode 100644 index 
000000000..31f40b65b --- /dev/null +++ b/wild/tests/sources/macho/rust-panic-unwind/rust-panic-unwind.rs @@ -0,0 +1,4 @@ +fn main() { + let r = std::panic::catch_unwind(|| panic!("test")); + std::process::exit(if r.is_err() { 42 } else { 1 }); +} diff --git a/wild/tests/sources/macho/rust-subprocess/rust-subprocess.rs b/wild/tests/sources/macho/rust-subprocess/rust-subprocess.rs new file mode 100644 index 000000000..d3a9b38d2 --- /dev/null +++ b/wild/tests/sources/macho/rust-subprocess/rust-subprocess.rs @@ -0,0 +1,28 @@ +//#LinkerDriver:clang + +// Tests that string formatting, env vars, and subprocess execution work. +// This exercises __const, __data, __cstring, and GOT entries together — +// similar to what proc-macro2's build script does. + +use std::env; +use std::process::Command; + +fn main() { + // String formatting exercises __const vtables and __cstring data. + let msg = format!("hello {} world", 42); + assert_eq!(msg, "hello 42 world"); + + // Env var access exercises libc GOT entries. + env::set_var("WILD_TEST_VAR", "test_value"); + let val = env::var("WILD_TEST_VAR").unwrap(); + assert_eq!(val, "test_value"); + + // Subprocess execution exercises many sections together. 
+ let output = Command::new("echo") + .arg("hi") + .output() + .expect("failed to run echo"); + assert!(output.status.success()); + + std::process::exit(42); +} diff --git a/wild/tests/sources/macho/rust-tls/rust-tls.rs b/wild/tests/sources/macho/rust-tls/rust-tls.rs new file mode 100644 index 000000000..8b6eece64 --- /dev/null +++ b/wild/tests/sources/macho/rust-tls/rust-tls.rs @@ -0,0 +1,18 @@ +//#LinkerDriver:clang + +use std::cell::Cell; +use std::thread; + +thread_local!(static FOO: Cell = Cell::new(1)); + +fn main() { + assert_eq!(FOO.get(), 1); + FOO.set(2); + let t = thread::spawn(move || { + assert_eq!(FOO.get(), 1); + FOO.set(3); + }); + t.join().unwrap(); + assert_eq!(FOO.get(), 2); + std::process::exit(42); +} diff --git a/wild/tests/sources/macho/shared-basic/shared-basic-lib.c b/wild/tests/sources/macho/shared-basic/shared-basic-lib.c new file mode 100644 index 000000000..2888439f1 --- /dev/null +++ b/wild/tests/sources/macho/shared-basic/shared-basic-lib.c @@ -0,0 +1 @@ +int get_value(void) { return 42; } diff --git a/wild/tests/sources/macho/shared-basic/shared-basic.c b/wild/tests/sources/macho/shared-basic/shared-basic.c new file mode 100644 index 000000000..f0255a9e1 --- /dev/null +++ b/wild/tests/sources/macho/shared-basic/shared-basic.c @@ -0,0 +1,6 @@ +//#LinkerDriver:clang +//#Shared:shared-basic-lib.c + +// Tests basic dylib creation and linking. +extern int get_value(void); +int main() { return get_value(); } diff --git a/wild/tests/sources/macho/string-constants/string-constants.c b/wild/tests/sources/macho/string-constants/string-constants.c new file mode 100644 index 000000000..d93d3a9c2 --- /dev/null +++ b/wild/tests/sources/macho/string-constants/string-constants.c @@ -0,0 +1,15 @@ +//#Contains:Hello World + +// Test that string literals are present and the binary links correctly. 
+const char* get_str1(void) { return "Hello World"; } +const char* get_str2(void) { return "Hello World"; } + +int main() { + // Whether the linker merges identical strings is an optimisation choice. + // We just verify the values are correct. + const char* a = get_str1(); + const char* b = get_str2(); + if (a[0] != 'H') return 1; + if (b[0] != 'H') return 2; + return 42; +} diff --git a/wild/tests/sources/macho/string-merging/string-merging.c b/wild/tests/sources/macho/string-merging/string-merging.c new file mode 100644 index 000000000..04a094195 --- /dev/null +++ b/wild/tests/sources/macho/string-merging/string-merging.c @@ -0,0 +1,13 @@ +//#Object:string-merging1.c +//#Contains:Hello Wild + +extern const char* get_str1(void); +const char* get_str2(void) { return "Hello Wild"; } +int main() { + const char* a = get_str1(); + const char* b = get_str2(); + if (a[0] != 'H') return 1; + if (b[0] != 'H') return 2; + // String merging is optional — just verify both are correct. + return 42; +} diff --git a/wild/tests/sources/macho/string-merging/string-merging1.c b/wild/tests/sources/macho/string-merging/string-merging1.c new file mode 100644 index 000000000..a737bc59b --- /dev/null +++ b/wild/tests/sources/macho/string-merging/string-merging1.c @@ -0,0 +1 @@ +const char* get_str1(void) { return "Hello Wild"; } diff --git a/wild/tests/sources/macho/tls-alignment/tls-alignment.c b/wild/tests/sources/macho/tls-alignment/tls-alignment.c new file mode 100644 index 000000000..53a658697 --- /dev/null +++ b/wild/tests/sources/macho/tls-alignment/tls-alignment.c @@ -0,0 +1,12 @@ +// Tests that TLS variables are properly aligned when preceded by +// odd-sized data sections. The __thread_vars descriptors must be +// 8-byte aligned for dyld to process them correctly. +//#Object:tls-alignment1.c + +extern __thread int tls_val; +int get_tls(void); + +int main() { + tls_val = 10; + return get_tls() == 10 ? 
42 : 1; +} diff --git a/wild/tests/sources/macho/tls-alignment/tls-alignment1.c b/wild/tests/sources/macho/tls-alignment/tls-alignment1.c new file mode 100644 index 000000000..0fe74332f --- /dev/null +++ b/wild/tests/sources/macho/tls-alignment/tls-alignment1.c @@ -0,0 +1,6 @@ +// Data whose size is not a multiple of 8, to misalign subsequent sections +const char padding[] = + "abc"; // 4 bytes including NUL — deliberately not a multiple of 8 + +__thread int tls_val = 0; +int get_tls(void) { return tls_val; } diff --git a/wild/tests/sources/macho/tls/tls.c b/wild/tests/sources/macho/tls/tls.c new file mode 100644 index 000000000..f61d16748 --- /dev/null +++ b/wild/tests/sources/macho/tls/tls.c @@ -0,0 +1,5 @@ +//#Object:tls1.c + +extern __thread int tls_var; +int get_tls(void); +int main() { return tls_var + get_tls(); } diff --git a/wild/tests/sources/macho/tls/tls1.c b/wild/tests/sources/macho/tls/tls1.c new file mode 100644 index 000000000..d490b0623 --- /dev/null +++ b/wild/tests/sources/macho/tls/tls1.c @@ -0,0 +1,2 @@ +__thread int tls_var = 20; +int get_tls(void) { return tls_var + 2; } diff --git a/wild/tests/sources/macho/trivial-dynamic/trivial-dynamic.c b/wild/tests/sources/macho/trivial-dynamic/trivial-dynamic.c new file mode 100644 index 000000000..10d4efe5d --- /dev/null +++ b/wild/tests/sources/macho/trivial-dynamic/trivial-dynamic.c @@ -0,0 +1,6 @@ +//#LinkerDriver:clang +//#Shared:trivial-dynamic1.c + +// Tests basic dynamic linking with a shared library.
+extern int dyn_func(void); +int main() { return dyn_func(); } diff --git a/wild/tests/sources/macho/trivial-dynamic/trivial-dynamic1.c b/wild/tests/sources/macho/trivial-dynamic/trivial-dynamic1.c new file mode 100644 index 000000000..aebe18cfa --- /dev/null +++ b/wild/tests/sources/macho/trivial-dynamic/trivial-dynamic1.c @@ -0,0 +1 @@ +int dyn_func(void) { return 42; } diff --git a/wild/tests/sources/macho/trivial-main/trivial-main.c b/wild/tests/sources/macho/trivial-main/trivial-main.c new file mode 100644 index 000000000..f15ebf8d8 --- /dev/null +++ b/wild/tests/sources/macho/trivial-main/trivial-main.c @@ -0,0 +1,3 @@ +//#LinkerDriver:clang + +int main() { return 42; } diff --git a/wild/tests/sources/macho/trivial/trivial.c b/wild/tests/sources/macho/trivial/trivial.c new file mode 100644 index 000000000..dbff7309f --- /dev/null +++ b/wild/tests/sources/macho/trivial/trivial.c @@ -0,0 +1 @@ +int main() { return 42; } diff --git a/wild/tests/sources/macho/undefined-symbol-error/undefined-symbol-error.c b/wild/tests/sources/macho/undefined-symbol-error/undefined-symbol-error.c new file mode 100644 index 000000000..e2e9aa4b9 --- /dev/null +++ b/wild/tests/sources/macho/undefined-symbol-error/undefined-symbol-error.c @@ -0,0 +1,5 @@ +//#LinkerDriver:clang +//#ExpectError:undefined + +int missing_fn(void); +int main() { return missing_fn(); } diff --git a/wild/tests/sources/macho/undefined-weak-and-strong/undefined-weak-and-strong.c b/wild/tests/sources/macho/undefined-weak-and-strong/undefined-weak-and-strong.c new file mode 100644 index 000000000..419dfcc20 --- /dev/null +++ b/wild/tests/sources/macho/undefined-weak-and-strong/undefined-weak-and-strong.c @@ -0,0 +1,11 @@ +//#Object:undefined-weak-and-strong1.c +//#LinkerDriver:clang +//#ExpectError:foo + +void __attribute__((weak)) foo(void); +void call_foo(void); +int main() { + if (foo) foo(); + call_foo(); + return 42; +} diff --git 
a/wild/tests/sources/macho/undefined-weak-and-strong/undefined-weak-and-strong1.c b/wild/tests/sources/macho/undefined-weak-and-strong/undefined-weak-and-strong1.c new file mode 100644 index 000000000..81a72045d --- /dev/null +++ b/wild/tests/sources/macho/undefined-weak-and-strong/undefined-weak-and-strong1.c @@ -0,0 +1,2 @@ +void foo(void); +void call_foo(void) { foo(); } diff --git a/wild/tests/sources/macho/undefined-weak-sym/undefined-weak-sym.c b/wild/tests/sources/macho/undefined-weak-sym/undefined-weak-sym.c new file mode 100644 index 000000000..8a983bf52 --- /dev/null +++ b/wild/tests/sources/macho/undefined-weak-sym/undefined-weak-sym.c @@ -0,0 +1,7 @@ +//#LinkerDriver:clang + +int __attribute__((weak)) foo(void); +int main() { + if (foo) return foo(); + return 42; // foo is NULL, so we take this path +} diff --git a/wild/tests/sources/macho/visibility-merging/visibility-merging.c b/wild/tests/sources/macho/visibility-merging/visibility-merging.c new file mode 100644 index 000000000..05c7ddfac --- /dev/null +++ b/wild/tests/sources/macho/visibility-merging/visibility-merging.c @@ -0,0 +1,11 @@ +//#Object:visibility-merging1.c + +// Tests that when two objects define the same symbol with different visibility, +// the more restrictive visibility wins. +// data1: default in this file, hidden in the other → hidden wins. +// data2: stays default → exported. + +int data1 __attribute__((weak)) = 0x42; +int data2 __attribute__((weak)) = 42; + +int main() { return data2; } diff --git a/wild/tests/sources/macho/visibility-merging/visibility-merging1.c b/wild/tests/sources/macho/visibility-merging/visibility-merging1.c new file mode 100644 index 000000000..2dc4e4b7e --- /dev/null +++ b/wild/tests/sources/macho/visibility-merging/visibility-merging1.c @@ -0,0 +1,2 @@ +// Hidden definition of data1 — should make the merged symbol hidden. 
+int data1 __attribute__((weak, visibility("hidden"))) = 0x100; diff --git a/wild/tests/sources/macho/weak-entry/weak-entry.c b/wild/tests/sources/macho/weak-entry/weak-entry.c new file mode 100644 index 000000000..38a0e98fc --- /dev/null +++ b/wild/tests/sources/macho/weak-entry/weak-entry.c @@ -0,0 +1,4 @@ +// Tests that a strong definition of main overrides a weak one. +//#Object:weak-entry1.c + +__attribute__((weak)) int main() { return 5; } diff --git a/wild/tests/sources/macho/weak-entry/weak-entry1.c b/wild/tests/sources/macho/weak-entry/weak-entry1.c new file mode 100644 index 000000000..dbff7309f --- /dev/null +++ b/wild/tests/sources/macho/weak-entry/weak-entry1.c @@ -0,0 +1 @@ +int main() { return 42; } diff --git a/wild/tests/sources/macho/weak-fns-archive/weak-fns-archive.c b/wild/tests/sources/macho/weak-fns-archive/weak-fns-archive.c new file mode 100644 index 000000000..5032936c6 --- /dev/null +++ b/wild/tests/sources/macho/weak-fns-archive/weak-fns-archive.c @@ -0,0 +1,7 @@ +//#Archive:lib.a:weak-fns-archive1.c + +// Tests that an archive member is loaded to resolve an undefined symbol, +// even when the main object has other weak definitions. 
+int __attribute__((weak)) unused_weak(void) { return 0; } +int get_value(void); +int main() { return get_value() + unused_weak(); } diff --git a/wild/tests/sources/macho/weak-fns-archive/weak-fns-archive1.c b/wild/tests/sources/macho/weak-fns-archive/weak-fns-archive1.c new file mode 100644 index 000000000..2888439f1 --- /dev/null +++ b/wild/tests/sources/macho/weak-fns-archive/weak-fns-archive1.c @@ -0,0 +1 @@ +int get_value(void) { return 42; } diff --git a/wild/tests/sources/macho/weak-fns/weak-fns.c b/wild/tests/sources/macho/weak-fns/weak-fns.c new file mode 100644 index 000000000..728692595 --- /dev/null +++ b/wild/tests/sources/macho/weak-fns/weak-fns.c @@ -0,0 +1,4 @@ +//#Object:weak-fns1.c + +int __attribute__((weak)) get_value(void) { return 1; } +int main() { return get_value(); } diff --git a/wild/tests/sources/macho/weak-fns/weak-fns1.c b/wild/tests/sources/macho/weak-fns/weak-fns1.c new file mode 100644 index 000000000..9a4805937 --- /dev/null +++ b/wild/tests/sources/macho/weak-fns/weak-fns1.c @@ -0,0 +1,2 @@ +// Strong override of the weak get_value in weak-fns.c +int get_value(void) { return 42; } diff --git a/wild/tests/sources/macho/weak-override-archive/weak-override-archive.c b/wild/tests/sources/macho/weak-override-archive/weak-override-archive.c new file mode 100644 index 000000000..16765196b --- /dev/null +++ b/wild/tests/sources/macho/weak-override-archive/weak-override-archive.c @@ -0,0 +1,16 @@ +// Tests that on Mach-O, archives only satisfy undefined references. +// A weak definition in an object is NOT overridden by a strong one in an +// archive. 
+//#Object:weak-override-archive1.c +//#Archive:weak-override-archive2.c + +__attribute__((weak)) int foo(void) { return 1; } +int bar(void); + +int main() { + // foo stays 1 (weak def in this TU; archive not pulled in since foo is + // defined) bar is 10 (from companion object) + if (foo() != 1) return foo(); + if (bar() != 10) return bar(); + return 42; +} diff --git a/wild/tests/sources/macho/weak-override-archive/weak-override-archive1.c b/wild/tests/sources/macho/weak-override-archive/weak-override-archive1.c new file mode 100644 index 000000000..8730c4af2 --- /dev/null +++ b/wild/tests/sources/macho/weak-override-archive/weak-override-archive1.c @@ -0,0 +1 @@ +int bar(void) { return 10; } diff --git a/wild/tests/sources/macho/weak-override-archive/weak-override-archive2.c b/wild/tests/sources/macho/weak-override-archive/weak-override-archive2.c new file mode 100644 index 000000000..3167837f0 --- /dev/null +++ b/wild/tests/sources/macho/weak-override-archive/weak-override-archive2.c @@ -0,0 +1 @@ +int foo(void) { return 2; } diff --git a/wild/tests/sources/macho/weak-vars-archive/weak-vars-archive.c b/wild/tests/sources/macho/weak-vars-archive/weak-vars-archive.c new file mode 100644 index 000000000..82ada4591 --- /dev/null +++ b/wild/tests/sources/macho/weak-vars-archive/weak-vars-archive.c @@ -0,0 +1,5 @@ +//#Archive:lib.a:weak-vars-archive1.c + +// Tests that an archive member providing a needed symbol is loaded. 
+extern int value; +int main() { return value; } diff --git a/wild/tests/sources/macho/weak-vars-archive/weak-vars-archive1.c b/wild/tests/sources/macho/weak-vars-archive/weak-vars-archive1.c new file mode 100644 index 000000000..2498550d5 --- /dev/null +++ b/wild/tests/sources/macho/weak-vars-archive/weak-vars-archive1.c @@ -0,0 +1 @@ +int value = 42; diff --git a/wild/tests/sources/macho/weak-vars/weak-vars.c b/wild/tests/sources/macho/weak-vars/weak-vars.c new file mode 100644 index 000000000..291752e34 --- /dev/null +++ b/wild/tests/sources/macho/weak-vars/weak-vars.c @@ -0,0 +1,4 @@ +//#Object:weak-vars1.c + +int __attribute__((weak)) value = 1; +int main() { return value; } diff --git a/wild/tests/sources/macho/weak-vars/weak-vars1.c b/wild/tests/sources/macho/weak-vars/weak-vars1.c new file mode 100644 index 000000000..f4cfe23c3 --- /dev/null +++ b/wild/tests/sources/macho/weak-vars/weak-vars1.c @@ -0,0 +1,2 @@ +// Strong override of the weak value in weak-vars.c +int value = 42; diff --git a/wild/tests/sources/macho/whole-archive/whole-archive.c b/wild/tests/sources/macho/whole-archive/whole-archive.c new file mode 100644 index 000000000..451a1bee0 --- /dev/null +++ b/wild/tests/sources/macho/whole-archive/whole-archive.c @@ -0,0 +1,10 @@ +//#Archive:lib.a:whole-archive1.c +//#LinkArgs:-all_load +//#LinkerDriver:clang + +// Tests -all_load: forces all archive members to load, even unreferenced ones. +// whole-archive1.c defines get_value() which main calls. +// The main object only declares get_value (extern) and calls it; it does not define it. +// -all_load ensures the archive member is loaded regardless. +int get_value(void); +int main() { return get_value(); } diff --git a/wild/tests/sources/macho/whole-archive/whole-archive1.c b/wild/tests/sources/macho/whole-archive/whole-archive1.c new file mode 100644 index 000000000..2888439f1 --- /dev/null +++ b/wild/tests/sources/macho/whole-archive/whole-archive1.c @@ -0,0 +1 @@ +int get_value(void) { return 42; }