Skip to content

Commit f1ce087

Browse files
committed
Add two-stage ARM64 trampoline allocation with instruction expansion
The previous fix used alloc_near with a fixed ±128MB range for ARM64 trampolines. This fails on some VM layouts where no free memory exists within that range.
- Try ±128MB first (all relocations fit in-place), then ±4GB (covers ADRP), then an unrestricted mmap as the final fallback.
- When a relocated instruction overflows its immediate range, expand it to an absolute sequence (LDR X16 + BR X16 for branches, MOVZ/MOVK for address loads, inverted condition + absolute branch for conditionals).
- The trampoline is now variable-length: Vec<u32> instead of fixed offsets.
- Dynamic error messages in alloc_near show the actual max_range.
- 7 new unit tests covering all expansion types (B, BL, ADR, ADRP, CBZ, TBZ) plus the near-stays-single case.
1 parent 9950f5b commit f1ce087

2 files changed

Lines changed: 350 additions & 42 deletions

File tree

rust-mod/src/hook/aarch64.rs

Lines changed: 294 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,194 @@ pub fn relocate(insn: u32, reloc: &Reloc, old_pc: u64, new_pc: u64) -> Result<u3
7070
}
7171
}
7272

73+
// ---- Relocate-or-expand ----------------------------------------------------
74+
75+
/// Outcome of relocating one instruction into the trampoline.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so results can be logged and compared
/// directly in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RelocResult {
    /// The instruction was re-encoded in place and still occupies a single 4-byte word.
    Single(u32),
    /// The instruction was replaced by a multi-instruction absolute sequence; each element is
    /// one 32-bit word of that sequence.
    Expanded(Vec<u32>),
}
82+
83+
/// Relocate an instruction, falling back to an expanded absolute sequence on overflow.
84+
///
85+
/// Tries the compact in-place relocation first. If the displacement exceeds the instruction's
86+
/// immediate range, it generates a longer sequence that uses absolute addressing.
87+
pub fn relocate_or_expand(
88+
insn: u32,
89+
reloc: &Reloc,
90+
old_pc: u64,
91+
new_pc: u64,
92+
) -> Result<RelocResult, String> {
93+
match relocate(insn, reloc, old_pc, new_pc) {
94+
Ok(relocated) => Ok(RelocResult::Single(relocated)),
95+
Err(_) => {
96+
let words = expand(insn, reloc, old_pc, new_pc)?;
97+
Ok(RelocResult::Expanded(words))
98+
}
99+
}
100+
}
101+
102+
/// Generate an expanded absolute sequence for an instruction that cannot be relocated in-place.
103+
fn expand(insn: u32, reloc: &Reloc, old_pc: u64, new_pc: u64) -> Result<Vec<u32>, String> {
104+
match reloc {
105+
Reloc::Branch26 => expand_branch26(insn, old_pc),
106+
Reloc::Adr => expand_adr(insn, old_pc),
107+
Reloc::Adrp => expand_adrp(insn, old_pc),
108+
Reloc::Imm19 => expand_imm19(insn, old_pc, new_pc),
109+
Reloc::Imm14 => expand_imm14(insn, old_pc, new_pc),
110+
}
111+
}
112+
113+
/// Expand B/BL to an absolute branch via LDR X16 + BR/BLR X16.
114+
///
115+
/// **B (unconditional):** `LDR X16, #8; BR X16; <abs_target>` (4 words)
116+
/// **BL (with a link):** `ADR X30, #20; LDR X16, #12; BR X16; <abs_target>` (5 words).
117+
/// ADR sets LR to the instruction after the sequence so the callee returns correctly.
118+
fn expand_branch26(insn: u32, old_pc: u64) -> Result<Vec<u32>, String> {
119+
let imm26 = (insn & 0x03FF_FFFF) as i32;
120+
let imm26 = (imm26 << 6) >> 6;
121+
let abs_target = old_pc.wrapping_add((imm26 as i64 as u64) << 2);
122+
let is_bl = insn & 0x8000_0000 != 0;
123+
124+
if is_bl {
125+
// ADR X30, #20 (X30 = PC + 20, points past the 8-byte literal)
126+
// LDR X16, #12 (load abs_target from PC+12)
127+
// BR X16
128+
// <abs_target: u64>
129+
let adr_x30 = encode_imm21(0x1000_0000 | 30, 20); // ADR X30, #20
130+
let [lo, hi] = split_u64(abs_target);
131+
Ok(vec![adr_x30, 0x5800_0070, 0xD61F_0200, lo, hi])
132+
} else {
133+
// LDR X16, #8
134+
// BR X16
135+
// <abs_target: u64>
136+
let [lo, hi] = split_u64(abs_target);
137+
Ok(vec![0x5800_0050, 0xD61F_0200, lo, hi])
138+
}
139+
}
140+
141+
/// Expand ADR to a MOVZ/MOVK sequence that loads the absolute address into the same register.
142+
fn expand_adr(insn: u32, old_pc: u64) -> Result<Vec<u32>, String> {
143+
let imm21 = extract_imm21(insn);
144+
let abs_target = old_pc.wrapping_add(imm21 as i64 as u64);
145+
let rd = insn & 0x1F;
146+
Ok(movz_movk_sequence(rd, abs_target))
147+
}
148+
149+
/// Expand ADRP to a MOVZ/MOVK sequence that loads the absolute page address into the same register.
150+
fn expand_adrp(insn: u32, old_pc: u64) -> Result<Vec<u32>, String> {
151+
let imm21 = extract_imm21(insn);
152+
let old_page = old_pc & !0xFFF;
153+
let abs_page = old_page.wrapping_add((imm21 as i64 as u64) << 12);
154+
let rd = insn & 0x1F;
155+
Ok(movz_movk_sequence(rd, abs_page))
156+
}
157+
158+
/// Expand an Imm19 instruction (B.cond, CBZ, CBNZ, LDR literal).
159+
///
160+
/// For branches: invert the condition to skip over an absolute branch.
161+
/// For LDR literal: load the absolute address into X16, then LDR Rd, [X16].
162+
fn expand_imm19(insn: u32, old_pc: u64, new_pc: u64) -> Result<Vec<u32>, String> {
163+
let imm19 = ((insn >> 5) & 0x7FFFF) as i32;
164+
let imm19 = (imm19 << 13) >> 13;
165+
let abs_target = old_pc.wrapping_add((imm19 as i64 as u64) << 2);
166+
167+
if insn & 0x3B00_0000 == 0x1800_0000 {
168+
// LDR literal: load address into X16, then LDR Rd, [X16], then B past the literal.
169+
let rd = insn & 0x1F;
170+
let opc = (insn >> 30) & 0x3; // 00=32-bit, 01=64-bit, 10=SIMD
171+
let v = (insn >> 26) & 0x1; // 0=GPR, 1=SIMD/FP
172+
173+
// LDR X16, #12 (load the absolute data address)
174+
let ldr_x16 = 0x5800_0070_u32; // LDR X16, [PC, #12]
175+
// LDR Rd, [X16] with correct size
176+
let ldr_rd = encode_ldr_unsigned(opc, v, rd, 16); // Rd = [X16]
177+
// B #12 (skip the 8-byte literal)
178+
let b_skip = 0x1400_0003_u32; // B #12
179+
let [lo, hi] = split_u64(abs_target);
180+
Ok(vec![ldr_x16, ldr_rd, b_skip, lo, hi])
181+
} else {
182+
// Conditional branch: invert condition, skip over absolute branch.
183+
// <inverted_cond> #+20 (skip 5 words to land after the sequence)
184+
// LDR X16, #8
185+
// BR X16
186+
// <abs_target: u64>
187+
let inverted = invert_imm19_branch(insn, new_pc)?;
188+
let [lo, hi] = split_u64(abs_target);
189+
Ok(vec![inverted, 0x5800_0050, 0xD61F_0200, lo, hi])
190+
}
191+
}
192+
193+
/// Expand a TBZ/TBNZ instruction by inverting and skipping over an absolute branch.
194+
fn expand_imm14(insn: u32, old_pc: u64, _new_pc: u64) -> Result<Vec<u32>, String> {
195+
let imm14 = ((insn >> 5) & 0x3FFF) as i32;
196+
let imm14 = (imm14 << 18) >> 18;
197+
let abs_target = old_pc.wrapping_add((imm14 as i64 as u64) << 2);
198+
199+
// Invert TBZ↔TBNZ, target = skip 5 words (+20 bytes)
200+
let inverted = insn ^ 0x0100_0000; // flip bit 24 (TBZ↔TBNZ)
201+
let skip_imm14 = 5_u32; // +20 bytes = 5 words, >>2 = 5
202+
let inverted = (inverted & 0xFFF8_001F) | ((skip_imm14 & 0x3FFF) << 5);
203+
204+
let [lo, hi] = split_u64(abs_target);
205+
Ok(vec![inverted, 0x5800_0050, 0xD61F_0200, lo, hi])
206+
}
207+
208+
// ---- Expansion helpers ----------------------------------------------------
209+
210+
/// Build the four-instruction sequence that loads the 64-bit `value` into register `rd`:
/// one MOVZ for bits 0..16 followed by three MOVKs for the remaining 16-bit halfwords.
///
/// All four words are always emitted, even when a halfword is zero. `rd` is OR-ed into the
/// low bits without masking.
fn movz_movk_sequence(rd: u32, value: u64) -> Vec<u32> {
    (0..4u32)
        .map(|hw| {
            let imm16 = ((value >> (16 * hw)) & 0xFFFF) as u32;
            // Halfword 0 uses MOVZ (zeroes the rest); the others use MOVK (keeps the rest).
            let opcode = if hw == 0 { 0xD280_0000 } else { 0xF280_0000 };
            // `hw` occupies bits [22:21] and selects the LSL #0/16/32/48 shift.
            opcode | (hw << 21) | (imm16 << 5) | rd
        })
        .collect()
}
223+
224+
/// Encode the register-base equivalent (unsigned offset 0) of a PC-relative literal load.
///
/// `opc` and `v` are bits [31:30] and bit 26 of the original *literal* instruction. They do
/// not map directly onto the `size`/`opc` fields of the unsigned-offset form, so the
/// translation is table-driven:
///
/// | `v` | `opc` | literal form   | emitted                      |
/// |-----|-------|----------------|------------------------------|
/// | 0   | 00    | `LDR Wt`       | `LDR Wt, [Xn]`               |
/// | 0   | 01    | `LDR Xt`       | `LDR Xt, [Xn]`               |
/// | 0   | 10    | `LDRSW Xt`     | `LDRSW Xt, [Xn]`             |
/// | 0   | 11    | `PRFM`         | `PRFM <prfop>, [Xn]`         |
/// | 1   | 00    | `LDR St`       | `LDR St, [Xn]`               |
/// | 1   | 01    | `LDR Dt`       | `LDR Dt, [Xn]`               |
/// | 1   | 10    | `LDR Qt`       | `LDR Qt, [Xn]`               |
/// | 1   | 11    | (unallocated)  | unallocated encoding         |
///
/// `rd` is the transfer register (or prefetch operation for PRFM) and `rn` the base
/// register; both are masked to 5 bits, and `opc`/`v` to their field widths.
fn encode_ldr_unsigned(opc: u32, v: u32, rd: u32, rn: u32) -> u32 {
    // Each base already carries size[31:30], V[26] and opc[23:22] for a *load*; leaving
    // opc[23:22] at 00 would encode a store instead.
    let base: u32 = match (v & 0x1, opc & 0x3) {
        (0, 0b00) => 0xB940_0000, // LDR Wt, [Xn]
        (0, 0b01) => 0xF940_0000, // LDR Xt, [Xn]
        (0, 0b10) => 0xB980_0000, // LDRSW Xt, [Xn]
        (0, _) => 0xF980_0000,    // PRFM <prfop>, [Xn]
        (_, 0b00) => 0xBD40_0000, // LDR St, [Xn]
        (_, 0b01) => 0xFD40_0000, // LDR Dt, [Xn]
        (_, 0b10) => 0x3DC0_0000, // LDR Qt, [Xn]
        // The literal form is unallocated here, so emit an unallocated encoding too rather
        // than silently turning it into a valid access.
        (_, _) => 0xFDC0_0000,
    };
    base | ((rn & 0x1F) << 5) | (rd & 0x1F)
}
234+
235+
/// Invert a B.cond/CBZ/CBNZ and retarget it to skip 5 words (+20 bytes), i.e. to land just
/// past a 5-word expansion that starts with the returned instruction.
///
/// * **B.cond:** bit 0 of the condition field is flipped (EQ<->NE, LT<->GE, ...). The
///   conditions `AL` and `NV` (0b1110 / 0b1111) both mean "always", so they cannot be
///   inverted; for those the returned instruction targets the next word instead, so execution
///   always continues into the absolute branch that follows.
/// * **CBZ/CBNZ:** bit 24 is toggled; the register-width bit and register number are kept.
///
/// `_new_pc` is unused.
///
/// # Errors
///
/// Returns an error naming the instruction word when it is none of the three forms above.
fn invert_imm19_branch(insn: u32, _new_pc: u64) -> Result<u32, String> {
    // Everything except the imm19 field (bits [23:5]).
    const KEEP_MASK: u32 = 0xFF00_001F;
    // imm19 = 5 words: jump past the whole 5-word sequence.
    const SKIP_SEQUENCE: u32 = 5 << 5;
    // imm19 = 1 word: continue with the next instruction.
    const NEXT_WORD: u32 = 1 << 5;

    if insn & 0xFF00_0010 == 0x5400_0000 {
        let fixed = insn & KEEP_MASK;
        if insn & 0xE == 0xE {
            // AL / NV: always taken, and flipping bit 0 only swaps one for the other.
            return Ok(fixed | NEXT_WORD);
        }
        return Ok((fixed ^ 0x1) | SKIP_SEQUENCE);
    }

    match insn & 0x7F00_0000 {
        // CBZ <-> CBNZ differ only in bit 24.
        0x3400_0000 | 0x3500_0000 => Ok(((insn ^ 0x0100_0000) & KEEP_MASK) | SKIP_SEQUENCE),
        _ => Err(format!("unsupported imm19 instruction for expansion: {insn:#010x}")),
    }
}
255+
256+
/// Break a 64-bit value into two 32-bit words, low word first, so it can be embedded in a
/// little-endian instruction stream as a pair of `u32`s.
fn split_u64(value: u64) -> [u32; 2] {
    let low = (value & 0xFFFF_FFFF) as u32;
    let high = (value >> 32) as u32;
    [low, high]
}
260+
73261
// ---- ADRP (page-relative, 21-bit signed, <<12) -----------------------------
74262

75263
/// Extract the 21-bit signed immediate from an ADRP/ADR instruction.
@@ -369,4 +557,110 @@ mod tests {
369557
assert_eq!(decoded, val, "round-trip failed for {val}");
370558
}
371559
}
560+
561+
// -- expand: Branch26 ---------------------------------------------------
562+
563+
#[test]
fn expand_b_far_away() {
    // B #0x100 at PC=0x1000 with the trampoline 1GB away, so in-place relocation overflows.
    let b_insn = 0x1400_0000 | 64; // imm26 = 64 words = 256 bytes
    let expanded = relocate_or_expand(b_insn, &Reloc::Branch26, 0x1000, 0x4000_0000).unwrap();
    let RelocResult::Expanded(words) = expanded else { panic!("expected Expanded") };

    // LDR X16, #8 + BR X16 + 8-byte absolute address.
    assert_eq!(words.len(), 4);
    assert_eq!(words[0], 0x5800_0050);
    assert_eq!(words[1], 0xD61F_0200);

    // The embedded literal is the original branch target: 0x1000 + 64 * 4.
    let target = (u64::from(words[3]) << 32) | u64::from(words[2]);
    assert_eq!(target, 0x1100);
}
576+
577+
#[test]
fn expand_bl_far_away() {
    // BL #0x100 at PC=0x2000, trampoline 1GB away.
    let insn = 0x9400_0000 | 64; // BL #256
    let result = relocate_or_expand(insn, &Reloc::Branch26, 0x2000, 0x4000_0000);
    let RelocResult::Expanded(words) = result.unwrap() else { panic!("expected Expanded") };

    // Layout: [0] ADR X30, [1] LDR X16, [2] BR X16, [3..5] 8-byte absolute target.
    assert_eq!(words.len(), 5);

    // ADR X30 must point just past the 5-word sequence (+20 bytes from word 0) so the callee
    // returns to the instruction that follows it.
    assert_eq!(words[0] & 0x9F00_001F, 0x1000_001E, "word 0 should be ADR X30");
    let adr_offset = (((words[0] >> 5) & 0x7FFFF) << 2) | ((words[0] >> 29) & 0x3);
    assert_eq!(adr_offset, 20);

    // The literal load sits at word 1 and must read the literal at word 3, i.e. a
    // PC-relative offset of 2 words. Decode the offset rather than pinning a magic constant.
    assert_eq!(words[1] & 0xFF00_001F, 0x5800_0010, "word 1 should be LDR X16, <literal>");
    let ldr_word_offset = ((words[1] >> 5) & 0x7FFFF) as usize;
    assert_eq!(1 + ldr_word_offset, 3, "LDR must load from the embedded target");

    assert_eq!(words[2], 0xD61F_0200); // BR X16

    // Absolute target = 0x2000 + 64*4 = 0x2100
    let target = words[3] as u64 | ((words[4] as u64) << 32);
    assert_eq!(target, 0x2100);
}
592+
593+
// -- expand: ADR/ADRP --------------------------------------------------
594+
595+
#[test]
fn expand_adr_far_away() {
    // ADR X5, #100 at PC=0x1000 with the trampoline 2MB away (beyond ADR's reach).
    let adr_x5 = encode_imm21(0x1000_0000 | 5, 100);
    let expanded = relocate_or_expand(adr_x5, &Reloc::Adr, 0x1000, 0x20_0000).unwrap();
    let RelocResult::Expanded(words) = expanded else { panic!("expected Expanded") };

    // MOVZ + 3x MOVK.
    assert_eq!(words.len(), 4);

    // The MOVZ must write X5 and carry the low 16 bits of 0x1000 + 100 = 0x1064.
    let movz = words[0];
    assert_eq!(movz & 0x1F, 5);
    assert_eq!((movz >> 5) & 0xFFFF, 0x1064);
}
609+
610+
#[test]
fn expand_adrp_far_away() {
    // ADRP X8, #1 at PC=0x1000 (target page = 0x1000 + 1 * 4096 = 0x2000), with the
    // trampoline 8GB away so the page delta no longer fits.
    let adrp_x8 = encode_imm21(0x9000_0000 | 8, 1);
    let expanded = relocate_or_expand(adrp_x8, &Reloc::Adrp, 0x1000, 0x2_0000_0000).unwrap();
    let RelocResult::Expanded(words) = expanded else { panic!("expected Expanded") };

    assert_eq!(words.len(), 4);
    assert_eq!(words[0] & 0x1F, 8); // Rd = X8

    // Rebuild the low 32 bits of the loaded value from the MOVZ and the first MOVK.
    let halfword = |word: u32| u64::from((word >> 5) & 0xFFFF);
    let value = halfword(words[0]) | (halfword(words[1]) << 16);
    assert_eq!(value, 0x2000);
}
625+
626+
// -- expand: conditional branches (Imm19, Imm14) ------------------------
627+
628+
#[test]
fn expand_cbz_far_away() {
    // CBZ X0, #0x40 (imm19 = 16) at PC=0x1000 with the trampoline 2MB away.
    let cbz_x0 = 0xB400_0000 | (16 << 5);
    let expanded = relocate_or_expand(cbz_x0, &Reloc::Imm19, 0x1000, 0x20_0000).unwrap();
    let RelocResult::Expanded(words) = expanded else { panic!("expected Expanded") };

    assert_eq!(words.len(), 5);
    // The leading word is the inverted test (CBZ -> CBNZ) that skips the absolute branch.
    assert_eq!(words[0] & 0x7F00_0000, 0x3500_0000, "should be CBNZ");
    assert_eq!(words[1], 0x5800_0050); // LDR X16, #8

    // Embedded target = 0x1000 + 16 * 4.
    let target = (u64::from(words[4]) << 32) | u64::from(words[3]);
    assert_eq!(target, 0x1040);
}
642+
643+
#[test]
fn expand_tbz_far_away() {
    // TBZ X0, #0, #0x20 (imm14 = 8) at PC=0x1000 with the trampoline 1MB away.
    let tbz_x0 = 0x3600_0000 | (8 << 5);
    let expanded = relocate_or_expand(tbz_x0, &Reloc::Imm14, 0x1000, 0x10_0000).unwrap();
    let RelocResult::Expanded(words) = expanded else { panic!("expected Expanded") };

    assert_eq!(words.len(), 5);
    // The leading word is the inverted test (TBZ -> TBNZ).
    assert_eq!(words[0] & 0x7F00_0000, 0x3700_0000, "should be TBNZ");

    // Embedded target = 0x1000 + 8 * 4.
    let target = (u64::from(words[4]) << 32) | u64::from(words[3]);
    assert_eq!(target, 0x1020);
}
656+
657+
// -- relocate_or_expand: near = Single ----------------------------------
658+
659+
#[test]
fn relocate_or_expand_near_stays_single() {
    // B #0x100 at PC=0x1000 with the trampoline only 4KB away: the displacement still fits,
    // so the instruction must stay a single word.
    let b_insn = 0x1400_0000 | 64;
    let outcome = relocate_or_expand(b_insn, &Reloc::Branch26, 0x1000, 0x2000).unwrap();
    assert!(matches!(outcome, RelocResult::Single(_)));
}
372666
}

0 commit comments

Comments
 (0)