From 8d226636ccabb59cb8f236305731b5364700ff87 Mon Sep 17 00:00:00 2001 From: Ivan Boldyrev Date: Mon, 29 Dec 2025 13:37:31 +0100 Subject: [PATCH 1/2] runtime: should be renamed to `translator` or `dynasm` --- .DS_Store | Bin 0 -> 6148 bytes harm-runtime/src/labels.rs | 84 +++++++++++++++++++++++++++++++++++- harm-runtime/src/lib.rs | 1 + harm-runtime/src/runtime.rs | 82 +++++++++++++++++++++++++++++++++++ 4 files changed, 166 insertions(+), 1 deletion(-) create mode 100644 .DS_Store create mode 100644 harm-runtime/src/runtime.rs diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..693900ac1a55f1952f09dd1eb7e71c7c1733a0c5 GIT binary patch literal 6148 zcmeHKy-EW?5S~qZb81%IEVa!_;zmWlcc6r*M4t2WC^z%C#CYB)d{aQDyLF265-dw+w@3zy{ z#uwIJD_M2s;uT@ELLeT{xJin*(}byLFt77VL%udF~Hvk7md-g7#q}|4s`ko04zYQ1>+(QfUZT) zVr&o=h%%u-6RPYLLz!^sUF+vrj18J_Qugwp?9R&GP?X*s>$}>WlxI+SVL%wjGLSdZ zBG><&$@l+kkj#VuVc<_OpbCD~uOKB`TgQ^)TI-_iqp`4GY;Y8UfiA`H, + labels: HashMap, + next_id: usize, +} + +impl LabelRegistry { + #[inline] + pub fn new() -> Self { + Self::default() + } + + #[inline] + pub fn forward_named_label(&mut self, name: &str) -> LabelId { + if let Some(id) = self.named_labels.get(name) { + *id + } else { + let id = self.next_label(); + self.named_labels.insert(name.to_string(), id); + self.labels.insert(id, LabelInfo::Forward); + id + } + } + + #[inline] + pub fn forward_label(&mut self) -> LabelId { + let id = self.next_label(); + self.labels.insert(id, LabelInfo::Forward); + id + } + + pub fn define_label(&mut self, label_id: LabelId, offset: Offset) { + if let Some(info) = self.labels.get_mut(&label_id) { + match info { + LabelInfo::Forward => { + *info = LabelInfo::Offset(offset); + } + LabelInfo::Offset(_) => { + todo!("Label {label_id:?} is already defined"); + } + } + } else { + panic!("Label {label_id:?} is not registered"); + } + } + + #[inline] + pub fn define_named_label(&mut self, name: &str, offset: Offset) -> LabelId { + 
if let Some(id) = self.named_labels.get(name).copied() { + self.labels.insert(id, LabelInfo::Offset(offset)); + id + } else { + let id = self.next_label(); + self.named_labels.insert(name.to_string(), id); + self.labels.insert(id, LabelInfo::Offset(offset)); + id + } + } + + pub fn name_label(&mut self, id: LabelId, name: &str) { + if self.labels.contains_key(&id) { + self.named_labels.insert(name.to_string(), id); + } else { + panic!("Label {id:?} is not registered"); + } + } + + #[inline] + pub fn label_info(&self, id: LabelId) -> Option<&LabelInfo> { + self.labels.get(&id) + } + + fn next_label(&mut self) -> LabelId { + let id = LabelId(self.next_id); + self.next_id += 1; + id + } +} diff --git a/harm-runtime/src/lib.rs b/harm-runtime/src/lib.rs index cdcc60b..cf7cc34 100644 --- a/harm-runtime/src/lib.rs +++ b/harm-runtime/src/lib.rs @@ -4,3 +4,4 @@ */ pub mod labels; +pub mod runtime; diff --git a/harm-runtime/src/runtime.rs b/harm-runtime/src/runtime.rs new file mode 100644 index 0000000..ddad018 --- /dev/null +++ b/harm-runtime/src/runtime.rs @@ -0,0 +1,82 @@ +/* Copyright (C) 2025 Ivan Boldyrev + * + * This document is licensed under the BSD 3-clause license. + */ + +use std::collections::HashMap; + +use crate::labels::LabelRegistry; +use harm::InstructionCode; +use harm::instructions::InstructionSeq; +use harm::reloc::{LabelId, Offset, Rel64}; + +// N.B. we keep the internal relocation type here, and convert it to the external one on serialization. +#[derive(Default)] +pub struct Assembler { + label_manager: LabelRegistry, + memory: Vec, + relocations: HashMap, +} + +impl Assembler { + #[inline] + pub fn new() -> Self { + <_>::default() + } + + pub fn build(self) { + todo!() + } + + #[inline] + pub fn with_capacity(cap: usize) -> Self { + Self { + label_manager: LabelRegistry::new(), + memory: Vec::with_capacity(cap), + relocations: HashMap::new(), + } + } + + pub fn insert(&mut self, s: InstSeq) { + // TODO align by instruction alignment? 
+ for (inst, rel) in s.encode() { + let pos = self.memory.len(); + if let Some(rel) = rel { + self.relocations.insert(pos, rel); + } + self.memory.push(inst); + } + } + + // TODO the label has to be aligned. Except for data labels?.. + // For an instruction, it is always 4 bytes, but for data it can be different, from 1 to N bytes. + pub fn current_label(&mut self) -> LabelId { + let pos = self.memory.len(); + + // TODO can be fused + let label_id = self.label_manager.forward_label(); + self.label_manager.define_label(label_id, pos as Offset); + + label_id + } + + pub fn current_named_label(&mut self, name: &str) -> LabelId { + let id = self.new_forward_named_label(name); + self.assign_forward_label(id); + id + } + + pub fn new_forward_label(&mut self) -> LabelId { + self.label_manager.forward_label() + } + + pub fn new_forward_named_label(&mut self, name: &str) -> LabelId { + self.label_manager.forward_named_label(name) + } + + pub fn assign_forward_label(&mut self, label_id: LabelId) { + let pos = self.memory.len(); + + self.label_manager.define_label(label_id, pos as Offset); + } +} From c0daa2ca1eb44915af265227af9ef585f2554160 Mon Sep 17 00:00:00 2001 From: Ivan Boldyrev Date: Sun, 18 Jan 2026 19:12:08 +0100 Subject: [PATCH 2/2] runtime: memory abstraction --- harm-runtime/Cargo.toml | 7 +- harm-runtime/src/lib.rs | 1 + harm-runtime/src/memory.rs | 72 ++++++++++ harm-runtime/src/memory/memmap2.rs | 220 +++++++++++++++++++++++++++++ harm-runtime/src/runtime.rs | 5 +- 5 files changed, 301 insertions(+), 4 deletions(-) create mode 100644 harm-runtime/src/memory.rs create mode 100644 harm-runtime/src/memory/memmap2.rs diff --git a/harm-runtime/Cargo.toml b/harm-runtime/Cargo.toml index aa4a5a1..027e327 100644 --- a/harm-runtime/Cargo.toml +++ b/harm-runtime/Cargo.toml @@ -12,4 +12,9 @@ publish = false [dependencies] harm = { workspace = true } -memmap2 = "0.9.9" +memmap2 = { version = "0.9.9", optional = true } +thiserror = "2.0.18" + +[features] +default = 
["memmap2"] +memmap2 = ["dep:memmap2"] diff --git a/harm-runtime/src/lib.rs b/harm-runtime/src/lib.rs index cf7cc34..dfb11f2 100644 --- a/harm-runtime/src/lib.rs +++ b/harm-runtime/src/lib.rs @@ -4,4 +4,5 @@ */ pub mod labels; +pub mod memory; pub mod runtime; diff --git a/harm-runtime/src/memory.rs b/harm-runtime/src/memory.rs new file mode 100644 index 0000000..7c014b4 --- /dev/null +++ b/harm-runtime/src/memory.rs @@ -0,0 +1,72 @@ +/* Copyright (C) 2026 Ivan Boldyrev + * + * This document is licensed under the BSD 3-clause license. + */ + +#[cfg(feature = "memmap2")] +mod memmap2; + +#[cfg(feature = "memmap2")] +pub use self::memmap2::{Mmap2Buffer, Mmap2FixedMemory}; + +pub trait Memory { + type ExtendError; + type FixedMemoryError; + + /// Current writing position. + fn pos(&self) -> usize; + + /// If memory has fixed capacity, return it. + /// + /// A `Vec` is not considered a memory of fixed capacity because it can grow indefinitely. + fn capacity(&self) -> Option; + + /// Append data to the memory. Should fail when it reaches memory's capacity. + fn try_extend>(&mut self, bytes: I) -> Result<(), Self::ExtendError>; + + /// Transform into fixed-location memory. + fn into_fixed_memory(self) -> Result; + + /// Align position. + fn align(&mut self, alignment: usize) -> Result<(), Self::ExtendError> { + let pos = self.pos(); + let remn = pos % alignment; + if remn != 0 { + self.try_extend(core::iter::repeat(0).take(alignment - remn))?; + } + Ok(()) + } +} + +/// Memory with fixed location that can be transformed to an executable one after relocations are applied. 
+pub trait FixedMemory: AsMut<[u8]> { + type ExecutableMemory; + type ExecutableMemoryError; + + fn into_executable_memory(self) -> Result; +} + +#[cfg(test)] +mod tests { + #[cfg(feature = "memmap2")] + #[test] + fn test_align() { + use super::*; + + let mut data = Vec::::new(); + + Memory::align(&mut data, 8); + assert!(data.is_empty()); + + data.push(1); + Memory::align(&mut data, 8); + assert_eq!(data.len(), 8); + + Memory::align(&mut data, 8); + assert_eq!(data.len(), 8); + + data.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7]); + Memory::align(&mut data, 8); + assert_eq!(data.len(), 16); + } +} diff --git a/harm-runtime/src/memory/memmap2.rs b/harm-runtime/src/memory/memmap2.rs new file mode 100644 index 0000000..541442f --- /dev/null +++ b/harm-runtime/src/memory/memmap2.rs @@ -0,0 +1,220 @@ +/* Copyright (C) 2026 Ivan Boldyrev + * + * This document is licensed under the BSD 3-clause license. + */ + +use std::convert::Infallible; + +use super::{FixedMemory, Memory}; + +#[derive(thiserror::Error, Debug)] +pub enum Map2BufferError { + #[error("buffer overflow: {0}")] + Overflow(usize), +} + +pub struct Mmap2Buffer { + pos: usize, + memory: memmap2::MmapMut, +} + +impl Mmap2Buffer { + #[inline] + pub fn new(mmap_mut: memmap2::MmapMut) -> Self { + Self { + pos: 0, + memory: mmap_mut, + } + } + + #[inline] + pub fn allocate(length: usize) -> std::io::Result { + let mmap_mut = memmap2::MmapMut::map_anon(length)?; + Ok(Self::new(mmap_mut)) + } +} + +impl Memory for Mmap2Buffer { + type ExtendError = Map2BufferError; + + type FixedMemoryError = Infallible; + + #[inline] + fn pos(&self) -> usize { + self.pos + } + + #[inline] + fn capacity(&self) -> Option { + Some(self.memory.len()) + } + + #[inline] + fn try_extend>(&mut self, bytes: I) -> Result<(), Self::ExtendError> { + for byte in bytes { + if self.pos >= self.memory.len() { + return Err(Map2BufferError::Overflow(self.pos)); + } + + self.memory[self.pos] = byte; + self.pos += 1; + } + Ok(()) + } + + #[inline] + fn 
into_fixed_memory(self) -> Result { + Ok(Mmap2FixedMemory::new(self.memory)) + } +} + +pub struct Mmap2FixedMemory(memmap2::MmapMut); + +impl Mmap2FixedMemory { + #[inline] + pub fn new(mmap_mut: memmap2::MmapMut) -> Self { + Self(mmap_mut) + } + + #[inline] + pub fn allocate(length: usize) -> std::io::Result { + let mmap_mut = memmap2::MmapMut::map_anon(length)?; + Ok(Self(mmap_mut)) + } +} + +impl AsRef<[u8]> for Mmap2FixedMemory { + #[inline] + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +impl AsMut<[u8]> for Mmap2FixedMemory { + #[inline] + fn as_mut(&mut self) -> &mut [u8] { + &mut self.0 + } +} + +impl FixedMemory for Mmap2FixedMemory { + // TODO a wrapper type? + type ExecutableMemory = memmap2::Mmap; + + type ExecutableMemoryError = std::io::Error; + + #[inline] + fn into_executable_memory(self) -> Result { + self.0.make_exec() + } +} + +impl Memory for Vec { + type ExtendError = Infallible; + type FixedMemoryError = std::io::Error; + + #[inline] + fn pos(&self) -> usize { + self.len() + } + + #[inline] + fn capacity(&self) -> Option { + None + } + + #[inline] + fn try_extend>(&mut self, bytes: I) -> Result<(), Self::ExtendError> { + self.extend(bytes); + Ok(()) + } + + #[inline] + fn into_fixed_memory(self) -> Result { + let mut mem = Mmap2FixedMemory::allocate(self.len())?; + mem.as_mut().copy_from_slice(&self); + Ok(mem) + } +} + +impl Memory for &mut Vec { + type ExtendError = Infallible; + type FixedMemoryError = std::io::Error; + + #[inline] + fn pos(&self) -> usize { + self.len() + } + + #[inline] + fn capacity(&self) -> Option { + None + } + + #[inline] + fn try_extend>(&mut self, bytes: I) -> Result<(), Self::ExtendError> { + self.extend(bytes); + Ok(()) + } + + #[inline] + fn into_fixed_memory(self) -> Result { + let mut mem = Mmap2FixedMemory::allocate(self.len())?; + // The memmap2 spec doesn't say that the length can be different... 
+ mem.as_mut().copy_from_slice(self); + Ok(mem) + } +} + +#[cfg(test)] +mod tests { + use harm::instructions::InstructionSeq; + + use super::*; + + #[test] + #[cfg(target_arch = "aarch64")] + fn test_mmap() { + let mut buf = Mmap2Buffer::allocate(1024).expect("mmap failed, system problem"); + buf.try_extend(harm::instructions::control::ret().bytes()) + .unwrap(); + + let mem = buf.into_fixed_memory().unwrap(); + // Doing relocations... + + let exec = mem.into_executable_memory().unwrap(); + + unsafe { + let func: unsafe extern "C" fn() = std::mem::transmute(exec.as_ptr()); + func(); + } + } + + #[test] + fn test_try_extend_1023() { + let mut buf = Mmap2Buffer::allocate(1024).expect("mmap failed, system problem"); + buf.try_extend(vec![1; 1023].into_iter()).unwrap(); + } + + #[test] + fn test_try_extend_1024() { + let mut buf = Mmap2Buffer::allocate(1024).expect("mmap failed, system problem"); + buf.try_extend(vec![1; 1024].into_iter()).unwrap(); + } + + #[test] + fn test_try_extend_1025() { + let mut buf = Mmap2Buffer::allocate(1024).expect("mmap failed, system problem"); + assert!(buf.try_extend(vec![1; 1025].into_iter()).is_err()); + } + + #[test] + fn test_try_extend_pair() { + let mut buf = Mmap2Buffer::allocate(1024).expect("mmap failed, system problem"); + buf.try_extend(vec![1; 512].into_iter()).unwrap(); + buf.try_extend(vec![1; 512].into_iter()).unwrap(); + assert_eq!(buf.pos(), 1024); + + assert!(buf.try_extend(vec![1; 1].into_iter()).is_err()); + } +} diff --git a/harm-runtime/src/runtime.rs b/harm-runtime/src/runtime.rs index ddad018..825fc39 100644 --- a/harm-runtime/src/runtime.rs +++ b/harm-runtime/src/runtime.rs @@ -6,7 +6,6 @@ use std::collections::HashMap; use crate::labels::LabelRegistry; -use harm::InstructionCode; use harm::instructions::InstructionSeq; use harm::reloc::{LabelId, Offset, Rel64}; @@ -14,7 +13,7 @@ use harm::reloc::{LabelId, Offset, Rel64}; #[derive(Default)] pub struct Assembler { label_manager: LabelRegistry, - memory: Vec, + 
memory: Vec, relocations: HashMap, } @@ -41,10 +40,10 @@ impl Assembler { // TODO align by instruction alignment? for (inst, rel) in s.encode() { let pos = self.memory.len(); + self.memory.extend(inst.0); if let Some(rel) = rel { self.relocations.insert(pos, rel); } - self.memory.push(inst); } }