From a048b78b7ba5ccd6e38ce13b230595dbb8c07a5b Mon Sep 17 00:00:00 2001 From: DexalGT Date: Tue, 31 Mar 2026 12:16:02 +0300 Subject: [PATCH 1/8] feat: add ltk_rst crate for RST (Riot String Table) support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements reading and writing of League of Legends .stringtable / fontconfig files following the ltk API style (from_reader/to_writer, thiserror, byteorder, xxhash-rust from workspace deps). Supports all four file versions (v2–v5), including the v2 font-config header, the deprecated mode byte present in v2–v4, and offset-based string deduplication on both read and write paths. Registers the crate in the league-toolkit umbrella behind a `rst` feature flag. --- crates/league-toolkit/Cargo.toml | 4 +- crates/league-toolkit/src/lib.rs | 3 + crates/ltk_rst/Cargo.toml | 17 +++ crates/ltk_rst/src/error.rs | 15 +++ crates/ltk_rst/src/hash.rs | 30 +++++ crates/ltk_rst/src/lib.rs | 53 ++++++++ crates/ltk_rst/src/rst.rs | 217 +++++++++++++++++++++++++++++++ crates/ltk_rst/src/version.rs | 86 ++++++++++++ 8 files changed, 424 insertions(+), 1 deletion(-) create mode 100644 crates/ltk_rst/Cargo.toml create mode 100644 crates/ltk_rst/src/error.rs create mode 100644 crates/ltk_rst/src/hash.rs create mode 100644 crates/ltk_rst/src/lib.rs create mode 100644 crates/ltk_rst/src/rst.rs create mode 100644 crates/ltk_rst/src/version.rs diff --git a/crates/league-toolkit/Cargo.toml b/crates/league-toolkit/Cargo.toml index 30506a03..7691a71a 100644 --- a/crates/league-toolkit/Cargo.toml +++ b/crates/league-toolkit/Cargo.toml @@ -21,7 +21,7 @@ default = [ "hash", ] -serde = ["ltk_wad/serde", "ltk_file/serde", "ltk_meta/serde"] +serde = ["ltk_wad/serde", "ltk_file/serde", "ltk_meta/serde", "ltk_rst/serde"] anim = ["dep:ltk_anim"] file = ["dep:ltk_file"] @@ -31,6 +31,7 @@ primitives = ["dep:ltk_primitives"] texture = ["dep:ltk_texture"] wad = ["dep:ltk_wad"] hash = ["dep:ltk_hash"] +rst = ["dep:ltk_rst"] [dependencies] ltk_anim = { version = "0.3.3", path = "../ltk_anim", optional = true } @@ -41,3 +42,4 @@ ltk_primitives = { version = "0.3.3", path = "../ltk_primitives", optional = tru ltk_texture = { version = "0.5.0", path = "../ltk_texture", optional = true } ltk_wad = { version = "0.2.14", path = "../ltk_wad", optional = true } ltk_hash = { version = "0.2.6", path = "../ltk_hash", optional = true } +ltk_rst = { version = "0.1.0", path = "../ltk_rst", optional = true } diff --git a/crates/league-toolkit/src/lib.rs b/crates/league-toolkit/src/lib.rs index 4f577afc..98c567b1 100644 --- a/crates/league-toolkit/src/lib.rs +++ b/crates/league-toolkit/src/lib.rs @@ -21,3 +21,6 @@ pub use ltk_wad as wad; #[cfg(feature = "hash")] pub use ltk_hash as hash; + +#[cfg(feature = "rst")] +pub use ltk_rst as rst; diff --git a/crates/ltk_rst/Cargo.toml b/crates/ltk_rst/Cargo.toml new file mode 100644 index 00000000..4a70e0a6 --- /dev/null +++ b/crates/ltk_rst/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "ltk_rst" +version = "0.1.0" +edition = "2021" +description = "RST (Riot String Table) reading/writing for League Toolkit" +license = "MIT OR Apache-2.0" +readme = "../../README.md" + +[features] +serde = ["dep:serde"] + +[dependencies] +thiserror = { workspace = true } +byteorder = { workspace = true } +xxhash-rust = { workspace = true } + +serde = { workspace = true, optional = true } diff --git a/crates/ltk_rst/src/error.rs b/crates/ltk_rst/src/error.rs new file mode 100644 index 00000000..666a52bd --- /dev/null +++ b/crates/ltk_rst/src/error.rs @@ -0,0 +1,15 @@ +use std::io; + +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum RstError { + #[error("invalid magic code (expected [0x52, 0x53, 0x54], got {actual:?})")] + InvalidMagic { actual: [u8; 3] }, + + #[error("unsupported RST version: {version:#04x}")] + UnsupportedVersion { version: u8 }, + + #[error("io error")] + IoError(#[from] io::Error), +} diff --git a/crates/ltk_rst/src/hash.rs b/crates/ltk_rst/src/hash.rs new file mode 100644 index 00000000..15641f92 --- /dev/null +++ b/crates/ltk_rst/src/hash.rs @@ -0,0 +1,30 @@ +use xxhash_rust::xxh64::xxh64; + +use crate::version::RstHashType; + +/// Computes the masked XXHash64 of `key` lowercased as UTF-8, suitable for use +/// as an RST entry hash (without the string-offset component). +/// +/// The result is masked to the bit-width defined by `hash_type`: +/// - [`RstHashType::Complex`] → lower 40 bits +/// - [`RstHashType::Simple`] → lower 39 bits +pub fn compute_hash(key: &str, hash_type: RstHashType) -> u64 { + let lowered = key.to_lowercase(); + let raw = xxh64(lowered.as_bytes(), 0); + raw & hash_type.hash_mask() +} + +/// Packs a pre-computed masked `hash` together with a string `offset` into the +/// single `u64` value written into the RST hash table. +#[inline] +pub fn pack_entry(hash: u64, offset: u64, hash_type: RstHashType) -> u64 { + hash | (offset << hash_type.offset_shift()) +} + +/// Unpacks a raw RST hash-table entry into `(hash, offset)`. +#[inline] +pub fn unpack_entry(entry: u64, hash_type: RstHashType) -> (u64, u64) { + let hash = entry & hash_type.hash_mask(); + let offset = entry >> hash_type.offset_shift(); + (hash, offset) +} diff --git a/crates/ltk_rst/src/lib.rs b/crates/ltk_rst/src/lib.rs new file mode 100644 index 00000000..0506ad37 --- /dev/null +++ b/crates/ltk_rst/src/lib.rs @@ -0,0 +1,53 @@ +//! Reading and writing League of Legends RST (Riot String Table) files. +//! +//! RST files are localisation tables that map XXHash64-based keys to UTF-8 +//! strings. They are typically found at `DATA/Menu/*.stringtable` or +//! `DATA/Menu/fontconfig_*.txt` inside WAD archives. +//! +//! # Reading +//! +//! ```no_run +//! use std::fs::File; +//! use ltk_rst::RstFile; +//! +//! let mut file = File::open("fontconfig_en_us.stringtable")?; +//! let rst = RstFile::from_reader(&mut file)?; +//! +//! for (hash, text) in &rst.entries { +//! println!("{hash:#018x} = {text}"); +//! } +//! # Ok::<(), Box>(()) +//! ``` +//! +//! # Writing +//! +//! ```no_run +//! use std::fs::File; +//! use ltk_rst::{RstFile, RstVersion}; +//! +//! let mut rst = RstFile::new(RstVersion::V5); +//! rst.insert_str("game_client_quit", "Quit"); +//! +//! let mut out = File::create("out.stringtable")?; +//! rst.to_writer(&mut out)?; +//! # Ok::<(), Box>(()) +//! ``` +//! +//! # Hashing keys manually +//! +//! ``` +//! use ltk_rst::{RstHashType, compute_hash}; +//! +//! let hash = compute_hash("game_client_quit", RstHashType::Simple); +//! println!("{hash:#018x}"); +//! ``` + +mod error; +mod hash; +mod rst; +mod version; + +pub use error::*; +pub use hash::*; +pub use rst::*; +pub use version::*; diff --git a/crates/ltk_rst/src/rst.rs b/crates/ltk_rst/src/rst.rs new file mode 100644 index 00000000..83bd4d1a --- /dev/null +++ b/crates/ltk_rst/src/rst.rs @@ -0,0 +1,217 @@ +use std::collections::HashMap; +use std::io::{self, Read, Seek, SeekFrom, Write}; + +use byteorder::{ReadBytesExt as _, WriteBytesExt as _, LE}; + +use crate::error::RstError; +use crate::hash::{compute_hash, pack_entry, unpack_entry}; +use crate::version::{RstMode, RstVersion}; + +/// Magic bytes at the start of every RST file: `"RST"`. +pub const MAGIC: [u8; 3] = [0x52, 0x53, 0x54]; + +/// A parsed RST (Riot String Table) file. +/// +/// RST files are League of Legends localisation tables that map XXHash64-based +/// keys to UTF-8 strings. The hash table entries pack both the string hash and +/// the offset of its null-terminated UTF-8 data into a single `u64`. +/// +/// # Reading +/// +/// ```no_run +/// use std::fs::File; +/// use ltk_rst::RstFile; +/// +/// let mut file = File::open("fontconfig_en_us.stringtable")?; +/// let rst = RstFile::from_reader(&mut file)?; +/// +/// if let Some(text) = rst.get(0x1234_5678_9abc_def0) { +/// println!("{text}"); +/// } +/// # Ok::<(), Box>(()) +/// ``` +/// +/// # Writing +/// +/// ```no_run +/// use std::fs::File; +/// use ltk_rst::{RstFile, RstVersion}; +/// +/// let mut rst = RstFile::new(RstVersion::V5); +/// rst.insert_str("game_client_quit", "Quit"); +/// +/// let mut out = File::create("out.stringtable")?; +/// rst.to_writer(&mut out)?; +/// # Ok::<(), Box>(()) +/// ``` +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RstFile { + /// File version. + pub version: RstVersion, + + /// Optional font-config string. Only present (and written) in v2 files. + pub config: Option, + + /// Deprecated mode byte. Present in v2–v4; always [`RstMode::None`] for v5+. + pub mode: RstMode, + + /// Hash → string mapping. + pub entries: HashMap, +} + +impl RstFile { + /// Creates an empty [`RstFile`] for the given version. + pub fn new(version: RstVersion) -> Self { + Self { + version, + config: None, + mode: RstMode::None, + entries: HashMap::new(), + } + } + + /// Returns the string associated with `hash`, if any. + pub fn get(&self, hash: u64) -> Option<&str> { + self.entries.get(&hash).map(|s| s.as_str()) + } + + /// Inserts an entry by pre-computed hash. + /// + /// The hash must already be masked to the bit-width of the file's + /// [`RstHashType`] — use [`compute_hash`] to produce it. + pub fn insert(&mut self, hash: u64, value: impl Into) { + self.entries.insert(hash, value.into()); + } + + /// Hashes `key` for this file's version and inserts the entry. + pub fn insert_str(&mut self, key: &str, value: impl Into) { + let hash = compute_hash(key, self.version.hash_type()); + self.insert(hash, value); + } + + /// Parses an [`RstFile`] from any [`Read`] + [`Seek`] source. + /// + /// Seeking is required because string data offsets stored in the hash + /// table are relative to the start of the string section, which is only + /// known after the entire hash table has been read. + pub fn from_reader(reader: &mut (impl Read + Seek)) -> Result { + let mut magic = [0u8; 3]; + reader.read_exact(&mut magic)?; + if magic != MAGIC { + return Err(RstError::InvalidMagic { actual: magic }); + } + + let version = RstVersion::try_from_u8(reader.read_u8()?)?; + let hash_type = version.hash_type(); + + let config = if version == RstVersion::V2 { + let has_config = reader.read_u8()? != 0; + if has_config { + let len = reader.read_i32::()? as usize; + let mut buf = vec![0u8; len]; + reader.read_exact(&mut buf)?; + Some(String::from_utf8_lossy(&buf).into_owned()) + } else { + None + } + } else { + None + }; + + let count = reader.read_i32::()? as usize; + let mut pairs: Vec<(u64, u64)> = Vec::with_capacity(count); + for _ in 0..count { + let raw = reader.read_u64::()?; + pairs.push(unpack_entry(raw, hash_type)); + } + + let mode = if version.has_mode_byte() { + RstMode::from_u8(reader.read_u8()?) + } else { + RstMode::None + }; + + let data_start = reader.stream_position()?; + let mut offset_cache: HashMap = HashMap::with_capacity(count); + let mut entries: HashMap = HashMap::with_capacity(count); + + for (hash, offset) in pairs { + let text = if let Some(cached) = offset_cache.get(&offset) { + cached.clone() + } else { + reader.seek(SeekFrom::Start(data_start + offset))?; + let text = read_null_terminated(reader)?; + offset_cache.insert(offset, text.clone()); + text + }; + entries.insert(hash, text); + } + + Ok(Self { version, config, mode, entries }) + } + + /// Serialises this [`RstFile`] to any [`Write`] sink. + pub fn to_writer(&self, writer: &mut impl Write) -> Result<(), RstError> { + let hash_type = self.version.hash_type(); + + let mut header: Vec = Vec::new(); + + header.extend_from_slice(&MAGIC); + header.write_u8(self.version as u8)?; + + if self.version == RstVersion::V2 { + match &self.config { + Some(cfg) if !cfg.is_empty() => { + header.write_u8(1)?; + header.write_i32::(cfg.len() as i32)?; + header.extend_from_slice(cfg.as_bytes()); + } + _ => { + header.write_u8(0)?; + } + } + } + + header.write_i32::(self.entries.len() as i32)?; + + let mut data: Vec = Vec::new(); + let mut text_to_offset: HashMap<&str, u64> = HashMap::with_capacity(self.entries.len()); + + for (hash, text) in &self.entries { + let offset = if let Some(&off) = text_to_offset.get(text.as_str()) { + off + } else { + let off = data.len() as u64; + data.extend_from_slice(text.as_bytes()); + data.push(0x00); + text_to_offset.insert(text.as_str(), off); + off + }; + + let packed = pack_entry(*hash, offset, hash_type); + header.write_u64::(packed)?; + } + + if self.version.has_mode_byte() { + header.write_u8(self.mode as u8)?; + } + + writer.write_all(&header)?; + writer.write_all(&data)?; + + Ok(()) + } +} + +fn read_null_terminated(reader: &mut impl Read) -> Result { + let mut buf: Vec = Vec::new(); + loop { + let b = reader.read_u8()?; + if b == 0x00 { + break; + } + buf.push(b); + } + Ok(String::from_utf8_lossy(&buf).into_owned()) +} diff --git a/crates/ltk_rst/src/version.rs b/crates/ltk_rst/src/version.rs new file mode 100644 index 00000000..0030ba59 --- /dev/null +++ b/crates/ltk_rst/src/version.rs @@ -0,0 +1,86 @@ +use crate::error::RstError; + +/// RST file version. +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum RstVersion { + /// Version 2 — uses complex (40-bit) hashing; supports optional font config. + V2 = 0x02, + /// Version 3 — uses complex (40-bit) hashing. + V3 = 0x03, + /// Version 4 — uses simple (39-bit) hashing. + V4 = 0x04, + /// Version 5 — uses simple (39-bit) hashing; mode byte removed. + V5 = 0x05, +} + +impl RstVersion { + /// Returns the [`RstHashType`] that corresponds to this version. + pub fn hash_type(self) -> RstHashType { + match self { + RstVersion::V2 | RstVersion::V3 => RstHashType::Complex, + RstVersion::V4 | RstVersion::V5 => RstHashType::Simple, + } + } + + /// Returns `true` if this version stores a mode byte in the file. + pub fn has_mode_byte(self) -> bool { + self < RstVersion::V5 + } + + pub(crate) fn try_from_u8(value: u8) -> Result { + match value { + 0x02 => Ok(RstVersion::V2), + 0x03 => Ok(RstVersion::V3), + 0x04 => Ok(RstVersion::V4), + 0x05 => Ok(RstVersion::V5), + _ => Err(RstError::UnsupportedVersion { version: value }), + } + } +} + +/// Determines the hash bit-width used when packing a hash+offset pair into a +/// single `u64` entry in the RST hash table. +/// +/// - [`Complex`](RstHashType::Complex): used by v2/v3 — 40-bit hash, offset in upper 24 bits. +/// - [`Simple`](RstHashType::Simple): used by v4/v5 — 39-bit hash, offset in upper 25 bits. +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum RstHashType { + /// 40-bit hash key (`(1 << 40) - 1`). Used by RST v2 and v3. + Complex = 40, + /// 39-bit hash key (`(1 << 39) - 1`). Used by RST v4 and v5. + Simple = 39, +} + +impl RstHashType { + /// Returns the bitmask for the hash portion of a packed entry. + #[inline] + pub fn hash_mask(self) -> u64 { + (1u64 << (self as u8)) - 1 + } + + /// Returns the bit-shift used when packing or unpacking the string offset. + #[inline] + pub fn offset_shift(self) -> u8 { + self as u8 + } +} + +/// The (deprecated) mode byte stored in RST files with version < 5. +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] +pub enum RstMode { + #[default] + None = 0, + Default = 1, +} + +impl RstMode { + pub(crate) fn from_u8(value: u8) -> Self { + match value { + 1 => RstMode::Default, + _ => RstMode::None, + } + } +} From 6d32ad65f97acfa8c0633d98bf2cdf16f10f95a7 Mon Sep 17 00:00:00 2001 From: DexalGT Date: Tue, 31 Mar 2026 12:18:44 +0300 Subject: [PATCH 2/8] style: fix rustfmt struct literal in RstFile::from_reader --- crates/ltk_rst/src/rst.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/ltk_rst/src/rst.rs b/crates/ltk_rst/src/rst.rs index 83bd4d1a..ccb83cda 100644 --- a/crates/ltk_rst/src/rst.rs +++ b/crates/ltk_rst/src/rst.rs @@ -148,7 +148,12 @@ impl RstFile { entries.insert(hash, text); } - Ok(Self { version, config, mode, entries }) + Ok(Self { + version, + config, + mode, + entries, + }) } /// Serialises this [`RstFile`] to any [`Write`] sink. From 660db8a59b7b9b4bc650cfd56022bab7bd970d5e Mon Sep 17 00:00:00 2001 From: DexalGT Date: Tue, 31 Mar 2026 12:29:42 +0300 Subject: [PATCH 3/8] test(ltk_rst): add integration tests using real game files --- crates/ltk_rst/tests/parse_files.rs | 254 ++++++++++++++++++++++++++++ 1 file changed, 254 insertions(+) create mode 100644 crates/ltk_rst/tests/parse_files.rs diff --git a/crates/ltk_rst/tests/parse_files.rs b/crates/ltk_rst/tests/parse_files.rs new file mode 100644 index 00000000..3b51a4e1 --- /dev/null +++ b/crates/ltk_rst/tests/parse_files.rs @@ -0,0 +1,254 @@ +//! Integration tests for RST parsing using real game files. +//! +//! Test files live at `/../test-files/data/menu/`. +//! Tests that reference missing files are skipped rather than failing, so the +//! suite can run in CI environments that do not include game assets. + +use std::fs::File; +use std::io::{BufReader, Cursor}; +use std::path::Path; + +use ltk_rst::{RstError, RstFile, RstVersion, compute_hash, RstHashType}; + +const TEST_FILES_ROOT: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../../test-files/data/menu"); + +fn open(relative: &str) -> Option> { + let path = Path::new(TEST_FILES_ROOT).join(relative); + if !path.exists() { + println!("skipping missing file: {}", path.display()); + return None; + } + Some(BufReader::new( + File::open(&path).unwrap_or_else(|e| panic!("failed to open {}: {e}", path.display())), + )) +} + +// --------------------------------------------------------------------------- +// Parse tests +// --------------------------------------------------------------------------- + +/// Parses every locale's bootstrap.stringtable to ensure the reader handles +/// all regional encodings (CJK, Arabic, Cyrillic, …) without error. +#[test] +fn parse_all_bootstrap_locales() { + let locales = [ + "ar_ae", "cs_cz", "de_de", "el_gr", "en_au", "en_gb", "en_ph", "en_sg", "en_us", + "es_ar", "es_es", "es_mx", "fr_fr", "hu_hu", "id_id", "it_it", "ja_jp", "ko_kr", + "pl_pl", "pt_br", "ro_ro", "ru_ru", "th_th", "tr_tr", "vi_vn", "zh_cn", "zh_my", + "zh_tw", + ]; + + for locale in locales { + let Some(mut reader) = open(&format!("{locale}/bootstrap.stringtable")) else { + continue; + }; + + let rst = RstFile::from_reader(&mut reader) + .unwrap_or_else(|e| panic!("failed to parse {locale}/bootstrap.stringtable: {e}")); + + assert_eq!( + rst.version, + RstVersion::V5, + "{locale}: expected version 5" + ); + assert!( + !rst.entries.is_empty(), + "{locale}: expected at least one entry" + ); + + println!("{locale}/bootstrap.stringtable: {} entries", rst.entries.len()); + } +} + +/// Parses the large LoL and TFT string tables and checks their entry counts. +#[test] +fn parse_lol_and_tft_stringtables() { + for (name, expected_count) in [("lol", 115310usize), ("tft", 94652usize)] { + let Some(mut reader) = open(&format!("en_us/{name}.stringtable")) else { + continue; + }; + + let rst = RstFile::from_reader(&mut reader) + .unwrap_or_else(|e| panic!("failed to parse en_us/{name}.stringtable: {e}")); + + assert_eq!(rst.version, RstVersion::V5); + assert_eq!( + rst.entries.len(), + expected_count, + "{name}.stringtable entry count mismatch" + ); + + println!("en_us/{name}.stringtable: {} entries", rst.entries.len()); + } +} + +/// Verifies known hash→string mappings in en_us/bootstrap.stringtable. +#[test] +fn parse_bootstrap_known_entries() { + let Some(mut reader) = open("en_us/bootstrap.stringtable") else { + return; + }; + + let rst = + RstFile::from_reader(&mut reader).expect("failed to parse en_us/bootstrap.stringtable"); + + assert_eq!(rst.entries.len(), 201); + + // Known stable entries confirmed from the file. + assert_eq!(rst.get(0x000000008818cc3c), Some("Ignore")); + assert_eq!(rst.get(0x0000004732dbee5e), Some("Cancel")); +} + +// --------------------------------------------------------------------------- +// Round-trip tests +// --------------------------------------------------------------------------- + +/// Parses en_us/bootstrap.stringtable, serialises it back to bytes, parses +/// those bytes again, and asserts the two parsed representations are equal. +#[test] +fn round_trip_bootstrap() { + let Some(mut reader) = open("en_us/bootstrap.stringtable") else { + return; + }; + + let original = + RstFile::from_reader(&mut reader).expect("failed to parse en_us/bootstrap.stringtable"); + + let mut buf = Vec::new(); + original + .to_writer(&mut buf) + .expect("failed to serialise bootstrap.stringtable"); + + let mut cursor = Cursor::new(&buf); + let reloaded = + RstFile::from_reader(&mut cursor).expect("failed to re-parse serialised bootstrap"); + + assert_eq!( + original.version, reloaded.version, + "version mismatch after round-trip" + ); + assert_eq!( + original.entries.len(), + reloaded.entries.len(), + "entry count mismatch after round-trip" + ); + for (hash, text) in &original.entries { + assert_eq!( + reloaded.get(*hash), + Some(text.as_str()), + "entry {hash:#018x} missing or changed after round-trip" + ); + } +} + +// --------------------------------------------------------------------------- +// Hash tests +// --------------------------------------------------------------------------- + +/// compute_hash lowercases before hashing, so both cases must produce the same +/// result. +#[test] +fn compute_hash_is_case_insensitive() { + let lower = compute_hash("game_client_quit", RstHashType::Simple); + let upper = compute_hash("GAME_CLIENT_QUIT", RstHashType::Simple); + let mixed = compute_hash("Game_Client_Quit", RstHashType::Simple); + + assert_eq!(lower, upper); + assert_eq!(lower, mixed); +} + +/// The Simple (v4/v5) mask is 39 bits; the Complex (v2/v3) mask is 40 bits. +/// A hash computed for Simple must fit within 39 bits. +#[test] +fn compute_hash_respects_bit_width() { + let simple_mask = (1u64 << 39) - 1; + let complex_mask = (1u64 << 40) - 1; + + let simple_hash = compute_hash("some_key", RstHashType::Simple); + let complex_hash = compute_hash("some_key", RstHashType::Complex); + + assert_eq!(simple_hash & simple_mask, simple_hash); + assert_eq!(complex_hash & complex_mask, complex_hash); + // Simple hash must be fully contained in the smaller mask too... unless the + // 40th bit happens to be set. The property we actually guarantee is that + // each result fits within its own mask. + assert_eq!(simple_hash & simple_mask, simple_hash); + assert_eq!(complex_hash & complex_mask, complex_hash); +} + +// --------------------------------------------------------------------------- +// Error tests +// --------------------------------------------------------------------------- + +#[test] +fn invalid_magic_returns_error() { + let bad = b"\x00\x00\x00\x05"; + let mut cursor = Cursor::new(bad); + let err = RstFile::from_reader(&mut cursor).unwrap_err(); + assert!( + matches!(err, RstError::InvalidMagic { .. }), + "expected InvalidMagic, got {err:?}" + ); +} + +#[test] +fn unsupported_version_returns_error() { + // Valid magic, then version byte 0x01 which is not a known version. + let bad = b"RST\x01"; + let mut cursor = Cursor::new(bad); + let err = RstFile::from_reader(&mut cursor).unwrap_err(); + assert!( + matches!(err, RstError::UnsupportedVersion { version: 0x01 }), + "expected UnsupportedVersion(0x01), got {err:?}" + ); +} + +// --------------------------------------------------------------------------- +// Builder / insertion tests +// --------------------------------------------------------------------------- + +/// Verifies that insert_str hashes the key and stores the value, and that the +/// resulting file can be written and re-read with no data loss. +#[test] +fn insert_str_round_trips() { + let mut rst = RstFile::new(RstVersion::V5); + rst.insert_str("game_client_quit", "Quit"); + rst.insert_str("game_client_play", "Play"); + + let mut buf = Vec::new(); + rst.to_writer(&mut buf).expect("serialise failed"); + + let mut cursor = Cursor::new(&buf); + let loaded = RstFile::from_reader(&mut cursor).expect("re-parse failed"); + + let quit_hash = compute_hash("game_client_quit", RstHashType::Simple); + let play_hash = compute_hash("game_client_play", RstHashType::Simple); + + assert_eq!(loaded.get(quit_hash), Some("Quit")); + assert_eq!(loaded.get(play_hash), Some("Play")); +} + +/// Entries with identical string values must share a single copy in the +/// serialised byte stream. We verify this by checking the output is smaller +/// than it would be if every entry stored its own copy. +#[test] +fn to_writer_deduplicates_strings() { + let mut rst = RstFile::new(RstVersion::V5); + let shared_value = "Shared string value"; + + // Insert 10 different keys all pointing to the same value. + for i in 0u64..10 { + rst.insert(i, shared_value); + } + + let mut buf = Vec::new(); + rst.to_writer(&mut buf).expect("serialise failed"); + + // If deduplication works the string appears only once in the data section. + let occurrences = buf + .windows(shared_value.len()) + .filter(|w| *w == shared_value.as_bytes()) + .count(); + + assert_eq!(occurrences, 1, "string should appear exactly once in output"); +} From a73c14057525acc8ac6a849d7ff3dbcaf56d7fc5 Mon Sep 17 00:00:00 2001 From: DexalGT Date: Tue, 31 Mar 2026 12:34:01 +0300 Subject: [PATCH 4/8] style: fix rustfmt in parse_files tests --- crates/ltk_rst/tests/parse_files.rs | 44 ++++++++++++----------------- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/crates/ltk_rst/tests/parse_files.rs b/crates/ltk_rst/tests/parse_files.rs index 3b51a4e1..06fcc051 100644 --- a/crates/ltk_rst/tests/parse_files.rs +++ b/crates/ltk_rst/tests/parse_files.rs @@ -8,7 +8,7 @@ use std::fs::File; use std::io::{BufReader, Cursor}; use std::path::Path; -use ltk_rst::{RstError, RstFile, RstVersion, compute_hash, RstHashType}; +use ltk_rst::{compute_hash, RstError, RstFile, RstHashType, RstVersion}; const TEST_FILES_ROOT: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../../test-files/data/menu"); @@ -18,9 +18,9 @@ fn open(relative: &str) -> Option> { println!("skipping missing file: {}", path.display()); return None; } - Some(BufReader::new( - File::open(&path).unwrap_or_else(|e| panic!("failed to open {}: {e}", path.display())), - )) + Some(BufReader::new(File::open(&path).unwrap_or_else(|e| { + panic!("failed to open {}: {e}", path.display()) + }))) } // --------------------------------------------------------------------------- @@ -32,10 +32,9 @@ fn open(relative: &str) -> Option> { #[test] fn parse_all_bootstrap_locales() { let locales = [ - "ar_ae", "cs_cz", "de_de", "el_gr", "en_au", "en_gb", "en_ph", "en_sg", "en_us", - "es_ar", "es_es", "es_mx", "fr_fr", "hu_hu", "id_id", "it_it", "ja_jp", "ko_kr", - "pl_pl", "pt_br", "ro_ro", "ru_ru", "th_th", "tr_tr", "vi_vn", "zh_cn", "zh_my", - "zh_tw", + "ar_ae", "cs_cz", "de_de", "el_gr", "en_au", "en_gb", "en_ph", "en_sg", "en_us", "es_ar", + "es_es", "es_mx", "fr_fr", "hu_hu", "id_id", "it_it", "ja_jp", "ko_kr", "pl_pl", "pt_br", + "ro_ro", "ru_ru", "th_th", "tr_tr", "vi_vn", "zh_cn", "zh_my", "zh_tw", ]; for locale in locales { @@ -46,17 +45,16 @@ fn parse_all_bootstrap_locales() { let rst = RstFile::from_reader(&mut reader) .unwrap_or_else(|e| panic!("failed to parse {locale}/bootstrap.stringtable: {e}")); - assert_eq!( - rst.version, - RstVersion::V5, - "{locale}: expected version 5" - ); + assert_eq!(rst.version, RstVersion::V5, "{locale}: expected version 5"); assert!( !rst.entries.is_empty(), "{locale}: expected at least one entry" ); - println!("{locale}/bootstrap.stringtable: {} entries", rst.entries.len()); + println!( + "{locale}/bootstrap.stringtable: {} entries", + rst.entries.len() + ); } } @@ -158,7 +156,7 @@ fn compute_hash_is_case_insensitive() { } /// The Simple (v4/v5) mask is 39 bits; the Complex (v2/v3) mask is 40 bits. -/// A hash computed for Simple must fit within 39 bits. +/// Each hash must fit within its own mask. #[test] fn compute_hash_respects_bit_width() { let simple_mask = (1u64 << 39) - 1; @@ -169,11 +167,6 @@ fn compute_hash_respects_bit_width() { assert_eq!(simple_hash & simple_mask, simple_hash); assert_eq!(complex_hash & complex_mask, complex_hash); - // Simple hash must be fully contained in the smaller mask too... unless the - // 40th bit happens to be set. The property we actually guarantee is that - // each result fits within its own mask. - assert_eq!(simple_hash & simple_mask, simple_hash); - assert_eq!(complex_hash & complex_mask, complex_hash); } // --------------------------------------------------------------------------- @@ -193,7 +186,6 @@ fn invalid_magic_returns_error() { #[test] fn unsupported_version_returns_error() { - // Valid magic, then version byte 0x01 which is not a known version. let bad = b"RST\x01"; let mut cursor = Cursor::new(bad); let err = RstFile::from_reader(&mut cursor).unwrap_err(); @@ -229,14 +221,12 @@ fn insert_str_round_trips() { } /// Entries with identical string values must share a single copy in the -/// serialised byte stream. We verify this by checking the output is smaller -/// than it would be if every entry stored its own copy. +/// serialised byte stream. #[test] fn to_writer_deduplicates_strings() { let mut rst = RstFile::new(RstVersion::V5); let shared_value = "Shared string value"; - // Insert 10 different keys all pointing to the same value. for i in 0u64..10 { rst.insert(i, shared_value); } @@ -244,11 +234,13 @@ fn to_writer_deduplicates_strings() { let mut buf = Vec::new(); rst.to_writer(&mut buf).expect("serialise failed"); - // If deduplication works the string appears only once in the data section. let occurrences = buf .windows(shared_value.len()) .filter(|w| *w == shared_value.as_bytes()) .count(); - assert_eq!(occurrences, 1, "string should appear exactly once in output"); + assert_eq!( + occurrences, 1, + "string should appear exactly once in output" + ); } From 3f874935e5c9f66b2612fb999be7612e0687dd42 Mon Sep 17 00:00:00 2001 From: DexalGT Date: Tue, 7 Apr 2026 00:48:29 +0300 Subject: [PATCH 5/8] feat(ltk_hash): add xxhash64 re-export module --- crates/ltk_hash/Cargo.toml | 1 + crates/ltk_hash/src/lib.rs | 1 + crates/ltk_hash/src/xxhash.rs | 10 ++++++++++ 3 files changed, 12 insertions(+) create mode 100644 crates/ltk_hash/src/xxhash.rs diff --git a/crates/ltk_hash/Cargo.toml b/crates/ltk_hash/Cargo.toml index 70a9e0be..0125cd80 100644 --- a/crates/ltk_hash/Cargo.toml +++ b/crates/ltk_hash/Cargo.toml @@ -7,3 +7,4 @@ license = "MIT OR Apache-2.0" readme = "../../README.md" [dependencies] +xxhash-rust = { workspace = true } diff --git a/crates/ltk_hash/src/lib.rs b/crates/ltk_hash/src/lib.rs index bbb2f0e2..4b330549 100644 --- a/crates/ltk_hash/src/lib.rs +++ b/crates/ltk_hash/src/lib.rs @@ -1,3 +1,4 @@ //! Other utilities (hashing, etc) pub mod elf; pub mod fnv1a; +pub mod xxhash; diff --git a/crates/ltk_hash/src/xxhash.rs b/crates/ltk_hash/src/xxhash.rs new file mode 100644 index 00000000..b0b1e36c --- /dev/null +++ b/crates/ltk_hash/src/xxhash.rs @@ -0,0 +1,10 @@ +use xxhash_rust::xxh64::xxh64; + +/// Computes the XXHash64 of `input` bytes with the given `seed`. +/// +/// This is a thin wrapper around [`xxhash_rust::xxh64::xxh64`] that is +/// re-exported so downstream crates can depend on a single hashing crate. +#[inline] +pub fn xxhash64(input: &[u8], seed: u64) -> u64 { + xxh64(input, seed) +} From dd69d763f026ede8ea9570e72d26dfa1b868180a Mon Sep 17 00:00:00 2001 From: DexalGT Date: Tue, 7 Apr 2026 00:50:11 +0300 Subject: [PATCH 6/8] refactor(ltk_rst): address PR review feedback - Use ltk_hash instead of direct xxhash-rust dependency - Use byte string literal for MAGIC constant - Encode config/mode in RstVersion enum variants - Write directly to writer instead of intermediate buffer - Use ltk_io_ext::read_str_until_nul helper --- crates/ltk_rst/Cargo.toml | 3 +- crates/ltk_rst/src/hash.rs | 4 +- crates/ltk_rst/src/rst.rs | 106 +++++++++++++++------------------- crates/ltk_rst/src/version.rs | 86 ++++++++++++++++++++++----- 4 files changed, 122 insertions(+), 77 deletions(-) diff --git a/crates/ltk_rst/Cargo.toml b/crates/ltk_rst/Cargo.toml index 4a70e0a6..8c412490 100644 --- a/crates/ltk_rst/Cargo.toml +++ b/crates/ltk_rst/Cargo.toml @@ -12,6 +12,7 @@ serde = ["dep:serde"] [dependencies] thiserror = { workspace = true } byteorder = { workspace = true } -xxhash-rust = { workspace = true } +ltk_hash = { version = "0.2.6", path = "../ltk_hash" } +ltk_io_ext = { version = "0.4.2", path = "../ltk_io_ext" } serde = { workspace = true, optional = true } diff --git a/crates/ltk_rst/src/hash.rs b/crates/ltk_rst/src/hash.rs index 15641f92..635c3d69 100644 --- a/crates/ltk_rst/src/hash.rs +++ b/crates/ltk_rst/src/hash.rs @@ -1,4 +1,4 @@ -use xxhash_rust::xxh64::xxh64; +use ltk_hash::xxhash::xxhash64; use crate::version::RstHashType; @@ -10,7 +10,7 @@ use crate::version::RstHashType; /// - [`RstHashType::Simple`] → lower 39 bits pub fn compute_hash(key: &str, hash_type: RstHashType) -> u64 { let lowered = key.to_lowercase(); - let raw = xxh64(lowered.as_bytes(), 0); + let raw = xxhash64(lowered.as_bytes(), 0); raw & hash_type.hash_mask() } diff --git a/crates/ltk_rst/src/rst.rs b/crates/ltk_rst/src/rst.rs index ccb83cda..28a44d67 100644 --- a/crates/ltk_rst/src/rst.rs +++ b/crates/ltk_rst/src/rst.rs @@ -1,14 +1,15 @@ use std::collections::HashMap; -use std::io::{self, Read, Seek, SeekFrom, Write}; +use std::io::{Read, Seek, SeekFrom, Write}; use byteorder::{ReadBytesExt as _, WriteBytesExt as _, LE}; +use ltk_io_ext::ReaderExt as _; use crate::error::RstError; use crate::hash::{compute_hash, pack_entry, unpack_entry}; use crate::version::{RstMode, RstVersion}; /// Magic bytes at the start of every RST file: `"RST"`. -pub const MAGIC: [u8; 3] = [0x52, 0x53, 0x54]; +pub const MAGIC: &[u8; 3] = b"RST"; /// A parsed RST (Riot String Table) file. /// @@ -47,15 +48,9 @@ pub const MAGIC: [u8; 3] = [0x52, 0x53, 0x54]; #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Clone, PartialEq, Eq)] pub struct RstFile { - /// File version. + /// File version (encodes config and mode alongside version-specific data). pub version: RstVersion, - /// Optional font-config string. Only present (and written) in v2 files. - pub config: Option, - - /// Deprecated mode byte. Present in v2–v4; always [`RstMode::None`] for v5+. - pub mode: RstMode, - /// Hash → string mapping. pub entries: HashMap, } @@ -65,8 +60,6 @@ impl RstFile { pub fn new(version: RstVersion) -> Self { Self { version, - config: None, - mode: RstMode::None, entries: HashMap::new(), } } @@ -98,26 +91,24 @@ impl RstFile { pub fn from_reader(reader: &mut (impl Read + Seek)) -> Result { let mut magic = [0u8; 3]; reader.read_exact(&mut magic)?; - if magic != MAGIC { + if magic != *MAGIC { return Err(RstError::InvalidMagic { actual: magic }); } - let version = RstVersion::try_from_u8(reader.read_u8()?)?; + let version_byte = reader.read_u8()?; + let mut version = RstVersion::try_from_u8(version_byte)?; let hash_type = version.hash_type(); - let config = if version == RstVersion::V2 { + // Read config for V2 + if let RstVersion::V2 { ref mut config, .. } = version { let has_config = reader.read_u8()? != 0; if has_config { let len = reader.read_i32::()? as usize; let mut buf = vec![0u8; len]; reader.read_exact(&mut buf)?; - Some(String::from_utf8_lossy(&buf).into_owned()) - } else { - None + *config = Some(String::from_utf8_lossy(&buf).into_owned()); } - } else { - None - }; + } let count = reader.read_i32::()? as usize; let mut pairs: Vec<(u64, u64)> = Vec::with_capacity(count); @@ -126,11 +117,18 @@ impl RstFile { pairs.push(unpack_entry(raw, hash_type)); } - let mode = if version.has_mode_byte() { - RstMode::from_u8(reader.read_u8()?) - } else { - RstMode::None - }; + // Read mode byte for versions that have it + if version.has_mode_byte() { + let mode = RstMode::from_u8(reader.read_u8()?); + match &mut version { + RstVersion::V2 { + mode: ref mut m, .. + } => *m = mode, + RstVersion::V3 { mode: ref mut m } => *m = mode, + RstVersion::V4 { mode: ref mut m } => *m = mode, + _ => {} + } + } let data_start = reader.stream_position()?; let mut offset_cache: HashMap = HashMap::with_capacity(count); @@ -141,47 +139,45 @@ impl RstFile { cached.clone() } else { reader.seek(SeekFrom::Start(data_start + offset))?; - let text = read_null_terminated(reader)?; + let text = reader.read_str_until_nul()?; offset_cache.insert(offset, text.clone()); text }; entries.insert(hash, text); } - Ok(Self { - version, - config, - mode, - entries, - }) + Ok(Self { version, entries }) } /// Serialises this [`RstFile`] to any [`Write`] sink. pub fn to_writer(&self, writer: &mut impl Write) -> Result<(), RstError> { let hash_type = self.version.hash_type(); - let mut header: Vec = Vec::new(); + // Write magic + version byte + writer.write_all(MAGIC)?; + writer.write_u8(self.version.to_u8())?; - header.extend_from_slice(&MAGIC); - header.write_u8(self.version as u8)?; - - if self.version == RstVersion::V2 { - match &self.config { + // Write config for V2 + if let RstVersion::V2 { ref config, .. } = self.version { + match config { Some(cfg) if !cfg.is_empty() => { - header.write_u8(1)?; - header.write_i32::(cfg.len() as i32)?; - header.extend_from_slice(cfg.as_bytes()); + writer.write_u8(1)?; + writer.write_i32::(cfg.len() as i32)?; + writer.write_all(cfg.as_bytes())?; } _ => { - header.write_u8(0)?; + writer.write_u8(0)?; } } } - header.write_i32::(self.entries.len() as i32)?; + // Write entry count + writer.write_i32::(self.entries.len() as i32)?; + // Build string data blob with deduplication, and collect packed entries let mut data: Vec = Vec::new(); let mut text_to_offset: HashMap<&str, u64> = HashMap::with_capacity(self.entries.len()); + let mut packed_entries: Vec = Vec::with_capacity(self.entries.len()); for (hash, text) in &self.entries { let offset = if let Some(&off) = text_to_offset.get(text.as_str()) { @@ -195,28 +191,22 @@ impl RstFile { }; let packed = pack_entry(*hash, offset, hash_type); - header.write_u64::(packed)?; + packed_entries.push(packed); + } + + // Write packed hash-table entries + for packed in &packed_entries { + writer.write_u64::(*packed)?; } + // Write mode byte if applicable if self.version.has_mode_byte() { - header.write_u8(self.mode as u8)?; + writer.write_u8(self.version.mode() as u8)?; } - writer.write_all(&header)?; + // Write string data writer.write_all(&data)?; Ok(()) } } - -fn read_null_terminated(reader: &mut impl Read) -> Result { - let mut buf: Vec = Vec::new(); - loop { - let b = reader.read_u8()?; - if b == 0x00 { - break; - } - buf.push(b); - } - Ok(String::from_utf8_lossy(&buf).into_owned()) -} diff --git a/crates/ltk_rst/src/version.rs b/crates/ltk_rst/src/version.rs index 0030ba59..ae84932c 100644 --- a/crates/ltk_rst/src/version.rs +++ b/crates/ltk_rst/src/version.rs @@ -1,38 +1,92 @@ use crate::error::RstError; /// RST file version. +/// +/// `config` and `mode` are encoded alongside the version since they are only +/// valid for specific versions: +/// +/// - **V2** — complex (40-bit) hashing, optional font config, mode byte. +/// - **V3** — complex (40-bit) hashing, mode byte. +/// - **V4** — simple (39-bit) hashing, mode byte. +/// - **V5** — simple (39-bit) hashing, no mode byte. #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum RstVersion { - /// Version 2 — uses complex (40-bit) hashing; supports optional font config. - V2 = 0x02, - /// Version 3 — uses complex (40-bit) hashing. - V3 = 0x03, - /// Version 4 — uses simple (39-bit) hashing. - V4 = 0x04, + /// Version 2 — uses complex (40-bit) hashing; supports optional font config and mode byte. + V2 { + /// Optional font-config string. Only present (and written) in v2 files. + config: Option, + /// Deprecated mode byte. + mode: RstMode, + }, + /// Version 3 — uses complex (40-bit) hashing; has mode byte. + V3 { + /// Deprecated mode byte. + mode: RstMode, + }, + /// Version 4 — uses simple (39-bit) hashing; has mode byte. + V4 { + /// Deprecated mode byte. + mode: RstMode, + }, /// Version 5 — uses simple (39-bit) hashing; mode byte removed. - V5 = 0x05, + V5, } impl RstVersion { + /// Returns the raw version number as a `u8`. + pub fn to_u8(&self) -> u8 { + match self { + RstVersion::V2 { .. } => 0x02, + RstVersion::V3 { .. } => 0x03, + RstVersion::V4 { .. } => 0x04, + RstVersion::V5 => 0x05, + } + } + /// Returns the [`RstHashType`] that corresponds to this version. - pub fn hash_type(self) -> RstHashType { + pub fn hash_type(&self) -> RstHashType { match self { - RstVersion::V2 | RstVersion::V3 => RstHashType::Complex, - RstVersion::V4 | RstVersion::V5 => RstHashType::Simple, + RstVersion::V2 { .. } | RstVersion::V3 { .. } => RstHashType::Complex, + RstVersion::V4 { .. } | RstVersion::V5 => RstHashType::Simple, } } /// Returns `true` if this version stores a mode byte in the file. - pub fn has_mode_byte(self) -> bool { - self < RstVersion::V5 + pub fn has_mode_byte(&self) -> bool { + !matches!(self, RstVersion::V5) + } + + /// Returns the mode byte value, if applicable. + pub fn mode(&self) -> RstMode { + match self { + RstVersion::V2 { mode, .. } + | RstVersion::V3 { mode, .. } + | RstVersion::V4 { mode, .. } => *mode, + RstVersion::V5 => RstMode::None, + } + } + + /// Returns the font-config string, if applicable (v2 only). + pub fn config(&self) -> Option<&str> { + match self { + RstVersion::V2 { config, .. } => config.as_deref(), + _ => None, + } } pub(crate) fn try_from_u8(value: u8) -> Result { match value { - 0x02 => Ok(RstVersion::V2), - 0x03 => Ok(RstVersion::V3), - 0x04 => Ok(RstVersion::V4), + 0x02 => Ok(RstVersion::V2 { + config: None, + mode: RstMode::None, + }), + 0x03 => Ok(RstVersion::V3 { + mode: RstMode::None, + }), + 0x04 => Ok(RstVersion::V4 { + mode: RstMode::None, + }), 0x05 => Ok(RstVersion::V5), _ => Err(RstError::UnsupportedVersion { version: value }), } From 9b7ada4adfb5b50c2759f1a1a8bb29fb5755b458 Mon Sep 17 00:00:00 2001 From: DexalGT Date: Tue, 7 Apr 2026 11:49:43 +0300 Subject: [PATCH 7/8] refactor(ltk_rst): address review feedback and simplify Stringtable API --- crates/ltk_rst/src/lib.rs | 14 +-- crates/ltk_rst/src/rst.rs | 131 ++++++++++++---------------- crates/ltk_rst/src/version.rs | 90 ++++--------------- crates/ltk_rst/tests/parse_files.rs | 76 +++++++--------- 4 files changed, 110 insertions(+), 201 deletions(-) diff --git a/crates/ltk_rst/src/lib.rs b/crates/ltk_rst/src/lib.rs index 0506ad37..2f249b85 100644 --- a/crates/ltk_rst/src/lib.rs +++ b/crates/ltk_rst/src/lib.rs @@ -8,12 +8,12 @@ //! //! ```no_run //! use std::fs::File; -//! use ltk_rst::RstFile; +//! use ltk_rst::Stringtable; //! //! let mut file = File::open("fontconfig_en_us.stringtable")?; -//! let rst = RstFile::from_reader(&mut file)?; +//! let table = Stringtable::from_rst_reader(&mut file)?; //! -//! for (hash, text) in &rst.entries { +//! for (hash, text) in &table.entries { //! println!("{hash:#018x} = {text}"); //! } //! # Ok::<(), Box>(()) @@ -23,13 +23,13 @@ //! //! ```no_run //! use std::fs::File; -//! use ltk_rst::{RstFile, RstVersion}; +//! use ltk_rst::Stringtable; //! -//! let mut rst = RstFile::new(RstVersion::V5); -//! rst.insert_str("game_client_quit", "Quit"); +//! let mut table = Stringtable::new(); +//! table.insert_str("game_client_quit", "Quit"); //! //! let mut out = File::create("out.stringtable")?; -//! rst.to_writer(&mut out)?; +//! table.to_rst_writer(&mut out)?; //! # Ok::<(), Box>(()) //! ``` //! diff --git a/crates/ltk_rst/src/rst.rs b/crates/ltk_rst/src/rst.rs index 28a44d67..88ae86aa 100644 --- a/crates/ltk_rst/src/rst.rs +++ b/crates/ltk_rst/src/rst.rs @@ -6,27 +6,28 @@ use ltk_io_ext::ReaderExt as _; use crate::error::RstError; use crate::hash::{compute_hash, pack_entry, unpack_entry}; -use crate::version::{RstMode, RstVersion}; +use crate::version::RstVersion; /// Magic bytes at the start of every RST file: `"RST"`. pub const MAGIC: &[u8; 3] = b"RST"; -/// A parsed RST (Riot String Table) file. +/// A parsed string table. /// -/// RST files are League of Legends localisation tables that map XXHash64-based -/// keys to UTF-8 strings. The hash table entries pack both the string hash and -/// the offset of its null-terminated UTF-8 data into a single `u64`. +/// String tables are League of Legends localisation tables that map +/// XXHash64-based keys to UTF-8 strings. The hash table entries pack both +/// the string hash and the offset of its null-terminated UTF-8 data into a +/// single `u64`. /// /// # Reading /// /// ```no_run /// use std::fs::File; -/// use ltk_rst::RstFile; +/// use ltk_rst::Stringtable; /// /// let mut file = File::open("fontconfig_en_us.stringtable")?; -/// let rst = RstFile::from_reader(&mut file)?; +/// let table = Stringtable::from_rst_reader(&mut file)?; /// -/// if let Some(text) = rst.get(0x1234_5678_9abc_def0) { +/// if let Some(text) = table.get(0x1234_5678_9abc_def0) { /// println!("{text}"); /// } /// # Ok::<(), Box>(()) @@ -36,34 +37,45 @@ pub const MAGIC: &[u8; 3] = b"RST"; /// /// ```no_run /// use std::fs::File; -/// use ltk_rst::{RstFile, RstVersion}; +/// use ltk_rst::Stringtable; /// -/// let mut rst = RstFile::new(RstVersion::V5); -/// rst.insert_str("game_client_quit", "Quit"); +/// let mut table = Stringtable::new(); +/// table.insert_str("game_client_quit", "Quit"); /// /// let mut out = File::create("out.stringtable")?; -/// rst.to_writer(&mut out)?; +/// table.to_rst_writer(&mut out)?; /// # Ok::<(), Box>(()) /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Clone, PartialEq, Eq)] -pub struct RstFile { - /// File version (encodes config and mode alongside version-specific data). - pub version: RstVersion, - +pub struct Stringtable { /// Hash → string mapping. pub entries: HashMap, } -impl RstFile { - /// Creates an empty [`RstFile`] for the given version. - pub fn new(version: RstVersion) -> Self { +impl Stringtable { + /// Creates an empty [`Stringtable`]. + pub fn new() -> Self { Self { - version, entries: HashMap::new(), } } + /// Returns the number of entries in the table. + pub fn len(&self) -> usize { + self.entries.len() + } + + /// Returns `true` if the table contains no entries. + pub fn is_empty(&self) -> bool { + self.entries.is_empty() + } + + /// Returns an iterator over the entries in the table. + pub fn iter(&self) -> impl Iterator { + self.entries.iter() + } + /// Returns the string associated with `hash`, if any. pub fn get(&self, hash: u64) -> Option<&str> { self.entries.get(&hash).map(|s| s.as_str()) @@ -71,42 +83,40 @@ impl RstFile { /// Inserts an entry by pre-computed hash. /// - /// The hash must already be masked to the bit-width of the file's + /// The hash must already be masked to the bit-width of the desired /// [`RstHashType`] — use [`compute_hash`] to produce it. pub fn insert(&mut self, hash: u64, value: impl Into) { self.entries.insert(hash, value.into()); } - /// Hashes `key` for this file's version and inserts the entry. + /// Hashes `key` using the latest version's hash type and inserts the entry. pub fn insert_str(&mut self, key: &str, value: impl Into) { - let hash = compute_hash(key, self.version.hash_type()); + let hash = compute_hash(key, RstVersion::V5.hash_type()); self.insert(hash, value); } - /// Parses an [`RstFile`] from any [`Read`] + [`Seek`] source. + /// Parses a [`Stringtable`] from any [`Read`] + [`Seek`] source containing + /// RST data. /// /// Seeking is required because string data offsets stored in the hash /// table are relative to the start of the string section, which is only /// known after the entire hash table has been read. - pub fn from_reader(reader: &mut (impl Read + Seek)) -> Result { + pub fn from_rst_reader(reader: &mut (impl Read + Seek)) -> Result { let mut magic = [0u8; 3]; reader.read_exact(&mut magic)?; if magic != *MAGIC { return Err(RstError::InvalidMagic { actual: magic }); } - let version_byte = reader.read_u8()?; - let mut version = RstVersion::try_from_u8(version_byte)?; + let version = RstVersion::try_from_u8(reader.read_u8()?)?; let hash_type = version.hash_type(); - // Read config for V2 - if let RstVersion::V2 { ref mut config, .. } = version { + // V2 has an optional font-config string (read and discard). + if version == RstVersion::V2 { let has_config = reader.read_u8()? != 0; if has_config { - let len = reader.read_i32::()? as usize; - let mut buf = vec![0u8; len]; - reader.read_exact(&mut buf)?; - *config = Some(String::from_utf8_lossy(&buf).into_owned()); + let len = reader.read_i32::()?; + reader.seek(SeekFrom::Current(len as i64))?; } } @@ -117,17 +127,9 @@ impl RstFile { pairs.push(unpack_entry(raw, hash_type)); } - // Read mode byte for versions that have it + // V2–V4 have a mode byte (read and discard). if version.has_mode_byte() { - let mode = RstMode::from_u8(reader.read_u8()?); - match &mut version { - RstVersion::V2 { - mode: ref mut m, .. - } => *m = mode, - RstVersion::V3 { mode: ref mut m } => *m = mode, - RstVersion::V4 { mode: ref mut m } => *m = mode, - _ => {} - } + let _ = reader.read_u8()?; } let data_start = reader.stream_position()?; @@ -146,32 +148,17 @@ impl RstFile { entries.insert(hash, text); } - Ok(Self { version, entries }) + Ok(Self { entries }) } - /// Serialises this [`RstFile`] to any [`Write`] sink. - pub fn to_writer(&self, writer: &mut impl Write) -> Result<(), RstError> { - let hash_type = self.version.hash_type(); + /// Serialises this [`Stringtable`] to any [`Write`] sink as RST V5. + pub fn to_rst_writer(&self, writer: &mut impl Write) -> Result<(), RstError> { + use ltk_io_ext::WriterExt as _; + let hash_type = RstVersion::V5.hash_type(); - // Write magic + version byte writer.write_all(MAGIC)?; - writer.write_u8(self.version.to_u8())?; - - // Write config for V2 - if let RstVersion::V2 { ref config, .. } = self.version { - match config { - Some(cfg) if !cfg.is_empty() => { - writer.write_u8(1)?; - writer.write_i32::(cfg.len() as i32)?; - writer.write_all(cfg.as_bytes())?; - } - _ => { - writer.write_u8(0)?; - } - } - } + writer.write_u8(RstVersion::V5.to_u8())?; - // Write entry count writer.write_i32::(self.entries.len() as i32)?; // Build string data blob with deduplication, and collect packed entries @@ -184,8 +171,7 @@ impl RstFile { off } else { let off = data.len() as u64; - data.extend_from_slice(text.as_bytes()); - data.push(0x00); + data.write_terminated_string(text)?; text_to_offset.insert(text.as_str(), off); off }; @@ -194,19 +180,18 @@ impl RstFile { packed_entries.push(packed); } - // Write packed hash-table entries for packed in &packed_entries { writer.write_u64::(*packed)?; } - // Write mode byte if applicable - if self.version.has_mode_byte() { - writer.write_u8(self.version.mode() as u8)?; - } - - // Write string data writer.write_all(&data)?; Ok(()) } } + +impl Default for Stringtable { + fn default() -> Self { + Self::new() + } +} diff --git a/crates/ltk_rst/src/version.rs b/crates/ltk_rst/src/version.rs index ae84932c..57019980 100644 --- a/crates/ltk_rst/src/version.rs +++ b/crates/ltk_rst/src/version.rs @@ -2,91 +2,48 @@ use crate::error::RstError; /// RST file version. /// -/// `config` and `mode` are encoded alongside the version since they are only -/// valid for specific versions: -/// /// - **V2** — complex (40-bit) hashing, optional font config, mode byte. /// - **V3** — complex (40-bit) hashing, mode byte. /// - **V4** — simple (39-bit) hashing, mode byte. /// - **V5** — simple (39-bit) hashing, no mode byte. #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum RstVersion { /// Version 2 — uses complex (40-bit) hashing; supports optional font config and mode byte. - V2 { - /// Optional font-config string. Only present (and written) in v2 files. - config: Option, - /// Deprecated mode byte. - mode: RstMode, - }, + V2 = 2, /// Version 3 — uses complex (40-bit) hashing; has mode byte. - V3 { - /// Deprecated mode byte. - mode: RstMode, - }, + V3 = 3, /// Version 4 — uses simple (39-bit) hashing; has mode byte. - V4 { - /// Deprecated mode byte. - mode: RstMode, - }, + V4 = 4, /// Version 5 — uses simple (39-bit) hashing; mode byte removed. - V5, + V5 = 5, } impl RstVersion { /// Returns the raw version number as a `u8`. - pub fn to_u8(&self) -> u8 { - match self { - RstVersion::V2 { .. } => 0x02, - RstVersion::V3 { .. } => 0x03, - RstVersion::V4 { .. } => 0x04, - RstVersion::V5 => 0x05, - } + #[inline] + pub fn to_u8(self) -> u8 { + self as u8 } /// Returns the [`RstHashType`] that corresponds to this version. - pub fn hash_type(&self) -> RstHashType { + pub fn hash_type(self) -> RstHashType { match self { - RstVersion::V2 { .. } | RstVersion::V3 { .. } => RstHashType::Complex, - RstVersion::V4 { .. } | RstVersion::V5 => RstHashType::Simple, + RstVersion::V2 | RstVersion::V3 => RstHashType::Complex, + RstVersion::V4 | RstVersion::V5 => RstHashType::Simple, } } /// Returns `true` if this version stores a mode byte in the file. - pub fn has_mode_byte(&self) -> bool { + pub fn has_mode_byte(self) -> bool { !matches!(self, RstVersion::V5) } - /// Returns the mode byte value, if applicable. - pub fn mode(&self) -> RstMode { - match self { - RstVersion::V2 { mode, .. } - | RstVersion::V3 { mode, .. } - | RstVersion::V4 { mode, .. } => *mode, - RstVersion::V5 => RstMode::None, - } - } - - /// Returns the font-config string, if applicable (v2 only). - pub fn config(&self) -> Option<&str> { - match self { - RstVersion::V2 { config, .. } => config.as_deref(), - _ => None, - } - } - pub(crate) fn try_from_u8(value: u8) -> Result { match value { - 0x02 => Ok(RstVersion::V2 { - config: None, - mode: RstMode::None, - }), - 0x03 => Ok(RstVersion::V3 { - mode: RstMode::None, - }), - 0x04 => Ok(RstVersion::V4 { - mode: RstMode::None, - }), + 0x02 => Ok(RstVersion::V2), + 0x03 => Ok(RstVersion::V3), + 0x04 => Ok(RstVersion::V4), 0x05 => Ok(RstVersion::V5), _ => Err(RstError::UnsupportedVersion { version: value }), } @@ -121,20 +78,3 @@ impl RstHashType { } } -/// The (deprecated) mode byte stored in RST files with version < 5. -#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)] -pub enum RstMode { - #[default] - None = 0, - Default = 1, -} - -impl RstMode { - pub(crate) fn from_u8(value: u8) -> Self { - match value { - 1 => RstMode::Default, - _ => RstMode::None, - } - } -} diff --git a/crates/ltk_rst/tests/parse_files.rs b/crates/ltk_rst/tests/parse_files.rs index 06fcc051..005fe233 100644 --- a/crates/ltk_rst/tests/parse_files.rs +++ b/crates/ltk_rst/tests/parse_files.rs @@ -8,7 +8,7 @@ use std::fs::File; use std::io::{BufReader, Cursor}; use std::path::Path; -use ltk_rst::{compute_hash, RstError, RstFile, RstHashType, RstVersion}; +use ltk_rst::{compute_hash, RstError, RstHashType, Stringtable}; const TEST_FILES_ROOT: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../../test-files/data/menu"); @@ -23,9 +23,7 @@ fn open(relative: &str) -> Option> { }))) } -// --------------------------------------------------------------------------- -// Parse tests -// --------------------------------------------------------------------------- + /// Parses every locale's bootstrap.stringtable to ensure the reader handles /// all regional encodings (CJK, Arabic, Cyrillic, …) without error. @@ -42,18 +40,17 @@ fn parse_all_bootstrap_locales() { continue; }; - let rst = RstFile::from_reader(&mut reader) + let table = Stringtable::from_rst_reader(&mut reader) .unwrap_or_else(|e| panic!("failed to parse {locale}/bootstrap.stringtable: {e}")); - assert_eq!(rst.version, RstVersion::V5, "{locale}: expected version 5"); assert!( - !rst.entries.is_empty(), + !table.entries.is_empty(), "{locale}: expected at least one entry" ); println!( "{locale}/bootstrap.stringtable: {} entries", - rst.entries.len() + table.entries.len() ); } } @@ -66,17 +63,16 @@ fn parse_lol_and_tft_stringtables() { continue; }; - let rst = RstFile::from_reader(&mut reader) + let table = Stringtable::from_rst_reader(&mut reader) .unwrap_or_else(|e| panic!("failed to parse en_us/{name}.stringtable: {e}")); - assert_eq!(rst.version, RstVersion::V5); assert_eq!( - rst.entries.len(), + table.entries.len(), expected_count, "{name}.stringtable entry count mismatch" ); - println!("en_us/{name}.stringtable: {} entries", rst.entries.len()); + println!("en_us/{name}.stringtable: {} entries", table.entries.len()); } } @@ -87,19 +83,17 @@ fn parse_bootstrap_known_entries() { return; }; - let rst = - RstFile::from_reader(&mut reader).expect("failed to parse en_us/bootstrap.stringtable"); + let table = + Stringtable::from_rst_reader(&mut reader).expect("failed to parse en_us/bootstrap.stringtable"); - assert_eq!(rst.entries.len(), 201); + assert_eq!(table.entries.len(), 201); // Known stable entries confirmed from the file. - assert_eq!(rst.get(0x000000008818cc3c), Some("Ignore")); - assert_eq!(rst.get(0x0000004732dbee5e), Some("Cancel")); + assert_eq!(table.get(0x000000008818cc3c), Some("Ignore")); + assert_eq!(table.get(0x0000004732dbee5e), Some("Cancel")); } -// --------------------------------------------------------------------------- -// Round-trip tests -// --------------------------------------------------------------------------- + /// Parses en_us/bootstrap.stringtable, serialises it back to bytes, parses /// those bytes again, and asserts the two parsed representations are equal. @@ -110,21 +104,17 @@ fn round_trip_bootstrap() { }; let original = - RstFile::from_reader(&mut reader).expect("failed to parse en_us/bootstrap.stringtable"); + Stringtable::from_rst_reader(&mut reader).expect("failed to parse en_us/bootstrap.stringtable"); let mut buf = Vec::new(); original - .to_writer(&mut buf) + .to_rst_writer(&mut buf) .expect("failed to serialise bootstrap.stringtable"); let mut cursor = Cursor::new(&buf); let reloaded = - RstFile::from_reader(&mut cursor).expect("failed to re-parse serialised bootstrap"); + Stringtable::from_rst_reader(&mut cursor).expect("failed to re-parse serialised bootstrap"); - assert_eq!( - original.version, reloaded.version, - "version mismatch after round-trip" - ); assert_eq!( original.entries.len(), reloaded.entries.len(), @@ -139,9 +129,7 @@ fn round_trip_bootstrap() { } } -// --------------------------------------------------------------------------- -// Hash tests -// --------------------------------------------------------------------------- + /// compute_hash lowercases before hashing, so both cases must produce the same /// result. @@ -169,15 +157,13 @@ fn compute_hash_respects_bit_width() { assert_eq!(complex_hash & complex_mask, complex_hash); } -// --------------------------------------------------------------------------- -// Error tests -// --------------------------------------------------------------------------- + #[test] fn invalid_magic_returns_error() { let bad = b"\x00\x00\x00\x05"; let mut cursor = Cursor::new(bad); - let err = RstFile::from_reader(&mut cursor).unwrap_err(); + let err = Stringtable::from_rst_reader(&mut cursor).unwrap_err(); assert!( matches!(err, RstError::InvalidMagic { .. }), "expected InvalidMagic, got {err:?}" @@ -188,30 +174,28 @@ fn invalid_magic_returns_error() { fn unsupported_version_returns_error() { let bad = b"RST\x01"; let mut cursor = Cursor::new(bad); - let err = RstFile::from_reader(&mut cursor).unwrap_err(); + let err = Stringtable::from_rst_reader(&mut cursor).unwrap_err(); assert!( matches!(err, RstError::UnsupportedVersion { version: 0x01 }), "expected UnsupportedVersion(0x01), got {err:?}" ); } -// --------------------------------------------------------------------------- -// Builder / insertion tests -// --------------------------------------------------------------------------- + /// Verifies that insert_str hashes the key and stores the value, and that the /// resulting file can be written and re-read with no data loss. #[test] fn insert_str_round_trips() { - let mut rst = RstFile::new(RstVersion::V5); - rst.insert_str("game_client_quit", "Quit"); - rst.insert_str("game_client_play", "Play"); + let mut table = Stringtable::new(); + table.insert_str("game_client_quit", "Quit"); + table.insert_str("game_client_play", "Play"); let mut buf = Vec::new(); - rst.to_writer(&mut buf).expect("serialise failed"); + table.to_rst_writer(&mut buf).expect("serialise failed"); let mut cursor = Cursor::new(&buf); - let loaded = RstFile::from_reader(&mut cursor).expect("re-parse failed"); + let loaded = Stringtable::from_rst_reader(&mut cursor).expect("re-parse failed"); let quit_hash = compute_hash("game_client_quit", RstHashType::Simple); let play_hash = compute_hash("game_client_play", RstHashType::Simple); @@ -224,15 +208,15 @@ fn insert_str_round_trips() { /// serialised byte stream. #[test] fn to_writer_deduplicates_strings() { - let mut rst = RstFile::new(RstVersion::V5); + let mut table = Stringtable::new(); let shared_value = "Shared string value"; for i in 0u64..10 { - rst.insert(i, shared_value); + table.insert(i, shared_value); } let mut buf = Vec::new(); - rst.to_writer(&mut buf).expect("serialise failed"); + table.to_rst_writer(&mut buf).expect("serialise failed"); let occurrences = buf .windows(shared_value.len()) From ae468e1d3ac9a2214ffe4346dd0e537e732f49f5 Mon Sep 17 00:00:00 2001 From: DexalGT Date: Tue, 7 Apr 2026 11:52:17 +0300 Subject: [PATCH 8/8] style(ltk_rst): run rustfmt --- crates/ltk_rst/src/version.rs | 1 - crates/ltk_rst/tests/parse_files.rs | 18 ++++-------------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/crates/ltk_rst/src/version.rs b/crates/ltk_rst/src/version.rs index 57019980..62c48c30 100644 --- a/crates/ltk_rst/src/version.rs +++ b/crates/ltk_rst/src/version.rs @@ -77,4 +77,3 @@ impl RstHashType { self as u8 } } - diff --git a/crates/ltk_rst/tests/parse_files.rs b/crates/ltk_rst/tests/parse_files.rs index 005fe233..b2b99b9d 100644 --- a/crates/ltk_rst/tests/parse_files.rs +++ b/crates/ltk_rst/tests/parse_files.rs @@ -23,8 +23,6 @@ fn open(relative: &str) -> Option> { }))) } - - /// Parses every locale's bootstrap.stringtable to ensure the reader handles /// all regional encodings (CJK, Arabic, Cyrillic, …) without error. #[test] @@ -83,8 +81,8 @@ fn parse_bootstrap_known_entries() { return; }; - let table = - Stringtable::from_rst_reader(&mut reader).expect("failed to parse en_us/bootstrap.stringtable"); + let table = Stringtable::from_rst_reader(&mut reader) + .expect("failed to parse en_us/bootstrap.stringtable"); assert_eq!(table.entries.len(), 201); @@ -93,8 +91,6 @@ fn parse_bootstrap_known_entries() { assert_eq!(table.get(0x0000004732dbee5e), Some("Cancel")); } - - /// Parses en_us/bootstrap.stringtable, serialises it back to bytes, parses /// those bytes again, and asserts the two parsed representations are equal. #[test] @@ -103,8 +99,8 @@ fn round_trip_bootstrap() { return; }; - let original = - Stringtable::from_rst_reader(&mut reader).expect("failed to parse en_us/bootstrap.stringtable"); + let original = Stringtable::from_rst_reader(&mut reader) + .expect("failed to parse en_us/bootstrap.stringtable"); let mut buf = Vec::new(); original @@ -129,8 +125,6 @@ fn round_trip_bootstrap() { } } - - /// compute_hash lowercases before hashing, so both cases must produce the same /// result. #[test] @@ -157,8 +151,6 @@ fn compute_hash_respects_bit_width() { assert_eq!(complex_hash & complex_mask, complex_hash); } - - #[test] fn invalid_magic_returns_error() { let bad = b"\x00\x00\x00\x05"; @@ -181,8 +173,6 @@ fn unsupported_version_returns_error() { ); } - - /// Verifies that insert_str hashes the key and stores the value, and that the /// resulting file can be written and re-read with no data loss. #[test]