// Captured from commit f657d72 (2 parents: 8baaa60 + 5d1eb2e):
//   "feat: add FieldRegistry + deterministic u64 key encoding for BitmapSilo (#140)"
// The commit changed 3 files (542 additions, 0 deletions).
// This file: src/silos/bitmap_keys.rs (202 additions, 0 deletions; hunk @@ -0,0 +1,202 @@).
//! Deterministic u64 key encoding for BitmapSilo.
//!
//! Replaces the string-based manifest (`name_to_key` HashMap) with pure arithmetic.
//! Keys are computed from (field_id, value/bit_layer/bucket_id) and go directly
//! to DataSilo's mmap HashIndex. No heap allocation, no locks, no manifest file.
//!
//! ## Namespace layout (top 2 bits)
//!
//! | Prefix | Binary    | Use                       |
//! |--------|-----------|---------------------------|
//! | 0b00   | 00xx xxxx | Filter keys + system keys |
//! | 0b01   | 01xx xxxx | Reserved                  |
//! | 0b10   | 10xx xxxx | Sort keys                 |
//! | 0b11   | 11xx xxxx | Bucket keys               |
//!
//! ## System keys (literal small values)
//!
//! - `1` = alive bitmap
//! - `2` = metadata (slot_counter, cursors, etc.)
//!
//! These are safe because real filter keys have `field_id >= 1` in the upper bits,
//! so the smallest filter key, `(1u64 << 48) | 0`, is 2^48 — far above 1 or 2.
//! Note that `encode_filter_key` only checks the upper bound of `field_id`;
//! callers must never hand it `field_id == 0`, otherwise small values would
//! collide with the system keys.
/// Alive bitmap key — literal value 1.
pub const KEY_ALIVE: u64 = 1;

/// Metadata key — literal value 2.
pub const KEY_META: u64 = 2;

/// Sort namespace prefix: only bit 63 is set, so the top two bits read `0b10`.
const SORT_PREFIX: u64 = 0x8000_0000_0000_0000;

/// Bucket namespace prefix: bits 63 and 62 are set, so the top two bits read `0b11`.
const BUCKET_PREFIX: u64 = 0xC000_0000_0000_0000;

/// Maximum field_id that fits without colliding with namespace prefixes.
///
/// Filter keys place `field_id` in bits 48..=63 and use the top 2 bits as the
/// namespace (`0b00`), so `field_id` must fit in the remaining 14 bits.
/// With ~40 fields in practice, this is never a concern.
pub const MAX_FIELD_ID: u16 = 0x3FFF; // 16383
40+
41+
/// Encode a filter bitmap key: `(field_id << 48) | (value & 0xFFFF_FFFF_FFFF)`.
42+
///
43+
/// 14 bits for field_id (max 16383), 48 bits for value.
44+
/// Top 2 bits are always 0b00 (filter namespace) since field_id <= MAX_FIELD_ID.
45+
#[inline]
46+
pub fn encode_filter_key(field_id: u16, value: u64) -> u64 {
47+
debug_assert!(field_id <= MAX_FIELD_ID, "field_id {field_id} exceeds MAX_FIELD_ID {MAX_FIELD_ID}");
48+
((field_id as u64) << 48) | (value & 0x0000_FFFF_FFFF_FFFF)
49+
}
50+
51+
/// Encode a sort bit-layer key: `0x8000... | (field_id << 32) | bit_layer`.
52+
///
53+
/// High bit = sort namespace. 14 bits field_id, 32 bits bit_layer index.
54+
#[inline]
55+
pub fn encode_sort_key(field_id: u16, bit_layer: u32) -> u64 {
56+
debug_assert!(field_id <= MAX_FIELD_ID, "field_id {field_id} exceeds MAX_FIELD_ID {MAX_FIELD_ID}");
57+
SORT_PREFIX | ((field_id as u64) << 32) | (bit_layer as u64)
58+
}
59+
60+
/// Encode a time bucket key: `0xC000... | (field_id << 16) | bucket_id`.
61+
///
62+
/// High 2 bits = bucket namespace. 14 bits field_id, 16 bits bucket_id.
63+
#[inline]
64+
pub fn encode_bucket_key(field_id: u16, bucket_id: u16) -> u64 {
65+
debug_assert!(field_id <= MAX_FIELD_ID, "field_id {field_id} exceeds MAX_FIELD_ID {MAX_FIELD_ID}");
66+
BUCKET_PREFIX | ((field_id as u64) << 16) | (bucket_id as u64)
67+
}
/// Decoded key with namespace and components.
///
/// Every variant holds only plain integers, so the type is cheap to copy and
/// can be used as a hash-map/set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DecodedKey {
    /// System key (alive=1, metadata=2).
    System(u64),
    /// Filter bitmap: (field_id, value).
    Filter { field_id: u16, value: u64 },
    /// Sort bit-layer: (field_id, bit_layer).
    Sort { field_id: u16, bit_layer: u32 },
    /// Time bucket: (field_id, bucket_id).
    Bucket { field_id: u16, bucket_id: u16 },
}
81+
82+
/// Decode a u64 silo key back to its components.
83+
pub fn decode_key(key: u64) -> DecodedKey {
84+
if key <= 2 {
85+
return DecodedKey::System(key);
86+
}
87+
let top2 = key >> 62;
88+
match top2 {
89+
0b00 | 0b01 => {
90+
// Filter namespace (0b00). 0b01 is reserved but decode as filter for safety.
91+
let field_id = (key >> 48) as u16;
92+
let value = key & 0x0000_FFFF_FFFF_FFFF;
93+
DecodedKey::Filter { field_id, value }
94+
}
95+
0b10 => {
96+
// Sort namespace
97+
let field_id = ((key >> 32) & 0xFFFF) as u16;
98+
let bit_layer = (key & 0xFFFF_FFFF) as u32;
99+
DecodedKey::Sort { field_id, bit_layer }
100+
}
101+
0b11 => {
102+
// Bucket namespace
103+
let field_id = ((key >> 16) & 0xFFFF) as u16;
104+
let bucket_id = (key & 0xFFFF) as u16;
105+
DecodedKey::Bucket { field_id, bucket_id }
106+
}
107+
_ => unreachable!(),
108+
}
109+
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Largest value that fits in the 48-bit value field of a filter key.
    const MAX_FILTER_VALUE: u64 = 0x0000_FFFF_FFFF_FFFF;

    #[test]
    fn system_keys_are_small() {
        assert_eq!(KEY_ALIVE, 1);
        assert_eq!(KEY_META, 2);
    }

    #[test]
    fn system_keys_decode_as_system() {
        assert_eq!(decode_key(KEY_ALIVE), DecodedKey::System(KEY_ALIVE));
        assert_eq!(decode_key(KEY_META), DecodedKey::System(KEY_META));
    }

    #[test]
    fn filter_key_roundtrip() {
        for field_id in [1u16, 5, 100, MAX_FIELD_ID] {
            for value in [0u64, 1, 42, MAX_FILTER_VALUE] {
                let key = encode_filter_key(field_id, value);
                assert!(key > 2, "filter key must not collide with system keys");
                assert_eq!(decode_key(key), DecodedKey::Filter { field_id, value });
            }
        }
    }

    #[test]
    fn sort_key_roundtrip() {
        // Includes the largest legal field_id and the full bit_layer range.
        for field_id in [1u16, 5, 100, MAX_FIELD_ID] {
            for bit_layer in [0u32, 1, 31, 63, u32::MAX] {
                let key = encode_sort_key(field_id, bit_layer);
                assert_eq!(decode_key(key), DecodedKey::Sort { field_id, bit_layer });
            }
        }
    }

    #[test]
    fn bucket_key_roundtrip() {
        // Includes the largest legal field_id.
        for field_id in [1u16, 5, 100, MAX_FIELD_ID] {
            for bucket_id in [0u16, 1, 3, 0xFFFF] {
                let key = encode_bucket_key(field_id, bucket_id);
                assert_eq!(decode_key(key), DecodedKey::Bucket { field_id, bucket_id });
            }
        }
    }

    #[test]
    fn no_namespace_collisions() {
        // Filter key with field_id=1, value=0 must differ from sort/bucket keys
        let filter = encode_filter_key(1, 0);
        let sort = encode_sort_key(1, 0);
        let bucket = encode_bucket_key(1, 0);
        assert_ne!(filter, sort);
        assert_ne!(filter, bucket);
        assert_ne!(sort, bucket);
        assert_ne!(filter, KEY_ALIVE);
        assert_ne!(filter, KEY_META);
    }

    #[test]
    fn filter_keys_never_collide_with_system() {
        // field_id starts at 1, so smallest filter key is (1 << 48) | 0 = 2^48
        let smallest = encode_filter_key(1, 0);
        assert!(
            smallest > KEY_META,
            "smallest filter key {} must exceed metadata key {}",
            smallest,
            KEY_META
        );
    }

    #[test]
    fn value_truncation() {
        // Values > 48 bits get truncated
        let key = encode_filter_key(1, u64::MAX);
        assert_eq!(
            decode_key(key),
            DecodedKey::Filter { field_id: 1, value: MAX_FILTER_VALUE }
        );
    }
}

// End of src/silos/bitmap_keys.rs as captured from the commit page (0 commit comments).