Skip to content

Commit 647c4ac

Browse files
authored
feat: planner cardinality from BitmapSilo frozen bitmaps (#136)
feat: planner cardinality from BitmapSilo frozen bitmaps
2 parents a0defd7 + 596f271 commit 647c4ac

3 files changed

Lines changed: 64 additions & 25 deletions

File tree

src/engine/executor.rs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -186,6 +186,10 @@ impl<'a> QueryExecutor<'a> {
186186
pub fn dictionaries(&self) -> Option<&'a HashMap<String, FieldDictionary>> {
187187
self.dictionaries
188188
}
189+
/// Returns the executor's optional `BitmapSilo` reference (`None` when no silo is attached); exposed so callers can build a planner context.
190+
pub fn bitmap_silo(&self) -> Option<&'a crate::silos::bitmap_silo::BitmapSilo> {
191+
self.bitmap_silo
192+
}
189193
/// Resolve a Value to a bitmap key, consulting string_maps for MappedString fields
190194
/// and live dictionaries for LowCardinalityString fields.
191195
/// Applies case-insensitive normalization (lowercase) unless the field is in case_sensitive_fields.
@@ -290,6 +294,7 @@ impl<'a> QueryExecutor<'a> {
290294
let ctx = planner::PlannerContext {
291295
string_maps: self.string_maps,
292296
dictionaries: self.dictionaries,
297+
bitmap_silo: self.bitmap_silo,
293298
};
294299
let plan = planner::plan_query_with_context(filters, self.filters, self.slots, Some(&ctx));
295300
// Step 2: Compute filter bitmap using planned clause order

src/engine/query.rs

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -415,6 +415,7 @@ impl ConcurrentEngine {
415415
let planner_ctx = planner::PlannerContext {
416416
string_maps: executor.string_maps(),
417417
dictionaries: executor.dictionaries(),
418+
bitmap_silo: executor.bitmap_silo(),
418419
};
419420
let plan = planner::plan_query_with_context(effective_filters, executor.filter_index(), executor.slot_allocator(), Some(&planner_ctx));
420421
let filter_bitmap = Arc::new(executor.compute_filters(&plan.ordered_clauses)?);

src/query/planner.rs

Lines changed: 58 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ use std::collections::HashMap;
22
use crate::engine::filter::FilterIndex;
33
use crate::query::{FilterClause, Value};
44
use crate::engine::slot::SlotAllocator;
5+
use crate::silos::bitmap_silo::BitmapSilo;
56
/// Threshold below which we skip bitmap sort traversal and use a simple in-memory sort.
67
/// For very small result sets, extracting IDs and sorting is faster than walking 32 bit layers.
78
const SORT_FIRST_THRESHOLD: u64 = 1000;
@@ -11,49 +12,75 @@ pub struct PlannerContext<'a> {
1112
pub string_maps: Option<&'a HashMap<String, HashMap<String, i64>>>,
1213
/// Live dictionaries: field_name → FieldDictionary for LCS fields.
1314
pub dictionaries: Option<&'a HashMap<String, crate::dictionary::FieldDictionary>>,
15+
/// BitmapSilo for frozen cardinality reads. When present, estimate_cardinality
16+
/// reads the frozen bitmap length directly from the silo's mmap — cheaper than
17+
/// applying ops, and accurate enough for best-effort planning.
18+
pub bitmap_silo: Option<&'a BitmapSilo>,
1419
}
1520
/// Estimates the cardinality of a filter clause using bitmap metadata.
1621
/// Returns the estimated number of matching documents.
22+
///
23+
/// Priority for single-value lookups:
24+
/// 1. BitmapSilo frozen bitmap (zero-heap, mmap read) — used when silo is present
25+
/// 2. In-memory FilterIndex (VersionedBitmap base_len) — fallback when silo absent or key missing
26+
/// 3. alive_count — worst-case fallback when field is unknown
1727
fn estimate_cardinality(clause: &FilterClause, filters: &FilterIndex, alive_count: u64, ctx: Option<&PlannerContext<'_>>) -> u64 {
1828
match clause {
1929
FilterClause::Eq(field, value) => {
20-
if let Some(ff) = filters.get_field(field) {
21-
if let Some(key) = resolve_value_key(field, value, ctx) {
30+
if let Some(key) = resolve_value_key(field, value, ctx) {
31+
if let Some(card) = silo_cardinality(ctx, field, key) {
32+
return card;
33+
}
34+
if let Some(ff) = filters.get_field(field) {
2235
return ff.cardinality(key);
2336
}
2437
}
2538
// Unknown field or unconvertible value: assume worst case
2639
alive_count
2740
}
2841
FilterClause::NotEq(field, value) => {
29-
if let Some(ff) = filters.get_field(field) {
30-
if let Some(key) = resolve_value_key(field, value, ctx) {
31-
return alive_count.saturating_sub(ff.cardinality(key));
42+
if let Some(key) = resolve_value_key(field, value, ctx) {
43+
let card = silo_cardinality(ctx, field, key)
44+
.or_else(|| filters.get_field(field).map(|ff| ff.cardinality(key)));
45+
if let Some(c) = card {
46+
return alive_count.saturating_sub(c);
3247
}
3348
}
3449
alive_count
3550
}
3651
FilterClause::In(field, values) => {
37-
if let Some(ff) = filters.get_field(field) {
38-
let mut total = 0u64;
39-
for v in values {
40-
if let Some(key) = resolve_value_key(field, v, ctx) {
41-
total += ff.cardinality(key);
52+
let mut total = 0u64;
53+
let mut found = false;
54+
for v in values {
55+
if let Some(key) = resolve_value_key(field, v, ctx) {
56+
let card = silo_cardinality(ctx, field, key)
57+
.or_else(|| filters.get_field(field).map(|ff| ff.cardinality(key)));
58+
if let Some(c) = card {
59+
total += c;
60+
found = true;
4261
}
4362
}
63+
}
64+
if found {
4465
// Union can't exceed alive_count; this is an upper bound (may overcount overlaps)
4566
return total.min(alive_count);
4667
}
4768
alive_count
4869
}
4970
FilterClause::NotIn(field, values) => {
50-
if let Some(ff) = filters.get_field(field) {
51-
let mut total = 0u64;
52-
for v in values {
53-
if let Some(key) = resolve_value_key(field, v, ctx) {
54-
total += ff.cardinality(key);
71+
let mut total = 0u64;
72+
let mut found = false;
73+
for v in values {
74+
if let Some(key) = resolve_value_key(field, v, ctx) {
75+
let card = silo_cardinality(ctx, field, key)
76+
.or_else(|| filters.get_field(field).map(|ff| ff.cardinality(key)));
77+
if let Some(c) = card {
78+
total += c;
79+
found = true;
5580
}
5681
}
82+
}
83+
if found {
5784
return alive_count.saturating_sub(total.min(alive_count));
5885
}
5986
alive_count
@@ -87,23 +114,29 @@ fn estimate_cardinality(clause: &FilterClause, filters: &FilterIndex, alive_coun
87114
FilterClause::BucketBitmap { bitmap, .. } => bitmap.len(),
88115
// IsNull: use the null bitmap's length if it exists, else assume rare (~10% of alive).
89116
FilterClause::IsNull(field) => {
90-
if let Some(ff) = filters.get_field(field) {
91-
ff.cardinality(crate::engine::filter::NULL_BITMAP_KEY)
92-
} else {
93-
alive_count / 10
94-
}
117+
let null_key = crate::engine::filter::NULL_BITMAP_KEY;
118+
silo_cardinality(ctx, field, null_key)
119+
.or_else(|| filters.get_field(field).map(|ff| ff.cardinality(null_key)))
120+
.unwrap_or(alive_count / 10)
95121
}
96122
// IsNotNull: alive minus the null count.
97123
FilterClause::IsNotNull(field) => {
98-
let null_count = if let Some(ff) = filters.get_field(field) {
99-
ff.cardinality(crate::engine::filter::NULL_BITMAP_KEY)
100-
} else {
101-
alive_count / 10
102-
};
124+
let null_key = crate::engine::filter::NULL_BITMAP_KEY;
125+
let null_count = silo_cardinality(ctx, field, null_key)
126+
.or_else(|| filters.get_field(field).map(|ff| ff.cardinality(null_key)))
127+
.unwrap_or(alive_count / 10);
103128
alive_count.saturating_sub(null_count)
104129
}
105130
}
106131
}
132+
133+
/// Look up the cardinality of a single (field, bitmap key) pair via the silo's frozen filter bitmap.
134+
/// Returns `None` when `ctx` is `None`, when the context carries no silo, or when `get_frozen_filter` yields nothing for (field, key).
135+
/// Intended to be cheap — it reads the frozen bitmap length from the mmap without heap allocation.
136+
#[inline]
137+
fn silo_cardinality(ctx: Option<&PlannerContext<'_>>, field: &str, key: u64) -> Option<u64> {
138+
ctx?.bitmap_silo?.get_frozen_filter(field, key).map(|bm| bm.len()) // each `?` short-circuits to None when the context or the silo is missing
139+
}
107140
/// Resolve a Value to a bitmap key, using string maps/dictionaries for String values.
108141
fn resolve_value_key(field: &str, val: &Value, ctx: Option<&PlannerContext<'_>>) -> Option<u64> {
109142
// Try direct conversion first (Integer, Bool)

0 commit comments

Comments
 (0)