Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
2374094
Use Flexbuffer
FTRobbin Feb 23, 2026
dae76d2
Implement SizeReport
FTRobbin Mar 3, 2026
156f463
Dig deeper into the size blowup
FTRobbin Mar 3, 2026
92cc333
Serialize span into unit
FTRobbin Mar 3, 2026
c37fd3a
Add control for how much size information to output
FTRobbin Mar 3, 2026
a099964
Merge remote-tracking branch 'origin' into haobin-mining
FTRobbin Mar 4, 2026
4234f79
Extract experiment runs
FTRobbin Mar 5, 2026
9c85469
Tweak nightly frontend to display extract experiment results
FTRobbin Mar 5, 2026
54533db
Show egraph size in size report
FTRobbin Mar 5, 2026
dcf81e5
Add include ser time option, add a speedup graph
FTRobbin Mar 5, 2026
41a6fe8
Merge remote-tracking branch 'origin' into haobin-mining
FTRobbin Mar 5, 2026
63d2be2
fmt
FTRobbin Mar 5, 2026
c54b1a2
Skip tests because containers are not yet supported
FTRobbin Mar 5, 2026
c15978f
Merge remote-tracking branch 'origin' into haobin-mining
FTRobbin Mar 5, 2026
85dcdcf
Comment local dev setup
FTRobbin Mar 5, 2026
1d46162
Output a csv file with serialization size data
FTRobbin Mar 6, 2026
a575829
fmt
FTRobbin Mar 6, 2026
96ea226
Hacks
FTRobbin Mar 6, 2026
53cb8f8
fmt
FTRobbin Mar 6, 2026
78f79fb
More more evil hacks
FTRobbin Mar 7, 2026
41742d6
Remove Easteregg from the list of experiments
FTRobbin Mar 7, 2026
35fa1d9
Clean up evil hacks
FTRobbin Mar 26, 2026
a939da6
fmt
FTRobbin Mar 26, 2026
9b6eaf1
Merge remote-tracking branch 'origin' into haobin-mining
FTRobbin Mar 26, 2026
0e3ffb4
fmt
FTRobbin Mar 26, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 91 additions & 8 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,11 @@ getrandom = "0.3"
once_cell = "1.21"
num-bigint = { version = "0.4", features = ["serde"] }
num-rational = {version = "0.4", features = ["serde"]}
csv = "1.3"
csv = "1.4"
typetag = "0.2"
serde = { version = "1.0", features = ["derive", "rc"] }
serde_json = "1.0"
flexbuffers = "25.12.19"

######################
# build dependencies
Expand Down Expand Up @@ -162,6 +163,7 @@ serde_json_diff = "0.2.0"
anyhow.workspace = true
walkdir = "2.5.0"
egglog-reports = { workspace = true }
flexbuffers.workspace = true

[build-dependencies]
chrono = { workspace = true, features = ["now"], optional = true }
Expand Down
1 change: 1 addition & 0 deletions core-relations/src/hash_index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -915,6 +915,7 @@ static THREAD_POOL: Lazy<rayon::ThreadPool> = Lazy::new(|| {
/// to the beginning of an unused vector.
#[derive(Default, Clone, Serialize, Deserialize)]
pub(super) struct FreeList {
    // Skipped during (de)serialization: a deserialized FreeList starts with
    // an empty map (the Default), so the free list is rebuilt from scratch.
    // NOTE(review): presumably the buffers indexed here are transient and
    // safe to drop across a serialize/deserialize round trip — confirm
    // against the callers in this module.
    #[serde(skip)]
    data: HashMap<usize, Vec<BufferIndex>>,
}
impl FreeList {
Expand Down
89 changes: 88 additions & 1 deletion core-relations/src/row_buffer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::{cell::Cell, mem, ops::Deref};
use crate::numeric_id::NumericId;
use egglog_concurrency::ParallelVecWriter;
use rayon::iter::ParallelIterator;
use serde::{ser::SerializeStruct, Deserialize, Deserializer, Serialize};
use serde::{Deserialize, Deserializer, Serialize};
use smallvec::SmallVec;

use crate::{
Expand Down Expand Up @@ -35,6 +35,7 @@ impl<'de> Deserialize<'de> for RowBuffer {
where
D: Deserializer<'de>,
{
/*
#[derive(Deserialize)]
struct Partial {
n_columns: usize,
Expand All @@ -49,19 +50,105 @@ impl<'de> Deserialize<'de> for RowBuffer {
total_rows: helper.total_rows,
data: Pooled::new(helper.data),
})
*/

/// Visitor decoding the byte string produced by `Serialize for RowBuffer`:
/// varint-compressed `n_columns`, then `total_rows`, then one compressed
/// value per cell.
struct RowBufferVisitor;

impl<'de> serde::de::Visitor<'de> for RowBufferVisitor {
    type Value = RowBuffer;

    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        // serde composes this as "invalid type: …, expected <message>",
        // so the message must be a noun phrase, not a full sentence.
        formatter.write_str("a byte array holding a compressed RowBuffer")
    }

    fn visit_bytes<E>(self, bytes: &[u8]) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        let mut it = bytes.iter();
        // Header: column count, then row count.
        let n_columns = deserialize_compressed(&mut it);
        let total_rows = deserialize_compressed(&mut it);
        // Widen to usize before multiplying so the cell count cannot
        // overflow u32 for very large buffers.
        let n_cells = (n_columns as usize) * (total_rows as usize);
        // The exact element count is known up front, so reserve it once.
        let mut data: Vec<Cell<Value>> = Vec::with_capacity(n_cells);
        for _ in 0..n_cells {
            data.push(Cell::new(Value::new(deserialize_compressed(&mut it))));
        }
        Ok(RowBuffer {
            n_columns: n_columns.try_into().unwrap(),
            total_rows: total_rows.try_into().unwrap(),
            data: Pooled::new(data),
        })
    }
}

deserializer.deserialize_bytes(RowBufferVisitor)
}
}

/// Number of bytes `compressed_serialize` emits for `x`: one byte per 7-bit
/// group, with a minimum of one byte for `x == 0`.
#[allow(dead_code)]
fn get_n_compressed_bytes(x: u32) -> usize {
    // Count the significant bits (at least one, so zero still takes a byte),
    // then round up to whole 7-bit groups.
    let significant_bits = (32 - x.leading_zeros() as usize).max(1);
    (significant_bits + 6) / 7
}

/// Append the variable-length encoding of `x` to `buf`.
///
/// Emits 7-bit groups, least-significant first. Continuation bytes have the
/// high bit CLEAR and the final byte has the high bit SET — note this is the
/// inverse of the usual LEB128 convention, and must stay in sync with
/// `deserialize_compressed`.
fn compressed_serialize(buf: &mut Vec<u8>, x: u32) {
    let mut remaining = x;
    loop {
        // Masked to 7 bits, so the cast cannot truncate.
        let group = (remaining & 0x7F) as u8;
        remaining >>= 7;
        if remaining == 0 {
            // Last group: mark it with the terminator bit.
            buf.push(group | 0x80);
            break;
        }
        buf.push(group);
    }
}

/// Decode one value in the format produced by `compressed_serialize`,
/// consuming exactly the bytes of that value from `it`.
///
/// Bytes with the high bit clear are continuation groups; the byte with the
/// high bit set terminates the value. Panics if the iterator is exhausted
/// before a terminator byte is seen.
fn deserialize_compressed<'a, T: Iterator<Item = &'a u8>>(it: &mut T) -> u32 {
    let mut result = 0u32;
    let mut shift = 0u32;
    loop {
        let byte = u32::from(*it.next().unwrap());
        if byte >= 1u32 << 7 {
            // Terminator: strip the marker bit and fold in the final group.
            return result | ((byte ^ (1u32 << 7)) << shift);
        }
        result |= byte << shift;
        shift += 7;
    }
}

impl Serialize for RowBuffer {
    /// Serialize the buffer as a single byte string: varint-compressed
    /// `n_columns`, then `total_rows`, then the compressed `rep` of every
    /// cell in `data` order (see `compressed_serialize` for the byte format).
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        // Cheap lower bound on the output size — two header values plus at
        // least one byte per cell — so the common growth steps are avoided
        // without a second pass over the data.
        let mut buf = Vec::with_capacity(2 + self.data.len());
        compressed_serialize(&mut buf, self.n_columns.try_into().unwrap());
        compressed_serialize(&mut buf, self.total_rows.try_into().unwrap());
        for cell in self.data.iter() {
            compressed_serialize(&mut buf, cell.get().rep);
        }
        serializer.serialize_bytes(&buf)
    }
}

Expand Down
Loading
Loading