From 2374094c3f21782f7a10554a294cd8ac1d60f50c Mon Sep 17 00:00:00 2001
From: Haobin Ni
Date: Mon, 23 Feb 2026 10:07:23 -0800
Subject: [PATCH 01/21] Use Flexbuffer
---
Cargo.lock | 99 +++++++++++++++++++++++++---
Cargo.toml | 2 +
core-relations/src/hash_index/mod.rs | 1 +
src/lib.rs | 29 +++++---
src/poach.rs | 2 +-
5 files changed, 114 insertions(+), 19 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index f4a3a0ff5..405b84829 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -94,6 +94,12 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
[[package]]
name = "bitflags"
version = "2.10.0"
@@ -124,6 +130,12 @@ version = "3.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
+[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
[[package]]
name = "cc"
version = "1.2.41"
@@ -235,7 +247,7 @@ checksum = "93e373516c58af1c344bfe013b6c9831ce6a08bb59709ab3fa6fe5c9b0e904ff"
dependencies = [
"divan-macros",
"itertools",
- "proc-macro-crate",
+ "proc-macro-crate 3.4.0",
"proc-macro2",
"quote",
"syn 2.0.107",
@@ -460,6 +472,7 @@ dependencies = [
"egglog-reports",
"egraph-serialize",
"env_logger",
+ "flexbuffers",
"glob",
"hashbrown 0.16.0",
"im-rc",
@@ -725,6 +738,19 @@ version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
+[[package]]
+name = "flexbuffers"
+version = "25.12.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8bc752b3d049e0705749b9999d0b130d6cf62935bc7762fd3bdb7636047abe43"
+dependencies = [
+ "bitflags 1.3.2",
+ "byteorder",
+ "num_enum",
+ "serde",
+ "serde_derive",
+]
+
[[package]]
name = "foldhash"
version = "0.1.5"
@@ -1016,7 +1042,7 @@ version = "0.30.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6"
dependencies = [
- "bitflags",
+ "bitflags 2.10.0",
"cfg-if",
"cfg_aliases",
"libc",
@@ -1097,6 +1123,27 @@ dependencies = [
"autocfg",
]
+[[package]]
+name = "num_enum"
+version = "0.5.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f646caf906c20226733ed5b1374287eb97e3c2a5c227ce668c1f2ce20ae57c9"
+dependencies = [
+ "num_enum_derive",
+]
+
+[[package]]
+name = "num_enum_derive"
+version = "0.5.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dcbff9bc912032c62bf65ef1d5aea88983b420f4f839db1e9b0c281a25c9c799"
+dependencies = [
+ "proc-macro-crate 1.3.1",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
[[package]]
name = "once_cell"
version = "1.21.3"
@@ -1212,13 +1259,23 @@ dependencies = [
"zerocopy",
]
+[[package]]
+name = "proc-macro-crate"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919"
+dependencies = [
+ "once_cell",
+ "toml_edit 0.19.15",
+]
+
[[package]]
name = "proc-macro-crate"
version = "3.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983"
dependencies = [
- "toml_edit",
+ "toml_edit 0.23.7",
]
[[package]]
@@ -1328,7 +1385,7 @@ version = "0.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
dependencies = [
- "bitflags",
+ "bitflags 2.10.0",
]
[[package]]
@@ -1378,7 +1435,7 @@ version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e"
dependencies = [
- "bitflags",
+ "bitflags 2.10.0",
"errno",
"libc",
"linux-raw-sys",
@@ -1595,6 +1652,12 @@ dependencies = [
"syn 2.0.107",
]
+[[package]]
+name = "toml_datetime"
+version = "0.6.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
+
[[package]]
name = "toml_datetime"
version = "0.7.3"
@@ -1604,6 +1667,17 @@ dependencies = [
"serde_core",
]
+[[package]]
+name = "toml_edit"
+version = "0.19.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421"
+dependencies = [
+ "indexmap",
+ "toml_datetime 0.6.11",
+ "winnow 0.5.40",
+]
+
[[package]]
name = "toml_edit"
version = "0.23.7"
@@ -1611,9 +1685,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d"
dependencies = [
"indexmap",
- "toml_datetime",
+ "toml_datetime 0.7.3",
"toml_parser",
- "winnow",
+ "winnow 0.7.13",
]
[[package]]
@@ -1622,7 +1696,7 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e"
dependencies = [
- "winnow",
+ "winnow 0.7.13",
]
[[package]]
@@ -1955,6 +2029,15 @@ version = "0.53.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
+[[package]]
+name = "winnow"
+version = "0.5.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876"
+dependencies = [
+ "memchr",
+]
+
[[package]]
name = "winnow"
version = "0.7.13"
diff --git a/Cargo.toml b/Cargo.toml
index 86206ef8d..9860f9912 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -56,6 +56,7 @@ csv = "1.3"
typetag = "0.2"
serde = { version = "1.0", features = ["derive", "rc"] }
serde_json = "1.0"
+flexbuffers = "25.12.19"
######################
# build dependencies
@@ -162,6 +163,7 @@ serde_json_diff = "0.2.0"
anyhow.workspace = true
walkdir = "2.5.0"
egglog-reports = { workspace = true }
+flexbuffers.workspace = true
[build-dependencies]
chrono = { workspace = true, features = ["now"], optional = true }
diff --git a/core-relations/src/hash_index/mod.rs b/core-relations/src/hash_index/mod.rs
index 3f19107fe..b377a3bae 100644
--- a/core-relations/src/hash_index/mod.rs
+++ b/core-relations/src/hash_index/mod.rs
@@ -915,6 +915,7 @@ static THREAD_POOL: Lazy = Lazy::new(|| {
/// to the beginning of an unused vector.
#[derive(Default, Clone, Serialize, Deserialize)]
pub(super) struct FreeList {
+ #[serde(skip)]
data: HashMap>,
}
impl FreeList {
diff --git a/src/lib.rs b/src/lib.rs
index ef2fcdd3a..48b345b7d 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -33,6 +33,7 @@ pub use command_macro::{CommandMacro, CommandMacroRegistry};
// This is used to allow the `add_primitive` macro to work in
// both this crate and other crates by referring to `::egglog`.
extern crate self as egglog;
+extern crate flexbuffers;
use anyhow::{Context, Result};
use ast::*;
pub use ast::{ResolvedExpr, ResolvedFact, ResolvedVar};
@@ -2674,8 +2675,11 @@ impl TimedEgraph {
time_micros: self.timer.elapsed().as_micros(),
});
- let value = serde_json::to_value(egraph).context("Failed to encode egraph as json")?;
-
+ //let value = serde_json::to_value(egraph).context("Failed to encode egraph as json")?;
+ let mut buf = flexbuffers::FlexbufferSerializer::new();
+ // Have to use the fully qualified syntax because egraph has a method called serailize
+ Serialize::serialize(egraph, &mut buf).expect("Failed to serialize the egraph in Flexbuffer");
+
timeline.evts.push(EgraphEvent {
sexp_idx: 0,
evt: END,
@@ -2688,10 +2692,11 @@ impl TimedEgraph {
time_micros: self.timer.elapsed().as_micros(),
});
- let file = fs::File::create(path)
+ let mut file = fs::File::create(path)
.with_context(|| format!("failed to create file {}", path.display()))?;
- serde_json::to_writer(BufWriter::new(file), &value)
- .context("Failed to write value to file")?;
+ //serde_json::to_writer(BufWriter::new(file), &value)
+ // .context("Failed to write value to file")?;
+ file.write_all(buf.view()).context("Failed to write value to file")?;
timeline.evts.push(EgraphEvent {
sexp_idx: 1,
@@ -2713,11 +2718,13 @@ impl TimedEgraph {
time_micros: self.timer.elapsed().as_micros(),
});
- let file = fs::File::open(path)
+ let mut file = fs::File::open(path)
.with_context(|| format!("failed to open file {}", path.display()))?;
- let reader = BufReader::new(file);
- let value: serde_json::Value =
- serde_json::from_reader(reader).context("Failed to read json from file")?;
+ //let reader = BufReader::new(file);
+ //let value: serde_json::Value =
+ // serde_json::from_reader(reader).context("Failed to read json from file")?;
+ let mut buf = Vec::new();
+ file.read_to_end(&mut buf).context("Failed to read Flatbuffer from file")?;
timeline.evts.push(EgraphEvent {
sexp_idx: 0,
@@ -2731,7 +2738,9 @@ impl TimedEgraph {
time_micros: self.timer.elapsed().as_micros(),
});
- let egraph: EGraph = serde_json::from_value(value)?;
+ //let egraph: EGraph = serde_json::from_value(value)?;
+ let r = flexbuffers::Reader::get_root(buf.as_slice()).unwrap();
+ let egraph: EGraph = EGraph::deserialize(r).unwrap();
timeline.evts.push(EgraphEvent {
sexp_idx: 1,
diff --git a/src/poach.rs b/src/poach.rs
index 1bc0c361f..5a1a8465e 100644
--- a/src/poach.rs
+++ b/src/poach.rs
@@ -52,7 +52,7 @@ enum RunMode {
// For each egg file under the input path,
// Run the egglog program, recording timing information.
// Round trip to JSON Value, but do not read/write from file
- // Assert the deserialized egraph has hthe same size as the initial egraph.
+ // Assert the deserialized egraph has the same size as the initial egraph.
// Save the completed timeline, for consumption by the nightly frontend
NoIO,
From dae76d2e967ac8f4750a6b8889063cc3f962d5f3 Mon Sep 17 00:00:00 2001
From: Haobin Ni
Date: Mon, 2 Mar 2026 17:17:30 -0800
Subject: [PATCH 02/21] Implement SizeReport
---
src/lib.rs | 8 +++
src/poach.rs | 14 +++++
src/serialize_size.rs | 138 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 160 insertions(+)
create mode 100644 src/serialize_size.rs
diff --git a/src/lib.rs b/src/lib.rs
index 48b345b7d..cf6e97c35 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -30,6 +30,8 @@ mod typechecking;
pub mod util;
pub use command_macro::{CommandMacro, CommandMacroRegistry};
+mod serialize_size;
+
// This is used to allow the `add_primitive` macro to work in
// both this crate and other crates by referring to `::egglog`.
extern crate self as egglog;
@@ -64,6 +66,7 @@ use serde::ser::SerializeStruct;
use serde::{Deserialize, Serialize};
use serde_json::json;
pub use serialize_vis::{SerializeConfig, SerializeOutput, SerializedNode};
+use serialize_size::GenerateSizeReport;
use size::GetSizePrimitive;
use sort::*;
use std::any::Any;
@@ -2666,6 +2669,11 @@ impl TimedEgraph {
Ok(())
}
+ pub fn print_size_report(&mut self) -> Result<()> {
+ self.egraphs.last().unwrap().get_sizerp().pretty_print(0);
+ Ok(())
+ }
+
pub fn to_file(&mut self, path: &Path) -> Result<()> {
let mut timeline = ProgramTimeline::new("(serialize)\n(write)");
let egraph = self.egraphs.last().unwrap();
diff --git a/src/poach.rs b/src/poach.rs
index 5a1a8465e..f8543cf11 100644
--- a/src/poach.rs
+++ b/src/poach.rs
@@ -71,6 +71,11 @@ enum RunMode {
// Run the egglog program, skipping declarations of Sorts and Rules
// Save the completed timeline, for consumption by the nightly frontend
Mine,
+
+ // For each egg file under the input path,
+ // run the egglog program and record timing information.
+ // Print size information on the serialized egraphs.
+ SizeReport,
}
impl Display for RunMode {
@@ -87,6 +92,7 @@ impl Display for RunMode {
RunMode::NoIO => "no-io",
RunMode::Extract => "extract",
RunMode::Mine => "mine",
+ RunMode::SizeReport => "size-report"
}
)
}
@@ -651,6 +657,14 @@ fn poach(
},
)
}
+ RunMode::SizeReport => process_files(
+ &files,
+ out_dir,
+ initial_egraph.as_deref(),
+ |egg_file, _, timed_egraph| {
+ timed_egraph.run_from_file(egg_file)?;
+ timed_egraph.print_size_report()
+ }),
}
}
diff --git a/src/serialize_size.rs b/src/serialize_size.rs
new file mode 100644
index 000000000..168e26899
--- /dev/null
+++ b/src/serialize_size.rs
@@ -0,0 +1,138 @@
+use crate::{CommandMacroRegistry, EGraph, RunReport, TypeInfo, term_encoding::EncodingState};
+
+/// Generate a json report for the size of a serialized structu
+/// By default, only uses serialize
+/// Allow specalization to look into subfields
+
+#[allow(dead_code)]
+#[derive (Debug, Clone)]
+pub struct SizeReport {
+ name: String,
+ size: usize,
+ fields: Vec<(String, Box)>,
+}
+
+fn up_to_two_decimals(a : usize, b : usize) -> String {
+ let a100 = a * 100 / b;
+ let high = a100 / 100;
+ let low = a100 % 100;
+ let low_str =
+ if low < 10 {
+ "0".to_string() + &low.to_string()
+ } else {
+ low.to_string()
+ };
+ return high.to_string() + "." + &low_str;
+}
+
+fn pretty_print_nbytes(size: usize) -> String {
+ if size < 200 {
+ size.to_string() + "B"
+ } else if size < 200 * 1024 {
+ up_to_two_decimals(size, 1024) + "KB"
+ } else if size < 200 * 1024 * 1024 {
+ up_to_two_decimals(size, 1024 * 1024) + "MB"
+ } else {
+ up_to_two_decimals(size, 1024 * 1024 * 1024) + "GB"
+ }
+}
+
+impl SizeReport {
+
+ pub fn pretty_print(&self, level: usize) {
+ if level == 0 {
+ println!("{} : {}", self.name, pretty_print_nbytes(self.size));
+ }
+ let mut sorted_fields = self.fields.clone();
+ sorted_fields.sort_by(|(_, a), (_, b)| b.size.cmp(&a.size));
+ for (name, sr) in sorted_fields {
+ let percentage = (sr.size as f64 / self.size as f64) * 100.0;
+ println!(". {:level$}{} : {} ({:.2}%)", "", name, pretty_print_nbytes(sr.size), percentage);
+ sr.pretty_print(level + 2);
+ }
+ }
+}
+
+pub trait GenerateSizeReport: serde::Serialize {
+ fn get_sizerp(&self) -> SizeReport {
+ let mut buf = flexbuffers::FlexbufferSerializer::new();
+ serde::Serialize::serialize(self, &mut buf).expect("Failed to serialize in Flexbuffer");
+ SizeReport {
+ name: std::any::type_name::().to_string(),
+ size: buf.view().len(),
+ fields: Vec::new(),
+ }
+ }
+}
+
+impl GenerateSizeReport for egglog_bridge::EGraph {}
+
+impl GenerateSizeReport for Option {}
+
+impl GenerateSizeReport for egglog::util::IndexMap {}
+
+impl GenerateSizeReport for TypeInfo {}
+
+impl GenerateSizeReport for RunReport {}
+
+impl GenerateSizeReport for egglog_numeric_id::DenseIdMap {}
+
+impl GenerateSizeReport for CommandMacroRegistry {}
+
+impl GenerateSizeReport for EncodingState {}
+
+
+impl GenerateSizeReport for EGraph {
+ fn get_sizerp(&self) -> SizeReport {
+ let mut buf = flexbuffers::FlexbufferSerializer::new();
+ serde::Serialize::serialize(self, &mut buf).expect("Failed to serialize in Flexbuffer");
+ let mut ret = SizeReport {
+ name: std::any::type_name::().to_string(),
+ size: buf.view().len(),
+ fields: Vec::new(),
+ };
+ ret.fields.push(("backend".to_string(), Box::new(self.backend.get_sizerp())));
+ ret.fields.push(("pushed_egraph".to_string(), Box::new(self.pushed_egraph.get_sizerp())));
+ ret.fields.push(("functions".to_string(), Box::new(self.functions.get_sizerp())));
+ ret.fields.push(("rulesets".to_string(), Box::new(self.rulesets.get_sizerp())));
+ ret.fields.push(("type_info".to_string(), Box::new(self.type_info.get_sizerp())));
+ ret.fields.push(("overall_run_report".to_string(), Box::new(self.overall_run_report.get_sizerp())));
+ ret.fields.push(("schedulers".to_string(), Box::new(self.schedulers.get_sizerp())));
+ ret.fields.push(("commands".to_string(), Box::new(self.commands.get_sizerp())));
+ ret.fields.push(("command_macros".to_string(), Box::new(self.command_macros.get_sizerp())));
+ ret.fields.push(("proof_state".to_string(), Box::new(self.proof_state.get_sizerp())));
+ ret
+ }
+}
+
+/*
+pub struct EGraph {
+ backend: egglog_bridge::EGraph,
+
+ pub parser: Parser,
+
+ names: check_shadowing::Names,
+ /// pushed_egraph forms a linked list of pushed egraphs.
+ /// Pop reverts the egraph to the last pushed egraph.
+ pushed_egraph: Option>,
+
+ functions: IndexMap,
+
+ rulesets: IndexMap,
+ pub fact_directory: Option,
+ pub seminaive: bool,
+
+ type_info: TypeInfo,
+ /// The run report unioned over all runs so far.
+ overall_run_report: RunReport,
+
+ schedulers: DenseIdMap,
+
+ commands: IndexMap>,
+ strict_mode: bool,
+ warned_about_missing_global_prefix: bool,
+ /// Registry for command-level macros
+ command_macros: CommandMacroRegistry,
+ proof_state: EncodingState,
+}
+ */
\ No newline at end of file
From 156f463e5f34b1cfdc3e0817f96a60e36db650ee Mon Sep 17 00:00:00 2001
From: Haobin Ni
Date: Tue, 3 Mar 2026 14:06:37 -0800
Subject: [PATCH 03/21] Dig deeper into the size blowup
---
src/lib.rs | 15 +--
src/poach.rs | 17 ++--
src/serialize_size.rs | 228 ++++++++++++++++++++++++++++++------------
3 files changed, 182 insertions(+), 78 deletions(-)
diff --git a/src/lib.rs b/src/lib.rs
index cf6e97c35..db1bc52ca 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -34,8 +34,8 @@ mod serialize_size;
// This is used to allow the `add_primitive` macro to work in
// both this crate and other crates by referring to `::egglog`.
-extern crate self as egglog;
extern crate flexbuffers;
+extern crate self as egglog;
use anyhow::{Context, Result};
use ast::*;
pub use ast::{ResolvedExpr, ResolvedFact, ResolvedVar};
@@ -65,8 +65,8 @@ use scheduler::{SchedulerId, SchedulerRecord};
use serde::ser::SerializeStruct;
use serde::{Deserialize, Serialize};
use serde_json::json;
-pub use serialize_vis::{SerializeConfig, SerializeOutput, SerializedNode};
use serialize_size::GenerateSizeReport;
+pub use serialize_vis::{SerializeConfig, SerializeOutput, SerializedNode};
use size::GetSizePrimitive;
use sort::*;
use std::any::Any;
@@ -2686,8 +2686,9 @@ impl TimedEgraph {
//let value = serde_json::to_value(egraph).context("Failed to encode egraph as json")?;
let mut buf = flexbuffers::FlexbufferSerializer::new();
// Have to use the fully qualified syntax because egraph has a method called serailize
- Serialize::serialize(egraph, &mut buf).expect("Failed to serialize the egraph in Flexbuffer");
-
+ Serialize::serialize(egraph, &mut buf)
+ .expect("Failed to serialize the egraph in Flexbuffer");
+
timeline.evts.push(EgraphEvent {
sexp_idx: 0,
evt: END,
@@ -2704,7 +2705,8 @@ impl TimedEgraph {
.with_context(|| format!("failed to create file {}", path.display()))?;
//serde_json::to_writer(BufWriter::new(file), &value)
// .context("Failed to write value to file")?;
- file.write_all(buf.view()).context("Failed to write value to file")?;
+ file.write_all(buf.view())
+ .context("Failed to write value to file")?;
timeline.evts.push(EgraphEvent {
sexp_idx: 1,
@@ -2732,7 +2734,8 @@ impl TimedEgraph {
//let value: serde_json::Value =
// serde_json::from_reader(reader).context("Failed to read json from file")?;
let mut buf = Vec::new();
- file.read_to_end(&mut buf).context("Failed to read Flatbuffer from file")?;
+ file.read_to_end(&mut buf)
+ .context("Failed to read Flexbuffer from file")?;
timeline.evts.push(EgraphEvent {
sexp_idx: 0,
diff --git a/src/poach.rs b/src/poach.rs
index f8543cf11..14f972771 100644
--- a/src/poach.rs
+++ b/src/poach.rs
@@ -92,7 +92,7 @@ impl Display for RunMode {
RunMode::NoIO => "no-io",
RunMode::Extract => "extract",
RunMode::Mine => "mine",
- RunMode::SizeReport => "size-report"
+ RunMode::SizeReport => "size-report",
}
)
}
@@ -658,13 +658,14 @@ fn poach(
)
}
RunMode::SizeReport => process_files(
- &files,
- out_dir,
- initial_egraph.as_deref(),
- |egg_file, _, timed_egraph| {
- timed_egraph.run_from_file(egg_file)?;
- timed_egraph.print_size_report()
- }),
+ &files,
+ out_dir,
+ initial_egraph.as_deref(),
+ |egg_file, _, timed_egraph| {
+ timed_egraph.run_from_file(egg_file)?;
+ timed_egraph.print_size_report()
+ },
+ ),
}
}
diff --git a/src/serialize_size.rs b/src/serialize_size.rs
index 168e26899..63a22195c 100644
--- a/src/serialize_size.rs
+++ b/src/serialize_size.rs
@@ -1,27 +1,32 @@
-use crate::{CommandMacroRegistry, EGraph, RunReport, TypeInfo, term_encoding::EncodingState};
+use crate::{
+ ast::ResolvedVar,
+ core::{GenericCoreAction, GenericCoreActions, GenericAtom, Query, ResolvedCall, ResolvedCoreRule},
+ egglog::util::IndexMap,
+ term_encoding::EncodingState,
+ CommandMacroRegistry, EGraph, RunReport, TypeInfo,
+};
/// Generate a json report for the size of a serialized structu
/// By default, only uses serialize
/// Allow specalization to look into subfields
#[allow(dead_code)]
-#[derive (Debug, Clone)]
+#[derive(Debug, Clone)]
pub struct SizeReport {
name: String,
size: usize,
fields: Vec<(String, Box)>,
}
-fn up_to_two_decimals(a : usize, b : usize) -> String {
+fn up_to_two_decimals(a: usize, b: usize) -> String {
let a100 = a * 100 / b;
let high = a100 / 100;
let low = a100 % 100;
- let low_str =
- if low < 10 {
- "0".to_string() + &low.to_string()
- } else {
- low.to_string()
- };
+ let low_str = if low < 10 {
+ "0".to_string() + &low.to_string()
+ } else {
+ low.to_string()
+ };
return high.to_string() + "." + &low_str;
}
@@ -37,102 +42,197 @@ fn pretty_print_nbytes(size: usize) -> String {
}
}
-impl SizeReport {
+fn truncate_string_with_ellipsis(s: &str, max_len: usize) -> String {
+ if s.chars().count() > max_len {
+ let mut truncated = s.chars().take(max_len).collect::();
+ truncated.push_str(&format!("...{:} chars total", s.len()));
+ truncated
+ } else {
+ s.to_string()
+ }
+}
+impl SizeReport {
pub fn pretty_print(&self, level: usize) {
if level == 0 {
println!("{} : {}", self.name, pretty_print_nbytes(self.size));
}
let mut sorted_fields = self.fields.clone();
sorted_fields.sort_by(|(_, a), (_, b)| b.size.cmp(&a.size));
- for (name, sr) in sorted_fields {
+ for (name, sr) in sorted_fields.iter().take(10) {
let percentage = (sr.size as f64 / self.size as f64) * 100.0;
- println!(". {:level$}{} : {} ({:.2}%)", "", name, pretty_print_nbytes(sr.size), percentage);
+ println!(
+ " {:level$}{} : {} ({:.2}%)",
+ "",
+ name,
+ pretty_print_nbytes(sr.size),
+ percentage
+ );
sr.pretty_print(level + 2);
}
+ if sorted_fields.len() > 10 {
+ println!(" {:level$} ... {:} fields total", "", sorted_fields.len());
+ }
+ }
+}
+
+fn get_sizerp_default(obj: &T) -> SizeReport {
+ let mut buf = flexbuffers::FlexbufferSerializer::new();
+ serde::Serialize::serialize(obj, &mut buf).expect("Failed to serialize in Flexbuffer");
+ SizeReport {
+ name: std::any::type_name::().to_string(),
+ size: buf.view().len(),
+ fields: Vec::new(),
}
}
-pub trait GenerateSizeReport: serde::Serialize {
+pub trait GenerateSizeReport: serde::Serialize + Sized {
fn get_sizerp(&self) -> SizeReport {
- let mut buf = flexbuffers::FlexbufferSerializer::new();
- serde::Serialize::serialize(self, &mut buf).expect("Failed to serialize in Flexbuffer");
- SizeReport {
- name: std::any::type_name::().to_string(),
- size: buf.view().len(),
- fields: Vec::new(),
- }
+ get_sizerp_default(self)
}
}
impl GenerateSizeReport for egglog_bridge::EGraph {}
-impl GenerateSizeReport for Option {}
+impl GenerateSizeReport for Option {}
-impl GenerateSizeReport for egglog::util::IndexMap {}
+impl GenerateSizeReport
+ for IndexMap
+{
+ fn get_sizerp(&self) -> SizeReport {
+ let mut ret = get_sizerp_default(self);
+ for (k, v) in self {
+ ret.fields.push((
+ truncate_string_with_ellipsis(&k.to_string(), 20),
+ Box::new(v.get_sizerp()),
+ ));
+ }
+ ret
+ }
+}
impl GenerateSizeReport for TypeInfo {}
impl GenerateSizeReport for RunReport {}
-impl GenerateSizeReport for egglog_numeric_id::DenseIdMap {}
+impl GenerateSizeReport
+ for egglog_numeric_id::DenseIdMap
+{
+}
impl GenerateSizeReport for CommandMacroRegistry {}
impl GenerateSizeReport for EncodingState {}
+impl GenerateSizeReport for egglog::Function {}
-impl GenerateSizeReport for EGraph {
+use egglog::ast::Ruleset;
+use egglog_ast::span::Span;
+
+impl GenerateSizeReport for Span {}
+
+impl GenerateSizeReport for GenericAtom {}
+
+impl GenerateSizeReport for Query {
+ fn get_sizerp(&self) -> SizeReport {
+ self.atoms.get_sizerp()
+ }
+}
+
+impl GenerateSizeReport for Vec {
fn get_sizerp(&self) -> SizeReport {
- let mut buf = flexbuffers::FlexbufferSerializer::new();
- serde::Serialize::serialize(self, &mut buf).expect("Failed to serialize in Flexbuffer");
- let mut ret = SizeReport {
- name: std::any::type_name::().to_string(),
- size: buf.view().len(),
- fields: Vec::new(),
- };
- ret.fields.push(("backend".to_string(), Box::new(self.backend.get_sizerp())));
- ret.fields.push(("pushed_egraph".to_string(), Box::new(self.pushed_egraph.get_sizerp())));
- ret.fields.push(("functions".to_string(), Box::new(self.functions.get_sizerp())));
- ret.fields.push(("rulesets".to_string(), Box::new(self.rulesets.get_sizerp())));
- ret.fields.push(("type_info".to_string(), Box::new(self.type_info.get_sizerp())));
- ret.fields.push(("overall_run_report".to_string(), Box::new(self.overall_run_report.get_sizerp())));
- ret.fields.push(("schedulers".to_string(), Box::new(self.schedulers.get_sizerp())));
- ret.fields.push(("commands".to_string(), Box::new(self.commands.get_sizerp())));
- ret.fields.push(("command_macros".to_string(), Box::new(self.command_macros.get_sizerp())));
- ret.fields.push(("proof_state".to_string(), Box::new(self.proof_state.get_sizerp())));
+ let mut ret = get_sizerp_default(self);
+ for e in self {
+ let rep = e.get_sizerp();
+ ret.fields.push((rep.name.clone(), Box::new(rep)));
+ }
ret
}
}
-/*
-pub struct EGraph {
- backend: egglog_bridge::EGraph,
+impl GenerateSizeReport for GenericCoreAction {}
- pub parser: Parser,
+impl GenerateSizeReport for GenericCoreActions {
+ fn get_sizerp(&self) -> SizeReport {
+ self.0.get_sizerp()
+ }
+}
- names: check_shadowing::Names,
- /// pushed_egraph forms a linked list of pushed egraphs.
- /// Pop reverts the egraph to the last pushed egraph.
- pushed_egraph: Option>,
+impl GenerateSizeReport for ResolvedCall {}
- functions: IndexMap,
+impl GenerateSizeReport for ResolvedVar {}
- rulesets: IndexMap,
- pub fact_directory: Option,
- pub seminaive: bool,
+impl GenerateSizeReport for ResolvedCoreRule {
+ fn get_sizerp(&self) -> SizeReport {
+ let mut ret = get_sizerp_default(self);
+ ret.fields
+ .push(("span".to_string(), Box::new(self.span.get_sizerp())));
+ ret.fields
+ .push(("body".to_string(), Box::new(self.body.get_sizerp())));
+ ret.fields
+ .push(("head".to_string(), Box::new(self.head.get_sizerp())));
+ ret
+ }
+}
- type_info: TypeInfo,
- /// The run report unioned over all runs so far.
- overall_run_report: RunReport,
+impl GenerateSizeReport for (T, S) {
+ fn get_sizerp(&self) -> SizeReport {
+ let mut ret = get_sizerp_default(self);
+ ret.fields
+ .push(("0".to_string(), Box::new(self.0.get_sizerp())));
+ ret.fields
+ .push(("1".to_string(), Box::new(self.1.get_sizerp())));
+ ret
+ }
+}
- schedulers: DenseIdMap,
+impl GenerateSizeReport for egglog_bridge::RuleId {}
- commands: IndexMap>,
- strict_mode: bool,
- warned_about_missing_global_prefix: bool,
- /// Registry for command-level macros
- command_macros: CommandMacroRegistry,
- proof_state: EncodingState,
+impl GenerateSizeReport for egglog::ast::Ruleset {
+ fn get_sizerp(&self) -> SizeReport {
+ match &self {
+ Ruleset::Rules(mp) => mp.get_sizerp(),
+ Ruleset::Combined(_l) => {
+ //TODO if needed
+ get_sizerp_default(self)
+ }
+ }
+ }
+}
+
+impl GenerateSizeReport for EGraph {
+ fn get_sizerp(&self) -> SizeReport {
+ let mut ret = get_sizerp_default(&self);
+ ret.fields
+ .push(("backend".to_string(), Box::new(self.backend.get_sizerp())));
+ ret.fields.push((
+ "pushed_egraph".to_string(),
+ Box::new(self.pushed_egraph.get_sizerp()),
+ ));
+ ret.fields.push((
+ "functions".to_string(),
+ Box::new(self.functions.get_sizerp()),
+ ));
+ ret.fields
+ .push(("rulesets".to_string(), Box::new(self.rulesets.get_sizerp())));
+ ret.fields.push((
+ "type_info".to_string(),
+ Box::new(self.type_info.get_sizerp()),
+ ));
+ ret.fields.push((
+ "overall_run_report".to_string(),
+ Box::new(self.overall_run_report.get_sizerp()),
+ ));
+ ret.fields.push((
+ "schedulers".to_string(),
+ Box::new(self.schedulers.get_sizerp()),
+ ));
+ //ret.fields.push(("commands".to_string(), Box::new(self.commands.get_sizerp())));
+ //ret.fields.push(("command_macros".to_string(), Box::new(self.command_macros.get_sizerp())));
+ ret.fields.push((
+ "proof_state".to_string(),
+ Box::new(self.proof_state.get_sizerp()),
+ ));
+ ret
+ }
}
- */
\ No newline at end of file
From 92cc3334a1b5ac106386d80d63582dc89005412b Mon Sep 17 00:00:00 2001
From: Haobin Ni
Date: Tue, 3 Mar 2026 14:06:58 -0800
Subject: [PATCH 04/21] Serialize span into unit
---
egglog-ast/src/span.rs | 25 +++++++++++++++++++++++--
1 file changed, 23 insertions(+), 2 deletions(-)
diff --git a/egglog-ast/src/span.rs b/egglog-ast/src/span.rs
index c2c8db320..d062426cf 100644
--- a/egglog-ast/src/span.rs
+++ b/egglog-ast/src/span.rs
@@ -3,13 +3,32 @@ use std::sync::Arc;
use serde::{Deserialize, Serialize};
-#[derive(Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
+#[derive(Clone, PartialEq, Eq, Hash)]
pub enum Span {
Panic,
Egglog(Arc),
Rust(Arc),
+ POACH,
}
+impl serde::Serialize for Span {
+ fn serialize(&self, serializer: S) -> Result
+ where
+ S: serde::Serializer {
+ serializer.serialize_unit()
+ }
+}
+
+impl<'de> serde::Deserialize<'de> for Span {
+ fn deserialize(_: D) -> Result
+ where
+ D: serde::Deserializer<'de> {
+ Ok(Self::POACH)
+ }
+}
+
+
+
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct EgglogSpan {
pub file: Arc,
@@ -55,6 +74,7 @@ impl Span {
Span::Panic => panic!("Span::Panic in Span::string"),
Span::Rust(_) => panic!("Span::Rust cannot track end position"),
Span::Egglog(span) => &span.file.contents[span.i..span.j],
+ Span::POACH => "From POACH deserialization",
}
}
}
@@ -96,7 +116,8 @@ impl Display for Span {
write!(f, "In {}:{}-{}: {quote}", start_line, start_col, end_col)
}
}
- }
+ },
+ Span::POACH => write!(f, "From POACH deserialization"),
}
}
}
From c37fd3a877d537d345ba07aa90338190a0c30d4a Mon Sep 17 00:00:00 2001
From: Haobin Ni
Date: Tue, 3 Mar 2026 14:29:00 -0800
Subject: [PATCH 05/21] Add control for how much size information to output
---
src/lib.rs | 4 ++--
src/poach.rs | 2 +-
src/serialize_size.rs | 10 +++++++---
3 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/src/lib.rs b/src/lib.rs
index db1bc52ca..3eafa05f5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2669,8 +2669,8 @@ impl TimedEgraph {
Ok(())
}
- pub fn print_size_report(&mut self) -> Result<()> {
- self.egraphs.last().unwrap().get_sizerp().pretty_print(0);
+ pub fn print_size_report(&mut self, max_level: usize) -> Result<()> {
+ self.egraphs.last().unwrap().get_sizerp().pretty_print(0, max_level);
Ok(())
}
diff --git a/src/poach.rs b/src/poach.rs
index 14f972771..a3da3ed87 100644
--- a/src/poach.rs
+++ b/src/poach.rs
@@ -663,7 +663,7 @@ fn poach(
initial_egraph.as_deref(),
|egg_file, _, timed_egraph| {
timed_egraph.run_from_file(egg_file)?;
- timed_egraph.print_size_report()
+ timed_egraph.print_size_report(0)
},
),
}
diff --git a/src/serialize_size.rs b/src/serialize_size.rs
index 63a22195c..c9b49ae03 100644
--- a/src/serialize_size.rs
+++ b/src/serialize_size.rs
@@ -53,7 +53,10 @@ fn truncate_string_with_ellipsis(s: &str, max_len: usize) -> String {
}
impl SizeReport {
- pub fn pretty_print(&self, level: usize) {
+ pub fn pretty_print(&self, level: usize, max_level: usize) {
+ if level > max_level {
+ return;
+ }
if level == 0 {
println!("{} : {}", self.name, pretty_print_nbytes(self.size));
}
@@ -61,14 +64,15 @@ impl SizeReport {
sorted_fields.sort_by(|(_, a), (_, b)| b.size.cmp(&a.size));
for (name, sr) in sorted_fields.iter().take(10) {
let percentage = (sr.size as f64 / self.size as f64) * 100.0;
+ let indent = level * 2;
println!(
- " {:level$}{} : {} ({:.2}%)",
+ " {:indent$}{} : {} ({:.2}%)",
"",
name,
pretty_print_nbytes(sr.size),
percentage
);
- sr.pretty_print(level + 2);
+ sr.pretty_print(level + 1, max_level);
}
if sorted_fields.len() > 10 {
println!(" {:level$} ... {:} fields total", "", sorted_fields.len());
From 4234f79f04aab3e56bf57a8f97e703be9e49d243 Mon Sep 17 00:00:00 2001
From: Haobin Ni
Date: Thu, 5 Mar 2026 10:43:17 -0800
Subject: [PATCH 06/21] Extract experiment runs
---
infra/nightly.py | 36 ++++++++++++++--------
infra/nightly.sh | 7 +++--
src/lib.rs | 33 ++++++++++----------
src/poach.rs | 80 +++++++++++++++++++++++++++++++++++-------------
4 files changed, 102 insertions(+), 54 deletions(-)
mode change 100644 => 100755 infra/nightly.sh
diff --git a/infra/nightly.py b/infra/nightly.py
index 3e833356a..f968ec4f0 100644
--- a/infra/nightly.py
+++ b/infra/nightly.py
@@ -88,23 +88,32 @@ def run_test_experiments(top_dir, tmp_dir, aggregator):
run_poach(benchmark, tmp_dir, run_mode)
add_benchmark_data(aggregator, timeline_file, f"tests/{benchmark_name}/{benchmark.stem}/timeline.json")
extra_files = {
- "sequential-round-trip": [tmp_dir / f"{benchmark.stem}-serialize1.json"],
+ "sequential-round-trip": [tmp_dir / f"{benchmark.stem}-serialize1.fbs"],
"old-serialize": [
- tmp_dir / f"{benchmark.stem}-serialize-poach.json",
+ tmp_dir / f"{benchmark.stem}-serialize-poach.fbs",
tmp_dir / f"{benchmark.stem}-serialize-old.json",
],
}.get(run_mode, [])
cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json", *extra_files)
+def run_extract_experiments(resource_dir, tmp_dir, aggregator):
+ timeline_suites = ["easteregg", "herbie-hamming", "herbie-math-rewrite", "herbie-math-taylor"]
+ for suite in timeline_suites:
+ for benchmark in benchmark_files(resource_dir / "test-files" / suite):
+ timeline_file = tmp_dir / f"{benchmark.stem}-timeline.json"
+ run_poach(benchmark, tmp_dir, "extract")
+ add_benchmark_data(aggregator, timeline_file, f"{suite}/timeline/{benchmark.stem}/timeline.json")
+ cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json")
+
def run_mined_experiments(resource_dir, tmp_dir, aggregator):
- mega_serialize_file = tmp_dir / "mega-easteregg-serialize.json"
+ mega_serialize_file = tmp_dir / "mega-easteregg-serialize.fbs"
mega_timeline_file = tmp_dir / "mega-easteregg-timeline.json"
run_poach(resource_dir / "mega-easteregg.egg", tmp_dir, "serialize")
add_benchmark_data(aggregator, mega_timeline_file, "easteregg/serialize/mega-easteregg/timeline.json")
cleanup_benchmark_files(mega_timeline_file, tmp_dir / "summary.json")
for benchmark in benchmark_files(resource_dir / "test-files" / "easteregg"):
timeline_file = tmp_dir / f"{benchmark.stem}-timeline.json"
- serialize_file = tmp_dir / f"{benchmark.stem}-serialize.json"
+ serialize_file = tmp_dir / f"{benchmark.stem}-serialize.fbs"
run_poach(benchmark, tmp_dir, "serialize")
add_benchmark_data(aggregator, timeline_file, f"easteregg/serialize/{benchmark.stem}/timeline.json")
cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json")
@@ -146,22 +155,25 @@ def run_mined_experiments(resource_dir, tmp_dir, aggregator):
##############################################################################
# Run the benchmarks and record timeline-only data.
- run_timeline_experiments(resource_dir, tmp_dir, aggregator)
+ # run_timeline_experiments(resource_dir, tmp_dir, aggregator)
# Re-run the benchmarks with JSON round-tripping kept entirely in memory.
- run_no_io_experiments(resource_dir, tmp_dir, aggregator)
+ # run_no_io_experiments(resource_dir, tmp_dir, aggregator)
# Run the egglog tests under each serialization experiment mode.
- run_test_experiments(top_dir, tmp_dir, aggregator)
+ # run_test_experiments(top_dir, tmp_dir, aggregator)
# Run the mined-egraph experiment using both per-benchmark and mega-egraph seeds.
- run_mined_experiments(resource_dir, tmp_dir, aggregator)
+ # run_mined_experiments(resource_dir, tmp_dir, aggregator)
+
+ # Run the extract experiment on our heavy benchmarks
+ run_extract_experiments(resource_dir, tmp_dir, aggregator)
##############################################################################
aggregator.save()
- if shutil.which("perf") is not None:
- # Generate flamegraphs
- for egg_file in glob.glob("tests/*.egg") + glob.glob("tests/web-demo/*.egg"):
- run_cmd([str(script_dir / "flamegraph.sh"), egg_file, str(nightly_dir / "output" / "flamegraphs")])
+ #if shutil.which("perf") is not None:
+ # # Generate flamegraphs
+ # for egg_file in glob.glob("tests/*.egg") + glob.glob("tests/web-demo/*.egg"):
+ # run_cmd([str(script_dir / "flamegraph.sh"), egg_file, str(nightly_dir / "output" / "flamegraphs")])
diff --git a/infra/nightly.sh b/infra/nightly.sh
old mode 100644
new mode 100755
index 766e417cb..777641a8e
--- a/infra/nightly.sh
+++ b/infra/nightly.sh
@@ -47,7 +47,8 @@ mkdir -p nightly/output
mkdir -p nightly/output/flamegraphs
mkdir -p nightly/tmp
-git clone https://github.com/brendangregg/FlameGraph.git
+# Skip FlameGraphs for mining MVP
+# git clone https://github.com/brendangregg/FlameGraph.git
# Build in release mode before running nightly.py
cargo build --release
@@ -61,9 +62,9 @@ if [ ! -f nightly/output/data/data.json ]; then
exit 1
fi
-ls nightly/output/flamegraphs > nightly/output/flamegraphs.txt
+# ls nightly/output/flamegraphs > nightly/output/flamegraphs.txt
cp infra/nightly-resources/web/* nightly/output
# Uncomment for local development
-# cd nightly/output && python3 -m http.server 8002
+cd nightly/output && python3 -m http.server 8002
diff --git a/src/lib.rs b/src/lib.rs
index 941282916..950778ab5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -73,7 +73,7 @@ use std::any::Any;
use std::fmt::{Debug, Display, Formatter};
use std::fs::{self, read_to_string, File};
use std::hash::Hash;
-use std::io::{BufReader, BufWriter, Read, Write as _};
+use std::io::{BufWriter, Read, Write as _};
use std::iter::once;
use std::ops::Deref;
use std::path::{Path, PathBuf};
@@ -2485,10 +2485,14 @@ impl TimedEgraph {
}
pub fn new_from_file(path: &Path) -> Self {
- let file = File::open(path).expect("failed to open egraph file");
- let reader = BufReader::new(file);
+ let mut file = fs::File::open(path)
+ .expect("failed to open file");
+ let mut buf = Vec::new();
+ file.read_to_end(&mut buf)
+ .expect("Failed to read Flatbuffer from file");
- let egraph: EGraph = serde_json::from_reader(reader).expect("failed to parse egraph JSON");
+ let r = flexbuffers::Reader::get_root(buf.as_slice()).unwrap();
+ let egraph: EGraph = EGraph::deserialize(r).unwrap();
Self {
egraphs: vec![egraph],
@@ -2624,7 +2628,7 @@ impl TimedEgraph {
Ok(())
}
- pub fn to_value(&mut self) -> Result {
+ pub fn to_value(&mut self) -> Result> {
let mut timeline = ProgramTimeline::new("(serialize)");
let egraph = self.egraphs.last().unwrap();
@@ -2634,7 +2638,10 @@ impl TimedEgraph {
time_micros: self.timer.elapsed().as_micros(),
});
- let value = serde_json::to_value(egraph).context("Failed to encode egraph as json")?;
+ let mut buf = flexbuffers::FlexbufferSerializer::new();
+ Serialize::serialize(egraph, &mut buf)
+ .expect("Failed to serialize the egraph in Flexbuffer");
+ let value = Vec::from(buf.view());
timeline.evts.push(EgraphEvent {
sexp_idx: 0,
@@ -2646,7 +2653,7 @@ impl TimedEgraph {
Ok(value)
}
- pub fn from_value(&mut self, value: serde_json::Value) -> Result<()> {
+ pub fn from_value(&mut self, value: Vec) -> Result<()> {
let mut timeline = ProgramTimeline::new("(deserialize)");
timeline.evts.push(EgraphEvent {
@@ -2655,8 +2662,8 @@ impl TimedEgraph {
time_micros: self.timer.elapsed().as_micros(),
});
- let egraph: EGraph =
- serde_json::from_value(value).context("Failed to decode egraph from json")?;
+ let r = flexbuffers::Reader::get_root(value.as_slice()).unwrap();
+ let egraph: EGraph = EGraph::deserialize(r).unwrap();
timeline.evts.push(EgraphEvent {
sexp_idx: 0,
@@ -2684,9 +2691,7 @@ impl TimedEgraph {
time_micros: self.timer.elapsed().as_micros(),
});
- //let value = serde_json::to_value(egraph).context("Failed to encode egraph as json")?;
let mut buf = flexbuffers::FlexbufferSerializer::new();
- // Have to use the fully qualified syntax because egraph has a method called serailize
Serialize::serialize(egraph, &mut buf)
.expect("Failed to serialize the egraph in Flexbuffer");
@@ -2704,8 +2709,6 @@ impl TimedEgraph {
let mut file = fs::File::create(path)
.with_context(|| format!("failed to create file {}", path.display()))?;
- //serde_json::to_writer(BufWriter::new(file), &value)
- // .context("Failed to write value to file")?;
file.write_all(buf.view())
.context("Failed to write value to file")?;
@@ -2731,9 +2734,6 @@ impl TimedEgraph {
let mut file = fs::File::open(path)
.with_context(|| format!("failed to open file {}", path.display()))?;
- //let reader = BufReader::new(file);
- //let value: serde_json::Value =
- // serde_json::from_reader(reader).context("Failed to read json from file")?;
let mut buf = Vec::new();
file.read_to_end(&mut buf)
.context("Failed to read Flatbuffer from file")?;
@@ -2750,7 +2750,6 @@ impl TimedEgraph {
time_micros: self.timer.elapsed().as_micros(),
});
- //let egraph: EGraph = serde_json::from_value(value)?;
let r = flexbuffers::Reader::get_root(buf.as_slice()).unwrap();
let egraph: EGraph = EGraph::deserialize(r).unwrap();
diff --git a/src/poach.rs b/src/poach.rs
index 3d4de64e6..71471f3c5 100644
--- a/src/poach.rs
+++ b/src/poach.rs
@@ -134,6 +134,8 @@ fn check_egraph_size(egraph: &TimedEgraph) -> Result<()> {
Ok(())
}
+// TODO: This is not working right now due to no longer using serde_json
+/*
fn check_idempotent(p1: &PathBuf, p2: &PathBuf, name: &str, out_dir: &PathBuf) {
let json1: serde_json::Value = serde_json::from_str(
&fs::read_to_string(p1).expect(&format!("failed to open {}", p1.display())),
@@ -153,6 +155,7 @@ fn check_idempotent(p1: &PathBuf, p2: &PathBuf, name: &str, out_dir: &PathBuf) {
panic!("Diff for {}", name)
}
}
+*/
fn benchmark_name(egg_file: &Path) -> &str {
egg_file
@@ -210,6 +213,7 @@ where
(successes, failures)
}
+#[allow(dead_code)]
fn compare_extracts(
initial_extracts: &[CommandOutput],
final_extracts: &[CommandOutput],
@@ -248,6 +252,38 @@ fn compare_extracts(
Ok(())
}
+fn compare_extracts_weak(
+ initial_extracts: &[CommandOutput],
+ final_extracts: &[CommandOutput],
+) -> Result<()> {
+ if initial_extracts.len() != final_extracts.len() {
+ anyhow::bail!("extract lengths mismatch")
+ }
+
+ for (x, y) in initial_extracts.iter().zip(final_extracts) {
+ match (x, y) {
+ (CommandOutput::ExtractBest(_, _, _), CommandOutput::ExtractBest(_, _, _)) => {
+
+ }
+ (
+ CommandOutput::ExtractVariants(_, _),
+ CommandOutput::ExtractVariants(_, _),
+ ) => {
+
+ }
+ (
+ CommandOutput::MultiExtractVariants(_, _),
+ CommandOutput::MultiExtractVariants(_, _),
+ ) => {
+
+ }
+ _ => anyhow::bail!("No match : {:?} {:?}", x, y),
+ }
+ }
+
+ Ok(())
+}
+
fn poach(
files: Vec,
out_dir: &PathBuf,
@@ -275,7 +311,7 @@ fn poach(
|egg_file, out_dir, timed_egraph| {
let name = benchmark_name(egg_file);
timed_egraph.run_from_file(egg_file)?;
- timed_egraph.to_file(&out_dir.join(format!("{name}-serialize.json")))?;
+ timed_egraph.to_file(&out_dir.join(format!("{name}-serialize.fbs")))?;
timed_egraph.write_timeline(&out_dir.join(format!("{name}-timeline.json")))?;
Ok(())
},
@@ -288,15 +324,15 @@ fn poach(
|egg_file, out_dir: &PathBuf, timed_egraph| {
let name = benchmark_name(egg_file);
timed_egraph.run_from_file(egg_file)?;
- let s1 = out_dir.join(format!("{name}-serialize1.json"));
+ let s1 = out_dir.join(format!("{name}-serialize1.fbs"));
timed_egraph
.to_file(&s1)
- .context("Failed to write s1.json")?;
+ .context("Failed to write s1.fbs")?;
timed_egraph
.from_file(&s1)
- .context("failed to read s1.json")?;
+ .context("failed to read s1.fbs")?;
check_egraph_number(&timed_egraph, 2)?;
@@ -314,37 +350,37 @@ fn poach(
|egg_file, out_dir, timed_egraph| {
let name = benchmark_name(egg_file);
timed_egraph.run_from_file(egg_file)?;
- let s1 = out_dir.join(format!("{name}-serialize1.json"));
- let s2 = out_dir.join(format!("{name}-serialize2.json"));
- let s3 = out_dir.join(format!("{name}-serialize3.json"));
+ let s1 = out_dir.join(format!("{name}-serialize1.fbs"));
+ let s2 = out_dir.join(format!("{name}-serialize2.fbs"));
+ let s3 = out_dir.join(format!("{name}-serialize3.fbs"));
timed_egraph
.to_file(&s1)
- .context("failed to serialize s1.json")?;
+ .context("failed to serialize s1.fbs")?;
timed_egraph
.from_file(&s1)
- .context("failed to read s1.json")?;
+ .context("failed to read s1.fbs")?;
timed_egraph
.to_file(&s2)
- .context("failed to serialize s2.json")?;
+ .context("failed to serialize s2.fbs")?;
timed_egraph
.from_file(&s2)
- .context("failed to read s2.json")?;
+ .context("failed to read s2.fbs")?;
timed_egraph
.to_file(&s3)
- .context("failed to serialize s3.json")?;
+ .context("failed to serialize s3.fbs")?;
timed_egraph
.from_file(&s3)
- .context("failed to read s3.json")?;
+ .context("failed to read s3.fbs")?;
check_egraph_number(&timed_egraph, 4)?;
check_egraph_size(&timed_egraph)?;
- check_idempotent(&s2, &s3, name, out_dir);
+ //check_idempotent(&s2, &s3, name, out_dir);
timed_egraph.write_timeline(&out_dir.join(format!("{name}-timeline.json")))?;
Ok(())
@@ -360,8 +396,8 @@ fn poach(
timed_egraph.run_from_file(egg_file)?;
timed_egraph
- .to_file(&out_dir.join(format!("{name}-serialize-poach.json")))
- .context("failed to write poach.json")?;
+ .to_file(&out_dir.join(format!("{name}-serialize-poach.fbs")))
+ .context("failed to write poach.fbs")?;
timed_egraph
.old_serialize_egraph(&out_dir.join(format!("{name}-serialize-old.json")))
@@ -382,11 +418,11 @@ fn poach(
let value = timed_egraph
.to_value()
- .context("Failed to encode egraph as json")?;
+ .context("Failed to encode egraph as flatbuffer")?;
timed_egraph
.from_value(value)
- .context("failed to decode egraph from json")?;
+ .context("failed to decode egraph from flatbuffer")?;
check_egraph_number(&timed_egraph, 2)?;
@@ -424,7 +460,7 @@ fn poach(
if let Sexp::List(xs, _) = sexp {
if !xs.is_empty() {
match &xs[0] {
- Sexp::Atom(s, _) => s == "extract",
+ Sexp::Atom(s, _) => s == "extract" || s == "multi-extract",
_ => false,
}
} else {
@@ -452,18 +488,18 @@ fn poach(
let value = timed_egraph
.to_value()
- .context("Failed to encode egraph as JSON")?;
+ .context("Failed to encode egraph as Flatbuffer")?;
timed_egraph
.from_value(value)
- .context("failed to decode egraph from json")?;
+ .context("Failed to decode egraph from Flatbuffer")?;
check_egraph_number(&timed_egraph, 2)?;
let final_extracts =
timed_egraph.run_program_with_timeline(extract_cmds, &extracts)?;
- compare_extracts(&initial_extracts, &final_extracts)?;
+ compare_extracts_weak(&initial_extracts, &final_extracts)?;
timed_egraph.write_timeline(&out_dir.join(format!("{name}-timeline.json")))?;
From 9c854695edf8716d220c30b575102aa0f711f79f Mon Sep 17 00:00:00 2001
From: Haobin Ni
Date: Thu, 5 Mar 2026 12:06:14 -0800
Subject: [PATCH 07/21] Tweak nightly frontend to display extract experiment
results
---
infra/nightly-resources/web/chart.js | 4 +-
infra/nightly-resources/web/extract.html | 2 +
infra/nightly-resources/web/extract.js | 54 +++++++++++++++++++-----
infra/nightly.py | 7 ++-
4 files changed, 54 insertions(+), 13 deletions(-)
diff --git a/infra/nightly-resources/web/chart.js b/infra/nightly-resources/web/chart.js
index 466b69975..da73b509b 100644
--- a/infra/nightly-resources/web/chart.js
+++ b/infra/nightly-resources/web/chart.js
@@ -156,8 +156,8 @@ function initializeCharts() {
},
},
y: {
- min: -25,
- max: 25,
+ min: -1000,
+ max: 3000,
title: {
display: true,
text: "time (ms)",
diff --git a/infra/nightly-resources/web/extract.html b/infra/nightly-resources/web/extract.html
index 55de269b5..8af73a4dc 100644
--- a/infra/nightly-resources/web/extract.html
+++ b/infra/nightly-resources/web/extract.html
@@ -25,6 +25,8 @@ POACH vs Vanilla Egglog
Serialization time is not counted
+
+
diff --git a/infra/nightly-resources/web/extract.js b/infra/nightly-resources/web/extract.js
index e83b0c854..562dc5c90 100644
--- a/infra/nightly-resources/web/extract.js
+++ b/infra/nightly-resources/web/extract.js
@@ -1,9 +1,43 @@
function initializeExtract() {
- initializeGlobalData().then(initializeCharts).then(plotExtract);
+ initializeGlobalData()
+ .then(initializeExtractOptions)
+ .then(initializeCharts)
+ .then(plotExtract);
}
+function initializeExtractOptions() {
+ const suiteElt = document.getElementById("suite");
+ Object.keys(GLOBAL_DATA.data).forEach((suite, idx) => {
+ const label = document.createElement("label");
+ const input = document.createElement("input");
+
+ input.type = "radio";
+ input.name = "suiteToggle";
+ input.value = suite;
+
+ if (idx === 0) {
+ input.checked = true; // select first run mode
+ }
+
+ label.appendChild(input);
+ label.append(" " + suite);
+
+ suiteElt.appendChild(label);
+ });
+}
+
+
function plotExtract() {
- const all_data = GLOBAL_DATA.data.tests.extract;
+
+ const suite = document.querySelector(
+ 'input[name="suiteToggle"]:checked'
+ ).value;
+
+ if (!suite) {
+ return;
+ }
+
+ const all_data = GLOBAL_DATA.data[suite].extract;
if (GLOBAL_DATA.extractChart === null) {
return;
@@ -31,7 +65,7 @@ function plotExtract() {
data[b].poachDeser = aggregate(all_data[b].deserialize, "total");
data[b].poachTotal = data[b].poachDeser + data[b].poachExtract;
- data[b].difference = data[b].poachTotal - data[b].vanillaTotal;
+ data[b].difference = data[b].vanillaTotal - data[b].poachTotal;
});
GLOBAL_DATA.differenceChart.data = {
@@ -41,18 +75,16 @@ function plotExtract() {
label: "poach - vanilla",
data: Object.values(data).map((d) => d.difference),
backgroundColor: Object.values(data).map((d) => {
- if (Math.abs(d.difference) > 25) {
- return "gray";
- } else {
- return d.difference >= 0
- ? "rgba(255, 99, 132, 0.7)"
- : "rgba(54, 162, 235, 0.7)";
- }
+ return d.difference >= 0
+ ? "rgba(54, 162, 235, 0.7)"
+ : "rgba(255, 99, 132, 0.7)";
}),
},
],
};
+ GLOBAL_DATA.differenceChart.update();
+
GLOBAL_DATA.extractChart.data = {
labels: benchmarks,
datasets: [
@@ -85,4 +117,6 @@ function plotExtract() {
},
],
};
+
+ GLOBAL_DATA.extractChart.update();
}
diff --git a/infra/nightly.py b/infra/nightly.py
index f968ec4f0..745e62b1b 100644
--- a/infra/nightly.py
+++ b/infra/nightly.py
@@ -102,8 +102,13 @@ def run_extract_experiments(resource_dir, tmp_dir, aggregator):
for benchmark in benchmark_files(resource_dir / "test-files" / suite):
timeline_file = tmp_dir / f"{benchmark.stem}-timeline.json"
run_poach(benchmark, tmp_dir, "extract")
- add_benchmark_data(aggregator, timeline_file, f"{suite}/timeline/{benchmark.stem}/timeline.json")
+ add_benchmark_data(aggregator, timeline_file, f"{suite}/extract/{benchmark.stem}/timeline.json")
cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json")
+ for benchmark in benchmark_files(top_dir / "tests", recursive = True):
+ timeline_file = tmp_dir / f"{benchmark.stem}-timeline.json"
+ run_poach(benchmark, tmp_dir, "extract")
+ add_benchmark_data(aggregator, timeline_file, f"tests/extract/{benchmark.stem}/timeline.json")
+ cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json")
def run_mined_experiments(resource_dir, tmp_dir, aggregator):
mega_serialize_file = tmp_dir / "mega-easteregg-serialize.fbs"
From 54533db2ac629947c44783eae21ed2418f0d6c3a Mon Sep 17 00:00:00 2001
From: Haobin Ni
Date: Thu, 5 Mar 2026 12:22:59 -0800
Subject: [PATCH 08/21] Show egraph size in size report
---
src/lib.rs | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/lib.rs b/src/lib.rs
index 950778ab5..3c7a0f180 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2678,7 +2678,9 @@ impl TimedEgraph {
}
pub fn print_size_report(&mut self, max_level: usize) -> Result<()> {
- self.egraphs.last().unwrap().get_sizerp().pretty_print(0, max_level);
+ let egraph = self.egraphs.last().unwrap();
+ println!("egraph size: {:}", egraph.num_tuples());
+ egraph.get_sizerp().pretty_print(0, max_level);
Ok(())
}
From dcf81e5c7cb8074aec02d1745a8a35471be86662 Mon Sep 17 00:00:00 2001
From: Haobin Ni
Date: Thu, 5 Mar 2026 12:48:07 -0800
Subject: [PATCH 09/21] Add include ser time option, add a speedup graph
---
infra/nightly-resources/web/chart.js | 45 ++++++++++++++++++++++++
infra/nightly-resources/web/extract.html | 7 ++++
infra/nightly-resources/web/extract.js | 26 +++++++++++++-
3 files changed, 77 insertions(+), 1 deletion(-)
diff --git a/infra/nightly-resources/web/chart.js b/infra/nightly-resources/web/chart.js
index da73b509b..aed046dd9 100644
--- a/infra/nightly-resources/web/chart.js
+++ b/infra/nightly-resources/web/chart.js
@@ -124,6 +124,51 @@ function initializeCharts() {
);
}
+ if (!!document.getElementById("speedup-chart")) {
+ console.assert(GLOBAL_DATA.differenceChart === null);
+
+ GLOBAL_DATA.speedupChart = new Chart(
+ document.getElementById("speedup-chart"),
+ {
+ type: "bar",
+ data: {},
+ options: {
+ responsive: true,
+ plugins: {
+ legend: {
+ display: false,
+ },
+ title: {
+ display: true,
+ text: "Per-benchmark Runtime Speedup",
+ },
+ tooltip: {
+ callbacks: {
+ label: (ctx) => `${ctx.raw.toFixed(2)}x`,
+ },
+ },
+ },
+ scales: {
+ x: {
+ ticks: {
+ maxRotation: 90,
+ minRotation: 45,
+ },
+ },
+ y: {
+ min: 0,
+ max: 50,
+ title: {
+ display: true,
+ text: "Speedup (times)",
+ },
+ },
+ },
+ },
+ },
+ );
+ }
+
if (!!document.getElementById("difference-chart")) {
console.assert(GLOBAL_DATA.differenceChart === null);
diff --git a/infra/nightly-resources/web/extract.html b/infra/nightly-resources/web/extract.html
index 8af73a4dc..f8a2483d9 100644
--- a/infra/nightly-resources/web/extract.html
+++ b/infra/nightly-resources/web/extract.html
@@ -25,8 +25,15 @@ POACH vs Vanilla Egglog
Serialization time is not counted
+
+
+
+
diff --git a/infra/nightly-resources/web/extract.js b/infra/nightly-resources/web/extract.js
index 562dc5c90..e922b536a 100644
--- a/infra/nightly-resources/web/extract.js
+++ b/infra/nightly-resources/web/extract.js
@@ -37,6 +37,8 @@ function plotExtract() {
return;
}
+ const includeser = document.querySelector("input[name='icldser1']:checked");
+
const all_data = GLOBAL_DATA.data[suite].extract;
if (GLOBAL_DATA.extractChart === null) {
@@ -63,11 +65,33 @@ function plotExtract() {
data[b].poachExtract = aggregate(extracts.slice(midpoint), "total");
data[b].poachDeser = aggregate(all_data[b].deserialize, "total");
- data[b].poachTotal = data[b].poachDeser + data[b].poachExtract;
+ if (includeser) {
+ data[b].poachTotal = data[b].poachDeser + data[b].poachExtract;
+ } else {
+ data[b].poachTotal = data[b].poachExtract;
+ }
data[b].difference = data[b].vanillaTotal - data[b].poachTotal;
+ data[b].speedup = data[b].vanillaTotal / data[b].poachTotal;
});
+ GLOBAL_DATA.speedupChart.data = {
+ labels: benchmarks,
+ datasets: [
+ {
+ label: "poach - vanilla",
+ data: Object.values(data).map((d) => d.speedup),
+ backgroundColor: Object.values(data).map((d) => {
+ return d.speedup >= 1
+ ? "rgba(54, 162, 235, 0.7)"
+ : "rgba(255, 99, 132, 0.7)";
+ }),
+ },
+ ],
+ };
+
+ GLOBAL_DATA.speedupChart.update();
+
GLOBAL_DATA.differenceChart.data = {
labels: benchmarks,
datasets: [
From 63d2be20fa3610dae0e338ca4e86697ae0435143 Mon Sep 17 00:00:00 2001
From: Haobin Ni
Date: Thu, 5 Mar 2026 12:57:53 -0800
Subject: [PATCH 10/21] fmt
---
egglog-ast/src/span.rs | 14 +++++++-------
src/lib.rs | 3 +--
src/poach.rs | 15 +++------------
src/serialize_size.rs | 8 ++++++--
4 files changed, 17 insertions(+), 23 deletions(-)
diff --git a/egglog-ast/src/span.rs b/egglog-ast/src/span.rs
index d062426cf..2651d9cdc 100644
--- a/egglog-ast/src/span.rs
+++ b/egglog-ast/src/span.rs
@@ -13,22 +13,22 @@ pub enum Span {
impl serde::Serialize for Span {
fn serialize(&self, serializer: S) -> Result
- where
- S: serde::Serializer {
+ where
+ S: serde::Serializer,
+ {
serializer.serialize_unit()
}
}
impl<'de> serde::Deserialize<'de> for Span {
fn deserialize(_: D) -> Result
- where
- D: serde::Deserializer<'de> {
+ where
+ D: serde::Deserializer<'de>,
+ {
Ok(Self::POACH)
}
}
-
-
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct EgglogSpan {
pub file: Arc,
@@ -116,7 +116,7 @@ impl Display for Span {
write!(f, "In {}:{}-{}: {quote}", start_line, start_col, end_col)
}
}
- },
+ }
Span::POACH => write!(f, "From POACH deserialization"),
}
}
diff --git a/src/lib.rs b/src/lib.rs
index 3c7a0f180..baf03eb21 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2485,8 +2485,7 @@ impl TimedEgraph {
}
pub fn new_from_file(path: &Path) -> Self {
- let mut file = fs::File::open(path)
- .expect("failed to open file");
+ let mut file = fs::File::open(path).expect("failed to open file");
let mut buf = Vec::new();
file.read_to_end(&mut buf)
.expect("Failed to read Flatbuffer from file");
diff --git a/src/poach.rs b/src/poach.rs
index 71471f3c5..d1b7d45d3 100644
--- a/src/poach.rs
+++ b/src/poach.rs
@@ -262,21 +262,12 @@ fn compare_extracts_weak(
for (x, y) in initial_extracts.iter().zip(final_extracts) {
match (x, y) {
- (CommandOutput::ExtractBest(_, _, _), CommandOutput::ExtractBest(_, _, _)) => {
-
- }
- (
- CommandOutput::ExtractVariants(_, _),
- CommandOutput::ExtractVariants(_, _),
- ) => {
-
- }
+ (CommandOutput::ExtractBest(_, _, _), CommandOutput::ExtractBest(_, _, _)) => {}
+ (CommandOutput::ExtractVariants(_, _), CommandOutput::ExtractVariants(_, _)) => {}
(
CommandOutput::MultiExtractVariants(_, _),
CommandOutput::MultiExtractVariants(_, _),
- ) => {
-
- }
+ ) => {}
_ => anyhow::bail!("No match : {:?} {:?}", x, y),
}
}
diff --git a/src/serialize_size.rs b/src/serialize_size.rs
index c9b49ae03..8683e27ca 100644
--- a/src/serialize_size.rs
+++ b/src/serialize_size.rs
@@ -1,6 +1,8 @@
use crate::{
ast::ResolvedVar,
- core::{GenericCoreAction, GenericCoreActions, GenericAtom, Query, ResolvedCall, ResolvedCoreRule},
+ core::{
+ GenericAtom, GenericCoreAction, GenericCoreActions, Query, ResolvedCall, ResolvedCoreRule,
+ },
egglog::util::IndexMap,
term_encoding::EncodingState,
CommandMacroRegistry, EGraph, RunReport, TypeInfo,
@@ -179,7 +181,9 @@ impl GenerateSizeReport for ResolvedCoreRule {
}
}
-impl GenerateSizeReport for (T, S) {
+impl
+ GenerateSizeReport for (T, S)
+{
fn get_sizerp(&self) -> SizeReport {
let mut ret = get_sizerp_default(self);
ret.fields
From c54b1a20f392da11a95cdd5b9538ee0a520ced85 Mon Sep 17 00:00:00 2001
From: Haobin Ni
Date: Thu, 5 Mar 2026 13:44:46 -0800
Subject: [PATCH 11/21] Skip tests because containers are not yet supported
---
infra/nightly.py | 5 -----
1 file changed, 5 deletions(-)
diff --git a/infra/nightly.py b/infra/nightly.py
index 745e62b1b..50011ff04 100644
--- a/infra/nightly.py
+++ b/infra/nightly.py
@@ -104,11 +104,6 @@ def run_extract_experiments(resource_dir, tmp_dir, aggregator):
run_poach(benchmark, tmp_dir, "extract")
add_benchmark_data(aggregator, timeline_file, f"{suite}/extract/{benchmark.stem}/timeline.json")
cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json")
- for benchmark in benchmark_files(top_dir / "tests", recursive = True):
- timeline_file = tmp_dir / f"{benchmark.stem}-timeline.json"
- run_poach(benchmark, tmp_dir, "extract")
- add_benchmark_data(aggregator, timeline_file, f"tests/extract/{benchmark.stem}/timeline.json")
- cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json")
def run_mined_experiments(resource_dir, tmp_dir, aggregator):
mega_serialize_file = tmp_dir / "mega-easteregg-serialize.fbs"
From 85dcdcf32d4441d70a953b74b3bfc0ec31167bb6 Mon Sep 17 00:00:00 2001
From: Haobin Ni
Date: Thu, 5 Mar 2026 14:13:20 -0800
Subject: [PATCH 12/21] Comment local dev setup
---
infra/nightly.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/infra/nightly.sh b/infra/nightly.sh
index 777641a8e..a0777ca6c 100755
--- a/infra/nightly.sh
+++ b/infra/nightly.sh
@@ -67,4 +67,4 @@ fi
cp infra/nightly-resources/web/* nightly/output
# Uncomment for local development
-cd nightly/output && python3 -m http.server 8002
+# cd nightly/output && python3 -m http.server 8002
From 1d46162dec79c11b115a9fb80ee0115be1252e36 Mon Sep 17 00:00:00 2001
From: Haobin Ni
Date: Thu, 5 Mar 2026 16:43:44 -0800
Subject: [PATCH 13/21] Output a csv file with serialization size data
---
Cargo.toml | 2 +-
infra/nightly.py | 10 +++++++---
infra/transform.py | 16 ++++++++++++++++
src/lib.rs | 26 +++++++++++++++++++++++++-
src/poach.rs | 26 ++++++++++++++++++++++++++
5 files changed, 75 insertions(+), 5 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index 9860f9912..2190a235e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -52,7 +52,7 @@ getrandom = "0.3"
once_cell = "1.21"
num-bigint = { version = "0.4", features = ["serde"] }
num-rational = {version = "0.4", features = ["serde"]}
-csv = "1.3"
+csv = "1.4"
typetag = "0.2"
serde = { version = "1.0", features = ["derive", "rc"] }
serde_json = "1.0"
diff --git a/infra/nightly.py b/infra/nightly.py
index 50011ff04..2ce3f6ae1 100644
--- a/infra/nightly.py
+++ b/infra/nightly.py
@@ -96,14 +96,16 @@ def run_test_experiments(top_dir, tmp_dir, aggregator):
}.get(run_mode, [])
cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json", *extra_files)
-def run_extract_experiments(resource_dir, tmp_dir, aggregator):
+def run_extract_experiments(resource_dir, tmp_dir, aggregator, csv_aggregator):
timeline_suites = ["easteregg", "herbie-hamming", "herbie-math-rewrite", "herbie-math-taylor"]
for suite in timeline_suites:
for benchmark in benchmark_files(resource_dir / "test-files" / suite):
timeline_file = tmp_dir / f"{benchmark.stem}-timeline.json"
run_poach(benchmark, tmp_dir, "extract")
add_benchmark_data(aggregator, timeline_file, f"{suite}/extract/{benchmark.stem}/timeline.json")
- cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json")
+ extra_files = [tmp_dir / f"{benchmark.stem}.csv"]
+ csv_aggregator.add_file(extra_files[0])
+ cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json", *extra_files)
def run_mined_experiments(resource_dir, tmp_dir, aggregator):
mega_serialize_file = tmp_dir / "mega-easteregg-serialize.fbs"
@@ -146,6 +148,7 @@ def run_mined_experiments(resource_dir, tmp_dir, aggregator):
tmp_dir = nightly_dir / "tmp"
output_data_dir = nightly_dir / "output" / "data"
aggregator = transform.TimelineAggregator(output_data_dir)
+ csv_aggregator = transform.CSVAggregator(output_data_dir)
# Make sure we're in the right place
os.chdir(top_dir)
@@ -167,11 +170,12 @@ def run_mined_experiments(resource_dir, tmp_dir, aggregator):
# run_mined_experiments(resource_dir, tmp_dir, aggregator)
# Run the extract experiment on our heavy benchmarks
- run_extract_experiments(resource_dir, tmp_dir, aggregator)
+ run_extract_experiments(resource_dir, tmp_dir, aggregator, csv_aggregator)
##############################################################################
aggregator.save()
+ csv_aggregator.save()
#if shutil.which("perf") is not None:
# # Generate flamegraphs
diff --git a/infra/transform.py b/infra/transform.py
index 2fe95fbfd..f6a334003 100644
--- a/infra/transform.py
+++ b/infra/transform.py
@@ -1,4 +1,5 @@
import json
+import pandas
import os
from pathlib import Path
@@ -111,3 +112,18 @@ def add_file(self, input_file, benchmark_name):
def save(self):
os.makedirs(self.output_dir, exist_ok=True)
save_json(self.data_path, self.aggregated)
+
+class CSVAggregator:
+ def __init__(self, output_dir):
+ self.output_dir = Path(output_dir)
+ self.data_path = self.output_dir / "data.csv"
+ self.records = []
+
+ def add_file(self, input_file):
+ df = pandas.read_csv(input_file)
+ self.records.append(df)
+
+ def save(self):
+ os.makedirs(self.output_dir, exist_ok=True)
+ combined = pandas.concat(self.records)
+ combined.to_csv(self.data_path, index=False)
\ No newline at end of file
diff --git a/src/lib.rs b/src/lib.rs
index baf03eb21..64273b913 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2442,13 +2442,33 @@ mod tests {
static START: &'static str = "start";
static END: &'static str = "end";
-#[derive(Serialize, Clone)]
+#[derive(Serialize, Clone, Eq)]
pub struct EgraphEvent {
sexp_idx: i32,
evt: &'static str,
time_micros: u128,
}
+impl Ord for EgraphEvent {
+ fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+ self.time_micros.cmp(&other.time_micros)
+ }
+}
+
+impl PartialOrd for EgraphEvent {
+ fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl PartialEq for EgraphEvent {
+ fn eq(&self, other: &Self) -> bool {
+ self.sexp_idx == other.sexp_idx &&
+ self.evt == other.evt &&
+ self.time_micros == other.time_micros
+ }
+}
+
#[derive(Serialize, Clone)]
pub struct ProgramTimeline {
program_text: String,
@@ -2500,6 +2520,10 @@ impl TimedEgraph {
}
}
+ pub fn get_total_time(&self, id : usize) -> u128 {
+ self.timeline[id].evts.iter().max().unwrap().time_micros - self.timeline[id].evts.iter().min().unwrap().time_micros
+ }
+
pub fn egraphs(&self) -> Vec<&EGraph> {
self.egraphs.iter().map(|x| x).collect()
}
diff --git a/src/poach.rs b/src/poach.rs
index d1b7d45d3..33479e2b6 100644
--- a/src/poach.rs
+++ b/src/poach.rs
@@ -481,6 +481,8 @@ fn poach(
.to_value()
.context("Failed to encode egraph as Flatbuffer")?;
+ let serialized_size = value.len();
+
timed_egraph
.from_value(value)
.context("Failed to decode egraph from Flatbuffer")?;
@@ -494,6 +496,30 @@ fn poach(
timed_egraph.write_timeline(&out_dir.join(format!("{name}-timeline.json")))?;
+
+ #[derive(Serialize)]
+ struct CSVRecord{
+ benchname: String,
+ egraph_size: usize,
+ serialized_size: usize,
+ ser_time: u128,
+ der_time: u128,
+ ext_time: u128,
+ run_time: u128,
+ }
+
+ let r = CSVRecord {
+ benchname: name.to_string(),
+ egraph_size: timed_egraph.egraphs().last().unwrap().num_tuples(),
+ serialized_size: serialized_size,
+ ser_time: timed_egraph.get_total_time(1),
+ der_time: timed_egraph.get_total_time(2),
+ ext_time: timed_egraph.get_total_time(3),
+ run_time: timed_egraph.get_total_time(0)
+ };
+
+ csv::Writer::from_path(&out_dir.join(format!("{name}.csv")))?.serialize(r)?;
+
Ok(())
},
),
From a5758297f1720883219230ca09db6f87900f0690 Mon Sep 17 00:00:00 2001
From: Haobin Ni
Date: Thu, 5 Mar 2026 16:45:16 -0800
Subject: [PATCH 14/21] fmt
---
src/lib.rs | 11 ++++++-----
src/poach.rs | 7 +++----
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/src/lib.rs b/src/lib.rs
index 64273b913..725506bfb 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2463,9 +2463,9 @@ impl PartialOrd for EgraphEvent {
impl PartialEq for EgraphEvent {
fn eq(&self, other: &Self) -> bool {
- self.sexp_idx == other.sexp_idx &&
- self.evt == other.evt &&
- self.time_micros == other.time_micros
+ self.sexp_idx == other.sexp_idx
+ && self.evt == other.evt
+ && self.time_micros == other.time_micros
}
}
@@ -2520,8 +2520,9 @@ impl TimedEgraph {
}
}
- pub fn get_total_time(&self, id : usize) -> u128 {
- self.timeline[id].evts.iter().max().unwrap().time_micros - self.timeline[id].evts.iter().min().unwrap().time_micros
+ pub fn get_total_time(&self, id: usize) -> u128 {
+ self.timeline[id].evts.iter().max().unwrap().time_micros
+ - self.timeline[id].evts.iter().min().unwrap().time_micros
}
pub fn egraphs(&self) -> Vec<&EGraph> {
diff --git a/src/poach.rs b/src/poach.rs
index 33479e2b6..05a28b653 100644
--- a/src/poach.rs
+++ b/src/poach.rs
@@ -496,9 +496,8 @@ fn poach(
timed_egraph.write_timeline(&out_dir.join(format!("{name}-timeline.json")))?;
-
#[derive(Serialize)]
- struct CSVRecord{
+ struct CSVRecord {
benchname: String,
egraph_size: usize,
serialized_size: usize,
@@ -515,11 +514,11 @@ fn poach(
ser_time: timed_egraph.get_total_time(1),
der_time: timed_egraph.get_total_time(2),
ext_time: timed_egraph.get_total_time(3),
- run_time: timed_egraph.get_total_time(0)
+ run_time: timed_egraph.get_total_time(0),
};
csv::Writer::from_path(&out_dir.join(format!("{name}.csv")))?.serialize(r)?;
-
+
Ok(())
},
),
From 96ea2262c4589d77e0b4c8517c763309719d1fa9 Mon Sep 17 00:00:00 2001
From: Haobin Ni
Date: Fri, 6 Mar 2026 15:41:13 -0800
Subject: [PATCH 15/21] Hacks
---
core-relations/src/free_join/mod.rs | 6 ++-
core-relations/src/lib.rs | 6 ++-
core-relations/src/row_buffer/mod.rs | 71 ++++++++++++++++++++++++++++
core-relations/src/table/mod.rs | 42 ++++++++++++++--
core-relations/src/table_spec.rs | 3 +-
core-relations/src/uf/mod.rs | 2 +
egglog-bridge/src/lib.rs | 3 +-
numeric-id/src/lib.rs | 6 ++-
src/lib.rs | 2 +-
src/poach.rs | 2 +-
src/serialize_size.rs | 58 +++++++++++++++++++----
11 files changed, 178 insertions(+), 23 deletions(-)
diff --git a/core-relations/src/free_join/mod.rs b/core-relations/src/free_join/mod.rs
index c97378fa0..9bccd0d1f 100644
--- a/core-relations/src/free_join/mod.rs
+++ b/core-relations/src/free_join/mod.rs
@@ -118,7 +118,8 @@ pub(crate) type HashColumnIndex = Arc>>;
pub struct TableInfo {
pub(crate) name: Option>,
pub(crate) spec: TableSpec,
- pub(crate) table: WrappedTable,
+ // TODO: evil hack for looking at serialization size
+ pub table: WrappedTable,
#[serde(skip)]
pub(crate) indexes: IndexCatalog, HashIndex>,
#[serde(skip)]
@@ -276,7 +277,8 @@ impl Counters {
pub struct Database {
// NB: some fields are pub(crate) to allow some internal modules to avoid
// borrowing the whole table.
- pub(crate) tables: DenseIdMap<TableId, TableInfo>,
+ // TODO: evil hack for looking at serialization size
+ pub tables: DenseIdMap<TableId, TableInfo>,
// TODO: having a single AtomicUsize per counter can lead to contention. We
// should look into prefetching counters when creating a new ExecutionState
// and incrementing locally. Note that the batch size shouldn't be too big
diff --git a/core-relations/src/lib.rs b/core-relations/src/lib.rs
index 7d0e66140..66fe1248c 100644
--- a/core-relations/src/lib.rs
+++ b/core-relations/src/lib.rs
@@ -7,7 +7,8 @@ pub(crate) mod base_values;
pub(crate) mod common;
pub(crate) mod containers;
pub(crate) mod dependency_graph;
-pub(crate) mod free_join;
+// TODO: evil hack for looking at serialization size
+pub mod free_join;
pub(crate) mod hash_index;
pub(crate) mod offsets;
pub(crate) mod parallel_heuristics;
@@ -16,7 +17,8 @@ pub(crate) mod query;
pub(crate) mod row_buffer;
pub(crate) mod table;
-pub(crate) mod table_spec;
+// TODO: evil hack for looking at serialization size
+pub mod table_spec;
pub(crate) mod uf;
#[cfg(test)]
diff --git a/core-relations/src/row_buffer/mod.rs b/core-relations/src/row_buffer/mod.rs
index a4426940c..e24af95f8 100644
--- a/core-relations/src/row_buffer/mod.rs
+++ b/core-relations/src/row_buffer/mod.rs
@@ -35,6 +35,7 @@ impl<'de> Deserialize<'de> for RowBuffer {
where
D: Deserializer<'de>,
{
+ /*
#[derive(Deserialize)]
struct Partial {
n_columns: usize,
@@ -49,19 +50,89 @@ impl<'de> Deserialize<'de> for RowBuffer {
total_rows: helper.total_rows,
data: Pooled::new(helper.data),
})
+ */
+
+ let bytes = <Vec<u8>>::deserialize(deserializer).expect("Failed to parse RowBuffer");
+ let mut it = bytes.iter();
+ let n_columns = deserialize_compressed(&mut it);
+ let total_rows = deserialize_compressed(&mut it);
+ let mut data = <Vec<Cell<Value>>>::new();
+ for i in 0..n_columns * total_rows {
+ data.push(Cell::new(Value::new(deserialize_compressed(&mut it))));
+ }
+ Ok(RowBuffer {
+ n_columns: n_columns.try_into().unwrap(),
+ total_rows: total_rows.try_into().unwrap(),
+ data: Pooled::new(data),
+ })
}
}
+#[allow(dead_code)]
+fn get_n_compressed_bytes(x: u32) -> usize {
+ if x < (1u32 << 7) {
+ 1
+ } else if x < (1u32 << 14) {
+ 2
+ } else if x < (1u32 << 21) {
+ 3
+ } else if x < (1u32 << 28) {
+ 4
+ } else {
+ 5
+ }
+}
+
+fn compressed_serialize(buf: &mut Vec<u8>, x: u32) {
+ let mut rem = x;
+ while (rem >= (1u32 << 7)) {
+ buf.push((rem & ((1u32 << 7) - 1)).try_into().unwrap());
+ rem = rem >> 7;
+ }
+ buf.push((rem | (1u32 << 7)).try_into().unwrap());
+}
+
+fn deserialize_compressed<'a, T: Iterator<Item = &'a u8>>(it: &mut T) -> u32 {
+ let mut ret = 0u32;
+ let mut delta = 0u32;
+ let mut val: u32 = <u8>::into(*it.next().unwrap());
+ while (val < (1u32 << 7)) {
+ ret = ret | (val << delta);
+ delta += 7;
+ val = <u8>::into(*it.next().unwrap());
+ }
+ let last = (val ^ (1u32 << 7)) << delta;
+ ret | last
+}
+
impl Serialize for RowBuffer {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
+ /*
let mut state = serializer.serialize_struct("RowBuffer", 3)?;
state.serialize_field("n_columns", &self.n_columns)?;
state.serialize_field("total_rows", &self.total_rows)?;
state.serialize_field("data", &*self.data)?;
state.end()
+ */
+ //let len = mem::size_of::() * 2 + self.n_columns * self.total_rows * mem::size_of::();
+ /*
+ let mut len = get_n_compressed_bytes(self.n_columns.try_into().unwrap()) + get_n_compressed_bytes(self.total_rows.try_into().unwrap());
+ for r in self.data.iter() {
+ len = len + get_n_compressed_bytes(r.get().rep);
+ }
+ let mut buf = vec![0u8; len];
+ //TODO: put data in
+ */
+ let mut buf = Vec::new();
+ compressed_serialize(&mut buf, self.n_columns.try_into().unwrap());
+ compressed_serialize(&mut buf, self.total_rows.try_into().unwrap());
+ for r in self.data.iter() {
+ compressed_serialize(&mut buf, r.get().rep);
+ }
+ serializer.serialize_bytes(&buf)
}
}
diff --git a/core-relations/src/table/mod.rs b/core-relations/src/table/mod.rs
index 4628a25b1..01ef79d8e 100644
--- a/core-relations/src/table/mod.rs
+++ b/core-relations/src/table/mod.rs
@@ -20,7 +20,10 @@ use crossbeam_queue::SegQueue;
use hashbrown::HashTable;
use rayon::iter::{IndexedParallelIterator, IntoParallelRefMutIterator, ParallelIterator};
use rustc_hash::FxHasher;
-use serde::{ser::SerializeStruct, Deserialize, Deserializer, Serialize, Serializer};
+use serde::{
+ ser::{SerializeStruct, SerializeTuple},
+ Deserialize, Deserializer, Serialize, Serializer,
+};
use sharded_hash_table::ShardedHashTable;
use crate::{
@@ -51,12 +54,41 @@ mod tests;
type HashCode = u64;
/// A pointer to a row in the table.
-#[derive(Clone, Debug, Serialize, Deserialize)]
+#[derive(Clone, Debug)]
pub(crate) struct TableEntry {
hashcode: HashCode,
row: RowId,
}
+impl Serialize for TableEntry {
+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+ where
+ S: Serializer,
+ {
+ let mut bytes = [0u8; 12];
+ let b1 = self.hashcode.to_be_bytes();
+ bytes[..b1.len()].copy_from_slice(&b1);
+ let b2 = self.row.rep.to_be_bytes();
+ bytes[b1.len()..].copy_from_slice(&b2);
+ serializer.serialize_bytes(&bytes)
+ }
+}
+
+impl<'de> Deserialize<'de> for TableEntry {
+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+ where
+ D: Deserializer<'de>,
+ {
+ let bytes = <[u8; 16]>::deserialize(deserializer).expect("Failed to parse TabelEntry");
+ Ok(TableEntry {
+ hashcode: u64::from_be_bytes(bytes[0..8].try_into().unwrap()),
+ row: RowId {
+ rep: u32::from_be_bytes(bytes[8..12].try_into().unwrap()),
+ },
+ })
+ }
+}
+
impl TableEntry {
fn hashcode(&self) -> u64 {
// We keep the cast here to make it easy to switch to HashCode=u32.
@@ -242,8 +274,8 @@ impl Serialize for SortedWritesTable {
let mut state = serializer.serialize_struct("SortedWritesTable", 11)?;
state.serialize_field("generation", &self.generation)?;
- state.serialize_field("shard_data", &self.hash.shard_data())?;
- state.serialize_field("shards", &serialized_shards)?;
+ //state.serialize_field("shard_data", &self.hash.shard_data())?;
+ //state.serialize_field("shards", &serialized_shards)?;
state.serialize_field("data", &self.data)?;
state.serialize_field("n_keys", &self.n_keys)?;
state.serialize_field("n_columns", &self.n_columns)?;
@@ -251,7 +283,7 @@ impl Serialize for SortedWritesTable {
state.serialize_field("offsets", &self.offsets)?;
state.serialize_field("pending_state", &self.pending_state)?;
state.serialize_field("to_rebuild", &self.to_rebuild)?;
- state.serialize_field("rebuild_index", &self.rebuild_index)?;
+ //state.serialize_field("rebuild_index", &self.rebuild_index)?;
state.serialize_field("subset_tracker", &self.subset_tracker)?;
state.end()
diff --git a/core-relations/src/table_spec.rs b/core-relations/src/table_spec.rs
index dc50ce360..5ec0fb8cc 100644
--- a/core-relations/src/table_spec.rs
+++ b/core-relations/src/table_spec.rs
@@ -522,7 +522,8 @@ impl TableWrapper for WrapperImpl {
/// The implementations here downcast manually to the type used when
/// constructing the WrappedTable.
pub struct WrappedTable {
- inner: Box,
+ // TODO: evil hack
+ pub inner: Box,
wrapper: Box,
}
diff --git a/core-relations/src/uf/mod.rs b/core-relations/src/uf/mod.rs
index 5688ddb9e..531706fc6 100644
--- a/core-relations/src/uf/mod.rs
+++ b/core-relations/src/uf/mod.rs
@@ -63,8 +63,10 @@ pub struct DisplacedTable {
// k columns, k-1 are args, kth is the ID
// enode is the row index
// on deserialize: need to recompute this from `displaced`
+ #[serde(skip)]
displaced: Vec<(Value, Value)>, // this is "the table" everything else can be recomputed from this
// can even recanonicalize on serialization to get rid of dead things
+ #[serde(skip)]
changed: bool,
#[serde(skip)]
lookup_table: HashMap,
diff --git a/egglog-bridge/src/lib.rs b/egglog-bridge/src/lib.rs
index 7232d5def..702fe4cb4 100644
--- a/egglog-bridge/src/lib.rs
+++ b/egglog-bridge/src/lib.rs
@@ -68,7 +68,8 @@ impl Timestamp {
/// The state associated with an egglog program.
#[derive(Clone, Serialize, Deserialize)]
pub struct EGraph {
- db: Database,
+ // TODO: evil hack for looking at serialization size
+ pub db: Database,
uf_table: TableId,
id_counter: CounterId,
reason_counter: CounterId,
diff --git a/numeric-id/src/lib.rs b/numeric-id/src/lib.rs
index 9825268f2..df7e14cbc 100644
--- a/numeric-id/src/lib.rs
+++ b/numeric-id/src/lib.rs
@@ -47,7 +47,8 @@ impl NumericId for usize {
/// with no hashing. For sparse mappings, use a HashMap.
#[derive(Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct DenseIdMap {
- data: Vec