From 2374094c3f21782f7a10554a294cd8ac1d60f50c Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Mon, 23 Feb 2026 10:07:23 -0800 Subject: [PATCH 01/21] Use Flexbuffer --- Cargo.lock | 99 +++++++++++++++++++++++++--- Cargo.toml | 2 + core-relations/src/hash_index/mod.rs | 1 + src/lib.rs | 29 +++++--- src/poach.rs | 2 +- 5 files changed, 114 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f4a3a0ff5..405b84829 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -94,6 +94,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitflags" version = "2.10.0" @@ -124,6 +130,12 @@ version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "cc" version = "1.2.41" @@ -235,7 +247,7 @@ checksum = "93e373516c58af1c344bfe013b6c9831ce6a08bb59709ab3fa6fe5c9b0e904ff" dependencies = [ "divan-macros", "itertools", - "proc-macro-crate", + "proc-macro-crate 3.4.0", "proc-macro2", "quote", "syn 2.0.107", @@ -460,6 +472,7 @@ dependencies = [ "egglog-reports", "egraph-serialize", "env_logger", + "flexbuffers", "glob", "hashbrown 0.16.0", "im-rc", @@ -725,6 +738,19 @@ version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" +[[package]] +name = "flexbuffers" +version = "25.12.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bc752b3d049e0705749b9999d0b130d6cf62935bc7762fd3bdb7636047abe43" +dependencies = [ + "bitflags 1.3.2", + "byteorder", + "num_enum", + "serde", + "serde_derive", +] + [[package]] name = "foldhash" version = "0.1.5" @@ -1016,7 +1042,7 @@ version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" dependencies = [ - "bitflags", + "bitflags 2.10.0", "cfg-if", "cfg_aliases", "libc", @@ -1097,6 +1123,27 @@ dependencies = [ "autocfg", ] +[[package]] +name = "num_enum" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f646caf906c20226733ed5b1374287eb97e3c2a5c227ce668c1f2ce20ae57c9" +dependencies = [ + "num_enum_derive", +] + +[[package]] +name = "num_enum_derive" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcbff9bc912032c62bf65ef1d5aea88983b420f4f839db1e9b0c281a25c9c799" +dependencies = [ + "proc-macro-crate 1.3.1", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "once_cell" version = "1.21.3" @@ -1212,13 +1259,23 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "proc-macro-crate" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919" +dependencies = [ + "once_cell", + "toml_edit 0.19.15", +] + [[package]] name = "proc-macro-crate" version = "3.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" dependencies = [ - "toml_edit", + "toml_edit 0.23.7", ] [[package]] @@ -1328,7 +1385,7 @@ version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags", + "bitflags 2.10.0", ] [[package]] @@ -1378,7 +1435,7 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ - "bitflags", + "bitflags 2.10.0", "errno", "libc", "linux-raw-sys", @@ -1595,6 +1652,12 @@ dependencies = [ "syn 2.0.107", ] +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" + [[package]] name = "toml_datetime" version = "0.7.3" @@ -1604,6 +1667,17 @@ dependencies = [ "serde_core", ] +[[package]] +name = "toml_edit" +version = "0.19.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" +dependencies = [ + "indexmap", + "toml_datetime 0.6.11", + "winnow 0.5.40", +] + [[package]] name = "toml_edit" version = "0.23.7" @@ -1611,9 +1685,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" dependencies = [ "indexmap", - "toml_datetime", + "toml_datetime 0.7.3", "toml_parser", - "winnow", + "winnow 0.7.13", ] [[package]] @@ -1622,7 +1696,7 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" dependencies = [ - "winnow", + "winnow 0.7.13", ] [[package]] @@ -1955,6 +2029,15 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +[[package]] +name = "winnow" +version = "0.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] + [[package]] name = "winnow" version = "0.7.13" diff --git a/Cargo.toml b/Cargo.toml index 86206ef8d..9860f9912 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,6 +56,7 @@ csv = "1.3" typetag = "0.2" serde = { version = "1.0", features = ["derive", "rc"] } serde_json = "1.0" +flexbuffers = "25.12.19" ###################### # build dependencies @@ -162,6 +163,7 @@ serde_json_diff = "0.2.0" anyhow.workspace = true walkdir = "2.5.0" egglog-reports = { workspace = true } +flexbuffers.workspace = true [build-dependencies] chrono = { workspace = true, features = ["now"], optional = true } diff --git a/core-relations/src/hash_index/mod.rs b/core-relations/src/hash_index/mod.rs index 3f19107fe..b377a3bae 100644 --- a/core-relations/src/hash_index/mod.rs +++ b/core-relations/src/hash_index/mod.rs @@ -915,6 +915,7 @@ static THREAD_POOL: Lazy = Lazy::new(|| { /// to the beginning of an unused vector. 
#[derive(Default, Clone, Serialize, Deserialize)] pub(super) struct FreeList { + #[serde(skip)] data: HashMap>, } impl FreeList { diff --git a/src/lib.rs b/src/lib.rs index ef2fcdd3a..48b345b7d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -33,6 +33,7 @@ pub use command_macro::{CommandMacro, CommandMacroRegistry}; // This is used to allow the `add_primitive` macro to work in // both this crate and other crates by referring to `::egglog`. extern crate self as egglog; +extern crate flexbuffers; use anyhow::{Context, Result}; use ast::*; pub use ast::{ResolvedExpr, ResolvedFact, ResolvedVar}; @@ -2674,8 +2675,11 @@ impl TimedEgraph { time_micros: self.timer.elapsed().as_micros(), }); - let value = serde_json::to_value(egraph).context("Failed to encode egraph as json")?; - + //let value = serde_json::to_value(egraph).context("Failed to encode egraph as json")?; + let mut buf = flexbuffers::FlexbufferSerializer::new(); + // Have to use the fully qualified syntax because egraph has a method called serailize + Serialize::serialize(egraph, &mut buf).expect("Failed to serialize the egraph in Flexbuffer"); + timeline.evts.push(EgraphEvent { sexp_idx: 0, evt: END, @@ -2688,10 +2692,11 @@ impl TimedEgraph { time_micros: self.timer.elapsed().as_micros(), }); - let file = fs::File::create(path) + let mut file = fs::File::create(path) .with_context(|| format!("failed to create file {}", path.display()))?; - serde_json::to_writer(BufWriter::new(file), &value) - .context("Failed to write value to file")?; + //serde_json::to_writer(BufWriter::new(file), &value) + // .context("Failed to write value to file")?; + file.write_all(buf.view()).context("Failed to write value to file")?; timeline.evts.push(EgraphEvent { sexp_idx: 1, @@ -2713,11 +2718,13 @@ impl TimedEgraph { time_micros: self.timer.elapsed().as_micros(), }); - let file = fs::File::open(path) + let mut file = fs::File::open(path) .with_context(|| format!("failed to open file {}", path.display()))?; - let reader = BufReader::new(file); - let value: serde_json::Value = - serde_json::from_reader(reader).context("Failed to read json from file")?; + //let reader = BufReader::new(file); + //let value: serde_json::Value = + // serde_json::from_reader(reader).context("Failed to read json from file")?; + let mut buf = Vec::new(); + file.read_to_end(&mut buf).context("Failed to read Flatbuffer from file")?; timeline.evts.push(EgraphEvent { sexp_idx: 0, @@ -2731,7 +2738,9 @@ impl TimedEgraph { time_micros: self.timer.elapsed().as_micros(), }); - let egraph: EGraph = serde_json::from_value(value)?; + //let egraph: EGraph = serde_json::from_value(value)?; + let r = flexbuffers::Reader::get_root(buf.as_slice()).unwrap(); + let egraph: EGraph = EGraph::deserialize(r).unwrap(); timeline.evts.push(EgraphEvent { sexp_idx: 1, diff --git a/src/poach.rs b/src/poach.rs index 1bc0c361f..5a1a8465e 100644 --- a/src/poach.rs +++ b/src/poach.rs @@ -52,7 +52,7 @@ enum RunMode { // For each egg file under the input path, // Run the egglog program, recording timing information. // Round trip to JSON Value, but do not read/write from file - // Assert the deserialized egraph has hthe same size as the initial egraph. + // Assert the deserialized egraph has the same size as the initial egraph. 
+ // Assert the deserialized egraph has the same size as the initial egraph.
// Save the completed timeline, for consumption by the nightly frontend NoIO, From dae76d2e967ac8f4750a6b8889063cc3f962d5f3 Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Mon, 2 Mar 2026 17:17:30 -0800 Subject: [PATCH 02/21] Implement SizeReport --- src/lib.rs | 8 +++ src/poach.rs | 14 +++++ src/serialize_size.rs | 138 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 160 insertions(+) create mode 100644 src/serialize_size.rs diff --git a/src/lib.rs b/src/lib.rs index 48b345b7d..cf6e97c35 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,6 +30,8 @@ mod typechecking; pub mod util; pub use command_macro::{CommandMacro, CommandMacroRegistry}; +mod serialize_size; + // This is used to allow the `add_primitive` macro to work in // both this crate and other crates by referring to `::egglog`. extern crate self as egglog; @@ -64,6 +66,7 @@ use serde::ser::SerializeStruct; use serde::{Deserialize, Serialize}; use serde_json::json; pub use serialize_vis::{SerializeConfig, SerializeOutput, SerializedNode}; +use serialize_size::GenerateSizeReport; use size::GetSizePrimitive; use sort::*; use std::any::Any; @@ -2666,6 +2669,11 @@ impl TimedEgraph { Ok(()) } + pub fn print_size_report(&mut self) -> Result<()> { + self.egraphs.last().unwrap().get_sizerp().pretty_print(0); + Ok(()) + } + pub fn to_file(&mut self, path: &Path) -> Result<()> { let mut timeline = ProgramTimeline::new("(serialize)\n(write)"); let egraph = self.egraphs.last().unwrap(); diff --git a/src/poach.rs b/src/poach.rs index 5a1a8465e..f8543cf11 100644 --- a/src/poach.rs +++ b/src/poach.rs @@ -71,6 +71,11 @@ enum RunMode { // Run the egglog program, skipping declarations of Sorts and Rules // Save the completed timeline, for consumption by the nightly frontend Mine, + + // For each egg file under the input path, + // run the egglog program and record timing information. + // Print size information on the serialized egraphs. + SizeReport, } impl Display for RunMode { @@ -87,6 +92,7 @@ impl Display for RunMode { RunMode::NoIO => "no-io", RunMode::Extract => "extract", RunMode::Mine => "mine", + RunMode::SizeReport => "size-report" } ) } @@ -651,6 +657,14 @@ fn poach( }, ) } + RunMode::SizeReport => process_files( + &files, + out_dir, + initial_egraph.as_deref(), + |egg_file, _, timed_egraph| { + timed_egraph.run_from_file(egg_file)?; + timed_egraph.print_size_report() + }), } } diff --git a/src/serialize_size.rs b/src/serialize_size.rs new file mode 100644 index 000000000..168e26899 --- /dev/null +++ b/src/serialize_size.rs @@ -0,0 +1,138 @@ +use crate::{CommandMacroRegistry, EGraph, RunReport, TypeInfo, term_encoding::EncodingState}; + +/// Generate a json report for the size of a serialized structu +/// By default, only uses serialize +/// Allow specalization to look into subfields + +#[allow(dead_code)] +#[derive (Debug, Clone)] +pub struct SizeReport { + name: String, + size: usize, + fields: Vec<(String, Box)>, +} + +fn up_to_two_decimals(a : usize, b : usize) -> String { + let a100 = a * 100 / b; + let high = a100 / 100; + let low = a100 % 100; + let low_str = + if low < 10 { + "0".to_string() + &low.to_string() + } else { + low.to_string() + }; + return high.to_string() + "." 
+ &low_str; +} + +fn pretty_print_nbytes(size: usize) -> String { + if size < 200 { + size.to_string() + "B" + } else if size < 200 * 1024 { + up_to_two_decimals(size, 1024) + "KB" + } else if size < 200 * 1024 * 1024 { + up_to_two_decimals(size, 1024 * 1024) + "MB" + } else { + up_to_two_decimals(size, 1024 * 1024 * 1024) + "GB" + } +} + +impl SizeReport { + + pub fn pretty_print(&self, level: usize) { + if level == 0 { + println!("{} : {}", self.name, pretty_print_nbytes(self.size)); + } + let mut sorted_fields = self.fields.clone(); + sorted_fields.sort_by(|(_, a), (_, b)| b.size.cmp(&a.size)); + for (name, sr) in sorted_fields { + let percentage = (sr.size as f64 / self.size as f64) * 100.0; + println!(". {:level$}{} : {} ({:.2}%)", "", name, pretty_print_nbytes(sr.size), percentage); + sr.pretty_print(level + 2); + } + } +} + +pub trait GenerateSizeReport: serde::Serialize { + fn get_sizerp(&self) -> SizeReport { + let mut buf = flexbuffers::FlexbufferSerializer::new(); + serde::Serialize::serialize(self, &mut buf).expect("Failed to serialize in Flexbuffer"); + SizeReport { + name: std::any::type_name::().to_string(), + size: buf.view().len(), + fields: Vec::new(), + } + } +} + +impl GenerateSizeReport for egglog_bridge::EGraph {} + +impl GenerateSizeReport for Option {} + +impl GenerateSizeReport for egglog::util::IndexMap {} + +impl GenerateSizeReport for TypeInfo {} + +impl GenerateSizeReport for RunReport {} + +impl GenerateSizeReport for egglog_numeric_id::DenseIdMap {} + +impl GenerateSizeReport for CommandMacroRegistry {} + +impl GenerateSizeReport for EncodingState {} + + +impl GenerateSizeReport for EGraph { + fn get_sizerp(&self) -> SizeReport { + let mut buf = flexbuffers::FlexbufferSerializer::new(); + serde::Serialize::serialize(self, &mut buf).expect("Failed to serialize in Flexbuffer"); + let mut ret = SizeReport { + name: std::any::type_name::().to_string(), + size: buf.view().len(), + fields: Vec::new(), + }; + ret.fields.push(("backend".to_string(), Box::new(self.backend.get_sizerp()))); + ret.fields.push(("pushed_egraph".to_string(), Box::new(self.pushed_egraph.get_sizerp()))); + ret.fields.push(("functions".to_string(), Box::new(self.functions.get_sizerp()))); + ret.fields.push(("rulesets".to_string(), Box::new(self.rulesets.get_sizerp()))); + ret.fields.push(("type_info".to_string(), Box::new(self.type_info.get_sizerp()))); + ret.fields.push(("overall_run_report".to_string(), Box::new(self.overall_run_report.get_sizerp()))); + ret.fields.push(("schedulers".to_string(), Box::new(self.schedulers.get_sizerp()))); + ret.fields.push(("commands".to_string(), Box::new(self.commands.get_sizerp()))); + ret.fields.push(("command_macros".to_string(), Box::new(self.command_macros.get_sizerp()))); + ret.fields.push(("proof_state".to_string(), Box::new(self.proof_state.get_sizerp()))); + ret + } +} + +/* +pub struct EGraph { + backend: egglog_bridge::EGraph, + + pub parser: Parser, + + names: check_shadowing::Names, + /// pushed_egraph forms a linked list of pushed egraphs. + /// Pop reverts the egraph to the last pushed egraph. + pushed_egraph: Option>, + + functions: IndexMap, + + rulesets: IndexMap, + pub fact_directory: Option, + pub seminaive: bool, + + type_info: TypeInfo, + /// The run report unioned over all runs so far. 
+ overall_run_report: RunReport, + + schedulers: DenseIdMap, + + commands: IndexMap>, + strict_mode: bool, + warned_about_missing_global_prefix: bool, + /// Registry for command-level macros + command_macros: CommandMacroRegistry, + proof_state: EncodingState, +} + */ \ No newline at end of file From 156f463e5f34b1cfdc3e0817f96a60e36db650ee Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Tue, 3 Mar 2026 14:06:37 -0800 Subject: [PATCH 03/21] Dig deeper into the size blowup --- src/lib.rs | 15 +-- src/poach.rs | 17 ++-- src/serialize_size.rs | 228 ++++++++++++++++++++++++++++++------------ 3 files changed, 182 insertions(+), 78 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index cf6e97c35..db1bc52ca 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,8 +34,8 @@ mod serialize_size; // This is used to allow the `add_primitive` macro to work in // both this crate and other crates by referring to `::egglog`. -extern crate self as egglog; extern crate flexbuffers; +extern crate self as egglog; use anyhow::{Context, Result}; use ast::*; pub use ast::{ResolvedExpr, ResolvedFact, ResolvedVar}; @@ -65,8 +65,8 @@ use scheduler::{SchedulerId, SchedulerRecord}; use serde::ser::SerializeStruct; use serde::{Deserialize, Serialize}; use serde_json::json; -pub use serialize_vis::{SerializeConfig, SerializeOutput, SerializedNode}; use serialize_size::GenerateSizeReport; +pub use serialize_vis::{SerializeConfig, SerializeOutput, SerializedNode}; use size::GetSizePrimitive; use sort::*; use std::any::Any; @@ -2686,8 +2686,9 @@ impl TimedEgraph { //let value = serde_json::to_value(egraph).context("Failed to encode egraph as json")?; let mut buf = flexbuffers::FlexbufferSerializer::new(); // Have to use the fully qualified syntax because egraph has a method called serailize - Serialize::serialize(egraph, &mut buf).expect("Failed to serialize the egraph in Flexbuffer"); - + Serialize::serialize(egraph, &mut buf) + .expect("Failed to serialize the egraph in Flexbuffer"); + timeline.evts.push(EgraphEvent { sexp_idx: 0, evt: END, @@ -2704,7 +2705,8 @@ impl TimedEgraph { .with_context(|| format!("failed to create file {}", path.display()))?; //serde_json::to_writer(BufWriter::new(file), &value) // .context("Failed to write value to file")?; - file.write_all(buf.view()).context("Failed to write value to file")?; + file.write_all(buf.view()) + .context("Failed to write value to file")?; timeline.evts.push(EgraphEvent { sexp_idx: 1, @@ -2732,7 +2734,8 @@ impl TimedEgraph { //let value: serde_json::Value = // serde_json::from_reader(reader).context("Failed to read json from file")?; let mut buf = Vec::new(); - file.read_to_end(&mut buf).context("Failed to read Flatbuffer from file")?; + file.read_to_end(&mut buf) + .context("Failed to read Flatbuffer from file")?; timeline.evts.push(EgraphEvent { sexp_idx: 0, diff --git a/src/poach.rs b/src/poach.rs index f8543cf11..14f972771 100644 --- a/src/poach.rs +++ b/src/poach.rs @@ -92,7 +92,7 @@ impl Display for RunMode { RunMode::NoIO => "no-io", RunMode::Extract => "extract", RunMode::Mine => "mine", - RunMode::SizeReport => "size-report" + RunMode::SizeReport => "size-report", } ) } @@ -658,13 +658,14 @@ fn poach( ) } RunMode::SizeReport => process_files( - &files, - out_dir, - initial_egraph.as_deref(), - |egg_file, _, timed_egraph| { - timed_egraph.run_from_file(egg_file)?; - timed_egraph.print_size_report() - }), + &files, + out_dir, + initial_egraph.as_deref(), + |egg_file, _, timed_egraph| { + timed_egraph.run_from_file(egg_file)?; + timed_egraph.print_size_report() + }, 
+ ), } } diff --git a/src/serialize_size.rs b/src/serialize_size.rs index 168e26899..63a22195c 100644 --- a/src/serialize_size.rs +++ b/src/serialize_size.rs @@ -1,27 +1,32 @@ -use crate::{CommandMacroRegistry, EGraph, RunReport, TypeInfo, term_encoding::EncodingState}; +use crate::{ + ast::ResolvedVar, + core::{GenericCoreAction, GenericCoreActions, GenericAtom, Query, ResolvedCall, ResolvedCoreRule}, + egglog::util::IndexMap, + term_encoding::EncodingState, + CommandMacroRegistry, EGraph, RunReport, TypeInfo, +}; /// Generate a json report for the size of a serialized structu /// By default, only uses serialize /// Allow specalization to look into subfields #[allow(dead_code)] -#[derive (Debug, Clone)] +#[derive(Debug, Clone)] pub struct SizeReport { name: String, size: usize, fields: Vec<(String, Box)>, } -fn up_to_two_decimals(a : usize, b : usize) -> String { +fn up_to_two_decimals(a: usize, b: usize) -> String { let a100 = a * 100 / b; let high = a100 / 100; let low = a100 % 100; - let low_str = - if low < 10 { - "0".to_string() + &low.to_string() - } else { - low.to_string() - }; + let low_str = if low < 10 { + "0".to_string() + &low.to_string() + } else { + low.to_string() + }; return high.to_string() + "." + &low_str; } @@ -37,102 +42,197 @@ fn pretty_print_nbytes(size: usize) -> String { } } -impl SizeReport { +fn truncate_string_with_ellipsis(s: &str, max_len: usize) -> String { + if s.chars().count() > max_len { + let mut truncated = s.chars().take(max_len).collect::(); + truncated.push_str(&format!("...{:} chars total", s.len())); + truncated + } else { + s.to_string() + } +} +impl SizeReport { pub fn pretty_print(&self, level: usize) { if level == 0 { println!("{} : {}", self.name, pretty_print_nbytes(self.size)); } let mut sorted_fields = self.fields.clone(); sorted_fields.sort_by(|(_, a), (_, b)| b.size.cmp(&a.size)); - for (name, sr) in sorted_fields { + for (name, sr) in sorted_fields.iter().take(10) { let percentage = (sr.size as f64 / self.size as f64) * 100.0; - println!(". {:level$}{} : {} ({:.2}%)", "", name, pretty_print_nbytes(sr.size), percentage); + println!( + " {:level$}{} : {} ({:.2}%)", + "", + name, + pretty_print_nbytes(sr.size), + percentage + ); sr.pretty_print(level + 2); } + if sorted_fields.len() > 10 { + println!(" {:level$} ... 
{:} fields total", "", sorted_fields.len()); + } + } +} + +fn get_sizerp_default(obj: &T) -> SizeReport { + let mut buf = flexbuffers::FlexbufferSerializer::new(); + serde::Serialize::serialize(obj, &mut buf).expect("Failed to serialize in Flexbuffer"); + SizeReport { + name: std::any::type_name::().to_string(), + size: buf.view().len(), + fields: Vec::new(), } } -pub trait GenerateSizeReport: serde::Serialize { +pub trait GenerateSizeReport: serde::Serialize + Sized { fn get_sizerp(&self) -> SizeReport { - let mut buf = flexbuffers::FlexbufferSerializer::new(); - serde::Serialize::serialize(self, &mut buf).expect("Failed to serialize in Flexbuffer"); - SizeReport { - name: std::any::type_name::().to_string(), - size: buf.view().len(), - fields: Vec::new(), - } + get_sizerp_default(self) } } impl GenerateSizeReport for egglog_bridge::EGraph {} -impl GenerateSizeReport for Option {} +impl GenerateSizeReport for Option {} -impl GenerateSizeReport for egglog::util::IndexMap {} +impl GenerateSizeReport + for IndexMap +{ + fn get_sizerp(&self) -> SizeReport { + let mut ret = get_sizerp_default(self); + for (k, v) in self { + ret.fields.push(( + truncate_string_with_ellipsis(&k.to_string(), 20), + Box::new(v.get_sizerp()), + )); + } + ret + } +} impl GenerateSizeReport for TypeInfo {} impl GenerateSizeReport for RunReport {} -impl GenerateSizeReport for egglog_numeric_id::DenseIdMap {} +impl GenerateSizeReport + for egglog_numeric_id::DenseIdMap +{ +} impl GenerateSizeReport for CommandMacroRegistry {} impl GenerateSizeReport for EncodingState {} +impl GenerateSizeReport for egglog::Function {} -impl GenerateSizeReport for EGraph { +use egglog::ast::Ruleset; +use egglog_ast::span::Span; + +impl GenerateSizeReport for Span {} + +impl GenerateSizeReport for GenericAtom {} + +impl GenerateSizeReport for Query { + fn get_sizerp(&self) -> SizeReport { + self.atoms.get_sizerp() + } +} + +impl GenerateSizeReport for Vec { fn get_sizerp(&self) -> SizeReport { - let mut buf = flexbuffers::FlexbufferSerializer::new(); - serde::Serialize::serialize(self, &mut buf).expect("Failed to serialize in Flexbuffer"); - let mut ret = SizeReport { - name: std::any::type_name::().to_string(), - size: buf.view().len(), - fields: Vec::new(), - }; - ret.fields.push(("backend".to_string(), Box::new(self.backend.get_sizerp()))); - ret.fields.push(("pushed_egraph".to_string(), Box::new(self.pushed_egraph.get_sizerp()))); - ret.fields.push(("functions".to_string(), Box::new(self.functions.get_sizerp()))); - ret.fields.push(("rulesets".to_string(), Box::new(self.rulesets.get_sizerp()))); - ret.fields.push(("type_info".to_string(), Box::new(self.type_info.get_sizerp()))); - ret.fields.push(("overall_run_report".to_string(), Box::new(self.overall_run_report.get_sizerp()))); - ret.fields.push(("schedulers".to_string(), Box::new(self.schedulers.get_sizerp()))); - ret.fields.push(("commands".to_string(), Box::new(self.commands.get_sizerp()))); - ret.fields.push(("command_macros".to_string(), Box::new(self.command_macros.get_sizerp()))); - ret.fields.push(("proof_state".to_string(), Box::new(self.proof_state.get_sizerp()))); + let mut ret = get_sizerp_default(self); + for e in self { + let rep = e.get_sizerp(); + ret.fields.push((rep.name.clone(), Box::new(rep))); + } ret } } -/* -pub struct EGraph { - backend: egglog_bridge::EGraph, +impl GenerateSizeReport for GenericCoreAction {} - pub parser: Parser, +impl GenerateSizeReport for GenericCoreActions { + fn get_sizerp(&self) -> SizeReport { + self.0.get_sizerp() + } +} - names: 
check_shadowing::Names, - /// pushed_egraph forms a linked list of pushed egraphs. - /// Pop reverts the egraph to the last pushed egraph. - pushed_egraph: Option>, +impl GenerateSizeReport for ResolvedCall {} - functions: IndexMap, +impl GenerateSizeReport for ResolvedVar {} - rulesets: IndexMap, - pub fact_directory: Option, - pub seminaive: bool, +impl GenerateSizeReport for ResolvedCoreRule { + fn get_sizerp(&self) -> SizeReport { + let mut ret = get_sizerp_default(self); + ret.fields + .push(("span".to_string(), Box::new(self.span.get_sizerp()))); + ret.fields + .push(("body".to_string(), Box::new(self.body.get_sizerp()))); + ret.fields + .push(("head".to_string(), Box::new(self.head.get_sizerp()))); + ret + } +} - type_info: TypeInfo, - /// The run report unioned over all runs so far. - overall_run_report: RunReport, +impl GenerateSizeReport for (T, S) { + fn get_sizerp(&self) -> SizeReport { + let mut ret = get_sizerp_default(self); + ret.fields + .push(("0".to_string(), Box::new(self.0.get_sizerp()))); + ret.fields + .push(("1".to_string(), Box::new(self.1.get_sizerp()))); + ret + } +} - schedulers: DenseIdMap, +impl GenerateSizeReport for egglog_bridge::RuleId {} - commands: IndexMap>, - strict_mode: bool, - warned_about_missing_global_prefix: bool, - /// Registry for command-level macros - command_macros: CommandMacroRegistry, - proof_state: EncodingState, +impl GenerateSizeReport for egglog::ast::Ruleset { + fn get_sizerp(&self) -> SizeReport { + match &self { + Ruleset::Rules(mp) => mp.get_sizerp(), + Ruleset::Combined(_l) => { + //TODO if needed + get_sizerp_default(self) + } + } + } +} + +impl GenerateSizeReport for EGraph { + fn get_sizerp(&self) -> SizeReport { + let mut ret = get_sizerp_default(&self); + ret.fields + .push(("backend".to_string(), Box::new(self.backend.get_sizerp()))); + ret.fields.push(( + "pushed_egraph".to_string(), + Box::new(self.pushed_egraph.get_sizerp()), + )); + ret.fields.push(( + "functions".to_string(), + Box::new(self.functions.get_sizerp()), + )); + ret.fields + .push(("rulesets".to_string(), Box::new(self.rulesets.get_sizerp()))); + ret.fields.push(( + "type_info".to_string(), + Box::new(self.type_info.get_sizerp()), + )); + ret.fields.push(( + "overall_run_report".to_string(), + Box::new(self.overall_run_report.get_sizerp()), + )); + ret.fields.push(( + "schedulers".to_string(), + Box::new(self.schedulers.get_sizerp()), + )); + //ret.fields.push(("commands".to_string(), Box::new(self.commands.get_sizerp()))); + //ret.fields.push(("command_macros".to_string(), Box::new(self.command_macros.get_sizerp()))); + ret.fields.push(( + "proof_state".to_string(), + Box::new(self.proof_state.get_sizerp()), + )); + ret + } } - */ \ No newline at end of file From 92cc3334a1b5ac106386d80d63582dc89005412b Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Tue, 3 Mar 2026 14:06:58 -0800 Subject: [PATCH 04/21] Serialize span into unit --- egglog-ast/src/span.rs | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/egglog-ast/src/span.rs b/egglog-ast/src/span.rs index c2c8db320..d062426cf 100644 --- a/egglog-ast/src/span.rs +++ b/egglog-ast/src/span.rs @@ -3,13 +3,32 @@ use std::sync::Arc; use serde::{Deserialize, Serialize}; -#[derive(Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[derive(Clone, PartialEq, Eq, Hash)] pub enum Span { Panic, Egglog(Arc), Rust(Arc), + POACH, } +impl serde::Serialize for Span { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer { + serializer.serialize_unit() + 
} +} + +impl<'de> serde::Deserialize<'de> for Span { + fn deserialize(_: D) -> Result + where + D: serde::Deserializer<'de> { + Ok(Self::POACH) + } +} + + + #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct EgglogSpan { pub file: Arc, @@ -55,6 +74,7 @@ impl Span { Span::Panic => panic!("Span::Panic in Span::string"), Span::Rust(_) => panic!("Span::Rust cannot track end position"), Span::Egglog(span) => &span.file.contents[span.i..span.j], + Span::POACH => "From POACH deserialization", } } } @@ -96,7 +116,8 @@ impl Display for Span { write!(f, "In {}:{}-{}: {quote}", start_line, start_col, end_col) } } - } + }, + Span::POACH => write!(f, "From POACH deserialization"), } } } From c37fd3a877d537d345ba07aa90338190a0c30d4a Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Tue, 3 Mar 2026 14:29:00 -0800 Subject: [PATCH 05/21] Add control for how much size information to output --- src/lib.rs | 4 ++-- src/poach.rs | 2 +- src/serialize_size.rs | 10 +++++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index db1bc52ca..3eafa05f5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2669,8 +2669,8 @@ impl TimedEgraph { Ok(()) } - pub fn print_size_report(&mut self) -> Result<()> { - self.egraphs.last().unwrap().get_sizerp().pretty_print(0); + pub fn print_size_report(&mut self, max_level: usize) -> Result<()> { + self.egraphs.last().unwrap().get_sizerp().pretty_print(0, max_level); Ok(()) } diff --git a/src/poach.rs b/src/poach.rs index 14f972771..a3da3ed87 100644 --- a/src/poach.rs +++ b/src/poach.rs @@ -663,7 +663,7 @@ fn poach( initial_egraph.as_deref(), |egg_file, _, timed_egraph| { timed_egraph.run_from_file(egg_file)?; - timed_egraph.print_size_report() + timed_egraph.print_size_report(0) }, ), } diff --git a/src/serialize_size.rs b/src/serialize_size.rs index 63a22195c..c9b49ae03 100644 --- a/src/serialize_size.rs +++ b/src/serialize_size.rs @@ -53,7 +53,10 @@ fn truncate_string_with_ellipsis(s: &str, max_len: usize) -> String { } impl SizeReport { - pub fn pretty_print(&self, level: usize) { + pub fn pretty_print(&self, level: usize, max_level: usize) { + if level > max_level { + return; + } if level == 0 { println!("{} : {}", self.name, pretty_print_nbytes(self.size)); } @@ -61,14 +64,15 @@ impl SizeReport { sorted_fields.sort_by(|(_, a), (_, b)| b.size.cmp(&a.size)); for (name, sr) in sorted_fields.iter().take(10) { let percentage = (sr.size as f64 / self.size as f64) * 100.0; + let indent = level * 2; println!( - " {:level$}{} : {} ({:.2}%)", + " {:indent$}{} : {} ({:.2}%)", "", name, pretty_print_nbytes(sr.size), percentage ); - sr.pretty_print(level + 2); + sr.pretty_print(level + 1, max_level); } if sorted_fields.len() > 10 { println!(" {:level$} ... 
{:} fields total", "", sorted_fields.len()); From 4234f79f04aab3e56bf57a8f97e703be9e49d243 Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Thu, 5 Mar 2026 10:43:17 -0800 Subject: [PATCH 06/21] Extract experiment runs --- infra/nightly.py | 36 ++++++++++++++-------- infra/nightly.sh | 7 +++-- src/lib.rs | 33 ++++++++++---------- src/poach.rs | 80 +++++++++++++++++++++++++++++++++++------------- 4 files changed, 102 insertions(+), 54 deletions(-) mode change 100644 => 100755 infra/nightly.sh diff --git a/infra/nightly.py b/infra/nightly.py index 3e833356a..f968ec4f0 100644 --- a/infra/nightly.py +++ b/infra/nightly.py @@ -88,23 +88,32 @@ def run_test_experiments(top_dir, tmp_dir, aggregator): run_poach(benchmark, tmp_dir, run_mode) add_benchmark_data(aggregator, timeline_file, f"tests/{benchmark_name}/{benchmark.stem}/timeline.json") extra_files = { - "sequential-round-trip": [tmp_dir / f"{benchmark.stem}-serialize1.json"], + "sequential-round-trip": [tmp_dir / f"{benchmark.stem}-serialize1.fbs"], "old-serialize": [ - tmp_dir / f"{benchmark.stem}-serialize-poach.json", + tmp_dir / f"{benchmark.stem}-serialize-poach.fbs", tmp_dir / f"{benchmark.stem}-serialize-old.json", ], }.get(run_mode, []) cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json", *extra_files) +def run_extract_experiments(resource_dir, tmp_dir, aggregator): + timeline_suites = ["easteregg", "herbie-hamming", "herbie-math-rewrite", "herbie-math-taylor"] + for suite in timeline_suites: + for benchmark in benchmark_files(resource_dir / "test-files" / suite): + timeline_file = tmp_dir / f"{benchmark.stem}-timeline.json" + run_poach(benchmark, tmp_dir, "extract") + add_benchmark_data(aggregator, timeline_file, f"{suite}/timeline/{benchmark.stem}/timeline.json") + cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json") + def run_mined_experiments(resource_dir, tmp_dir, aggregator): - mega_serialize_file = tmp_dir / "mega-easteregg-serialize.json" + mega_serialize_file = tmp_dir / "mega-easteregg-serialize.fbs" mega_timeline_file = tmp_dir / "mega-easteregg-timeline.json" run_poach(resource_dir / "mega-easteregg.egg", tmp_dir, "serialize") add_benchmark_data(aggregator, mega_timeline_file, "easteregg/serialize/mega-easteregg/timeline.json") cleanup_benchmark_files(mega_timeline_file, tmp_dir / "summary.json") for benchmark in benchmark_files(resource_dir / "test-files" / "easteregg"): timeline_file = tmp_dir / f"{benchmark.stem}-timeline.json" - serialize_file = tmp_dir / f"{benchmark.stem}-serialize.json" + serialize_file = tmp_dir / f"{benchmark.stem}-serialize.fbs" run_poach(benchmark, tmp_dir, "serialize") add_benchmark_data(aggregator, timeline_file, f"easteregg/serialize/{benchmark.stem}/timeline.json") cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json") @@ -146,22 +155,25 @@ def run_mined_experiments(resource_dir, tmp_dir, aggregator): ############################################################################## # Run the benchmarks and record timeline-only data. - run_timeline_experiments(resource_dir, tmp_dir, aggregator) + # run_timeline_experiments(resource_dir, tmp_dir, aggregator) # Re-run the benchmarks with JSON round-tripping kept entirely in memory. - run_no_io_experiments(resource_dir, tmp_dir, aggregator) + # run_no_io_experiments(resource_dir, tmp_dir, aggregator) # Run the egglog tests under each serialization experiment mode. 
- run_test_experiments(top_dir, tmp_dir, aggregator) + # run_test_experiments(top_dir, tmp_dir, aggregator) # Run the mined-egraph experiment using both per-benchmark and mega-egraph seeds. - run_mined_experiments(resource_dir, tmp_dir, aggregator) + # run_mined_experiments(resource_dir, tmp_dir, aggregator) + + # Run the extract experiment on our heavy benchmarks + run_extract_experiments(resource_dir, tmp_dir, aggregator) ############################################################################## aggregator.save() - if shutil.which("perf") is not None: - # Generate flamegraphs - for egg_file in glob.glob("tests/*.egg") + glob.glob("tests/web-demo/*.egg"): - run_cmd([str(script_dir / "flamegraph.sh"), egg_file, str(nightly_dir / "output" / "flamegraphs")]) + #if shutil.which("perf") is not None: + # # Generate flamegraphs + # for egg_file in glob.glob("tests/*.egg") + glob.glob("tests/web-demo/*.egg"): + # run_cmd([str(script_dir / "flamegraph.sh"), egg_file, str(nightly_dir / "output" / "flamegraphs")]) diff --git a/infra/nightly.sh b/infra/nightly.sh old mode 100644 new mode 100755 index 766e417cb..777641a8e --- a/infra/nightly.sh +++ b/infra/nightly.sh @@ -47,7 +47,8 @@ mkdir -p nightly/output mkdir -p nightly/output/flamegraphs mkdir -p nightly/tmp -git clone https://github.com/brendangregg/FlameGraph.git +# Skip FlameGraphs for mining MVP +# git clone https://github.com/brendangregg/FlameGraph.git # Build in release mode before running nightly.py cargo build --release @@ -61,9 +62,9 @@ if [ ! -f nightly/output/data/data.json ]; then exit 1 fi -ls nightly/output/flamegraphs > nightly/output/flamegraphs.txt +# ls nightly/output/flamegraphs > nightly/output/flamegraphs.txt cp infra/nightly-resources/web/* nightly/output # Uncomment for local development -# cd nightly/output && python3 -m http.server 8002 +cd nightly/output && python3 -m http.server 8002 diff --git a/src/lib.rs b/src/lib.rs index 941282916..950778ab5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -73,7 +73,7 @@ use std::any::Any; use std::fmt::{Debug, Display, Formatter}; use std::fs::{self, read_to_string, File}; use std::hash::Hash; -use std::io::{BufReader, BufWriter, Read, Write as _}; +use std::io::{BufWriter, Read, Write as _}; use std::iter::once; use std::ops::Deref; use std::path::{Path, PathBuf}; @@ -2485,10 +2485,14 @@ impl TimedEgraph { } pub fn new_from_file(path: &Path) -> Self { - let file = File::open(path).expect("failed to open egraph file"); - let reader = BufReader::new(file); + let mut file = fs::File::open(path) + .expect("failed to open file"); + let mut buf = Vec::new(); + file.read_to_end(&mut buf) + .expect("Failed to read Flatbuffer from file"); - let egraph: EGraph = serde_json::from_reader(reader).expect("failed to parse egraph JSON"); + let r = flexbuffers::Reader::get_root(buf.as_slice()).unwrap(); + let egraph: EGraph = EGraph::deserialize(r).unwrap(); Self { egraphs: vec![egraph], @@ -2624,7 +2628,7 @@ impl TimedEgraph { Ok(()) } - pub fn to_value(&mut self) -> Result { + pub fn to_value(&mut self) -> Result> { let mut timeline = ProgramTimeline::new("(serialize)"); let egraph = self.egraphs.last().unwrap(); @@ -2634,7 +2638,10 @@ impl TimedEgraph { time_micros: self.timer.elapsed().as_micros(), }); - let value = serde_json::to_value(egraph).context("Failed to encode egraph as json")?; + let mut buf = flexbuffers::FlexbufferSerializer::new(); + Serialize::serialize(egraph, &mut buf) + .expect("Failed to serialize the egraph in Flexbuffer"); + let value = Vec::from(buf.view()); 
timeline.evts.push(EgraphEvent { sexp_idx: 0, @@ -2646,7 +2653,7 @@ impl TimedEgraph { Ok(value) } - pub fn from_value(&mut self, value: serde_json::Value) -> Result<()> { + pub fn from_value(&mut self, value: Vec) -> Result<()> { let mut timeline = ProgramTimeline::new("(deserialize)"); timeline.evts.push(EgraphEvent { @@ -2655,8 +2662,8 @@ impl TimedEgraph { time_micros: self.timer.elapsed().as_micros(), }); - let egraph: EGraph = - serde_json::from_value(value).context("Failed to decode egraph from json")?; + let r = flexbuffers::Reader::get_root(value.as_slice()).unwrap(); + let egraph: EGraph = EGraph::deserialize(r).unwrap(); timeline.evts.push(EgraphEvent { sexp_idx: 0, @@ -2684,9 +2691,7 @@ impl TimedEgraph { time_micros: self.timer.elapsed().as_micros(), }); - //let value = serde_json::to_value(egraph).context("Failed to encode egraph as json")?; let mut buf = flexbuffers::FlexbufferSerializer::new(); - // Have to use the fully qualified syntax because egraph has a method called serailize Serialize::serialize(egraph, &mut buf) .expect("Failed to serialize the egraph in Flexbuffer"); @@ -2704,8 +2709,6 @@ impl TimedEgraph { let mut file = fs::File::create(path) .with_context(|| format!("failed to create file {}", path.display()))?; - //serde_json::to_writer(BufWriter::new(file), &value) - // .context("Failed to write value to file")?; file.write_all(buf.view()) .context("Failed to write value to file")?; @@ -2731,9 +2734,6 @@ impl TimedEgraph { let mut file = fs::File::open(path) .with_context(|| format!("failed to open file {}", path.display()))?; - //let reader = BufReader::new(file); - //let value: serde_json::Value = - // serde_json::from_reader(reader).context("Failed to read json from file")?; let mut buf = Vec::new(); file.read_to_end(&mut buf) .context("Failed to read Flatbuffer from file")?; @@ -2750,7 +2750,6 @@ impl TimedEgraph { time_micros: self.timer.elapsed().as_micros(), }); - //let egraph: EGraph = serde_json::from_value(value)?; let r = flexbuffers::Reader::get_root(buf.as_slice()).unwrap(); let egraph: EGraph = EGraph::deserialize(r).unwrap(); diff --git a/src/poach.rs b/src/poach.rs index 3d4de64e6..71471f3c5 100644 --- a/src/poach.rs +++ b/src/poach.rs @@ -134,6 +134,8 @@ fn check_egraph_size(egraph: &TimedEgraph) -> Result<()> { Ok(()) } +// TODO: This is not working right now due to no longer using serde_json +/* fn check_idempotent(p1: &PathBuf, p2: &PathBuf, name: &str, out_dir: &PathBuf) { let json1: serde_json::Value = serde_json::from_str( &fs::read_to_string(p1).expect(&format!("failed to open {}", p1.display())), @@ -153,6 +155,7 @@ fn check_idempotent(p1: &PathBuf, p2: &PathBuf, name: &str, out_dir: &PathBuf) { panic!("Diff for {}", name) } } +*/ fn benchmark_name(egg_file: &Path) -> &str { egg_file @@ -210,6 +213,7 @@ where (successes, failures) } +#[allow(dead_code)] fn compare_extracts( initial_extracts: &[CommandOutput], final_extracts: &[CommandOutput], @@ -248,6 +252,38 @@ fn compare_extracts( Ok(()) } +fn compare_extracts_weak( + initial_extracts: &[CommandOutput], + final_extracts: &[CommandOutput], +) -> Result<()> { + if initial_extracts.len() != final_extracts.len() { + anyhow::bail!("extract lengths mismatch") + } + + for (x, y) in initial_extracts.iter().zip(final_extracts) { + match (x, y) { + (CommandOutput::ExtractBest(_, _, _), CommandOutput::ExtractBest(_, _, _)) => { + + } + ( + CommandOutput::ExtractVariants(_, _), + CommandOutput::ExtractVariants(_, _), + ) => { + + } + ( + CommandOutput::MultiExtractVariants(_, _), + 
CommandOutput::MultiExtractVariants(_, _), + ) => { + + } + _ => anyhow::bail!("No match : {:?} {:?}", x, y), + } + } + + Ok(()) +} + fn poach( files: Vec, out_dir: &PathBuf, @@ -275,7 +311,7 @@ fn poach( |egg_file, out_dir, timed_egraph| { let name = benchmark_name(egg_file); timed_egraph.run_from_file(egg_file)?; - timed_egraph.to_file(&out_dir.join(format!("{name}-serialize.json")))?; + timed_egraph.to_file(&out_dir.join(format!("{name}-serialize.fbs")))?; timed_egraph.write_timeline(&out_dir.join(format!("{name}-timeline.json")))?; Ok(()) }, @@ -288,15 +324,15 @@ fn poach( |egg_file, out_dir: &PathBuf, timed_egraph| { let name = benchmark_name(egg_file); timed_egraph.run_from_file(egg_file)?; - let s1 = out_dir.join(format!("{name}-serialize1.json")); + let s1 = out_dir.join(format!("{name}-serialize1.fbs")); timed_egraph .to_file(&s1) - .context("Failed to write s1.json")?; + .context("Failed to write s1.fbs")?; timed_egraph .from_file(&s1) - .context("failed to read s1.json")?; + .context("failed to read s1.fbs")?; check_egraph_number(&timed_egraph, 2)?; @@ -314,37 +350,37 @@ fn poach( |egg_file, out_dir, timed_egraph| { let name = benchmark_name(egg_file); timed_egraph.run_from_file(egg_file)?; - let s1 = out_dir.join(format!("{name}-serialize1.json")); - let s2 = out_dir.join(format!("{name}-serialize2.json")); - let s3 = out_dir.join(format!("{name}-serialize3.json")); + let s1 = out_dir.join(format!("{name}-serialize1.fbs")); + let s2 = out_dir.join(format!("{name}-serialize2.fbs")); + let s3 = out_dir.join(format!("{name}-serialize3.fbs")); timed_egraph .to_file(&s1) - .context("failed to serialize s1.json")?; + .context("failed to serialize s1.fbs")?; timed_egraph .from_file(&s1) - .context("failed to read s1.json")?; + .context("failed to read s1.fbs")?; timed_egraph .to_file(&s2) - .context("failed to serialize s2.json")?; + .context("failed to serialize s2.fbs")?; timed_egraph .from_file(&s2) - .context("failed to read s2.json")?; + .context("failed to read s2.fbs")?; timed_egraph .to_file(&s3) - .context("failed to serialize s3.json")?; + .context("failed to serialize s3.fbs")?; timed_egraph .from_file(&s3) - .context("failed to read s3.json")?; + .context("failed to read s3.fbs")?; check_egraph_number(&timed_egraph, 4)?; check_egraph_size(&timed_egraph)?; - check_idempotent(&s2, &s3, name, out_dir); + //check_idempotent(&s2, &s3, name, out_dir); timed_egraph.write_timeline(&out_dir.join(format!("{name}-timeline.json")))?; Ok(()) @@ -360,8 +396,8 @@ fn poach( timed_egraph.run_from_file(egg_file)?; timed_egraph - .to_file(&out_dir.join(format!("{name}-serialize-poach.json"))) - .context("failed to write poach.json")?; + .to_file(&out_dir.join(format!("{name}-serialize-poach.fbs"))) + .context("failed to write poach.fbs")?; timed_egraph .old_serialize_egraph(&out_dir.join(format!("{name}-serialize-old.json"))) @@ -382,11 +418,11 @@ fn poach( let value = timed_egraph .to_value() - .context("Failed to encode egraph as json")?; + .context("Failed to encode egraph as flatbuffer")?; timed_egraph .from_value(value) - .context("failed to decode egraph from json")?; + .context("failed to decode egraph from flatbuffer")?; check_egraph_number(&timed_egraph, 2)?; @@ -424,7 +460,7 @@ fn poach( if let Sexp::List(xs, _) = sexp { if !xs.is_empty() { match &xs[0] { - Sexp::Atom(s, _) => s == "extract", + Sexp::Atom(s, _) => s == "extract" || s == "multi-extract", _ => false, } } else { @@ -452,18 +488,18 @@ fn poach( let value = timed_egraph .to_value() - .context("Failed to encode egraph as 
JSON")?; + .context("Failed to encode egraph as Flatbuffer")?; timed_egraph .from_value(value) - .context("failed to decode egraph from json")?; + .context("Failed to decode egraph from Flatbuffer")?; check_egraph_number(&timed_egraph, 2)?; let final_extracts = timed_egraph.run_program_with_timeline(extract_cmds, &extracts)?; - compare_extracts(&initial_extracts, &final_extracts)?; + compare_extracts_weak(&initial_extracts, &final_extracts)?; timed_egraph.write_timeline(&out_dir.join(format!("{name}-timeline.json")))?; From 9c854695edf8716d220c30b575102aa0f711f79f Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Thu, 5 Mar 2026 12:06:14 -0800 Subject: [PATCH 07/21] Tweak nightly frontent to display extract experiment results --- infra/nightly-resources/web/chart.js | 4 +- infra/nightly-resources/web/extract.html | 2 + infra/nightly-resources/web/extract.js | 54 +++++++++++++++++++----- infra/nightly.py | 7 ++- 4 files changed, 54 insertions(+), 13 deletions(-) diff --git a/infra/nightly-resources/web/chart.js b/infra/nightly-resources/web/chart.js index 466b69975..da73b509b 100644 --- a/infra/nightly-resources/web/chart.js +++ b/infra/nightly-resources/web/chart.js @@ -156,8 +156,8 @@ function initializeCharts() { }, }, y: { - min: -25, - max: 25, + min: -1000, + max: 3000, title: { display: true, text: "time (ms)", diff --git a/infra/nightly-resources/web/extract.html b/infra/nightly-resources/web/extract.html index 55de269b5..8af73a4dc 100644 --- a/infra/nightly-resources/web/extract.html +++ b/infra/nightly-resources/web/extract.html @@ -25,6 +25,8 @@

POACH vs Vanilla Egglog

Serialization time is not counted

+
+
diff --git a/infra/nightly-resources/web/extract.js b/infra/nightly-resources/web/extract.js index e83b0c854..562dc5c90 100644 --- a/infra/nightly-resources/web/extract.js +++ b/infra/nightly-resources/web/extract.js @@ -1,9 +1,43 @@ function initializeExtract() { - initializeGlobalData().then(initializeCharts).then(plotExtract); + initializeGlobalData() + .then(initializeExtractOptions) + .then(initializeCharts) + .then(plotExtract); } +function initializeExtractOptions() { + const suiteElt = document.getElementById("suite"); + Object.keys(GLOBAL_DATA.data).forEach((suite, idx) => { + const label = document.createElement("label"); + const input = document.createElement("input"); + + input.type = "radio"; + input.name = "suiteToggle"; + input.value = suite; + + if (idx === 0) { + input.checked = true; // select first run mode + } + + label.appendChild(input); + label.append(" " + suite); + + suiteElt.appendChild(label); + }); +} + + function plotExtract() { - const all_data = GLOBAL_DATA.data.tests.extract; + + const suite = document.querySelector( + 'input[name="suiteToggle"]:checked' + ).value; + + if (!suite) { + return; + } + + const all_data = GLOBAL_DATA.data[suite].extract; if (GLOBAL_DATA.extractChart === null) { return; @@ -31,7 +65,7 @@ function plotExtract() { data[b].poachDeser = aggregate(all_data[b].deserialize, "total"); data[b].poachTotal = data[b].poachDeser + data[b].poachExtract; - data[b].difference = data[b].poachTotal - data[b].vanillaTotal; + data[b].difference = data[b].vanillaTotal - data[b].poachTotal; }); GLOBAL_DATA.differenceChart.data = { @@ -41,18 +75,16 @@ function plotExtract() { label: "poach - vanilla", data: Object.values(data).map((d) => d.difference), backgroundColor: Object.values(data).map((d) => { - if (Math.abs(d.difference) > 25) { - return "gray"; - } else { - return d.difference >= 0 - ? "rgba(255, 99, 132, 0.7)" - : "rgba(54, 162, 235, 0.7)"; - } + return d.difference >= 0 + ? 
"rgba(54, 162, 235, 0.7)" + : "rgba(255, 99, 132, 0.7)"; }), }, ], }; + GLOBAL_DATA.differenceChart.update(); + GLOBAL_DATA.extractChart.data = { labels: benchmarks, datasets: [ @@ -85,4 +117,6 @@ function plotExtract() { }, ], }; + + GLOBAL_DATA.extractChart.update(); } diff --git a/infra/nightly.py b/infra/nightly.py index f968ec4f0..745e62b1b 100644 --- a/infra/nightly.py +++ b/infra/nightly.py @@ -102,8 +102,13 @@ def run_extract_experiments(resource_dir, tmp_dir, aggregator): for benchmark in benchmark_files(resource_dir / "test-files" / suite): timeline_file = tmp_dir / f"{benchmark.stem}-timeline.json" run_poach(benchmark, tmp_dir, "extract") - add_benchmark_data(aggregator, timeline_file, f"{suite}/timeline/{benchmark.stem}/timeline.json") + add_benchmark_data(aggregator, timeline_file, f"{suite}/extract/{benchmark.stem}/timeline.json") cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json") + for benchmark in benchmark_files(top_dir / "tests", recursive = True): + timeline_file = tmp_dir / f"{benchmark.stem}-timeline.json" + run_poach(benchmark, tmp_dir, "extract") + add_benchmark_data(aggregator, timeline_file, f"tests/extract/{benchmark.stem}/timeline.json") + cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json") def run_mined_experiments(resource_dir, tmp_dir, aggregator): mega_serialize_file = tmp_dir / "mega-easteregg-serialize.fbs" From 54533db2ac629947c44783eae21ed2418f0d6c3a Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Thu, 5 Mar 2026 12:22:59 -0800 Subject: [PATCH 08/21] Show egraph size in size report --- src/lib.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 950778ab5..3c7a0f180 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2678,7 +2678,9 @@ impl TimedEgraph { } pub fn print_size_report(&mut self, max_level: usize) -> Result<()> { - self.egraphs.last().unwrap().get_sizerp().pretty_print(0, max_level); + let egraph = self.egraphs.last().unwrap(); + println!("egraph size: {:}", egraph.num_tuples()); + egraph.get_sizerp().pretty_print(0, max_level); Ok(()) } From dcf81e5c7cb8074aec02d1745a8a35471be86662 Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Thu, 5 Mar 2026 12:48:07 -0800 Subject: [PATCH 09/21] Add include ser time option, add a speedup graph --- infra/nightly-resources/web/chart.js | 45 ++++++++++++++++++++++++ infra/nightly-resources/web/extract.html | 7 ++++ infra/nightly-resources/web/extract.js | 26 +++++++++++++- 3 files changed, 77 insertions(+), 1 deletion(-) diff --git a/infra/nightly-resources/web/chart.js b/infra/nightly-resources/web/chart.js index da73b509b..aed046dd9 100644 --- a/infra/nightly-resources/web/chart.js +++ b/infra/nightly-resources/web/chart.js @@ -124,6 +124,51 @@ function initializeCharts() { ); } + if (!!document.getElementById("speedup-chart")) { + console.assert(GLOBAL_DATA.differenceChart === null); + + GLOBAL_DATA.speedupChart = new Chart( + document.getElementById("speedup-chart"), + { + type: "bar", + data: {}, + options: { + responsive: true, + plugins: { + legend: { + display: false, + }, + title: { + display: true, + text: "Per-benchmark Runtime Speedup", + }, + tooltip: { + callbacks: { + label: (ctx) => `${ctx.raw.toFixed(2)}x`, + }, + }, + }, + scales: { + x: { + ticks: { + maxRotation: 90, + minRotation: 45, + }, + }, + y: { + min: 0, + max: 50, + title: { + display: true, + text: "Speedup (times)", + }, + }, + }, + }, + }, + ); + } + if (!!document.getElementById("difference-chart")) { console.assert(GLOBAL_DATA.differenceChart === null); diff 
--git a/infra/nightly-resources/web/extract.html b/infra/nightly-resources/web/extract.html index 8af73a4dc..f8a2483d9 100644 --- a/infra/nightly-resources/web/extract.html +++ b/infra/nightly-resources/web/extract.html @@ -25,8 +25,15 @@

POACH vs Vanilla Egglog

Serialization time is not counted

+
+ + +
+
+ +
diff --git a/infra/nightly-resources/web/extract.js b/infra/nightly-resources/web/extract.js index 562dc5c90..e922b536a 100644 --- a/infra/nightly-resources/web/extract.js +++ b/infra/nightly-resources/web/extract.js @@ -37,6 +37,8 @@ function plotExtract() { return; } + const includeser = document.querySelector("input[name='icldser1']:checked"); + const all_data = GLOBAL_DATA.data[suite].extract; if (GLOBAL_DATA.extractChart === null) { @@ -63,11 +65,33 @@ function plotExtract() { data[b].poachExtract = aggregate(extracts.slice(midpoint), "total"); data[b].poachDeser = aggregate(all_data[b].deserialize, "total"); - data[b].poachTotal = data[b].poachDeser + data[b].poachExtract; + if (includeser) { + data[b].poachTotal = data[b].poachDeser + data[b].poachExtract; + } else { + data[b].poachTotal = data[b].poachExtract; + } data[b].difference = data[b].vanillaTotal - data[b].poachTotal; + data[b].speedup = data[b].vanillaTotal / data[b].poachTotal; }); + GLOBAL_DATA.speedupChart.data = { + labels: benchmarks, + datasets: [ + { + label: "poach - vanilla", + data: Object.values(data).map((d) => d.speedup), + backgroundColor: Object.values(data).map((d) => { + return d.speedup >= 1 + ? "rgba(54, 162, 235, 0.7)" + : "rgba(255, 99, 132, 0.7)"; + }), + }, + ], + }; + + GLOBAL_DATA.speedupChart.update(); + GLOBAL_DATA.differenceChart.data = { labels: benchmarks, datasets: [ From 63d2be20fa3610dae0e338ca4e86697ae0435143 Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Thu, 5 Mar 2026 12:57:53 -0800 Subject: [PATCH 10/21] fmt --- egglog-ast/src/span.rs | 14 +++++++------- src/lib.rs | 3 +-- src/poach.rs | 15 +++------------ src/serialize_size.rs | 8 ++++++-- 4 files changed, 17 insertions(+), 23 deletions(-) diff --git a/egglog-ast/src/span.rs b/egglog-ast/src/span.rs index d062426cf..2651d9cdc 100644 --- a/egglog-ast/src/span.rs +++ b/egglog-ast/src/span.rs @@ -13,22 +13,22 @@ pub enum Span { impl serde::Serialize for Span { fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer { + where + S: serde::Serializer, + { serializer.serialize_unit() } } impl<'de> serde::Deserialize<'de> for Span { fn deserialize(_: D) -> Result - where - D: serde::Deserializer<'de> { + where + D: serde::Deserializer<'de>, + { Ok(Self::POACH) } } - - #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct EgglogSpan { pub file: Arc, @@ -116,7 +116,7 @@ impl Display for Span { write!(f, "In {}:{}-{}: {quote}", start_line, start_col, end_col) } } - }, + } Span::POACH => write!(f, "From POACH deserialization"), } } diff --git a/src/lib.rs b/src/lib.rs index 3c7a0f180..baf03eb21 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2485,8 +2485,7 @@ impl TimedEgraph { } pub fn new_from_file(path: &Path) -> Self { - let mut file = fs::File::open(path) - .expect("failed to open file"); + let mut file = fs::File::open(path).expect("failed to open file"); let mut buf = Vec::new(); file.read_to_end(&mut buf) .expect("Failed to read Flatbuffer from file"); diff --git a/src/poach.rs b/src/poach.rs index 71471f3c5..d1b7d45d3 100644 --- a/src/poach.rs +++ b/src/poach.rs @@ -262,21 +262,12 @@ fn compare_extracts_weak( for (x, y) in initial_extracts.iter().zip(final_extracts) { match (x, y) { - (CommandOutput::ExtractBest(_, _, _), CommandOutput::ExtractBest(_, _, _)) => { - - } - ( - CommandOutput::ExtractVariants(_, _), - CommandOutput::ExtractVariants(_, _), - ) => { - - } + (CommandOutput::ExtractBest(_, _, _), CommandOutput::ExtractBest(_, _, _)) => {} + (CommandOutput::ExtractVariants(_, _), 
CommandOutput::ExtractVariants(_, _)) => {} ( CommandOutput::MultiExtractVariants(_, _), CommandOutput::MultiExtractVariants(_, _), - ) => { - - } + ) => {} _ => anyhow::bail!("No match : {:?} {:?}", x, y), } } diff --git a/src/serialize_size.rs b/src/serialize_size.rs index c9b49ae03..8683e27ca 100644 --- a/src/serialize_size.rs +++ b/src/serialize_size.rs @@ -1,6 +1,8 @@ use crate::{ ast::ResolvedVar, - core::{GenericCoreAction, GenericCoreActions, GenericAtom, Query, ResolvedCall, ResolvedCoreRule}, + core::{ + GenericAtom, GenericCoreAction, GenericCoreActions, Query, ResolvedCall, ResolvedCoreRule, + }, egglog::util::IndexMap, term_encoding::EncodingState, CommandMacroRegistry, EGraph, RunReport, TypeInfo, @@ -179,7 +181,9 @@ impl GenerateSizeReport for ResolvedCoreRule { } } -impl GenerateSizeReport for (T, S) { +impl + GenerateSizeReport for (T, S) +{ fn get_sizerp(&self) -> SizeReport { let mut ret = get_sizerp_default(self); ret.fields From c54b1a20f392da11a95cdd5b9538ee0a520ced85 Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Thu, 5 Mar 2026 13:44:46 -0800 Subject: [PATCH 11/21] Skip tests because containers are not yet supported --- infra/nightly.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/infra/nightly.py b/infra/nightly.py index 745e62b1b..50011ff04 100644 --- a/infra/nightly.py +++ b/infra/nightly.py @@ -104,11 +104,6 @@ def run_extract_experiments(resource_dir, tmp_dir, aggregator): run_poach(benchmark, tmp_dir, "extract") add_benchmark_data(aggregator, timeline_file, f"{suite}/extract/{benchmark.stem}/timeline.json") cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json") - for benchmark in benchmark_files(top_dir / "tests", recursive = True): - timeline_file = tmp_dir / f"{benchmark.stem}-timeline.json" - run_poach(benchmark, tmp_dir, "extract") - add_benchmark_data(aggregator, timeline_file, f"tests/extract/{benchmark.stem}/timeline.json") - cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json") def run_mined_experiments(resource_dir, tmp_dir, aggregator): mega_serialize_file = tmp_dir / "mega-easteregg-serialize.fbs" From 85dcdcf32d4441d70a953b74b3bfc0ec31167bb6 Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Thu, 5 Mar 2026 14:13:20 -0800 Subject: [PATCH 12/21] Comment local dev setup --- infra/nightly.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/nightly.sh b/infra/nightly.sh index 777641a8e..a0777ca6c 100755 --- a/infra/nightly.sh +++ b/infra/nightly.sh @@ -67,4 +67,4 @@ fi cp infra/nightly-resources/web/* nightly/output # Uncomment for local development -cd nightly/output && python3 -m http.server 8002 +# cd nightly/output && python3 -m http.server 8002 From 1d46162dec79c11b115a9fb80ee0115be1252e36 Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Thu, 5 Mar 2026 16:43:44 -0800 Subject: [PATCH 13/21] Output a csv file with serialization size data --- Cargo.toml | 2 +- infra/nightly.py | 10 +++++++--- infra/transform.py | 16 ++++++++++++++++ src/lib.rs | 26 +++++++++++++++++++++++++- src/poach.rs | 26 ++++++++++++++++++++++++++ 5 files changed, 75 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9860f9912..2190a235e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,7 +52,7 @@ getrandom = "0.3" once_cell = "1.21" num-bigint = { version = "0.4", features = ["serde"] } num-rational = {version = "0.4", features = ["serde"]} -csv = "1.3" +csv = "1.4" typetag = "0.2" serde = { version = "1.0", features = ["derive", "rc"] } serde_json = "1.0" diff --git a/infra/nightly.py b/infra/nightly.py index 
50011ff04..2ce3f6ae1 100644 --- a/infra/nightly.py +++ b/infra/nightly.py @@ -96,14 +96,16 @@ def run_test_experiments(top_dir, tmp_dir, aggregator): }.get(run_mode, []) cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json", *extra_files) -def run_extract_experiments(resource_dir, tmp_dir, aggregator): +def run_extract_experiments(resource_dir, tmp_dir, aggregator, csv_aggregator): timeline_suites = ["easteregg", "herbie-hamming", "herbie-math-rewrite", "herbie-math-taylor"] for suite in timeline_suites: for benchmark in benchmark_files(resource_dir / "test-files" / suite): timeline_file = tmp_dir / f"{benchmark.stem}-timeline.json" run_poach(benchmark, tmp_dir, "extract") add_benchmark_data(aggregator, timeline_file, f"{suite}/extract/{benchmark.stem}/timeline.json") - cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json") + extra_files = [tmp_dir / f"{benchmark.stem}.csv"] + csv_aggregator.add_file(extra_files[0]) + cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json", *extra_files) def run_mined_experiments(resource_dir, tmp_dir, aggregator): mega_serialize_file = tmp_dir / "mega-easteregg-serialize.fbs" @@ -146,6 +148,7 @@ def run_mined_experiments(resource_dir, tmp_dir, aggregator): tmp_dir = nightly_dir / "tmp" output_data_dir = nightly_dir / "output" / "data" aggregator = transform.TimelineAggregator(output_data_dir) + csv_aggregator = transform.CSVAggregator(output_data_dir) # Make sure we're in the right place os.chdir(top_dir) @@ -167,11 +170,12 @@ def run_mined_experiments(resource_dir, tmp_dir, aggregator): # run_mined_experiments(resource_dir, tmp_dir, aggregator) # Run the extract experiment on our heavy benchmarks - run_extract_experiments(resource_dir, tmp_dir, aggregator) + run_extract_experiments(resource_dir, tmp_dir, aggregator, csv_aggregator) ############################################################################## aggregator.save() + csv_aggregator.save() #if shutil.which("perf") is not None: # # Generate flamegraphs diff --git a/infra/transform.py b/infra/transform.py index 2fe95fbfd..f6a334003 100644 --- a/infra/transform.py +++ b/infra/transform.py @@ -1,4 +1,5 @@ import json +import pandas import os from pathlib import Path @@ -111,3 +112,18 @@ def add_file(self, input_file, benchmark_name): def save(self): os.makedirs(self.output_dir, exist_ok=True) save_json(self.data_path, self.aggregated) + +class CSVAggregator: + def __init__(self, output_dir): + self.output_dir = Path(output_dir) + self.data_path = self.output_dir / "data.csv" + self.records = [] + + def add_file(self, input_file): + df = pandas.read_csv(input_file) + self.records.append(df) + + def save(self): + os.makedirs(self.output_dir, exist_ok=True) + combined = pandas.concat(self.records) + combined.to_csv(self.data_path, index=False) \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index baf03eb21..64273b913 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2442,13 +2442,33 @@ mod tests { static START: &'static str = "start"; static END: &'static str = "end"; -#[derive(Serialize, Clone)] +#[derive(Serialize, Clone, Eq)] pub struct EgraphEvent { sexp_idx: i32, evt: &'static str, time_micros: u128, } +impl Ord for EgraphEvent { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.time_micros.cmp(&other.time_micros) + } +} + +impl PartialOrd for EgraphEvent { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl PartialEq for EgraphEvent { + fn eq(&self, other: &Self) -> bool { + self.sexp_idx == other.sexp_idx 
&& + self.evt == other.evt && + self.time_micros == other.time_micros + } +} + #[derive(Serialize, Clone)] pub struct ProgramTimeline { program_text: String, @@ -2500,6 +2520,10 @@ impl TimedEgraph { } } + pub fn get_total_time(&self, id : usize) -> u128 { + self.timeline[id].evts.iter().max().unwrap().time_micros - self.timeline[id].evts.iter().min().unwrap().time_micros + } + pub fn egraphs(&self) -> Vec<&EGraph> { self.egraphs.iter().map(|x| x).collect() } diff --git a/src/poach.rs b/src/poach.rs index d1b7d45d3..33479e2b6 100644 --- a/src/poach.rs +++ b/src/poach.rs @@ -481,6 +481,8 @@ fn poach( .to_value() .context("Failed to encode egraph as Flatbuffer")?; + let serialized_size = value.len(); + timed_egraph .from_value(value) .context("Failed to decode egraph from Flatbuffer")?; @@ -494,6 +496,30 @@ fn poach( timed_egraph.write_timeline(&out_dir.join(format!("{name}-timeline.json")))?; + + #[derive(Serialize)] + struct CSVRecord{ + benchname: String, + egraph_size: usize, + serialized_size: usize, + ser_time: u128, + der_time: u128, + ext_time: u128, + run_time: u128, + } + + let r = CSVRecord { + benchname: name.to_string(), + egraph_size: timed_egraph.egraphs().last().unwrap().num_tuples(), + serialized_size: serialized_size, + ser_time: timed_egraph.get_total_time(1), + der_time: timed_egraph.get_total_time(2), + ext_time: timed_egraph.get_total_time(3), + run_time: timed_egraph.get_total_time(0) + }; + + csv::Writer::from_path(&out_dir.join(format!("{name}.csv")))?.serialize(r)?; + Ok(()) }, ), From a5758297f1720883219230ca09db6f87900f0690 Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Thu, 5 Mar 2026 16:45:16 -0800 Subject: [PATCH 14/21] fmt --- src/lib.rs | 11 ++++++----- src/poach.rs | 7 +++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 64273b913..725506bfb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2463,9 +2463,9 @@ impl PartialOrd for EgraphEvent { impl PartialEq for EgraphEvent { fn eq(&self, other: &Self) -> bool { - self.sexp_idx == other.sexp_idx && - self.evt == other.evt && - self.time_micros == other.time_micros + self.sexp_idx == other.sexp_idx + && self.evt == other.evt + && self.time_micros == other.time_micros } } @@ -2520,8 +2520,9 @@ impl TimedEgraph { } } - pub fn get_total_time(&self, id : usize) -> u128 { - self.timeline[id].evts.iter().max().unwrap().time_micros - self.timeline[id].evts.iter().min().unwrap().time_micros + pub fn get_total_time(&self, id: usize) -> u128 { + self.timeline[id].evts.iter().max().unwrap().time_micros + - self.timeline[id].evts.iter().min().unwrap().time_micros } pub fn egraphs(&self) -> Vec<&EGraph> { diff --git a/src/poach.rs b/src/poach.rs index 33479e2b6..05a28b653 100644 --- a/src/poach.rs +++ b/src/poach.rs @@ -496,9 +496,8 @@ fn poach( timed_egraph.write_timeline(&out_dir.join(format!("{name}-timeline.json")))?; - #[derive(Serialize)] - struct CSVRecord{ + struct CSVRecord { benchname: String, egraph_size: usize, serialized_size: usize, @@ -515,11 +514,11 @@ fn poach( ser_time: timed_egraph.get_total_time(1), der_time: timed_egraph.get_total_time(2), ext_time: timed_egraph.get_total_time(3), - run_time: timed_egraph.get_total_time(0) + run_time: timed_egraph.get_total_time(0), }; csv::Writer::from_path(&out_dir.join(format!("{name}.csv")))?.serialize(r)?; - + Ok(()) }, ), From 96ea2262c4589d77e0b4c8517c763309719d1fa9 Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Fri, 6 Mar 2026 15:41:13 -0800 Subject: [PATCH 15/21] Hacks --- core-relations/src/free_join/mod.rs | 6 ++- 
core-relations/src/lib.rs | 6 ++- core-relations/src/row_buffer/mod.rs | 71 ++++++++++++++++++++++++++++ core-relations/src/table/mod.rs | 42 ++++++++++++++-- core-relations/src/table_spec.rs | 3 +- core-relations/src/uf/mod.rs | 2 + egglog-bridge/src/lib.rs | 3 +- numeric-id/src/lib.rs | 6 ++- src/lib.rs | 2 +- src/poach.rs | 2 +- src/serialize_size.rs | 58 +++++++++++++++++++---- 11 files changed, 178 insertions(+), 23 deletions(-) diff --git a/core-relations/src/free_join/mod.rs b/core-relations/src/free_join/mod.rs index c97378fa0..9bccd0d1f 100644 --- a/core-relations/src/free_join/mod.rs +++ b/core-relations/src/free_join/mod.rs @@ -118,7 +118,8 @@ pub(crate) type HashColumnIndex = Arc>>; pub struct TableInfo { pub(crate) name: Option>, pub(crate) spec: TableSpec, - pub(crate) table: WrappedTable, + // TODO: evil hack for looking at serialization size + pub table: WrappedTable, #[serde(skip)] pub(crate) indexes: IndexCatalog, HashIndex>, #[serde(skip)] @@ -276,7 +277,8 @@ impl Counters { pub struct Database { // NB: some fields are pub(crate) to allow some internal modules to avoid // borrowing the whole table. - pub(crate) tables: DenseIdMap, + // TODO: evil hack for looking at serialization size + pub tables: DenseIdMap, // TODO: having a single AtomicUsize per counter can lead to contention. We // should look into prefetching counters when creating a new ExecutionState // and incrementing locally. Note that the batch size shouldn't be too big diff --git a/core-relations/src/lib.rs b/core-relations/src/lib.rs index 7d0e66140..66fe1248c 100644 --- a/core-relations/src/lib.rs +++ b/core-relations/src/lib.rs @@ -7,7 +7,8 @@ pub(crate) mod base_values; pub(crate) mod common; pub(crate) mod containers; pub(crate) mod dependency_graph; -pub(crate) mod free_join; +// TODO: evil hack for looking at serialization size +pub mod free_join; pub(crate) mod hash_index; pub(crate) mod offsets; pub(crate) mod parallel_heuristics; @@ -16,7 +17,8 @@ pub(crate) mod query; pub(crate) mod row_buffer; pub(crate) mod table; -pub(crate) mod table_spec; +// TODO: evil hack for looking at serialization size +pub mod table_spec; pub(crate) mod uf; #[cfg(test)] diff --git a/core-relations/src/row_buffer/mod.rs b/core-relations/src/row_buffer/mod.rs index a4426940c..e24af95f8 100644 --- a/core-relations/src/row_buffer/mod.rs +++ b/core-relations/src/row_buffer/mod.rs @@ -35,6 +35,7 @@ impl<'de> Deserialize<'de> for RowBuffer { where D: Deserializer<'de>, { + /* #[derive(Deserialize)] struct Partial { n_columns: usize, @@ -49,19 +50,89 @@ impl<'de> Deserialize<'de> for RowBuffer { total_rows: helper.total_rows, data: Pooled::new(helper.data), }) + */ + + let bytes = >::deserialize(deserializer).expect("Failed to parse RowBuffer"); + let mut it = bytes.iter(); + let n_columns = deserialize_compressed(&mut it); + let total_rows = deserialize_compressed(&mut it); + let mut data = >>::new(); + for i in 0..n_columns * total_rows { + data.push(Cell::new(Value::new(deserialize_compressed(&mut it)))); + } + Ok(RowBuffer { + n_columns: n_columns.try_into().unwrap(), + total_rows: total_rows.try_into().unwrap(), + data: Pooled::new(data), + }) } } +#[allow(dead_code)] +fn get_n_compressed_bytes(x: u32) -> usize { + if x < (1u32 << 7) { + 1 + } else if x < (1u32 << 14) { + 2 + } else if x < (1u32 << 21) { + 3 + } else if x < (1u32 << 28) { + 4 + } else { + 5 + } +} + +fn compressed_serialize(buf: &mut Vec, x: u32) { + let mut rem = x; + while (rem >= (1u32 << 7)) { + buf.push((rem & ((1u32 << 7) - 
1)).try_into().unwrap()); + rem = rem >> 7; + } + buf.push((rem | (1u32 << 7)).try_into().unwrap()); +} + +fn deserialize_compressed<'a, T: Iterator>(it: &mut T) -> u32 { + let mut ret = 0u32; + let mut delta = 0u32; + let mut val: u32 = ::into(*it.next().unwrap()); + while (val < (1u32 << 7)) { + ret = ret | (val << delta); + delta += 7; + val = ::into(*it.next().unwrap()); + } + let last = (val ^ (1u32 << 7)) << delta; + ret | last +} + impl Serialize for RowBuffer { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { + /* let mut state = serializer.serialize_struct("RowBuffer", 3)?; state.serialize_field("n_columns", &self.n_columns)?; state.serialize_field("total_rows", &self.total_rows)?; state.serialize_field("data", &*self.data)?; state.end() + */ + //let len = mem::size_of::() * 2 + self.n_columns * self.total_rows * mem::size_of::(); + /* + let mut len = get_n_compressed_bytes(self.n_columns.try_into().unwrap()) + get_n_compressed_bytes(self.total_rows.try_into().unwrap()); + for r in self.data.iter() { + len = len + get_n_compressed_bytes(r.get().rep); + } + let mut buf = vec![0u8; len]; + //TODO: put data in + */ + let mut buf = Vec::new(); + compressed_serialize(&mut buf, self.n_columns.try_into().unwrap()); + compressed_serialize(&mut buf, self.total_rows.try_into().unwrap()); + for r in self.data.iter() { + compressed_serialize(&mut buf, r.get().rep); + } + serializer.serialize_bytes(&buf) } } diff --git a/core-relations/src/table/mod.rs b/core-relations/src/table/mod.rs index 4628a25b1..01ef79d8e 100644 --- a/core-relations/src/table/mod.rs +++ b/core-relations/src/table/mod.rs @@ -20,7 +20,10 @@ use crossbeam_queue::SegQueue; use hashbrown::HashTable; use rayon::iter::{IndexedParallelIterator, IntoParallelRefMutIterator, ParallelIterator}; use rustc_hash::FxHasher; -use serde::{ser::SerializeStruct, Deserialize, Deserializer, Serialize, Serializer}; +use serde::{ + ser::{SerializeStruct, SerializeTuple}, + Deserialize, Deserializer, Serialize, Serializer, +}; use sharded_hash_table::ShardedHashTable; use crate::{ @@ -51,12 +54,41 @@ mod tests; type HashCode = u64; /// A pointer to a row in the table. -#[derive(Clone, Debug, Serialize, Deserialize)] +#[derive(Clone, Debug)] pub(crate) struct TableEntry { hashcode: HashCode, row: RowId, } +impl Serialize for TableEntry { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut bytes = [0u8; 12]; + let b1 = self.hashcode.to_be_bytes(); + bytes[..b1.len()].copy_from_slice(&b1); + let b2 = self.row.rep.to_be_bytes(); + bytes[b1.len()..].copy_from_slice(&b2); + serializer.serialize_bytes(&bytes) + } +} + +impl<'de> Deserialize<'de> for TableEntry { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let bytes = <[u8; 16]>::deserialize(deserializer).expect("Failed to parse TabelEntry"); + Ok(TableEntry { + hashcode: u64::from_be_bytes(bytes[0..8].try_into().unwrap()), + row: RowId { + rep: u32::from_be_bytes(bytes[8..12].try_into().unwrap()), + }, + }) + } +} + impl TableEntry { fn hashcode(&self) -> u64 { // We keep the cast here to make it easy to switch to HashCode=u32. 
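// ---------------------------------------------------------------------------
// Editorial aside (not part of the patch above): the RowBuffer hunks in this
// patch appear to hand-roll a 7-bit varint encoding in which payload bytes keep
// the high bit clear and the *terminating* byte sets it -- the inverse of the
// usual LEB128 continuation-bit convention. The sketch below is a hedged,
// self-contained reconstruction of that scheme for illustration only; the names
// encode_varint/decode_varint and the main driver are invented here and do not
// exist in the codebase.

fn encode_varint(buf: &mut Vec<u8>, x: u32) {
    let mut rem = x;
    // Emit low-order 7-bit groups with the high bit clear while more remain.
    while rem >= 0x80 {
        buf.push((rem & 0x7f) as u8);
        rem >>= 7;
    }
    // The final group is tagged with the high bit to terminate the value.
    buf.push(rem as u8 | 0x80);
}

fn decode_varint(it: &mut impl Iterator<Item = u8>) -> u32 {
    let mut ret = 0u32;
    let mut shift = 0u32;
    let mut val = it.next().expect("truncated varint") as u32;
    // Bytes without the high bit are intermediate 7-bit groups.
    while val < 0x80 {
        ret |= val << shift;
        shift += 7;
        val = it.next().expect("truncated varint") as u32;
    }
    // The high-bit byte ends the value; strip the marker and merge it in.
    ret | ((val ^ 0x80) << shift)
}

fn main() {
    let inputs = [0u32, 5, 127, 128, 300, u32::MAX];
    let mut buf = Vec::new();
    for &x in &inputs {
        encode_varint(&mut buf, x);
    }
    let mut it = buf.iter().copied();
    for &x in &inputs {
        assert_eq!(decode_varint(&mut it), x);
    }
    // Small values cost 1 byte and u32::MAX costs 5, matching the byte counts
    // that get_n_compressed_bytes in the patch computes.
    println!("round-trip ok: {} bytes for {} values", buf.len(), inputs.len());
}
// ---------------------------------------------------------------------------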
@@ -242,8 +274,8 @@ impl Serialize for SortedWritesTable { let mut state = serializer.serialize_struct("SortedWritesTable", 11)?; state.serialize_field("generation", &self.generation)?; - state.serialize_field("shard_data", &self.hash.shard_data())?; - state.serialize_field("shards", &serialized_shards)?; + //state.serialize_field("shard_data", &self.hash.shard_data())?; + //state.serialize_field("shards", &serialized_shards)?; state.serialize_field("data", &self.data)?; state.serialize_field("n_keys", &self.n_keys)?; state.serialize_field("n_columns", &self.n_columns)?; @@ -251,7 +283,7 @@ impl Serialize for SortedWritesTable { state.serialize_field("offsets", &self.offsets)?; state.serialize_field("pending_state", &self.pending_state)?; state.serialize_field("to_rebuild", &self.to_rebuild)?; - state.serialize_field("rebuild_index", &self.rebuild_index)?; + //state.serialize_field("rebuild_index", &self.rebuild_index)?; state.serialize_field("subset_tracker", &self.subset_tracker)?; state.end() diff --git a/core-relations/src/table_spec.rs b/core-relations/src/table_spec.rs index dc50ce360..5ec0fb8cc 100644 --- a/core-relations/src/table_spec.rs +++ b/core-relations/src/table_spec.rs @@ -522,7 +522,8 @@ impl TableWrapper for WrapperImpl { /// The implementations here downcast manually to the type used when /// constructing the WrappedTable. pub struct WrappedTable { - inner: Box, + // TODO: evil hack + pub inner: Box, wrapper: Box, } diff --git a/core-relations/src/uf/mod.rs b/core-relations/src/uf/mod.rs index 5688ddb9e..531706fc6 100644 --- a/core-relations/src/uf/mod.rs +++ b/core-relations/src/uf/mod.rs @@ -63,8 +63,10 @@ pub struct DisplacedTable { // k columns, k-1 are args, kth is the ID // enode is the row index // on deserialize: need to recompute this from `displaced` + #[serde(skip)] displaced: Vec<(Value, Value)>, // this is "the table" everything else can be recomputed from this // can even recanonicalize on serialization to get rid of dead things + #[serde(skip)] changed: bool, #[serde(skip)] lookup_table: HashMap, diff --git a/egglog-bridge/src/lib.rs b/egglog-bridge/src/lib.rs index 7232d5def..702fe4cb4 100644 --- a/egglog-bridge/src/lib.rs +++ b/egglog-bridge/src/lib.rs @@ -68,7 +68,8 @@ impl Timestamp { /// The state associated with an egglog program. #[derive(Clone, Serialize, Deserialize)] pub struct EGraph { - db: Database, + // TODO: evil hack for looking at serialization size + pub db: Database, uf_table: TableId, id_counter: CounterId, reason_counter: CounterId, diff --git a/numeric-id/src/lib.rs b/numeric-id/src/lib.rs index 9825268f2..df7e14cbc 100644 --- a/numeric-id/src/lib.rs +++ b/numeric-id/src/lib.rs @@ -47,7 +47,8 @@ impl NumericId for usize { /// with no hashing. For sparse mappings, use a HashMap. #[derive(Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct DenseIdMap { - data: Vec>, + // TODO: evil hack for looking at serialization size + pub data: Vec>, _marker: PhantomData, } @@ -438,7 +439,8 @@ macro_rules! 
define_id { #[derive(Copy, Clone, Default)] #[doc = $doc] $v struct $name { - rep: $repr, + // TODO: evil hack + pub rep: $repr, } impl serde::Serialize for $name { diff --git a/src/lib.rs b/src/lib.rs index 725506bfb..dfd6c6340 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,7 +30,7 @@ mod typechecking; pub mod util; pub use command_macro::{CommandMacro, CommandMacroRegistry}; -mod serialize_size; +pub mod serialize_size; // This is used to allow the `add_primitive` macro to work in // both this crate and other crates by referring to `::egglog`. diff --git a/src/poach.rs b/src/poach.rs index 05a28b653..53d350d90 100644 --- a/src/poach.rs +++ b/src/poach.rs @@ -724,7 +724,7 @@ fn poach( initial_egraph.as_deref(), |egg_file, _, timed_egraph| { timed_egraph.run_from_file(egg_file)?; - timed_egraph.print_size_report(0) + timed_egraph.print_size_report(100) }, ), } diff --git a/src/serialize_size.rs b/src/serialize_size.rs index 8683e27ca..43ed45a49 100644 --- a/src/serialize_size.rs +++ b/src/serialize_size.rs @@ -74,7 +74,9 @@ impl SizeReport { pretty_print_nbytes(sr.size), percentage ); - sr.pretty_print(level + 1, max_level); + if percentage > 1.0 { + sr.pretty_print(level + 1, max_level); + } } if sorted_fields.len() > 10 { println!(" {:level$} ... {:} fields total", "", sorted_fields.len()); @@ -98,8 +100,6 @@ pub trait GenerateSizeReport: serde::Serialize + Sized { } } -impl GenerateSizeReport for egglog_bridge::EGraph {} - impl GenerateSizeReport for Option {} impl GenerateSizeReport @@ -121,9 +121,22 @@ impl GenerateSizeReport for TypeInfo {} impl GenerateSizeReport for RunReport {} -impl GenerateSizeReport +impl GenerateSizeReport for egglog_numeric_id::DenseIdMap { + fn get_sizerp(&self) -> SizeReport { + let mut ret = get_sizerp_default(self); + for e in self.data.iter() { + match e { + Some(v) => { + let rep = v.get_sizerp(); + ret.fields.push((rep.name.clone(), Box::new(rep))); + } + _ => {} + } + } + ret + } } impl GenerateSizeReport for CommandMacroRegistry {} @@ -231,10 +244,10 @@ impl GenerateSizeReport for EGraph { "overall_run_report".to_string(), Box::new(self.overall_run_report.get_sizerp()), )); - ret.fields.push(( - "schedulers".to_string(), - Box::new(self.schedulers.get_sizerp()), - )); + //ret.fields.push(( + // "schedulers".to_string(), + // Box::new(self.schedulers.get_sizerp()), + //)); //ret.fields.push(("commands".to_string(), Box::new(self.commands.get_sizerp()))); //ret.fields.push(("command_macros".to_string(), Box::new(self.command_macros.get_sizerp()))); ret.fields.push(( @@ -244,3 +257,32 @@ impl GenerateSizeReport for EGraph { ret } } + +impl GenerateSizeReport for egglog_bridge::EGraph { + fn get_sizerp(&self) -> SizeReport { + let mut ret = get_sizerp_default(&self); + ret.fields + .push(("db".to_string(), Box::new(self.db.get_sizerp()))); + ret + } +} + +impl GenerateSizeReport for egglog_core_relations::Database { + fn get_sizerp(&self) -> SizeReport { + let mut ret = get_sizerp_default(&self); + ret.fields + .push(("tables".to_string(), Box::new(self.tables.get_sizerp()))); + ret + } +} + +impl GenerateSizeReport for egglog_core_relations::table_spec::WrappedTable {} + +impl GenerateSizeReport for egglog_core_relations::free_join::TableInfo { + fn get_sizerp(&self) -> SizeReport { + let mut ret = get_sizerp_default(&self); + ret.fields + .push(("table".to_string(), Box::new(self.table.get_sizerp()))); + ret + } +} From 53cb8f8409f17565db620fa8c11d67198dc50a31 Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Fri, 6 Mar 2026 15:41:55 -0800 Subject: 
[PATCH 16/21] fmt --- src/lib.rs | 32 +++++++++++++++++++------------- src/poach.rs | 22 +++++++++++----------- src/serialize_size.rs | 11 ++++------- src/typechecking.rs | 2 +- 4 files changed, 35 insertions(+), 32 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index dfd6c6340..2f7c0ed41 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -259,31 +259,31 @@ impl Serialize for SerializableSort { s.serialize_field("type", "FunctionSort")?; s.serialize_field("data", sort)?; s.end() - } else if let Some(_) = sort.as_any().downcast_ref::>() { + } else if sort.as_any().downcast_ref::>().is_some() { s.serialize_field("type", "BaseSort")?; s.serialize_field("data", "BigIntSort")?; s.end() - } else if let Some(_) = sort.as_any().downcast_ref::>() { + } else if sort.as_any().downcast_ref::>().is_some() { s.serialize_field("type", "BaseSort")?; s.serialize_field("data", "BigRatSort")?; s.end() - } else if let Some(_) = sort.as_any().downcast_ref::>() { + } else if sort.as_any().downcast_ref::>().is_some() { s.serialize_field("type", "BaseSort")?; s.serialize_field("data", "BoolSort")?; s.end() - } else if let Some(_) = sort.as_any().downcast_ref::>() { + } else if sort.as_any().downcast_ref::>().is_some() { s.serialize_field("type", "BaseSort")?; s.serialize_field("data", "F64Sort")?; s.end() - } else if let Some(_) = sort.as_any().downcast_ref::>() { + } else if sort.as_any().downcast_ref::>().is_some() { s.serialize_field("type", "BaseSort")?; s.serialize_field("data", "I64Sort")?; s.end() - } else if let Some(_) = sort.as_any().downcast_ref::>() { + } else if sort.as_any().downcast_ref::>().is_some() { s.serialize_field("type", "BaseSort")?; s.serialize_field("data", "StringSort")?; s.end() - } else if let Some(_) = sort.as_any().downcast_ref::>() { + } else if sort.as_any().downcast_ref::>().is_some() { s.serialize_field("type", "BaseSort")?; s.serialize_field("data", "UnitSort")?; s.end() @@ -1494,7 +1494,7 @@ impl EGraph { expr.output_type(), ) .iter() - .map(|e| e.1.clone()) + .map(|e| e.1) .collect(); if log_enabled!(Level::Info) { let expr_str = expr.to_string(); @@ -2439,8 +2439,8 @@ mod tests { /***** TESTING AREA FOR TIMED EGRAPH *****/ -static START: &'static str = "start"; -static END: &'static str = "end"; +static START: &str = "start"; +static END: &str = "end"; #[derive(Serialize, Clone, Eq)] pub struct EgraphEvent { @@ -2491,6 +2491,12 @@ pub struct TimedEgraph { timer: std::time::Instant, } +impl Default for TimedEgraph { + fn default() -> Self { + Self::new() + } +} + impl TimedEgraph { /// Create a new TimedEgraph with a default EGraph pub fn new() -> Self { @@ -2526,14 +2532,14 @@ impl TimedEgraph { } pub fn egraphs(&self) -> Vec<&EGraph> { - self.egraphs.iter().map(|x| x).collect() + self.egraphs.iter().collect() } pub fn write_timeline(&self, path: &Path) -> Result<(), serde_json::Error> { if let Some(parent) = path.parent() { fs::create_dir_all(parent).expect("Failed to create out dir"); } - let file = File::create(&path).expect("Failed to create timeline.json"); + let file = File::create(path).expect("Failed to create timeline.json"); serde_json::to_writer_pretty(BufWriter::new(file), &self.timeline) } @@ -2593,7 +2599,7 @@ impl TimedEgraph { time_micros: self.timer.elapsed().as_micros(), }); - i = i + 1; + i += 1; } self.timeline.push(program_timeline); diff --git a/src/poach.rs b/src/poach.rs index 53d350d90..73617b6f1 100644 --- a/src/poach.rs +++ b/src/poach.rs @@ -202,7 +202,7 @@ where } } } - if failures.len() == 0 { + if failures.is_empty() { println!("0 failures out of 
{} files", files.len()); } else { println!("{} failures out of {} files", failures.len(), files.len()); @@ -325,9 +325,9 @@ fn poach( .from_file(&s1) .context("failed to read s1.fbs")?; - check_egraph_number(&timed_egraph, 2)?; + check_egraph_number(timed_egraph, 2)?; - check_egraph_size(&timed_egraph)?; + check_egraph_size(timed_egraph)?; timed_egraph.write_timeline(&out_dir.join(format!("{name}-timeline.json")))?; Ok(()) @@ -369,8 +369,8 @@ fn poach( .from_file(&s3) .context("failed to read s3.fbs")?; - check_egraph_number(&timed_egraph, 4)?; - check_egraph_size(&timed_egraph)?; + check_egraph_number(timed_egraph, 4)?; + check_egraph_size(timed_egraph)?; //check_idempotent(&s2, &s3, name, out_dir); timed_egraph.write_timeline(&out_dir.join(format!("{name}-timeline.json")))?; @@ -415,9 +415,9 @@ fn poach( .from_value(value) .context("failed to decode egraph from flatbuffer")?; - check_egraph_number(&timed_egraph, 2)?; + check_egraph_number(timed_egraph, 2)?; - check_egraph_size(&timed_egraph)?; + check_egraph_size(timed_egraph)?; timed_egraph.write_timeline(&out_dir.join(format!("{name}-timeline.json")))?; @@ -487,7 +487,7 @@ fn poach( .from_value(value) .context("Failed to decode egraph from Flatbuffer")?; - check_egraph_number(&timed_egraph, 2)?; + check_egraph_number(timed_egraph, 2)?; let final_extracts = timed_egraph.run_program_with_timeline(extract_cmds, &extracts)?; @@ -510,14 +510,14 @@ fn poach( let r = CSVRecord { benchname: name.to_string(), egraph_size: timed_egraph.egraphs().last().unwrap().num_tuples(), - serialized_size: serialized_size, + serialized_size, ser_time: timed_egraph.get_total_time(1), der_time: timed_egraph.get_total_time(2), ext_time: timed_egraph.get_total_time(3), run_time: timed_egraph.get_total_time(0), }; - csv::Writer::from_path(&out_dir.join(format!("{name}.csv")))?.serialize(r)?; + csv::Writer::from_path(out_dir.join(format!("{name}.csv")))?.serialize(r)?; Ok(()) }, @@ -632,7 +632,7 @@ fn poach( let all_cmds = EGraph::default() .parser - .get_program_from_string(None, &program_string)?; + .get_program_from_string(None, program_string)?; assert!(all_cmds.len() == all_sexps.len()); diff --git a/src/serialize_size.rs b/src/serialize_size.rs index 43ed45a49..666b21dbf 100644 --- a/src/serialize_size.rs +++ b/src/serialize_size.rs @@ -29,7 +29,7 @@ fn up_to_two_decimals(a: usize, b: usize) -> String { } else { low.to_string() }; - return high.to_string() + "." + &low_str; + high.to_string() + "." 
+ &low_str } fn pretty_print_nbytes(size: usize) -> String { @@ -127,12 +127,9 @@ impl GenerateSize fn get_sizerp(&self) -> SizeReport { let mut ret = get_sizerp_default(self); for e in self.data.iter() { - match e { - Some(v) => { - let rep = v.get_sizerp(); - ret.fields.push((rep.name.clone(), Box::new(rep))); - } - _ => {} + if let Some(v) = e { + let rep = v.get_sizerp(); + ret.fields.push((rep.name.clone(), Box::new(rep))); } } ret diff --git a/src/typechecking.rs b/src/typechecking.rs index 2f39e97a0..0ae574174 100644 --- a/src/typechecking.rs +++ b/src/typechecking.rs @@ -318,7 +318,7 @@ impl EGraph { } NCommand::MultiExtract(span, variants, exprs) => { let res_exprs = exprs - .into_iter() + .iter() .map(|expr| { self.type_info .typecheck_expr(symbol_gen, expr, &Default::default()) From 78f79fbd2a6c0fc7ac01bb1defb7562b743a0399 Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Fri, 6 Mar 2026 16:02:43 -0800 Subject: [PATCH 17/21] More more evil hacks --- core-relations/src/row_buffer/mod.rs | 46 +++++++++++++++++++--------- core-relations/src/table/mod.rs | 21 ++++++------- egglog-bridge/src/lib.rs | 25 +++++++++++++++ src/lib.rs | 44 +++++++++++++++++++++----- 4 files changed, 102 insertions(+), 34 deletions(-) diff --git a/core-relations/src/row_buffer/mod.rs b/core-relations/src/row_buffer/mod.rs index e24af95f8..df4d88045 100644 --- a/core-relations/src/row_buffer/mod.rs +++ b/core-relations/src/row_buffer/mod.rs @@ -6,7 +6,7 @@ use std::{cell::Cell, mem, ops::Deref}; use crate::numeric_id::NumericId; use egglog_concurrency::ParallelVecWriter; use rayon::iter::ParallelIterator; -use serde::{ser::SerializeStruct, Deserialize, Deserializer, Serialize}; +use serde::{Deserialize, Deserializer, Serialize}; use smallvec::SmallVec; use crate::{ @@ -52,19 +52,35 @@ impl<'de> Deserialize<'de> for RowBuffer { }) */ - let bytes = >::deserialize(deserializer).expect("Failed to parse RowBuffer"); - let mut it = bytes.iter(); - let n_columns = deserialize_compressed(&mut it); - let total_rows = deserialize_compressed(&mut it); - let mut data = >>::new(); - for i in 0..n_columns * total_rows { - data.push(Cell::new(Value::new(deserialize_compressed(&mut it)))); + struct RowBufferVisitor; + + impl<'de> serde::de::Visitor<'de> for RowBufferVisitor { + type Value = RowBuffer; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("Expecting a byte array") + } + + fn visit_bytes(self, bytes: &[u8]) -> Result + where + E: serde::de::Error, + { + let mut it = bytes.iter(); + let n_columns = deserialize_compressed(&mut it); + let total_rows = deserialize_compressed(&mut it); + let mut data = >>::new(); + for _i in 0..n_columns * total_rows { + data.push(Cell::new(Value::new(deserialize_compressed(&mut it)))); + } + Ok(RowBuffer { + n_columns: n_columns.try_into().unwrap(), + total_rows: total_rows.try_into().unwrap(), + data: Pooled::new(data), + }) + } } - Ok(RowBuffer { - n_columns: n_columns.try_into().unwrap(), - total_rows: total_rows.try_into().unwrap(), - data: Pooled::new(data), - }) + + deserializer.deserialize_bytes(RowBufferVisitor) } } @@ -85,7 +101,7 @@ fn get_n_compressed_bytes(x: u32) -> usize { fn compressed_serialize(buf: &mut Vec, x: u32) { let mut rem = x; - while (rem >= (1u32 << 7)) { + while rem >= (1u32 << 7) { buf.push((rem & ((1u32 << 7) - 1)).try_into().unwrap()); rem = rem >> 7; } @@ -96,7 +112,7 @@ fn deserialize_compressed<'a, T: Iterator>(it: &mut T) -> u32 { let mut ret = 0u32; let mut delta = 0u32; let mut val: u32 = 
::into(*it.next().unwrap()); - while (val < (1u32 << 7)) { + while val < (1u32 << 7) { ret = ret | (val << delta); delta += 7; val = ::into(*it.next().unwrap()); diff --git a/core-relations/src/table/mod.rs b/core-relations/src/table/mod.rs index 01ef79d8e..08dff3e60 100644 --- a/core-relations/src/table/mod.rs +++ b/core-relations/src/table/mod.rs @@ -20,10 +20,7 @@ use crossbeam_queue::SegQueue; use hashbrown::HashTable; use rayon::iter::{IndexedParallelIterator, IntoParallelRefMutIterator, ParallelIterator}; use rustc_hash::FxHasher; -use serde::{ - ser::{SerializeStruct, SerializeTuple}, - Deserialize, Deserializer, Serialize, Serializer, -}; +use serde::{ser::SerializeStruct, Deserialize, Deserializer, Serialize, Serializer}; use sharded_hash_table::ShardedHashTable; use crate::{ @@ -203,8 +200,8 @@ impl<'de> Deserialize<'de> for SortedWritesTable { #[derive(Deserialize)] struct Partial { generation: Generation, - shard_data: ShardData, - shards: Vec>, + //shard_data: ShardData, + //shards: Vec>, data: Rows, n_keys: usize, @@ -215,13 +212,13 @@ impl<'de> Deserialize<'de> for SortedWritesTable { pending_state: Arc, to_rebuild: Vec, - rebuild_index: Index, - + //rebuild_index: Index, subset_tracker: SubsetTracker, } let partial = Partial::deserialize(deserializer)?; + /* let shards: Vec> = partial .shards .iter() @@ -238,11 +235,12 @@ impl<'de> Deserialize<'de> for SortedWritesTable { shard_data: partial.shard_data, shards, }; + */ Ok(SortedWritesTable { generation: partial.generation, data: partial.data, - hash, + hash: ShardedHashTable::default(), n_keys: partial.n_keys, n_columns: partial.n_columns, sort_by: partial.sort_by, @@ -250,7 +248,7 @@ impl<'de> Deserialize<'de> for SortedWritesTable { pending_state: partial.pending_state, merge: Arc::new(|_, _, _, _| true), to_rebuild: partial.to_rebuild, - rebuild_index: partial.rebuild_index, + rebuild_index: >::default(), subset_tracker: partial.subset_tracker, }) } @@ -261,6 +259,7 @@ impl Serialize for SortedWritesTable { where S: Serializer, { + /* let serialized_shards: Vec> = self .hash .shards @@ -271,7 +270,7 @@ impl Serialize for SortedWritesTable { v }) .collect(); - + */ let mut state = serializer.serialize_struct("SortedWritesTable", 11)?; state.serialize_field("generation", &self.generation)?; //state.serialize_field("shard_data", &self.hash.shard_data())?; diff --git a/egglog-bridge/src/lib.rs b/egglog-bridge/src/lib.rs index 702fe4cb4..85fcf8117 100644 --- a/egglog-bridge/src/lib.rs +++ b/egglog-bridge/src/lib.rs @@ -812,6 +812,31 @@ impl EGraph { Ok(iteration_report) } + /// TODO: evil hack for speeding up extraction + pub fn run_rules_without_rebuild(&mut self, rules: &[RuleId]) -> Result { + let ts = self.next_ts(); + + let rule_set_report = + run_rules_impl(&mut self.db, &mut self.rules, rules, ts, self.report_level)?; + if let Some(message) = self.panic_message.lock().unwrap().take() { + return Err(PanicError(message).into()); + } + + let iteration_report = IterationReport { + rule_set_report, + rebuild_time: Duration::ZERO, + }; + if !iteration_report.changed() { + return Ok(iteration_report); + } + + if let Some(message) = self.panic_message.lock().unwrap().take() { + return Err(PanicError(message).into()); + } + + Ok(iteration_report) + } + fn rebuild(&mut self) -> Result<()> { fn do_parallel() -> bool { #[cfg(test)] diff --git a/src/lib.rs b/src/lib.rs index 2f7c0ed41..a5b1a3dc4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -259,31 +259,59 @@ impl Serialize for SerializableSort { s.serialize_field("type", 
"FunctionSort")?; s.serialize_field("data", sort)?; s.end() - } else if sort.as_any().downcast_ref::>().is_some() { + } else if sort + .as_any() + .downcast_ref::>() + .is_some() + { s.serialize_field("type", "BaseSort")?; s.serialize_field("data", "BigIntSort")?; s.end() - } else if sort.as_any().downcast_ref::>().is_some() { + } else if sort + .as_any() + .downcast_ref::>() + .is_some() + { s.serialize_field("type", "BaseSort")?; s.serialize_field("data", "BigRatSort")?; s.end() - } else if sort.as_any().downcast_ref::>().is_some() { + } else if sort + .as_any() + .downcast_ref::>() + .is_some() + { s.serialize_field("type", "BaseSort")?; s.serialize_field("data", "BoolSort")?; s.end() - } else if sort.as_any().downcast_ref::>().is_some() { + } else if sort + .as_any() + .downcast_ref::>() + .is_some() + { s.serialize_field("type", "BaseSort")?; s.serialize_field("data", "F64Sort")?; s.end() - } else if sort.as_any().downcast_ref::>().is_some() { + } else if sort + .as_any() + .downcast_ref::>() + .is_some() + { s.serialize_field("type", "BaseSort")?; s.serialize_field("data", "I64Sort")?; s.end() - } else if sort.as_any().downcast_ref::>().is_some() { + } else if sort + .as_any() + .downcast_ref::>() + .is_some() + { s.serialize_field("type", "BaseSort")?; s.serialize_field("data", "StringSort")?; s.end() - } else if sort.as_any().downcast_ref::>().is_some() { + } else if sort + .as_any() + .downcast_ref::>() + .is_some() + { s.serialize_field("type", "BaseSort")?; s.serialize_field("data", "UnitSort")?; s.end() @@ -1275,7 +1303,7 @@ impl EGraph { ); let id = translator.build(); - let rule_result = self.backend.run_rules(&[id]); + let rule_result = self.backend.run_rules_without_rebuild(&[id]); self.backend.free_rule(id); self.backend.free_external_func(ext_id); let _ = rule_result.map_err(|e| { From 41742d6bc6b2ee46e985049fc7aa856e3064774f Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Fri, 6 Mar 2026 16:08:20 -0800 Subject: [PATCH 18/21] Remove Easteregg from the list of experiments --- infra/nightly.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/nightly.py b/infra/nightly.py index 2ce3f6ae1..5da422cc0 100644 --- a/infra/nightly.py +++ b/infra/nightly.py @@ -97,7 +97,7 @@ def run_test_experiments(top_dir, tmp_dir, aggregator): cleanup_benchmark_files(timeline_file, tmp_dir / "summary.json", *extra_files) def run_extract_experiments(resource_dir, tmp_dir, aggregator, csv_aggregator): - timeline_suites = ["easteregg", "herbie-hamming", "herbie-math-rewrite", "herbie-math-taylor"] + timeline_suites = ["herbie-hamming", "herbie-math-rewrite", "herbie-math-taylor"] for suite in timeline_suites: for benchmark in benchmark_files(resource_dir / "test-files" / suite): timeline_file = tmp_dir / f"{benchmark.stem}-timeline.json" From 35fa1d93a6f82beb6f45dbc25a524ee4d32e6d85 Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Thu, 26 Mar 2026 15:06:11 -0700 Subject: [PATCH 19/21] Clean up evil hacks --- core-relations/src/free_join/mod.rs | 6 ++-- core-relations/src/lib.rs | 6 ++-- core-relations/src/table_spec.rs | 3 +- egglog-bridge/src/lib.rs | 15 +++------- numeric-id/src/lib.rs | 5 ++-- src/serialize_size.rs | 43 ++--------------------------- 6 files changed, 13 insertions(+), 65 deletions(-) diff --git a/core-relations/src/free_join/mod.rs b/core-relations/src/free_join/mod.rs index 9bccd0d1f..c97378fa0 100644 --- a/core-relations/src/free_join/mod.rs +++ b/core-relations/src/free_join/mod.rs @@ -118,8 +118,7 @@ pub(crate) type HashColumnIndex = Arc>>; pub struct 
TableInfo { pub(crate) name: Option>, pub(crate) spec: TableSpec, - // TODO: evil hack for looking at serialization size - pub table: WrappedTable, + pub(crate) table: WrappedTable, #[serde(skip)] pub(crate) indexes: IndexCatalog, HashIndex>, #[serde(skip)] @@ -277,8 +276,7 @@ impl Counters { pub struct Database { // NB: some fields are pub(crate) to allow some internal modules to avoid // borrowing the whole table. - // TODO: evil hack for looking at serialization size - pub tables: DenseIdMap, + pub(crate) tables: DenseIdMap, // TODO: having a single AtomicUsize per counter can lead to contention. We // should look into prefetching counters when creating a new ExecutionState // and incrementing locally. Note that the batch size shouldn't be too big diff --git a/core-relations/src/lib.rs b/core-relations/src/lib.rs index 66fe1248c..7d0e66140 100644 --- a/core-relations/src/lib.rs +++ b/core-relations/src/lib.rs @@ -7,8 +7,7 @@ pub(crate) mod base_values; pub(crate) mod common; pub(crate) mod containers; pub(crate) mod dependency_graph; -// TODO: evil hack for looking at serialization size -pub mod free_join; +pub(crate) mod free_join; pub(crate) mod hash_index; pub(crate) mod offsets; pub(crate) mod parallel_heuristics; @@ -17,8 +16,7 @@ pub(crate) mod query; pub(crate) mod row_buffer; pub(crate) mod table; -// TODO: evil hack for looking at serialization size -pub mod table_spec; +pub(crate) mod table_spec; pub(crate) mod uf; #[cfg(test)] diff --git a/core-relations/src/table_spec.rs b/core-relations/src/table_spec.rs index 5ec0fb8cc..dc50ce360 100644 --- a/core-relations/src/table_spec.rs +++ b/core-relations/src/table_spec.rs @@ -522,8 +522,7 @@ impl TableWrapper for WrapperImpl { /// The implementations here downcast manually to the type used when /// constructing the WrappedTable. pub struct WrappedTable { - // TODO: evil hack - pub inner: Box, + inner: Box, wrapper: Box, } diff --git a/egglog-bridge/src/lib.rs b/egglog-bridge/src/lib.rs index 85fcf8117..cf20eef69 100644 --- a/egglog-bridge/src/lib.rs +++ b/egglog-bridge/src/lib.rs @@ -68,8 +68,7 @@ impl Timestamp { /// The state associated with an egglog program. #[derive(Clone, Serialize, Deserialize)] pub struct EGraph { - // TODO: evil hack for looking at serialization size - pub db: Database, + db: Database, uf_table: TableId, id_counter: CounterId, reason_counter: CounterId, @@ -812,7 +811,9 @@ impl EGraph { Ok(iteration_report) } - /// TODO: evil hack for speeding up extraction + /// This hack speeds up extraction and + /// avoid certain fields of the backend data structure + /// by skipping rebuild pub fn run_rules_without_rebuild(&mut self, rules: &[RuleId]) -> Result { let ts = self.next_ts(); @@ -826,14 +827,6 @@ impl EGraph { rule_set_report, rebuild_time: Duration::ZERO, }; - if !iteration_report.changed() { - return Ok(iteration_report); - } - - if let Some(message) = self.panic_message.lock().unwrap().take() { - return Err(PanicError(message).into()); - } - Ok(iteration_report) } diff --git a/numeric-id/src/lib.rs b/numeric-id/src/lib.rs index df7e14cbc..b1202c26a 100644 --- a/numeric-id/src/lib.rs +++ b/numeric-id/src/lib.rs @@ -47,8 +47,7 @@ impl NumericId for usize { /// with no hashing. For sparse mappings, use a HashMap. #[derive(Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] pub struct DenseIdMap { - // TODO: evil hack for looking at serialization size - pub data: Vec>, + data: Vec>, _marker: PhantomData, } @@ -439,7 +438,7 @@ macro_rules! 
define_id { #[derive(Copy, Clone, Default)] #[doc = $doc] $v struct $name { - // TODO: evil hack + // visibility hack for serialization pub rep: $repr, } diff --git a/src/serialize_size.rs b/src/serialize_size.rs index 666b21dbf..e8d824bd8 100644 --- a/src/serialize_size.rs +++ b/src/serialize_size.rs @@ -122,19 +122,7 @@ impl GenerateSizeReport for TypeInfo {} impl GenerateSizeReport for RunReport {} impl GenerateSizeReport - for egglog_numeric_id::DenseIdMap -{ - fn get_sizerp(&self) -> SizeReport { - let mut ret = get_sizerp_default(self); - for e in self.data.iter() { - if let Some(v) = e { - let rep = v.get_sizerp(); - ret.fields.push((rep.name.clone(), Box::new(rep))); - } - } - ret - } -} + for egglog_numeric_id::DenseIdMap {} impl GenerateSizeReport for CommandMacroRegistry {} @@ -255,31 +243,4 @@ impl GenerateSizeReport for EGraph { } } -impl GenerateSizeReport for egglog_bridge::EGraph { - fn get_sizerp(&self) -> SizeReport { - let mut ret = get_sizerp_default(&self); - ret.fields - .push(("db".to_string(), Box::new(self.db.get_sizerp()))); - ret - } -} - -impl GenerateSizeReport for egglog_core_relations::Database { - fn get_sizerp(&self) -> SizeReport { - let mut ret = get_sizerp_default(&self); - ret.fields - .push(("tables".to_string(), Box::new(self.tables.get_sizerp()))); - ret - } -} - -impl GenerateSizeReport for egglog_core_relations::table_spec::WrappedTable {} - -impl GenerateSizeReport for egglog_core_relations::free_join::TableInfo { - fn get_sizerp(&self) -> SizeReport { - let mut ret = get_sizerp_default(&self); - ret.fields - .push(("table".to_string(), Box::new(self.table.get_sizerp()))); - ret - } -} +impl GenerateSizeReport for egglog_bridge::EGraph {} \ No newline at end of file From a939da674c289e2cd94d4acb241159e804d9a9f0 Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Thu, 26 Mar 2026 15:06:45 -0700 Subject: [PATCH 20/21] fmt --- egglog-bridge/src/lib.rs | 2 +- src/serialize_size.rs | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/egglog-bridge/src/lib.rs b/egglog-bridge/src/lib.rs index cf20eef69..b9c13d1ca 100644 --- a/egglog-bridge/src/lib.rs +++ b/egglog-bridge/src/lib.rs @@ -811,7 +811,7 @@ impl EGraph { Ok(iteration_report) } - /// This hack speeds up extraction and + /// This hack speeds up extraction and /// avoid certain fields of the backend data structure /// by skipping rebuild pub fn run_rules_without_rebuild(&mut self, rules: &[RuleId]) -> Result { diff --git a/src/serialize_size.rs b/src/serialize_size.rs index e8d824bd8..d99b47083 100644 --- a/src/serialize_size.rs +++ b/src/serialize_size.rs @@ -122,7 +122,9 @@ impl GenerateSizeReport for TypeInfo {} impl GenerateSizeReport for RunReport {} impl GenerateSizeReport - for egglog_numeric_id::DenseIdMap {} + for egglog_numeric_id::DenseIdMap +{ +} impl GenerateSizeReport for CommandMacroRegistry {} @@ -243,4 +245,4 @@ impl GenerateSizeReport for EGraph { } } -impl GenerateSizeReport for egglog_bridge::EGraph {} \ No newline at end of file +impl GenerateSizeReport for egglog_bridge::EGraph {} From 0e3ffb45fac8235469acdae2c7024c4ef76dc5f5 Mon Sep 17 00:00:00 2001 From: Haobin Ni Date: Thu, 26 Mar 2026 15:15:09 -0700 Subject: [PATCH 21/21] fmt --- core-relations/src/table_spec.rs | 7 ++++--- src/lib.rs | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/core-relations/src/table_spec.rs b/core-relations/src/table_spec.rs index 1c9b4fab8..6bae2c7d4 100644 --- a/core-relations/src/table_spec.rs +++ b/core-relations/src/table_spec.rs @@ -27,8 +27,7 @@ use 
crate::{ offsets::{RowId, Subset, SubsetRef}, pool::{with_pool_set, PoolSet, Pooled}, row_buffer::{RowBuffer, TaggedRowBuffer}, - DisplacedTable, DisplacedTableWithProvenance, - QueryEntry, TableId, Variable, + DisplacedTable, DisplacedTableWithProvenance, QueryEntry, TableId, Variable, }; define_id!(pub ColumnId, u32, "a particular column in a table"); @@ -553,7 +552,9 @@ impl<'de> Deserialize<'de> for WrappedTable { } else if inner.as_any().is::() { wrapper::() } else { - return Err(serde::de::Error::custom("unknown table type for WrappedTable")); + return Err(serde::de::Error::custom( + "unknown table type for WrappedTable", + )); }; Ok(WrappedTable { inner, wrapper }) diff --git a/src/lib.rs b/src/lib.rs index 9abfe7a11..28d4baa28 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2611,7 +2611,7 @@ impl TimedEgraph { let mut egraph: EGraph = EGraph::deserialize(r).unwrap(); egraph .restore_deserialized_runtime() - .expect("Failed to restore deserialized runtime"); + .expect("Failed to restore deserialized runtime"); Self { egraphs: vec![egraph],