From 36cfb6fc5296417f6b865d645dba3ae5f4be149a Mon Sep 17 00:00:00 2001 From: Alexandre Maurel Date: Sun, 1 Mar 2026 11:09:03 +0100 Subject: [PATCH 1/3] Extract tercen SDK into standalone tercen-rs crate Move all Tercen SDK modules (gRPC client, data streaming, context, color handling, facets, pages, etc.) into the separate tercen-rs crate and depend on it via a git dependency (a local path can be substituted for development). Keep operator_properties locally as it uses include_str!("../../operator.json") which is operator-specific. - Replace all crate::tercen:: imports with tercen_rs:: - Remove build.rs and tercen_grpc_api submodule (now in tercen-rs) - Handle DataFrame wrapping at boundaries (tercen-rs returns polars DataFrame, wrapped to ggrs_core DataFrame where needed) Co-Authored-By: Claude Opus 4.6 --- .gitmodules | 3 - Cargo.lock | 26 +- Cargo.toml | 12 +- build.rs | 16 - src/bin/dev.rs | 4 +- src/bin/prepare.rs | 4 +- src/config.rs | 8 +- src/ggrs_integration/stream_generator.rs | 134 +- src/lib.rs | 2 +- src/main.rs | 18 +- .../mod.rs} | 2 +- src/pipeline.rs | 10 +- src/tercen/README.md | 122 -- src/tercen/client.rs | 208 --- src/tercen/color_processor.rs | 537 ------- src/tercen/colors.rs | 1366 ----------------- src/tercen/context/base.rs | 533 ------- src/tercen/context/dev_context.rs | 370 ----- src/tercen/context/helpers.rs | 925 ----------- src/tercen/context/mod.rs | 155 -- src/tercen/context/production_context.rs | 451 ------ src/tercen/error.rs | 50 - src/tercen/facets.rs | 327 ---- src/tercen/logger.rs | 54 - src/tercen/mod.rs | 71 - src/tercen/pages.rs | 201 --- src/tercen/palettes.rs | 337 ---- src/tercen/properties.rs | 218 --- src/tercen/result.rs | 643 -------- src/tercen/table.rs | 158 -- src/tercen/table_convert.rs | 176 --- src/tercen/tson_convert.rs | 211 --- tercen_grpc_api | 1 - 33 files changed, 111 insertions(+), 7242 deletions(-) delete mode 100644 .gitmodules delete mode 100644 build.rs rename src/{tercen/operator_properties.rs => operator_props/mod.rs} (99%) delete mode 100644 
src/tercen/README.md delete mode 100644 src/tercen/client.rs delete mode 100644 src/tercen/color_processor.rs delete mode 100644 src/tercen/colors.rs delete mode 100644 src/tercen/context/base.rs delete mode 100644 src/tercen/context/dev_context.rs delete mode 100644 src/tercen/context/helpers.rs delete mode 100644 src/tercen/context/mod.rs delete mode 100644 src/tercen/context/production_context.rs delete mode 100644 src/tercen/error.rs delete mode 100644 src/tercen/facets.rs delete mode 100644 src/tercen/logger.rs delete mode 100644 src/tercen/mod.rs delete mode 100644 src/tercen/pages.rs delete mode 100644 src/tercen/palettes.rs delete mode 100644 src/tercen/properties.rs delete mode 100644 src/tercen/result.rs delete mode 100644 src/tercen/table.rs delete mode 100644 src/tercen/table_convert.rs delete mode 100644 src/tercen/tson_convert.rs delete mode 160000 tercen_grpc_api diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index bfa101f..0000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "tercen_grpc_api"] - path = tercen_grpc_api - url = https://github.com/tercen/tercen_grpc_api.git diff --git a/Cargo.lock b/Cargo.lock index 87421ba..9d4bc05 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1165,16 +1165,15 @@ dependencies = [ "polars", "prost", "prost-types", - "rustson", "serde", "serde_json", + "tercen-rs", "thiserror 1.0.69", "tikv-jemallocator", "tokio", "tokio-stream", "tonic", "tonic-prost", - "tonic-prost-build", "uuid", ] @@ -3836,6 +3835,29 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "tercen-rs" +version = "0.1.0" +source = "git+https://github.com/tercen/tercen-rs?branch=main#112126ec4cc68ccd754a2bc2127974f4b83f259a" +dependencies = [ + "base64", + "futures", + "once_cell", + "polars", + "prost", + "prost-types", + "rustson", + "serde", + "serde_json", + "thiserror 1.0.69", + "tokio", + "tokio-stream", + "tonic", + "tonic-prost", + "tonic-prost-build", + "uuid", +] + [[package]] name = "termcolor" 
version = "1.4.1" diff --git a/Cargo.toml b/Cargo.toml index 15e51e7..5d9b4f4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,8 +34,7 @@ prost-types = "0.14" thiserror = "1.0" anyhow = "1.0" -# Data parsing - TSON format from Tercen -rustson = { git = "https://github.com/tercen/rustson", branch = "master" } +# Data parsing serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" once_cell = "1.19" @@ -47,6 +46,12 @@ base64 = "0.22" # UUID generation uuid = { version = "1.0", features = ["v4"] } +# Tercen SDK +# Use local path for development: +# tercen-rs = { path = "../tercen-rs" } +# Use git dependency for CI/production: +tercen-rs = { git = "https://github.com/tercen/tercen-rs", branch = "main" } + # GGRS plotting library with WebGPU backend # Use local path for development: # ggrs-core = { path = "../ggrs/crates/ggrs-core", features = ["webgpu-backend", "cairo-backend"] } @@ -61,9 +66,6 @@ jemalloc = ["tikv-jemallocator"] version = "0.6" optional = true -[build-dependencies] -tonic-prost-build = "0.14" - [profile.release] opt-level = 3 lto = true diff --git a/build.rs b/build.rs deleted file mode 100644 index e54d9fd..0000000 --- a/build.rs +++ /dev/null @@ -1,16 +0,0 @@ -fn main() -> Result<(), Box> { - // Compile proto files from tercen_grpc_api submodule - // This ensures we stay in sync with the canonical API definitions - tonic_prost_build::configure() - .build_server(false) // Client only, no server code generation - .build_transport(false) // Don't generate transport code (avoid naming conflicts) - .compile_protos( - &[ - "tercen_grpc_api/protos/tercen.proto", - "tercen_grpc_api/protos/tercen_model.proto", - ], - &["tercen_grpc_api/protos"], - )?; - - Ok(()) -} diff --git a/src/bin/dev.rs b/src/bin/dev.rs index f1b5217..4ae9a24 100644 --- a/src/bin/dev.rs +++ b/src/bin/dev.rs @@ -15,9 +15,9 @@ use ggrs_plot_operator::config::OperatorConfig; use ggrs_plot_operator::memprof; use ggrs_plot_operator::pipeline; -use 
ggrs_plot_operator::tercen::{DevContext, TercenClient, TercenContext}; use std::sync::Arc; use std::time::Instant; +use tercen_rs::{DevContext, TercenClient, TercenContext}; fn log_phase(start: Instant, phase: &str) { let elapsed = start.elapsed(); @@ -117,8 +117,8 @@ async fn main() -> Result<(), Box> { fn load_dev_config( ui_point_size: Option, ) -> Result> { - use ggrs_plot_operator::tercen::client::proto::{OperatorRef, OperatorSettings, PropertyValue}; use std::fs; + use tercen_rs::client::proto::{OperatorRef, OperatorSettings, PropertyValue}; let config_path = "operator_config.json"; let config_json = match fs::read_to_string(config_path) { diff --git a/src/bin/prepare.rs b/src/bin/prepare.rs index c982b9b..ba492e9 100644 --- a/src/bin/prepare.rs +++ b/src/bin/prepare.rs @@ -16,9 +16,9 @@ //! cargo run --bin prepare -- --delete-project PROJECT_ID //! ``` -use ggrs_plot_operator::tercen::client::proto; -use ggrs_plot_operator::tercen::TercenClient; use std::sync::Arc; +use tercen_rs::client::proto; +use tercen_rs::TercenClient; #[tokio::main] async fn main() -> Result<(), Box> { diff --git a/src/config.rs b/src/config.rs index 6d71601..7645f87 100644 --- a/src/config.rs +++ b/src/config.rs @@ -6,9 +6,9 @@ //! Property definitions and defaults are parsed from operator.json at compile time //! via the `OperatorPropertyReader` which ensures single-source-of-truth for defaults. -use crate::tercen::client::proto::OperatorSettings; -use crate::tercen::operator_properties::OperatorPropertyReader; -use crate::tercen::properties::PlotDimension; +use crate::operator_props::OperatorPropertyReader; +use tercen_rs::client::proto::OperatorSettings; +use tercen_rs::PlotDimension; /// How to aggregate multiple data points in the same heatmap cell #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] @@ -369,7 +369,7 @@ impl OperatorConfig { /// but not yet used by GGRS for positioning along edges - that requires extending /// the GGRS rendering logic. 
pub fn to_legend_position(&self) -> ggrs_core::theme::LegendPosition { - use crate::tercen::operator_properties::registry; + use crate::operator_props::registry; use ggrs_core::theme::LegendPosition; match self.legend_position.to_lowercase().as_str() { diff --git a/src/ggrs_integration/stream_generator.rs b/src/ggrs_integration/stream_generator.rs index 6510069..3132d8f 100644 --- a/src/ggrs_integration/stream_generator.rs +++ b/src/ggrs_integration/stream_generator.rs @@ -4,9 +4,6 @@ //! enabling lazy loading of data directly from Tercen's gRPC API. use crate::config::HeatmapCellAggregation; -use crate::tercen::{ - tson_to_dataframe, ChartKind, FacetInfo, SchemaCache, TableStreamer, TercenClient, -}; use ggrs_core::{ aes::Aes, data::DataFrame, @@ -19,6 +16,10 @@ use ggrs_core::{ use polars::prelude::IntoColumn; use std::collections::HashMap; use std::sync::{Arc, RwLock}; +use tercen_rs::{ + extract_column_names_from_schema, tson_to_dataframe, ChartKind, FacetInfo, SchemaCache, + TableStreamer, TercenClient, +}; /// Default number of categorical color levels in Tercen's built-in palette. /// When no actual category names are available, generic labels "Level 0" through "Level 7" are used. 
@@ -43,9 +44,9 @@ pub struct TercenStreamConfig { /// Chunk size for streaming data pub chunk_size: usize, /// Color factor configurations (legacy - used when all layers share same colors) - pub color_infos: Vec, + pub color_infos: Vec, /// Per-layer color configuration (for mixed-layer scenarios) - pub per_layer_colors: Option, + pub per_layer_colors: Option, /// Page factor names for pagination pub page_factors: Vec, /// Optional schema cache for multi-page plots @@ -116,13 +117,13 @@ impl TercenStreamConfig { } /// Set color information (legacy - use per_layer_colors for mixed scenarios) - pub fn colors(mut self, color_infos: Vec) -> Self { + pub fn colors(mut self, color_infos: Vec) -> Self { self.color_infos = color_infos; self } /// Set per-layer color configuration (for mixed-layer scenarios) - pub fn per_layer_colors(mut self, config: Option) -> Self { + pub fn per_layer_colors(mut self, config: Option) -> Self { self.per_layer_colors = config; self } @@ -181,9 +182,9 @@ impl TercenStreamConfig { /// Extract row count from schema fn extract_row_count_from_schema( - schema: &crate::tercen::client::proto::ESchema, + schema: &tercen_rs::client::proto::ESchema, ) -> Result> { - use crate::tercen::client::proto::e_schema; + use tercen_rs::client::proto::e_schema; // All schema types (TableSchema, ComputedTableSchema, CubeQueryTableSchema) have nRows field match &schema.object { @@ -195,27 +196,6 @@ fn extract_row_count_from_schema( } } -/// Helper function to extract column names from a schema -pub fn extract_column_names_from_schema( - schema: &crate::tercen::client::proto::ESchema, -) -> Result, Box> { - use crate::tercen::client::proto::e_schema; - - if let Some(e_schema::Object::Cubequerytableschema(cqts)) = &schema.object { - let mut column_names = Vec::new(); - for col in &cqts.columns { - if let Some(crate::tercen::client::proto::e_column_schema::Object::Columnschema(cs)) = - &col.object - { - column_names.push(cs.name.clone()); - } - } - Ok(column_names) 
- } else { - Err("Schema is not a CubeQueryTableSchema".into()) - } -} - /// Tercen implementation of GGRS StreamGenerator /// /// Streams raw data from Tercen tables. Does NOT transform coordinates. @@ -252,10 +232,10 @@ pub struct TercenStreamGenerator { chunk_size: usize, /// Color information (factors and palettes) - legacy field - color_infos: Vec, + color_infos: Vec, /// Per-layer color configuration (for mixed-layer scenarios) - per_layer_colors: Option, + per_layer_colors: Option, /// Cached legend scale (loaded during initialization) cached_legend_scale: LegendScale, @@ -501,7 +481,7 @@ impl TercenStreamGenerator { if !color_infos.is_empty() { eprintln!("DEBUG: Color factor: '{}'", color_infos[0].factor_name); match &color_infos[0].mapping { - crate::tercen::ColorMapping::Continuous(palette) => { + tercen_rs::ColorMapping::Continuous(palette) => { eprintln!( "DEBUG: Continuous palette with {} color stops", palette.stops.len() @@ -513,7 +493,7 @@ impl TercenStreamGenerator { ); } } - crate::tercen::ColorMapping::Categorical(color_map) => { + tercen_rs::ColorMapping::Categorical(color_map) => { eprintln!( "DEBUG: Categorical palette with {} categories", color_map.mappings.len() @@ -625,7 +605,7 @@ impl TercenStreamGenerator { axis_ranges: HashMap<(usize, usize), (AxisData, AxisData)>, total_rows: usize, chunk_size: usize, - color_infos: Vec, + color_infos: Vec, page_factors: Vec, ) -> Self { // Aesthetics use dequantized coordinates: .x and .y (actual data values) @@ -792,13 +772,13 @@ impl TercenStreamGenerator { let mut has_color_levels = false; for color_info in &self.color_infos { match &color_info.mapping { - crate::tercen::ColorMapping::Categorical(_) => { + tercen_rs::ColorMapping::Categorical(_) => { if !has_color_levels { columns.push(".colorLevels".to_string()); has_color_levels = true; } } - crate::tercen::ColorMapping::Continuous(_) => { + tercen_rs::ColorMapping::Continuous(_) => { columns.push(color_info.factor_name.clone()); } } @@ -839,7 
+819,7 @@ impl TercenStreamGenerator { } let chunk_df = tson_to_dataframe(&tson_data)?; - let chunk_rows = chunk_df.nrow(); + let chunk_rows = chunk_df.height(); if chunk_rows == 0 { break; @@ -850,7 +830,7 @@ impl TercenStreamGenerator { offset, chunk_rows ); - accumulated_dfs.push(chunk_df.inner().clone()); + accumulated_dfs.push(chunk_df); offset += chunk_rows; } @@ -891,7 +871,7 @@ impl TercenStreamGenerator { let mut has_color_levels_agg = false; for color_info in &self.color_infos { match &color_info.mapping { - crate::tercen::ColorMapping::Categorical(_) => { + tercen_rs::ColorMapping::Categorical(_) => { if !has_color_levels_agg { // Categorical always uses last (mean/median don't make sense) let expr = col(".colorLevels").last(); @@ -899,7 +879,7 @@ impl TercenStreamGenerator { has_color_levels_agg = true; } } - crate::tercen::ColorMapping::Continuous(_) => { + tercen_rs::ColorMapping::Continuous(_) => { // For continuous colors, use the configured aggregation method let col_name = &color_info.factor_name; let expr = match self.heatmap_cell_aggregation { @@ -926,17 +906,18 @@ impl TercenStreamGenerator { offset ); - // Wrap in ggrs DataFrame - let mut df = ggrs_core::data::DataFrame::from_polars(aggregated); - // Add color columns to the aggregated data - if !self.color_infos.is_empty() { + let result = if !self.color_infos.is_empty() { eprintln!("DEBUG: Adding color columns to aggregated data"); - df = crate::tercen::color_processor::add_color_columns(df, &self.color_infos)?; + let colored = + tercen_rs::color_processor::add_color_columns(aggregated, &self.color_infos)?; eprintln!("DEBUG: Color columns added to aggregated data"); - } + colored + } else { + aggregated + }; - Ok(df) + Ok(ggrs_core::data::DataFrame::from_polars(result)) } /// Load axis ranges from pre-computed Y-axis table @@ -1015,7 +996,7 @@ impl TercenStreamGenerator { .await?; println!(" Parsing {} bytes...", data.len()); - let df = tson_to_dataframe(&data)?; + let df = 
ggrs_core::data::DataFrame::from_polars(tson_to_dataframe(&data)?); println!(" Parsed {} rows", df.nrow()); // Get total row count from main table schema @@ -1230,7 +1211,7 @@ impl TercenStreamGenerator { .await?; println!(" Parsing {} bytes...", data.len()); - let df = tson_to_dataframe(&data)?; + let df = ggrs_core::data::DataFrame::from_polars(tson_to_dataframe(&data)?); println!(" Parsed {} rows", df.nrow()); let has_ci = df.columns().contains(&".ci".to_string()); @@ -1324,7 +1305,7 @@ impl TercenStreamGenerator { let entries: Vec<(String, [u8; 3])> = (0..DEFAULT_PALETTE_LEVELS) .map(|i| { let label = format!("Level {}", i); - let color = crate::tercen::categorical_color_from_level(i as i32); + let color = tercen_rs::categorical_color_from_level(i as i32); (label, color) }) .collect(); @@ -1341,8 +1322,8 @@ impl TercenStreamGenerator { /// For mixed-layer scenarios (some layers with colors, some without), /// creates a combined legend with sections for each type. fn load_legend_scale( - color_infos: &[crate::tercen::ColorInfo], - per_layer_colors: Option<&crate::tercen::PerLayerColorConfig>, + color_infos: &[tercen_rs::ColorInfo], + per_layer_colors: Option<&tercen_rs::PerLayerColorConfig>, layer_y_factor_names: &[String], ) -> Result> { // Handle mixed-layer scenarios @@ -1368,7 +1349,7 @@ impl TercenStreamGenerator { // Build combined aesthetic name from all categorical factor names let categorical_names: Vec<&str> = color_infos .iter() - .filter(|ci| matches!(ci.mapping, crate::tercen::ColorMapping::Categorical(_))) + .filter(|ci| matches!(ci.mapping, tercen_rs::ColorMapping::Categorical(_))) .map(|ci| ci.factor_name.as_str()) .collect(); let combined_name = if categorical_names.is_empty() { @@ -1381,7 +1362,7 @@ impl TercenStreamGenerator { let color_info = &color_infos[0]; match &color_info.mapping { - crate::tercen::ColorMapping::Continuous(palette) => { + tercen_rs::ColorMapping::Continuous(palette) => { // For continuous colors, get the min/max and 
color stops from the palette if let Some((min_val, max_val)) = palette.range() { // Convert Tercen ColorStops to GGRS LegendColorStops @@ -1409,7 +1390,7 @@ impl TercenStreamGenerator { Ok(LegendScale::None) } } - crate::tercen::ColorMapping::Categorical(color_map) => { + tercen_rs::ColorMapping::Categorical(color_map) => { // For categorical colors, check if we have explicit mappings if !color_map.mappings.is_empty() { // Explicit label→color mappings from palette @@ -1435,7 +1416,7 @@ impl TercenStreamGenerator { .iter() .enumerate() .map(|(i, label)| { - let color = crate::tercen::categorical_color_from_level(i as i32); + let color = tercen_rs::categorical_color_from_level(i as i32); (label.clone(), color) }) .collect(); @@ -1452,7 +1433,7 @@ impl TercenStreamGenerator { let entries: Vec<(String, [u8; 3])> = (0..n_levels) .map(|i| { let label = format!("Level {}", i); - let color = crate::tercen::categorical_color_from_level(i as i32); + let color = tercen_rs::categorical_color_from_level(i as i32); (label, color) }) .collect(); @@ -1477,10 +1458,10 @@ impl TercenStreamGenerator { /// - Layers with explicit color factors (continuous or discrete) /// - Layers without color factors (discrete with Y-factor name and pre-computed layer color) fn build_combined_legend( - per_layer_colors: &crate::tercen::PerLayerColorConfig, + per_layer_colors: &tercen_rs::PerLayerColorConfig, layer_y_factor_names: &[String], ) -> Result> { - use crate::tercen::LayerColorConfig; + use tercen_rs::LayerColorConfig; let mut sections: Vec = Vec::new(); @@ -1587,10 +1568,10 @@ impl TercenStreamGenerator { /// Build a discrete legend for layer-based colors (all layers with constant colors) fn build_layer_based_legend( - per_layer_colors: &crate::tercen::PerLayerColorConfig, + per_layer_colors: &tercen_rs::PerLayerColorConfig, layer_y_factor_names: &[String], ) -> Result> { - use crate::tercen::LayerColorConfig; + use tercen_rs::LayerColorConfig; let entries: Vec<(String, [u8; 3])> = 
per_layer_colors .layer_configs @@ -1701,7 +1682,7 @@ impl TercenStreamGenerator { // Fetch color columns for layers that have explicit colors if let Some(ref plc) = self.per_layer_colors { - use crate::tercen::LayerColorConfig; + use tercen_rs::LayerColorConfig; for (layer_idx, config) in plc.layer_configs.iter().enumerate() { match config { LayerColorConfig::Categorical { .. } => { @@ -1739,13 +1720,13 @@ impl TercenStreamGenerator { // Legacy uniform colors: all layers share the same color config for color_info in &self.color_infos { match &color_info.mapping { - crate::tercen::ColorMapping::Categorical(_) => { + tercen_rs::ColorMapping::Categorical(_) => { // Add .colorLevels for categorical colors if !columns.contains(&".colorLevels".to_string()) { columns.push(".colorLevels".to_string()); } } - crate::tercen::ColorMapping::Continuous(_) => { + tercen_rs::ColorMapping::Continuous(_) => { // Add the factor column for continuous colors columns.push(color_info.factor_name.clone()); } @@ -1777,24 +1758,21 @@ impl TercenStreamGenerator { // Parse TSON to DataFrame - contains .ci, .ri, .xs, .ys, and color factors let mut df = tson_to_dataframe(&tson_data)?; - eprintln!("DEBUG: Parsed DataFrame with {} rows", df.nrow()); - eprintln!("DEBUG: Returned columns: {:?}", df.columns()); + eprintln!("DEBUG: Parsed DataFrame with {} rows", df.height()); + eprintln!("DEBUG: Returned columns: {:?}", df.get_column_names()); // DEBUG: Print heatmap column info (first chunk only) if data_range.start == 0 { - let polars_df = df.inner(); - if let Ok(n_x_levels) = polars_df.column(".nXLevels") { + if let Ok(n_x_levels) = df.column(".nXLevels") { if let Ok(n_x_i64) = n_x_levels.i64() { let n_levels = n_x_i64.get(0).unwrap_or(0); eprintln!("DEBUG HEATMAP: Total X levels (columns) = {}", n_levels); } } // Compare .xs, .ys, .xLevels - if let (Ok(xs_col), Ok(ys_col), Ok(xl_col)) = ( - polars_df.column(".xs"), - polars_df.column(".ys"), - polars_df.column(".xLevels"), - ) { + if let 
(Ok(xs_col), Ok(ys_col), Ok(xl_col)) = + (df.column(".xs"), df.column(".ys"), df.column(".xLevels")) + { if let (Ok(xs_i64), Ok(ys_i64), Ok(xl_i64)) = (xs_col.i64(), ys_col.i64(), xl_col.i64()) { @@ -1831,7 +1809,7 @@ impl TercenStreamGenerator { plc.is_mixed(), plc.has_constant_colors() ); - df = crate::tercen::color_processor::add_mixed_layer_colors(df, plc)?; + df = tercen_rs::color_processor::add_mixed_layer_colors(df, plc)?; eprintln!("DEBUG: Per-layer colors added successfully"); } else if !self.color_infos.is_empty() { // Single-layer: legacy uniform colors (explicit color factors) @@ -1839,7 +1817,7 @@ impl TercenStreamGenerator { "DEBUG: Adding color columns for {} color factors (legacy path)", self.color_infos.len() ); - df = crate::tercen::color_processor::add_color_columns(df, &self.color_infos)?; + df = tercen_rs::color_processor::add_color_columns(df, &self.color_infos)?; eprintln!("DEBUG: Color columns added successfully"); } else if use_layer_colors { // Pure layer-based coloring (no color factors on any layer) @@ -1847,14 +1825,14 @@ impl TercenStreamGenerator { "DEBUG: Adding layer-based colors for {} layers using palette {:?}", self.n_layers, self.layer_palette_name ); - df = crate::tercen::color_processor::add_layer_colors( + df = tercen_rs::color_processor::add_layer_colors( df, self.layer_palette_name.as_deref(), )?; eprintln!("DEBUG: Layer colors added successfully"); } - Ok(df) + Ok(ggrs_core::data::DataFrame::from_polars(df)) } // NOTE: Dequantization now happens in GGRS, not in the operator diff --git a/src/lib.rs b/src/lib.rs index 868a61e..f53338c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,5 +6,5 @@ pub mod config; pub mod ggrs_integration; pub mod memprof; +pub mod operator_props; pub mod pipeline; -pub mod tercen; diff --git a/src/main.rs b/src/main.rs index 4a8a544..d11e2fe 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,8 +11,10 @@ pub mod config; pub mod ggrs_integration; pub mod memprof; +pub mod operator_props; pub mod 
pipeline; -pub mod tercen; + +use tercen_rs::TercenContext; #[cfg(feature = "jemalloc")] use tikv_jemallocator::Jemalloc; @@ -35,7 +37,7 @@ async fn main() { // Connect to Tercen println!("Attempting to connect to Tercen..."); - match tercen::TercenClient::from_env().await { + match tercen_rs::TercenClient::from_env().await { Ok(client) => { println!("✓ Successfully connected to Tercen!\n"); @@ -119,16 +121,14 @@ fn print_env_info() { /// Process a Tercen task: fetch data, generate plot, upload result async fn process_task( - client_arc: std::sync::Arc, + client_arc: std::sync::Arc, task_id: &str, ) -> Result<(), Box> { - use tercen::TercenContext; - println!("=== Task Processing Started ==="); println!("Task ID: {}\n", task_id); // Create ProductionContext - let ctx = tercen::ProductionContext::from_task_id(client_arc.clone(), task_id).await?; + let ctx = tercen_rs::ProductionContext::from_task_id(client_arc.clone(), task_id).await?; // Load configuration let config = @@ -141,7 +141,7 @@ async fn process_task( println!("\n[5/5] Uploading result(s) to Tercen..."); let mut task_service = client_arc.task_service()?; - let request = tonic::Request::new(tercen::client::proto::GetRequest { + let request = tonic::Request::new(tercen_rs::client::proto::GetRequest { id: task_id.to_string(), ..Default::default() }); @@ -150,7 +150,7 @@ async fn process_task( if plot_results.len() == 1 { let plot = plot_results.into_iter().next().unwrap(); - tercen::result::save_result( + tercen_rs::result::save_result( client_arc.clone(), ctx.project_id(), ctx.namespace(), @@ -175,7 +175,7 @@ async fn process_task( ); } - tercen::result::save_results( + tercen_rs::result::save_results( client_arc.clone(), ctx.project_id(), ctx.namespace(), diff --git a/src/tercen/operator_properties.rs b/src/operator_props/mod.rs similarity index 99% rename from src/tercen/operator_properties.rs rename to src/operator_props/mod.rs index ea4980b..a7146e3 100644 --- a/src/tercen/operator_properties.rs +++ 
b/src/operator_props/mod.rs @@ -4,8 +4,8 @@ //! and their default values. This ensures defaults are defined in ONE place (operator.json) //! and eliminates hardcoded fallback values scattered throughout the codebase. -use crate::tercen::client::proto::OperatorSettings; use std::collections::HashMap; +use tercen_rs::client::proto::OperatorSettings; /// Operator.json embedded at compile time const OPERATOR_JSON: &str = include_str!("../../operator.json"); diff --git a/src/pipeline.rs b/src/pipeline.rs index cd8fd60..3a6fb04 100644 --- a/src/pipeline.rs +++ b/src/pipeline.rs @@ -12,13 +12,13 @@ use crate::config::OperatorConfig; use crate::ggrs_integration::{TercenStreamConfig, TercenStreamGenerator}; use crate::memprof; -use crate::tercen::{ - extract_page_values, new_schema_cache, ChartKind, ColorMapping, PlotResult, TercenContext, -}; use ggrs_core::scale::ContinuousScale; use ggrs_core::stream::{DataCache, StreamGenerator}; use ggrs_core::theme::elements::Element; use ggrs_core::{EnginePlotSpec, Geom, HeatmapLayout, PlotGenerator, PlotRenderer}; +use tercen_rs::{ + extract_page_values, new_schema_cache, ChartKind, ColorMapping, PlotResult, TercenContext, +}; /// Error type for pipeline operations pub type PipelineError = Box; @@ -198,7 +198,7 @@ fn render_page( ctx: &C, config: &OperatorConfig, stream_gen: TercenStreamGenerator, - page_value: &crate::tercen::PageValue, + page_value: &tercen_rs::PageValue, page_idx: usize, total_pages: usize, cache: Option<&DataCache>, @@ -491,7 +491,7 @@ fn print_context_info(ctx: &C, config: &OperatorConfig) { fn print_color_info(ctx: &C) { // Check for per-layer color configuration first if let Some(plc) = ctx.per_layer_colors() { - use crate::tercen::LayerColorConfig; + use tercen_rs::LayerColorConfig; println!(" Per-layer color configuration:"); println!( diff --git a/src/tercen/README.md b/src/tercen/README.md deleted file mode 100644 index 2ef5c3b..0000000 --- a/src/tercen/README.md +++ /dev/null @@ -1,122 +0,0 @@ -# Tercen 
Module - Future Library Extraction - -This module contains all Tercen gRPC client code and is designed to be extracted into a separate `tercen-rust` crate in the future. - -## Planned Structure - -``` -src/tercen/ -├── mod.rs # Module root with re-exports -├── client.rs # TercenClient with connection and auth -├── error.rs # TercenError type -├── types.rs # Common types and conversions -└── services/ - ├── mod.rs - ├── task.rs # TaskService wrapper - ├── table.rs # TableSchemaService wrapper - └── file.rs # FileService wrapper -``` - -## Extraction Plan - -When ready to create the `tercen-rust` library: - -### Step 1: Create new crate -```bash -cd .. -cargo new --lib tercen-rust -``` - -### Step 2: Move module contents -```bash -cp -r ggrs_plot_operator/src/tercen/* tercen-rust/src/ -``` - -### Step 3: Move proto files -```bash -mkdir tercen-rust/protos -cp ggrs_plot_operator/protos/*.proto tercen-rust/protos/ -``` - -### Step 4: Update dependencies -In `tercen-rust/Cargo.toml`: -```toml -[dependencies] -tonic = "0.11" -prost = "0.12" -tokio = { version = "1.35", features = ["rt-multi-thread"] } -thiserror = "1.0" - -[build-dependencies] -tonic-build = "0.11" -``` - -### Step 5: Update this project -In `ggrs_plot_operator/Cargo.toml`: -```toml -[dependencies] -tercen-rust = { path = "../tercen-rust" } -# Or from crates.io when published: -# tercen-rust = "0.1" -``` - -In `ggrs_plot_operator/src/main.rs`: -```rust -// Remove: mod tercen; -// Add: -use tercen_rust::{TercenClient, TercenError}; -``` - -## Design Principles - -To make extraction easy, this module follows these principles: - -1. **Self-contained**: All Tercen-specific code lives here -2. **No GGRS dependencies**: Keep plotting logic separate -3. **Clear public API**: Re-export only what's needed via mod.rs -4. **Standard error handling**: Use thiserror for error types -5. 
**Async-first**: All I/O operations are async - -## Public API (when complete) - -```rust -// Main client -pub struct TercenClient { /* ... */ } - -impl TercenClient { - pub async fn connect(uri: &str, username: &str, password: &str) -> Result; - pub async fn from_env() -> Result; - - pub fn task_service(&self) -> TaskService; - pub fn table_service(&self) -> TableService; - pub fn file_service(&self) -> FileService; -} - -// Services -pub struct TaskService { /* ... */ } -pub struct TableService { /* ... */ } -pub struct FileService { /* ... */ } - -// Error handling -pub enum TercenError { /* ... */ } -``` - -## Usage Example (after extraction) - -```rust -use tercen_rust::{TercenClient, TercenError}; - -#[tokio::main] -async fn main() -> Result<(), TercenError> { - // Connect to Tercen - let client = TercenClient::from_env().await?; - - // Use services - let task = client.task_service().get_task("task-id").await?; - let data = client.table_service() - .stream_table("table-id", &[".x", ".y"]) - .await?; - - Ok(()) -} -``` diff --git a/src/tercen/client.rs b/src/tercen/client.rs deleted file mode 100644 index 67a0a72..0000000 --- a/src/tercen/client.rs +++ /dev/null @@ -1,208 +0,0 @@ -use super::error::{Result, TercenError}; -use tonic::metadata::MetadataValue; -use tonic::service::Interceptor; -use tonic::transport::{Channel, ClientTlsConfig}; -use tonic::{Request, Status}; - -// Include the generated protobuf code -#[allow( - dead_code, - unused_imports, - clippy::large_enum_variant, - clippy::enum_variant_names -)] -pub mod proto { - tonic::include_proto!("tercen"); -} - -use proto::document_service_client::DocumentServiceClient; -use proto::event_service_client::EventServiceClient; -use proto::file_service_client::FileServiceClient; -use proto::table_schema_service_client::TableSchemaServiceClient; -use proto::task_service_client::TaskServiceClient; -use proto::user_service_client::UserServiceClient; -use proto::workflow_service_client::WorkflowServiceClient; 
- -/// Type alias for authenticated TaskService client -pub type AuthTaskServiceClient = - TaskServiceClient>; - -/// Type alias for authenticated UserService client -#[allow(dead_code)] -pub type AuthUserServiceClient = - UserServiceClient>; - -/// Type alias for authenticated EventService client -pub type AuthEventServiceClient = - EventServiceClient>; - -/// Type alias for authenticated TableSchemaService client -#[allow(dead_code)] -pub type AuthTableSchemaServiceClient = TableSchemaServiceClient< - tonic::service::interceptor::InterceptedService, ->; - -/// Type alias for authenticated WorkflowService client -#[allow(dead_code)] -pub type AuthWorkflowServiceClient = WorkflowServiceClient< - tonic::service::interceptor::InterceptedService, ->; - -/// Type alias for authenticated DocumentService client -#[allow(dead_code)] -pub type AuthDocumentServiceClient = DocumentServiceClient< - tonic::service::interceptor::InterceptedService, ->; - -/// Type alias for authenticated FileService client -pub type AuthFileServiceClient = - FileServiceClient>; - -/// Interceptor that adds Bearer token authentication to all requests -#[derive(Clone)] -pub struct AuthInterceptor { - token: MetadataValue, -} - -impl AuthInterceptor { - fn new(token: String) -> Result { - // gRPC expects token without "Bearer" prefix (unlike REST APIs) - let token = token - .parse() - .map_err(|e| TercenError::Auth(format!("Invalid token format: {}", e)))?; - - Ok(AuthInterceptor { token }) - } -} - -impl Interceptor for AuthInterceptor { - fn call(&mut self, mut request: Request<()>) -> std::result::Result, Status> { - request - .metadata_mut() - .insert("authorization", self.token.clone()); - Ok(request) - } -} - -/// Main Tercen gRPC client -pub struct TercenClient { - channel: Channel, - token: String, -} - -impl TercenClient { - /// Create a new TercenClient by connecting to the specified endpoint with a token - pub async fn connect(endpoint: String, token: String) -> Result { - // Configure 
TLS only for https:// endpoints - let use_tls = endpoint.starts_with("https://"); - - // Parse and connect to the endpoint - let mut channel_builder = Channel::from_shared(endpoint.clone()) - .map_err(|e| TercenError::Config(format!("Invalid endpoint '{}': {}", endpoint, e)))?; - - // Add TLS config only for HTTPS - if use_tls { - let tls = ClientTlsConfig::new(); - channel_builder = channel_builder.tls_config(tls).map_err(|e| { - TercenError::Config(format!("Failed to configure TLS for '{}': {}", endpoint, e)) - })?; - } - - let channel = channel_builder.connect().await.map_err(|e| { - TercenError::Connection(format!("Failed to connect to '{}': {}", endpoint, e)) - })?; - - Ok(TercenClient { channel, token }) - } - - /// Create a new TercenClient from environment variables - /// - /// Required environment variables: - /// - `TERCEN_URI`: The Tercen server URI (e.g., https://tercen.com:5400) - /// - `TERCEN_TOKEN`: The authentication token - pub async fn from_env() -> Result { - let uri = std::env::var("TERCEN_URI") - .map_err(|_| TercenError::Config("TERCEN_URI environment variable not set".into()))?; - - let token = std::env::var("TERCEN_TOKEN") - .map_err(|_| TercenError::Config("TERCEN_TOKEN environment variable not set".into()))?; - - Self::connect(uri, token).await - } - - /// Get a UserService client with authentication - #[allow(dead_code)] - pub fn user_service(&self) -> Result { - let interceptor = AuthInterceptor::new(self.token.clone())?; - Ok(UserServiceClient::with_interceptor( - self.channel.clone(), - interceptor, - )) - } - - /// Get a TaskService client with authentication - pub fn task_service(&self) -> Result { - let interceptor = AuthInterceptor::new(self.token.clone())?; - Ok(TaskServiceClient::with_interceptor( - self.channel.clone(), - interceptor, - )) - } - - /// Get an EventService client with authentication - pub fn event_service(&self) -> Result { - let interceptor = AuthInterceptor::new(self.token.clone())?; - 
Ok(EventServiceClient::with_interceptor( - self.channel.clone(), - interceptor, - )) - } - - /// Get a TableSchemaService client with authentication - #[allow(dead_code)] - pub fn table_service(&self) -> Result { - let interceptor = AuthInterceptor::new(self.token.clone())?; - Ok(TableSchemaServiceClient::with_interceptor( - self.channel.clone(), - interceptor, - )) - } - - /// Get a WorkflowService client with authentication - pub fn workflow_service(&self) -> Result { - let interceptor = AuthInterceptor::new(self.token.clone())?; - Ok(WorkflowServiceClient::with_interceptor( - self.channel.clone(), - interceptor, - )) - } - - /// Get a DocumentService client with authentication - #[allow(dead_code)] - pub fn document_service(&self) -> Result { - let interceptor = AuthInterceptor::new(self.token.clone())?; - Ok(DocumentServiceClient::with_interceptor( - self.channel.clone(), - interceptor, - )) - } - - /// Get a FileService client with authentication - pub fn file_service(&self) -> Result { - let interceptor = AuthInterceptor::new(self.token.clone())?; - Ok(FileServiceClient::with_interceptor( - self.channel.clone(), - interceptor, - )) - } - - /// Get the underlying channel for use with arbitrary service clients - pub fn channel(&self) -> &Channel { - &self.channel - } - - /// Create an auth interceptor for use with arbitrary service clients - pub fn auth_interceptor(&self) -> Result { - AuthInterceptor::new(self.token.clone()) - } -} diff --git a/src/tercen/color_processor.rs b/src/tercen/color_processor.rs deleted file mode 100644 index b517313..0000000 --- a/src/tercen/color_processor.rs +++ /dev/null @@ -1,537 +0,0 @@ -//! Color column processing for DataFrames -//! -//! Transforms color factor values into packed RGB colors for rendering. -//! This module handles both continuous (palette interpolation) and categorical -//! (level-based) color mapping. 
- -use crate::tercen::{ - categorical_color_from_level, interpolate_color, ColorInfo, ColorMapping, ColorPalette, -}; -use ggrs_core::data::DataFrame; -use polars::prelude::*; -use std::borrow::Cow; - -/// Add packed RGB color column to DataFrame based on color factors -/// -/// For continuous mapping: interpolates values using the palette -/// For categorical mapping: maps levels to default palette colors -/// -/// # Arguments -/// * `df` - DataFrame with color factor column(s) -/// * `color_infos` - Color configuration (factor name, mapping, quartiles) -/// -/// # Returns -/// DataFrame with `.color` column added (packed RGB as i64) -/// -/// # Errors -/// Returns error if: -/// - Color column is missing from DataFrame -/// - Color column has wrong type for the mapping -/// - `.colorLevels` column missing for categorical mapping -pub fn add_color_columns( - mut df: DataFrame, - color_infos: &[ColorInfo], -) -> Result> { - // For now, only use the first color factor - // TODO: Handle multiple color factors (blend? choose first? user option?) 
- let color_info = &color_infos[0]; - - // Get mutable reference to inner Polars DataFrame (no cloning) - let polars_df = df.inner_mut(); - - // Generate RGB values based on mapping type - let nrows = polars_df.height(); - let mut r_values = Vec::with_capacity(nrows); - let mut g_values = Vec::with_capacity(nrows); - let mut b_values = Vec::with_capacity(nrows); - - match &color_info.mapping { - ColorMapping::Continuous(palette) => { - add_continuous_colors( - polars_df, - color_info, - palette, - &mut r_values, - &mut g_values, - &mut b_values, - )?; - } - - ColorMapping::Categorical(color_map) => { - add_categorical_colors( - polars_df, - color_info, - color_map, - &mut r_values, - &mut g_values, - &mut b_values, - )?; - } - } - - // Pack RGB values directly as u32 (stored as i64 in Polars) - // This avoids String allocation per point and hex parsing at render time - // Memory saving: ~24MB for 475K points (Option vs i64) - let packed_colors: Vec = (0..r_values.len()) - .map(|i| ggrs_core::PackedRgba::rgb(r_values[i], g_values[i], b_values[i]).to_u32() as i64) - .collect(); - - // Add color column as packed integers - polars_df.with_column(Series::new(".color".into(), packed_colors))?; - - // Debug: Print first color values - if polars_df.height() > 0 { - if let Ok(color_col) = polars_df.column(".color") { - let int_col = color_col.i64().unwrap(); - let first_colors: Vec = int_col - .into_iter() - .take(3) - .map(|opt| { - opt.map(|v| { - let packed = ggrs_core::PackedRgba::from_u32(v as u32); - format!("RGB({},{},{})", packed.red(), packed.green(), packed.blue()) - }) - .unwrap_or_else(|| "NULL".to_string()) - }) - .collect(); - eprintln!("DEBUG: First 3 .color packed values: {:?}", first_colors); - } - } - - Ok(df) -} - -/// Add continuous colors using palette interpolation -fn add_continuous_colors( - polars_df: &polars::frame::DataFrame, - color_info: &ColorInfo, - palette: &ColorPalette, - r_values: &mut Vec, - g_values: &mut Vec, - b_values: &mut Vec, -) -> 
Result<(), Box> { - let color_col_name = &color_info.factor_name; - eprintln!( - "DEBUG add_color_columns: Using continuous color mapping for '{}', is_user_defined={}", - color_col_name, palette.is_user_defined - ); - - // Rescale palette if is_user_defined=false and quartiles are available - let effective_palette: Cow<'_, ColorPalette> = if !palette.is_user_defined { - if let Some(ref quartiles) = color_info.quartiles { - eprintln!( - "DEBUG add_color_columns: Rescaling palette using quartiles: {:?}", - quartiles - ); - let rescaled = palette.rescale_from_quartiles(quartiles); - eprintln!( - "DEBUG add_color_columns: Original range: {:?}, Rescaled range: {:?}", - palette.range(), - rescaled.range() - ); - Cow::Owned(rescaled) - } else { - eprintln!( - "WARN add_color_columns: is_user_defined=false but no quartiles available, using original palette" - ); - Cow::Borrowed(palette) - } - } else { - Cow::Borrowed(palette) - }; - - // Get the color factor column - let color_series = polars_df - .column(color_col_name) - .map_err(|e| format!("Color column '{}' not found: {}", color_col_name, e))?; - - // Extract f64 values - let color_values = color_series.f64().map_err(|e| { - format!( - "Color column '{}' is not f64 for continuous mapping: {}", - color_col_name, e - ) - })?; - - // Debug: Print first few color factor values to verify we're getting expected data - let sample_values: Vec = color_values.iter().take(5).flatten().collect(); - if !sample_values.is_empty() { - let min_val = color_values.min().unwrap_or(0.0); - let max_val = color_values.max().unwrap_or(0.0); - eprintln!( - "DEBUG add_color_columns: {} values range [{:.2}, {:.2}], first 5: {:?}", - color_col_name, min_val, max_val, sample_values - ); - } - - // Map each value to RGB using palette interpolation - for opt_value in color_values.iter() { - if let Some(value) = opt_value { - let rgb = interpolate_color(value, &effective_palette); - r_values.push(rgb[0]); - g_values.push(rgb[1]); - 
b_values.push(rgb[2]); - } else { - // Handle null values with a default color (gray) - r_values.push(128); - g_values.push(128); - b_values.push(128); - } - } - - Ok(()) -} - -/// Add categorical colors using level mapping or explicit category mappings -fn add_categorical_colors( - polars_df: &polars::frame::DataFrame, - color_info: &ColorInfo, - color_map: &crate::tercen::CategoryColorMap, - r_values: &mut Vec, - g_values: &mut Vec, - b_values: &mut Vec, -) -> Result<(), Box> { - eprintln!("DEBUG add_color_columns: Using categorical color mapping"); - eprintln!( - "DEBUG add_color_columns: Category map has {} entries", - color_map.mappings.len() - ); - - // For categorical colors, Tercen uses .colorLevels column (int32) with level indices - // If color_map has explicit mappings, use them; otherwise generate from levels - let use_levels = color_map.mappings.is_empty(); - - if use_levels { - add_categorical_colors_from_levels(polars_df, r_values, g_values, b_values)?; - } else { - add_categorical_colors_from_mappings( - polars_df, color_info, color_map, r_values, g_values, b_values, - )?; - } - - Ok(()) -} - -/// Map .colorLevels column to colors using default categorical palette -fn add_categorical_colors_from_levels( - polars_df: &polars::frame::DataFrame, - r_values: &mut Vec, - g_values: &mut Vec, - b_values: &mut Vec, -) -> Result<(), Box> { - eprintln!("DEBUG add_color_columns: Using .colorLevels column for categorical colors"); - - // Get .colorLevels column instead of the factor column - let levels_series = polars_df - .column(".colorLevels") - .map_err(|e| format!("Categorical colors require .colorLevels column: {}", e))?; - - // Schema says int32 but it comes back as i64, so accept both - let levels = levels_series - .i64() - .map_err(|e| format!(".colorLevels column is not i64: {}", e))?; - - // Map each level to RGB using default categorical palette - for opt_level in levels.iter() { - if let Some(level) = opt_level { - let rgb = 
categorical_color_from_level(level as i32); - r_values.push(rgb[0]); - g_values.push(rgb[1]); - b_values.push(rgb[2]); - } else { - // Handle null values with a default color (gray) - r_values.push(128); - g_values.push(128); - b_values.push(128); - } - } - - Ok(()) -} - -/// Map categorical values using explicit category→color mappings -fn add_categorical_colors_from_mappings( - polars_df: &polars::frame::DataFrame, - color_info: &ColorInfo, - color_map: &crate::tercen::CategoryColorMap, - r_values: &mut Vec, - g_values: &mut Vec, - b_values: &mut Vec, -) -> Result<(), Box> { - let color_col_name = &color_info.factor_name; - eprintln!( - "DEBUG add_color_columns: Using explicit category mappings for '{}'", - color_col_name - ); - - // Get the color factor column - let color_series = polars_df - .column(color_col_name) - .map_err(|e| format!("Color column '{}' not found: {}", color_col_name, e))?; - - let color_values = color_series.str().map_err(|e| { - format!( - "Color column '{}' is not string for categorical mapping: {}", - color_col_name, e - ) - })?; - - for opt_value in color_values.iter() { - if let Some(category) = opt_value { - let rgb = color_map - .mappings - .get(category) - .unwrap_or(&color_map.default_color); - r_values.push(rgb[0]); - g_values.push(rgb[1]); - b_values.push(rgb[2]); - } else { - r_values.push(128); - g_values.push(128); - b_values.push(128); - } - } - - Ok(()) -} - -/// Add mixed-layer colors using the unified LayerColorConfig -/// -/// Each layer has a LayerColorConfig that determines how its points are colored: -/// - Continuous: interpolate values using the layer's palette -/// - Categorical: map levels to colors -/// - Constant: all points get the pre-computed constant color -/// -/// # Arguments -/// * `df` - DataFrame with `.axisIndex` column and color factor columns -/// * `per_layer_config` - Per-layer color configuration (every layer has a config) -/// -/// # Returns -/// DataFrame with `.color` column added (packed RGB as 
i64) -pub fn add_mixed_layer_colors( - mut df: DataFrame, - per_layer_config: &crate::tercen::PerLayerColorConfig, -) -> Result> { - use crate::tercen::LayerColorConfig; - use std::borrow::Cow; - - let polars_df = df.inner_mut(); - let nrows = polars_df.height(); - - eprintln!( - "DEBUG add_mixed_layer_colors: Processing {} rows with {} layers", - nrows, per_layer_config.n_layers - ); - - // Get .axisIndex column (optional for single-layer case) - // When there's only 1 layer, all rows belong to layer 0 by definition - let axis_indices_opt = polars_df - .column(".axisIndex") - .ok() - .and_then(|col| col.i64().ok()); - - // For logging - if axis_indices_opt.is_none() && per_layer_config.n_layers == 1 { - eprintln!("DEBUG add_mixed_layer_colors: Single layer, no .axisIndex column - all rows belong to layer 0"); - } - - // Pre-extract and rescale palettes for continuous layers - let mut continuous_data: std::collections::HashMap< - usize, - (Cow<'_, crate::tercen::ColorPalette>, Vec), - > = std::collections::HashMap::new(); - - for (layer_idx, config) in per_layer_config.layer_configs.iter().enumerate() { - if let LayerColorConfig::Continuous { - palette, - factor_name, - quartiles, - .. 
- } = config - { - // Get the factor column for this layer - if let Ok(col) = polars_df.column(factor_name) { - if let Ok(f64_col) = col.f64() { - let values: Vec = f64_col.iter().map(|v| v.unwrap_or(0.0)).collect(); - - // Rescale palette if quartiles available - let effective_palette: Cow<'_, crate::tercen::ColorPalette> = if !palette - .is_user_defined - { - if let Some(q) = quartiles { - eprintln!( - "DEBUG add_mixed_layer_colors: Rescaling palette for layer {} using quartiles", - layer_idx - ); - Cow::Owned(palette.rescale_from_quartiles(q)) - } else { - Cow::Borrowed(palette) - } - } else { - Cow::Borrowed(palette) - }; - - eprintln!( - "DEBUG add_mixed_layer_colors: Layer {} uses continuous factor '{}' ({} values)", - layer_idx, factor_name, values.len() - ); - continuous_data.insert(layer_idx, (effective_palette, values)); - } - } - } - } - - // Check if we need .colorLevels for any categorical layers - let needs_color_levels = per_layer_config.has_categorical(); - - let color_levels_column: Option> = if needs_color_levels { - polars_df - .column(".colorLevels") - .ok() - .and_then(|col| col.i64().ok()) - .map(|i64_col| i64_col.iter().map(|v| v.unwrap_or(0)).collect()) - } else { - None - }; - - // Build packed color values row by row - let mut packed_colors: Vec = Vec::with_capacity(nrows); - - for row_idx in 0..nrows { - // Get layer index: from .axisIndex if available, otherwise 0 (single layer) - let layer_idx = axis_indices_opt - .as_ref() - .map(|indices| indices.get(row_idx).unwrap_or(0) as usize) - .unwrap_or(0); - - let rgb = match per_layer_config.get(layer_idx) { - Some(LayerColorConfig::Continuous { .. }) => { - // Use pre-extracted continuous data - if let Some((palette, values)) = continuous_data.get(&layer_idx) { - let value = values.get(row_idx).copied().unwrap_or(0.0); - interpolate_color(value, palette) - } else { - [128, 128, 128] // Fallback gray - } - } - Some(LayerColorConfig::Categorical { .. 
}) => { - // Use .colorLevels - if let Some(ref levels) = color_levels_column { - let level = levels.get(row_idx).copied().unwrap_or(0) as i32; - categorical_color_from_level(level) - } else { - [128, 128, 128] // Fallback gray - } - } - Some(LayerColorConfig::Constant { color }) => { - // Use pre-computed constant color - *color - } - None => { - // No config for this layer (shouldn't happen) - use gray - [128, 128, 128] - } - }; - - packed_colors.push(ggrs_core::PackedRgba::rgb(rgb[0], rgb[1], rgb[2]).to_u32() as i64); - } - - // Debug: Show color distribution by layer - let mut layer_color_counts: std::collections::HashMap = - std::collections::HashMap::new(); - if let Some(indices) = &axis_indices_opt { - for opt_idx in indices.iter().flatten() { - *layer_color_counts.entry(opt_idx as usize).or_insert(0) += 1; - } - } else { - // Single layer case - all rows belong to layer 0 - layer_color_counts.insert(0, nrows); - } - for (layer_idx, count) in layer_color_counts.iter() { - let config_type = per_layer_config - .get(*layer_idx) - .map(|c| match c { - LayerColorConfig::Continuous { .. } => "continuous", - LayerColorConfig::Categorical { .. } => "categorical", - LayerColorConfig::Constant { .. } => "constant", - }) - .unwrap_or("none"); - eprintln!( - "DEBUG add_mixed_layer_colors: Layer {} has {} points, config={}", - layer_idx, count, config_type - ); - } - - polars_df.with_column(Series::new(".color".into(), packed_colors))?; - - Ok(df) -} - -/// Add layer-based colors when no color factor is specified -/// -/// When multiple layers exist (axis_queries > 1) and no colors are explicitly mapped, -/// this function colors points by their layer index using the specified palette. 
-/// -/// # Arguments -/// * `df` - DataFrame with `.axisIndex` column -/// * `palette_name` - Optional palette name (defaults to Palette-1 if None) -/// -/// # Returns -/// DataFrame with `.color` column added (packed RGB as i64) -pub fn add_layer_colors( - mut df: DataFrame, - palette_name: Option<&str>, -) -> Result> { - use crate::tercen::palettes::{DEFAULT_CATEGORICAL_PALETTE, PALETTE_REGISTRY}; - - let polars_df = df.inner_mut(); - let nrows = polars_df.height(); - - // Get .axisIndex column - let axis_index_series = polars_df - .column(".axisIndex") - .map_err(|e| format!(".axisIndex column not found: {}", e))?; - - let axis_indices = axis_index_series - .i64() - .map_err(|e| format!(".axisIndex column is not i64: {}", e))?; - - // Use specified palette or fallback to default categorical palette - let effective_palette_name = palette_name.unwrap_or(DEFAULT_CATEGORICAL_PALETTE); - let palette = PALETTE_REGISTRY - .get(effective_palette_name) - .ok_or_else(|| format!("Palette '{}' not found", effective_palette_name))?; - - eprintln!( - "DEBUG add_layer_colors: Coloring {} points by layer using '{}' palette ({} colors)", - nrows, - effective_palette_name, - palette.len() - ); - - // Map each axis index to a color from Palette-1 - let packed_colors: Vec = axis_indices - .iter() - .map(|opt_idx| { - let idx = opt_idx.unwrap_or(0) as usize; - let rgb = palette.get_color(idx); - ggrs_core::PackedRgba::rgb(rgb[0], rgb[1], rgb[2]).to_u32() as i64 - }) - .collect(); - - // Debug: Show which colors are used for which layers - let mut seen_layers: std::collections::HashSet = std::collections::HashSet::new(); - for idx in axis_indices.iter().flatten() { - if seen_layers.insert(idx) { - let rgb = palette.get_color(idx as usize); - eprintln!( - "DEBUG add_layer_colors: Layer {} -> RGB({},{},{})", - idx, rgb[0], rgb[1], rgb[2] - ); - } - } - - polars_df.with_column(Series::new(".color".into(), packed_colors))?; - - Ok(df) -} diff --git a/src/tercen/colors.rs 
b/src/tercen/colors.rs deleted file mode 100644 index 25a12d5..0000000 --- a/src/tercen/colors.rs +++ /dev/null @@ -1,1366 +0,0 @@ -//! Color palette handling and RGB interpolation for continuous and categorical color scales -//! -//! This module provides functionality to: -//! - Parse Tercen color palettes (JetPalette, RampPalette, CategoryPalette) -//! - Interpolate color values to RGB (continuous) -//! - Map category strings to RGB (categorical) -//! - Extract color information from workflow steps - -use super::client::proto; -use super::error::{Result, TercenError}; -use std::collections::HashMap; - -/// Information about a color factor and its associated palette -#[derive(Debug, Clone)] -pub struct ColorInfo { - /// Name of the column containing color values (e.g., "Age", "Country") - pub factor_name: String, - /// Type of the factor (e.g., "double", "int32", "string") - pub factor_type: String, - /// The color mapping for this factor - pub mapping: ColorMapping, - /// Optional color table ID (for categorical colors with .colorLevels) - /// This table contains the mapping from level index to category name - pub color_table_id: Option, - /// Quartiles from the color column schema metadata. - /// Used to rescale the palette when is_user_defined=false. - /// Format: [Q1, Q2, Q3, min, max] as strings - pub quartiles: Option>, - /// Number of categorical levels (from color table schema nRows) - /// Used to generate legend labels without streaming the table - pub n_levels: Option, - /// Actual category labels from the color table (for categorical colors). - /// These are the values from the factor column in the color table. - /// When available, used instead of generic "Level N" labels. 
- pub color_labels: Option>, -} - -/// Color mapping - either continuous interpolation or categorical lookup -#[derive(Debug, Clone)] -pub enum ColorMapping { - /// Continuous color scale: numeric value → RGB via interpolation - Continuous(ColorPalette), - /// Categorical color scale: string value → RGB via lookup - Categorical(CategoryColorMap), -} - -/// A color palette with sorted color stops for interpolation -#[derive(Debug, Clone)] -pub struct ColorPalette { - /// Sorted list of color stops (by value, ascending) - pub stops: Vec, - /// Whether the user explicitly defined the color breakpoints. - /// If false, the palette should be rescaled based on data quartiles. - pub is_user_defined: bool, -} - -/// A single color stop in a palette -#[derive(Debug, Clone, PartialEq)] -pub struct ColorStop { - /// Numeric value at this stop - pub value: f64, - /// RGB color at this stop - pub color: [u8; 3], // [r, g, b] -} - -/// Categorical color mapping: string → RGB -#[derive(Debug, Clone)] -pub struct CategoryColorMap { - /// Map from category string to RGB color - pub mappings: HashMap, - /// Default color for unknown categories - pub default_color: [u8; 3], -} - -impl ColorPalette { - /// Create a new empty palette - pub fn new() -> Self { - ColorPalette { - stops: Vec::new(), - is_user_defined: true, // Default to user-defined - } - } - - /// Add a color stop and maintain sorted order - pub fn add_stop(&mut self, value: f64, color: [u8; 3]) { - let stop = ColorStop { value, color }; - // Insert in sorted position - match self - .stops - .binary_search_by(|s| s.value.partial_cmp(&value).unwrap()) - { - Ok(pos) => self.stops[pos] = stop, // Replace if exists - Err(pos) => self.stops.insert(pos, stop), - } - } - - /// Get the value range of this palette - pub fn range(&self) -> Option<(f64, f64)> { - if self.stops.is_empty() { - None - } else { - Some(( - self.stops.first().unwrap().value, - self.stops.last().unwrap().value, - )) - } - } - - /// Rescale the palette 
based on quartiles. - /// - /// When `is_user_defined=false`, Tercen auto-scales the palette based on data quartiles. - /// The formula is: - /// - min = Q2 - 1.5 * IQR (where IQR = Q3 - Q1) - /// - max = Q2 + 1.5 * IQR - /// - middle = (min + max) / 2 - /// - /// The existing color stops are linearly remapped from their original positions - /// to the new [min, middle, max] range. - /// - /// # Arguments - /// * `quartiles` - Array of quartile values as strings: [Q1, Q2, Q3, min, max] - /// - /// # Returns - /// A new palette with rescaled stops, or the original palette if rescaling fails. - pub fn rescale_from_quartiles(&self, quartiles: &[String]) -> Self { - // Need at least Q1, Q2, Q3 (first 3 values) - if quartiles.len() < 3 { - eprintln!( - "DEBUG rescale_from_quartiles: Not enough quartiles ({} < 3), returning unchanged", - quartiles.len() - ); - return self.clone(); - } - - // Parse Q1, Q2, Q3 - let q1: f64 = match quartiles[0].parse() { - Ok(v) => v, - Err(e) => { - eprintln!( - "DEBUG rescale_from_quartiles: Failed to parse Q1 '{}': {}", - quartiles[0], e - ); - return self.clone(); - } - }; - let q2: f64 = match quartiles[1].parse() { - Ok(v) => v, - Err(e) => { - eprintln!( - "DEBUG rescale_from_quartiles: Failed to parse Q2 '{}': {}", - quartiles[1], e - ); - return self.clone(); - } - }; - let q3: f64 = match quartiles[2].parse() { - Ok(v) => v, - Err(e) => { - eprintln!( - "DEBUG rescale_from_quartiles: Failed to parse Q3 '{}': {}", - quartiles[2], e - ); - return self.clone(); - } - }; - - // Calculate IQR and new range - let iqr = q3 - q1; - let new_min = q2 - 1.5 * iqr; - let new_max = q2 + 1.5 * iqr; - let new_middle = (new_min + new_max) / 2.0; - - eprintln!( - "DEBUG rescale_from_quartiles: Q1={:.2}, Q2={:.2}, Q3={:.2}, IQR={:.2}", - q1, q2, q3, iqr - ); - eprintln!( - "DEBUG rescale_from_quartiles: new_min={:.2}, new_middle={:.2}, new_max={:.2}", - new_min, new_middle, new_max - ); - - // Get current palette range - let (old_min, old_max) = 
match self.range() { - Some(r) => r, - None => return self.clone(), - }; - - eprintln!( - "DEBUG rescale_from_quartiles: old_min={:.2}, old_max={:.2}", - old_min, old_max - ); - - // Create new palette with rescaled stops - let mut new_palette = ColorPalette { - stops: Vec::with_capacity(self.stops.len()), - is_user_defined: true, // After rescaling, it's effectively "user defined" - }; - - // Linear remap from [old_min, old_max] to [new_min, new_max] - for stop in &self.stops { - let t = if old_max > old_min { - (stop.value - old_min) / (old_max - old_min) - } else { - 0.5 - }; - let new_value = new_min + t * (new_max - new_min); - - eprintln!( - "DEBUG rescale_from_quartiles: stop {:.2} -> {:.2} (t={:.2})", - stop.value, new_value, t - ); - - new_palette.stops.push(ColorStop { - value: new_value, - color: stop.color, - }); - } - - new_palette - } -} - -impl Default for ColorPalette { - fn default() -> Self { - Self::new() - } -} - -/// Parse a Tercen EPalette proto into a ColorMapping -pub fn parse_palette(e_palette: &proto::EPalette) -> Result { - let palette_obj = e_palette - .object - .as_ref() - .ok_or_else(|| TercenError::Data("EPalette has no object".to_string()))?; - - match palette_obj { - proto::e_palette::Object::Jetpalette(jet) => { - Ok(ColorMapping::Continuous(parse_jet_palette(jet)?)) - } - proto::e_palette::Object::Ramppalette(ramp) => { - Ok(ColorMapping::Continuous(parse_ramp_palette(ramp)?)) - } - proto::e_palette::Object::Categorypalette(cat) => { - Ok(ColorMapping::Categorical(parse_category_palette(cat)?)) - } - proto::e_palette::Object::Palette(_) => Err(TercenError::Data( - "Base Palette type not supported".to_string(), - )), - } -} - -/// Parse a JetPalette into a ColorPalette -fn parse_jet_palette(jet: &proto::JetPalette) -> Result { - eprintln!( - "DEBUG parse_jet_palette: is_user_defined = {}", - jet.is_user_defined - ); - parse_double_color_elements(&jet.double_color_elements, jet.is_user_defined, "Jet") -} - -/// Parse a RampPalette 
into a ColorPalette -fn parse_ramp_palette(ramp: &proto::RampPalette) -> Result { - // Extract palette name from properties (property with name="name") - let palette_name = ramp - .properties - .iter() - .find(|p| p.name == "name") - .map(|p| p.value.as_str()) - .unwrap_or("Spectral"); // Fallback to Spectral if not specified - - // "Divergent" is a special type where user defines min/middle/max colors manually. - // Always use element colors for Divergent, regardless of is_user_defined flag. - let use_element_colors = ramp.is_user_defined || palette_name.eq_ignore_ascii_case("divergent"); - - eprintln!( - "DEBUG parse_ramp_palette: is_user_defined = {}, palette_name = '{}', use_element_colors = {}", - ramp.is_user_defined, palette_name, use_element_colors - ); - - parse_double_color_elements( - &ramp.double_color_elements, - use_element_colors, - palette_name, - ) -} - -/// Parse a CategoryPalette into a CategoryColorMap -/// -/// For categorical colors, Tercen stores color levels (indices) in the `.colorLevels` column -/// of the main data table. The actual category strings are in a separate color table. -/// -/// If the palette has `stringColorElements`, use those explicit mappings. -/// Otherwise, we'll create mappings later from the data (using `.colorLevels`). 
-fn parse_category_palette(cat: &proto::CategoryPalette) -> Result { - let mut mappings = HashMap::new(); - - eprintln!( - "DEBUG parse_category_palette: Processing {} string color elements", - cat.string_color_elements.len() - ); - - // If we have explicit string→color mappings, use them - if !cat.string_color_elements.is_empty() { - for (i, element) in cat.string_color_elements.iter().enumerate() { - let category = element.string_value.clone(); - let rgb = int_to_rgb(element.color); - - eprintln!( - "DEBUG parse_category_palette: [{}] '{}' → RGB({}, {}, {})", - i, category, rgb[0], rgb[1], rgb[2] - ); - - mappings.insert(category, rgb); - } - } else { - // No explicit mappings - colors will be generated from .colorLevels in the data - // The actual mapping happens in the stream generator when we see the data - eprintln!( - "DEBUG parse_category_palette: No string_color_elements, will use .colorLevels from data" - ); - if let Some(ref color_list) = cat.color_list { - eprintln!( - "DEBUG parse_category_palette: ColorList name: '{}'", - color_list.name - ); - } - } - - Ok(CategoryColorMap { - mappings, - default_color: [128, 128, 128], // Gray for unknown categories - }) -} - -/// Parse DoubleColorElement array into ColorPalette -/// -/// When color_int == -1, all colors from the named palette are distributed -/// across the value range [min, max] from the elements. -fn parse_double_color_elements( - elements: &[proto::DoubleColorElement], - is_user_defined: bool, - default_palette_name: &str, -) -> Result { - use crate::tercen::palettes::PALETTE_REGISTRY; - - let mut palette = ColorPalette::new(); - palette.is_user_defined = is_user_defined; - - if elements.is_empty() { - return Err(TercenError::Data( - "Palette has no color elements".to_string(), - )); - } - - // Use is_user_defined to decide whether to use element colors or named palette. 
- // When is_user_defined=false, Tercen sends palette endpoint colors but we should - // distribute ALL colors from the named palette across the range instead. - if is_user_defined { - // User-defined colors: use them directly from elements - for element in elements { - let value = element.string_value.parse::().map_err(|err| { - TercenError::Data(format!( - "Invalid color value '{}': {}", - element.string_value, err - )) - })?; - - let color = int_to_rgb(element.color); - eprintln!( - "DEBUG parse_palette: User color at {}: RGB({}, {}, {})", - value, color[0], color[1], color[2] - ); - palette.add_stop(value, color); - } - } else { - // No user colors: distribute named palette across [min, max] - let values: Vec = elements - .iter() - .map(|e| e.string_value.parse::()) - .collect::, _>>() - .map_err(|e| TercenError::Data(format!("Invalid value: {}", e)))?; - - let min_val = values.iter().cloned().fold(f64::INFINITY, f64::min); - let max_val = values.iter().cloned().fold(f64::NEG_INFINITY, f64::max); - - let named_palette = PALETTE_REGISTRY.get(default_palette_name).ok_or_else(|| { - TercenError::Data(format!("Palette '{}' not found", default_palette_name)) - })?; - - let n_colors = named_palette.len(); - eprintln!( - "DEBUG parse_palette: Distributing {} {} colors across [{}, {}]", - n_colors, default_palette_name, min_val, max_val - ); - - for i in 0..n_colors { - let t = if n_colors > 1 { - i as f64 / (n_colors - 1) as f64 - } else { - 0.5 - }; - let value = min_val + t * (max_val - min_val); - let color = named_palette.get_color(i); - palette.add_stop(value, color); - } - } - - Ok(palette) -} - -/// Convert Tercen color integer (AARRGGBB) to RGB array -/// -/// Tercen stores colors as 32-bit integers with the format: -/// - Bits 24-31: Alpha (ignored for now) -/// - Bits 16-23: Red -/// - Bits 8-15: Green -/// - Bits 0-7: Blue -fn int_to_rgb(color_int: i32) -> [u8; 3] { - let color = color_int as u32; - [ - ((color >> 16) & 0xFF) as u8, // Red - ((color >> 8) & 
0xFF) as u8, // Green - (color & 0xFF) as u8, // Blue - ] -} - -/// Extract color information from a workflow step -/// -/// Navigates to step.model.axis.xyAxis[0].colors and extracts: -/// - Color factors (column names and types) -/// - Associated palettes -/// - Optional color table IDs (indexed by factor position, e.g., color_0, color_1) -/// -/// Returns a Vec (can be empty if no colors defined) -pub fn extract_color_info_from_step( - workflow: &proto::Workflow, - step_id: &str, - color_table_ids: &[Option], -) -> Result> { - // Find the step by ID - let step = workflow - .steps - .iter() - .find(|s| { - if let Some(proto::e_step::Object::Datastep(ds)) = &s.object { - ds.id == step_id - } else { - false - } - }) - .ok_or_else(|| TercenError::Data(format!("Step '{}' not found in workflow", step_id)))?; - - // Extract DataStep - let data_step = match &step.object { - Some(proto::e_step::Object::Datastep(ds)) => ds, - _ => return Err(TercenError::Data("Step is not a DataStep".to_string())), - }; - - // Navigate to model.axis.xyAxis - let model = data_step - .model - .as_ref() - .ok_or_else(|| TercenError::Data("DataStep has no model".to_string()))?; - - let axis = model - .axis - .as_ref() - .ok_or_else(|| TercenError::Data("Model has no axis".to_string()))?; - - // Get first xyAxis (usually there's only one for plot operators) - let xy_axis = axis - .xy_axis - .first() - .ok_or_else(|| TercenError::Data("Axis has no xyAxis array".to_string()))?; - - // Extract colors object - let colors = match &xy_axis.colors { - Some(c) => c, - None => { - eprintln!("DEBUG extract_color_info: No colors object in xyAxis"); - return Ok(Vec::new()); // No colors defined - this is OK - } - }; - - eprintln!( - "DEBUG extract_color_info: Found colors object with {} factors", - colors.factors.len() - ); - eprintln!( - "DEBUG extract_color_info: Palette present: {}", - colors.palette.is_some() - ); - - // Parse each color factor - let mut color_infos = Vec::new(); - for (i, factor) in 
colors.factors.iter().enumerate() { - eprintln!( - "DEBUG extract_color_info: Processing factor {}: name='{}', type='{}'", - i, factor.name, factor.r#type - ); - - // Parse the palette/mapping - let mapping = match &colors.palette { - Some(p) => { - eprintln!("DEBUG extract_color_info: Calling parse_palette..."); - let parsed = parse_palette(p)?; - match &parsed { - ColorMapping::Continuous(palette) => { - eprintln!( - "DEBUG extract_color_info: Continuous palette with {} stops", - palette.stops.len() - ); - } - ColorMapping::Categorical(color_map) => { - eprintln!( - "DEBUG extract_color_info: Categorical palette with {} categories", - color_map.mappings.len() - ); - } - } - parsed - } - None => { - return Err(TercenError::Data( - "Color factors defined but no palette provided".to_string(), - )) - } - }; - - // Get the color table ID for this factor (if available) - let color_table_id = color_table_ids.get(i).and_then(|opt| opt.clone()); - - color_infos.push(ColorInfo { - factor_name: factor.name.clone(), - factor_type: factor.r#type.clone(), - mapping, - color_table_id, - quartiles: None, // Will be populated later from column schema metadata - n_levels: None, // Will be populated later from color table schema nRows - color_labels: None, // Will be populated later from color table data - }); - } - - eprintln!( - "DEBUG extract_color_info: Returning {} ColorInfo objects", - color_infos.len() - ); - Ok(color_infos) -} - -/// Configuration for how a single layer gets its colors -/// -/// Each layer has exactly one color configuration: -/// - Continuous: interpolate values using a palette (e.g., Jet, Viridis) -/// - Categorical: map discrete levels to colors -/// - Constant: all points in layer get the same pre-computed color -#[derive(Debug, Clone)] -pub enum LayerColorConfig { - /// Layer has a continuous color factor - interpolate using palette - Continuous { - palette: ColorPalette, - factor_name: String, - quartiles: Option>, - color_table_id: Option, - }, - /// 
Layer has a categorical color factor - map levels to colors - Categorical { - color_map: CategoryColorMap, - factor_name: String, - color_table_id: Option, - }, - /// Layer has no color factor - all points get this constant color - Constant { color: [u8; 3] }, -} - -impl LayerColorConfig { - /// Get the factor name if this config uses a color factor - pub fn factor_name(&self) -> Option<&str> { - match self { - LayerColorConfig::Continuous { factor_name, .. } => Some(factor_name), - LayerColorConfig::Categorical { factor_name, .. } => Some(factor_name), - LayerColorConfig::Constant { .. } => None, - } - } - - /// Check if this is a continuous mapping - pub fn is_continuous(&self) -> bool { - matches!(self, LayerColorConfig::Continuous { .. }) - } - - /// Check if this is a categorical mapping - pub fn is_categorical(&self) -> bool { - matches!(self, LayerColorConfig::Categorical { .. }) - } - - /// Check if this is a constant color (no color factor) - pub fn is_constant(&self) -> bool { - matches!(self, LayerColorConfig::Constant { .. }) - } - - /// Get the palette for continuous mappings - pub fn palette(&self) -> Option<&ColorPalette> { - match self { - LayerColorConfig::Continuous { palette, .. } => Some(palette), - _ => None, - } - } - - /// Get quartiles for continuous mappings - pub fn quartiles(&self) -> Option<&Vec> { - match self { - LayerColorConfig::Continuous { quartiles, .. } => quartiles.as_ref(), - _ => None, - } - } - - /// Set quartiles for continuous mappings - pub fn set_quartiles(&mut self, q: Vec) { - if let LayerColorConfig::Continuous { quartiles, .. } = self { - *quartiles = Some(q); - } - } - - /// Get color table ID if available - pub fn color_table_id(&self) -> Option<&str> { - match self { - LayerColorConfig::Continuous { color_table_id, .. } => color_table_id.as_deref(), - LayerColorConfig::Categorical { color_table_id, .. } => color_table_id.as_deref(), - LayerColorConfig::Constant { .. 
} => None, - } - } - - /// Set color table ID - pub fn set_color_table_id(&mut self, id: String) { - match self { - LayerColorConfig::Continuous { color_table_id, .. } => *color_table_id = Some(id), - LayerColorConfig::Categorical { color_table_id, .. } => *color_table_id = Some(id), - LayerColorConfig::Constant { .. } => {} - } - } -} - -/// Per-layer color configuration -/// -/// Every layer has exactly one LayerColorConfig that determines how its -/// points are colored. This unified structure handles all scenarios: -/// - Layers with continuous color factors (interpolated palettes) -/// - Layers with categorical color factors (discrete mappings) -/// - Layers without color factors (constant colors from layer palette) -#[derive(Debug, Clone, Default)] -pub struct PerLayerColorConfig { - /// Color configuration for each layer. Index = layer index (axisIndex). - pub layer_configs: Vec, - /// Total number of layers - pub n_layers: usize, -} - -impl PerLayerColorConfig { - /// Check if any layer has explicit colors (not constant) - pub fn has_explicit_colors(&self) -> bool { - self.layer_configs.iter().any(|c| !c.is_constant()) - } - - /// Check if any layer uses constant coloring (no color factor) - pub fn has_constant_colors(&self) -> bool { - self.layer_configs.iter().any(|c| c.is_constant()) - } - - /// Check if this is a mixed scenario (some layers have colors, some don't) - pub fn is_mixed(&self) -> bool { - self.has_explicit_colors() && self.has_constant_colors() - } - - /// Get the config for a specific layer - pub fn get(&self, layer_idx: usize) -> Option<&LayerColorConfig> { - self.layer_configs.get(layer_idx) - } - - /// Get mutable config for a specific layer - pub fn get_mut(&mut self, layer_idx: usize) -> Option<&mut LayerColorConfig> { - self.layer_configs.get_mut(layer_idx) - } - - /// Get all color factor names across all layers (excludes constant-color layers) - pub fn all_color_factor_names(&self) -> Vec { - self.layer_configs - .iter() - 
.filter_map(|c| c.factor_name().map(|s| s.to_string())) - .collect() - } - - /// Check if any layer uses categorical colors - pub fn has_categorical(&self) -> bool { - self.layer_configs.iter().any(|c| c.is_categorical()) - } - - /// Check if any layer uses continuous colors - pub fn has_continuous(&self) -> bool { - self.layer_configs.iter().any(|c| c.is_continuous()) - } - - // Legacy compatibility methods (to be removed after full migration) - - /// Legacy: Check if any layer needs layer-based coloring - #[deprecated(note = "Use has_constant_colors() instead")] - pub fn has_layers_needing_layer_colors(&self) -> bool { - self.has_constant_colors() - } - - /// Legacy: Get the ColorInfo for a specific layer (if any) - /// Returns None for constant-color layers - #[deprecated(note = "Use get() and match on LayerColorConfig instead")] - pub fn get_color_info(&self, layer_idx: usize) -> Option { - match self.layer_configs.get(layer_idx)? { - LayerColorConfig::Continuous { - palette, - factor_name, - quartiles, - color_table_id, - } => Some(ColorInfo { - factor_name: factor_name.clone(), - factor_type: "double".to_string(), - mapping: ColorMapping::Continuous(palette.clone()), - color_table_id: color_table_id.clone(), - quartiles: quartiles.clone(), - n_levels: None, - color_labels: None, - }), - LayerColorConfig::Categorical { - color_map, - factor_name, - color_table_id, - } => Some(ColorInfo { - factor_name: factor_name.clone(), - factor_type: "string".to_string(), - mapping: ColorMapping::Categorical(color_map.clone()), - color_table_id: color_table_id.clone(), - quartiles: None, - n_levels: None, - color_labels: None, - }), - LayerColorConfig::Constant { .. 
} => None, - } - } -} - -/// Extract palette name from an EPalette -fn extract_palette_name_from_epalette(palette: &proto::EPalette) -> Option { - use proto::e_palette::Object as PaletteObject; - - match &palette.object { - Some(PaletteObject::Categorypalette(cat)) => { - // Try colorList.name first - cat.color_list - .as_ref() - .and_then(|cl| { - if !cl.name.is_empty() { - Some(cl.name.clone()) - } else { - None - } - }) - // Fallback to properties["name"] - .or_else(|| { - cat.properties - .iter() - .find(|p| p.name == "name") - .map(|p| p.value.clone()) - }) - } - Some(PaletteObject::Ramppalette(ramp)) => ramp - .properties - .iter() - .find(|p| p.name == "name") - .map(|p| p.value.clone()), - Some(PaletteObject::Jetpalette(_)) => Some("Jet".to_string()), - Some(PaletteObject::Palette(p)) => p - .properties - .iter() - .find(|p| p.name == "name") - .map(|p| p.value.clone()), - None => None, - } -} - -/// Get a constant color for a layer without color factors -/// -/// Uses the layer's palette at the layer index to determine the color. 
-fn get_constant_color_for_layer(layer_idx: usize, palette_name: Option<&str>) -> [u8; 3] { - use super::palettes::{DEFAULT_CATEGORICAL_PALETTE, PALETTE_REGISTRY}; - - let effective_name = palette_name.unwrap_or(DEFAULT_CATEGORICAL_PALETTE); - - let color = PALETTE_REGISTRY - .get(effective_name) - .map(|p| p.get_color(layer_idx)) - .unwrap_or_else(|| { - // Fallback to default palette if named palette not found - PALETTE_REGISTRY - .get(DEFAULT_CATEGORICAL_PALETTE) - .map(|p| p.get_color(layer_idx)) - .unwrap_or([128, 128, 128]) - }); - - eprintln!( - "DEBUG get_constant_color_for_layer: layer {} using palette '{}' -> RGB({},{},{})", - layer_idx, effective_name, color[0], color[1], color[2] - ); - - color -} - -/// Extract color information for each layer from a workflow step -/// -/// Navigates to step.model.axis.xyAxis[i].colors for each layer and extracts: -/// - For layers with color factors: Continuous or Categorical config -/// - For layers without color factors: Constant color from layer's palette at layer_idx -/// -/// Returns a PerLayerColorConfig with a LayerColorConfig for every layer. 
-pub fn extract_per_layer_color_info( - workflow: &proto::Workflow, - step_id: &str, - color_table_ids: &[Option], -) -> Result { - // Find the step by ID - let step = workflow - .steps - .iter() - .find(|s| { - if let Some(proto::e_step::Object::Datastep(ds)) = &s.object { - ds.id == step_id - } else { - false - } - }) - .ok_or_else(|| TercenError::Data(format!("Step '{}' not found in workflow", step_id)))?; - - // Extract DataStep - let data_step = match &step.object { - Some(proto::e_step::Object::Datastep(ds)) => ds, - _ => return Err(TercenError::Data("Step is not a DataStep".to_string())), - }; - - // Navigate to model.axis.xyAxis - let model = data_step - .model - .as_ref() - .ok_or_else(|| TercenError::Data("DataStep has no model".to_string()))?; - - let axis = model - .axis - .as_ref() - .ok_or_else(|| TercenError::Data("Model has no axis".to_string()))?; - - let n_layers = axis.xy_axis.len(); - eprintln!( - "DEBUG extract_per_layer_color_info: Found {} layers (xyAxis entries)", - n_layers - ); - - let mut layer_configs: Vec = Vec::with_capacity(n_layers); - - for (layer_idx, xy_axis) in axis.xy_axis.iter().enumerate() { - // Extract colors object for this layer - let colors = match &xy_axis.colors { - Some(c) => c, - None => { - // No colors object - use constant gray - eprintln!( - "DEBUG extract_per_layer_color_info: Layer {} has no colors object, using gray", - layer_idx - ); - layer_configs.push(LayerColorConfig::Constant { - color: [128, 128, 128], - }); - continue; - } - }; - - // Check if this layer has color factors - if colors.factors.is_empty() { - // No color factors - extract palette name and compute constant color - let palette_name = colors - .palette - .as_ref() - .and_then(extract_palette_name_from_epalette); - - eprintln!( - "DEBUG extract_per_layer_color_info: Layer {} has no color factors, palette='{}', using constant color", - layer_idx, - palette_name.as_deref().unwrap_or("(none)") - ); - - let color = 
get_constant_color_for_layer(layer_idx, palette_name.as_deref()); - layer_configs.push(LayerColorConfig::Constant { color }); - continue; - } - - // Layer has color factors - extract mapping - eprintln!( - "DEBUG extract_per_layer_color_info: Layer {} has {} color factors", - layer_idx, - colors.factors.len() - ); - - // For now, only use the first color factor per layer - let factor = &colors.factors[0]; - eprintln!( - "DEBUG extract_per_layer_color_info: Layer {} factor: name='{}', type='{}'", - layer_idx, factor.name, factor.r#type - ); - - // Parse the palette/mapping - let mapping = match &colors.palette { - Some(p) => parse_palette(p)?, - None => { - // Has factors but no palette - use constant color as fallback - eprintln!( - "DEBUG extract_per_layer_color_info: Layer {} has factors but no palette, using constant color", - layer_idx - ); - let color = get_constant_color_for_layer(layer_idx, None); - layer_configs.push(LayerColorConfig::Constant { color }); - continue; - } - }; - - // Get the color table ID for this factor (if available) - let color_table_id = color_table_ids.first().and_then(|opt| opt.clone()); - - // Create appropriate LayerColorConfig based on mapping type - let config = match mapping { - ColorMapping::Continuous(palette) => { - eprintln!( - "DEBUG extract_per_layer_color_info: Layer {} has continuous palette with {} stops", - layer_idx, - palette.stops.len() - ); - LayerColorConfig::Continuous { - palette, - factor_name: factor.name.clone(), - quartiles: None, - color_table_id, - } - } - ColorMapping::Categorical(color_map) => { - eprintln!( - "DEBUG extract_per_layer_color_info: Layer {} has categorical palette with {} categories", - layer_idx, - color_map.mappings.len() - ); - LayerColorConfig::Categorical { - color_map, - factor_name: factor.name.clone(), - color_table_id, - } - } - }; - - layer_configs.push(config); - } - - let config = PerLayerColorConfig { - layer_configs, - n_layers, - }; - - eprintln!( - "DEBUG 
extract_per_layer_color_info: Config - has_explicit={}, has_constant={}, is_mixed={}", - config.has_explicit_colors(), - config.has_constant_colors(), - config.is_mixed() - ); - - Ok(config) -} - -/// Extract the crosstab palette name for layer coloring -/// -/// Returns the palette name from the crosstab's color configuration, even when -/// there are no color factors. This is used for layer-based coloring. -/// -/// The palette name is extracted from: -/// - CategoryPalette.colorList.name -/// - RampPalette properties (name="name") -/// - JetPalette defaults to "Jet" -/// -/// Returns None if no palette is configured. -pub fn extract_crosstab_palette_name(workflow: &proto::Workflow, step_id: &str) -> Option { - use proto::e_palette::Object as PaletteObject; - use proto::e_step::Object as StepObject; - - // Find the step - let step = workflow.steps.iter().find_map(|e_step| { - if let Some(StepObject::Datastep(ds)) = &e_step.object { - if ds.id == step_id { - return Some(ds); - } - } - None - })?; - - // Navigate to xyAxis.colors.palette - let model = step.model.as_ref()?; - let axis = model.axis.as_ref()?; - let xy_axis = axis.xy_axis.first()?; - let colors = xy_axis.colors.as_ref()?; - let palette = colors.palette.as_ref()?; - let palette_obj = palette.object.as_ref()?; - - // Extract palette name based on type - // Priority: colorList.name, then properties["name"] - let name = match palette_obj { - PaletteObject::Categorypalette(cat) => { - // First try colorList.name - let cl_name = cat.color_list.as_ref().and_then(|cl| { - if !cl.name.is_empty() { - Some(cl.name.clone()) - } else { - None - } - }); - // Fallback to properties["name"] - cl_name.or_else(|| { - cat.properties - .iter() - .find(|p| p.name == "name") - .map(|p| p.value.clone()) - }) - } - PaletteObject::Ramppalette(ramp) => ramp - .properties - .iter() - .find(|p| p.name == "name") - .map(|p| p.value.clone()), - PaletteObject::Jetpalette(_) => Some("Jet".to_string()), - PaletteObject::Palette(_) => 
None, - }; - - // Debug: Print available properties for CategoryPalette - if let PaletteObject::Categorypalette(cat) = palette_obj { - eprintln!( - "DEBUG extract_crosstab_palette_name: CategoryPalette properties: {:?}", - cat.properties - .iter() - .map(|p| format!("{}={}", p.name, p.value)) - .collect::>() - ); - if let Some(cl) = &cat.color_list { - eprintln!( - "DEBUG extract_crosstab_palette_name: colorList.name='{}'", - cl.name - ); - } - } - - eprintln!( - "DEBUG extract_crosstab_palette_name: palette type={}, name={:?}", - match palette_obj { - PaletteObject::Categorypalette(_) => "Category", - PaletteObject::Ramppalette(_) => "Ramp", - PaletteObject::Jetpalette(_) => "Jet", - PaletteObject::Palette(_) => "Base", - }, - name - ); - - name -} - -/// Interpolate a color value using the palette -/// -/// Uses linear interpolation between the surrounding color stops. -/// Values outside the palette range clamp to the min/max colors. -pub fn interpolate_color(value: f64, palette: &ColorPalette) -> [u8; 3] { - if palette.stops.is_empty() { - return [128, 128, 128]; // Gray default - } - - let stops = &palette.stops; - - // Clamp to min - if value <= stops.first().unwrap().value { - return stops.first().unwrap().color; - } - - // Clamp to max - if value >= stops.last().unwrap().value { - return stops.last().unwrap().color; - } - - // Find surrounding stops using binary search - let idx = stops.partition_point(|stop| stop.value < value); - let lower = &stops[idx - 1]; - let upper = &stops[idx]; - - // Linear interpolation - let t = (value - lower.value) / (upper.value - lower.value); - [ - (lower.color[0] as f64 * (1.0 - t) + upper.color[0] as f64 * t) as u8, - (lower.color[1] as f64 * (1.0 - t) + upper.color[1] as f64 * t) as u8, - (lower.color[2] as f64 * (1.0 - t) + upper.color[2] as f64 * t) as u8, - ] -} - -/// Extract point size from workflow step -/// -/// Returns the pointSize from the chart configuration (1-10 scale from UI). 
-/// Returns None if not found, in which case the caller should use a default. -pub fn extract_point_size_from_step( - workflow: &proto::Workflow, - step_id: &str, -) -> Result> { - // Find the step - let step = workflow - .steps - .iter() - .find(|s| { - if let Some(proto::e_step::Object::Datastep(ds)) = &s.object { - ds.id == step_id - } else { - false - } - }) - .ok_or_else(|| TercenError::Data(format!("Step '{}' not found in workflow", step_id)))?; - - // Extract DataStep - let data_step = match &step.object { - Some(proto::e_step::Object::Datastep(ds)) => ds, - _ => return Err(TercenError::Data("Step is not a DataStep".to_string())), - }; - - // Navigate to model.axis.xyAxis - let model = match data_step.model.as_ref() { - Some(m) => m, - None => return Ok(None), // No model, use default - }; - - let axis = match model.axis.as_ref() { - Some(a) => a, - None => return Ok(None), // No axis, use default - }; - - // Get first xyAxis - let xy_axis = match axis.xy_axis.first() { - Some(xy) => xy, - None => return Ok(None), // No xyAxis, use default - }; - - // Extract pointSize from chart - let chart = match xy_axis.chart.as_ref() { - Some(c) => c, - None => return Ok(None), // No chart, use default - }; - - // Check the chart type and extract pointSize - let point_size = match &chart.object { - Some(proto::e_chart::Object::Chartpoint(cp)) => Some(cp.point_size), - Some(proto::e_chart::Object::Chartline(cl)) => Some(cl.point_size), - _ => None, // Other chart types don't have pointSize - }; - - eprintln!( - "DEBUG extract_point_size: Found pointSize = {:?}", - point_size - ); - - Ok(point_size) -} - -/// Chart type variants supported by Tercen -#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] -pub enum ChartKind { - /// Scatter plot (default) - #[default] - Point, - /// Heatmap (tile-based visualization) - Heatmap, - /// Line plot - Line, - /// Bar chart - Bar, -} - -/// Extract chart type from workflow step -/// -/// Navigates: 
workflow.steps[step_id].model.axis.xyAxis[0].chart.object -/// Returns ChartKind based on the EChart variant. -pub fn extract_chart_kind_from_step( - workflow: &proto::Workflow, - step_id: &str, -) -> Result { - // Find the step - check both DataStep and CrossTabStep - let step = workflow - .steps - .iter() - .find(|s| match &s.object { - Some(proto::e_step::Object::Datastep(ds)) => ds.id == step_id, - Some(proto::e_step::Object::Crosstabstep(cs)) => cs.id == step_id, - _ => false, - }) - .ok_or_else(|| TercenError::Data(format!("Step '{}' not found in workflow", step_id)))?; - - // Extract the Crosstab model (both DataStep and CrossTabStep have it) - let model = match &step.object { - Some(proto::e_step::Object::Datastep(ds)) => ds.model.as_ref(), - Some(proto::e_step::Object::Crosstabstep(cs)) => cs.model.as_ref(), - _ => { - return Err(TercenError::Data( - "Step type does not have a model".to_string(), - )) - } - } - .ok_or_else(|| TercenError::Data("Step has no model".to_string()))?; - - // Navigate to model.axis.xyAxis - let axis = match model.axis.as_ref() { - Some(a) => a, - None => { - eprintln!("DEBUG extract_chart_kind: No axis in model, defaulting to Point"); - return Ok(ChartKind::Point); - } - }; - - // Get first xyAxis - let xy_axis = match axis.xy_axis.first() { - Some(xy) => xy, - None => { - eprintln!("DEBUG extract_chart_kind: No xyAxis, defaulting to Point"); - return Ok(ChartKind::Point); - } - }; - - // Extract chart type from EChart - let chart = match xy_axis.chart.as_ref() { - Some(c) => c, - None => { - eprintln!("DEBUG extract_chart_kind: No chart in xyAxis, defaulting to Point"); - return Ok(ChartKind::Point); - } - }; - - // Map EChart variant to ChartKind - let chart_kind = match &chart.object { - Some(proto::e_chart::Object::Chartpoint(_)) => ChartKind::Point, - Some(proto::e_chart::Object::Chartheatmap(_)) => ChartKind::Heatmap, - Some(proto::e_chart::Object::Chartline(_)) => ChartKind::Line, - 
Some(proto::e_chart::Object::Chartbar(_)) => ChartKind::Bar, - Some(proto::e_chart::Object::Chart(_)) => ChartKind::Point, // Generic chart defaults to point - Some(proto::e_chart::Object::Chartsize(_)) => ChartKind::Point, // Size chart treated as point - None => ChartKind::Point, - }; - - eprintln!( - "DEBUG extract_chart_kind: Found chart type = {:?}", - chart_kind - ); - - Ok(chart_kind) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_int_to_rgb() { - // White: 0xFFFFFFFF - assert_eq!(int_to_rgb(-1), [255, 255, 255]); - - // Red: 0x00FF0000 - assert_eq!(int_to_rgb(0x00FF0000u32 as i32), [255, 0, 0]); - - // Green: 0x0000FF00 - assert_eq!(int_to_rgb(0x0000FF00u32 as i32), [0, 255, 0]); - - // Blue: 0x000000FF - assert_eq!(int_to_rgb(0x000000FFu32 as i32), [0, 0, 255]); - - // Gray: 0x00808080 - assert_eq!(int_to_rgb(0x00808080u32 as i32), [128, 128, 128]); - } - - #[test] - fn test_palette_add_stop() { - let mut palette = ColorPalette::new(); - palette.add_stop(0.0, [0, 0, 0]); - palette.add_stop(100.0, [255, 255, 255]); - palette.add_stop(50.0, [128, 128, 128]); - - assert_eq!(palette.stops.len(), 3); - assert_eq!(palette.stops[0].value, 0.0); - assert_eq!(palette.stops[1].value, 50.0); - assert_eq!(palette.stops[2].value, 100.0); - assert!(palette.is_user_defined); // Default is true - } - - #[test] - fn test_palette_rescale_from_quartiles() { - // Create a palette with stops at 0, 50, 100 - let mut palette = ColorPalette::new(); - palette.is_user_defined = false; - palette.add_stop(0.0, [0, 0, 255]); // Blue at min - palette.add_stop(50.0, [255, 255, 255]); // White at middle - palette.add_stop(100.0, [255, 0, 0]); // Red at max - - // Quartiles: Q1=40, Q2=50, Q3=60 - // IQR = 60 - 40 = 20 - // new_min = 50 - 1.5 * 20 = 20 - // new_max = 50 + 1.5 * 20 = 80 - let quartiles = vec!["40".to_string(), "50".to_string(), "60".to_string()]; - let rescaled = palette.rescale_from_quartiles(&quartiles); - - // Check that the stops have been 
rescaled - assert_eq!(rescaled.stops.len(), 3); - assert!((rescaled.stops[0].value - 20.0).abs() < 0.001); // min -> 20 - assert!((rescaled.stops[1].value - 50.0).abs() < 0.001); // middle -> 50 - assert!((rescaled.stops[2].value - 80.0).abs() < 0.001); // max -> 80 - - // Colors should be preserved - assert_eq!(rescaled.stops[0].color, [0, 0, 255]); - assert_eq!(rescaled.stops[1].color, [255, 255, 255]); - assert_eq!(rescaled.stops[2].color, [255, 0, 0]); - } - - #[test] - fn test_interpolate_color_edge_cases() { - let mut palette = ColorPalette::new(); - palette.add_stop(0.0, [0, 0, 0]); - palette.add_stop(100.0, [255, 255, 255]); - - // Below min - clamps to first color - assert_eq!(interpolate_color(-10.0, &palette), [0, 0, 0]); - - // At min - assert_eq!(interpolate_color(0.0, &palette), [0, 0, 0]); - - // At max - assert_eq!(interpolate_color(100.0, &palette), [255, 255, 255]); - - // Above max - clamps to last color - assert_eq!(interpolate_color(110.0, &palette), [255, 255, 255]); - } - - #[test] - fn test_interpolate_color_midpoint() { - let mut palette = ColorPalette::new(); - palette.add_stop(0.0, [0, 0, 0]); - palette.add_stop(100.0, [100, 200, 255]); - - // Midpoint - let mid = interpolate_color(50.0, &palette); - assert_eq!(mid, [50, 100, 127]); // (0+100)/2, (0+200)/2, (0+255)/2 rounded - } - - #[test] - fn test_palette_range() { - let mut palette = ColorPalette::new(); - assert_eq!(palette.range(), None); - - palette.add_stop(10.0, [0, 0, 0]); - palette.add_stop(50.0, [255, 255, 255]); - - assert_eq!(palette.range(), Some((10.0, 50.0))); - } -} diff --git a/src/tercen/context/base.rs b/src/tercen/context/base.rs deleted file mode 100644 index bd81af8..0000000 --- a/src/tercen/context/base.rs +++ /dev/null @@ -1,533 +0,0 @@ -//! ContextBase - Common implementation for TercenContext -//! -//! Contains all shared fields and methods used by both ProductionContext and DevContext. -//! 
The specific contexts wrap this struct and provide different constructors. - -use crate::tercen::client::proto::{CubeQuery, ETask, OperatorSettings}; -use crate::tercen::colors::{ChartKind, ColorInfo, PerLayerColorConfig}; -use crate::tercen::result::PlotResult; -use crate::tercen::table::{SchemaCache, TableStreamer}; -use crate::tercen::TercenClient; -use ggrs_core::data::DataFrame; -use std::sync::Arc; - -/// Common base containing all TercenContext data -/// -/// Both ProductionContext and DevContext wrap this struct using the newtype pattern. -/// This eliminates field duplication and provides a single place for common methods. -pub struct ContextBase { - // Core - pub(super) client: Arc, - pub(super) cube_query: CubeQuery, - pub(super) schema_ids: Vec, - - // Identifiers - pub(super) workflow_id: String, - pub(super) step_id: String, - pub(super) project_id: String, - pub(super) namespace: String, - - // Configuration - pub(super) operator_settings: Option, - pub(super) color_infos: Vec, - pub(super) page_factors: Vec, - - // Axis tables - pub(super) y_axis_table_id: Option, - pub(super) x_axis_table_id: Option, - - // UI settings - pub(super) point_size: Option, - pub(super) chart_kind: ChartKind, - pub(super) crosstab_dimensions: Option<(i32, i32)>, - - // Transforms - pub(super) y_transform: Option, - pub(super) x_transform: Option, - - // Layer coloring - /// Palette name from crosstab for layer-based coloring (when no color factors) - pub(super) layer_palette_name: Option, - - /// Per-layer color configuration (for mixed-layer scenarios) - pub(super) per_layer_colors: Option, - - /// Y-axis factor names per layer (for legend entries) - pub(super) layer_y_factor_names: Vec, -} - -impl ContextBase { - // === Getters (used by TercenContext trait implementations) === - - pub fn cube_query(&self) -> &CubeQuery { - &self.cube_query - } - - pub fn schema_ids(&self) -> &[String] { - &self.schema_ids - } - - pub fn workflow_id(&self) -> &str { - &self.workflow_id 
- } - - pub fn step_id(&self) -> &str { - &self.step_id - } - - pub fn project_id(&self) -> &str { - &self.project_id - } - - pub fn namespace(&self) -> &str { - &self.namespace - } - - pub fn operator_settings(&self) -> Option<&OperatorSettings> { - self.operator_settings.as_ref() - } - - pub fn color_infos(&self) -> &[ColorInfo] { - &self.color_infos - } - - pub fn page_factors(&self) -> &[String] { - &self.page_factors - } - - pub fn y_axis_table_id(&self) -> Option<&str> { - self.y_axis_table_id.as_deref() - } - - pub fn x_axis_table_id(&self) -> Option<&str> { - self.x_axis_table_id.as_deref() - } - - pub fn point_size(&self) -> Option { - self.point_size - } - - pub fn chart_kind(&self) -> ChartKind { - self.chart_kind - } - - pub fn crosstab_dimensions(&self) -> Option<(i32, i32)> { - self.crosstab_dimensions - } - - pub fn y_transform(&self) -> Option<&str> { - self.y_transform.as_deref() - } - - pub fn x_transform(&self) -> Option<&str> { - self.x_transform.as_deref() - } - - pub fn layer_palette_name(&self) -> Option<&str> { - self.layer_palette_name.as_deref() - } - - pub fn per_layer_colors(&self) -> Option<&PerLayerColorConfig> { - self.per_layer_colors.as_ref() - } - - pub fn layer_y_factor_names(&self) -> &[String] { - &self.layer_y_factor_names - } - - pub fn client(&self) -> &Arc { - &self.client - } - - // === Convenience methods (same as trait defaults but available directly) === - - pub fn qt_hash(&self) -> &str { - &self.cube_query.qt_hash - } - - pub fn column_hash(&self) -> &str { - &self.cube_query.column_hash - } - - pub fn row_hash(&self) -> &str { - &self.cube_query.row_hash - } - - // === Table Streamer Factory === - - /// Create a TableStreamer for accessing Tercen tables - pub fn streamer(&self) -> TableStreamer<'_> { - TableStreamer::new(&self.client) - } - - /// Create a TableStreamer with schema caching for multi-page plots - pub fn streamer_with_cache(&self, cache: SchemaCache) -> TableStreamer<'_> { - 
TableStreamer::with_cache(&self.client, cache) - } - - // === Data Access Methods (async) === - - /// Fetch data from the main table (qt_hash) - /// - /// # Arguments - /// * `columns` - Optional list of column names to fetch (None = all columns) - /// * `offset` - Number of rows to skip - /// * `limit` - Maximum number of rows to fetch - pub async fn select( - &self, - columns: Option>, - offset: i64, - limit: i64, - ) -> Result> { - let streamer = self.streamer(); - let tson_data = streamer - .stream_tson(&self.cube_query.qt_hash, columns, offset, limit) - .await?; - let df = crate::tercen::tson_to_dataframe(&tson_data)?; - Ok(df) - } - - /// Fetch data from the column facet table (column_hash) - /// - /// # Arguments - /// * `columns` - Optional list of column names to fetch (None = all columns) - pub async fn cselect( - &self, - columns: Option>, - ) -> Result> { - if self.cube_query.column_hash.is_empty() { - return Ok(DataFrame::new()); - } - let streamer = self.streamer(); - let tson_data = streamer - .stream_tson(&self.cube_query.column_hash, columns, 0, -1) - .await?; - let df = crate::tercen::tson_to_dataframe(&tson_data)?; - Ok(df) - } - - /// Fetch data from the row facet table (row_hash) - /// - /// # Arguments - /// * `columns` - Optional list of column names to fetch (None = all columns) - pub async fn rselect( - &self, - columns: Option>, - ) -> Result> { - if self.cube_query.row_hash.is_empty() { - return Ok(DataFrame::new()); - } - let streamer = self.streamer(); - let tson_data = streamer - .stream_tson(&self.cube_query.row_hash, columns, 0, -1) - .await?; - let df = crate::tercen::tson_to_dataframe(&tson_data)?; - Ok(df) - } - - // === Column Name Methods (async) === - - /// Get column names from the main table schema - pub async fn names(&self) -> Result, Box> { - self.get_table_column_names(&self.cube_query.qt_hash).await - } - - /// Get column names from the column facet table schema - pub async fn cnames(&self) -> Result, Box> { - if 
self.cube_query.column_hash.is_empty() { - return Ok(Vec::new()); - } - self.get_table_column_names(&self.cube_query.column_hash) - .await - } - - /// Get column names from the row facet table schema - pub async fn rnames(&self) -> Result, Box> { - if self.cube_query.row_hash.is_empty() { - return Ok(Vec::new()); - } - self.get_table_column_names(&self.cube_query.row_hash).await - } - - /// Internal helper to get column names from a table schema - async fn get_table_column_names( - &self, - table_id: &str, - ) -> Result, Box> { - let streamer = self.streamer(); - let schema = streamer.get_schema(table_id).await?; - - // Extract column names from schema - use crate::tercen::client::proto::{e_column_schema, e_schema}; - - // Helper to extract name from EColumnSchema - let extract_name = |c: &crate::tercen::client::proto::EColumnSchema| -> Option { - if let Some(e_column_schema::Object::Columnschema(cs)) = &c.object { - Some(cs.name.clone()) - } else { - None - } - }; - - let names = match schema.object.as_ref() { - Some(e_schema::Object::Schema(s)) => { - s.columns.iter().filter_map(extract_name).collect() - } - Some(e_schema::Object::Cubequerytableschema(cqts)) => { - cqts.columns.iter().filter_map(extract_name).collect() - } - Some(e_schema::Object::Computedtableschema(cts)) => { - cts.columns.iter().filter_map(extract_name).collect() - } - Some(e_schema::Object::Tableschema(ts)) => { - ts.columns.iter().filter_map(extract_name).collect() - } - None => Vec::new(), - }; - Ok(names) - } - - // === Result Save Methods (async) === - - /// Save a single PNG plot result back to Tercen - /// - /// # Arguments - /// * `png_buffer` - Raw PNG bytes from the renderer - /// * `width` - Plot width in pixels - /// * `height` - Plot height in pixels - /// * `task` - Mutable reference to the task - pub async fn save_result( - &self, - png_buffer: Vec, - width: i32, - height: i32, - output_ext: &str, - filename: &str, - task: &mut ETask, - ) -> Result<(), Box> { - 
crate::tercen::result::save_result( - Arc::clone(&self.client), - &self.project_id, - &self.namespace, - png_buffer, - width, - height, - output_ext, - filename, - task, - ) - .await - } - - /// Save multiple PNG plot results back to Tercen (multi-page) - /// - /// # Arguments - /// * `plots` - Vector of PlotResult structs - /// * `task` - Mutable reference to the task - pub async fn save_results( - &self, - plots: Vec, - task: &mut ETask, - ) -> Result<(), Box> { - crate::tercen::result::save_results( - Arc::clone(&self.client), - &self.project_id, - &self.namespace, - plots, - task, - ) - .await - } -} - -/// Builder for constructing ContextBase -/// -/// Used by ProductionContext and DevContext constructors to build the common base. -pub struct ContextBaseBuilder { - client: Option>, - cube_query: Option, - schema_ids: Vec, - workflow_id: String, - step_id: String, - project_id: String, - namespace: String, - operator_settings: Option, - color_infos: Vec, - page_factors: Vec, - y_axis_table_id: Option, - x_axis_table_id: Option, - point_size: Option, - chart_kind: ChartKind, - crosstab_dimensions: Option<(i32, i32)>, - y_transform: Option, - x_transform: Option, - layer_palette_name: Option, - per_layer_colors: Option, - layer_y_factor_names: Vec, -} - -impl Default for ContextBaseBuilder { - fn default() -> Self { - Self::new() - } -} - -impl ContextBaseBuilder { - pub fn new() -> Self { - Self { - client: None, - cube_query: None, - schema_ids: Vec::new(), - workflow_id: String::new(), - step_id: String::new(), - project_id: String::new(), - namespace: String::new(), - operator_settings: None, - color_infos: Vec::new(), - page_factors: Vec::new(), - y_axis_table_id: None, - x_axis_table_id: None, - point_size: None, - chart_kind: ChartKind::Point, - crosstab_dimensions: None, - y_transform: None, - x_transform: None, - layer_palette_name: None, - per_layer_colors: None, - layer_y_factor_names: Vec::new(), - } - } - - pub fn client(mut self, client: Arc) -> 
Self { - self.client = Some(client); - self - } - - pub fn cube_query(mut self, cube_query: CubeQuery) -> Self { - self.cube_query = Some(cube_query); - self - } - - pub fn schema_ids(mut self, schema_ids: Vec) -> Self { - self.schema_ids = schema_ids; - self - } - - pub fn workflow_id(mut self, workflow_id: String) -> Self { - self.workflow_id = workflow_id; - self - } - - pub fn step_id(mut self, step_id: String) -> Self { - self.step_id = step_id; - self - } - - pub fn project_id(mut self, project_id: String) -> Self { - self.project_id = project_id; - self - } - - pub fn namespace(mut self, namespace: String) -> Self { - self.namespace = namespace; - self - } - - pub fn operator_settings(mut self, operator_settings: Option) -> Self { - self.operator_settings = operator_settings; - self - } - - pub fn color_infos(mut self, color_infos: Vec) -> Self { - self.color_infos = color_infos; - self - } - - pub fn page_factors(mut self, page_factors: Vec) -> Self { - self.page_factors = page_factors; - self - } - - pub fn y_axis_table_id(mut self, y_axis_table_id: Option) -> Self { - self.y_axis_table_id = y_axis_table_id; - self - } - - pub fn x_axis_table_id(mut self, x_axis_table_id: Option) -> Self { - self.x_axis_table_id = x_axis_table_id; - self - } - - pub fn point_size(mut self, point_size: Option) -> Self { - self.point_size = point_size; - self - } - - pub fn chart_kind(mut self, chart_kind: ChartKind) -> Self { - self.chart_kind = chart_kind; - self - } - - pub fn crosstab_dimensions(mut self, crosstab_dimensions: Option<(i32, i32)>) -> Self { - self.crosstab_dimensions = crosstab_dimensions; - self - } - - pub fn y_transform(mut self, y_transform: Option) -> Self { - self.y_transform = y_transform; - self - } - - pub fn x_transform(mut self, x_transform: Option) -> Self { - self.x_transform = x_transform; - self - } - - pub fn layer_palette_name(mut self, layer_palette_name: Option) -> Self { - self.layer_palette_name = layer_palette_name; - self - } - - pub 
fn per_layer_colors(mut self, per_layer_colors: Option) -> Self { - self.per_layer_colors = per_layer_colors; - self - } - - pub fn layer_y_factor_names(mut self, names: Vec) -> Self { - self.layer_y_factor_names = names; - self - } - - /// Build the ContextBase, returning an error if required fields are missing - pub fn build(self) -> Result> { - let client = self - .client - .ok_or("ContextBaseBuilder: client is required")?; - let cube_query = self - .cube_query - .ok_or("ContextBaseBuilder: cube_query is required")?; - - Ok(ContextBase { - client, - cube_query, - schema_ids: self.schema_ids, - workflow_id: self.workflow_id, - step_id: self.step_id, - project_id: self.project_id, - namespace: self.namespace, - operator_settings: self.operator_settings, - color_infos: self.color_infos, - page_factors: self.page_factors, - y_axis_table_id: self.y_axis_table_id, - x_axis_table_id: self.x_axis_table_id, - point_size: self.point_size, - chart_kind: self.chart_kind, - crosstab_dimensions: self.crosstab_dimensions, - y_transform: self.y_transform, - x_transform: self.x_transform, - layer_palette_name: self.layer_palette_name, - per_layer_colors: self.per_layer_colors, - layer_y_factor_names: self.layer_y_factor_names, - }) - } -} diff --git a/src/tercen/context/dev_context.rs b/src/tercen/context/dev_context.rs deleted file mode 100644 index 1cbf1a2..0000000 --- a/src/tercen/context/dev_context.rs +++ /dev/null @@ -1,370 +0,0 @@ -//! DevContext - TercenContext implementation for development/testing mode -//! -//! Initialized from workflow_id + step_id, fetches data from workflow structure. -//! This mirrors Python's OperatorContextDev. 
- -use super::base::{ContextBase, ContextBaseBuilder}; -use super::TercenContext; -use crate::tercen::client::proto::{CubeQuery, OperatorSettings}; -use crate::tercen::colors::{ChartKind, ColorInfo}; -use crate::tercen::TercenClient; -use std::ops::Deref; -use std::sync::Arc; - -/// Development context initialized from workflow_id + step_id -/// -/// This is used for local testing when we don't have a task_id. -/// Wraps ContextBase using the newtype pattern. -pub struct DevContext(ContextBase); - -impl Deref for DevContext { - type Target = ContextBase; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl DevContext { - /// Create a new DevContext from workflow_id and step_id - /// - /// This fetches the workflow, finds the step, and extracts the CubeQuery - /// either from the step's model.task_id or by calling getCubeQuery. - pub async fn from_workflow_step( - client: Arc, - workflow_id: &str, - step_id: &str, - ) -> Result> { - use crate::tercen::client::proto::{e_step, e_task, e_workflow, GetRequest}; - - println!("[DevContext] Fetching workflow {}...", workflow_id); - - // Fetch workflow - let mut workflow_service = client.workflow_service()?; - let request = tonic::Request::new(GetRequest { - id: workflow_id.to_string(), - ..Default::default() - }); - let response = workflow_service.get(request).await?; - let e_workflow = response.into_inner(); - - let workflow = match e_workflow.object { - Some(e_workflow::Object::Workflow(wf)) => wf, - _ => return Err("No workflow object".into()), - }; - - println!("[DevContext] Workflow name: {}", workflow.name); - - // Find the DataStep - let data_step = workflow - .steps - .iter() - .find_map(|e_step| { - if let Some(e_step::Object::Datastep(ds)) = &e_step.object { - if ds.id == step_id { - return Some(ds.clone()); - } - } - None - }) - .ok_or_else(|| format!("DataStep {} not found in workflow", step_id))?; - - println!("[DevContext] Step name: {}", data_step.name); - - // Get task_id from model (if exists) - 
let task_id = data_step - .model - .as_ref() - .map(|m| m.task_id.clone()) - .unwrap_or_default(); - - println!("[DevContext] Model task_id: '{}'", task_id); - - // Get CubeQuery and schema_ids - let (cube_query, schema_ids, project_id) = if task_id.is_empty() { - // No task_id - call getCubeQuery - println!("[DevContext] Calling getCubeQuery..."); - let mut workflow_service = client.workflow_service()?; - let request = tonic::Request::new(crate::tercen::client::proto::ReqGetCubeQuery { - workflow_id: workflow_id.to_string(), - step_id: step_id.to_string(), - }); - let response = workflow_service.get_cube_query(request).await?; - let resp = response.into_inner(); - let query = resp.result.ok_or("getCubeQuery returned no result")?; - - // getCubeQuery doesn't return schema_ids, so we can't get Y-axis/color tables this way - // We'll have to leave schema_ids empty - (query, Vec::new(), String::new()) - } else { - // Retrieve task to get CubeQuery and schema_ids - println!("[DevContext] Retrieving task {}...", task_id); - let mut task_service = client.task_service()?; - let request = tonic::Request::new(GetRequest { - id: task_id.clone(), - ..Default::default() - }); - let response = task_service.get(request).await?; - let task = response.into_inner(); - - match task.object.as_ref() { - Some(e_task::Object::Cubequerytask(cqt)) => { - let query = cqt.query.as_ref().ok_or("CubeQueryTask has no query")?; - ( - query.clone(), - cqt.schema_ids.clone(), - cqt.project_id.clone(), - ) - } - Some(e_task::Object::Computationtask(ct)) => { - let query = ct.query.as_ref().ok_or("ComputationTask has no query")?; - (query.clone(), ct.schema_ids.clone(), ct.project_id.clone()) - } - Some(e_task::Object::Runcomputationtask(rct)) => { - let query = rct - .query - .as_ref() - .ok_or("RunComputationTask has no query")?; - ( - query.clone(), - rct.schema_ids.clone(), - rct.project_id.clone(), - ) - } - _ => return Err("Task is not a query task".into()), - } - }; - - 
println!("[DevContext] CubeQuery retrieved"); - println!("[DevContext] qt_hash: {}", cube_query.qt_hash); - println!("[DevContext] column_hash: {}", cube_query.column_hash); - println!("[DevContext] row_hash: {}", cube_query.row_hash); - println!( - "[DevContext] axis_queries count: {}", - cube_query.axis_queries.len() - ); - for (i, aq) in cube_query.axis_queries.iter().enumerate() { - println!( - "[DevContext] axis_queries[{}]: chart_type='{}', point_size={}, colors={:?}", - i, - aq.chart_type, - aq.point_size, - aq.colors.iter().map(|f| &f.name).collect::>() - ); - } - - // Extract operator settings and namespace from cube_query - let operator_settings = cube_query.operator_settings.clone(); - let namespace = operator_settings - .as_ref() - .map(|os| os.namespace.clone()) - .unwrap_or_default(); - - // Find Y-axis table - let y_axis_table_id = if !schema_ids.is_empty() { - super::helpers::find_y_axis_table(&client, &schema_ids, &cube_query, "DevContext") - .await? - } else { - None - }; - - // Find X-axis table - let x_axis_table_id = if !schema_ids.is_empty() { - super::helpers::find_x_axis_table(&client, &schema_ids, &cube_query, "DevContext") - .await? 
- } else { - None - }; - - // Extract per-layer color information - let per_layer_colors = super::helpers::extract_per_layer_color_info_from_workflow( - &client, - &schema_ids, - &workflow, - step_id, - "DevContext", - ) - .await?; - - // Also extract legacy color_infos for backwards compatibility - let color_infos = super::helpers::extract_color_info_from_workflow( - &client, - &schema_ids, - &workflow, - step_id, - "DevContext", - ) - .await?; - - // Extract page factors - let page_factors = crate::tercen::extract_page_factors(operator_settings.as_ref()); - - // Extract point size from workflow step - let point_size = match crate::tercen::extract_point_size_from_step(&workflow, step_id) { - Ok(ps) => ps, - Err(e) => { - eprintln!("[DevContext] Failed to extract point_size: {}", e); - None - } - }; - - // Extract chart kind from workflow step - let chart_kind = match crate::tercen::extract_chart_kind_from_step(&workflow, step_id) { - Ok(ck) => { - println!("[DevContext] Chart kind: {:?}", ck); - ck - } - Err(e) => { - eprintln!("[DevContext] Failed to extract chart_kind: {}", e); - ChartKind::Point - } - }; - - // Extract crosstab dimensions from step model - let crosstab_dimensions = super::helpers::extract_crosstab_dimensions(&workflow, step_id); - if let Some((w, h)) = crosstab_dimensions { - println!("[DevContext] Crosstab dimensions: {}×{} pixels", w, h); - } - - // Extract axis transforms from Crosstab model (not CubeQuery) - // Transforms are in step.model.axis.xyAxis[0].preprocessors - let (y_transform, x_transform) = - super::helpers::extract_transforms_from_step(&workflow, step_id, &cube_query); - - // Extract layer palette name from GlTask (preferred) or fallback to crosstab palette - let layer_palette_name = - match super::helpers::extract_layer_palette_from_gltask(&client, &workflow, step_id) - .await - { - Ok(Some(name)) => { - println!("[DevContext] Layer palette (from GlTask): {}", name); - Some(name) - } - Ok(None) | Err(_) => { - // Fallback to 
crosstab palette extraction - let name = crate::tercen::extract_crosstab_palette_name(&workflow, step_id); - if let Some(ref n) = name { - println!("[DevContext] Layer palette (from crosstab): {}", n); - } - name - } - }; - - // Extract Y-axis factor names per layer (for legend entries) - let layer_y_factor_names = super::helpers::extract_layer_y_factor_names(&workflow, step_id); - if !layer_y_factor_names.is_empty() { - println!( - "[DevContext] Layer Y-factor names: {:?}", - layer_y_factor_names - ); - } - - // Build ContextBase using the builder - let base = ContextBaseBuilder::new() - .client(client) - .cube_query(cube_query) - .schema_ids(schema_ids) - .workflow_id(workflow_id.to_string()) - .step_id(step_id.to_string()) - .project_id(project_id) - .namespace(namespace) - .operator_settings(operator_settings) - .color_infos(color_infos) - .per_layer_colors(Some(per_layer_colors)) - .page_factors(page_factors) - .y_axis_table_id(y_axis_table_id) - .x_axis_table_id(x_axis_table_id) - .point_size(point_size) - .chart_kind(chart_kind) - .crosstab_dimensions(crosstab_dimensions) - .y_transform(y_transform) - .x_transform(x_transform) - .layer_palette_name(layer_palette_name) - .layer_y_factor_names(layer_y_factor_names) - .build()?; - - Ok(Self(base)) - } -} - -impl TercenContext for DevContext { - fn cube_query(&self) -> &CubeQuery { - self.0.cube_query() - } - - fn schema_ids(&self) -> &[String] { - self.0.schema_ids() - } - - fn workflow_id(&self) -> &str { - self.0.workflow_id() - } - - fn step_id(&self) -> &str { - self.0.step_id() - } - - fn project_id(&self) -> &str { - self.0.project_id() - } - - fn namespace(&self) -> &str { - self.0.namespace() - } - - fn operator_settings(&self) -> Option<&OperatorSettings> { - self.0.operator_settings() - } - - fn color_infos(&self) -> &[ColorInfo] { - self.0.color_infos() - } - - fn page_factors(&self) -> &[String] { - self.0.page_factors() - } - - fn y_axis_table_id(&self) -> Option<&str> { - self.0.y_axis_table_id() 
- } - - fn x_axis_table_id(&self) -> Option<&str> { - self.0.x_axis_table_id() - } - - fn point_size(&self) -> Option { - self.0.point_size() - } - - fn chart_kind(&self) -> ChartKind { - self.0.chart_kind() - } - - fn crosstab_dimensions(&self) -> Option<(i32, i32)> { - self.0.crosstab_dimensions() - } - - fn y_transform(&self) -> Option<&str> { - self.0.y_transform() - } - - fn x_transform(&self) -> Option<&str> { - self.0.x_transform() - } - - fn layer_palette_name(&self) -> Option<&str> { - self.0.layer_palette_name() - } - - fn per_layer_colors(&self) -> Option<&crate::tercen::PerLayerColorConfig> { - self.0.per_layer_colors() - } - - fn layer_y_factor_names(&self) -> &[String] { - self.0.layer_y_factor_names() - } - - fn client(&self) -> &Arc { - self.0.client() - } -} diff --git a/src/tercen/context/helpers.rs b/src/tercen/context/helpers.rs deleted file mode 100644 index 68bae36..0000000 --- a/src/tercen/context/helpers.rs +++ /dev/null @@ -1,925 +0,0 @@ -//! Shared helper functions for context implementations -//! -//! These functions are used by both ProductionContext and DevContext to avoid duplication. - -use crate::tercen::client::proto::{CubeQuery, CubeQueryTableSchema, Workflow}; -use crate::tercen::colors::ColorInfo; -use crate::tercen::TercenClient; -use std::collections::HashMap; - -/// Find Y-axis table from schema_ids -/// -/// Searches through schema_ids to find a table with query_table_type == "y". -/// Skips known tables (qt_hash, column_hash, row_hash). 
-pub async fn find_y_axis_table( - client: &TercenClient, - schema_ids: &[String], - cube_query: &CubeQuery, - context_name: &str, -) -> Result, Box> { - use crate::tercen::client::proto::e_schema; - use crate::tercen::TableStreamer; - - let streamer = TableStreamer::new(client); - - let known_tables = [ - cube_query.qt_hash.as_str(), - cube_query.column_hash.as_str(), - cube_query.row_hash.as_str(), - ]; - - eprintln!( - "DEBUG find_y_axis_table: schema_ids={:?}, known_tables={:?}", - schema_ids, known_tables - ); - - for schema_id in schema_ids { - if !known_tables.contains(&schema_id.as_str()) { - let schema = streamer.get_schema(schema_id).await?; - if let Some(e_schema::Object::Cubequerytableschema(cqts)) = schema.object { - eprintln!( - "DEBUG find_y_axis_table: schema {} has query_table_type='{}'", - schema_id, cqts.query_table_type - ); - if cqts.query_table_type == "y" { - println!("[{}] Found Y-axis table: {}", context_name, schema_id); - return Ok(Some(schema_id.clone())); - } - } - } else { - eprintln!( - "DEBUG find_y_axis_table: skipping known table {}", - schema_id - ); - } - } - - eprintln!("DEBUG find_y_axis_table: No Y-axis table found"); - Ok(None) -} - -/// Find X-axis table from schema_ids -/// -/// Searches through schema_ids to find a table with query_table_type == "x". -/// Skips known tables (qt_hash, column_hash, row_hash). 
-pub async fn find_x_axis_table( - client: &TercenClient, - schema_ids: &[String], - cube_query: &CubeQuery, - context_name: &str, -) -> Result, Box> { - use crate::tercen::client::proto::e_schema; - use crate::tercen::TableStreamer; - - let streamer = TableStreamer::new(client); - - let known_tables = [ - cube_query.qt_hash.as_str(), - cube_query.column_hash.as_str(), - cube_query.row_hash.as_str(), - ]; - - for schema_id in schema_ids { - if !known_tables.contains(&schema_id.as_str()) { - let schema = streamer.get_schema(schema_id).await?; - if let Some(e_schema::Object::Cubequerytableschema(cqts)) = schema.object { - if cqts.query_table_type == "x" { - println!("[{}] Found X-axis table: {}", context_name, schema_id); - return Ok(Some(schema_id.clone())); - } - } - } - } - - Ok(None) -} - -/// Find color tables from schema_ids -/// -/// Returns a tuple of: -/// - Vec of color table IDs (indexed by color_N suffix) -/// - HashMap of schema_id -> CubeQueryTableSchema for color tables -pub async fn find_color_tables( - client: &TercenClient, - schema_ids: &[String], -) -> Result<(Vec>, HashMap), Box> -{ - use crate::tercen::client::proto::e_schema; - use crate::tercen::TableStreamer; - - let streamer = TableStreamer::new(client); - let mut color_table_ids: Vec> = Vec::new(); - let mut color_table_schemas: HashMap = HashMap::new(); - - for schema_id in schema_ids { - let schema = streamer.get_schema(schema_id).await?; - if let Some(e_schema::Object::Cubequerytableschema(cqts)) = schema.object { - if cqts.query_table_type.starts_with("color_") { - if let Some(idx_str) = cqts.query_table_type.strip_prefix("color_") { - if let Ok(idx) = idx_str.parse::() { - while color_table_ids.len() <= idx { - color_table_ids.push(None); - } - color_table_ids[idx] = Some(schema_id.clone()); - color_table_schemas.insert(schema_id.clone(), cqts); - } - } - } - } - } - - Ok((color_table_ids, color_table_schemas)) -} - -/// Extract per-layer color information from workflow -/// -/// This 
is the new per-layer implementation that handles mixed scenarios where -/// some layers have colors and some don't. -pub async fn extract_per_layer_color_info_from_workflow( - client: &TercenClient, - schema_ids: &[String], - workflow: &Workflow, - step_id: &str, - context_name: &str, -) -> Result> { - use crate::tercen::client::proto::e_column_schema; - use crate::tercen::LayerColorConfig; - - if schema_ids.is_empty() { - println!( - "[{}] No schema_ids available - returning empty per-layer color config", - context_name - ); - return Ok(crate::tercen::PerLayerColorConfig::default()); - } - - // Find color tables and cache their schemas - let (color_table_ids, color_table_schemas) = find_color_tables(client, schema_ids).await?; - - for (idx, table_id) in color_table_ids.iter().enumerate() { - if let Some(id) = table_id { - println!("[{}] Found color table {}: {}", context_name, idx, id); - } - } - - // Extract per-layer color info from step - let mut per_layer_config = - crate::tercen::extract_per_layer_color_info(workflow, step_id, &color_table_ids)?; - - eprintln!( - "[{}] Per-layer color config: n_layers={}, has_explicit={}, is_mixed={}", - context_name, - per_layer_config.n_layers, - per_layer_config.has_explicit_colors(), - per_layer_config.is_mixed() - ); - - // Assign shared color table ID to layers that need it and fetch quartiles - let shared_color_table_id = color_table_ids.first().and_then(|opt| opt.clone()); - - for config in per_layer_config.layer_configs.iter_mut() { - match config { - LayerColorConfig::Continuous { - palette, - factor_name, - quartiles, - color_table_id, - } => { - // Assign shared color table ID if not set - if color_table_id.is_none() { - if let Some(ref table_id) = shared_color_table_id { - eprintln!( - "DEBUG extract_per_layer_color_info: assigning shared color table {} to factor '{}'", - table_id, factor_name - ); - *color_table_id = Some(table_id.clone()); - } - } - - // Fetch quartiles for non-user-defined palettes - if 
!palette.is_user_defined && quartiles.is_none() { - if let Some(ref table_id) = color_table_id { - if let Some(cqts) = color_table_schemas.get(table_id) { - for col_schema in &cqts.columns { - if let Some(e_column_schema::Object::Columnschema(cs)) = - &col_schema.object - { - if cs.name == *factor_name { - if let Some(ref meta) = cs.meta_data { - if !meta.quartiles.is_empty() { - eprintln!( - "DEBUG extract_per_layer_color_info: Found quartiles for '{}': {:?}", - factor_name, meta.quartiles - ); - *quartiles = Some(meta.quartiles.clone()); - } - } - break; - } - } - } - } - } - } - } - LayerColorConfig::Categorical { - color_table_id, - factor_name, - .. - } => { - // Assign shared color table ID if not set - if color_table_id.is_none() { - if let Some(ref table_id) = shared_color_table_id { - eprintln!( - "DEBUG extract_per_layer_color_info: assigning shared color table {} to categorical factor '{}'", - table_id, factor_name - ); - *color_table_id = Some(table_id.clone()); - } - } - } - LayerColorConfig::Constant { .. } => { - // Constant colors don't need color table IDs or quartiles - } - } - } - - Ok(per_layer_config) -} - -/// Extract color information from workflow (core implementation) -/// -/// This is the shared implementation used by both ProductionContext and DevContext. -/// The workflow must already be fetched. -/// -/// DEPRECATED: Use extract_per_layer_color_info_from_workflow for mixed-layer scenarios. 
-pub async fn extract_color_info_from_workflow( - client: &TercenClient, - schema_ids: &[String], - workflow: &Workflow, - step_id: &str, - context_name: &str, -) -> Result, Box> { - use crate::tercen::client::proto::e_column_schema; - use crate::tercen::TableStreamer; - - if schema_ids.is_empty() { - println!( - "[{}] No schema_ids available - skipping color extraction", - context_name - ); - return Ok(Vec::new()); - } - - // Find color tables and cache their schemas - let (color_table_ids, color_table_schemas) = find_color_tables(client, schema_ids).await?; - - for (idx, table_id) in color_table_ids.iter().enumerate() { - if let Some(id) = table_id { - println!("[{}] Found color table {}: {}", context_name, idx, id); - } - } - - // Extract color info from step - let mut color_infos = - crate::tercen::extract_color_info_from_step(workflow, step_id, &color_table_ids)?; - - // All color factors share the same color table (color_0) - // Assign the color table ID to ALL factors, not just the first - let shared_color_table_id = color_table_ids.first().and_then(|opt| opt.clone()); - if let Some(ref table_id) = shared_color_table_id { - for color_info in &mut color_infos { - if color_info.color_table_id.is_none() { - eprintln!( - "DEBUG extract_color_info: assigning shared color table {} to factor '{}'", - table_id, color_info.factor_name - ); - color_info.color_table_id = Some(table_id.clone()); - } - } - } - - // Fetch actual color labels from color table for categorical colors - let streamer = TableStreamer::new(client); - if let Some(first_categorical_idx) = color_infos - .iter() - .position(|ci| matches!(ci.mapping, crate::tercen::ColorMapping::Categorical(_))) - { - let color_info = &color_infos[first_categorical_idx]; - - if let Some(ref table_id) = color_info.color_table_id { - if let Some(cqts) = color_table_schemas.get(table_id) { - let n_rows = cqts.n_rows as usize; - - let factor_columns: Vec = cqts - .columns - .iter() - .filter_map(|c| { - if let 
Some(e_column_schema::Object::Columnschema(cs)) = &c.object { - Some(cs.name.clone()) - } else { - None - } - }) - .collect(); - - if n_rows > 0 && !factor_columns.is_empty() { - eprintln!( - "DEBUG extract_color_info: fetching combined color labels from table {} ({} rows, columns: {:?})", - table_id, n_rows, factor_columns - ); - - match streamer - .stream_tson(table_id, Some(factor_columns.clone()), 0, n_rows as i64) - .await - { - Ok(tson_data) => { - if !tson_data.is_empty() { - match crate::tercen::tson_to_dataframe(&tson_data) { - Ok(df) => { - let mut combined_labels = Vec::with_capacity(n_rows); - for i in 0..df.nrow() { - let parts: Vec = factor_columns - .iter() - .filter_map(|col| { - df.get_value(i, col).ok().map(|v| v.as_string()) - }) - .collect(); - combined_labels.push(parts.join(", ")); - } - eprintln!( - "DEBUG extract_color_info: got {} combined color labels: {:?}", - combined_labels.len(), - combined_labels - ); - - color_infos[first_categorical_idx].n_levels = - Some(combined_labels.len()); - color_infos[first_categorical_idx].color_labels = - Some(combined_labels); - } - Err(e) => { - eprintln!( - "WARN extract_color_info: failed to parse color table TSON: {}", - e - ); - } - } - } - } - Err(e) => { - eprintln!( - "WARN extract_color_info: failed to stream color table {}: {}", - table_id, e - ); - } - } - } - } - } - } - - // Fetch quartiles for continuous color mappings that are not user-defined - for color_info in &mut color_infos { - let is_user_defined = match &color_info.mapping { - crate::tercen::ColorMapping::Continuous(palette) => palette.is_user_defined, - _ => true, - }; - - eprintln!( - "DEBUG extract_color_info: factor='{}' is_user_defined={}", - color_info.factor_name, is_user_defined - ); - - if !is_user_defined { - if let Some(ref table_id) = color_info.color_table_id { - if let Some(cqts) = color_table_schemas.get(table_id) { - for col_schema in &cqts.columns { - if let Some(e_column_schema::Object::Columnschema(cs)) = 
&col_schema.object - { - if cs.name == color_info.factor_name { - if let Some(ref meta) = cs.meta_data { - if !meta.quartiles.is_empty() { - eprintln!( - "DEBUG extract_color_info: Found quartiles for '{}': {:?}", - color_info.factor_name, meta.quartiles - ); - color_info.quartiles = Some(meta.quartiles.clone()); - } - } - break; - } - } - } - } - } - - if color_info.quartiles.is_none() { - eprintln!( - "WARN extract_color_info: is_user_defined=false for '{}' but no quartiles found", - color_info.factor_name - ); - } - } - } - - Ok(color_infos) -} - -/// Extract axis transform types from CubeQuery -/// -/// Transforms are stored in CubeQuery.axisQueries[0].preprocessors -/// The structure is: -/// - preprocessors[i].type = "y" or "x" (which axis the transform applies to) -/// - preprocessors[i].operatorRef.name = "log", "asinh", etc. (the actual transform) -pub fn extract_transforms_from_cube_query( - cube_query: &CubeQuery, -) -> (Option, Option) { - for (i, aq) in cube_query.axis_queries.iter().enumerate() { - for (j, pp) in aq.preprocessors.iter().enumerate() { - let transform_name = pp - .operator_ref - .as_ref() - .map(|op_ref| op_ref.name.as_str()) - .unwrap_or(""); - - eprintln!( - "DEBUG extract_transforms: axisQuery[{}].preprocessors[{}] type='{}', operatorRef.name='{}'", - i, j, pp.r#type, transform_name - ); - } - } - - let axis_query = match cube_query.axis_queries.first() { - Some(aq) => aq, - None => return (None, None), - }; - - let mut y_transform = None; - let mut x_transform = None; - - for pp in &axis_query.preprocessors { - let transform_name = pp - .operator_ref - .as_ref() - .map(|op_ref| op_ref.name.as_str()) - .unwrap_or(""); - - let axis_type = pp.r#type.as_str(); - - let is_valid_transform = matches!( - transform_name, - "log" | "log10" | "ln" | "log2" | "asinh" | "sqrt" - ); - - if is_valid_transform { - match axis_type { - "y" => { - println!("[Context] Y-axis transform: {}", transform_name); - y_transform = 
Some(transform_name.to_string()); - } - "x" => { - println!("[Context] X-axis transform: {}", transform_name); - x_transform = Some(transform_name.to_string()); - } - _ => {} - } - } - } - - (y_transform, x_transform) -} - -/// Extract axis transform types from Crosstab model (step.model.axis.xyAxis) -/// -/// This is used by DevContext which has direct access to the step model. -/// The structure is: -/// - xyAxis[0].preprocessors[i].type = "y" or "x" -/// - xyAxis[0].preprocessors[i].operatorRef.name = "log", "asinh", etc. -pub fn extract_transforms_from_step( - workflow: &Workflow, - step_id: &str, - cube_query: &CubeQuery, -) -> (Option, Option) { - use crate::tercen::client::proto::e_step; - - // First, try to get transforms from the Crosstab model (step.model.axis.xyAxis) - let step = workflow.steps.iter().find(|s| match &s.object { - Some(e_step::Object::Datastep(ds)) => ds.id == step_id, - Some(e_step::Object::Crosstabstep(cs)) => cs.id == step_id, - _ => false, - }); - - if let Some(step) = step { - let model = match &step.object { - Some(e_step::Object::Datastep(ds)) => ds.model.as_ref(), - Some(e_step::Object::Crosstabstep(cs)) => cs.model.as_ref(), - _ => None, - }; - - if let Some(crosstab) = model { - if let Some(ref axis_list) = crosstab.axis { - eprintln!( - "DEBUG extract_transforms: Found {} xyAxis in Crosstab.axis", - axis_list.xy_axis.len() - ); - - for (i, xy_axis) in axis_list.xy_axis.iter().enumerate() { - eprintln!( - "DEBUG extract_transforms: xyAxis[{}] has {} preprocessors", - i, - xy_axis.preprocessors.len() - ); - for (j, pp) in xy_axis.preprocessors.iter().enumerate() { - eprintln!( - "DEBUG extract_transforms: xyAxis[{}].preprocessors[{}].type = '{}'", - i, j, pp.r#type - ); - } - } - - if let Some(xy_axis) = axis_list.xy_axis.first() { - // Check yAxis.axisSettings.meta for transform info - if let Some(ref y_axis) = xy_axis.y_axis { - if let Some(ref axis_settings) = y_axis.axis_settings { - eprintln!( - "DEBUG extract_transforms: 
yAxis.axisSettings.meta has {} pairs", - axis_settings.meta.len() - ); - for pair in &axis_settings.meta { - eprintln!( - "DEBUG extract_transforms: yAxis.axisSettings.meta['{}'] = '{}'", - pair.key, pair.value - ); - } - - for pair in &axis_settings.meta { - if pair.key == "transform" || pair.key == "scale" { - let t = pair.value.as_str(); - if matches!( - t, - "log" | "log10" | "ln" | "log2" | "asinh" | "sqrt" - ) { - println!( - "[DevContext] Y-axis transform (from yAxis.axisSettings): {}", - t - ); - return (Some(t.to_string()), None); - } - } - } - } - } - - // Check xAxis.axisSettings.meta - if let Some(ref x_axis) = xy_axis.x_axis { - if let Some(ref axis_settings) = x_axis.axis_settings { - eprintln!( - "DEBUG extract_transforms: xAxis.axisSettings.meta has {} pairs", - axis_settings.meta.len() - ); - for pair in &axis_settings.meta { - eprintln!( - "DEBUG extract_transforms: xAxis.axisSettings.meta['{}'] = '{}'", - pair.key, pair.value - ); - } - } - } - - // Extract transforms from preprocessors - let mut y_transform = None; - let mut x_transform = None; - - for pp in &xy_axis.preprocessors { - let transform_name = pp - .operator_ref - .as_ref() - .map(|op_ref| op_ref.name.as_str()) - .unwrap_or(""); - - let axis_type = pp.r#type.as_str(); - - eprintln!( - "DEBUG extract_transforms: preprocessor type='{}', operatorRef.name='{}'", - axis_type, transform_name - ); - - let is_valid_transform = matches!( - transform_name, - "log" | "log10" | "ln" | "log2" | "asinh" | "sqrt" - ); - - if is_valid_transform { - match axis_type { - "y" => { - println!("[DevContext] Y-axis transform: {}", transform_name); - y_transform = Some(transform_name.to_string()); - } - "x" => { - println!("[DevContext] X-axis transform: {}", transform_name); - x_transform = Some(transform_name.to_string()); - } - _ => {} - } - } - } - - if y_transform.is_some() || x_transform.is_some() { - return (y_transform, x_transform); - } - } - } - } - } - - // Fallback: check CubeQuery.axisQueries - 
eprintln!( - "DEBUG extract_transforms: Checking CubeQuery.axisQueries ({} queries)", - cube_query.axis_queries.len() - ); - - for (i, aq) in cube_query.axis_queries.iter().enumerate() { - eprintln!( - "DEBUG extract_transforms: axisQuery[{}] has {} preprocessors, chart_type='{}'", - i, - aq.preprocessors.len(), - aq.chart_type - ); - for (j, pp) in aq.preprocessors.iter().enumerate() { - eprintln!( - "DEBUG extract_transforms: axisQuery[{}].preprocessors[{}].type = '{}'", - i, j, pp.r#type - ); - } - } - - if let Some(axis_query) = cube_query.axis_queries.first() { - let y_transform = axis_query.preprocessors.iter().find_map(|pp| { - let t = pp.r#type.as_str(); - match t { - "log" | "log10" | "ln" | "log2" | "asinh" | "sqrt" => { - println!("[DevContext] Y-axis transform (from CubeQuery): {}", t); - Some(t.to_string()) - } - _ => None, - } - }); - - if y_transform.is_some() { - return (y_transform, None); - } - } - - (None, None) -} - -/// Extract crosstab dimensions from workflow step model -/// -/// Returns (width, height) calculated as: -/// - width = columnTable.cellSize × columnTable.nRows -/// - height = rowTable.cellSize × rowTable.nRows -pub fn extract_crosstab_dimensions(workflow: &Workflow, step_id: &str) -> Option<(i32, i32)> { - use crate::tercen::client::proto::e_step; - - let step = workflow.steps.iter().find(|s| match &s.object { - Some(e_step::Object::Datastep(ds)) => ds.id == step_id, - Some(e_step::Object::Crosstabstep(cs)) => cs.id == step_id, - _ => false, - })?; - - let model = match &step.object { - Some(e_step::Object::Datastep(ds)) => ds.model.as_ref(), - Some(e_step::Object::Crosstabstep(cs)) => cs.model.as_ref(), - _ => None, - }?; - - let width = model.column_table.as_ref().map(|ct| { - let cell_size = ct.cell_size as i32; - let n_rows = ct.n_rows.max(1); - cell_size * n_rows - })?; - - let height = model.row_table.as_ref().map(|rt| { - let cell_size = rt.cell_size as i32; - let n_rows = rt.n_rows.max(1); - cell_size * n_rows - })?; - - 
if width > 0 && height > 0 { - Some((width, height)) - } else { - None - } -} - -/// Fetch workflow by ID -pub async fn fetch_workflow( - client: &TercenClient, - workflow_id: &str, -) -> Result> { - use crate::tercen::client::proto::{e_workflow, GetRequest}; - - let mut workflow_service = client.workflow_service()?; - let request = tonic::Request::new(GetRequest { - id: workflow_id.to_string(), - ..Default::default() - }); - let response = workflow_service.get(request).await?; - let e_workflow = response.into_inner(); - - let workflow = match e_workflow.object { - Some(e_workflow::Object::Workflow(wf)) => wf, - _ => return Err("No workflow object".into()), - }; - - Ok(workflow) -} - -/// Extract the layer palette name from GlTask -/// -/// The layer palette is stored in GlTask.palettes[0]. This is the palette used to -/// color layers that don't have their own explicit color factors. -/// -/// Path: step.model.taskId → GlTask.palettes[0].colorList.name (for CategoryPalette) -pub async fn extract_layer_palette_from_gltask( - client: &TercenClient, - workflow: &Workflow, - step_id: &str, -) -> Result, Box> { - use crate::tercen::client::proto::{e_palette, e_step, e_task, GetRequest}; - - // Find the step by ID - let step = workflow.steps.iter().find(|s| match &s.object { - Some(e_step::Object::Datastep(ds)) => ds.id == step_id, - Some(e_step::Object::Crosstabstep(cs)) => cs.id == step_id, - _ => false, - }); - - // Get the task ID from step.model - let task_id = match step.and_then(|s| match &s.object { - Some(e_step::Object::Datastep(ds)) => ds.model.as_ref().map(|m| &m.task_id), - Some(e_step::Object::Crosstabstep(cs)) => cs.model.as_ref().map(|m| &m.task_id), - _ => None, - }) { - Some(id) if !id.is_empty() => id.clone(), - _ => { - eprintln!("DEBUG extract_layer_palette_from_gltask: No model.taskId found in step"); - return Ok(None); - } - }; - - eprintln!( - "DEBUG extract_layer_palette_from_gltask: Fetching task {} to check for GlTask palettes", - task_id - ); 
- - // Fetch the task - let mut task_service = client.task_service()?; - let request = tonic::Request::new(GetRequest { - id: task_id.clone(), - ..Default::default() - }); - let response = task_service.get(request).await?; - let task = response.into_inner(); - - // Check if it's a GlTask and extract palettes - let gltask = match task.object { - Some(e_task::Object::Gltask(gt)) => gt, - _ => { - eprintln!( - "DEBUG extract_layer_palette_from_gltask: Task {} is not a GlTask", - task_id - ); - return Ok(None); - } - }; - - eprintln!( - "DEBUG extract_layer_palette_from_gltask: GlTask has {} palettes", - gltask.palettes.len() - ); - - // Get the first palette (layer palette) - let first_palette = match gltask.palettes.first() { - Some(p) => p, - None => { - eprintln!("DEBUG extract_layer_palette_from_gltask: GlTask has no palettes"); - return Ok(None); - } - }; - - // Extract the palette name based on type - let palette_name = match &first_palette.object { - Some(e_palette::Object::Categorypalette(cat)) => { - // For CategoryPalette, try colorList.name first - let name = cat.color_list.as_ref().and_then(|cl| { - if !cl.name.is_empty() { - Some(cl.name.clone()) - } else { - None - } - }); - // Fallback to properties["name"] - name.or_else(|| { - cat.properties - .iter() - .find(|p| p.name == "name") - .map(|p| p.value.clone()) - }) - } - Some(e_palette::Object::Ramppalette(ramp)) => ramp - .properties - .iter() - .find(|p| p.name == "name") - .map(|p| p.value.clone()), - Some(e_palette::Object::Jetpalette(_)) => Some("Jet".to_string()), - Some(e_palette::Object::Palette(p)) => p - .properties - .iter() - .find(|p| p.name == "name") - .map(|p| p.value.clone()), - None => None, - }; - - if let Some(ref name) = palette_name { - eprintln!( - "DEBUG extract_layer_palette_from_gltask: Found layer palette name: '{}'", - name - ); - } - - Ok(palette_name) -} - -/// Extract Y-axis factor names per layer from workflow step model -/// -/// Each layer (xyAxis entry) has a 
yAxis.graphical_factor.factor.name -/// that identifies what data is being plotted. These names are used -/// in legends for layers without explicit color factors. -pub fn extract_layer_y_factor_names(workflow: &Workflow, step_id: &str) -> Vec { - use crate::tercen::client::proto::e_step; - - // Find the step by ID - let step = workflow.steps.iter().find(|s| { - if let Some(e_step::Object::Datastep(ds)) = &s.object { - ds.id == step_id - } else { - false - } - }); - - let data_step = match step.and_then(|s| match &s.object { - Some(e_step::Object::Datastep(ds)) => Some(ds), - _ => None, - }) { - Some(ds) => ds, - None => { - eprintln!("DEBUG extract_layer_y_factor_names: Step not found or not a DataStep"); - return Vec::new(); - } - }; - - // Navigate to model.axis.xyAxis - let xy_axis_list = match data_step - .model - .as_ref() - .and_then(|m| m.axis.as_ref()) - .map(|a| &a.xy_axis) - { - Some(list) => list, - None => { - eprintln!("DEBUG extract_layer_y_factor_names: No model.axis.xyAxis found"); - return Vec::new(); - } - }; - - let names: Vec = xy_axis_list - .iter() - .enumerate() - .map(|(i, xy)| { - // Navigate to y_axis.graphical_factor.factor.name - let name = xy - .y_axis - .as_ref() - .and_then(|axis| axis.graphical_factor.as_ref()) - .and_then(|gf| gf.factor.as_ref()) - .map(|f| f.name.clone()) - .unwrap_or_else(|| format!("Layer {}", i + 1)); - - eprintln!( - "DEBUG extract_layer_y_factor_names: Layer {} Y-factor name = '{}'", - i, name - ); - name - }) - .collect(); - - eprintln!( - "DEBUG extract_layer_y_factor_names: Extracted {} names: {:?}", - names.len(), - names - ); - - names -} diff --git a/src/tercen/context/mod.rs b/src/tercen/context/mod.rs deleted file mode 100644 index 4b5cdfd..0000000 --- a/src/tercen/context/mod.rs +++ /dev/null @@ -1,155 +0,0 @@ -//! TercenContext trait and implementations -//! -//! This module provides a unified interface for accessing Tercen task/query data -//! 
regardless of whether we're in production mode (with task_id) or dev mode -//! (with workflow_id + step_id). -//! -//! This mirrors Python's OperatorContext / OperatorContextDev pattern. - -use crate::tercen::client::proto::{CubeQuery, OperatorSettings}; -use crate::tercen::colors::{ChartKind, ColorInfo}; -use crate::tercen::TercenClient; -use std::sync::Arc; - -mod base; -mod dev_context; -mod helpers; -mod production_context; - -pub use base::{ContextBase, ContextBaseBuilder}; - -pub use dev_context::DevContext; -pub use production_context::ProductionContext; - -/// Trait for accessing Tercen context data -/// -/// Implementations: -/// - `ProductionContext`: Initialized from task_id (production mode) -/// - `DevContext`: Initialized from workflow_id + step_id (dev/test mode) -pub trait TercenContext: Send + Sync { - /// Get the CubeQuery containing table hashes - fn cube_query(&self) -> &CubeQuery; - - /// Get the schema IDs (table IDs for Y-axis, colors, etc.) - fn schema_ids(&self) -> &[String]; - - /// Get the workflow ID - fn workflow_id(&self) -> &str; - - /// Get the step ID - fn step_id(&self) -> &str; - - /// Get the project ID - fn project_id(&self) -> &str; - - /// Get the namespace - fn namespace(&self) -> &str; - - /// Get the operator settings (if available) - fn operator_settings(&self) -> Option<&OperatorSettings>; - - /// Get the color information extracted from the workflow - fn color_infos(&self) -> &[ColorInfo]; - - /// Get the page factor names - fn page_factors(&self) -> &[String]; - - /// Get the Y-axis table ID (if available) - fn y_axis_table_id(&self) -> Option<&str>; - - /// Get the X-axis table ID (if available) - fn x_axis_table_id(&self) -> Option<&str>; - - /// Get the point size from crosstab model (UI scale 1-10, None = use default) - fn point_size(&self) -> Option; - - /// Get the chart kind (Point, Heatmap, Line, Bar) - fn chart_kind(&self) -> ChartKind; - - /// Get the crosstab dimensions from the model (cellSize × nRows for 
each axis) - /// Returns (width, height) in pixels, or None if not available - fn crosstab_dimensions(&self) -> Option<(i32, i32)>; - - /// Get the Y-axis transform type (e.g., "log", "asinh", "sqrt") - /// Returns None if no transform is applied (identity) - fn y_transform(&self) -> Option<&str>; - - /// Get the X-axis transform type (e.g., "log", "asinh", "sqrt") - /// Returns None if no transform is applied (identity) or if no X-axis is defined - fn x_transform(&self) -> Option<&str>; - - /// Get the Tercen client - fn client(&self) -> &Arc; - - /// Get per-layer color configuration (for mixed-layer scenarios) - /// - /// Returns None for legacy configurations that use uniform colors across all layers. - /// When Some, this takes precedence over color_infos() for color processing. - fn per_layer_colors(&self) -> Option<&crate::tercen::PerLayerColorConfig>; - - /// Get Y-axis factor names per layer - /// - /// Used for legend entries when layers don't have explicit color factors. - /// Each name comes from axis_queries[i].yAxis.graphical_factor.factor.name - fn layer_y_factor_names(&self) -> &[String]; - - // Convenience methods with default implementations - - /// Get the main table hash (qt_hash) - fn qt_hash(&self) -> &str { - &self.cube_query().qt_hash - } - - /// Get the column facet table hash - fn column_hash(&self) -> &str { - &self.cube_query().column_hash - } - - /// Get the row facet table hash - fn row_hash(&self) -> &str { - &self.cube_query().row_hash - } - - // === Factor name accessors (from CubeQuery.axisQueries[0]) === - - /// Get the color factor names from the first axis query - fn colors(&self) -> Vec<&str> { - self.cube_query() - .axis_queries - .first() - .map(|aq| aq.colors.iter().map(|f| f.name.as_str()).collect()) - .unwrap_or_default() - } - - /// Get the label factor names from the first axis query - fn labels(&self) -> Vec<&str> { - self.cube_query() - .axis_queries - .first() - .map(|aq| aq.labels.iter().map(|f| 
f.name.as_str()).collect()) - .unwrap_or_default() - } - - /// Get the error factor names from the first axis query - fn errors(&self) -> Vec<&str> { - self.cube_query() - .axis_queries - .first() - .map(|aq| aq.errors.iter().map(|f| f.name.as_str()).collect()) - .unwrap_or_default() - } - - /// Get the number of layers (axis queries) - /// - /// Each axis query represents a layer in the plot. When there are multiple layers - /// and no colors are specified, we can use layer-based coloring. - fn n_layers(&self) -> usize { - self.cube_query().axis_queries.len().max(1) - } - - /// Get the palette name for layer-based coloring - /// - /// Returns the palette name from the crosstab configuration, used when - /// coloring points by layer when no explicit color factors are defined. - fn layer_palette_name(&self) -> Option<&str>; -} diff --git a/src/tercen/context/production_context.rs b/src/tercen/context/production_context.rs deleted file mode 100644 index 5a55141..0000000 --- a/src/tercen/context/production_context.rs +++ /dev/null @@ -1,451 +0,0 @@ -//! ProductionContext - TercenContext implementation for production mode -//! -//! Initialized from a task_id, extracts all necessary data from the task object. - -use super::base::{ContextBase, ContextBaseBuilder}; -use super::TercenContext; -use crate::tercen::client::proto::{CubeQuery, OperatorSettings}; -use crate::tercen::colors::{ChartKind, ColorInfo}; -use crate::tercen::TercenClient; -use std::ops::Deref; -use std::sync::Arc; - -/// Production context initialized from task_id -/// -/// This is used when the operator is run by Tercen with --taskId argument. -/// Wraps ContextBase using the newtype pattern. 
-pub struct ProductionContext(ContextBase); - -impl Deref for ProductionContext { - type Target = ContextBase; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl ProductionContext { - /// Create a new ProductionContext from a task_id - /// - /// This fetches the task, extracts the CubeQuery, and retrieves schema_ids - /// from the CubeQueryTask (via step.model.taskId). - /// - /// The CubeQueryTask MUST complete before the operator runs, so schema_ids - /// should always be available. No retry logic - if missing, it's a bug. - pub async fn from_task_id( - client: Arc, - task_id: &str, - ) -> Result> { - use crate::tercen::client::proto::{e_task, GetRequest}; - - println!("[ProductionContext] Fetching task {}...", task_id); - - // Fetch the operator task - let mut task_service = client.task_service()?; - let request = tonic::Request::new(GetRequest { - id: task_id.to_string(), - ..Default::default() - }); - let response = task_service.get(request).await?; - let task = response.into_inner(); - - // Extract CubeQuery and metadata from task (but NOT schema_ids - that comes from CubeQueryTask) - let (cube_query, project_id, operator_settings, task_environment) = - match task.object.as_ref() { - Some(e_task::Object::Computationtask(ct)) => ( - ct.query - .as_ref() - .ok_or("ComputationTask has no query")? - .clone(), - ct.project_id.clone(), - ct.query.as_ref().and_then(|q| q.operator_settings.clone()), - &ct.environment, - ), - Some(e_task::Object::Runcomputationtask(rct)) => ( - rct.query - .as_ref() - .ok_or("RunComputationTask has no query")? - .clone(), - rct.project_id.clone(), - rct.query.as_ref().and_then(|q| q.operator_settings.clone()), - &rct.environment, - ), - Some(e_task::Object::Cubequerytask(cqt)) => ( - cqt.query - .as_ref() - .ok_or("CubeQueryTask has no query")? 
- .clone(), - cqt.project_id.clone(), - cqt.query.as_ref().and_then(|q| q.operator_settings.clone()), - &cqt.environment, - ), - _ => return Err("Unsupported task type".into()), - }; - - // Extract namespace from operator settings - let namespace = operator_settings - .as_ref() - .map(|os| os.namespace.clone()) - .unwrap_or_default(); - - // Get workflow_id and step_id from task environment - let workflow_id = task_environment - .iter() - .find(|p| p.key == "workflow.id") - .map(|p| p.value.clone()) - .or_else(|| std::env::var("WORKFLOW_ID").ok()) - .ok_or("workflow.id not found in task environment")?; - - let step_id = task_environment - .iter() - .find(|p| p.key == "step.id") - .map(|p| p.value.clone()) - .or_else(|| std::env::var("STEP_ID").ok()) - .ok_or("step.id not found in task environment")?; - - println!( - "[ProductionContext] workflow_id={}, step_id={}", - workflow_id, step_id - ); - - // Fetch schema_ids from CubeQueryTask (the canonical source) - // Path: workflow → step → model.taskId → CubeQueryTask.schema_ids - let schema_ids = - Self::fetch_schema_ids_from_cube_query_task(&client, &workflow_id, &step_id).await?; - - if schema_ids.is_empty() { - println!("[ProductionContext] schema_ids is empty"); - } else { - println!( - "[ProductionContext] Found {} schema_ids: {:?}", - schema_ids.len(), - schema_ids - ); - } - - // Find Y-axis table - let y_axis_table_id = super::helpers::find_y_axis_table( - &client, - &schema_ids, - &cube_query, - "ProductionContext", - ) - .await?; - - // Find X-axis table - let x_axis_table_id = super::helpers::find_x_axis_table( - &client, - &schema_ids, - &cube_query, - "ProductionContext", - ) - .await?; - - // Fetch workflow for color extraction - let workflow = super::helpers::fetch_workflow(&client, &workflow_id).await?; - - // Extract per-layer color information - let per_layer_colors = super::helpers::extract_per_layer_color_info_from_workflow( - &client, - &schema_ids, - &workflow, - &step_id, - "ProductionContext", 
- ) - .await?; - - // Also extract legacy color_infos for backwards compatibility - let color_infos = super::helpers::extract_color_info_from_workflow( - &client, - &schema_ids, - &workflow, - &step_id, - "ProductionContext", - ) - .await?; - - // Extract page factors from operator settings - let page_factors = crate::tercen::extract_page_factors(operator_settings.as_ref()); - - // Extract point size from workflow step (use already fetched workflow) - let point_size = match crate::tercen::extract_point_size_from_step(&workflow, &step_id) { - Ok(ps) => ps, - Err(e) => { - eprintln!("[ProductionContext] Failed to extract point_size: {}", e); - None - } - }; - - // Extract chart kind from workflow step (use already fetched workflow) - let chart_kind = match crate::tercen::extract_chart_kind_from_step(&workflow, &step_id) { - Ok(ck) => { - println!("[ProductionContext] Chart kind: {:?}", ck); - ck - } - Err(e) => { - eprintln!("[ProductionContext] Failed to extract chart_kind: {}", e); - ChartKind::Point - } - }; - - // Extract crosstab dimensions from workflow step model (use already fetched workflow) - let crosstab_dimensions = super::helpers::extract_crosstab_dimensions(&workflow, &step_id); - if let Some((w, h)) = crosstab_dimensions { - println!( - "[ProductionContext] Crosstab dimensions: {}×{} pixels", - w, h - ); - } - - // Extract axis transforms from CubeAxisQuery - let (y_transform, x_transform) = - super::helpers::extract_transforms_from_cube_query(&cube_query); - - // Extract layer palette name from GlTask (preferred) or fallback to crosstab palette - let layer_palette_name = - match super::helpers::extract_layer_palette_from_gltask(&client, &workflow, &step_id) - .await - { - Ok(Some(name)) => { - println!("[ProductionContext] Layer palette (from GlTask): {}", name); - Some(name) - } - Ok(None) | Err(_) => { - // Fallback to crosstab palette extraction - let name = crate::tercen::extract_crosstab_palette_name(&workflow, &step_id); - if let Some(ref n) = 
name { - println!("[ProductionContext] Layer palette (from crosstab): {}", n); - } - name - } - }; - - // Extract Y-axis factor names per layer (for legend entries) - let layer_y_factor_names = - super::helpers::extract_layer_y_factor_names(&workflow, &step_id); - if !layer_y_factor_names.is_empty() { - println!( - "[ProductionContext] Layer Y-factor names: {:?}", - layer_y_factor_names - ); - } - - // Build ContextBase using the builder - let base = ContextBaseBuilder::new() - .client(client) - .cube_query(cube_query) - .schema_ids(schema_ids) - .workflow_id(workflow_id) - .step_id(step_id) - .project_id(project_id) - .namespace(namespace) - .operator_settings(operator_settings) - .color_infos(color_infos) - .per_layer_colors(Some(per_layer_colors)) - .page_factors(page_factors) - .y_axis_table_id(y_axis_table_id) - .x_axis_table_id(x_axis_table_id) - .point_size(point_size) - .chart_kind(chart_kind) - .crosstab_dimensions(crosstab_dimensions) - .y_transform(y_transform) - .x_transform(x_transform) - .layer_palette_name(layer_palette_name) - .layer_y_factor_names(layer_y_factor_names) - .build()?; - - Ok(Self(base)) - } - - /// Fetch schema_ids from the CubeQueryTask (canonical source) - /// - /// Path: workflow → step → model.taskId → CubeQueryTask.schema_ids - /// - /// The CubeQueryTask must complete before the operator runs, so this - /// should always succeed. No retry logic - if missing, it's a bug. 
- async fn fetch_schema_ids_from_cube_query_task( - client: &TercenClient, - workflow_id: &str, - step_id: &str, - ) -> Result, Box> { - use crate::tercen::client::proto::{e_step, e_task, GetRequest}; - - // Fetch workflow - let mut workflow_service = client.workflow_service()?; - let request = tonic::Request::new(GetRequest { - id: workflow_id.to_string(), - ..Default::default() - }); - let response = workflow_service.get(request).await?; - let e_workflow = response.into_inner(); - - let workflow = e_workflow - .object - .as_ref() - .map(|obj| match obj { - crate::tercen::client::proto::e_workflow::Object::Workflow(wf) => wf, - }) - .ok_or("EWorkflow has no workflow object")?; - - // Find the step by step_id - let step = workflow - .steps - .iter() - .find(|s| { - s.object.as_ref().is_some_and(|obj| match obj { - e_step::Object::Datastep(ds) => ds.id == step_id, - e_step::Object::Crosstabstep(cs) => cs.id == step_id, - _ => false, - }) - }) - .ok_or_else(|| format!("Step {} not found in workflow {}", step_id, workflow_id))?; - - // Get the CubeQueryTask ID from the step's model.taskId - let cube_query_task_id = match step.object.as_ref() { - Some(e_step::Object::Datastep(ds)) => ds.model.as_ref().and_then(|m| { - if !m.task_id.is_empty() { - Some(m.task_id.clone()) - } else { - None - } - }), - Some(e_step::Object::Crosstabstep(cs)) => cs.model.as_ref().and_then(|m| { - if !m.task_id.is_empty() { - Some(m.task_id.clone()) - } else { - None - } - }), - _ => None, - } - .ok_or_else(|| { - format!( - "Step {} has no model.taskId - CubeQueryTask may not have completed", - step_id - ) - })?; - - println!( - "[ProductionContext] CubeQueryTask ID: {}", - cube_query_task_id - ); - - // Fetch the CubeQueryTask - let mut task_service = client.task_service()?; - let request = tonic::Request::new(GetRequest { - id: cube_query_task_id.clone(), - ..Default::default() - }); - let response = task_service.get(request).await?; - let task = response.into_inner(); - - // Extract 
schema_ids from CubeQueryTask - let schema_ids = match task.object.as_ref() { - Some(e_task::Object::Cubequerytask(cqt)) => cqt.schema_ids.clone(), - _ => { - return Err(format!( - "Task {} is not a CubeQueryTask as expected", - cube_query_task_id - ) - .into()) - } - }; - - if schema_ids.is_empty() { - return Err(format!( - "CubeQueryTask {} has empty schema_ids - this should not happen", - cube_query_task_id - ) - .into()); - } - - println!( - "[ProductionContext] Found {} schema_ids from CubeQueryTask", - schema_ids.len() - ); - - Ok(schema_ids) - } -} - -impl TercenContext for ProductionContext { - fn cube_query(&self) -> &CubeQuery { - self.0.cube_query() - } - - fn schema_ids(&self) -> &[String] { - self.0.schema_ids() - } - - fn workflow_id(&self) -> &str { - self.0.workflow_id() - } - - fn step_id(&self) -> &str { - self.0.step_id() - } - - fn project_id(&self) -> &str { - self.0.project_id() - } - - fn namespace(&self) -> &str { - self.0.namespace() - } - - fn operator_settings(&self) -> Option<&OperatorSettings> { - self.0.operator_settings() - } - - fn color_infos(&self) -> &[ColorInfo] { - self.0.color_infos() - } - - fn page_factors(&self) -> &[String] { - self.0.page_factors() - } - - fn y_axis_table_id(&self) -> Option<&str> { - self.0.y_axis_table_id() - } - - fn x_axis_table_id(&self) -> Option<&str> { - self.0.x_axis_table_id() - } - - fn point_size(&self) -> Option { - self.0.point_size() - } - - fn chart_kind(&self) -> ChartKind { - self.0.chart_kind() - } - - fn crosstab_dimensions(&self) -> Option<(i32, i32)> { - self.0.crosstab_dimensions() - } - - fn y_transform(&self) -> Option<&str> { - self.0.y_transform() - } - - fn x_transform(&self) -> Option<&str> { - self.0.x_transform() - } - - fn layer_palette_name(&self) -> Option<&str> { - self.0.layer_palette_name() - } - - fn per_layer_colors(&self) -> Option<&crate::tercen::PerLayerColorConfig> { - self.0.per_layer_colors() - } - - fn layer_y_factor_names(&self) -> &[String] { - 
self.0.layer_y_factor_names() - } - - fn client(&self) -> &Arc { - self.0.client() - } -} diff --git a/src/tercen/error.rs b/src/tercen/error.rs deleted file mode 100644 index 9fb2fca..0000000 --- a/src/tercen/error.rs +++ /dev/null @@ -1,50 +0,0 @@ -use thiserror::Error; - -/// Errors that can occur when interacting with Tercen services -#[derive(Debug, Error)] -pub enum TercenError { - /// gRPC transport or protocol error - #[error("gRPC error: {0}")] - Grpc(Box), - - /// gRPC transport error - #[error("Transport error: {0}")] - Transport(Box), - - /// Authentication error - #[error("Authentication error: {0}")] - Auth(String), - - /// Configuration error (missing env vars, invalid URIs, etc.) - #[error("Configuration error: {0}")] - Config(String), - - /// Connection error - #[error("Connection error: {0}")] - Connection(String), - - /// Data processing or validation error - #[error("Data error: {0}")] - Data(String), - - /// Generic error - #[allow(dead_code)] - #[error("{0}")] - Other(String), -} - -/// Type alias for Results using TercenError -pub type Result = std::result::Result; - -// Manual From implementations for boxed error types -impl From for TercenError { - fn from(err: tonic::Status) -> Self { - TercenError::Grpc(Box::new(err)) - } -} - -impl From for TercenError { - fn from(err: tonic::transport::Error) -> Self { - TercenError::Transport(Box::new(err)) - } -} diff --git a/src/tercen/facets.rs b/src/tercen/facets.rs deleted file mode 100644 index aa51c8e..0000000 --- a/src/tercen/facets.rs +++ /dev/null @@ -1,327 +0,0 @@ -//! Facet metadata loading and management -//! -//! This module handles loading and parsing facet tables (column.csv and row.csv) -//! which define the structure of faceted plots. 
- -use super::error::Result; -use super::table::TableStreamer; -use super::tson_convert::tson_to_dataframe; -use super::TercenClient; -use crate::ggrs_integration::stream_generator::extract_column_names_from_schema; -use std::collections::HashMap; - -/// Represents a single facet group -#[derive(Debug, Clone)] -pub struct FacetGroup { - /// Index of this facet group (0-based, for GGRS) - pub index: usize, - /// Original index from the table (before filtering/remapping) - pub original_index: usize, - /// Label for display (combination of all column values) - pub label: String, - /// Raw column values for this facet - pub values: HashMap, -} - -/// Collection of facet groups for one dimension (column or row) -#[derive(Debug, Clone)] -pub struct FacetMetadata { - /// All facet groups in order - pub groups: Vec, - /// Column names in the facet table - pub column_names: Vec, -} - -impl FacetMetadata { - /// Load facet metadata from a Tercen table - pub async fn load(client: &TercenClient, table_id: &str) -> Result { - let streamer = TableStreamer::new(client); - - // Get row count from schema - let schema = streamer.get_schema(table_id).await?; - - use crate::tercen::client::proto::e_schema; - let n_rows = match &schema.object { - Some(e_schema::Object::Cubequerytableschema(cqts)) => { - eprintln!("DEBUG: CubeQueryTableSchema nRows={}", cqts.n_rows); - cqts.n_rows as usize - } - Some(e_schema::Object::Tableschema(ts)) => { - eprintln!("DEBUG: TableSchema nRows={}", ts.n_rows); - ts.n_rows as usize - } - Some(e_schema::Object::Computedtableschema(cts)) => { - eprintln!("DEBUG: ComputedTableSchema nRows={}", cts.n_rows); - cts.n_rows as usize - } - other => { - eprintln!("DEBUG: Unknown schema type: {:?}", other); - 0 - } - }; - - if n_rows == 0 { - return Ok(FacetMetadata { - groups: vec![], - column_names: vec![], - }); - } - - // Get column names from schema first - let column_names = match extract_column_names_from_schema(&schema) { - Ok(cols) => cols, - Err(e) => { - 
eprintln!("DEBUG: Failed to extract column names: {}", e); - vec![] - } - }; - eprintln!("DEBUG: Facet table has columns: {:?}", column_names); - - // Stream TSON data to get actual facet values - // Request specific columns (not None) to ensure data is materialized - let columns_to_fetch = if column_names.is_empty() { - None - } else { - Some(column_names.clone()) - }; - - let tson_data = streamer - .stream_tson(table_id, columns_to_fetch, 0, n_rows as i64) - .await?; - - // If no data, return placeholder labels - if tson_data.is_empty() || tson_data.len() < 30 { - eprintln!( - "DEBUG: Facet table has no data ({} bytes), using index labels", - tson_data.len() - ); - let groups: Vec = (0..n_rows) - .map(|index| FacetGroup { - index, - original_index: index, - label: format!("{}", index), - values: Default::default(), - }) - .collect(); - - return Ok(FacetMetadata { - groups, - column_names: vec![], - }); - } - - // Parse TSON to DataFrame - let df = tson_to_dataframe(&tson_data)?; - eprintln!( - "DEBUG: Parsed facet table: {} rows × {} columns", - df.nrow(), - df.ncol() - ); - - let column_names: Vec = df.columns().iter().map(|s| s.to_string()).collect(); - eprintln!("DEBUG: Facet columns: {:?}", column_names); - - // Create groups from parsed data - let mut groups = Vec::new(); - for index in 0..df.nrow() { - let mut values = HashMap::new(); - let mut label_parts = Vec::new(); - - // Collect all column values for this row - for col_name in &column_names { - if let Ok(value) = df.get_value(index, col_name) { - let value_str = value.as_string(); - values.insert(col_name.clone(), value_str.clone()); - label_parts.push(value_str); - } - } - - // Join all values with ", " to create label - let label = if label_parts.is_empty() { - format!("{}", index) - } else { - label_parts.join(", ") - }; - - groups.push(FacetGroup { - index, - original_index: index, - label, - values, - }); - } - - eprintln!("DEBUG: Created {} facet groups", groups.len()); - for (i, group) in 
groups.iter().enumerate() { - eprintln!("DEBUG: Facet[{}] label='{}'", i, group.label); - } - - Ok(FacetMetadata { - groups, - column_names, - }) - } - - /// Load facet metadata with filtering by page values - /// - /// # Arguments - /// * `filter` - Map of column names to values (e.g., {"Gender": "male"}) - /// - /// Only facet groups matching ALL filter criteria will be loaded. - /// Note: Indices are NOT remapped - they keep their original values from the table. - pub async fn load_with_filter( - client: &TercenClient, - table_id: &str, - filter: &HashMap, - ) -> Result { - // Load all facets first - let mut metadata = Self::load(client, table_id).await?; - - let original_count = metadata.groups.len(); - - // Filter groups to only those matching all criteria - metadata.groups.retain(|group| { - filter.iter().all(|(col_name, expected_value)| { - group - .values - .get(col_name) - .map(|actual_value| actual_value == expected_value) - .unwrap_or(false) - }) - }); - - eprintln!( - "DEBUG: Filtered facets from {} to {} groups", - original_count, - metadata.groups.len() - ); - - // CRITICAL: Remap facet indices to 0-based for GGRS grid positioning - // GGRS expects facet groups with indices 0..N for rendering grid - // The original indices are preserved in group.original_index for data matching - // - // Data flow: - // 1. Operator loads only male facets (original_index=12-23, index=0-11) - // 2. Operator streams raw data with .ri=12-23 (no filtering/remapping) - // 3. 
GGRS uses original_index to route data[.ri=12] → panel[index=0] - for (new_idx, group) in metadata.groups.iter_mut().enumerate() { - eprintln!( - " Remapping facet {} from original_index {} to index {}", - group.label, group.original_index, new_idx - ); - group.index = new_idx; - // original_index is NOT changed - it keeps the value from the full table - } - - Ok(metadata) - } - - /// Get number of facet groups - pub fn len(&self) -> usize { - self.groups.len() - } - - /// Check if empty - pub fn is_empty(&self) -> bool { - self.groups.is_empty() - } - - /// Get a specific facet group by index - pub fn get(&self, index: usize) -> Option<&FacetGroup> { - self.groups.get(index) - } -} - -/// Complete faceting information for a plot -#[derive(Debug, Clone)] -pub struct FacetInfo { - /// Column facet metadata - pub col_facets: FacetMetadata, - /// Row facet metadata - pub row_facets: FacetMetadata, -} - -impl FacetInfo { - /// Load both column and row facet metadata - pub async fn load( - client: &TercenClient, - col_table_id: &str, - row_table_id: &str, - ) -> Result { - // Load both facet tables in parallel - let (col_result, row_result) = tokio::join!( - FacetMetadata::load(client, col_table_id), - FacetMetadata::load(client, row_table_id) - ); - - let row_facets = row_result?; - - // Follow Tercen's natural ordering (smallest → largest, top to bottom) - // No reversal - preserve the table order for correct axis range mapping - // Reversal was previously done to match ggplot2 convention but broke axis range lookups - - Ok(FacetInfo { - col_facets: col_result?, - row_facets, - }) - } - - /// Load facet metadata with filtering on row facets - /// - /// # Arguments - /// * `row_filter` - Filter to apply to row facets (e.g., {"Gender": "male"}) - /// - /// Column facets are loaded normally, row facets are filtered. 
- pub async fn load_with_filter( - client: &TercenClient, - col_table_id: &str, - row_table_id: &str, - row_filter: &HashMap, - ) -> Result { - // Load column facets normally, row facets with filter - let (col_result, row_result) = tokio::join!( - FacetMetadata::load(client, col_table_id), - FacetMetadata::load_with_filter(client, row_table_id, row_filter) - ); - - let row_facets = row_result?; - - // Follow Tercen's natural ordering (smallest → largest, top to bottom) - // No reversal - preserve the table order for correct axis range mapping - - Ok(FacetInfo { - col_facets: col_result?, - row_facets, - }) - } - - /// Get total number of column facets - pub fn n_col_facets(&self) -> usize { - if self.col_facets.is_empty() { - 1 // No faceting = 1 facet - } else { - self.col_facets.len() - } - } - - /// Get total number of row facets - pub fn n_row_facets(&self) -> usize { - if self.row_facets.is_empty() { - 1 // No faceting = 1 facet - } else { - self.row_facets.len() - } - } - - /// Get total number of facet cells (col × row) - pub fn total_facets(&self) -> usize { - self.n_col_facets() * self.n_row_facets() - } - - /// Check if plot has any faceting - pub fn has_faceting(&self) -> bool { - !self.col_facets.is_empty() || !self.row_facets.is_empty() - } -} - -// Tests removed - CSV parsing replaced with TSON format diff --git a/src/tercen/logger.rs b/src/tercen/logger.rs deleted file mode 100644 index db32472..0000000 --- a/src/tercen/logger.rs +++ /dev/null @@ -1,54 +0,0 @@ -use super::client::proto::{e_event, EEvent, TaskLogEvent, TaskProgressEvent}; -use super::client::TercenClient; -use super::error::Result; - -/// Logger for sending log messages and progress updates to Tercen -pub struct TercenLogger<'a> { - client: &'a TercenClient, - task_id: String, -} - -impl<'a> TercenLogger<'a> { - /// Create a new logger for a specific task - pub fn new(client: &'a TercenClient, task_id: String) -> Self { - TercenLogger { client, task_id } - } - - /// Send a log message 
to Tercen - pub async fn log(&self, message: impl Into) -> Result<()> { - let log_event = EEvent { - object: Some(e_event::Object::Tasklogevent(TaskLogEvent { - task_id: self.task_id.clone(), - message: message.into(), - ..Default::default() - })), - }; - - self.send_event(log_event).await - } - - /// Send a progress update to Tercen - #[allow(dead_code)] - pub async fn progress(&self, _percent: f64, message: impl Into) -> Result<()> { - let progress_event = EEvent { - object: Some(e_event::Object::Taskprogressevent(TaskProgressEvent { - task_id: self.task_id.clone(), - message: message.into(), - // Note: Progress is stored in the message field in TaskProgressEvent - // The percent parameter is included here for API convenience - ..Default::default() - })), - }; - - self.send_event(progress_event).await - } - - /// Send an event to Tercen's EventService - async fn send_event(&self, event: EEvent) -> Result<()> { - let mut event_service = self.client.event_service()?; - let request = tonic::Request::new(event); - - event_service.create(request).await?; - Ok(()) - } -} diff --git a/src/tercen/mod.rs b/src/tercen/mod.rs deleted file mode 100644 index 7bdd0af..0000000 --- a/src/tercen/mod.rs +++ /dev/null @@ -1,71 +0,0 @@ -//! Tercen gRPC client module -//! -//! This module contains all Tercen-specific code and will be extracted -//! into a separate `tercen-rust` crate in the future. -//! -//! Structure: -//! - `client.rs`: Core gRPC client and authentication -//! - `services/`: Service wrappers (task, table, file) -//! - `types.rs`: Common types and conversions -//! 
- `error.rs`: Error types - -// Module declarations -pub mod error; - -// Client module (Phase 2) -pub mod client; -pub mod logger; - -// Data modules (Phase 4) -pub mod table; -pub mod tson_convert; - -// Facet modules (Phase 5) -pub mod facets; - -// Result upload modules (Phase 8) -pub mod result; -pub mod table_convert; - -// Operator properties (Phase 9 - Version 0.0.2) -pub mod properties; - -// Operator property definitions from operator.json -pub mod operator_properties; - -// Color handling (Version 0.0.3) -pub mod color_processor; -pub mod colors; -pub mod palettes; - -// Page handling (Version 0.0.4) -pub mod pages; - -// Context abstraction (Version 0.0.4) -pub mod context; - -// Re-exports for convenience -pub use client::TercenClient; -pub use color_processor::{add_color_columns, add_layer_colors, add_mixed_layer_colors}; -pub use colors::{ - extract_chart_kind_from_step, extract_color_info_from_step, extract_crosstab_palette_name, - extract_per_layer_color_info, extract_point_size_from_step, interpolate_color, parse_palette, - CategoryColorMap, ChartKind, ColorInfo, ColorMapping, ColorPalette, ColorStop, - LayerColorConfig, PerLayerColorConfig, -}; -pub use context::{DevContext, ProductionContext, TercenContext}; -#[allow(unused_imports)] -pub use error::{Result, TercenError}; -#[allow(unused_imports)] -pub use facets::{FacetGroup, FacetInfo, FacetMetadata}; -pub use logger::TercenLogger; -pub use operator_properties::{OperatorPropertyReader, PropertyRegistry}; -pub use pages::{extract_page_factors, extract_page_values, PageValue}; -pub use palettes::{ - categorical_color_from_level, get_palette_colors, PaletteRegistry, PALETTE_REGISTRY, -}; -pub use properties::{PlotDimension, PropertyReader}; -pub use result::PlotResult; -#[allow(unused_imports)] -pub use table::{new_schema_cache, SchemaCache, TableStreamer}; -pub use tson_convert::tson_to_dataframe; diff --git a/src/tercen/pages.rs b/src/tercen/pages.rs deleted file mode 100644 index 
d030405..0000000 --- a/src/tercen/pages.rs +++ /dev/null @@ -1,201 +0,0 @@ -//! Page factor extraction from operator spec -//! -//! Pages allow splitting a plot into multiple separate plots based on factor values. -//! Unlike facets (which create a grid of panels in one plot), pages create separate -//! output files - one per unique page value. -//! -//! ## How it works: -//! 1. Page factors are defined in the operator's input spec (MetaFactor with name="Page") -//! 2. Page factor columns are stored in the row facet table -//! 3. We extract unique values from the page columns -//! 4. Generate one plot per unique page value combination - -use crate::memprof; -use crate::tercen::client::proto::{e_meta_factor, e_operator_input_spec, OperatorSettings}; -use crate::tercen::{tson_to_dataframe, TableStreamer, TercenClient}; -use std::collections::HashMap; - -/// Extract page factor names from operator settings -/// -/// Returns a vector of column names that should be used for pagination. -/// Empty vector means no pages (generate single plot). -/// -/// # Algorithm -/// 1. Get operatorRef.operatorSpec from operator settings -/// 2. Get inputSpecs[0].metaFactors[] (array of MetaFactor) -/// 3. Find MetaFactor where name matches "Page" or "page" -/// 4. 
Extract factors[].name from that MetaFactor -pub fn extract_page_factors(operator_settings: Option<&OperatorSettings>) -> Vec { - let operator_settings = match operator_settings { - Some(os) => os, - None => return Vec::new(), - }; - - // Get operator_ref - let operator_ref = match operator_settings.operator_ref.as_ref() { - Some(or_ref) => or_ref, - None => return Vec::new(), - }; - - // Get operator_spec - let operator_spec = match operator_ref.operator_spec.as_ref() { - Some(spec) => spec, - None => return Vec::new(), - }; - - // Get first input spec (crosstab spec) - if operator_spec.input_specs.is_empty() { - return Vec::new(); - } - - let first_input_spec = &operator_spec.input_specs[0]; - - // Extract CrosstabSpec from EOperatorInputSpec - let crosstab_spec = match first_input_spec.object.as_ref() { - Some(e_operator_input_spec::Object::Crosstabspec(cs)) => cs, - _ => return Vec::new(), - }; - - // Find MetaFactor with name="Page" or "page" - for e_meta_factor in &crosstab_spec.meta_factors { - let meta_factor = match e_meta_factor.object.as_ref() { - Some(e_meta_factor::Object::Metafactor(mf)) => mf, - _ => continue, - }; - - // Check if this is the "Page" metafactor - if meta_factor.name.to_lowercase() == "page" { - // Extract factor names - return meta_factor.factors.iter().map(|f| f.name.clone()).collect(); - } - } - - Vec::new() -} - -/// Page value - represents one unique combination of page factor values -/// -/// For single page factor (e.g., Country): -/// PageValue { values: {"Country": "USA"}, label: "USA" } -/// -/// For multiple page factors (e.g., Country + Year): -/// PageValue { values: {"Country": "USA", "Year": "2020"}, label: "USA_2020" } -#[derive(Debug, Clone)] -pub struct PageValue { - /// Map of page factor names to their values - pub values: HashMap, - /// Human-readable label for this page (used in filename) - pub label: String, -} - -/// Extract unique page values from row facet table -/// -/// Returns a vector of PageValue objects 
representing each unique page. -/// If page_factors is empty, returns a single PageValue with empty values (no pagination). -/// -/// # Arguments -/// * `client` - Tercen client -/// * `row_table_id` - Row facet table ID -/// * `page_factors` - Page factor column names (from extract_page_factors) -/// -/// # Algorithm -/// 1. Stream entire row facet table (it's small - just unique facet combinations) -/// 2. Parse to Polars DataFrame -/// 3. Extract unique combinations of page factor values -/// 4. Build PageValue for each unique combination -pub async fn extract_page_values( - client: &TercenClient, - row_table_id: &str, - page_factors: &[String], -) -> Result, Box> { - // If no page factors, return single "page" (no pagination) - if page_factors.is_empty() { - return Ok(vec![PageValue { - values: HashMap::new(), - label: "all".to_string(), - }]); - } - - let m0 = memprof::checkpoint_return("extract_page_values: START"); - - // Stream row facet table - let streamer = TableStreamer::new(client); - - // Get schema to know row count - let schema = streamer.get_schema(row_table_id).await?; - let m1 = memprof::delta("extract_page_values: After get_schema", m0); - - use crate::tercen::client::proto::e_schema; - let n_rows = match &schema.object { - Some(e_schema::Object::Cubequerytableschema(cqts)) => cqts.n_rows as usize, - Some(e_schema::Object::Tableschema(ts)) => ts.n_rows as usize, - Some(e_schema::Object::Computedtableschema(cts)) => cts.n_rows as usize, - _ => return Err("Unknown schema type for row table".into()), - }; - - println!("Extracting page values from row table ({} rows)...", n_rows); - - // Stream entire table with only page factor columns - let tson_data = streamer - .stream_tson(row_table_id, Some(page_factors.to_vec()), 0, n_rows as i64) - .await?; - let m2 = memprof::delta("extract_page_values: After stream_tson", m1); - eprintln!("DEBUG: TSON data size: {} bytes", tson_data.len()); - - // Parse to DataFrame - let df = 
tson_to_dataframe(&tson_data)?; - let m3 = memprof::delta("extract_page_values: After tson_to_dataframe", m2); - - println!(" Found {} total rows", df.nrow()); - - // Extract unique combinations using simple HashSet (avoids Polars lazy init overhead) - // For small page tables (typically < 100 rows), this is much more efficient - use std::collections::HashSet; - - let polars_df = df.inner(); - let mut seen: HashSet> = HashSet::new(); - let mut page_values = Vec::new(); - - for row_idx in 0..polars_df.height() { - // Build key from all page factor values for this row - let mut key = Vec::with_capacity(page_factors.len()); - let mut values = HashMap::new(); - let mut label_parts = Vec::new(); - - for factor_name in page_factors { - let value = polars_df - .column(factor_name)? - .get(row_idx)? - .to_string() - .trim_matches('"') - .to_string(); - key.push(value.clone()); - values.insert(factor_name.clone(), value.clone()); - label_parts.push(value); - } - - // Only add if we haven't seen this combination before - if seen.insert(key) { - let label = label_parts.join("_"); - page_values.push(PageValue { values, label }); - } - } - - let _ = memprof::delta("extract_page_values: After unique extraction", m3); - - println!(" Found {} unique page combinations", page_values.len()); - - Ok(page_values) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_extract_page_factors_none() { - // No operator settings - let result = extract_page_factors(None); - assert!(result.is_empty()); - } -} diff --git a/src/tercen/palettes.rs b/src/tercen/palettes.rs deleted file mode 100644 index 0d1a505..0000000 --- a/src/tercen/palettes.rs +++ /dev/null @@ -1,337 +0,0 @@ -//! Palette registry for loading and accessing color palettes -//! -//! Loads palettes from palettes.json (embedded at compile time) and provides -//! access by name. Matches the R plot_operator's palette definitions for -//! consistency with Tercen's ecosystem. -//! -//! Palette types: -//! 
- `categorical`: Discrete colors for distinct categories (colors repeat after exhausting the list) -//! - `sequential`: Gradient from low to high values -//! - `diverging`: Gradient with a neutral midpoint (e.g., for +/- deviations) - -use once_cell::sync::Lazy; -use serde::Deserialize; -use std::collections::HashMap; - -/// Embedded palettes.json content (from R plot_operator) -const PALETTES_JSON: &str = include_str!("../../palettes.json"); - -/// Global palette registry, initialized lazily on first access -pub static PALETTE_REGISTRY: Lazy = Lazy::new(|| { - PaletteRegistry::from_json(PALETTES_JSON).unwrap_or_else(|e| { - eprintln!("ERROR: Failed to load palettes.json: {}", e); - PaletteRegistry::default() - }) -}); - -/// Default categorical palette name (Tercen's default) -pub const DEFAULT_CATEGORICAL_PALETTE: &str = "Palette-1"; - -/// Default sequential palette name -pub const DEFAULT_SEQUENTIAL_PALETTE: &str = "Viridis"; - -/// Default diverging palette name -pub const DEFAULT_DIVERGING_PALETTE: &str = "RdBu"; - -/// Palette type as defined in palettes.json -#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum PaletteType { - Categorical, - Sequential, - Diverging, -} - -/// A single palette definition from palettes.json -#[derive(Debug, Clone, Deserialize)] -pub struct PaletteDefinition { - pub name: String, - #[serde(rename = "type")] - pub palette_type: PaletteType, - pub colors: Vec, -} - -impl PaletteDefinition { - /// Get a color by index (wraps around for categorical palettes) - pub fn get_color(&self, index: usize) -> [u8; 3] { - if self.colors.is_empty() { - return [128, 128, 128]; // Gray fallback - } - let idx = index % self.colors.len(); - parse_hex_color(&self.colors[idx]).unwrap_or([128, 128, 128]) - } - - /// Get all colors as RGB arrays - pub fn get_colors_rgb(&self) -> Vec<[u8; 3]> { - self.colors - .iter() - .filter_map(|hex| parse_hex_color(hex)) - .collect() - } - - /// Get the number 
of colors in this palette - pub fn len(&self) -> usize { - self.colors.len() - } - - /// Check if the palette is empty - pub fn is_empty(&self) -> bool { - self.colors.is_empty() - } - - /// Interpolate a color from the palette at position t ∈ [0, 1] - /// - /// t=0 returns the first color, t=1 returns the last color. - /// Values in between are linearly interpolated. - pub fn interpolate(&self, t: f64) -> [u8; 3] { - if self.colors.is_empty() { - return [128, 128, 128]; // Gray fallback - } - - let t = t.clamp(0.0, 1.0); - let n = self.colors.len(); - - if n == 1 { - return self.get_color(0); - } - - // Map t to position in the color array - let pos = t * (n - 1) as f64; - let idx_low = pos.floor() as usize; - let idx_high = (idx_low + 1).min(n - 1); - let frac = pos - idx_low as f64; - - let color_low = self.get_color(idx_low); - let color_high = self.get_color(idx_high); - - // Linear interpolation between the two colors - [ - (color_low[0] as f64 * (1.0 - frac) + color_high[0] as f64 * frac) as u8, - (color_low[1] as f64 * (1.0 - frac) + color_high[1] as f64 * frac) as u8, - (color_low[2] as f64 * (1.0 - frac) + color_high[2] as f64 * frac) as u8, - ] - } -} - -/// Registry of all available palettes -#[derive(Debug, Clone, Default)] -pub struct PaletteRegistry { - /// All palettes by name (lowercase keys for case-insensitive lookup) - palettes: HashMap, - /// Categorical palette names (for listing) - categorical_names: Vec, - /// Sequential palette names (for listing) - sequential_names: Vec, - /// Diverging palette names (for listing) - diverging_names: Vec, -} - -impl PaletteRegistry { - /// Load palettes from JSON string - pub fn from_json(json: &str) -> Result { - let definitions: Vec = serde_json::from_str(json) - .map_err(|e| format!("Failed to parse palettes JSON: {}", e))?; - - let mut registry = Self::default(); - - for def in definitions { - let name = def.name.clone(); - match def.palette_type { - PaletteType::Categorical => 
registry.categorical_names.push(name.clone()), - PaletteType::Sequential => registry.sequential_names.push(name.clone()), - PaletteType::Diverging => registry.diverging_names.push(name.clone()), - } - // Store with lowercase key for case-insensitive lookup - registry.palettes.insert(name.to_lowercase(), def); - } - - eprintln!( - "DEBUG PaletteRegistry: Loaded {} palettes ({} categorical, {} sequential, {} diverging)", - registry.palettes.len(), - registry.categorical_names.len(), - registry.sequential_names.len(), - registry.diverging_names.len() - ); - - Ok(registry) - } - - /// Get a palette by name (case-insensitive) - pub fn get(&self, name: &str) -> Option<&PaletteDefinition> { - self.palettes.get(&name.to_lowercase()) - } - - /// Get the default categorical palette - pub fn default_categorical(&self) -> Option<&PaletteDefinition> { - self.get(DEFAULT_CATEGORICAL_PALETTE) - } - - /// Get the default sequential palette - pub fn default_sequential(&self) -> Option<&PaletteDefinition> { - self.get(DEFAULT_SEQUENTIAL_PALETTE) - } - - /// Get the default diverging palette - pub fn default_diverging(&self) -> Option<&PaletteDefinition> { - self.get(DEFAULT_DIVERGING_PALETTE) - } - - /// List all categorical palette names - pub fn categorical_palettes(&self) -> &[String] { - &self.categorical_names - } - - /// List all sequential palette names - pub fn sequential_palettes(&self) -> &[String] { - &self.sequential_names - } - - /// List all diverging palette names - pub fn diverging_palettes(&self) -> &[String] { - &self.diverging_names - } -} - -/// Parse a hex color string to RGB array -/// -/// Supports formats: -/// - `#RRGGBB` (6 hex digits) -/// - `#RRGGBBAA` (8 hex digits, alpha ignored) -/// - `RRGGBB` (without #) -/// - `RRGGBBAA` (without #) -fn parse_hex_color(hex: &str) -> Option<[u8; 3]> { - let hex = hex.trim_start_matches('#'); - - // Handle 6-digit (RGB) or 8-digit (RGBA) hex - if hex.len() != 6 && hex.len() != 8 { - eprintln!("WARN: Invalid hex color 
length '{}': {}", hex, hex.len()); - return None; - } - - let r = u8::from_str_radix(&hex[0..2], 16).ok()?; - let g = u8::from_str_radix(&hex[2..4], 16).ok()?; - let b = u8::from_str_radix(&hex[4..6], 16).ok()?; - - Some([r, g, b]) -} - -/// Get a categorical color from the default palette by level index -/// -/// This is the main entry point for categorical coloring. Uses "Palette-1" -/// (Tercen's default) and wraps around if the index exceeds the palette size. -pub fn categorical_color_from_level(level: i32) -> [u8; 3] { - let palette = PALETTE_REGISTRY - .default_categorical() - .expect("Default categorical palette 'Palette-1' not found"); - - palette.get_color(level as usize) -} - -/// Get a categorical color from a named palette by level index -/// -/// Falls back to the default palette if the named palette is not found. -pub fn categorical_color_from_palette(palette_name: &str, level: i32) -> [u8; 3] { - let palette = PALETTE_REGISTRY - .get(palette_name) - .or_else(|| PALETTE_REGISTRY.default_categorical()) - .expect("No categorical palette available"); - - palette.get_color(level as usize) -} - -/// Get all colors from a palette as a Vec of RGB arrays -/// -/// Falls back to the default categorical palette if the named palette is not found. 
-pub fn get_palette_colors(palette_name: &str) -> Vec<[u8; 3]> { - let palette = PALETTE_REGISTRY - .get(palette_name) - .or_else(|| PALETTE_REGISTRY.default_categorical()) - .expect("No palette available"); - - palette.get_colors_rgb() -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_parse_hex_color() { - // 6-digit hex - assert_eq!(parse_hex_color("#FF0000"), Some([255, 0, 0])); - assert_eq!(parse_hex_color("#00FF00"), Some([0, 255, 0])); - assert_eq!(parse_hex_color("#0000FF"), Some([0, 0, 255])); - assert_eq!(parse_hex_color("#1F78B4"), Some([31, 120, 180])); - - // Without # - assert_eq!(parse_hex_color("FF0000"), Some([255, 0, 0])); - - // 8-digit hex (with alpha, ignored) - assert_eq!(parse_hex_color("#440154FF"), Some([68, 1, 84])); - assert_eq!(parse_hex_color("440154FF"), Some([68, 1, 84])); - - // Invalid - assert_eq!(parse_hex_color("#FFF"), None); // Too short - assert_eq!(parse_hex_color("GGGGGG"), None); // Invalid hex - } - - #[test] - fn test_palette_registry_loads() { - // Should load without error - let registry = &*PALETTE_REGISTRY; - - // Should have palettes - assert!(!registry.palettes.is_empty()); - - // Should have Palette-1 - let palette1 = registry.get("Palette-1"); - assert!(palette1.is_some()); - let palette1 = palette1.unwrap(); - assert_eq!(palette1.palette_type, PaletteType::Categorical); - assert!(!palette1.colors.is_empty()); - - // First color of Palette-1 should be #1F78B4 (blue) - assert_eq!(palette1.get_color(0), [31, 120, 180]); - } - - #[test] - fn test_categorical_color_from_level() { - // First few colors from Palette-1 - assert_eq!(categorical_color_from_level(0), [31, 120, 180]); // #1F78B4 - assert_eq!(categorical_color_from_level(1), [227, 26, 28]); // #E31A1C - assert_eq!(categorical_color_from_level(2), [51, 160, 44]); // #33A02C - } - - #[test] - fn test_palette_color_wrapping() { - let palette = PALETTE_REGISTRY.get("Palette-1").unwrap(); - let len = palette.len(); - - // Color at index 0 should 
equal color at index len - assert_eq!(palette.get_color(0), palette.get_color(len)); - assert_eq!(palette.get_color(1), palette.get_color(len + 1)); - } - - #[test] - fn test_palette_types() { - let registry = &*PALETTE_REGISTRY; - - // Check categorical palettes - assert!(registry - .categorical_palettes() - .contains(&"Palette-1".to_string())); - assert!(registry - .categorical_palettes() - .contains(&"Set1".to_string())); - - // Check sequential palettes - assert!(registry - .sequential_palettes() - .contains(&"Viridis".to_string())); - assert!(registry.sequential_palettes().contains(&"Jet".to_string())); - - // Check diverging palettes - assert!(registry.diverging_palettes().contains(&"RdBu".to_string())); - assert!(registry.diverging_palettes().contains(&"PiYG".to_string())); - } -} diff --git a/src/tercen/properties.rs b/src/tercen/properties.rs deleted file mode 100644 index 48847a8..0000000 --- a/src/tercen/properties.rs +++ /dev/null @@ -1,218 +0,0 @@ -//! Operator property reading and parsing -//! -//! Reads operator properties from OperatorSettings proto and provides -//! type-safe conversions with explicit defaults. 
- -use super::client::proto::{OperatorSettings, PropertyValue}; - -/// Reads operator properties from Tercen with type-safe conversions -pub struct PropertyReader { - properties: Vec, -} - -impl PropertyReader { - /// Create from OperatorSettings (may be None if no properties set) - pub fn from_operator_settings(settings: Option<&OperatorSettings>) -> Self { - let properties = settings - .and_then(|s| s.operator_ref.as_ref()) - .map(|op_ref| op_ref.property_values.clone()) - .unwrap_or_default(); - - eprintln!( - "DEBUG PropertyReader: Found {} properties", - properties.len() - ); - for prop in &properties { - eprintln!(" DEBUG: '{}' = '{}'", prop.name, prop.value); - } - - Self { properties } - } - - /// Get raw property value (None if not set or empty) - fn get_raw(&self, name: &str) -> Option<&str> { - self.properties - .iter() - .find(|p| p.name == name) - .and_then(|p| { - if p.value.is_empty() { - None // Empty string = not set (Tercen convention) - } else { - Some(p.value.as_str()) - } - }) - } - - /// Get string property with explicit default - pub fn get_string(&self, name: &str, default: &str) -> String { - let value = self.get_raw(name).unwrap_or(default); - eprintln!( - "DEBUG PropertyReader::get_string('{}') -> '{}' (default: '{}')", - name, value, default - ); - value.to_string() - } - - /// Get i32 property with validation and explicit default - pub fn get_i32(&self, name: &str, default: i32) -> i32 { - self.get_raw(name) - .and_then(|v| v.parse::().ok()) - .unwrap_or_else(|| { - if let Some(raw) = self.get_raw(name) { - eprintln!( - "⚠ Invalid integer value for '{}': '{}', using default: {}", - name, raw, default - ); - } - default - }) - } - - /// Get boolean property (handles "true"/"false" strings) with explicit default - pub fn get_bool(&self, name: &str, default: bool) -> bool { - match self.get_raw(name) { - Some("true") => true, - Some("false") => false, - Some(other) => { - eprintln!( - "⚠ Invalid boolean value for '{}': '{}', using default: 
{}", - name, other, default - ); - default - } - None => default, - } - } -} - -/// Plot dimension - either explicit pixels or "auto" (derived from crosstab) -#[derive(Debug, Clone, PartialEq, Default)] -pub enum PlotDimension { - #[default] - Auto, - Pixels(i32), -} - -impl PlotDimension { - /// Parse from string property value - /// - /// Valid formats: - /// - "auto" or "" (empty) → Auto - /// - "1500" → Pixels(1500) if in valid range [100, 128000] - pub fn from_str(value: &str, default: PlotDimension) -> Self { - let trimmed = value.trim(); - - // Empty or "auto" → Auto - if trimmed.is_empty() || trimmed.eq_ignore_ascii_case("auto") { - return PlotDimension::Auto; - } - - // Parse as integer - match trimmed.parse::() { - Ok(px) if (100..=128000).contains(&px) => PlotDimension::Pixels(px), - Ok(px) => { - eprintln!( - "⚠ Plot dimension {} out of valid range [100-128000], using default: {:?}", - px, default - ); - default - } - Err(_) => { - eprintln!( - "⚠ Invalid plot dimension '{}', using default: {:?}", - trimmed, default - ); - default - } - } - } - - /// Resolve to actual pixels - /// - /// For Auto: derives from grid dimension using formula: - /// - base_size (800px) + (n - 1) * size_per_unit (400px) - /// - No upper limit (grows with grid size) - /// - /// Examples: - /// - 1 unit → 800px - /// - 2 units → 1200px - /// - 3 units → 1600px - /// - 10 units → 4400px - /// - 50 units → 20400px - pub fn resolve(&self, n_units: usize) -> i32 { - match self { - PlotDimension::Pixels(px) => *px, - PlotDimension::Auto => { - const BASE_SIZE: i32 = 800; - const SIZE_PER_UNIT: i32 = 400; - - BASE_SIZE + (n_units.saturating_sub(1) as i32 * SIZE_PER_UNIT) - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_property_reader_empty() { - let reader = PropertyReader::from_operator_settings(None); - assert_eq!(reader.get_string("foo", "default"), "default"); - assert_eq!(reader.get_i32("bar", 42), 42); - assert!(reader.get_bool("baz", true)); - 
} - - #[test] - fn test_plot_dimension_auto() { - let dim = PlotDimension::from_str("auto", PlotDimension::Auto); - assert_eq!(dim, PlotDimension::Auto); - assert_eq!(dim.resolve(1), 800); // 1 facet - assert_eq!(dim.resolve(2), 1200); // 2 facets - assert_eq!(dim.resolve(3), 1600); // 3 facets - assert_eq!(dim.resolve(4), 2000); // 4 facets - assert_eq!(dim.resolve(10), 4400); // 10 facets (no cap) - } - - #[test] - fn test_plot_dimension_empty_string() { - let dim = PlotDimension::from_str("", PlotDimension::Auto); - assert_eq!(dim, PlotDimension::Auto); - } - - #[test] - fn test_plot_dimension_pixels() { - let dim = PlotDimension::from_str("1500", PlotDimension::Auto); - assert_eq!(dim, PlotDimension::Pixels(1500)); - assert_eq!(dim.resolve(10), 1500); // Ignores facet count - } - - #[test] - fn test_plot_dimension_invalid() { - let dim = PlotDimension::from_str("abc", PlotDimension::Auto); - assert_eq!(dim, PlotDimension::Auto); // Falls back to default - } - - #[test] - fn test_plot_dimension_out_of_range() { - // Too small - let dim = PlotDimension::from_str("50", PlotDimension::Auto); - assert_eq!(dim, PlotDimension::Auto); - - // Too large - let dim = PlotDimension::from_str("200000", PlotDimension::Auto); - assert_eq!(dim, PlotDimension::Auto); - } - - #[test] - fn test_plot_dimension_edge_cases() { - // Minimum valid - let dim = PlotDimension::from_str("100", PlotDimension::Auto); - assert_eq!(dim, PlotDimension::Pixels(100)); - - // Maximum valid - let dim = PlotDimension::from_str("128000", PlotDimension::Auto); - assert_eq!(dim, PlotDimension::Pixels(128000)); - } -} diff --git a/src/tercen/result.rs b/src/tercen/result.rs deleted file mode 100644 index 6188c97..0000000 --- a/src/tercen/result.rs +++ /dev/null @@ -1,643 +0,0 @@ -//! Result upload module for saving operator results back to Tercen -//! -//! This module handles the complete flow of saving a generated PNG plot -//! back to Tercen so it can be displayed in the workflow UI. -//! -//! 
Flow (following Python client pattern): -//! 1. PNG bytes → Base64 string -//! 2. Create DataFrame with .content, filename, mimetype columns -//! 3. Convert DataFrame → Tercen Table (with TSON encoding) -//! 4. Serialize to Sarno-compatible TSON format -//! 5. Upload via TableSchemaService.uploadTable() -//! 6. Create NEW RunComputationTask with fileResultId and original query -//! 7. Submit task via TaskService.create() -//! 8. Run task via TaskService.runTask() -//! 9. Wait for completion via TaskService.waitDone() -//! 10. Server automatically creates computedRelation linking result to step - -use super::client::proto; -use super::client::TercenClient; -use super::table_convert; -use polars::prelude::*; -use std::sync::Arc; - -/// Struct representing a single plot result for multi-page uploads -pub struct PlotResult { - /// Page label (e.g., "female", "male") - pub label: String, - /// Plot image bytes (PNG or SVG) - pub png_buffer: Vec, - /// Plot width in pixels - pub width: i32, - /// Plot height in pixels - pub height: i32, - /// Page factor values as key-value pairs (e.g., [("Gender", "female")]) - pub page_factors: Vec<(String, String)>, - /// File extension: "png" or "svg" - pub output_ext: String, - /// Base filename without extension (e.g., "plot" or "myplot") - pub filename: String, -} - -/// Get MIME type from file extension -fn mimetype_for_ext(ext: &str) -> &'static str { - match ext { - "svg" => "image/svg+xml", - _ => "image/png", - } -} - -/// Save multiple PNG plot results back to Tercen -/// -/// Each plot gets its own row in the result table, with page factor columns -/// to identify which page it belongs to. 
-/// -/// # Arguments -/// * `client` - Tercen client for gRPC calls -/// * `project_id` - Project ID to upload the result to -/// * `namespace` - Operator namespace for prefixing column names -/// * `plots` - Vector of PlotResult structs -/// * `task` - Mutable reference to the task -pub async fn save_results( - client: Arc, - project_id: &str, - namespace: &str, - plots: Vec, - task: &mut proto::ETask, -) -> Result<(), Box> { - use base64::Engine; - - println!("Encoding {} plots to base64...", plots.len()); - - // Build vectors for each column - let mut ci_vec: Vec = Vec::new(); - let mut ri_vec: Vec = Vec::new(); - let mut content_vec: Vec = Vec::new(); - let mut filename_vec: Vec = Vec::new(); - let mut mimetype_vec: Vec = Vec::new(); - let mut width_vec: Vec = Vec::new(); - let mut height_vec: Vec = Vec::new(); - - // Collect all unique page factor names - let mut page_factor_names: Vec = Vec::new(); - if let Some(first_plot) = plots.first() { - for (name, _) in &first_plot.page_factors { - page_factor_names.push(name.clone()); - } - } - - // Page factor value columns (one vec per factor) - let mut page_factor_values: Vec> = vec![Vec::new(); page_factor_names.len()]; - - for (idx, plot) in plots.iter().enumerate() { - let base64_png = base64::engine::general_purpose::STANDARD.encode(&plot.png_buffer); - println!( - " Plot {}: {} -> {} bytes (base64: {})", - idx + 1, - plot.label, - plot.png_buffer.len(), - base64_png.len() - ); - - // Add row data - ci_vec.push(0); - ri_vec.push(idx as i32); - content_vec.push(base64_png); - filename_vec.push(format!( - "{}_{}.{}", - plot.filename, plot.label, plot.output_ext - )); - mimetype_vec.push(mimetype_for_ext(&plot.output_ext).to_string()); - width_vec.push(plot.width as f64); - height_vec.push(plot.height as f64); - - // Add page factor values - for (i, (_, value)) in plot.page_factors.iter().enumerate() { - if i < page_factor_values.len() { - page_factor_values[i].push(value.clone()); - } - } - } - - // Create 
DataFrame - println!("Creating result DataFrame with {} rows...", plots.len()); - let mut df = df! { - ".ci" => ci_vec, - ".ri" => ri_vec, - ".content" => content_vec, - &format!("{}.filename", namespace) => filename_vec, - &format!("{}.mimetype", namespace) => mimetype_vec, - &format!("{}.plot_width", namespace) => width_vec, - &format!("{}.plot_height", namespace) => height_vec - }?; - - // Add page factor columns - for (i, name) in page_factor_names.iter().enumerate() { - let col_name = format!("{}.{}", namespace, name); - let values = &page_factor_values[i]; - let series = Series::new(col_name.into(), values); - df.with_column(series)?; - } - - println!(" DataFrame: {} rows, {} columns", df.height(), df.width()); - - // Convert to Table and upload (same as single result) - let table = dataframe_to_table(&df)?; - let operator_result = create_operator_result(table)?; - let result_bytes = serialize_operator_result(&operator_result)?; - let file_doc = create_file_document(project_id, result_bytes.len() as i32); - - let existing_file_result_id = get_task_file_result_id(task)?; - - if existing_file_result_id.is_empty() { - println!("Uploading result file (webapp scenario)..."); - let file_doc_id = upload_result_file(&client, file_doc, result_bytes).await?; - println!(" Uploaded file with ID: {}", file_doc_id); - - update_task_file_result_id(task, &file_doc_id)?; - let mut task_service = client.task_service()?; - task_service.update(task.clone()).await?; - println!("Result uploaded - exiting for server to process"); - } else { - println!( - "Uploading to existing result file: {}", - existing_file_result_id - ); - let mut file_service = client.file_service()?; - let get_req = proto::GetRequest { - id: existing_file_result_id.clone(), - ..Default::default() - }; - let e_file_doc = file_service.get(get_req).await?.into_inner(); - use proto::e_file_document; - let file_doc_obj = e_file_doc.object.ok_or("EFileDocument has no object")?; - let 
e_file_document::Object::Filedocument(file_doc) = file_doc_obj; - upload_result_file(&client, file_doc, result_bytes).await?; - println!("Result uploaded - exiting normally"); - } - - Ok(()) -} - -/// Save a PNG plot result back to Tercen -/// -/// Takes the generated PNG buffer, converts it to Tercen's result format, -/// uploads it, updates the existing task with the fileResultId, and waits -/// for the server to process the result and link it to the workflow step. -/// -/// This follows the Python production client pattern (OperatorContext.save): -/// 1. Upload file via FileService.upload() → get FileDocument ID -/// 2. Update EXISTING task's fileResultId field -/// 3. Call TaskService.update() to save the updated task -/// 4. Call TaskService.waitDone() to wait for server processing -/// 5. Server (Sarno) processes file and creates computedRelation automatically -/// -/// # Arguments -/// * `client` - Tercen client for gRPC calls -/// * `project_id` - Project ID to upload the result to -/// * `namespace` - Operator namespace for prefixing column names -/// * `png_buffer` - Raw PNG bytes from the renderer -/// * `plot_width` - Width of the plot in pixels -/// * `plot_height` - Height of the plot in pixels -/// * `task` - Mutable reference to the task (will be updated with fileResultId) -/// -/// # Returns -/// Result indicating success or error during upload -#[allow(clippy::too_many_arguments)] -pub async fn save_result( - client: Arc, - project_id: &str, - namespace: &str, - png_buffer: Vec, - plot_width: i32, - plot_height: i32, - output_ext: &str, - filename: &str, - task: &mut proto::ETask, -) -> Result<(), Box> { - use base64::Engine; - - println!("Encoding plot to base64..."); - // 1. Encode to base64 - let base64_png = base64::engine::general_purpose::STANDARD.encode(&png_buffer); - println!( - " Plot size: {} bytes, base64 size: {} bytes", - png_buffer.len(), - base64_png.len() - ); - - // 2. 
Create result DataFrame with namespace-prefixed columns - println!("Creating result DataFrame..."); - let filename = format!("{}.{}", filename, output_ext); - let mimetype = mimetype_for_ext(output_ext); - let result_df = create_result_dataframe( - base64_png, - namespace, - plot_width, - plot_height, - &filename, - mimetype, - )?; - println!( - " DataFrame: {} rows, {} columns", - result_df.height(), - result_df.width() - ); - - // 3. Convert to Table - println!("Converting DataFrame to Table..."); - let table = dataframe_to_table(&result_df)?; - println!( - " Table: {} rows, {} columns", - table.n_rows, - table.columns.len() - ); - - // 4. Wrap table in OperatorResult structure - println!("Creating OperatorResult..."); - let operator_result = create_operator_result(table)?; - - // 5. Serialize OperatorResult to TSON - println!("Serializing OperatorResult to TSON..."); - let result_bytes = serialize_operator_result(&operator_result)?; - println!(" TSON size: {} bytes", result_bytes.len()); - - // 5. Create FileDocument - println!("Creating FileDocument..."); - let file_doc = create_file_document(project_id, result_bytes.len() as i32); - - // 6. 
Check if task already has a fileResultId (normal operator flow) - let existing_file_result_id = get_task_file_result_id(task)?; - - if existing_file_result_id.is_empty() { - // Webapp/test scenario: Create new file and update task - println!("Uploading result file (webapp scenario)..."); - let file_doc_id = upload_result_file(&client, file_doc, result_bytes).await?; - println!(" Uploaded file with ID: {}", file_doc_id); - - println!("Updating task with fileResultId..."); - update_task_file_result_id(task, &file_doc_id)?; - println!(" Task fileResultId set to: {}", file_doc_id); - - println!("Saving updated task..."); - let mut task_service = client.task_service()?; - let update_response = task_service.update(task.clone()).await?; - let _updated_task = update_response.into_inner(); - println!(" Task updated"); - - // Note: Python calls waitDone() here in webapp scenario - // We should exit cleanly and let the task runner process the result - println!("Result uploaded - exiting for server to process"); - } else { - // Normal operator scenario: Upload to existing file - println!( - "Uploading to existing result file: {}", - existing_file_result_id - ); - - // Get the existing FileDocument - let mut file_service = client.file_service()?; - let get_req = proto::GetRequest { - id: existing_file_result_id.clone(), - ..Default::default() - }; - let e_file_doc = file_service.get(get_req).await?.into_inner(); - - // Extract FileDocument - use proto::e_file_document; - let file_doc_obj = e_file_doc.object.ok_or("EFileDocument has no object")?; - let e_file_document::Object::Filedocument(file_doc) = file_doc_obj; - - // Upload to existing file (overwrites content) - upload_result_file(&client, file_doc, result_bytes).await?; - println!(" Uploaded to existing file"); - - // No update(), no waitDone() - just exit cleanly - println!("Result uploaded - exiting normally"); - } - - Ok(()) -} - -/// Create a result DataFrame with base64-encoded PNG -/// -/// Creates a DataFrame with 
columns matching R plot_operator output: -/// - .ci: Column facet index (int32, value 0 for single plot) -/// - .ri: Row facet index (int32, value 0 for single plot) -/// - .content: Base64-encoded PNG bytes (chunked if > 1MB) -/// - {namespace}.filename: "plot.png" (namespace-prefixed by operator) -/// - {namespace}.mimetype: "image/png" (namespace-prefixed by operator) -/// - {namespace}.plot_width: plot width in pixels (namespace-prefixed by operator) -/// - {namespace}.plot_height: plot height in pixels (namespace-prefixed by operator) -/// -/// If the base64 string is larger than 1MB, it will be split into multiple rows -/// with the same .ci and .ri values. -/// -/// Note: .ci, .ri, and .content have leading dots. Other columns get namespace prefix. -fn create_result_dataframe( - png_base64: String, - namespace: &str, - plot_width: i32, - plot_height: i32, - filename: &str, - mimetype: &str, -) -> Result> { - const CHUNK_SIZE: usize = 1_000_000; // 1MB chunks - - let base64_len = png_base64.len(); - - // Check if we need to chunk - if base64_len <= CHUNK_SIZE { - // Single row - no chunking needed - let df = df! 
{ - ".ci" => [0i32], - ".ri" => [0i32], - ".content" => [png_base64], - &format!("{}.filename", namespace) => [filename], - &format!("{}.mimetype", namespace) => [mimetype], - &format!("{}.plot_width", namespace) => [plot_width as f64], - &format!("{}.plot_height", namespace) => [plot_height as f64] - }?; - Ok(df) - } else { - // Multiple rows - chunk the base64 string - let chunks: Vec = png_base64 - .as_bytes() - .chunks(CHUNK_SIZE) - .map(|chunk| String::from_utf8(chunk.to_vec()).unwrap()) - .collect(); - - let n_chunks = chunks.len(); - println!( - " Chunking large image: {} bytes into {} chunks", - base64_len, n_chunks - ); - - // Create vectors for each column (all chunks have same .ci/.ri) - let ci_vec = vec![0i32; n_chunks]; - let ri_vec = vec![0i32; n_chunks]; - let filename_vec = vec![filename; n_chunks]; - let mimetype_vec = vec![mimetype; n_chunks]; - let width_vec = vec![plot_width as f64; n_chunks]; - let height_vec = vec![plot_height as f64; n_chunks]; - - let df = df! { - ".ci" => ci_vec, - ".ri" => ri_vec, - ".content" => chunks, - &format!("{}.filename", namespace) => filename_vec, - &format!("{}.mimetype", namespace) => mimetype_vec, - &format!("{}.plot_width", namespace) => width_vec, - &format!("{}.plot_height", namespace) => height_vec - }?; - Ok(df) - } -} - -/// Convert DataFrame to Tercen Table with TSON encoding -/// -/// This is delegated to the table_convert module -fn dataframe_to_table(df: &DataFrame) -> Result> { - table_convert::dataframe_to_table(df) -} - -/// Create an OperatorResult wrapping the table -/// -/// OperatorResult structure (full Tercen model format): -/// ```json -/// { -/// "kind": "OperatorResult", -/// "tables": [ -/// { -/// "kind": "Table", -/// "nRows": ..., -/// "properties": {"kind": "TableProperties", "name": "...", ...}, -/// "columns": [...] 
-/// } -/// ], -/// "joinOperators": [] -/// } -/// ``` -fn create_operator_result( - table: proto::Table, -) -> Result> { - use rustson::Value as TsonValue; - use std::collections::HashMap; - - // Convert Table to full Tercen model TSON format (NOT simplified Sarno format) - let table_tson = table_to_tercen_tson(&table)?; - - // Create OperatorResult structure - let mut operator_result = HashMap::new(); - operator_result.insert( - "kind".to_string(), - TsonValue::STR("OperatorResult".to_string()), - ); - operator_result.insert("tables".to_string(), TsonValue::LST(vec![table_tson])); - operator_result.insert("joinOperators".to_string(), TsonValue::LST(vec![])); - - Ok(TsonValue::MAP(operator_result)) -} - -/// Serialize OperatorResult to TSON bytes -fn serialize_operator_result( - operator_result: &rustson::Value, -) -> Result, Box> { - let bytes = rustson::encode(operator_result) - .map_err(|e| format!("Failed to encode OperatorResult to TSON: {:?}", e))?; - Ok(bytes) -} - -/// Convert Table proto to full Tercen model TSON format -/// -/// Creates a complete Table object with kind, properties, and columns: -/// ```json -/// { -/// "kind": "Table", -/// "nRows": ..., -/// "properties": { -/// "kind": "TableProperties", -/// "name": "uuid", -/// "sortOrder": [], -/// "ascending": false -/// }, -/// "columns": [ -/// { -/// "kind": "Column", -/// "name": "...", -/// "type": "...", -/// "nRows": ..., -/// "size": ..., -/// "values": -/// } -/// ] -/// } -/// ``` -fn table_to_tercen_tson( - table: &proto::Table, -) -> Result> { - use rustson::Value as TsonValue; - use std::collections::HashMap; - - let mut table_map = HashMap::new(); - - // Add kind - table_map.insert("kind".to_string(), TsonValue::STR("Table".to_string())); - - // Add nRows - table_map.insert("nRows".to_string(), TsonValue::I32(table.n_rows)); - - // Add properties - if let Some(props) = &table.properties { - let mut props_map = HashMap::new(); - props_map.insert( - "kind".to_string(), - 
TsonValue::STR("TableProperties".to_string()), - ); - props_map.insert("name".to_string(), TsonValue::STR(props.name.clone())); - props_map.insert( - "sortOrder".to_string(), - TsonValue::LST( - props - .sort_order - .iter() - .map(|s| TsonValue::STR(s.clone())) - .collect(), - ), - ); - props_map.insert("ascending".to_string(), TsonValue::BOOL(props.ascending)); - - table_map.insert("properties".to_string(), TsonValue::MAP(props_map)); - } - - // Add columns - let mut cols_list = Vec::new(); - for col in &table.columns { - let mut col_map = HashMap::new(); - - col_map.insert("kind".to_string(), TsonValue::STR("Column".to_string())); - col_map.insert("name".to_string(), TsonValue::STR(col.name.clone())); - col_map.insert("type".to_string(), TsonValue::STR(col.r#type.clone())); - col_map.insert("nRows".to_string(), TsonValue::I32(col.n_rows)); - col_map.insert("size".to_string(), TsonValue::I32(col.size)); - - // Decode the TSON-encoded values to get the actual data - let col_values = rustson::decode_bytes(&col.values) - .map_err(|e| format!("Failed to decode column values for '{}': {:?}", col.name, e))?; - col_map.insert("values".to_string(), col_values); - - cols_list.push(TsonValue::MAP(col_map)); - } - table_map.insert("columns".to_string(), TsonValue::LST(cols_list)); - - Ok(TsonValue::MAP(table_map)) -} - -/// Create FileDocument for result upload -fn create_file_document(project_id: &str, size: i32) -> proto::FileDocument { - // Set file metadata - let file_metadata = proto::FileMetadata { - content_type: "application/octet-stream".to_string(), - ..Default::default() - }; - - let e_metadata = proto::EFileMetadata { - object: Some(proto::e_file_metadata::Object::Filemetadata(file_metadata)), - }; - - // Note: ACL will be assigned by the server based on projectId - proto::FileDocument { - name: "result".to_string(), - project_id: project_id.to_string(), - size, - metadata: Some(e_metadata), - ..Default::default() - } -} - -/// Upload result file via 
FileService.upload() -/// -/// This uploads an OperatorResult (TSON-encoded table) as a FileDocument. -/// The returned FileDocument ID is what goes into task.fileResultId. -/// The server (Sarno) will then process this file to create the actual schemas -/// and computedRelation. -/// -/// Note: We use FileService.upload() (NOT TableSchemaService.uploadTable()) -/// because we need a FileDocument with a dataUri, not just a Schema. -async fn upload_result_file( - client: &TercenClient, - file_doc: proto::FileDocument, - result_bytes: Vec, -) -> Result> { - let mut file_service = client.file_service()?; - - // Create EFileDocument wrapper - let e_file_doc = proto::EFileDocument { - object: Some(proto::e_file_document::Object::Filedocument(file_doc)), - }; - - // Create upload request (single message in a stream) - let request = proto::ReqUpload { - file: Some(e_file_doc), - bytes: result_bytes, - }; - - // Wrap in stream (even though it's just one message) - use futures::stream; - let request_stream = stream::iter(vec![request]); - - // Send request - let response = file_service.upload(request_stream).await?; - let resp_upload = response.into_inner(); - - // Extract FileDocument ID from response - let e_file_doc = resp_upload - .result - .ok_or("Upload response missing file document")?; - - // Extract the actual FileDocument from the wrapper - use proto::e_file_document; - let file_doc_obj = e_file_doc.object.ok_or("EFileDocument has no object")?; - - // EFileDocument only has one variant: filedocument - let file_doc_id = match file_doc_obj { - e_file_document::Object::Filedocument(fd) => fd.id, - }; - - Ok(file_doc_id) -} - -/// Get the task's fileResultId if it exists -/// -/// Returns empty string if fileResultId is not set. 
-fn get_task_file_result_id(task: &proto::ETask) -> Result> { - use proto::e_task; - - let task_obj = task.object.as_ref().ok_or("Task has no object field")?; - - match task_obj { - e_task::Object::Runcomputationtask(rct) => Ok(rct.file_result_id.clone()), - _ => Err("Expected RunComputationTask".into()), - } -} - -/// Update the task's fileResultId field -/// -/// This updates the EXISTING task (following Python OperatorContext pattern). -/// The server will process the file and create the computedRelation automatically. -fn update_task_file_result_id( - task: &mut proto::ETask, - file_doc_id: &str, -) -> Result<(), Box> { - use proto::e_task; - - let task_obj = task.object.as_mut().ok_or("Task has no object field")?; - - match task_obj { - e_task::Object::Runcomputationtask(rct) => { - rct.file_result_id = file_doc_id.to_string(); - Ok(()) - } - _ => Err("Expected RunComputationTask".into()), - } -} diff --git a/src/tercen/table.rs b/src/tercen/table.rs deleted file mode 100644 index 1ee72c9..0000000 --- a/src/tercen/table.rs +++ /dev/null @@ -1,158 +0,0 @@ -#![allow(dead_code)] -use super::client::proto::ReqStreamTable; -use super::client::TercenClient; -use super::error::{Result, TercenError}; -use std::collections::HashMap; -use std::sync::{Arc, Mutex}; -use tokio_stream::StreamExt; - -/// Schema cache type alias for reuse across pages -/// Key: table_id, Value: cached schema -pub type SchemaCache = Arc>>; - -/// Create a new empty schema cache -pub fn new_schema_cache() -> SchemaCache { - Arc::new(Mutex::new(HashMap::new())) -} - -/// Tercen table data streamer -pub struct TableStreamer<'a> { - client: &'a TercenClient, - /// Optional schema cache (shared across multiple streamers) - schema_cache: Option, -} - -impl<'a> TableStreamer<'a> { - /// Create a new table streamer without caching - pub fn new(client: &'a TercenClient) -> Self { - TableStreamer { - client, - schema_cache: None, - } - } - - /// Create a new table streamer with schema caching - /// - 
/// The cache is shared across multiple streamers, so schemas fetched - /// for one page are reused for subsequent pages. - pub fn with_cache(client: &'a TercenClient, cache: SchemaCache) -> Self { - TableStreamer { - client, - schema_cache: Some(cache), - } - } - - /// Get the schema for a table to retrieve metadata like row count - /// - /// If a schema cache was provided via `with_cache`, this will check - /// the cache first and only make a network request on cache miss. - /// Cached schemas are reused across pages in multi-page plots. - pub async fn get_schema(&self, table_id: &str) -> Result { - // Check cache first - if let Some(ref cache) = self.schema_cache { - let guard = cache.lock().unwrap(); - if let Some(schema) = guard.get(table_id) { - eprintln!("DEBUG: Schema cache HIT for table {}", table_id); - return Ok(schema.clone()); - } - } - - eprintln!("DEBUG: Schema cache MISS for table {} - fetching", table_id); - - use super::client::proto::GetRequest; - - let mut table_service = self.client.table_service()?; - let request = tonic::Request::new(GetRequest { - id: table_id.to_string(), - ..Default::default() - }); - - let response = table_service - .get(request) - .await - .map_err(|e| TercenError::Grpc(Box::new(e)))?; - - let schema = response.into_inner(); - - // Populate cache - if let Some(ref cache) = self.schema_cache { - let mut guard = cache.lock().unwrap(); - guard.insert(table_id.to_string(), schema.clone()); - } - - Ok(schema) - } - - pub async fn stream_tson( - &self, - table_id: &str, - columns: Option>, - offset: i64, - limit: i64, - ) -> Result> { - let mut table_service = self.client.table_service()?; - - let request = tonic::Request::new(ReqStreamTable { - table_id: table_id.to_string(), - cnames: columns.unwrap_or_default(), - offset, - limit, - binary_format: String::new(), // Empty = TSON format (default) - }); - - let mut stream = table_service - .stream_table(request) - .await - .map_err(|e| TercenError::Grpc(Box::new(e)))? 
- .into_inner(); - - let mut all_data = Vec::new(); - - while let Some(chunk_result) = stream.next().await { - match chunk_result { - Ok(chunk) => { - all_data.extend_from_slice(&chunk.result); - } - Err(e) => return Err(TercenError::Grpc(Box::new(e))), - } - } - - Ok(all_data) - } - - /// Stream entire table in chunks, calling callback for each chunk - /// - /// # Arguments - /// * `table_id` - The Tercen table ID to stream - /// * `columns` - Optional list of columns to fetch - /// * `chunk_size` - Number of rows per chunk - /// * `callback` - Function to call with each TSON chunk - pub async fn stream_table_chunked( - &self, - table_id: &str, - columns: Option>, - chunk_size: i64, - mut callback: F, - ) -> Result<()> - where - F: FnMut(Vec) -> Result<()>, - { - let mut offset = 0; - - loop { - let chunk = self - .stream_tson(table_id, columns.clone(), offset, chunk_size) - .await?; - - if chunk.is_empty() { - break; - } - - callback(chunk)?; - - offset += chunk_size; - } - - Ok(()) - } -} diff --git a/src/tercen/table_convert.rs b/src/tercen/table_convert.rs deleted file mode 100644 index 5ef1f68..0000000 --- a/src/tercen/table_convert.rs +++ /dev/null @@ -1,176 +0,0 @@ -//! DataFrame to Tercen Table conversion -//! -//! Converts Polars DataFrames to Tercen Table proto messages with TSON encoding. -//! This is the core transformation needed to save results back to Tercen. - -use super::client::proto; -use polars::prelude::*; -use rustson::Value as TsonValue; - -/// Convert a Polars DataFrame to a Tercen Table -/// -/// This function: -/// 1. Infers Tercen types from Polars DataTypes -/// 2. Encodes column values using TSON format -/// 3. 
Constructs the Table proto message -/// -/// # Arguments -/// * `df` - Polars DataFrame to convert -/// -/// # Returns -/// Tercen Table proto message ready for upload -pub fn dataframe_to_table(df: &DataFrame) -> Result> { - let nrows = df.height() as i32; - - // Create TableProperties with a unique name (required by Sarno) - let properties = proto::TableProperties { - name: uuid::Uuid::new_v4().to_string(), - sort_order: vec![], - ascending: false, - }; - - // Convert each column - let mut columns = Vec::new(); - - for col in df.get_columns() { - let series = col.as_materialized_series(); - let values = encode_column_values(series)?; - - let column = proto::Column { - name: series.name().to_string(), - r#type: infer_column_type(series.dtype()), - n_rows: nrows, - size: nrows, - values, - ..Default::default() - }; - - columns.push(column); - } - - // Create Table - let table = proto::Table { - n_rows: nrows, - properties: Some(properties), - columns, - }; - - Ok(table) -} - -/// Infer Tercen column type from Polars DataType -/// -/// Maps Polars types to Tercen type strings: -/// - String/Utf8 → "string" -/// - Float64 → "double" -/// - Int32 → "int32" -/// - Int64 → "int64" -fn infer_column_type(dtype: &DataType) -> String { - match dtype { - DataType::String => "string".to_string(), - DataType::Float64 => "double".to_string(), - DataType::Int32 => "int32".to_string(), - DataType::Int64 => "int64".to_string(), - _ => "string".to_string(), // Default to string for unknown types - } -} - -/// Encode column values to TSON binary format -/// -/// Uses the rustson crate to encode column data in Tercen's binary format. -/// The TSON format for a column is a MAP with structure: -/// ```json -/// { -/// "name": "column_name", -/// "type": "s"|"d"|"i", -/// "data": [values...] 
-/// } -/// ``` -fn encode_column_values(series: &Series) -> Result, Box> { - // Convert series to TSON Value - // CRITICAL: Must use typed lists (LSTSTR, LSTF64, LSTI32, LSTI64) - // NOT generic LST with wrapped values - Sarno expects TypedData! - let tson_data = match series.dtype() { - DataType::String => { - // String column - use LSTSTR for CStringList - let str_vec: Vec = series - .str()? - .into_iter() - .map(|opt| opt.map(|s| s.to_string()).unwrap_or_else(String::new)) // TODO: Handle nulls properly - .collect(); - TsonValue::LSTSTR(str_vec.into()) - } - DataType::Float64 => { - // Double column - use LSTF64 for Float64List - let f64_vec: Vec = series - .f64()? - .into_iter() - .map(|opt| opt.unwrap_or(0.0)) // TODO: Handle nulls properly - .collect(); - TsonValue::LSTF64(f64_vec) - } - DataType::Int32 => { - // Int32 column - use LSTI32 for Int32List - let i32_vec: Vec = series - .i32()? - .into_iter() - .map(|opt| opt.unwrap_or(0)) // TODO: Handle nulls properly - .collect(); - TsonValue::LSTI32(i32_vec) - } - DataType::Int64 => { - // Int64 column - use LSTI64 for Int64List - let i64_values: Vec = series - .i64()? - .into_iter() - .map(|opt| opt.unwrap_or(0)) // TODO: Handle nulls properly - .collect(); - TsonValue::LSTI64(i64_values) - } - _ => { - return Err(format!("Unsupported column type: {:?}", series.dtype()).into()); - } - }; - - // Encode to TSON binary - let bytes = - rustson::encode(&tson_data).map_err(|e| format!("Failed to encode TSON: {:?}", e))?; - - Ok(bytes) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_create_result_dataframe_to_table() { - // Create a simple result DataFrame - let df = df! 
{ - ".content" => ["base64encodedstring"], - "filename" => ["plot.png"], - "mimetype" => ["image/png"] - } - .unwrap(); - - // Convert to Table - let table = dataframe_to_table(&df).unwrap(); - - // Verify structure - assert_eq!(table.n_rows, 1); - assert_eq!(table.columns.len(), 3); - - // Check column names and types - assert_eq!(table.columns[0].name, ".content"); - assert_eq!(table.columns[0].r#type, "string"); - assert_eq!(table.columns[1].name, "filename"); - assert_eq!(table.columns[1].r#type, "string"); - assert_eq!(table.columns[2].name, "mimetype"); - assert_eq!(table.columns[2].r#type, "string"); - - // Verify TSON encoding produced bytes - assert!(!table.columns[0].values.is_empty()); - assert!(!table.columns[1].values.is_empty()); - assert!(!table.columns[2].values.is_empty()); - } -} diff --git a/src/tercen/tson_convert.rs b/src/tercen/tson_convert.rs deleted file mode 100644 index 4d65d2d..0000000 --- a/src/tercen/tson_convert.rs +++ /dev/null @@ -1,211 +0,0 @@ -//! TSON to GGRS DataFrame conversion -//! -//! Converts TSON binary format (Tercen's native format) to GGRS DataFrame. -//! This module uses Polars for efficient columnar data processing. -//! -//! TSON (Tercen Serialized Object Notation) is a binary format used by Tercen -//! for efficient data transfer. See: https://github.com/tercen/rustson - -use super::error::{Result, TercenError}; -use ggrs_core::data::DataFrame; -use polars::prelude::*; -use rustson::Value as TsonValue; - -/// Convert TSON bytes to GGRS DataFrame -/// -/// This function decodes TSON binary format and converts it to a GGRS DataFrame. -/// The TSON data is expected to represent a table structure. 
-pub fn tson_to_dataframe(tson_bytes: &[u8]) -> Result { - if tson_bytes.is_empty() { - return Ok(DataFrame::new()); - } - - // Decode TSON - let tson_value = rustson::decode_bytes(tson_bytes) - .map_err(|e| TercenError::Other(format!("Failed to decode TSON: {:?}", e)))?; - - // Convert TSON value to DataFrame - tson_value_to_dataframe(&tson_value) -} - -/// Convert a TSON value to GGRS DataFrame -/// -/// Expects the TSON value to be a Tercen table structure: -/// ```json -/// { -/// "cols": [ -/// {name: "col1", type: "d", data: [...]}, -/// {name: "col2", type: "s", data: [...]}, -/// ] -/// } -/// ``` -fn tson_value_to_dataframe(tson: &TsonValue) -> Result { - // TSON tables are represented as MAP - let map = match tson { - TsonValue::MAP(m) => m, - _ => { - return Err(TercenError::Other( - "Expected TSON MAP, got different type".to_string(), - )) - } - }; - - // Extract column definitions from 'cols' - let cols = map - .get("cols") - .ok_or_else(|| TercenError::Other("Missing 'cols' field in TSON".to_string()))?; - - let col_defs = match cols { - TsonValue::LST(defs) => defs, - _ => return Err(TercenError::Other("Expected 'cols' to be LST".to_string())), - }; - - if col_defs.is_empty() { - // Empty table - return Ok(DataFrame::new()); - } - - // Extract column names and data from each column definition - let mut col_names = Vec::new(); - let mut col_data_arrays = Vec::new(); - - for col_def in col_defs { - if let TsonValue::MAP(col_map) = col_def { - // Extract name - if let Some(TsonValue::STR(name)) = col_map.get("name") { - col_names.push(name.clone()); - - // Extract data array - if let Some(data) = col_map.get("data") { - col_data_arrays.push(data); - } else { - return Err(TercenError::Other(format!( - "Column '{}' missing 'data' field", - name - ))); - } - } - } - } - - // Determine number of rows from first column (unused but kept for future validation) - let _nrows = if let Some(first_col_data) = col_data_arrays.first() { - 
get_column_length(first_col_data)? - } else { - 0 - }; - - // Convert TSON columnar arrays directly to Polars Columns (STAY COLUMNAR!) - let mut columns_vec = Vec::new(); - - for (col_name, col_data) in col_names.iter().zip(col_data_arrays.iter()) { - let series = tson_column_to_polars_series(col_name, col_data)?; - // Convert Series to Column for Polars 0.44 API - columns_vec.push(series.into_column()); - } - - // Create Polars DataFrame directly from columns - let polars_df = polars::frame::DataFrame::new(columns_vec) - .map_err(|e| TercenError::Other(format!("Failed to create Polars DataFrame: {}", e)))?; - - Ok(DataFrame::from_polars(polars_df)) -} - -/// Convert TSON column array directly to Polars Series (COLUMNAR - NO RECORDS!) -fn tson_column_to_polars_series(col_name: &str, col_data: &TsonValue) -> Result { - match col_data { - TsonValue::LSTF64(values) => { - // f64 array - direct conversion - Ok(Series::new(col_name.into(), values.as_slice())) - } - TsonValue::LSTI32(values) => { - // i32 array - convert to i64 for Polars - let i64_values: Vec = values.iter().map(|&v| v as i64).collect(); - Ok(Series::new(col_name.into(), i64_values)) - } - TsonValue::LSTI16(values) => { - // i16 array - convert to i64 - let i64_values: Vec = values.iter().map(|&v| v as i64).collect(); - Ok(Series::new(col_name.into(), i64_values)) - } - TsonValue::LSTU16(values) => { - // u16 array - convert to i64 - let i64_values: Vec = values.iter().map(|&v| v as i64).collect(); - Ok(Series::new(col_name.into(), i64_values)) - } - TsonValue::LSTSTR(strvec) => { - // String array - let strings = strvec - .try_to_vec() - .map_err(|e| TercenError::Other(format!("Failed to parse LSTSTR: {:?}", e)))?; - Ok(Series::new(col_name.into(), strings)) - } - TsonValue::LST(values) => { - // Mixed-type list - convert each element - let mut any_values = Vec::with_capacity(values.len()); - for val in values { - let any_val = tson_value_to_any_value(val)?; - any_values.push(any_val); - } - 
Ok(Series::new(col_name.into(), any_values)) - } - _ => { - // Single value or unsupported type - Err(TercenError::Other(format!( - "Unsupported TSON column type for column: {}", - col_name - ))) - } - } -} - -/// Convert a single TSON value to Polars AnyValue -fn tson_value_to_any_value(tson: &TsonValue) -> Result> { - match tson { - TsonValue::NULL => Ok(AnyValue::Null), - TsonValue::BOOL(b) => Ok(AnyValue::Boolean(*b)), - TsonValue::I32(i) => Ok(AnyValue::Int64(*i as i64)), - TsonValue::F64(f) => Ok(AnyValue::Float64(*f)), - TsonValue::STR(s) => { - // Convert to owned string to satisfy 'static lifetime - Ok(AnyValue::StringOwned(s.clone().into())) - } - _ => { - // For complex types, convert to owned string - let owned_str = format!("{:?}", tson); - Ok(AnyValue::StringOwned(owned_str.into())) - } - } -} - -/// Get the length of a TSON column array -fn get_column_length(col_data: &TsonValue) -> Result { - match col_data { - TsonValue::LST(values) => Ok(values.len()), - TsonValue::LSTI32(values) => Ok(values.len()), - TsonValue::LSTF64(values) => Ok(values.len()), - TsonValue::LSTU16(values) => Ok(values.len()), - TsonValue::LSTI16(values) => Ok(values.len()), - TsonValue::LSTSTR(strvec) => strvec - .try_to_vec() - .map(|v| v.len()) - .map_err(|e| TercenError::Other(format!("Failed to get LSTSTR length: {:?}", e))), - _ => Ok(0), - } -} - -// NOTE: Dead functions removed - we now use pure columnar Polars operations -// No more row-by-row Record building! 
- -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_empty_tson() { - let result = tson_to_dataframe(&[]); - assert!(result.is_ok()); - let df = result.unwrap(); - assert_eq!(df.nrow(), 0); - } -} diff --git a/tercen_grpc_api b/tercen_grpc_api deleted file mode 160000 index 6a3a1be..0000000 --- a/tercen_grpc_api +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 6a3a1be55490c528f6191e650be3c7414f324522 From d460a632aead87ef283f58cc324bc0e7479ad370 Mon Sep 17 00:00:00 2001 From: Alexandre Maurel Date: Sun, 1 Mar 2026 11:19:07 +0100 Subject: [PATCH 2/3] Remove build.rs and tercen_grpc_api references from Dockerfile These are no longer needed since proto compilation moved to the tercen-rs crate. Co-Authored-By: Claude Opus 4.6 --- Dockerfile | 9 --------- 1 file changed, 9 deletions(-) diff --git a/Dockerfile b/Dockerfile index f1cb56e..ac50c52 100644 --- a/Dockerfile +++ b/Dockerfile @@ -31,18 +31,9 @@ WORKDIR /app COPY Cargo.toml ./ COPY Cargo.lock ./ -# Copy build script -COPY build.rs ./ - -# Copy proto files submodule (needed for build.rs) -COPY tercen_grpc_api ./tercen_grpc_api - # Copy source tree COPY src ./src -# Copy palettes.json (used by include_str! at compile time) -COPY palettes.json ./ - # Copy operator.json (used by include_str! at compile time) COPY operator.json ./ From 85ca362fa0e7ed7633d3ca83bc3b339359c23cfc Mon Sep 17 00:00:00 2001 From: Alexandre Maurel Date: Sun, 1 Mar 2026 11:56:14 +0100 Subject: [PATCH 3/3] Update container tag to match branch name The operator.json container field must reference the branch-specific image tag so Tercen pulls the correct Docker image when installing from this branch. 
Co-Authored-By: Claude Opus 4.6 --- operator.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/operator.json b/operator.json index 5116552..83f5e6b 100644 --- a/operator.json +++ b/operator.json @@ -5,7 +5,7 @@ "communicationProtocol": "grpc", "authors": ["Tercen"], "urls": ["https://github.com/tercen/ggrs_plot_operator"], - "container": "ghcr.io/tercen/ggrs_plot_operator:main", + "container": "ghcr.io/tercen/ggrs_plot_operator:extract-tercen-rs", "operatorSpec": { "kind": "OperatorSpec", "ontologyUri": "https://tercen.com/_ontology/tercen",