Skip to content

Commit b088ae5

Browse files
feat(experiments): auto-discover ClickHouse label columns in SQL planner via system.columns HTTP API (#396)
* added plan file * added v1 implementation * added debug logs
1 parent 4bc9bf7 commit b088ae5

12 files changed

Lines changed: 590 additions & 14 deletions

File tree

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
use std::collections::HashSet;
2+
use std::thread;
3+
use std::time::Duration;
4+
5+
use serde::Deserialize;
6+
use tracing::{debug, warn};
7+
8+
use crate::error::ControllerError;
9+
10+
/// Upper bound on the number of HTTP attempts made per table when ClickHouse
/// keeps answering with `503 Service Unavailable`.
const MAX_RETRIES: u32 = 15;
11+
/// Pause between two consecutive attempts after a 503 response.
const RETRY_DELAY: Duration = Duration::from_secs(2);
12+
13+
/// One row of the `system.columns` query result; each line of the HTTP
/// response body is deserialized into one of these.
#[derive(Deserialize)]
14+
struct ColumnRow {
15+
/// Column name (JSON key `name`).
name: String,
16+
// The JSON key is `type`, which is a Rust keyword, hence the rename.
#[serde(rename = "type")]
17+
/// Column type string (JSON key `type`).
column_type: String,
18+
}
19+
20+
/// Fetch `(name, type)` pairs for all columns in `database.table` via the
21+
/// ClickHouse HTTP API (`system.columns`).
22+
fn fetch_columns_for_table(
23+
clickhouse_url: &str,
24+
database: &str,
25+
table: &str,
26+
) -> Result<Vec<(String, String)>, ControllerError> {
27+
let base_url = clickhouse_url.trim_end_matches('/');
28+
let sql = format!(
29+
"SELECT name, type FROM system.columns WHERE database = '{}' AND table = '{}'",
30+
database, table
31+
);
32+
let client = reqwest::blocking::Client::new();
33+
34+
for attempt in 1..=MAX_RETRIES {
35+
let response = client
36+
.get(base_url)
37+
.query(&[("query", sql.as_str()), ("default_format", "JSONEachRow")])
38+
.send()
39+
.map_err(|e| {
40+
ControllerError::ClickHouseClient(format!(
41+
"HTTP request failed for table '{}.{}': {}",
42+
database, table, e
43+
))
44+
})?;
45+
46+
let status = response.status();
47+
48+
if status == reqwest::StatusCode::SERVICE_UNAVAILABLE {
49+
warn!(
50+
"ClickHouse returned 503 for table '{}.{}' (attempt {}/{}); retrying in {}s",
51+
database,
52+
table,
53+
attempt,
54+
MAX_RETRIES,
55+
RETRY_DELAY.as_secs(),
56+
);
57+
thread::sleep(RETRY_DELAY);
58+
continue;
59+
}
60+
61+
if !status.is_success() {
62+
return Err(ControllerError::ClickHouseClient(format!(
63+
"ClickHouse returned HTTP {} for table '{}.{}'",
64+
status, database, table
65+
)));
66+
}
67+
68+
let body = response.text().map_err(|e| {
69+
ControllerError::ClickHouseClient(format!(
70+
"Failed to read ClickHouse response for table '{}.{}': {}",
71+
database, table, e
72+
))
73+
})?;
74+
75+
let mut columns = Vec::new();
76+
for line in body.lines() {
77+
let row: ColumnRow = serde_json::from_str(line).map_err(|e| {
78+
ControllerError::ClickHouseClient(format!(
79+
"Failed to parse ClickHouse column row {:?}: {}",
80+
line, e
81+
))
82+
})?;
83+
columns.push((row.name, row.column_type));
84+
}
85+
86+
debug!(
87+
"Fetched {} columns for table '{}.{}'",
88+
columns.len(),
89+
database,
90+
table
91+
);
92+
return Ok(columns);
93+
}
94+
95+
Err(ControllerError::ClickHouseClient(format!(
96+
"ClickHouse returned 503 for table '{}.{}' after {} attempts; giving up",
97+
database, table, MAX_RETRIES
98+
)))
99+
}
100+
101+
/// Query `system.columns` and return all column names that are not the time
102+
/// column or one of the value columns, sorted alphabetically.
103+
///
104+
/// These are the metadata (dimension) columns the planner uses for rollup,
105+
/// analogous to PromQL label sets discovered from Prometheus.
106+
pub fn infer_metadata_columns(
107+
clickhouse_url: &str,
108+
database: &str,
109+
table_name: &str,
110+
time_column: &str,
111+
value_columns: &[String],
112+
) -> Result<Vec<String>, ControllerError> {
113+
let all_columns = fetch_columns_for_table(clickhouse_url, database, table_name)?;
114+
115+
let exclude: HashSet<&str> = std::iter::once(time_column)
116+
.chain(value_columns.iter().map(String::as_str))
117+
.collect();
118+
119+
let mut metadata: Vec<String> = all_columns
120+
.into_iter()
121+
.map(|(name, _)| name)
122+
.filter(|name| !exclude.contains(name.as_str()))
123+
.collect();
124+
metadata.sort();
125+
126+
debug!(
127+
"Inferred metadata columns for table '{}': {:?}",
128+
table_name, metadata
129+
);
130+
Ok(metadata)
131+
}

asap-planner-rs/src/config/input.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ pub struct TableDefinition {
142142
pub name: String,
143143
pub time_column: String,
144144
pub value_columns: Vec<String>,
145+
#[serde(default)]
145146
pub metadata_columns: Vec<String>,
146147
}
147148

asap-planner-rs/src/error.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ pub enum ControllerError {
2020
UnknownTable(String),
2121
#[error("Prometheus client error: {0}")]
2222
PrometheusClient(String),
23+
#[error("ClickHouse client error: {0}")]
24+
ClickHouseClient(String),
2325
#[error("Elasticsearch DSL parse error: {0}")]
2426
ElasticDSLParse(String),
2527
#[error("Unsupported Elasticsearch DSL query: {0}")]

asap-planner-rs/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
pub mod clickhouse_client;
12
pub mod config;
23
pub mod elastic_dsl;
34
pub mod error;

asap-planner-rs/src/main.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,14 @@ struct Args {
4848
#[arg(long = "data-ingestion-interval", required = false)]
4949
data_ingestion_interval: Option<u64>,
5050

51+
/// ClickHouse base URL for auto-inferring metadata_columns when not listed
52+
/// in the config file. Example: http://localhost:8123
53+
#[arg(long = "clickhouse-url", required = false)]
54+
clickhouse_url: Option<String>,
55+
56+
#[arg(long = "clickhouse-database", required = false)]
57+
clickhouse_database: Option<String>,
58+
5159
#[arg(short, long, action = clap::ArgAction::Count)]
5260
verbose: u8,
5361
}
@@ -126,7 +134,16 @@ fn main() -> anyhow::Result<()> {
126134
query_evaluation_time: None,
127135
data_ingestion_interval: interval,
128136
};
129-
SQLController::from_file(&config_path, opts)?.generate_to_dir(&args.output_dir)?;
137+
let controller = match args.clickhouse_url {
138+
Some(ref url) => SQLController::from_file_with_discovery(
139+
&config_path,
140+
url,
141+
args.clickhouse_database.as_deref().unwrap_or("default"),
142+
opts,
143+
)?,
144+
None => SQLController::from_file(&config_path, opts)?,
145+
};
146+
controller.generate_to_dir(&args.output_dir)?;
130147
}
131148
QueryLanguage::elastic_querydsl => {
132149
let interval = args.data_ingestion_interval.ok_or_else(|| {

asap-planner-rs/src/sql/controller.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
use std::path::Path;
22

3+
use tracing::debug;
4+
35
use super::generator;
6+
use crate::clickhouse_client;
47
use crate::config::input::SQLControllerConfig;
58
use crate::error::ControllerError;
69
use crate::planner_output::PlannerOutput;
@@ -21,6 +24,47 @@ impl SQLController {
2124
Self::from_yaml(&yaml_str, opts)
2225
}
2326

27+
/// Build a `SQLController` from a config file, filling in any empty
28+
/// `metadata_columns` via auto-discovery from the ClickHouse HTTP API.
29+
///
30+
/// Mirrors `promql::Controller::from_file`, which fetches label sets from
31+
/// Prometheus. Tables whose `metadata_columns` is already populated in the
32+
/// config are left untouched; only empty ones are discovered.
33+
pub fn from_file_with_discovery(
34+
path: &Path,
35+
clickhouse_url: &str,
36+
clickhouse_database: &str,
37+
opts: SQLRuntimeOptions,
38+
) -> Result<Self, ControllerError> {
39+
let yaml_str = std::fs::read_to_string(path)?;
40+
let mut config: SQLControllerConfig = serde_yaml::from_str(&yaml_str)?;
41+
for table in &mut config.tables {
42+
if table.metadata_columns.is_empty() {
43+
debug!(
44+
"Table '{}' has no metadata_columns; discovering via ClickHouse system.columns at {}",
45+
table.name, clickhouse_url
46+
);
47+
table.metadata_columns = clickhouse_client::infer_metadata_columns(
48+
clickhouse_url,
49+
clickhouse_database,
50+
&table.name,
51+
&table.time_column,
52+
&table.value_columns,
53+
)?;
54+
} else {
55+
debug!(
56+
"Table '{}' has {} metadata_columns in config; skipping discovery",
57+
table.name,
58+
table.metadata_columns.len()
59+
);
60+
}
61+
}
62+
Ok(Self {
63+
config,
64+
options: opts,
65+
})
66+
}
67+
2468
pub fn from_yaml(yaml: &str, opts: SQLRuntimeOptions) -> Result<Self, ControllerError> {
2569
let config: SQLControllerConfig = serde_yaml::from_str(yaml)?;
2670
Ok(Self {

asap-planner-rs/src/sql/generator.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,18 @@ pub fn generate_sql_plan(
4848
}
4949
}
5050

51+
// Validate that all tables have metadata_columns populated (either from config
52+
// or filled in by from_file_with_discovery before reaching here).
53+
for t in &config.tables {
54+
if t.metadata_columns.is_empty() {
55+
return Err(ControllerError::PlannerError(format!(
56+
"Table '{}' has no metadata_columns. List them in the config file \
57+
or pass --clickhouse-url for auto-discovery.",
58+
t.name
59+
)));
60+
}
61+
}
62+
5163
// Check for duplicate queries
5264
let mut seen_queries = std::collections::HashSet::new();
5365
for qg in &config.query_groups {
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
use std::io::{Read, Write};
2+
use std::net::TcpListener;
3+
4+
use asap_planner::clickhouse_client::infer_metadata_columns;
5+
6+
/// Runs `infer_metadata_columns` against a one-connection mock HTTP server
/// that replies with a canned `system.columns` payload, and checks that the
/// time column and value columns are dropped and the remainder comes back
/// sorted.
///
/// Table: hits
///   EventTime        DateTime  <- excluded (time_column)
///   ResolutionWidth  UInt16    <- excluded (value_column)
///   OS               UInt8     <- metadata
///   RegionID         UInt32    <- metadata
///
/// Expected result: ["OS", "RegionID"]
#[test]
fn test_infer_metadata_columns_via_mock() {
    let canned_rows = concat!(
        "{\"name\":\"EventTime\",\"type\":\"DateTime\"}\n",
        "{\"name\":\"ResolutionWidth\",\"type\":\"UInt16\"}\n",
        "{\"name\":\"OS\",\"type\":\"UInt8\"}\n",
        "{\"name\":\"RegionID\",\"type\":\"UInt32\"}\n",
    );

    // Port 0 lets the OS choose a free port for the mock server.
    let mock = TcpListener::bind("127.0.0.1:0").unwrap();
    let mock_port = mock.local_addr().unwrap().port();

    let mock_thread = std::thread::spawn(move || {
        let (mut conn, _peer) = mock.accept().unwrap();

        // The request content is irrelevant; read once and discard it.
        let mut scratch = [0u8; 4096];
        let _ = conn.read(&mut scratch);

        let reply = format!(
            "HTTP/1.1 200 OK\r\nContent-Length: {}\r\nContent-Type: application/x-ndjson\r\nConnection: close\r\n\r\n{}",
            canned_rows.len(),
            canned_rows
        );
        conn.write_all(reply.as_bytes()).unwrap();
    });

    let base_url = format!("http://127.0.0.1:{}", mock_port);
    let value_columns = ["ResolutionWidth".to_string()];
    let inferred =
        infer_metadata_columns(&base_url, "default", "hits", "EventTime", &value_columns)
            .unwrap();

    mock_thread.join().unwrap();

    let expected = vec!["OS".to_string(), "RegionID".to_string()];
    assert_eq!(inferred, expected);
}

0 commit comments

Comments
 (0)