Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
edb24a3
perf: Add new version of stream run for 10TB from perf team (#78)
rzIBM Mar 17, 2026
f55e6b4
build(deps): bump golang.org/x/crypto from 0.36.0 to 0.45.0 (#79)
dependabot[bot] Mar 18, 2026
01b7891
build(deps): bump filippo.io/edwards25519 from 1.1.0 to 1.1.1 (#80)
dependabot[bot] Mar 19, 2026
84ad21f
fix: address code review findings across stage, cmd, and utils packages
ethanyzhang Mar 19, 2026
89eb056
chore: update presto-go-client to v2.0.1
ethanyzhang Mar 19, 2026
a5ca816
fix: explicit returns mixed with implicit (fall through) returns (#81)
ethanyzhang Mar 20, 2026
e78455f
chore: delete orphaned Presto query records
ethanyzhang Mar 22, 2026
35b312f
fix: unused local variable (#82)
ethanyzhang Mar 24, 2026
c9e52b4
chore: add pre-commit hook for fmt, vet, staticcheck, tests, and cove…
ethanyzhang Mar 24, 2026
3c77594
fix: delete_run query
ethanyzhang Mar 25, 2026
5f819c5
fix: address code review findings — SQL injection, error handling, co…
ethanyzhang Mar 25, 2026
0d7d29c
chore: update presto-go-client to v2.1.1
ethanyzhang Mar 25, 2026
3628c2b
Enhanced ingestion workflow
ron-daniel1 Mar 30, 2026
0c9d733
Insert teamplate and schema creation fix
ron-daniel1 Apr 1, 2026
f5f6bb3
Create config_enhanced_ingestion.json.example
ron-daniel1 Apr 1, 2026
502ddee
target table sql syntax fix
ron-daniel1 Apr 1, 2026
8123238
Generate specific sql files as per config provided.
ron-daniel1 Apr 6, 2026
eaab879
Partitioned tables creation
ron-daniel1 Apr 10, 2026
99ea6cb
Column header name fix
ron-daniel1 Apr 16, 2026
2f4a5cb
Overwrite minscale factor for partitioned tables with config parameter
ron-daniel1 Apr 21, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
58 changes: 58 additions & 0 deletions .githooks/pre-commit
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/bin/bash
# Git pre-commit hook: runs fmt, vet, staticcheck, tests, and coverage

REPO_ROOT=$(git rev-parse --show-toplevel)
cd "$REPO_ROOT" || exit 1

FAILED=0

# Format check
echo "Checking formatting..."
UNFORMATTED=$(gofmt -l .)
if [ -n "$UNFORMATTED" ]; then
echo "Files not formatted:"
echo "$UNFORMATTED"
FAILED=1
fi

# Vet
echo "Running go vet..."
if ! go vet ./...; then
FAILED=1
fi

# Staticcheck
if command -v staticcheck &>/dev/null; then
echo "Running staticcheck..."
if ! staticcheck ./...; then
FAILED=1
fi
fi

# Tests with race detection
echo "Running tests..."
if ! go test ./... -race -count=1 -timeout 120s; then
FAILED=1
fi

# Coverage threshold (80%)
echo "Checking coverage..."
go test ./... -count=1 -coverprofile=coverage.out -covermode=atomic -timeout 120s
COVERAGE=$(go tool cover -func=coverage.out 2>/dev/null | grep total | awk '{print $3}' | sed 's/%//')
if [ -n "$COVERAGE" ]; then
if (( $(echo "$COVERAGE < 50" | bc -l) )); then
echo "Coverage ${COVERAGE}% is below 50% threshold"
FAILED=1
else
echo "Coverage: ${COVERAGE}%"
fi
fi
rm -f coverage.out

if [ "$FAILED" -eq 1 ]; then
echo "Pre-commit checks failed. Fix issues before committing."
exit 1
fi

echo "All checks passed."
exit 0
1 change: 1 addition & 0 deletions benchmarks/scripts/mysql_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@ def execute_mysql_query(connection, query, cluster_name):
return result
except Error as e:
print(f"The error '{e}' occurred")
return []
1 change: 1 addition & 0 deletions benchmarks/scripts/system_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def execute_ssh_command(worker_ip, login_user, ssh_key_path, command):
sys.exit(1)
else:
print(f'Successfully finished running command on {worker_ip}')
return stdout_output
except paramiko.SSHException as ssh_err:
print(f'SSH error on {worker_ip}: {ssh_err}')
sys.exit(1)
Expand Down
9 changes: 9 additions & 0 deletions benchmarks/tpc-ds/ds_4streams_v2_10000_1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"description": "Run 4 parallel queries streams starting from s1 (s1 to s4) defined in the TPC-DS spec Appendix D: Query Ordering.",
"next": [
"streams_v2/10000/s1.json",
"streams_v2/10000/s2.json",
"streams_v2/10000/s3.json",
"streams_v2/10000/s4.json"
]
}
9 changes: 9 additions & 0 deletions benchmarks/tpc-ds/ds_4streams_v2_10000_2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"description": "Run 4 parallel queries streams starting from s5 (s5 to s8) defined in the TPC-DS spec Appendix D: Query Ordering.",
"next": [
"streams_v2/10000/s5.json",
"streams_v2/10000/s6.json",
"streams_v2/10000/s7.json",
"streams_v2/10000/s8.json"
]
}
13 changes: 13 additions & 0 deletions benchmarks/tpc-ds/ds_8streams_v2_10000_1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"description": "Run 8 parallel queries streams starting from s1 (s1 to s8) defined in the TPC-DS spec Appendix D: Query Ordering.",
"next": [
"streams_v2/10000/s1.json",
"streams_v2/10000/s2.json",
"streams_v2/10000/s3.json",
"streams_v2/10000/s4.json",
"streams_v2/10000/s5.json",
"streams_v2/10000/s6.json",
"streams_v2/10000/s7.json",
"streams_v2/10000/s8.json"
]
}
13 changes: 13 additions & 0 deletions benchmarks/tpc-ds/ds_8streams_v2_10000_2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"description": "Run 8 parallel queries streams starting from s9 (s9 to s16) defined in the TPC-DS spec Appendix D: Query Ordering.",
"next": [
"streams_v2/10000/s9.json",
"streams_v2/10000/s10.json",
"streams_v2/10000/s11.json",
"streams_v2/10000/s12.json",
"streams_v2/10000/s13.json",
"streams_v2/10000/s14.json",
"streams_v2/10000/s15.json",
"streams_v2/10000/s16.json"
]
}
6 changes: 6 additions & 0 deletions benchmarks/tpc-ds/ds_power_v2_10000.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"description": "Run the first 1 parallel queries streams defined in the TPC-DS spec Appendix D: Query Ordering.",
"next": [
"streams_v2/10000/s0.json"
]
}
24 changes: 24 additions & 0 deletions benchmarks/tpc-ds/queries_v2/10000/s0/q01.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
-- q01.sql

with customer_total_return as
(select sr_customer_sk as ctr_customer_sk
,sr_store_sk as ctr_store_sk
,sum(SR_FEE) as ctr_total_return
from store_returns
,date_dim
where sr_returned_date_sk = d_date_sk
and d_year =1998
group by sr_customer_sk
,sr_store_sk)
select c_customer_id
from customer_total_return ctr1
,store
,customer
where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
from customer_total_return ctr2
where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
and s_store_sk = ctr1.ctr_store_sk
and s_state = 'IN'
and ctr1.ctr_customer_sk = c_customer_sk
order by c_customer_id
limit 100;
59 changes: 59 additions & 0 deletions benchmarks/tpc-ds/queries_v2/10000/s0/q02.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
-- q02.sql

with wscs as
(select sold_date_sk
,sales_price
from (select ws_sold_date_sk sold_date_sk
,ws_ext_sales_price sales_price
from web_sales
union all
select cs_sold_date_sk sold_date_sk
,cs_ext_sales_price sales_price
from catalog_sales) x),
wswscs as
(select d_week_seq,
sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales,
sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales,
sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales,
sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales,
sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales,
sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales,
sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales
from wscs
,date_dim
where d_date_sk = sold_date_sk
group by d_week_seq)
select d_week_seq1
,round(sun_sales1/sun_sales2,2)
,round(mon_sales1/mon_sales2,2)
,round(tue_sales1/tue_sales2,2)
,round(wed_sales1/wed_sales2,2)
,round(thu_sales1/thu_sales2,2)
,round(fri_sales1/fri_sales2,2)
,round(sat_sales1/sat_sales2,2)
from
(select wswscs.d_week_seq d_week_seq1
,sun_sales sun_sales1
,mon_sales mon_sales1
,tue_sales tue_sales1
,wed_sales wed_sales1
,thu_sales thu_sales1
,fri_sales fri_sales1
,sat_sales sat_sales1
from wswscs,date_dim
where date_dim.d_week_seq = wswscs.d_week_seq and
d_year = 2001) y,
(select wswscs.d_week_seq d_week_seq2
,sun_sales sun_sales2
,mon_sales mon_sales2
,tue_sales tue_sales2
,wed_sales wed_sales2
,thu_sales thu_sales2
,fri_sales fri_sales2
,sat_sales sat_sales2
from wswscs
,date_dim
where date_dim.d_week_seq = wswscs.d_week_seq and
d_year = 2001+1) z
where d_week_seq1=d_week_seq2-53
order by d_week_seq1;
20 changes: 20 additions & 0 deletions benchmarks/tpc-ds/queries_v2/10000/s0/q03.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
-- q03.sql

select dt.d_year
,item.i_brand_id brand_id
,item.i_brand brand
,sum(ss_sales_price) sum_agg
from date_dim dt
,store_sales
,item
where dt.d_date_sk = store_sales.ss_sold_date_sk
and store_sales.ss_item_sk = item.i_item_sk
and item.i_manufact_id = 977
and dt.d_moy=11
group by dt.d_year
,item.i_brand
,item.i_brand_id
order by dt.d_year
,sum_agg desc
,brand_id
limit 100;
115 changes: 115 additions & 0 deletions benchmarks/tpc-ds/queries_v2/10000/s0/q04.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
-- q04.sql

with year_total as (
select c_customer_id customer_id
,c_first_name customer_first_name
,c_last_name customer_last_name
,c_preferred_cust_flag customer_preferred_cust_flag
,c_birth_country customer_birth_country
,c_login customer_login
,c_email_address customer_email_address
,d_year dyear
,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total
,'s' sale_type
from customer
,store_sales
,date_dim
where c_customer_sk = ss_customer_sk
and ss_sold_date_sk = d_date_sk
group by c_customer_id
,c_first_name
,c_last_name
,c_preferred_cust_flag
,c_birth_country
,c_login
,c_email_address
,d_year
union all
select c_customer_id customer_id
,c_first_name customer_first_name
,c_last_name customer_last_name
,c_preferred_cust_flag customer_preferred_cust_flag
,c_birth_country customer_birth_country
,c_login customer_login
,c_email_address customer_email_address
,d_year dyear
,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total
,'c' sale_type
from customer
,catalog_sales
,date_dim
where c_customer_sk = cs_bill_customer_sk
and cs_sold_date_sk = d_date_sk
group by c_customer_id
,c_first_name
,c_last_name
,c_preferred_cust_flag
,c_birth_country
,c_login
,c_email_address
,d_year
union all
select c_customer_id customer_id
,c_first_name customer_first_name
,c_last_name customer_last_name
,c_preferred_cust_flag customer_preferred_cust_flag
,c_birth_country customer_birth_country
,c_login customer_login
,c_email_address customer_email_address
,d_year dyear
,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total
,'w' sale_type
from customer
,web_sales
,date_dim
where c_customer_sk = ws_bill_customer_sk
and ws_sold_date_sk = d_date_sk
group by c_customer_id
,c_first_name
,c_last_name
,c_preferred_cust_flag
,c_birth_country
,c_login
,c_email_address
,d_year
)
select
t_s_secyear.customer_id
,t_s_secyear.customer_first_name
,t_s_secyear.customer_last_name
,t_s_secyear.customer_preferred_cust_flag
from year_total t_s_firstyear
,year_total t_s_secyear
,year_total t_c_firstyear
,year_total t_c_secyear
,year_total t_w_firstyear
,year_total t_w_secyear
where t_s_secyear.customer_id = t_s_firstyear.customer_id
and t_s_firstyear.customer_id = t_c_secyear.customer_id
and t_s_firstyear.customer_id = t_c_firstyear.customer_id
and t_s_firstyear.customer_id = t_w_firstyear.customer_id
and t_s_firstyear.customer_id = t_w_secyear.customer_id
and t_s_firstyear.sale_type = 's'
and t_c_firstyear.sale_type = 'c'
and t_w_firstyear.sale_type = 'w'
and t_s_secyear.sale_type = 's'
and t_c_secyear.sale_type = 'c'
and t_w_secyear.sale_type = 'w'
and t_s_firstyear.dyear = 2001
and t_s_secyear.dyear = 2001+1
and t_c_firstyear.dyear = 2001
and t_c_secyear.dyear = 2001+1
and t_w_firstyear.dyear = 2001
and t_w_secyear.dyear = 2001+1
and t_s_firstyear.year_total > 0
and t_c_firstyear.year_total > 0
and t_w_firstyear.year_total > 0
and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end
> case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end
and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end
> case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end
order by t_s_secyear.customer_id
,t_s_secyear.customer_first_name
,t_s_secyear.customer_last_name
,t_s_secyear.customer_preferred_cust_flag
limit 100;
Loading