diff --git a/cmd/genddl/definition/tpc-h/customer.json b/cmd/genddl/definition/tpc-h/customer.json new file mode 100644 index 00000000..0917e09d --- /dev/null +++ b/cmd/genddl/definition/tpc-h/customer.json @@ -0,0 +1,37 @@ +{ + "name": "customer", + "columns": [ + { + "name": "c_custkey", + "type": "BIGINT" + }, + { + "name": "c_name", + "type": "VARCHAR(25)" + }, + { + "name": "c_address", + "type": "VARCHAR(40)" + }, + { + "name": "c_nationkey", + "type": "BIGINT" + }, + { + "name": "c_phone", + "type": "VARCHAR(15)" + }, + { + "name": "c_acctbal", + "type": "DOUBLE" + }, + { + "name": "c_mktsegment", + "type": "VARCHAR(10)" + }, + { + "name": "c_comment", + "type": "VARCHAR(117)" + } + ] +} diff --git a/cmd/genddl/definition/tpc-h/lineitem.json b/cmd/genddl/definition/tpc-h/lineitem.json new file mode 100644 index 00000000..644b51bb --- /dev/null +++ b/cmd/genddl/definition/tpc-h/lineitem.json @@ -0,0 +1,71 @@ +{ + "name": "lineitem", + "partitioned": true, + "columns": [ + { + "name": "l_orderkey", + "type": "BIGINT" + }, + { + "name": "l_partkey", + "type": "BIGINT" + }, + { + "name": "l_suppkey", + "type": "BIGINT" + }, + { + "name": "l_linenumber", + "type": "INTEGER" + }, + { + "name": "l_quantity", + "type": "DOUBLE" + }, + { + "name": "l_extendedprice", + "type": "DOUBLE" + }, + { + "name": "l_discount", + "type": "DOUBLE" + }, + { + "name": "l_tax", + "type": "DOUBLE" + }, + { + "name": "l_returnflag", + "type": "VARCHAR(1)" + }, + { + "name": "l_linestatus", + "type": "VARCHAR(1)" + }, + { + "name": "l_shipdate", + "type": "DATE", + "partition_key": true + }, + { + "name": "l_commitdate", + "type": "DATE" + }, + { + "name": "l_receiptdate", + "type": "DATE" + }, + { + "name": "l_shipinstruct", + "type": "VARCHAR(25)" + }, + { + "name": "l_shipmode", + "type": "VARCHAR(10)" + }, + { + "name": "l_comment", + "type": "VARCHAR(44)" + } + ] +} diff --git a/cmd/genddl/definition/tpc-h/nation.json b/cmd/genddl/definition/tpc-h/nation.json new file mode 100644 index 00000000..144812a9 --- /dev/null +++ b/cmd/genddl/definition/tpc-h/nation.json @@ -0,0 +1,21 @@ +{ + "name": "nation", + "columns": [ + { + "name": "n_nationkey", + "type": "BIGINT" + }, + { + "name": "n_name", + "type": "VARCHAR(25)" + }, + { + "name": "n_regionkey", + "type": "BIGINT" + }, + { + "name": "n_comment", + "type": "VARCHAR(152)" + } + ] +} diff --git a/cmd/genddl/definition/tpc-h/orders.json b/cmd/genddl/definition/tpc-h/orders.json new file mode 100644 index 00000000..d5dbd8b4 --- /dev/null +++ b/cmd/genddl/definition/tpc-h/orders.json @@ -0,0 +1,43 @@ +{ + "name": "orders", + "partitioned": true, + "columns": [ + { + "name": "o_orderkey", + "type": "BIGINT" + }, + { + "name": "o_custkey", + "type": "BIGINT" + }, + { + "name": "o_orderstatus", + "type": "VARCHAR(1)" + }, + { + "name": "o_totalprice", + "type": "DOUBLE" + }, + { + "name": "o_orderdate", + "type": "DATE", + "partition_key": true + }, + { + "name": "o_orderpriority", + "type": "VARCHAR(15)" + }, + { + "name": "o_clerk", + "type": "VARCHAR(15)" + }, + { + "name": "o_shippriority", + "type": "INTEGER" + }, + { + "name": "o_comment", + "type": "VARCHAR(79)" + } + ] +} diff --git a/cmd/genddl/definition/tpc-h/part.json b/cmd/genddl/definition/tpc-h/part.json new file mode 100644 index 00000000..97759768 --- /dev/null +++ b/cmd/genddl/definition/tpc-h/part.json @@ -0,0 +1,41 @@ +{ + "name": "part", + "columns": [ + { + "name": "p_partkey", + "type": "BIGINT" + }, + { + "name": "p_name", + "type": "VARCHAR(55)" + }, + { + "name": "p_mfgr", + "type": "VARCHAR(25)" + }, + { + "name": "p_brand", + "type": "VARCHAR(10)" + }, + { + "name": "p_type", + "type": "VARCHAR(25)" + }, + { + "name": "p_size", + "type": "INTEGER" + }, + { + "name": "p_container", + "type": "VARCHAR(10)" + }, + { + "name": "p_retailprice", + "type": "DOUBLE" + }, + { + "name": "p_comment", + "type": "VARCHAR(23)" + } + ] +} diff --git a/cmd/genddl/definition/tpc-h/partsupp.json b/cmd/genddl/definition/tpc-h/partsupp.json new file mode 100644 index 00000000..18626d32 --- /dev/null +++ b/cmd/genddl/definition/tpc-h/partsupp.json @@ -0,0 +1,25 @@ +{ + "name": "partsupp", + "columns": [ + { + "name": "ps_partkey", + "type": "BIGINT" + }, + { + "name": "ps_suppkey", + "type": "BIGINT" + }, + { + "name": "ps_availqty", + "type": "INTEGER" + }, + { + "name": "ps_supplycost", + "type": "DOUBLE" + }, + { + "name": "ps_comment", + "type": "VARCHAR(199)" + } + ] +} diff --git a/cmd/genddl/definition/tpc-h/region.json b/cmd/genddl/definition/tpc-h/region.json new file mode 100644 index 00000000..82c63784 --- /dev/null +++ b/cmd/genddl/definition/tpc-h/region.json @@ -0,0 +1,17 @@ +{ + "name": "region", + "columns": [ + { + "name": "r_regionkey", + "type": "BIGINT" + }, + { + "name": "r_name", + "type": "VARCHAR(25)" + }, + { + "name": "r_comment", + "type": "VARCHAR(152)" + } + ] +} diff --git a/cmd/genddl/definition/tpc-h/supplier.json b/cmd/genddl/definition/tpc-h/supplier.json new file mode 100644 index 00000000..99956fbd --- /dev/null +++ b/cmd/genddl/definition/tpc-h/supplier.json @@ -0,0 +1,33 @@ +{ + "name": "supplier", + "columns": [ + { + "name": "s_suppkey", + "type": "BIGINT" + }, + { + "name": "s_name", + "type": "VARCHAR(25)" + }, + { + "name": "s_address", + "type": "VARCHAR(40)" + }, + { + "name": "s_nationkey", + "type": "BIGINT" + }, + { + "name": "s_phone", + "type": "VARCHAR(15)" + }, + { + "name": "s_acctbal", + "type": "DOUBLE" + }, + { + "name": "s_comment", + "type": "VARCHAR(101)" + } + ] +} diff --git a/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/1-create-tpch-sf10-parquet-iceberg-zstd.sql b/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/1-create-tpch-sf10-parquet-iceberg-zstd.sql new file mode 100644 index 00000000..de01b14c --- /dev/null +++ b/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/1-create-tpch-sf10-parquet-iceberg-zstd.sql @@ -0,0 +1,128 @@ +SET SESSION iceberg.compression_codec='ZSTD'; +SET SESSION query_max_execution_time='12h'; +SET SESSION query_max_run_time='12h'; + +CREATE SCHEMA IF NOT EXISTS iceberg.tpch_sf10_parquet_iceberg_zstd +WITH ( + location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg-zstd/' +); + +USE iceberg.tpch_sf10_parquet_iceberg_zstd; + +CREATE TABLE IF NOT EXISTS customer ( + c_custkey BIGINT, + c_name VARCHAR(25), + c_address VARCHAR(40), + c_nationkey BIGINT, + c_phone VARCHAR(15), + c_acctbal DOUBLE, + c_mktsegment VARCHAR(10), + c_comment VARCHAR(117) +) +WITH ( + format = 'PARQUET', + location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg-zstd/customer' +); + +CREATE TABLE IF NOT EXISTS lineitem ( + l_orderkey BIGINT, + l_partkey BIGINT, + l_suppkey BIGINT, + l_linenumber INTEGER, + l_quantity DOUBLE, + l_extendedprice DOUBLE, + l_discount DOUBLE, + l_tax DOUBLE, + l_returnflag VARCHAR(1), + l_linestatus VARCHAR(1), + l_shipdate DATE, + l_commitdate DATE, + l_receiptdate DATE, + l_shipinstruct VARCHAR(25), + l_shipmode VARCHAR(10), + l_comment VARCHAR(44) +) +WITH ( + format = 'PARQUET', + location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg-zstd/lineitem' +); + +CREATE TABLE IF NOT EXISTS nation ( + n_nationkey BIGINT, + n_name VARCHAR(25), + n_regionkey BIGINT, + n_comment VARCHAR(152) +) +WITH ( + format = 'PARQUET', + location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg-zstd/nation' +); + +CREATE TABLE IF NOT EXISTS orders ( + o_orderkey BIGINT, + o_custkey BIGINT, + o_orderstatus VARCHAR(1), + o_totalprice DOUBLE, + o_orderdate DATE, + o_orderpriority VARCHAR(15), + o_clerk VARCHAR(15), + o_shippriority INTEGER, + o_comment VARCHAR(79) +) +WITH ( + format = 'PARQUET', + location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg-zstd/orders' +); + +CREATE TABLE IF NOT EXISTS part ( + p_partkey BIGINT, + p_name VARCHAR(55), + p_mfgr VARCHAR(25), + p_brand VARCHAR(10), + p_type VARCHAR(25), + p_size INTEGER, + p_container VARCHAR(10), + p_retailprice DOUBLE, + p_comment VARCHAR(23) +) +WITH ( + format = 'PARQUET', + location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg-zstd/part' +); + +CREATE TABLE IF NOT EXISTS partsupp ( + ps_partkey BIGINT, + ps_suppkey BIGINT, + ps_availqty INTEGER, + ps_supplycost DOUBLE, + ps_comment VARCHAR(199) +) +WITH ( + format = 'PARQUET', + location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg-zstd/partsupp' +); + +CREATE TABLE IF NOT EXISTS region ( + r_regionkey BIGINT, + r_name VARCHAR(25), + r_comment VARCHAR(152) +) +WITH ( + format = 'PARQUET', + location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg-zstd/region' +); + +CREATE TABLE IF NOT EXISTS supplier ( + s_suppkey BIGINT, + s_name VARCHAR(25), + s_address VARCHAR(40), + s_nationkey BIGINT, + s_phone VARCHAR(15), + s_acctbal DOUBLE, + s_comment VARCHAR(101) +) +WITH ( + format = 'PARQUET', + location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg-zstd/supplier' +); + diff --git a/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/2-insert-tpch-sf10-parquet-iceberg-zstd.sql b/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/2-insert-tpch-sf10-parquet-iceberg-zstd.sql new file mode 100644 index 00000000..fd851951 --- /dev/null +++ b/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/2-insert-tpch-sf10-parquet-iceberg-zstd.sql @@ -0,0 +1,38 @@ +SET SESSION iceberg.compression_codec='ZSTD'; +SET SESSION query_max_execution_time='12h'; +SET SESSION query_max_run_time='12h'; + +USE iceberg.tpch_sf10_parquet_iceberg_zstd; + +INSERT INTO customer +SELECT * FROM iceberg.tpch_sf10_parquet_iceberg.customer; + +INSERT INTO lineitem +SELECT * FROM iceberg.tpch_sf10_parquet_iceberg.lineitem; + +INSERT INTO nation +SELECT * FROM iceberg.tpch_sf10_parquet_iceberg.nation; + +INSERT INTO orders +SELECT * FROM iceberg.tpch_sf10_parquet_iceberg.orders; + +INSERT INTO part +SELECT * FROM iceberg.tpch_sf10_parquet_iceberg.part; + +INSERT INTO partsupp +SELECT * FROM iceberg.tpch_sf10_parquet_iceberg.partsupp; + +INSERT INTO region +SELECT * FROM iceberg.tpch_sf10_parquet_iceberg.region; + +INSERT INTO supplier +SELECT * FROM iceberg.tpch_sf10_parquet_iceberg.supplier; + +ANALYZE customer; +ANALYZE lineitem; +ANALYZE nation; +ANALYZE orders; +ANALYZE part; +ANALYZE partsupp; +ANALYZE region; +ANALYZE supplier; diff --git a/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/3-create-tpch-sf10-parquet-partitioned-iceberg-zstd.sql b/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/3-create-tpch-sf10-parquet-partitioned-iceberg-zstd.sql new file mode 100644 index 00000000..dd2195c7 --- /dev/null +++ b/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/3-create-tpch-sf10-parquet-partitioned-iceberg-zstd.sql @@ -0,0 +1,57 @@ +SET SESSION iceberg.compression_codec='ZSTD'; +SET SESSION query_max_execution_time='12h'; +SET SESSION query_max_run_time='12h'; + +CREATE SCHEMA IF NOT EXISTS iceberg.tpch_sf10_parquet_partitioned_iceberg_zstd +WITH ( + location = 's3a://presto-workload-v2/tpch-sf10-parquet-partitioned-iceberg-zstd/' +); + +USE iceberg.tpch_sf10_parquet_partitioned_iceberg_zstd; + +CALL iceberg.system.register_table('tpch_sf10_parquet_partitioned_iceberg_zstd', 'customer', 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg-zstd/customer/metadata'); +CALL iceberg.system.register_table('tpch_sf10_parquet_partitioned_iceberg_zstd', 'nation', 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg-zstd/nation/metadata'); +CALL iceberg.system.register_table('tpch_sf10_parquet_partitioned_iceberg_zstd', 'part', 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg-zstd/part/metadata'); +CALL iceberg.system.register_table('tpch_sf10_parquet_partitioned_iceberg_zstd', 'partsupp', 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg-zstd/partsupp/metadata'); +CALL iceberg.system.register_table('tpch_sf10_parquet_partitioned_iceberg_zstd', 'region', 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg-zstd/region/metadata'); +CALL iceberg.system.register_table('tpch_sf10_parquet_partitioned_iceberg_zstd', 'supplier', 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg-zstd/supplier/metadata'); + +CREATE TABLE IF NOT EXISTS lineitem ( + l_orderkey BIGINT, + l_partkey BIGINT, + l_suppkey BIGINT, + l_linenumber INTEGER, + l_quantity DOUBLE, + l_extendedprice DOUBLE, + l_discount DOUBLE, + l_tax DOUBLE, + l_returnflag VARCHAR(1), + l_linestatus VARCHAR(1), + l_commitdate DATE, + l_receiptdate DATE, + l_shipinstruct VARCHAR(25), + l_shipmode VARCHAR(10), + l_comment VARCHAR(44), + l_shipdate DATE +) +WITH ( + format = 'PARQUET', + partitioning = array['l_shipdate'] +); + +CREATE TABLE IF NOT EXISTS orders ( + o_orderkey BIGINT, + o_custkey BIGINT, + o_orderstatus VARCHAR(1), + o_totalprice DOUBLE, + o_orderpriority VARCHAR(15), + o_clerk VARCHAR(15), + o_shippriority INTEGER, + o_comment VARCHAR(79), + o_orderdate DATE +) +WITH ( + format = 'PARQUET', + partitioning = array['o_orderdate'] +); + diff --git a/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/4-insert-tpch-sf10-parquet-partitioned-iceberg-zstd.sql b/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/4-insert-tpch-sf10-parquet-partitioned-iceberg-zstd.sql new file mode 100644 index 00000000..9422a4f3 --- /dev/null +++ b/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/4-insert-tpch-sf10-parquet-partitioned-iceberg-zstd.sql @@ -0,0 +1,14 @@ +SET SESSION iceberg.compression_codec='ZSTD'; +SET SESSION query_max_execution_time='12h'; +SET SESSION query_max_run_time='12h'; + +USE iceberg.tpch_sf10_parquet_partitioned_iceberg_zstd; + +INSERT INTO lineitem +SELECT * FROM iceberg.tpch_sf10_parquet_partitioned_iceberg.lineitem; + +INSERT INTO orders +SELECT * FROM iceberg.tpch_sf10_parquet_partitioned_iceberg.orders; + +ANALYZE lineitem; +ANALYZE orders; diff --git a/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/5-create-tpch-sf10-parquet-hive-zstd.sql b/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/5-create-tpch-sf10-parquet-hive-zstd.sql new file mode 100644 index 00000000..c31233d5 --- /dev/null +++ b/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/5-create-tpch-sf10-parquet-hive-zstd.sql @@ -0,0 +1,136 @@ +SET SESSION hive.compression_codec='ZSTD'; +SET SESSION query_max_execution_time='12h'; +SET SESSION query_max_run_time='12h'; + +CREATE SCHEMA IF NOT EXISTS hive.tpch_sf10_parquet_hive_zstd +WITH ( + location = 's3a://presto-workload-v2/tpch-sf10-parquet-hive-zstd/' +); + +USE hive.tpch_sf10_parquet_hive_zstd; + +CREATE TABLE IF NOT EXISTS customer ( + c_custkey BIGINT, + c_name VARCHAR(25), + c_address VARCHAR(40), + c_nationkey BIGINT, + c_phone VARCHAR(15), + c_acctbal DOUBLE, + c_mktsegment VARCHAR(10), + c_comment VARCHAR(117) +) +WITH ( + format = 'PARQUET', + external_location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg/customer/data/' +); + +CREATE TABLE IF NOT EXISTS lineitem ( + l_orderkey BIGINT, + l_partkey BIGINT, + l_suppkey BIGINT, + l_linenumber INTEGER, + l_quantity DOUBLE, + l_extendedprice DOUBLE, + l_discount DOUBLE, + l_tax DOUBLE, + l_returnflag VARCHAR(1), + l_linestatus VARCHAR(1), + l_shipdate DATE, + l_commitdate DATE, + l_receiptdate DATE, + l_shipinstruct VARCHAR(25), + l_shipmode VARCHAR(10), + l_comment VARCHAR(44) +) +WITH ( + format = 'PARQUET', + external_location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg/lineitem/data/' +); + +CREATE TABLE IF NOT EXISTS nation ( + n_nationkey BIGINT, + n_name VARCHAR(25), + n_regionkey BIGINT, + n_comment VARCHAR(152) +) +WITH ( + format = 'PARQUET', + external_location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg/nation/data/' +); + +CREATE TABLE IF NOT EXISTS orders ( + o_orderkey BIGINT, + o_custkey BIGINT, + o_orderstatus VARCHAR(1), + o_totalprice DOUBLE, + o_orderdate DATE, + o_orderpriority VARCHAR(15), + o_clerk VARCHAR(15), + o_shippriority INTEGER, + o_comment VARCHAR(79) +) +WITH ( + format = 'PARQUET', + external_location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg/orders/data/' +); + +CREATE TABLE IF NOT EXISTS part ( + p_partkey BIGINT, + p_name VARCHAR(55), + p_mfgr VARCHAR(25), + p_brand VARCHAR(10), + p_type VARCHAR(25), + p_size INTEGER, + p_container VARCHAR(10), + p_retailprice DOUBLE, + p_comment VARCHAR(23) +) +WITH ( + format = 'PARQUET', + external_location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg/part/data/' +); + +CREATE TABLE IF NOT EXISTS partsupp ( + ps_partkey BIGINT, + ps_suppkey BIGINT, + ps_availqty INTEGER, + ps_supplycost DOUBLE, + ps_comment VARCHAR(199) +) +WITH ( + format = 'PARQUET', + external_location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg/partsupp/data/' +); + +CREATE TABLE IF NOT EXISTS region ( + r_regionkey BIGINT, + r_name VARCHAR(25), + r_comment VARCHAR(152) +) +WITH ( + format = 'PARQUET', + external_location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg/region/data/' +); + +CREATE TABLE IF NOT EXISTS supplier ( + s_suppkey BIGINT, + s_name VARCHAR(25), + s_address VARCHAR(40), + s_nationkey BIGINT, + s_phone VARCHAR(15), + s_acctbal DOUBLE, + s_comment VARCHAR(101) +) +WITH ( + format = 'PARQUET', + external_location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg/supplier/data/' +); +ANALYZE customer; +ANALYZE lineitem; +ANALYZE nation; +ANALYZE orders; +ANALYZE part; +ANALYZE partsupp; +ANALYZE region; +ANALYZE supplier; + diff --git a/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/6a-create-tpch-sf10-parquet-partitioned-hive-zstd.sql b/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/6a-create-tpch-sf10-parquet-partitioned-hive-zstd.sql new file mode 100644 index 00000000..3b0019f9 --- /dev/null +++ b/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/6a-create-tpch-sf10-parquet-partitioned-hive-zstd.sql @@ -0,0 +1,130 @@ +SET SESSION hive.compression_codec='ZSTD'; +SET SESSION query_max_execution_time='12h'; +SET SESSION query_max_run_time='12h'; + +CREATE SCHEMA IF NOT EXISTS hive.tpch_sf10_parquet_partitioned_hive_zstd +WITH ( + location = 's3a://presto-workload-v2/tpch-sf10-parquet-partitioned-hive-zstd/' +); + +USE hive.tpch_sf10_parquet_partitioned_hive_zstd; + +CREATE TABLE IF NOT EXISTS customer ( + c_custkey BIGINT, + c_name VARCHAR(25), + c_address VARCHAR(40), + c_nationkey BIGINT, + c_phone VARCHAR(15), + c_acctbal DOUBLE, + c_mktsegment VARCHAR(10), + c_comment VARCHAR(117) +) +WITH ( + format = 'PARQUET', + external_location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg/customer/data/' +); + +CREATE TABLE IF NOT EXISTS lineitem ( + l_orderkey BIGINT, + l_partkey BIGINT, + l_suppkey BIGINT, + l_linenumber INTEGER, + l_quantity DOUBLE, + l_extendedprice DOUBLE, + l_discount DOUBLE, + l_tax DOUBLE, + l_returnflag VARCHAR(1), + l_linestatus VARCHAR(1), + l_commitdate DATE, + l_receiptdate DATE, + l_shipinstruct VARCHAR(25), + l_shipmode VARCHAR(10), + l_comment VARCHAR(44), + l_shipdate DATE +) +WITH ( + format = 'PARQUET', + partitioned_by = array['l_shipdate'], + external_location = 's3a://presto-workload-v2/tpch-sf10-parquet-partitioned-iceberg/lineitem/data' +); + +CREATE TABLE IF NOT EXISTS nation ( + n_nationkey BIGINT, + n_name VARCHAR(25), + n_regionkey BIGINT, + n_comment VARCHAR(152) +) +WITH ( + format = 'PARQUET', + external_location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg/nation/data/' +); + +CREATE TABLE IF NOT EXISTS orders ( + o_orderkey BIGINT, + o_custkey BIGINT, + o_orderstatus VARCHAR(1), + o_totalprice DOUBLE, + o_orderpriority VARCHAR(15), + o_clerk VARCHAR(15), + o_shippriority INTEGER, + o_comment VARCHAR(79), + o_orderdate DATE +) +WITH ( + format = 'PARQUET', + partitioned_by = array['o_orderdate'], + external_location = 's3a://presto-workload-v2/tpch-sf10-parquet-partitioned-iceberg/orders/data' +); + +CREATE TABLE IF NOT EXISTS part ( + p_partkey BIGINT, + p_name VARCHAR(55), + p_mfgr VARCHAR(25), + p_brand VARCHAR(10), + p_type VARCHAR(25), + p_size INTEGER, + p_container VARCHAR(10), + p_retailprice DOUBLE, + p_comment VARCHAR(23) +) +WITH ( + format = 'PARQUET', + external_location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg/part/data/' +); + +CREATE TABLE IF NOT EXISTS partsupp ( + ps_partkey BIGINT, + ps_suppkey BIGINT, + ps_availqty INTEGER, + ps_supplycost DOUBLE, + ps_comment VARCHAR(199) +) +WITH ( + format = 'PARQUET', + external_location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg/partsupp/data/' +); + +CREATE TABLE IF NOT EXISTS region ( + r_regionkey BIGINT, + r_name VARCHAR(25), + r_comment VARCHAR(152) +) +WITH ( + format = 'PARQUET', + external_location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg/region/data/' +); + +CREATE TABLE IF NOT EXISTS supplier ( + s_suppkey BIGINT, + s_name VARCHAR(25), + s_address VARCHAR(40), + s_nationkey BIGINT, + s_phone VARCHAR(15), + s_acctbal DOUBLE, + s_comment VARCHAR(101) +) +WITH ( + format = 'PARQUET', + external_location = 's3a://presto-workload-v2/tpch-sf10-parquet-iceberg/supplier/data/' +); + diff --git a/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/6b-s3-mv-tpch-sf10-parquet-partitioned-hive-zstd.sh b/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/6b-s3-mv-tpch-sf10-parquet-partitioned-hive-zstd.sh new file mode 100644 index 00000000..f9c6bc1b --- /dev/null +++ b/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/6b-s3-mv-tpch-sf10-parquet-partitioned-hive-zstd.sh @@ -0,0 +1,2 @@ +aws s3 mv --recursive s3://presto-workload-v2/tpch-sf10-parquet-partitioned-iceberg/lineitem/data/l_shipdate=null/ s3://presto-workload-v2/tpch-sf10-parquet-partitioned-iceberg/lineitem/data/l_shipdate=__HIVE_DEFAULT_PARTITION__/ +aws s3 mv --recursive s3://presto-workload-v2/tpch-sf10-parquet-partitioned-iceberg/orders/data/o_orderdate=null/ s3://presto-workload-v2/tpch-sf10-parquet-partitioned-iceberg/orders/data/o_orderdate=__HIVE_DEFAULT_PARTITION__/ diff --git a/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/6c-call-analyze-tpch-sf10-parquet-partitioned-hive-zstd.sql b/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/6c-call-analyze-tpch-sf10-parquet-partitioned-hive-zstd.sql new file mode 100644 index 00000000..f3b843b1 --- /dev/null +++ b/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/6c-call-analyze-tpch-sf10-parquet-partitioned-hive-zstd.sql @@ -0,0 +1,13 @@ +USE hive.tpch_sf10_parquet_partitioned_hive_zstd; + +CALL system.sync_partition_metadata('tpch_sf10_parquet_partitioned_hive_zstd', 'lineitem', 'FULL'); +CALL system.sync_partition_metadata('tpch_sf10_parquet_partitioned_hive_zstd', 'orders', 'FULL'); + +ANALYZE customer; +ANALYZE lineitem; +ANALYZE nation; +ANALYZE orders; +ANALYZE part; +ANALYZE partsupp; +ANALYZE region; +ANALYZE supplier; diff --git a/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/6d-s3-cp-tpch-sf10-parquet-partitioned-hive-zstd.sh b/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/6d-s3-cp-tpch-sf10-parquet-partitioned-hive-zstd.sh new file mode 100644 index 00000000..b0012672 --- /dev/null +++ b/cmd/genddl/generated-examples/tpch-sf10-parquet-zstd/6d-s3-cp-tpch-sf10-parquet-partitioned-hive-zstd.sh @@ -0,0 +1,2 @@ +aws s3 cp --recursive s3://presto-workload-v2/tpch-sf10-parquet-partitioned-iceberg/lineitem/data/l_shipdate=__HIVE_DEFAULT_PARTITION__/ s3://presto-workload-v2/tpch-sf10-parquet-partitioned-iceberg/lineitem/data/l_shipdate=null/ +aws s3 cp --recursive s3://presto-workload-v2/tpch-sf10-parquet-partitioned-iceberg/orders/data/o_orderdate=__HIVE_DEFAULT_PARTITION__/ s3://presto-workload-v2/tpch-sf10-parquet-partitioned-iceberg/orders/data/o_orderdate=null/ diff --git a/cmd/genddl/main_test.go b/cmd/genddl/main_test.go index ca87b733..f730226d 100644 --- a/cmd/genddl/main_test.go +++ b/cmd/genddl/main_test.go @@ -9,36 +9,48 @@ import ( "github.com/stretchr/testify/require" ) -// TestGeneratedExamplesMatch runs genddl with config.json and verifies the +// TestGeneratedExamplesMatch runs genddl with each config and verifies the // output in generated-examples/ matches the checked-in golden files. func TestGeneratedExamplesMatch(t *testing.T) { - configPath := filepath.Join("config.json") - absConfig, err := filepath.Abs(configPath) - require.NoError(t, err) - - configDir := filepath.Dir(absConfig) - examplesDir := filepath.Join(configDir, "generated-examples") - - // Snapshot all golden files before regeneration. - golden := snapshotDir(t, examplesDir) - require.NotEmpty(t, golden, "no golden files found in generated-examples/") - - // Run genddl (overwrites generated-examples/ in place). - Run(nil, []string{configPath}) - - // Compare every regenerated file against the golden snapshot. - for relPath, expected := range golden { - actual, readErr := os.ReadFile(filepath.Join(examplesDir, relPath)) - require.NoError(t, readErr, "failed to read regenerated file %s", relPath) - assert.Equal(t, string(expected), string(actual), - "generated output differs from checked-in golden file: %s", relPath) + configs := []struct { + name string + configFile string + }{ + {"tpcds", "config.json"}, + {"tpch", "tpch_config.json"}, } - // Also check that no extra files were produced. - regenerated := snapshotDir(t, examplesDir) - for relPath := range regenerated { - assert.Contains(t, golden, relPath, - "regeneration produced unexpected file: %s", relPath) + for _, tc := range configs { + t.Run(tc.name, func(t *testing.T) { + configPath := filepath.Join(tc.configFile) + absConfig, err := filepath.Abs(configPath) + require.NoError(t, err) + + configDir := filepath.Dir(absConfig) + examplesDir := filepath.Join(configDir, "generated-examples") + + // Snapshot all golden files before regeneration. + golden := snapshotDir(t, examplesDir) + require.NotEmpty(t, golden, "no golden files found in generated-examples/") + + // Run genddl (overwrites generated-examples/ in place). + Run(nil, []string{configPath}) + + // Compare every regenerated file against the golden snapshot. + for relPath, expected := range golden { + actual, readErr := os.ReadFile(filepath.Join(examplesDir, relPath)) + require.NoError(t, readErr, "failed to read regenerated file %s", relPath) + assert.Equal(t, string(expected), string(actual), + "generated output differs from checked-in golden file: %s", relPath) + } + + // Also check that no extra files were produced. + regenerated := snapshotDir(t, examplesDir) + for relPath := range regenerated { + assert.Contains(t, golden, relPath, + "regeneration produced unexpected file: %s", relPath) + } + }) } } diff --git a/cmd/genddl/tpch_config.json b/cmd/genddl/tpch_config.json new file mode 100644 index 00000000..34ecbc45 --- /dev/null +++ b/cmd/genddl/tpch_config.json @@ -0,0 +1,7 @@ +{ + "scale_factor": "10", + "file_format": "parquet", + "compression_method": "zstd", + "workload": "tpch", + "workload_definition": "tpc-h" +}