Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion docker/demo/compaction.commands
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,7 @@ connect --path /user/hive/warehouse/stock_ticks_mor
compactions show all
compaction schedule
compaction run --parallelism 2 --sparkMemory 1G --schemaFilePath /var/demo/config/schema.avsc --retry 1

connect --path /user/hive/warehouse/stock_ticks_mor_bs
compactions show all
compaction schedule
compaction run --parallelism 2 --sparkMemory 1G --schemaFilePath /var/demo/config/schema.avsc --retry 1
8 changes: 8 additions & 0 deletions docker/demo/hive-batch1.commands
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,12 @@ select symbol, ts, volume, open, close from stock_ticks_cow where symbol = 'GO
select symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = 'GOOG';
select symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG';

select symbol, max(ts) from stock_ticks_cow_bs group by symbol HAVING symbol = 'GOOG';
select symbol, max(ts) from stock_ticks_mor_bs_ro group by symbol HAVING symbol = 'GOOG';
select symbol, max(ts) from stock_ticks_mor_bs_rt group by symbol HAVING symbol = 'GOOG';

select symbol, ts, volume, open, close from stock_ticks_cow_bs where symbol = 'GOOG';
select symbol, ts, volume, open, close from stock_ticks_mor_bs_ro where symbol = 'GOOG';
select symbol, ts, volume, open, close from stock_ticks_mor_bs_rt where symbol = 'GOOG';

!quit
6 changes: 6 additions & 0 deletions docker/demo/hive-batch2-after-compaction.commands
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,10 @@ select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = '
select symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = 'GOOG';
select symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG';

select symbol, max(ts) from stock_ticks_mor_bs_ro group by symbol HAVING symbol = 'GOOG';
select symbol, max(ts) from stock_ticks_mor_bs_rt group by symbol HAVING symbol = 'GOOG';

select symbol, ts, volume, open, close from stock_ticks_mor_bs_ro where symbol = 'GOOG';
select symbol, ts, volume, open, close from stock_ticks_mor_bs_rt where symbol = 'GOOG';

!quit
6 changes: 6 additions & 0 deletions docker/demo/hive-incremental-cow.commands
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,11 @@ set hoodie.stock_ticks_cow.consume.start.timestamp='${min.commit.time}';

select symbol, ts, volume, open, close from stock_ticks_cow where symbol = 'GOOG' and `_hoodie_commit_time` > '${min.commit.time}';

set hoodie.stock_ticks_cow_bs.consume.mode=INCREMENTAL;
set hoodie.stock_ticks_cow_bs.consume.max.commits=3;
set hoodie.stock_ticks_cow_bs.consume.start.timestamp='00000000000001';

select symbol, ts, volume, open, close from stock_ticks_cow_bs where symbol = 'GOOG' and `_hoodie_commit_time` > '00000000000001';

!quit

6 changes: 6 additions & 0 deletions docker/demo/hive-incremental-mor-ro.commands
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,11 @@ set hoodie.stock_ticks_mor.consume.start.timestamp='${min.commit.time}';

select symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = 'GOOG' and `_hoodie_commit_time` > '${min.commit.time}';

set hoodie.stock_ticks_mor_bs.consume.mode=INCREMENTAL;
set hoodie.stock_ticks_mor_bs.consume.max.commits=3;
set hoodie.stock_ticks_mor_bs.consume.start.timestamp='00000000000001';

select symbol, ts, volume, open, close from stock_ticks_mor_bs_ro where symbol = 'GOOG' and `_hoodie_commit_time` > '00000000000001';

!quit

6 changes: 6 additions & 0 deletions docker/demo/hive-incremental-mor-rt.commands
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,11 @@ set hoodie.stock_ticks_mor.consume.start.timestamp='${min.commit.time}';

select symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG' and `_hoodie_commit_time` > '${min.commit.time}';

set hoodie.stock_ticks_mor_bs.consume.mode=INCREMENTAL;
set hoodie.stock_ticks_mor_bs.consume.max.commits=3;
set hoodie.stock_ticks_mor_bs.consume.start.timestamp='00000000000001';

select symbol, ts, volume, open, close from stock_ticks_mor_bs_rt where symbol = 'GOOG' and `_hoodie_commit_time` > '00000000000001';

!quit

10 changes: 10 additions & 0 deletions docker/demo/sparksql-batch1.commands
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,14 @@ spark.sql("select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = 'GOOG'").show(100, false)
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG'").show(100, false)

// Bootstrapped Copy-On-Write table
spark.sql("select symbol, max(ts) from stock_ticks_cow_bs group by symbol HAVING symbol = 'GOOG'").show(100, false)
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow_bs where symbol = 'GOOG'").show(100, false)

// Bootstrapped Merge-On-Read table
spark.sql("select symbol, max(ts) from stock_ticks_mor_bs_ro group by symbol HAVING symbol = 'GOOG'").show(100, false)
spark.sql("select symbol, max(ts) from stock_ticks_mor_bs_rt group by symbol HAVING symbol = 'GOOG'").show(100, false)
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_bs_ro where symbol = 'GOOG'").show(100, false)
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_bs_rt where symbol = 'GOOG'").show(100, false)

System.exit(0)
10 changes: 10 additions & 0 deletions docker/demo/sparksql-batch2.commands
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,14 @@ spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from s
spark.sql("select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = 'GOOG'").show(100, false)
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG'").show(100, false)

// Copy-On-Write Bootstrapped table
spark.sql("select symbol, max(ts) from stock_ticks_cow_bs group by symbol HAVING symbol = 'GOOG'").show(100, false)
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow_bs where symbol = 'GOOG'").show(100, false)

// Merge-On-Read table Bootstrapped Table
spark.sql("select symbol, max(ts) from stock_ticks_mor_bs_ro group by symbol HAVING symbol = 'GOOG'").show(100, false)
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_bs_ro where symbol = 'GOOG'").show(100, false)
spark.sql("select symbol, max(ts) from stock_ticks_mor_bs_rt group by symbol HAVING symbol = 'GOOG'").show(100, false)
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_bs_rt where symbol = 'GOOG'").show(100, false)

System.exit(0)
22 changes: 22 additions & 0 deletions docker/demo/sparksql-bootstrap-prep-source.commands
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.spark.sql.functions.col

val df = spark.read.format("org.apache.hudi").load("/user/hive/warehouse/stock_ticks_cow/*/*/*").drop("_hoodie_commit_time", "_hoodie_record_key", "_hoodie_file_name", "_hoodie_commit_seqno", "_hoodie_partition_path")
df.write.format("parquet").save("/user/hive/warehouse/stock_ticks_cow_bs_src/2018/08/31/")
System.exit(0)
34 changes: 32 additions & 2 deletions docker/demo/sparksql-incremental.commands
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,38 @@ spark.sql("select key, `_hoodie_partition_path` as datestr, symbol, ts, open, cl
mode(SaveMode.Overwrite).
save("/user/hive/warehouse/stock_ticks_derived_mor");

spark.sql("show tables").show(20, false)
spark.sql("select count(*) from stock_ticks_derived_mor_ro").show(20, false)
spark.sql("select count(*) from stock_ticks_derived_mor_rt").show(20, false)

System.exit(0);
val hoodieIncQueryBsDF = spark.read.format("org.apache.hudi").
option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL).
option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY, "00000000000001").
load("/user/hive/warehouse/stock_ticks_cow_bs");
hoodieIncQueryBsDF.registerTempTable("stock_ticks_cow_bs_incr")
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow_bs_incr where symbol = 'GOOG'").show(100, false);

spark.sql("select key, `_hoodie_partition_path` as datestr, symbol, ts, open, close from stock_ticks_cow_bs_incr").
write.format("org.apache.hudi").
option("hoodie.insert.shuffle.parallelism", "2").
option("hoodie.upsert.shuffle.parallelism","2").
option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL).
option(DataSourceWriteOptions.OPERATION_OPT_KEY, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL).
option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "key").
option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "datestr").
option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "ts").
option(HoodieWriteConfig.TABLE_NAME, "stock_ticks_derived_mor_bs").
option(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY, "stock_ticks_derived_mor_bs").
option(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY, "default").
option(DataSourceWriteOptions.HIVE_URL_OPT_KEY, "jdbc:hive2://hiveserver:10000").
option(DataSourceWriteOptions.HIVE_USER_OPT_KEY, "hive").
option(DataSourceWriteOptions.HIVE_PASS_OPT_KEY, "hive").
option(DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY, "true").
option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY, "datestr").
mode(SaveMode.Overwrite).
save("/user/hive/warehouse/stock_ticks_derived_mor_bs");

spark.sql("show tables").show(20, false)
spark.sql("select count(*) from stock_ticks_derived_mor_bs_ro").show(20, false)
spark.sql("select count(*) from stock_ticks_derived_mor_bs_rt").show(20, false)

System.exit(0);
4 changes: 3 additions & 1 deletion hudi-cli/hudi-cli.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,6 @@ if [ -z "$CLIENT_JAR" ]; then
echo "Client jar location not set, please set it in conf/hudi-env.sh"
fi

java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:$DIR/target/lib/*:$HOODIE_JAR:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap $@
OTHER_JARS=`ls ${DIR}/target/lib/* | grep -v 'hudi-[^/]*jar' | tr '\n' ':'`
echo "Running : java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:${HOODIE_JAR}:${OTHER_JARS}:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap $@"
java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:${HOODIE_JAR}:${OTHER_JARS}:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap $@
41 changes: 41 additions & 0 deletions hudi-cli/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,41 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>${maven-shade-plugin.version}</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<createSourcesJar>true</createSourcesJar>
<dependencyReducedPomLocation>${project.build.directory}/dependency-reduced-pom.xml
</dependencyReducedPomLocation>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer">
</transformer>
<transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer">
<addHeader>true</addHeader>
</transformer>
<transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
<resource>META-INF/LICENSE</resource>
<file>target/classes/META-INF/LICENSE</file>
</transformer>
</transformers>
<artifactSet>
<includes>
<include>org.apache.hudi:hudi-utilities-bundle_${scala.binary.version}</include>
</includes>
</artifactSet>
<finalName>${project.artifactId}-${project.version}</finalName>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>

Expand Down Expand Up @@ -202,6 +237,12 @@
<type>test-jar</type>
</dependency>

<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-utilities-bundle_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>

<!-- Logging -->
<dependency>
<groupId>log4j</groupId>
Expand Down
Loading