diff --git a/docker/demo/compaction.commands b/docker/demo/compaction.commands
index 9853a355176f3..bdb0c6c3b632b 100644
--- a/docker/demo/compaction.commands
+++ b/docker/demo/compaction.commands
@@ -19,4 +19,7 @@ connect --path /user/hive/warehouse/stock_ticks_mor
 compactions show all
 compaction schedule
 compaction run --parallelism 2 --sparkMemory 1G  --schemaFilePath /var/demo/config/schema.avsc --retry 1 
-
+connect --path /user/hive/warehouse/stock_ticks_mor_bs
+compactions show all
+compaction schedule
+compaction run --parallelism 2 --sparkMemory 1G  --schemaFilePath /var/demo/config/schema.avsc --retry 1 
diff --git a/docker/demo/hive-batch1.commands b/docker/demo/hive-batch1.commands
index 93bf3b67930aa..021c6d55b800d 100644
--- a/docker/demo/hive-batch1.commands
+++ b/docker/demo/hive-batch1.commands
@@ -25,4 +25,12 @@ select symbol, ts, volume, open, close  from stock_ticks_cow where  symbol = 'GO
 select symbol, ts, volume, open, close  from stock_ticks_mor_ro where  symbol = 'GOOG';
 select symbol, ts, volume, open, close  from stock_ticks_mor_rt where  symbol = 'GOOG';
 
+select symbol, max(ts) from stock_ticks_cow_bs group by symbol HAVING symbol = 'GOOG';
+select symbol, max(ts) from stock_ticks_mor_bs_ro group by symbol HAVING symbol = 'GOOG';
+select symbol, max(ts) from stock_ticks_mor_bs_rt group by symbol HAVING symbol = 'GOOG';
+
+select symbol, ts, volume, open, close  from stock_ticks_cow_bs where  symbol = 'GOOG';
+select symbol, ts, volume, open, close  from stock_ticks_mor_bs_ro where  symbol = 'GOOG';
+select symbol, ts, volume, open, close  from stock_ticks_mor_bs_rt where  symbol = 'GOOG';
+
 !quit
diff --git a/docker/demo/hive-batch2-after-compaction.commands b/docker/demo/hive-batch2-after-compaction.commands
index 6b087019d5cca..06582a309ae00 100644
--- a/docker/demo/hive-batch2-after-compaction.commands
+++ b/docker/demo/hive-batch2-after-compaction.commands
@@ -23,4 +23,10 @@ select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = '
 select symbol, ts, volume, open, close  from stock_ticks_mor_ro where  symbol = 'GOOG';
 select symbol, ts, volume, open, close  from stock_ticks_mor_rt where  symbol = 'GOOG';
 
+select symbol, max(ts) from stock_ticks_mor_bs_ro group by symbol HAVING symbol = 'GOOG';
+select symbol, max(ts) from stock_ticks_mor_bs_rt group by symbol HAVING symbol = 'GOOG';
+
+select symbol, ts, volume, open, close  from stock_ticks_mor_bs_ro where  symbol = 'GOOG';
+select symbol, ts, volume, open, close  from stock_ticks_mor_bs_rt where  symbol = 'GOOG';
+
 !quit
diff --git a/docker/demo/hive-incremental-cow.commands b/docker/demo/hive-incremental-cow.commands
index 7f43548071863..702b2afa52733 100644
--- a/docker/demo/hive-incremental-cow.commands
+++ b/docker/demo/hive-incremental-cow.commands
@@ -23,5 +23,11 @@ set hoodie.stock_ticks_cow.consume.start.timestamp='${min.commit.time}';
 
 select symbol, ts, volume, open, close  from stock_ticks_cow where  symbol = 'GOOG' and `_hoodie_commit_time` > '${min.commit.time}';
 
+set hoodie.stock_ticks_cow_bs.consume.mode=INCREMENTAL;
+set hoodie.stock_ticks_cow_bs.consume.max.commits=3;
+set hoodie.stock_ticks_cow_bs.consume.start.timestamp='00000000000001';
+
+select symbol, ts, volume, open, close  from stock_ticks_cow_bs where  symbol = 'GOOG' and `_hoodie_commit_time` > '00000000000001';
+
 !quit
 
diff --git a/docker/demo/hive-incremental-mor-ro.commands b/docker/demo/hive-incremental-mor-ro.commands
index 8b97c0aac9b5e..51683c010a496 100644
--- a/docker/demo/hive-incremental-mor-ro.commands
+++ b/docker/demo/hive-incremental-mor-ro.commands
@@ -23,5 +23,11 @@ set hoodie.stock_ticks_mor.consume.start.timestamp='${min.commit.time}';
 
 select symbol, ts, volume, open, close  from stock_ticks_mor_ro where  symbol = 'GOOG' and `_hoodie_commit_time` > '${min.commit.time}';
 
+set hoodie.stock_ticks_mor_bs.consume.mode=INCREMENTAL;
+set hoodie.stock_ticks_mor_bs.consume.max.commits=3;
+set hoodie.stock_ticks_mor_bs.consume.start.timestamp='00000000000001';
+
+select symbol, ts, volume, open, close  from stock_ticks_mor_bs_ro where  symbol = 'GOOG' and `_hoodie_commit_time` > '00000000000001';
+
 !quit
 
diff --git a/docker/demo/hive-incremental-mor-rt.commands b/docker/demo/hive-incremental-mor-rt.commands
index a81fb77e077d8..c29fc7ce55730 100644
--- a/docker/demo/hive-incremental-mor-rt.commands
+++ b/docker/demo/hive-incremental-mor-rt.commands
@@ -23,5 +23,11 @@ set hoodie.stock_ticks_mor.consume.start.timestamp='${min.commit.time}';
 
 select symbol, ts, volume, open, close  from stock_ticks_mor_rt where  symbol = 'GOOG' and `_hoodie_commit_time` > '${min.commit.time}';
 
+set hoodie.stock_ticks_mor_bs.consume.mode=INCREMENTAL;
+set hoodie.stock_ticks_mor_bs.consume.max.commits=3;
+set hoodie.stock_ticks_mor_bs.consume.start.timestamp='00000000000001';
+
+select symbol, ts, volume, open, close  from stock_ticks_mor_bs_rt where  symbol = 'GOOG' and `_hoodie_commit_time` > '00000000000001';
+
 !quit
 
diff --git a/docker/demo/sparksql-batch1.commands b/docker/demo/sparksql-batch1.commands
index 727aa1633154d..4de2486c6ce58 100644
--- a/docker/demo/sparksql-batch1.commands
+++ b/docker/demo/sparksql-batch1.commands
@@ -27,4 +27,14 @@ spark.sql("select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING
 spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close  from stock_ticks_mor_ro where  symbol = 'GOOG'").show(100, false)
 spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close  from stock_ticks_mor_rt where  symbol = 'GOOG'").show(100, false)
 
+// Bootstrapped Copy-On-Write table
+spark.sql("select symbol, max(ts) from stock_ticks_cow_bs group by symbol HAVING symbol = 'GOOG'").show(100, false)
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close  from stock_ticks_cow_bs where  symbol = 'GOOG'").show(100, false)
+
+// Bootstrapped Merge-On-Read table
+spark.sql("select symbol, max(ts) from stock_ticks_mor_bs_ro group by symbol HAVING symbol = 'GOOG'").show(100, false)
+spark.sql("select symbol, max(ts) from stock_ticks_mor_bs_rt group by symbol HAVING symbol = 'GOOG'").show(100, false)
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close  from stock_ticks_mor_bs_ro where  symbol = 'GOOG'").show(100, false)
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close  from stock_ticks_mor_bs_rt where  symbol = 'GOOG'").show(100, false)
+
 System.exit(0)
diff --git a/docker/demo/sparksql-batch2.commands b/docker/demo/sparksql-batch2.commands
index 391e11b971a27..739d991dbbc1d 100644
--- a/docker/demo/sparksql-batch2.commands
+++ b/docker/demo/sparksql-batch2.commands
@@ -26,4 +26,14 @@ spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close  from s
 spark.sql("select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = 'GOOG'").show(100, false)
 spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close  from stock_ticks_mor_rt where  symbol = 'GOOG'").show(100, false)
 
+// Bootstrapped Copy-On-Write table
+spark.sql("select symbol, max(ts) from stock_ticks_cow_bs group by symbol HAVING symbol = 'GOOG'").show(100, false)
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close  from stock_ticks_cow_bs where  symbol = 'GOOG'").show(100, false)
+
+// Bootstrapped Merge-On-Read table
+spark.sql("select symbol, max(ts) from stock_ticks_mor_bs_ro group by symbol HAVING symbol = 'GOOG'").show(100, false)
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close  from stock_ticks_mor_bs_ro where  symbol = 'GOOG'").show(100, false)
+spark.sql("select symbol, max(ts) from stock_ticks_mor_bs_rt group by symbol HAVING symbol = 'GOOG'").show(100, false)
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close  from stock_ticks_mor_bs_rt where  symbol = 'GOOG'").show(100, false)
+
 System.exit(0)
diff --git a/docker/demo/sparksql-bootstrap-prep-source.commands b/docker/demo/sparksql-bootstrap-prep-source.commands
new file mode 100644
index 0000000000000..23db3e4d38c4b
--- /dev/null
+++ b/docker/demo/sparksql-bootstrap-prep-source.commands
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.spark.sql.functions.col // NOTE(review): `col` is not referenced anywhere in this script
+// Read the stock_ticks_cow Hudi table, drop the listed _hoodie_* columns, and save the result as plain parquet.
+val df = spark.read.format("org.apache.hudi").load("/user/hive/warehouse/stock_ticks_cow/*/*/*").drop("_hoodie_commit_time", "_hoodie_record_key", "_hoodie_file_name", "_hoodie_commit_seqno", "_hoodie_partition_path")
+df.write.format("parquet").save("/user/hive/warehouse/stock_ticks_cow_bs_src/2018/08/31/")
+System.exit(0)
diff --git a/docker/demo/sparksql-incremental.commands b/docker/demo/sparksql-incremental.commands
index 8e3e153e27e7e..febfcd28a1116 100644
--- a/docker/demo/sparksql-incremental.commands
+++ b/docker/demo/sparksql-incremental.commands
@@ -52,8 +52,38 @@ spark.sql("select key, `_hoodie_partition_path` as datestr, symbol, ts, open, cl
     mode(SaveMode.Overwrite).
     save("/user/hive/warehouse/stock_ticks_derived_mor");
 
-spark.sql("show tables").show(20, false)
 spark.sql("select count(*) from stock_ticks_derived_mor_ro").show(20, false)
 spark.sql("select count(*) from stock_ticks_derived_mor_rt").show(20, false)
 
-System.exit(0);
\ No newline at end of file
+val hoodieIncQueryBsDF =  spark.read.format("org.apache.hudi").
+                      option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL).
+                      option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY, "00000000000001").
+                      load("/user/hive/warehouse/stock_ticks_cow_bs");
+hoodieIncQueryBsDF.registerTempTable("stock_ticks_cow_bs_incr")
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close  from stock_ticks_cow_bs_incr where  symbol = 'GOOG'").show(100, false);
+
+spark.sql("select key, `_hoodie_partition_path` as datestr, symbol, ts, open, close from stock_ticks_cow_bs_incr").
+    write.format("org.apache.hudi").
+    option("hoodie.insert.shuffle.parallelism", "2").
+    option("hoodie.upsert.shuffle.parallelism","2").
+    option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL).
+    option(DataSourceWriteOptions.OPERATION_OPT_KEY, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL).
+    option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "key").
+    option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "datestr").
+    option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "ts").
+    option(HoodieWriteConfig.TABLE_NAME, "stock_ticks_derived_mor_bs").
+    option(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY, "stock_ticks_derived_mor_bs").
+    option(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY, "default").
+    option(DataSourceWriteOptions.HIVE_URL_OPT_KEY, "jdbc:hive2://hiveserver:10000").
+    option(DataSourceWriteOptions.HIVE_USER_OPT_KEY, "hive").
+    option(DataSourceWriteOptions.HIVE_PASS_OPT_KEY, "hive").
+    option(DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY, "true").
+    option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY, "datestr").
+    mode(SaveMode.Overwrite).
+    save("/user/hive/warehouse/stock_ticks_derived_mor_bs");
+
+spark.sql("show tables").show(20, false)
+spark.sql("select count(*) from stock_ticks_derived_mor_bs_ro").show(20, false)
+spark.sql("select count(*) from stock_ticks_derived_mor_bs_rt").show(20, false)
+
+System.exit(0);
diff --git a/hudi-cli/hudi-cli.sh b/hudi-cli/hudi-cli.sh
index b6e708c14436d..78d8f4d6056f6 100755
--- a/hudi-cli/hudi-cli.sh
+++ b/hudi-cli/hudi-cli.sh
@@ -25,4 +25,6 @@ if [ -z "$CLIENT_JAR" ]; then
   echo "Client jar location not set, please set it in conf/hudi-env.sh"
 fi
 
-java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:$DIR/target/lib/*:$HOODIE_JAR:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap $@
+OTHER_JARS=$(ls ${DIR}/target/lib/* | grep -v 'hudi-[^/]*jar' | tr '\n' ':') # ':'-joined lib jars, minus those matching hudi-*jar
+echo "Running : java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:${HOODIE_JAR}:${OTHER_JARS}:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap $*"
+java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:${HOODIE_JAR}:${OTHER_JARS}:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap "$@" # quoted: keeps arguments with spaces intact
diff --git a/hudi-cli/pom.xml b/hudi-cli/pom.xml
index dbb44639f3b83..cd2c975bd9bdb 100644
--- a/hudi-cli/pom.xml
+++ b/hudi-cli/pom.xml
@@ -147,6 +147,41 @@
           </execution>
         </executions>
       </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <version>${maven-shade-plugin.version}</version>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>shade</goal>
+            </goals>
+            <configuration>
+              <createSourcesJar>true</createSourcesJar>
+              <dependencyReducedPomLocation>${project.build.directory}/dependency-reduced-pom.xml
+              </dependencyReducedPomLocation>
+              <transformers>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer">
+                </transformer>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer">
+                    <addHeader>true</addHeader>
+                </transformer>
+                <transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
+                  <resource>META-INF/LICENSE</resource>
+                  <file>target/classes/META-INF/LICENSE</file>
+                </transformer>
+              </transformers>
+              <artifactSet>
+                <includes>
+                  <include>org.apache.hudi:hudi-utilities-bundle_${scala.binary.version}</include>
+                </includes>
+              </artifactSet>
+              <finalName>${project.artifactId}-${project.version}</finalName>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
   </build>
 
@@ -202,6 +237,12 @@
       <type>test-jar</type>
     </dependency>
 
+    <dependency>
+      <groupId>org.apache.hudi</groupId>
+      <artifactId>hudi-utilities-bundle_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
     <!-- Logging -->
     <dependency>
       <groupId>log4j</groupId>
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/BootstrapCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/BootstrapCommand.java
new file mode 100644
index 0000000000000..e0e4742a9f269
--- /dev/null
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/BootstrapCommand.java
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.cli.commands;
+
+import org.apache.hudi.avro.model.BootstrapIndexInfo;
+import org.apache.hudi.cli.HoodieCLI;
+import org.apache.hudi.cli.HoodiePrintHelper;
+import org.apache.hudi.cli.TableHeader;
+import org.apache.hudi.cli.commands.SparkMain.SparkCommand;
+import org.apache.hudi.cli.utils.InputStreamConsumer;
+import org.apache.hudi.cli.utils.SparkUtil;
+import org.apache.hudi.common.bootstrap.index.BootstrapIndex;
+import org.apache.hudi.common.model.BootstrapSourceFileMapping;
+import org.apache.hudi.common.model.HoodieFileGroupId;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.utilities.UtilHelpers;
+
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+import org.apache.spark.launcher.SparkLauncher;
+import org.apache.spark.util.Utils;
+import org.springframework.shell.core.CommandMarker;
+import org.springframework.shell.core.annotation.CliCommand;
+import org.springframework.shell.core.annotation.CliOption;
+import org.springframework.stereotype.Component;
+
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import scala.collection.JavaConverters;
+
+/**
+ * CLI command to perform bootstrap action & display bootstrap index.
+ */
+@Component
+public class BootstrapCommand implements CommandMarker {
+
+  private static final Logger LOG = LogManager.getLogger(BootstrapCommand.class);
+
+  /**
+   * Launches a Spark application running {@link SparkCommand#BOOTSTRAP} for the table held by {@link HoodieCLI},
+   * waits for it to exit, and reports success or failure based on the process exit code.
+   */
+  @CliCommand(value = "bootstrap", help = "Run a bootstrap action for current Hudi table")
+  public String bootstrap(
+      @CliOption(key = {"sourcePath"}, mandatory = true, help = "Source data path of the table") final String sourcePath,
+      @CliOption(key = {"recordKeyColumns"}, mandatory = true, help = "Record key columns for bootstrap data") final String recordKeyCols,
+      @CliOption(key = {"parallelism"}, unspecifiedDefaultValue = "1500", help = "Bootstrap writer parallelism") final int parallelism,
+      @CliOption(key = {"selectorClass"}, unspecifiedDefaultValue = "org.apache.hudi.client.bootstrap.selector.MetadataOnlyBootstrapModeSelector",
+          help = "Selector class for bootstrap") final String selectorClass,
+      @CliOption(key = {"schema"}, unspecifiedDefaultValue = "", help = "Schema of the source data file") final String schema)
+      throws IOException, InterruptedException, URISyntaxException {
+    // Initialise the CLI configuration and file system before asking for the table meta client.
+    HoodieCLI.initFS(HoodieCLI.initConf());
+    final HoodieTableMetaClient tableMetaClient = HoodieCLI.getTableMetaClient();
+
+    // The launcher is configured from the default Spark properties file resolved from the process environment.
+    final String propertiesFile = Utils.getDefaultPropertiesFile(
+        JavaConverters.mapAsScalaMapConverter(System.getenv()).asScala());
+    final SparkLauncher launcher = SparkUtil.initLauncher(propertiesFile);
+    launcher.addAppArgs(SparkCommand.BOOTSTRAP.toString(), tableMetaClient.getTableConfig().getTableName(),
+        tableMetaClient.getBasePath(), sourcePath, recordKeyCols, String.valueOf(parallelism), selectorClass, schema);
+    UtilHelpers.validateAndAddProperties(new String[] {}, launcher);
+
+    // Start the child process, hand its output to InputStreamConsumer, and block until it exits.
+    final Process sparkJob = launcher.launch();
+    InputStreamConsumer.captureOutput(sparkJob);
+    return sparkJob.waitFor() == 0
+        ? "Imported source data as hudi dataset"
+        : "Failed to import source data to hudi dataset";
+  }
+
+  /**
+   * Prints bootstrap index source-file mappings for the given fileIds, for one partition, or (no options) for every indexed partition.
+   */
+  @CliCommand(value = "show bootstrap index mapping", help = "Show bootstrap index mapping")
+  public String showBootstrapIndexMapping(
+      @CliOption(key = {"partitionPath"}, unspecifiedDefaultValue = "", help = "A valid partition path") String partition,
+      @CliOption(key = {"fileIds"}, unspecifiedDefaultValue = "", help = "Valid fileIds split by comma") String fileIds,
+      @CliOption(key = {"limit"}, unspecifiedDefaultValue = "-1", help = "Limit rows to be displayed") Integer limit,
+      @CliOption(key = {"sortBy"}, unspecifiedDefaultValue = "", help = "Sorting Field") final String sortByField,
+      @CliOption(key = {"desc"}, unspecifiedDefaultValue = "false", help = "Ordering") final boolean descending,
+      @CliOption(key = {"headeronly"}, unspecifiedDefaultValue = "false", help = "Print Header Only")
+      final boolean headerOnly) {
+    if (partition.isEmpty() && !fileIds.isEmpty()) { // file group ids below are built from (partition, fileId)
+      throw new IllegalStateException("partitionPath is required when fileIds is specified");
+    }
+    BootstrapIndex.IndexReader indexReader = createBootstrapIndexReader();
+
+    // TODO tmp solution because the indexedPartition name is not clean: keep the text before "//" and drop its first 5 chars
+    List<String> indexedPartitions = indexReader.getIndexedPartitions().stream()
+        .map(p -> p.split("//")[0].substring(5)).collect(Collectors.toList());
+
+    if (!partition.isEmpty() && !indexedPartitions.contains(partition)) {
+      return partition + " is not a valid indexed partition";
+    }
+
+    List<BootstrapSourceFileMapping> mappingList = new ArrayList<>();
+    if (!fileIds.isEmpty()) {
+      List<HoodieFileGroupId> fileGroupIds = Arrays.stream(fileIds.split(","))
+          .map(fileId -> new HoodieFileGroupId(partition, fileId)).collect(Collectors.toList());
+      mappingList.addAll(indexReader.getSourceFileMappingForFileIds(fileGroupIds).values());
+    } else if (!partition.isEmpty()) {
+      mappingList.addAll(indexReader.getSourceFileMappingForPartition(partition));
+    } else {
+      for (String part : indexedPartitions) {
+        mappingList.addAll(indexReader.getSourceFileMappingForPartition(part));
+      }
+    }
+
+    final List<Comparable[]> rows = convertBootstrapSourceFileMapping(mappingList);
+    final TableHeader header = new TableHeader()
+        .addTableHeaderField("Hudi Partition")
+        .addTableHeaderField("FileId")
+        .addTableHeaderField("Source File Base Path")
+        .addTableHeaderField("Source File Parition")
+        .addTableHeaderField("Source File Path");
+    return HoodiePrintHelper.print(header, new HashMap<>(), sortByField, descending,
+        limit, headerOnly, rows);
+  }
+
+  /** Lists the partitions recorded in the bootstrap index, exactly as the index reader reports them. */
+  @CliCommand(value = "show indexed partitions", help = "Show bootstrap indexed partitions")
+  public String showIndexedPartitions() {
+    final List<String> partitions = createBootstrapIndexReader().getIndexedPartitions();
+
+    // One single-column row per partition, in the order returned by the reader.
+    final String[][] rows = partitions.stream()
+        .map(partitionName -> new String[] {partitionName})
+        .toArray(String[][]::new);
+
+    final String[] header = {"Indexed partitions"};
+    return HoodiePrintHelper.print(header, rows);
+  }
+
+  /** Prints the bootstrap index info record: version, source base path, created timestamp and number of keys. */
+  @CliCommand(value = "show bootstrap info", help = "Show bootstrap index info")
+  public String showBootstrapIndexInfo() {
+    final BootstrapIndexInfo info = createBootstrapIndexReader().getIndexInfo();
+
+    final String[] header = {"Version", "Source Base Path", "Created Timestamp", "Number of keys"};
+    final String[] infoRow = {
+        String.valueOf(info.getVersion()), info.getSourceBasePath(),
+        String.valueOf(info.getCreatedTimestamp()), String.valueOf(info.getNumKeys())};
+    final String[][] rows = {infoRow};
+    return HoodiePrintHelper.print(header, rows);
+  }
+
+  /** Creates a reader over the bootstrap index of the table held by HoodieCLI; throws if {@code checkIndex()} is false. */
+  private BootstrapIndex.IndexReader createBootstrapIndexReader() {
+    final BootstrapIndex bootstrapIndex = BootstrapIndex.getBootstrapIndex(HoodieCLI.getTableMetaClient());
+    if (bootstrapIndex.checkIndex()) {
+      return bootstrapIndex.createReader();
+    }
+    throw new IllegalStateException("This is not a bootstraped Hudi table. Don't have any index info");
+  }
+
+  /** Turns each mapping into one row: Hudi partition, file id, source base path, source partition, source file URI. */
+  private List<Comparable[]> convertBootstrapSourceFileMapping(List<BootstrapSourceFileMapping> mappingList) {
+    return mappingList.stream()
+        .map(m -> new Comparable[] {
+            m.getHudiPartitionPath(), m.getHudiFileId(), m.getSourceBasePath(),
+            m.getSourcePartitionPath(), m.getSourceFileStatus().getPath().getUri()})
+        .collect(Collectors.toCollection(ArrayList::new));
+  }
+}
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java
index 65dcde895208f..ff4bc7dbea4f4 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java
@@ -55,7 +55,7 @@ public class SparkMain {
    * Commands.
    */
   enum SparkCommand {
-    ROLLBACK, DEDUPLICATE, ROLLBACK_TO_SAVEPOINT, SAVEPOINT, IMPORT, UPSERT, COMPACT_SCHEDULE, COMPACT_RUN,
+    BOOTSTRAP, ROLLBACK, DEDUPLICATE, ROLLBACK_TO_SAVEPOINT, SAVEPOINT, IMPORT, UPSERT, COMPACT_SCHEDULE, COMPACT_RUN,
     COMPACT_UNSCHEDULE_PLAN, COMPACT_UNSCHEDULE_FILE, COMPACT_VALIDATE, COMPACT_REPAIR, CLEAN, DELETE_SAVEPOINT
   }
 
@@ -70,6 +70,10 @@ public static void main(String[] args) throws Exception {
         : SparkUtil.initJavaSparkConf("hoodie-cli-" + command);
     int returnCode = 0;
     switch (cmd) {
+      case BOOTSTRAP:
+        assert (args.length == 8);
+        returnCode = doBootstrap(jsc, args[1], args[2], args[3], args[4], Integer.parseInt(args[5]), args[6], args[7]);
+        break;
       case ROLLBACK:
         assert (args.length == 3);
         returnCode = rollback(jsc, args[1], args[2]);
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/TableCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/TableCommand.java
index da3d57a4f6a80..0d28bae4ced49 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/TableCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/TableCommand.java
@@ -88,7 +88,9 @@ public String createTable(
       @CliOption(key = {"archiveLogFolder"}, help = "Folder Name for storing archived timeline") String archiveFolder,
       @CliOption(key = {"layoutVersion"}, help = "Specific Layout Version to use") Integer layoutVersion,
       @CliOption(key = {"payloadClass"}, unspecifiedDefaultValue = "org.apache.hudi.common.model.HoodieAvroPayload",
-          help = "Payload Class") final String payloadClass)
+          help = "Payload Class") final String payloadClass,
+      @CliOption(key = {"bootstrapIndexClass"}, unspecifiedDefaultValue = "org.apache.hudi.common.bootstrap.index.HFileBasedBootstrapIndex",
+          help = "Bootstrap Index class") final String bootstrapIndexClass)
       throws IOException {
 
     boolean initialized = HoodieCLI.initConf();
@@ -109,7 +111,7 @@ public String createTable(
 
     final HoodieTableType tableType = HoodieTableType.valueOf(tableTypeStr);
     HoodieTableMetaClient.initTableType(HoodieCLI.conf, path, tableType, name, archiveFolder,
-        payloadClass, layoutVersion);
+        payloadClass, layoutVersion, bootstrapIndexClass);
 
     // Now connect to ensure loading works
     return connect(path, layoutVersion, false, 0, 0, 0);
diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestArchivedCommitsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestArchivedCommitsCommand.java
index 88fcb39e45862..2c658044b892d 100644
--- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestArchivedCommitsCommand.java
+++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestArchivedCommitsCommand.java
@@ -64,7 +64,7 @@ public void init() throws IOException {
     tablePath = basePath + File.separator + tableName;
     new TableCommand().createTable(
         tablePath, tableName,
-        "COPY_ON_WRITE", "", 1, "org.apache.hudi.common.model.HoodieAvroPayload");
+        "COPY_ON_WRITE", "", 1, "org.apache.hudi.common.model.HoodieAvroPayload", null);
 
     metaClient = HoodieCLI.getTableMetaClient();
 
diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCleansCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCleansCommand.java
index 6722c98915f2c..6edcf28dbb6c5 100644
--- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCleansCommand.java
+++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCleansCommand.java
@@ -72,7 +72,7 @@ public void init() throws IOException {
     // Create table and connect
     new TableCommand().createTable(
         tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(),
-        "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
+        "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload", null);
 
     Configuration conf = HoodieCLI.conf;
 
diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestFileSystemViewCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestFileSystemViewCommand.java
index 83ad7fc23324c..cd3b727b7bb42 100644
--- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestFileSystemViewCommand.java
+++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestFileSystemViewCommand.java
@@ -69,7 +69,7 @@ public void init() throws IOException {
     String tablePath = Paths.get(basePath, tableName).toString();
     new TableCommand().createTable(
         tablePath, tableName,
-        "COPY_ON_WRITE", "", 1, "org.apache.hudi.common.model.HoodieAvroPayload");
+        "COPY_ON_WRITE", "", 1, "org.apache.hudi.common.model.HoodieAvroPayload", null);
 
     metaClient = HoodieCLI.getTableMetaClient();
 
diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java
index b0d2504193a5f..79147c23ea281 100644
--- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java
+++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java
@@ -87,7 +87,7 @@ public void init() throws IOException, InterruptedException, URISyntaxException
     partitionPath = tablePath + File.separator + HoodieTestCommitMetadataGenerator.DEFAULT_FIRST_PARTITION_PATH;
     new TableCommand().createTable(
         tablePath, tableName, HoodieTableType.MERGE_ON_READ.name(),
-        "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
+        "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload", null);
 
     Files.createDirectories(Paths.get(partitionPath));
 
diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java
index 9fc49181ddfce..8740a9e8b2028 100644
--- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java
+++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java
@@ -26,6 +26,7 @@
 import org.apache.hudi.cli.HoodieTableHeaderFields;
 import org.apache.hudi.cli.common.HoodieTestCommitMetadataGenerator;
 import org.apache.hudi.common.HoodieTestDataGenerator;
+import org.apache.hudi.common.bootstrap.index.HFileBasedBootstrapIndex;
 import org.apache.hudi.common.fs.FSUtils;
 import org.apache.hudi.common.model.HoodieTableType;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
@@ -66,7 +67,8 @@ public void init() throws IOException {
     // Create table and connect
     new TableCommand().createTable(
         tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(),
-        "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
+        "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload",
+        HFileBasedBootstrapIndex.class.getName());
   }
 
   /**
diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRollbacksCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRollbacksCommand.java
index 5a82d778f86e8..60d13301aa399 100644
--- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRollbacksCommand.java
+++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRollbacksCommand.java
@@ -26,6 +26,7 @@
 import org.apache.hudi.cli.TableHeader;
 import org.apache.hudi.client.HoodieWriteClient;
 import org.apache.hudi.common.HoodieTestDataGenerator;
+import org.apache.hudi.common.bootstrap.index.HFileBasedBootstrapIndex;
 import org.apache.hudi.common.model.HoodieTableType;
 import org.apache.hudi.common.model.HoodieTestUtils;
 import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
@@ -64,7 +65,8 @@ public void init() throws IOException {
     String tablePath = basePath + File.separator + tableName;
     new TableCommand().createTable(
         tablePath, tableName, HoodieTableType.MERGE_ON_READ.name(),
-        "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
+        "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload",
+        HFileBasedBootstrapIndex.class.getName());
 
     //Create some commits files and parquet files
     String commitTime1 = "100";
diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestSavepointsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestSavepointsCommand.java
index 2c6a3f2939bc6..da246a6ac0ac5 100644
--- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestSavepointsCommand.java
+++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestSavepointsCommand.java
@@ -23,6 +23,7 @@
 import org.apache.hudi.cli.HoodiePrintHelper;
 import org.apache.hudi.cli.HoodieTableHeaderFields;
 import org.apache.hudi.common.HoodieTestDataGenerator;
+import org.apache.hudi.common.bootstrap.index.HFileBasedBootstrapIndex;
 import org.apache.hudi.common.model.HoodieTableType;
 import org.apache.hudi.common.table.timeline.HoodieTimeline;
 import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
@@ -53,7 +54,8 @@ public void init() throws IOException {
     // Create table and connect
     new TableCommand().createTable(
         tablePath, "test_table", HoodieTableType.COPY_ON_WRITE.name(),
-        "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
+        "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload",
+        HFileBasedBootstrapIndex.class.getName());
   }
 
   /**
diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestStatsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestStatsCommand.java
index 85fbc0ab031a5..0064fa1320b1b 100644
--- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestStatsCommand.java
+++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestStatsCommand.java
@@ -28,6 +28,7 @@
 import org.apache.hudi.cli.TableHeader;
 import org.apache.hudi.cli.common.HoodieTestCommitMetadataGenerator;
 import org.apache.hudi.common.HoodieTestDataGenerator;
+import org.apache.hudi.common.bootstrap.index.HFileBasedBootstrapIndex;
 import org.apache.hudi.common.model.HoodieTableType;
 import org.apache.hudi.common.model.HoodieTestUtils;
 import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
@@ -65,7 +66,8 @@ public void init() throws IOException {
     // Create table and connect
     new TableCommand().createTable(
         tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(),
-        "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
+        "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload",
+        HFileBasedBootstrapIndex.class.getName());
   }
 
   /**
diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestCleansCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestCleansCommand.java
index f76c79b5339dd..34b7b73528137 100644
--- a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestCleansCommand.java
+++ b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestCleansCommand.java
@@ -59,7 +59,7 @@ public void init() throws IOException {
     // Create table and connect
     new TableCommand().createTable(
         tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(),
-        "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
+        "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload", null);
 
     Configuration conf = HoodieCLI.conf;
 
diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestRepairsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestRepairsCommand.java
index 4f48bc34fa295..cfc2767c7ceef 100644
--- a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestRepairsCommand.java
+++ b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestRepairsCommand.java
@@ -28,6 +28,7 @@
 import org.apache.hudi.cli.commands.TableCommand;
 import org.apache.hudi.common.HoodieClientTestUtils;
 import org.apache.hudi.common.HoodieTestDataGenerator;
+import org.apache.hudi.common.bootstrap.index.HFileBasedBootstrapIndex;
 import org.apache.hudi.common.fs.FSUtils;
 import org.apache.hudi.common.model.HoodieBaseFile;
 import org.apache.hudi.common.model.HoodieLogFile;
@@ -80,7 +81,8 @@ public void init() throws IOException, URISyntaxException {
     // Create table and connect
     new TableCommand().createTable(
         tablePath, "test_table", HoodieTableType.COPY_ON_WRITE.name(),
-        "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
+        "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload",
+        HFileBasedBootstrapIndex.class.getName());
 
     // generate 200 records
     Schema schema = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema());
diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestSavepointsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestSavepointsCommand.java
index ee9a18e4b29e2..ddbb8281587d9 100644
--- a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestSavepointsCommand.java
+++ b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestSavepointsCommand.java
@@ -22,6 +22,7 @@
 import org.apache.hudi.cli.HoodieCLI;
 import org.apache.hudi.cli.commands.TableCommand;
 import org.apache.hudi.common.HoodieTestDataGenerator;
+import org.apache.hudi.common.bootstrap.index.HFileBasedBootstrapIndex;
 import org.apache.hudi.common.model.HoodieTableType;
 import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
@@ -59,7 +60,8 @@ public void init() throws IOException {
     // Create table and connect
     new TableCommand().createTable(
         tablePath, "test_table", HoodieTableType.COPY_ON_WRITE.name(),
-        "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload");
+        "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload",
+        HFileBasedBootstrapIndex.class.getName());
   }
 
   /**
diff --git a/hudi-client/src/main/java/org/apache/hudi/client/HoodieWriteClient.java b/hudi-client/src/main/java/org/apache/hudi/client/HoodieWriteClient.java
index 13986c4bf06b8..a92d6de05e24e 100644
--- a/hudi-client/src/main/java/org/apache/hudi/client/HoodieWriteClient.java
+++ b/hudi-client/src/main/java/org/apache/hudi/client/HoodieWriteClient.java
@@ -149,12 +149,12 @@ public JavaRDD<HoodieRecord<T>> filterExists(JavaRDD<HoodieRecord<T>> hoodieReco
   /**
    * Main API to run bootstrap to hudi.
    */
-  public void bootstrap() {
+  public void bootstrap(Option<Map<String, String>> extraMetadata) {
     if (rollbackPending) {
       rollBackPendingBootstrap();
     }
     HoodieTable<T> table = getTableAndInitCtx(WriteOperationType.UPSERT);
-    table.bootstrap(jsc);
+    table.bootstrap(jsc, extraMetadata);
   }
 
   /**
diff --git a/hudi-client/src/main/java/org/apache/hudi/client/bootstrap/BootstrapSourceSchemaProvider.java b/hudi-client/src/main/java/org/apache/hudi/client/bootstrap/BootstrapSourceSchemaProvider.java
index 6569525354fc3..555f6f53349ef 100644
--- a/hudi-client/src/main/java/org/apache/hudi/client/bootstrap/BootstrapSourceSchemaProvider.java
+++ b/hudi-client/src/main/java/org/apache/hudi/client/bootstrap/BootstrapSourceSchemaProvider.java
@@ -18,9 +18,11 @@
 
 package org.apache.hudi.client.bootstrap;
 
+import org.apache.hudi.avro.HoodieAvroUtils;
 import org.apache.hudi.avro.model.HoodieFileStatus;
 import org.apache.hudi.common.bootstrap.FileStatusUtils;
 import org.apache.hudi.common.util.ParquetUtils;
+import org.apache.hudi.common.util.StringUtils;
 import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.spark.api.java.JavaSparkContext;
@@ -48,9 +50,12 @@ public BootstrapSourceSchemaProvider(HoodieWriteConfig bootstrapConfig) {
    * @return Avro Schema
    */
   public final Schema getBootstrapSchema(JavaSparkContext jsc, List<Pair<String, List<HoodieFileStatus>>> partitions) {
-    if (bootstrapConfig.getSchema() != null) {
+    if (!StringUtils.isNullOrEmpty(bootstrapConfig.getSchema())) {
       // Use schema specified by user if set
-      return Schema.parse(bootstrapConfig.getSchema());
+      Schema userSchema = Schema.parse(bootstrapConfig.getSchema());
+      if (!HoodieAvroUtils.getNullSchema().equals(userSchema)) {
+        return userSchema;
+      }
     }
     return getBootstrapSourceSchema(jsc, partitions);
   }
diff --git a/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
index 8b6d99887b41e..fd5c647c05633 100644
--- a/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
+++ b/hudi-client/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
@@ -125,7 +125,7 @@ public class HoodieWriteConfig extends DefaultHoodieConfig {
 
   private ConsistencyGuardConfig consistencyGuardConfig;
 
-  private static final String SOURCE_BASE_PATH_PROP = "hoodie.bootstrap.source.base.path";
+  public static final String SOURCE_BASE_PATH_PROP = "hoodie.bootstrap.source.base.path";
   private static final String BOOTSTRAP_MODE_SELECTOR = "hoodie.bootstrap.mode.selector";
   private static final String FULL_BOOTRAP_INPUT_PROVIDER = "hoodie.bootstrap.full.input.provider";
   private static final String BOOTSTRAP_KEYGEN_CLASS = "hoodie.bootstrap.keygen.class";
diff --git a/hudi-client/src/main/java/org/apache/hudi/table/HoodieCopyOnWriteTable.java b/hudi-client/src/main/java/org/apache/hudi/table/HoodieCopyOnWriteTable.java
index e385ab0e725a7..ec8541b65804b 100644
--- a/hudi-client/src/main/java/org/apache/hudi/table/HoodieCopyOnWriteTable.java
+++ b/hudi-client/src/main/java/org/apache/hudi/table/HoodieCopyOnWriteTable.java
@@ -42,7 +42,7 @@
 import org.apache.hudi.exception.HoodieUpsertException;
 import org.apache.hudi.io.HoodieCreateHandle;
 import org.apache.hudi.io.HoodieMergeHandle;
-import org.apache.hudi.table.action.bootstrap.BootstrapActionExecutor;
+import org.apache.hudi.table.action.bootstrap.BootstrapCommitActionExecutor;
 import org.apache.hudi.table.action.bootstrap.HoodieBootstrapWriteMetadata;
 import org.apache.hudi.table.action.clean.CleanActionExecutor;
 import org.apache.hudi.table.action.HoodieWriteMetadata;
@@ -139,8 +139,8 @@ public HoodieWriteMetadata compact(JavaSparkContext jsc, String compactionInstan
   }
 
   @Override
-  public HoodieBootstrapWriteMetadata bootstrap(JavaSparkContext jsc) {
-    return new BootstrapActionExecutor(jsc, config, this).execute();
+  public HoodieBootstrapWriteMetadata bootstrap(JavaSparkContext jsc, Option<Map<String, String>> extraMetadata) {
+    return new BootstrapCommitActionExecutor(jsc, config, this, extraMetadata).execute();
   }
 
   @Override
diff --git a/hudi-client/src/main/java/org/apache/hudi/table/HoodieMergeOnReadTable.java b/hudi-client/src/main/java/org/apache/hudi/table/HoodieMergeOnReadTable.java
index d60bbfc807851..07cd200f0ddc0 100644
--- a/hudi-client/src/main/java/org/apache/hudi/table/HoodieMergeOnReadTable.java
+++ b/hudi-client/src/main/java/org/apache/hudi/table/HoodieMergeOnReadTable.java
@@ -34,6 +34,8 @@
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieIOException;
 import org.apache.hudi.table.action.HoodieWriteMetadata;
+import org.apache.hudi.table.action.bootstrap.BootstrapDeltaCommitActionExecutor;
+import org.apache.hudi.table.action.bootstrap.HoodieBootstrapWriteMetadata;
 import org.apache.hudi.table.action.compact.RunCompactionActionExecutor;
 import org.apache.hudi.table.action.deltacommit.BulkInsertDeltaCommitActionExecutor;
 import org.apache.hudi.table.action.deltacommit.BulkInsertPreppedDeltaCommitActionExecutor;
@@ -131,6 +133,11 @@ public HoodieWriteMetadata compact(JavaSparkContext jsc, String compactionInstan
     return compactionExecutor.execute();
   }
 
+  @Override
+  public HoodieBootstrapWriteMetadata bootstrap(JavaSparkContext jsc, Option<Map<String, String>> extraMetadata) {
+    return new BootstrapDeltaCommitActionExecutor(jsc, config, this, extraMetadata).execute();
+  }
+
   @Override
   public void rollbackBootstrap(JavaSparkContext jsc, String instantTime) {
     new MergeOnReadRestoreActionExecutor(jsc, config, this, instantTime, HoodieTimeline.INIT_INSTANT_TS).execute();
diff --git a/hudi-client/src/main/java/org/apache/hudi/table/HoodieTable.java b/hudi-client/src/main/java/org/apache/hudi/table/HoodieTable.java
index 4a2fcc17e1bfb..b414e91bf4fd9 100644
--- a/hudi-client/src/main/java/org/apache/hudi/table/HoodieTable.java
+++ b/hudi-client/src/main/java/org/apache/hudi/table/HoodieTable.java
@@ -333,9 +333,11 @@ public abstract HoodieWriteMetadata compact(JavaSparkContext jsc,
   /**
    * Perform metadata/full bootstrap of a Hudi table.
    * @param jsc JavaSparkContext
+   * @param extraMetadata Additional metadata to store in the commit file.
    * @return HoodieBootstrapWriteMetadata
    */
-  public abstract HoodieBootstrapWriteMetadata bootstrap(JavaSparkContext jsc);
+  public abstract HoodieBootstrapWriteMetadata bootstrap(JavaSparkContext jsc,
+      Option<Map<String, String>> extraMetadata);
 
   /**
    * Perform rollback of bootstrap of a Hudi table.
diff --git a/hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapActionExecutor.java b/hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapCommitActionExecutor.java
similarity index 97%
rename from hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapActionExecutor.java
rename to hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapCommitActionExecutor.java
index 1daf12311e9b6..65fbaf59f752c 100644
--- a/hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapActionExecutor.java
+++ b/hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapCommitActionExecutor.java
@@ -75,6 +75,7 @@
 import org.apache.avro.generic.IndexedRecord;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
+import org.apache.hudi.table.action.commit.CommitActionExecutor;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.parquet.avro.AvroParquetReader;
@@ -96,17 +97,19 @@
 import java.util.Map;
 import java.util.stream.Collectors;
 
-public class BootstrapActionExecutor<T extends HoodieRecordPayload<T>>
+public class BootstrapCommitActionExecutor<T extends HoodieRecordPayload<T>>
     extends BaseCommitActionExecutor<T, HoodieBootstrapWriteMetadata> {
 
-  private static final Logger LOG = LogManager.getLogger(BootstrapActionExecutor.class);
-  private String bootstrapSchema = null;
+  private static final Logger LOG = LogManager.getLogger(BootstrapCommitActionExecutor.class);
+  protected String bootstrapSchema = null;
 
-  public BootstrapActionExecutor(JavaSparkContext jsc, HoodieWriteConfig config, HoodieTable<?> table) {
+  public BootstrapCommitActionExecutor(JavaSparkContext jsc, HoodieWriteConfig config, HoodieTable<?> table,
+      Option<Map<String, String>> extraMetadata) {
     super(jsc, new HoodieWriteConfig.Builder().withProps(config.getProps())
         .withAutoCommit(true).withWriteStatusClass(BootstrapWriteStatus.class)
         .withBulkInsertParallelism(config.getBootstrapParallelism())
-        .build(), table, HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS, WriteOperationType.BOOTSTRAP);
+        .build(), table, HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS, WriteOperationType.BOOTSTRAP,
+        extraMetadata);
   }
 
   private void checkArguments() {
@@ -143,6 +146,7 @@ public HoodieBootstrapWriteMetadata execute() {
     }
   }
 
+  @Override
   protected String getSchemaToStoreInCommit() {
     return bootstrapSchema;
   }
@@ -220,9 +224,21 @@ protected HoodieWriteMetadata fullBootstrap(List<Pair<String, List<HoodieFileSta
         HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS);
     table.getActiveTimeline().createNewInstant(requested);
     // Setup correct schema and run bulk insert.
+    return getBulkInsertActionExecutor(inputRecordsRDD).execute();
+  }
+
+  /**
+   * Creates the executor that runs the full-bootstrap bulk insert using the bootstrap schema.
+   * Kept as a separate protected method so subclasses can supply a different bulk-insert executor.
+   *
+   * @param inputRecordsRDD records to bulk insert
+   * @return executor bound to the {@code FULL_BOOTSTRAP_INSTANT_TS} instant
+   */
+  protected CommitActionExecutor<T> getBulkInsertActionExecutor(JavaRDD<HoodieRecord> inputRecordsRDD) {
     return new BulkInsertCommitActionExecutor(jsc, new HoodieWriteConfig.Builder().withProps(config.getProps())
         .withSchema(bootstrapSchema).build(), table, HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS,
-        inputRecordsRDD, Option.empty()).execute();
+        // No custom partitioner here; extraMetadata is the 7th argument, not the partitioner slot.
+        inputRecordsRDD, Option.empty(), extraMetadata);
   }
 
   private BootstrapWriteStatus handleMetadataBootstrap(String srcPartitionPath, String partitionPath,
diff --git a/hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapDeltaCommitActionExecutor.java b/hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapDeltaCommitActionExecutor.java
new file mode 100644
index 0000000000000..08760cc3d272f
--- /dev/null
+++ b/hudi-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapDeltaCommitActionExecutor.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.action.bootstrap;
+
+import java.util.Map;
+import org.apache.hudi.common.model.HoodieRecord;
+import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.table.timeline.HoodieTimeline;
+import org.apache.hudi.common.util.Option;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.table.HoodieTable;
+import org.apache.hudi.table.action.commit.CommitActionExecutor;
+import org.apache.hudi.table.action.deltacommit.BulkInsertDeltaCommitActionExecutor;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+
+/**
+ * Bootstrap executor whose full-bootstrap bulk insert is performed by a
+ * {@link BulkInsertDeltaCommitActionExecutor} instead of the commit-based executor of the parent class.
+ */
+public class BootstrapDeltaCommitActionExecutor<T extends HoodieRecordPayload<T>>
+    extends BootstrapCommitActionExecutor<T> {
+
+  public BootstrapDeltaCommitActionExecutor(JavaSparkContext jsc,
+      HoodieWriteConfig config, HoodieTable<?> table,
+      Option<Map<String, String>> extraMetadata) {
+    super(jsc, config, table, extraMetadata);
+  }
+
+  /**
+   * Creates the delta-commit bulk-insert executor used for the full bootstrap.
+   *
+   * @param inputRecordsRDD records to bulk insert
+   * @return executor bound to the {@code FULL_BOOTSTRAP_INSTANT_TS} instant
+   */
+  @Override
+  protected CommitActionExecutor<T> getBulkInsertActionExecutor(JavaRDD<HoodieRecord> inputRecordsRDD) {
+    return new BulkInsertDeltaCommitActionExecutor(jsc, new HoodieWriteConfig.Builder().withProps(config.getProps())
+        .withSchema(bootstrapSchema).build(), table, HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS,
+        // No custom partitioner here; extraMetadata is the 7th argument, not the partitioner slot.
+        inputRecordsRDD, Option.empty(), extraMetadata);
+  }
+}
diff --git a/hudi-client/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java b/hudi-client/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java
index f52317fac5bcb..e1dbd48f62506 100644
--- a/hudi-client/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java
+++ b/hudi-client/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java
@@ -60,19 +60,21 @@ public abstract class BaseCommitActionExecutor<T extends HoodieRecordPayload<T>,
 
   private static final Logger LOG = LogManager.getLogger(BaseCommitActionExecutor.class);
 
+  protected final Option<Map<String, String>> extraMetadata;
   private final WriteOperationType operationType;
   protected final SparkTaskContextSupplier sparkTaskContextSupplier = new SparkTaskContextSupplier();
 
   public BaseCommitActionExecutor(JavaSparkContext jsc, HoodieWriteConfig config,
       HoodieTable table, String instantTime, WriteOperationType operationType) {
-    this(jsc, config, table, instantTime, operationType, null);
+    this(jsc, config, table, instantTime, operationType, Option.empty());
   }
 
   public BaseCommitActionExecutor(JavaSparkContext jsc, HoodieWriteConfig config,
       HoodieTable table, String instantTime, WriteOperationType operationType,
-      JavaRDD<HoodieRecord<T>> inputRecordsRDD) {
+      Option<Map<String, String>> extraMetadata) {
     super(jsc, config, table, instantTime);
     this.operationType = operationType;
+    this.extraMetadata = extraMetadata;
   }
 
   public HoodieWriteMetadata execute(JavaRDD<HoodieRecord<T>> inputRecordsRDD) {
@@ -171,7 +173,7 @@ protected void updateIndexAndCommitIfNeeded(JavaRDD<WriteStatus> writeStatusRDD,
   protected void commitOnAutoCommit(HoodieWriteMetadata result) {
     if (config.shouldAutoCommit()) {
       LOG.info("Auto commit enabled: Committing " + instantTime);
-      commit(Option.empty(), result);
+      commit(extraMetadata, result);
     } else {
       LOG.info("Auto commit disabled for " + instantTime);
     }
diff --git a/hudi-client/src/main/java/org/apache/hudi/table/action/commit/BulkInsertCommitActionExecutor.java b/hudi-client/src/main/java/org/apache/hudi/table/action/commit/BulkInsertCommitActionExecutor.java
index 9f5468e5c721d..4929865fbf57c 100644
--- a/hudi-client/src/main/java/org/apache/hudi/table/action/commit/BulkInsertCommitActionExecutor.java
+++ b/hudi-client/src/main/java/org/apache/hudi/table/action/commit/BulkInsertCommitActionExecutor.java
@@ -18,6 +18,7 @@
 
 package org.apache.hudi.table.action.commit;
 
+import java.util.Map;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieRecordPayload;
 import org.apache.hudi.common.model.WriteOperationType;
@@ -41,7 +42,14 @@ public BulkInsertCommitActionExecutor(JavaSparkContext jsc,
       HoodieWriteConfig config, HoodieTable table,
       String instantTime, JavaRDD<HoodieRecord<T>> inputRecordsRDD,
       Option<UserDefinedBulkInsertPartitioner> bulkInsertPartitioner) {
-    super(jsc, config, table, instantTime, WriteOperationType.BULK_INSERT);
+    this(jsc, config, table, instantTime, inputRecordsRDD, bulkInsertPartitioner, Option.empty());
+  }
+
+  public BulkInsertCommitActionExecutor(JavaSparkContext jsc,
+      HoodieWriteConfig config, HoodieTable table,
+      String instantTime, JavaRDD<HoodieRecord<T>> inputRecordsRDD,
+      Option<UserDefinedBulkInsertPartitioner> bulkInsertPartitioner, Option<Map<String, String>> extraMetadata) {
+    super(jsc, config, table, instantTime, WriteOperationType.BULK_INSERT, extraMetadata);
     this.inputRecordsRDD = inputRecordsRDD;
     this.bulkInsertPartitioner = bulkInsertPartitioner;
   }
diff --git a/hudi-client/src/main/java/org/apache/hudi/table/action/commit/CommitActionExecutor.java b/hudi-client/src/main/java/org/apache/hudi/table/action/commit/CommitActionExecutor.java
index 196600dc1b15e..a0d68e8b87073 100644
--- a/hudi-client/src/main/java/org/apache/hudi/table/action/commit/CommitActionExecutor.java
+++ b/hudi-client/src/main/java/org/apache/hudi/table/action/commit/CommitActionExecutor.java
@@ -23,6 +23,7 @@
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieRecordPayload;
 import org.apache.hudi.common.model.WriteOperationType;
+import org.apache.hudi.common.util.Option;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieUpsertException;
 import org.apache.hudi.execution.LazyInsertIterable;
@@ -50,7 +51,13 @@ public abstract class CommitActionExecutor<T extends HoodieRecordPayload<T>>
   public CommitActionExecutor(JavaSparkContext jsc,
       HoodieWriteConfig config, HoodieTable table,
       String instantTime, WriteOperationType operationType) {
-    super(jsc, config, table, instantTime, operationType);
+    this(jsc, config, table, instantTime, operationType, Option.empty());
+  }
+
+  public CommitActionExecutor(JavaSparkContext jsc,
+      HoodieWriteConfig config, HoodieTable table,
+      String instantTime, WriteOperationType operationType, Option<Map<String, String>> extraMetadata) {
+    super(jsc, config, table, instantTime, operationType, extraMetadata);
   }
 
   @Override
diff --git a/hudi-client/src/main/java/org/apache/hudi/table/action/deltacommit/BulkInsertDeltaCommitActionExecutor.java b/hudi-client/src/main/java/org/apache/hudi/table/action/deltacommit/BulkInsertDeltaCommitActionExecutor.java
index 5e4b915cb2cf3..88bf4f5739f89 100644
--- a/hudi-client/src/main/java/org/apache/hudi/table/action/deltacommit/BulkInsertDeltaCommitActionExecutor.java
+++ b/hudi-client/src/main/java/org/apache/hudi/table/action/deltacommit/BulkInsertDeltaCommitActionExecutor.java
@@ -18,6 +18,7 @@
 
 package org.apache.hudi.table.action.deltacommit;
 
+import java.util.Map;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieRecordPayload;
 import org.apache.hudi.common.model.WriteOperationType;
@@ -42,7 +43,14 @@ public BulkInsertDeltaCommitActionExecutor(JavaSparkContext jsc,
       HoodieWriteConfig config, HoodieTable table,
       String instantTime, JavaRDD<HoodieRecord<T>> inputRecordsRDD,
       Option<UserDefinedBulkInsertPartitioner> bulkInsertPartitioner) {
-    super(jsc, config, table, instantTime, WriteOperationType.BULK_INSERT);
+    this(jsc, config, table, instantTime, inputRecordsRDD, bulkInsertPartitioner, Option.empty());
+  }
+
+  public BulkInsertDeltaCommitActionExecutor(JavaSparkContext jsc,
+      HoodieWriteConfig config, HoodieTable table,
+      String instantTime, JavaRDD<HoodieRecord<T>> inputRecordsRDD,
+      Option<UserDefinedBulkInsertPartitioner> bulkInsertPartitioner, Option<Map<String, String>> extraMetadata) {
+    super(jsc, config, table, instantTime, WriteOperationType.BULK_INSERT, extraMetadata);
     this.inputRecordsRDD = inputRecordsRDD;
     this.bulkInsertPartitioner = bulkInsertPartitioner;
   }
diff --git a/hudi-client/src/main/java/org/apache/hudi/table/action/deltacommit/DeltaCommitActionExecutor.java b/hudi-client/src/main/java/org/apache/hudi/table/action/deltacommit/DeltaCommitActionExecutor.java
index be3806e46c236..b50581fad7206 100644
--- a/hudi-client/src/main/java/org/apache/hudi/table/action/deltacommit/DeltaCommitActionExecutor.java
+++ b/hudi-client/src/main/java/org/apache/hudi/table/action/deltacommit/DeltaCommitActionExecutor.java
@@ -18,10 +18,12 @@
 
 package org.apache.hudi.table.action.deltacommit;
 
+import java.util.Map;
 import org.apache.hudi.client.WriteStatus;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieRecordPayload;
 import org.apache.hudi.common.model.WriteOperationType;
+import org.apache.hudi.common.util.Option;
 import org.apache.hudi.config.HoodieWriteConfig;
 import org.apache.hudi.exception.HoodieUpsertException;
 import org.apache.hudi.execution.LazyInsertIterable;
@@ -51,7 +53,13 @@ public abstract class DeltaCommitActionExecutor<T extends HoodieRecordPayload<T>
   public DeltaCommitActionExecutor(JavaSparkContext jsc,
       HoodieWriteConfig config, HoodieTable table,
       String instantTime, WriteOperationType operationType) {
-    super(jsc, config, table, instantTime, operationType);
+    this(jsc, config, table, instantTime, operationType, Option.empty());
+  }
+
+  public DeltaCommitActionExecutor(JavaSparkContext jsc,
+      HoodieWriteConfig config, HoodieTable table,
+      String instantTime, WriteOperationType operationType, Option<Map<String, String>> extraMetadata) {
+    super(jsc, config, table, instantTime, operationType, extraMetadata);
   }
 
   @Override
diff --git a/hudi-client/src/test/java/org/apache/hudi/common/HoodieMergeOnReadTestUtils.java b/hudi-client/src/test/java/org/apache/hudi/common/HoodieMergeOnReadTestUtils.java
index 22dc0f4db5bd8..53c56bf8cf5cd 100644
--- a/hudi-client/src/test/java/org/apache/hudi/common/HoodieMergeOnReadTestUtils.java
+++ b/hudi-client/src/test/java/org/apache/hudi/common/HoodieMergeOnReadTestUtils.java
@@ -18,8 +18,11 @@
 
 package org.apache.hudi.common;
 
+import org.apache.avro.Schema.Field;
+import org.apache.hadoop.hive.ql.io.IOConstants;
 import org.apache.hudi.avro.HoodieAvroUtils;
 import org.apache.hudi.common.model.HoodieTestUtils;
+import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.hadoop.HoodieParquetInputFormat;
 import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat;
 
@@ -45,6 +48,7 @@
  * Utility methods to aid in testing MergeOnRead (workaround for HoodieReadClient for MOR).
  */
 public class HoodieMergeOnReadTestUtils {
+
   public static List<GenericRecord> getRecordsUsingInputFormat(List<String> inputPaths, String basePath) {
     return getRecordsUsingInputFormat(inputPaths, basePath, new Configuration());
   }
@@ -56,19 +60,36 @@ public static List<GenericRecord> getRecordsUsingInputFormat(List<String> inputP
   }
 
   public static List<GenericRecord> getRecordsUsingInputFormat(List<String> inputPaths,
-                                                               String basePath,
-                                                               JobConf jobConf,
-                                                               HoodieParquetInputFormat inputFormat) {
+      String basePath,
+      JobConf jobConf,
+      HoodieParquetInputFormat inputFormat) {
     Schema schema = new Schema.Parser().parse(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA);
     return getRecordsUsingInputFormat(inputPaths, basePath, jobConf, inputFormat, schema,
         HoodieTestDataGenerator.TRIP_HIVE_COLUMN_TYPES);
   }
 
   public static List<GenericRecord> getRecordsUsingInputFormat(List<String> inputPaths, String basePath,
-        JobConf jobConf, HoodieParquetInputFormat inputFormat, Schema rawSchema, String rawHiveColumnTypes) {
+      JobConf jobConf, HoodieParquetInputFormat inputFormat, Schema rawSchema, String rawHiveColumnTypes) {
+    return getRecordsUsingInputFormat(inputPaths, basePath, jobConf, inputFormat, rawSchema, rawHiveColumnTypes,
+        false, new ArrayList<>());
+  }
+
+  public static List<GenericRecord> getRecordsUsingInputFormat(List<String> inputPaths, String basePath,
+        JobConf jobConf, HoodieParquetInputFormat inputFormat, Schema rawSchema, String rawHiveColumnTypes,
+      boolean projectCols, List<String> projectedColumns) {
     Schema schema = HoodieAvroUtils.addMetadataFields(rawSchema);
     String hiveColumnTypes = HoodieAvroUtils.addMetadataColumnTypes(rawHiveColumnTypes);
-    setPropsForInputFormat(inputFormat, jobConf, schema, hiveColumnTypes);
+    setPropsForInputFormat(inputFormat, jobConf, schema, hiveColumnTypes, projectCols, projectedColumns);
+    final List<Field> fields;
+    if (projectCols) {
+      fields = schema.getFields().stream().filter(f -> projectedColumns.contains(f.name()))
+          .collect(Collectors.toList());
+    } else {
+      fields = schema.getFields();
+    }
+    final Schema projectedSchema = Schema.createRecord(fields.stream()
+        .map(f -> new Schema.Field(f.name(), f.schema(), f.doc(), f.defaultVal()))
+        .collect(Collectors.toList()));
     return inputPaths.stream().map(path -> {
       setInputPath(jobConf, path);
       List<GenericRecord> records = new ArrayList<>();
@@ -76,17 +97,19 @@ public static List<GenericRecord> getRecordsUsingInputFormat(List<String> inputP
         List<InputSplit> splits = Arrays.asList(inputFormat.getSplits(jobConf, 1));
         for (InputSplit split : splits) {
           RecordReader recordReader = inputFormat.getRecordReader(split, jobConf, null);
-          Void key = (Void) recordReader.createKey();
+          Object key = recordReader.createKey();
           ArrayWritable writable = (ArrayWritable) recordReader.createValue();
           while (recordReader.next(key, writable)) {
-            GenericRecordBuilder newRecord = new GenericRecordBuilder(schema);
+            GenericRecordBuilder newRecord = new GenericRecordBuilder(projectedSchema);
             // writable returns an array with [field1, field2, _hoodie_commit_time,
             // _hoodie_commit_seqno]
             Writable[] values = writable.get();
-            assert schema.getFields().size() <= values.length;
-            schema.getFields().forEach(field -> {
-              newRecord.set(field, values[field.pos()]);
-            });
+            assert projectedSchema.getFields().size() <= values.length;
+            schema.getFields().stream()
+                .filter(f -> !projectCols || projectedColumns.contains(f.name()))
+                .map(f -> Pair.of(projectedSchema.getFields().stream()
+                        .filter(p -> f.name().equals(p.name())).findFirst().get(), f))
+                .forEach(fieldsPair -> newRecord.set(fieldsPair.getKey(), values[fieldsPair.getValue().pos()]));
             records.add(newRecord.build());
           }
         }
@@ -101,16 +124,27 @@ public static List<GenericRecord> getRecordsUsingInputFormat(List<String> inputP
   }
 
   private static void setPropsForInputFormat(HoodieParquetInputFormat inputFormat, JobConf jobConf,
-      Schema schema, String hiveColumnTyps) {
+      Schema schema, String hiveColumnTyps, boolean projectCols, List<String> projectedCols) {
     List<Schema.Field> fields = schema.getFields();
-    String names = fields.stream().map(f -> f.name().toString()).collect(Collectors.joining(","));
-    String postions = fields.stream().map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
-    Configuration conf = HoodieTestUtils.getDefaultHadoopConf();
+    final List<String> projectedColNames;
+    if (!projectCols) {
+      projectedColNames = fields.stream().map(f -> f.name().toString()).collect(Collectors.toList());
+    } else {
+      projectedColNames = projectedCols;
+    }
 
-    String hiveColumnNames = fields.stream().filter(field -> !field.name().equalsIgnoreCase("datestr"))
+    String names = fields.stream()
+        .filter(f -> projectedColNames.contains(f.name().toString()))
+        .map(f -> f.name().toString()).collect(Collectors.joining(","));
+    String postions = fields.stream()
+        .filter(f -> projectedColNames.contains(f.name().toString()))
+        .map(f -> String.valueOf(f.pos())).collect(Collectors.joining(","));
+    String hiveColumnNames = fields.stream()
+        .filter(field -> !field.name().equalsIgnoreCase("datestr"))
         .map(Schema.Field::name).collect(Collectors.joining(","));
     hiveColumnNames = hiveColumnNames + ",datestr";
 
+    Configuration conf = HoodieTestUtils.getDefaultHadoopConf();
     String hiveColumnTypes = hiveColumnTyps;
     hiveColumnTypes = hiveColumnTypes + ",string";
     jobConf.set(hive_metastoreConstants.META_TABLE_COLUMNS, hiveColumnNames);
@@ -123,6 +157,8 @@ private static void setPropsForInputFormat(HoodieParquetInputFormat inputFormat,
     conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, postions);
     conf.set(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS, "datestr");
     conf.set(hive_metastoreConstants.META_TABLE_COLUMN_TYPES, hiveColumnTypes);
+    conf.set(IOConstants.COLUMNS, hiveColumnNames);
+    conf.set(IOConstants.COLUMNS_TYPES, hiveColumnTypes); // must be set (not read) so types accompany COLUMNS
     inputFormat.setConf(conf);
     jobConf.addResource(conf);
   }
diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
index 99b8bab8763d8..86b5b5806816d 100644
--- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java
@@ -445,4 +445,8 @@ private static boolean isLogicalTypeDate(Schema fieldSchema) {
     }
     return fieldSchema.getLogicalType() == LogicalTypes.date();
   }
+
+  public static Schema getNullSchema() {
+    return Schema.create(Schema.Type.NULL);
+  }
 }
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/BootstrapIndex.java b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/BootstrapIndex.java
index f3e47f179bbc1..73f0c36aa0706 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/BootstrapIndex.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/BootstrapIndex.java
@@ -34,6 +34,8 @@
  */
 public abstract class BootstrapIndex implements Serializable {
 
+  protected static final long serialVersionUID = 1L;
+
   protected final HoodieTableMetaClient metaClient;
 
   public BootstrapIndex(HoodieTableMetaClient metaClient) {
@@ -71,7 +73,7 @@ public final boolean isIndexAvailable() {
   /**
    * Check if bootstrap Index is present and ensures readable.
    */
-  protected abstract boolean checkIndex();
+  public abstract boolean checkIndex();
 
   /**
    * Bootstrap Index Reader Interface.
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBasedBootstrapIndex.java b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBasedBootstrapIndex.java
index f93a5642f6a4e..ece86b878e99a 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBasedBootstrapIndex.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBasedBootstrapIndex.java
@@ -132,8 +132,8 @@ private static Path getIndexByFileIdPath(HoodieTableMetaClient metaClient) {
   private static HFile.Reader createReader(String hFilePath, Configuration conf, FileSystem fileSystem) {
     try {
       LOG.info("Opening HFile for reading :" + hFilePath);
-      HFile.Reader reader = HFile.createReader(fileSystem, new HFilePathForReader(hFilePath), new CacheConfig(conf),
-          conf);
+      HFile.Reader reader = HFile.createReader(fileSystem, new HFilePathForReader(hFilePath),
+          new CacheConfig(conf), conf);
       return reader;
     } catch (IOException ioe) {
       throw new HoodieIOException(ioe.getMessage(), ioe);
@@ -166,7 +166,7 @@ public void dropIndex() {
   }
 
   @Override
-  protected boolean checkIndex() {
+  public boolean checkIndex() {
     return hasIndex;
   }
 
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java
index 1d5f238a1dc0d..d1e2cc6adb67b 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java
@@ -18,6 +18,7 @@
 
 package org.apache.hudi.common.model;
 
+import org.apache.hadoop.fs.Path;
 import org.apache.hudi.common.fs.FSUtils;
 
 import com.fasterxml.jackson.annotation.JsonAutoDetect;
@@ -126,6 +127,18 @@ public HashMap<String, String> getFileIdAndFullPaths(String basePath) {
     return fullPaths;
   }
 
+  public Map<HoodieFileGroupId, String> getFileGroupIdAndFullPaths(String basePath) {
+    // Maps each write stat's file group (partition path + file id) to its path resolved against basePath.
+    final Map<HoodieFileGroupId, String> pathsByFileGroup = new HashMap<>();
+    for (List<HoodieWriteStat> partitionStats : getPartitionToWriteStats().values()) {
+      for (HoodieWriteStat writeStat : partitionStats) {
+        pathsByFileGroup.put(new HoodieFileGroupId(writeStat.getPartitionPath(), writeStat.getFileId()),
+            new Path(basePath, writeStat.getPath()).toString());
+      }
+    }
+    return pathsByFileGroup;
+  }
+
   public String toJsonString() throws IOException {
     if (partitionToWriteStats.containsKey(null)) {
       LOG.info("partition path is null for " + partitionToWriteStats.get(null));
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java
index 94298be96a08b..650a341a17892 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java
@@ -338,6 +338,13 @@ public static HoodieTableMetaClient initTableType(Configuration hadoopConf, Stri
   public static HoodieTableMetaClient initTableType(Configuration hadoopConf, String basePath,
       HoodieTableType tableType, String tableName, String archiveLogFolder, String payloadClassName,
       Integer timelineLayoutVersion) throws IOException {
+    return initTableType(hadoopConf, basePath, tableType, tableName, archiveLogFolder, payloadClassName, timelineLayoutVersion, null);
+  }
+
+  public static HoodieTableMetaClient initTableType(Configuration hadoopConf, String basePath,
+      HoodieTableType tableType, String tableName, String archiveLogFolder, String payloadClassName,
+      Integer timelineLayoutVersion, String bootstrapIndexClassName) throws IOException {
+
     Properties properties = new Properties();
     properties.setProperty(HoodieTableConfig.HOODIE_TABLE_NAME_PROP_NAME, tableName);
     properties.setProperty(HoodieTableConfig.HOODIE_TABLE_TYPE_PROP_NAME, tableType.name());
@@ -352,6 +359,10 @@ public static HoodieTableMetaClient initTableType(Configuration hadoopConf, Stri
     if (null != timelineLayoutVersion) {
       properties.put(HoodieTableConfig.HOODIE_TIMELINE_LAYOUT_VERSION, String.valueOf(timelineLayoutVersion));
     }
+
+    if (null != bootstrapIndexClassName) {
+      properties.put(HoodieTableConfig.HOODIE_BOOTSTRAP_INDEX_CLASS_PROP_NAME, bootstrapIndexClassName);
+    }
     return HoodieTableMetaClient.initTableAndGetMetaClient(hadoopConf, basePath, properties);
   }
 
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java
index 3c8d9e8614b27..eb5e2b5fa068f 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java
@@ -190,7 +190,7 @@ private static Map<String, String> readParquetFooter(Configuration configuration
   }
 
   public static Schema readAvroSchema(Configuration configuration, Path parquetFilePath) {
-    return new AvroSchemaConverter().convert(readSchema(configuration, parquetFilePath));
+    return new AvroSchemaConverter(configuration).convert(readSchema(configuration, parquetFilePath));
   }
 
   /**
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieColumnProjectionUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieColumnProjectionUtils.java
index 55891f9bc2e4d..b7141a8ee762f 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieColumnProjectionUtils.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieColumnProjectionUtils.java
@@ -19,6 +19,7 @@
 package org.apache.hudi.hadoop;
 
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hudi.common.util.CollectionUtils;
 import org.apache.hudi.common.util.ValidationUtils;
 import org.apache.hudi.common.util.collection.Pair;
 
@@ -39,8 +40,8 @@
 
 /**
  * Utility funcitons copied from Hive ColumnProjectionUtils.java.
- * Needed to copy as we see NoSuchMethod errors when directly using these APIs with/without Spark. Some of these
- * methods are not available across hive versions.
+ * Needed to copy as we see NoSuchMethod errors when directly using these APIs with/without Spark.
+ * Some of these methods are not available across hive versions.
  */
 public class HoodieColumnProjectionUtils {
   public static final Logger LOG = LoggerFactory.getLogger(ColumnProjectionUtils.class);
@@ -63,6 +64,15 @@ public class HoodieColumnProjectionUtils {
 
   private static final String COMMA = ",";
 
+  /** Special Column Names added during Parquet Projection. **/
+  public static final String PARQUET_BLOCK_OFFSET_COL_NAME = "BLOCK__OFFSET__INSIDE__FILE";
+  public static final String PARQUET_INPUT_FILE_NAME = "INPUT__FILE__NAME";
+  public static final String PARQUET_ROW_ID = "ROW__ID";
+
+  public static final List<String> PARQUET_SPECIAL_COLUMN_NAMES =  CollectionUtils
+      .createImmutableList(PARQUET_BLOCK_OFFSET_COL_NAME, PARQUET_INPUT_FILE_NAME,
+          PARQUET_ROW_ID);
+
   /**
    * Sets the <em>READ_ALL_COLUMNS</em> flag and removes any previously
    * set column ids.
@@ -88,6 +98,7 @@ public static void setReadColumns(Configuration conf, List<Integer> ids, List<St
     setReadColumnIDConf(conf, READ_COLUMN_IDS_CONF_STR_DEFAULT);
     setReadColumnNamesConf(conf, READ_COLUMN_NAMES_CONF_STR_DEFAULT);
     appendReadColumns(conf, ids);
+    appendReadColumnNames(conf, names);
   }
 
   /**
@@ -210,7 +221,6 @@ public static List<String> getIOColumns(Configuration conf) {
 
   public static List<String> getIOColumnTypes(Configuration conf) {
     String colTypes = conf.get(IOConstants.COLUMNS_TYPES, "");
-    TypeInfoUtils.getTypeInfosFromTypeString(colTypes);
     if (colTypes != null && !colTypes.isEmpty()) {
       return TypeInfoUtils.getTypeInfosFromTypeString(colTypes).stream()
           .map(t -> t.getTypeName()).collect(Collectors.toList());
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieColumnStichingRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieColumnStichingRecordReader.java
index 0ae65e584b400..f47ce6f8aa6a9 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieColumnStichingRecordReader.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieColumnStichingRecordReader.java
@@ -39,6 +39,7 @@ public class HoodieColumnStichingRecordReader implements RecordReader<NullWritab
   private final int numLeftColumns;
   private final int numRightColumns;
   private final ArrayWritable values;
+  private final boolean validate;
 
   public HoodieColumnStichingRecordReader(RecordReader<NullWritable, ArrayWritable> left,
       int numLeftColumns, RecordReader<NullWritable, ArrayWritable> right, int numRightColumns, boolean validate) {
@@ -78,7 +79,7 @@ public boolean next(NullWritable key, ArrayWritable value) throws IOException {
     for (int j = numLeftColumns; j < right.get().length; j++) {
       value.get()[j] = right.get()[j];
     }
-    return hasMoreOnLeft;
+    return hasMoreOnLeft && hasMoreOnRight;
   }
 
   @Override
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java
index 3ed8853b83f78..5c0bbf85d9089 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java
@@ -18,10 +18,10 @@
 
 package org.apache.hudi.hadoop;
 
-import org.apache.hudi.avro.HoodieAvroUtils;
 import org.apache.hudi.common.model.HoodieBaseFile;
 import org.apache.hudi.common.model.HoodieCommitMetadata;
 import org.apache.hudi.common.model.HoodiePartitionMetadata;
+import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline;
 import org.apache.hudi.common.table.timeline.HoodieInstant;
@@ -40,6 +40,7 @@
 import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapred.FileSplit;
@@ -360,13 +361,21 @@ public RecordReader<NullWritable, ArrayWritable> getRecordReader(final InputSpli
       ExternalBaseFileSplit eSplit = (ExternalBaseFileSplit)split;
       String[] rawColNames = HoodieColumnProjectionUtils.getReadColumnNames(job);
       List<Integer> rawColIds = HoodieColumnProjectionUtils.getReadColumnIDs(job);
-      List<Pair<Integer, String>> colsWithIndex =
+      List<Pair<Integer, String>> projectedColsWithIndex =
           IntStream.range(0, rawColIds.size()).mapToObj(idx -> Pair.of(rawColIds.get(idx), rawColNames[idx]))
               .collect(Collectors.toList());
 
-      List<Pair<Integer, String>> hoodieColsProjected = colsWithIndex.stream()
-          .filter(idxWithName -> idxWithName.getKey() < HoodieAvroUtils.NUM_HUDI_METADATA_COLS)
+      List<Pair<Integer, String>> hoodieColsProjected = projectedColsWithIndex.stream()
+          .filter(idxWithName -> HoodieRecord.HOODIE_META_COLUMNS.contains(idxWithName.getValue()))
           .collect(Collectors.toList());
+      List<Pair<Integer, String>> externalColsProjected = projectedColsWithIndex.stream()
+          .filter(idxWithName -> !HoodieRecord.HOODIE_META_COLUMNS.contains(idxWithName.getValue())
+              && !VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(idxWithName.getValue()))
+          .collect(Collectors.toList());
+      List<Pair<Integer, String>> virtualParquetColsProjected = projectedColsWithIndex.stream()
+          .filter(idxWithName -> VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(
+              idxWithName.getValue())).collect(Collectors.toList());
+
       // This always matches hive table description
       List<Pair<String, String>> colNameWithTypes = HoodieColumnProjectionUtils.getIOColumnNameAndTypes(job);
       List<Pair<String, String>> hoodieColNamesOnlyWithTypes = colNameWithTypes.stream()
@@ -389,34 +398,40 @@ public RecordReader<NullWritable, ArrayWritable> getRecordReader(final InputSpli
       } else if (externalColsProjected.isEmpty()) {
         return super.getRecordReader(split, job, reporter);
       } else {
+        JobConf jobConf1 = new JobConf(job);
+        JobConf jobConf2 = new JobConf(job);
+        HoodieColumnProjectionUtils.setIOColumnNameAndTypes(jobConf1, hoodieColNamesOnlyWithTypes);
+        HoodieColumnProjectionUtils.setIOColumnNameAndTypes(jobConf2, colNamesWithTypesForExternal);
+
+        // Adjust Projection Settings
         HoodieColumnProjectionUtils.setReadColumns(jobConf1, new ArrayList<>(), new ArrayList<>());
         HoodieColumnProjectionUtils.setReadColumns(jobConf2, new ArrayList<>(), new ArrayList<>());
-        List<String> hoodieColNames = colsWithIndex.stream()
-            .filter(idxWithName -> idxWithName.getKey() < HoodieAvroUtils.NUM_HUDI_METADATA_COLS)
+        List<String> hoodieColNames = projectedColsWithIndex.stream()
+            .filter(idxWithName -> HoodieRecord.HOODIE_META_COLUMNS.contains(idxWithName.getValue()))
             .map(idxWithName -> idxWithName.getValue()).collect(Collectors.toList());
-        List<Integer> hoodieColIds = colsWithIndex.stream()
-            .filter(idxWithName -> idxWithName.getKey() < HoodieAvroUtils.NUM_HUDI_METADATA_COLS)
+        List<Integer> hoodieColIds = projectedColsWithIndex.stream()
+            .filter(idxWithName -> HoodieRecord.HOODIE_META_COLUMNS.contains(idxWithName.getValue()))
             .map(idxWithName -> idxWithName.getKey()).collect(Collectors.toList());
-        List<String> nonHoodieColNames = colsWithIndex.stream()
-            .filter(idxWithName -> idxWithName.getKey() >= HoodieAvroUtils.NUM_HUDI_METADATA_COLS)
+
+        List<String> externalColNamesWithVirtualCols = projectedColsWithIndex.stream()
+            .filter(idxWithName -> !HoodieRecord.HOODIE_META_COLUMNS.contains(idxWithName.getValue()))
             .map(idxWithName -> idxWithName.getValue()).collect(Collectors.toList());
-        List<Integer> nonHoodieColIdsAdjusted = colsWithIndex.stream()
-            .filter(idxWithName -> idxWithName.getKey() >= HoodieAvroUtils.NUM_HUDI_METADATA_COLS)
-            .map(idxWithName -> idxWithName.getKey() - HoodieAvroUtils.NUM_HUDI_METADATA_COLS)
+        List<Integer> externalColIds = projectedColsWithIndex.stream()
+            .filter(idxWithName -> !HoodieRecord.HOODIE_META_COLUMNS.contains(idxWithName.getValue())
+                && !VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(idxWithName.getValue()))
+            .map(idxWithName -> idxWithName.getKey() - HoodieRecord.HOODIE_META_COLUMNS.size())
             .collect(Collectors.toList());
+        List<Integer> externalColIdsWithVirtualCols = new ArrayList<>(externalColIds);
+        IntStream.range(0, virtualParquetColsProjected.size())
+            .forEach(idx -> externalColIdsWithVirtualCols.add(idx + externalColIds.size()));
+        HoodieColumnProjectionUtils.setReadColumns(jobConf1, new ArrayList<>(), new ArrayList<>());
+        HoodieColumnProjectionUtils.setReadColumns(jobConf2, new ArrayList<>(), new ArrayList<>());
         List<String> groupCols = Arrays.asList(job.get(READ_NESTED_COLUMN_PATH_CONF_STR, "").split(","));
-        HoodieColumnProjectionUtils.appendReadColumns(jobConf1, hoodieColIds, hoodieColNames, new ArrayList<>());
-        HoodieColumnProjectionUtils.appendReadColumns(jobConf2, nonHoodieColIdsAdjusted, nonHoodieColNames, groupCols);
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("hoodieColNames=" + hoodieColNames + ", hoodieColIds=" + hoodieColIds
-              + ", SIZES : hoodieColNames=" + hoodieColNames.size() + ", hoodieColIds=" + hoodieColIds.size()
-              + ", nonHoodieColNames=" + nonHoodieColNames + ", nonHoodieColIdsAdjusted=" + nonHoodieColIdsAdjusted
-              + ", nonHoodieColNames=" + nonHoodieColNames.size() + ", nonHoodieColIdsAdjusted="
-              + nonHoodieColIdsAdjusted.size());
-        }
-        FileSystem fs = FileSystem.get(job);
-        //FileSplit rightSplit =
-        //    makeSplit(externalFile, 0, externalFileStatus.getLen(), new String[0], new String[0]);
+        HoodieColumnProjectionUtils.appendReadColumns(jobConf1, hoodieColIds,
+            hoodieColNames, new ArrayList<>());
+        HoodieColumnProjectionUtils.appendReadColumns(jobConf2, externalColIdsWithVirtualCols,
+            externalColNamesWithVirtualCols, groupCols);
+
         FileSplit rightSplit = eSplit.getExternalFileSplit();
         LOG.info("Generating column stitching reader for " + eSplit.getPath() + " and " + rightSplit.getPath());
         return new HoodieColumnStichingRecordReader(super.getRecordReader(eSplit, job, reporter),
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/NullSkeletonRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/NullSkeletonRecordReader.java
new file mode 100644
index 0000000000000..c21dc92738cae
--- /dev/null
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/NullSkeletonRecordReader.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.hadoop;
+
+import java.io.IOException;
+import org.apache.hadoop.io.ArrayWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hudi.common.model.HoodieRecord;
+
+public class NullSkeletonRecordReader implements RecordReader<NullWritable, ArrayWritable> {
+  // One value holder with a slot per Hudi meta column; nothing in this class ever fills the slots.
+  private final ArrayWritable rec = new ArrayWritable(NullWritable.class,
+      new Writable[HoodieRecord.HOODIE_META_COLUMNS.size()]);
+
+  @Override
+  public boolean next(NullWritable key, ArrayWritable value) throws IOException {
+    return true; // always reports a row; key and value are left untouched
+  }
+
+  @Override
+  public NullWritable createKey() {
+    return NullWritable.get(); // the shared NullWritable singleton
+  }
+
+  @Override
+  public ArrayWritable createValue() {
+    return rec; // same instance on every call, not a fresh copy
+  }
+
+  @Override
+  public long getPos() throws IOException {
+    return 0; // no underlying stream, so the position is constant
+  }
+
+  @Override
+  public void close() throws IOException {
+    // Intentionally empty: the only state is the in-memory value holder above.
+  }
+
+  @Override
+  public float getProgress() throws IOException {
+    return 0; // progress is constant as well; this reader never reports completion
+  }
+}
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
index 5e346224adc72..e5f7f36bd079c 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java
@@ -34,6 +34,7 @@
 import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.exception.HoodieIOException;
 import org.apache.hudi.hadoop.ExternalBaseFileSplit;
+import org.apache.hudi.hadoop.HoodieColumnProjectionUtils;
 import org.apache.hudi.hadoop.HoodieParquetInputFormat;
 import org.apache.hudi.hadoop.UseFileSplitsFromInputFormat;
 
@@ -212,10 +213,15 @@ private static Configuration addProjectionField(Configuration conf, String field
   }
 
   private static void addRequiredProjectionFields(Configuration configuration) {
-    // Need this to do merge records in HoodieRealtimeRecordReader
-    addProjectionField(configuration, HoodieRecord.RECORD_KEY_METADATA_FIELD, HOODIE_RECORD_KEY_COL_POS);
-    addProjectionField(configuration, HoodieRecord.COMMIT_TIME_METADATA_FIELD, HOODIE_COMMIT_TIME_COL_POS);
-    addProjectionField(configuration, HoodieRecord.PARTITION_PATH_METADATA_FIELD, HOODIE_PARTITION_PATH_COL_POS);
+    // Record key, commit time and partition path must be read so HoodieRealtimeRecordReader can merge records.
+    List<Integer> colIds = new ArrayList<>(HoodieColumnProjectionUtils.getReadColumnIDs(configuration));
+    colIds.addAll(
+        Arrays.asList(HOODIE_RECORD_KEY_COL_POS, HOODIE_COMMIT_TIME_COL_POS, HOODIE_PARTITION_PATH_COL_POS));
+    List<String> colNames =
+        new ArrayList<>(Arrays.asList(HoodieColumnProjectionUtils.getReadColumnNames(configuration)));
+    colNames.addAll(Arrays.asList(HoodieRecord.RECORD_KEY_METADATA_FIELD,
+        HoodieRecord.COMMIT_TIME_METADATA_FIELD, HoodieRecord.PARTITION_PATH_METADATA_FIELD));
+    HoodieColumnProjectionUtils.setReadColumns(configuration, colIds, colNames);
   }
 
   /**
diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeExternalBaseFileSplit.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeExternalBaseFileSplit.java
index 71e84ac18aa06..31ddfb8b81d30 100644
--- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeExternalBaseFileSplit.java
+++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeExternalBaseFileSplit.java
@@ -38,6 +38,10 @@ public class RealtimeExternalBaseFileSplit extends ExternalBaseFileSplit impleme
 
   private String basePath;
 
+  public RealtimeExternalBaseFileSplit() {
+    super();
+  }
+
   public RealtimeExternalBaseFileSplit(FileSplit baseSplit, String basePath, List<String> deltaLogPaths,
       String maxInstantTime, FileSplit externalFileSplit) throws IOException {
     super(baseSplit, externalFileSplit);
diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java
index cf8727394788c..5f36d72dd3553 100644
--- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java
+++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java
@@ -268,7 +268,8 @@ void assertStdOutContains(Pair<String, String> stdOutErr, String expectedOutput,
       saveUpLogs();
     }
 
-    assertEquals(times, count, "Did not find output the expected number of times");
+    assertEquals(times, count, "Did not find output the expected number of times. stdOutSingleSpaced="
+        + stdOutSingleSpaced);
   }
 
   public class TestExecStartResultCallback extends ExecStartResultCallback {
diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java
index fa061dccaa4c4..e56067195f483 100644
--- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java
+++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestHoodieDemo.java
@@ -53,11 +53,18 @@ public class ITTestHoodieDemo extends ITTestBase {
   private static final String COW_TABLE_NAME = "stock_ticks_cow";
   private static final String MOR_TABLE_NAME = "stock_ticks_mor";
 
+  private static final String BOOTSTRAPPED_SRC_PATH = "/user/hive/warehouse/stock_ticks_cow_bs_src";
+  private static final String COW_BOOTSTRAPPED_BASE_PATH = "/user/hive/warehouse/stock_ticks_cow_bs";
+  private static final String MOR_BOOTSTRAPPED_BASE_PATH = "/user/hive/warehouse/stock_ticks_mor_bs";
+  private static final String COW_BOOTSTRAPPED_TABLE_NAME = "stock_ticks_cow_bs";
+  private static final String MOR_BOOTSTRAPPED_TABLE_NAME = "stock_ticks_mor_bs";
+
   private static final String DEMO_CONTAINER_SCRIPT = HOODIE_WS_ROOT + "/docker/demo/setup_demo_container.sh";
   private static final String MIN_COMMIT_TIME_COW_SCRIPT = HOODIE_WS_ROOT + "/docker/demo/get_min_commit_time_cow.sh";
   private static final String MIN_COMMIT_TIME_MOR_SCRIPT = HOODIE_WS_ROOT + "/docker/demo/get_min_commit_time_mor.sh";
   private static final String HUDI_CLI_TOOL = HOODIE_WS_ROOT + "/hudi-cli/hudi-cli.sh";
   private static final String COMPACTION_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/compaction.commands";
+  private static final String SPARKSQL_BS_PREP_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/sparksql-bootstrap-prep-source.commands";
   private static final String SPARKSQL_BATCH1_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/sparksql-batch1.commands";
   private static final String SPARKSQL_BATCH2_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/sparksql-batch2.commands";
   private static final String SPARKSQL_INCREMENTAL_COMMANDS = HOODIE_WS_ROOT + "/docker/demo/sparksql-incremental.commands";
@@ -96,6 +103,7 @@ public void testDemo() throws Exception {
 
     // compaction
     scheduleAndRunCompaction();
+
     testHiveAfterSecondBatchAfterCompaction();
     testPrestoAfterSecondBatchAfterCompaction();
     testIncrementalHiveQueryAfterCompaction();
@@ -182,35 +190,42 @@ private void ingestFirstBatchAndHiveSync() throws Exception {
   private void testHiveAfterFirstBatch() throws Exception {
     Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_TBLCHECK_COMMANDS);
     assertStdOutContains(stdOutErrPair, "| stock_ticks_cow     |");
+    assertStdOutContains(stdOutErrPair, "| stock_ticks_cow_bs  |");
     assertStdOutContains(stdOutErrPair, "| stock_ticks_mor_ro  |");
     assertStdOutContains(stdOutErrPair, "| stock_ticks_mor_rt  |");
-
+    assertStdOutContains(stdOutErrPair, "| stock_ticks_mor_bs_ro  |");
+    assertStdOutContains(stdOutErrPair, "| stock_ticks_mor_bs_rt  |");
     assertStdOutContains(stdOutErrPair,
         "|   partition    |\n+----------------+\n| dt=2018-08-31  |\n+----------------+\n", 3);
 
     stdOutErrPair = executeHiveCommandFile(HIVE_BATCH1_COMMANDS);
     assertStdOutContains(stdOutErrPair, "| symbol  |         _c1          |\n+---------+----------------------+\n"
-        + "| GOOG    | 2018-08-31 10:29:00  |\n", 3);
+        + "| GOOG    | 2018-08-31 10:29:00  |\n", 6);
     assertStdOutContains(stdOutErrPair,
         "| symbol  |          ts          | volume  |    open    |   close   |\n"
             + "+---------+----------------------+---------+------------+-----------+\n"
             + "| GOOG    | 2018-08-31 09:59:00  | 6330    | 1230.5     | 1230.02   |\n"
             + "| GOOG    | 2018-08-31 10:29:00  | 3391    | 1230.1899  | 1230.085  |\n",
-        3);
+        6);
   }
 
   private void testSparkSQLAfterFirstBatch() throws Exception {
     Pair<String, String> stdOutErrPair = executeSparkSQLCommand(SPARKSQL_BATCH1_COMMANDS, true);
     assertStdOutContains(stdOutErrPair, "|default |stock_ticks_cow   |false      |\n"
+                                                    + "|default |stock_ticks_cow_bs   |false      |\n"
+                                                    + "|default |stock_ticks_mor_bs_ro |false      |\n"
+                                                    + "|default |stock_ticks_mor_bs_rt |false      |\n"
                                                     + "|default |stock_ticks_mor_ro |false      |\n"
                                                     + "|default |stock_ticks_mor_rt |false      |");
     assertStdOutContains(stdOutErrPair,
-        "+------+-------------------+\n|GOOG  |2018-08-31 10:29:00|\n+------+-------------------+", 3);
-    assertStdOutContains(stdOutErrPair, "|GOOG  |2018-08-31 09:59:00|6330  |1230.5   |1230.02 |", 3);
-    assertStdOutContains(stdOutErrPair, "|GOOG  |2018-08-31 10:29:00|3391  |1230.1899|1230.085|", 3);
+        "+------+-------------------+\n|GOOG  |2018-08-31 10:29:00|\n+------+-------------------+", 6);
+    assertStdOutContains(stdOutErrPair, "|GOOG  |2018-08-31 09:59:00|6330  |1230.5   |1230.02 |", 6);
+    assertStdOutContains(stdOutErrPair, "|GOOG  |2018-08-31 10:29:00|3391  |1230.1899|1230.085|", 6);
   }
 
   private void ingestSecondBatchAndHiveSync() throws Exception {
+    // Note : Unlike normal tables, bootstrapped tables do not have checkpoint. So, they
+    // begin with null checkpoint and read all states.
     List<String> cmds = CollectionUtils.createImmutableList(
             ("hdfs dfs -copyFromLocal -f " + INPUT_BATCH_PATH2 + " " + HDFS_BATCH_PATH2),
             ("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE
@@ -226,14 +241,28 @@ private void ingestSecondBatchAndHiveSync() throws Exception {
             + " --target-base-path " + MOR_BASE_PATH + " --target-table " + MOR_TABLE_NAME
             + " --props /var/demo/config/dfs-source.properties"
             + " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
-            + " --disable-compaction " + String.format(HIVE_SYNC_CMD_FMT, "dt", MOR_TABLE_NAME)));
+            + " --disable-compaction " + String.format(HIVE_SYNC_CMD_FMT, "dt", MOR_TABLE_NAME)),
+            ("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE
+            + " --table-type COPY_ON_WRITE "
+            + " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
+            + " --target-base-path " + COW_BOOTSTRAPPED_BASE_PATH + " --target-table " + COW_BOOTSTRAPPED_TABLE_NAME
+            + " --props /var/demo/config/dfs-source.properties"
+            + " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
+            + String.format(HIVE_SYNC_CMD_FMT, "dt", COW_BOOTSTRAPPED_TABLE_NAME)),
+            ("spark-submit --class org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer " + HUDI_UTILITIES_BUNDLE
+            + " --table-type MERGE_ON_READ "
+            + " --source-class org.apache.hudi.utilities.sources.JsonDFSSource --source-ordering-field ts "
+            + " --target-base-path " + MOR_BOOTSTRAPPED_BASE_PATH + " --target-table " + MOR_BOOTSTRAPPED_TABLE_NAME
+            + " --props /var/demo/config/dfs-source.properties"
+            + " --schemaprovider-class org.apache.hudi.utilities.schema.FilebasedSchemaProvider "
+            + " --disable-compaction " + String.format(HIVE_SYNC_CMD_FMT, "dt", MOR_BOOTSTRAPPED_TABLE_NAME)));
     executeCommandStringsInDocker(ADHOC_1_CONTAINER, cmds);
   }
 
   private void testPrestoAfterFirstBatch() throws Exception {
     Pair<String, String> stdOutErrPair = executePrestoCommandFile(HDFS_PRESTO_INPUT_TABLE_CHECK_PATH);
-    assertStdOutContains(stdOutErrPair, "stock_ticks_cow");
-    assertStdOutContains(stdOutErrPair, "stock_ticks_mor",2);
+    assertStdOutContains(stdOutErrPair, "stock_ticks_cow", 2);
+    assertStdOutContains(stdOutErrPair, "stock_ticks_mor",4);
 
     stdOutErrPair = executePrestoCommandFile(HDFS_PRESTO_INPUT_BATCH1_PATH);
     assertStdOutContains(stdOutErrPair,
@@ -247,20 +276,20 @@ private void testPrestoAfterFirstBatch() throws Exception {
   private void testHiveAfterSecondBatch() throws Exception {
     Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_BATCH1_COMMANDS);
     assertStdOutContains(stdOutErrPair, "| symbol  |         _c1          |\n+---------+----------------------+\n"
-        + "| GOOG    | 2018-08-31 10:29:00  |\n");
+        + "| GOOG    | 2018-08-31 10:29:00  |\n", 2);
     assertStdOutContains(stdOutErrPair, "| symbol  |         _c1          |\n+---------+----------------------+\n"
-        + "| GOOG    | 2018-08-31 10:59:00  |\n", 2);
+        + "| GOOG    | 2018-08-31 10:59:00  |\n", 4);
     assertStdOutContains(stdOutErrPair,
         "| symbol  |          ts          | volume  |    open    |   close   |\n"
             + "+---------+----------------------+---------+------------+-----------+\n"
             + "| GOOG    | 2018-08-31 09:59:00  | 6330    | 1230.5     | 1230.02   |\n"
-            + "| GOOG    | 2018-08-31 10:29:00  | 3391    | 1230.1899  | 1230.085  |\n");
+            + "| GOOG    | 2018-08-31 10:29:00  | 3391    | 1230.1899  | 1230.085  |\n", 2);
     assertStdOutContains(stdOutErrPair,
         "| symbol  |          ts          | volume  |    open    |   close   |\n"
             + "+---------+----------------------+---------+------------+-----------+\n"
             + "| GOOG    | 2018-08-31 09:59:00  | 6330    | 1230.5     | 1230.02   |\n"
             + "| GOOG    | 2018-08-31 10:59:00  | 9021    | 1227.1993  | 1227.215  |\n",
-        2);
+        4);
   }
 
   private void testPrestoAfterSecondBatch() throws Exception {
@@ -280,13 +309,13 @@ private void testPrestoAfterSecondBatch() throws Exception {
   private void testHiveAfterSecondBatchAfterCompaction() throws Exception {
     Pair<String, String> stdOutErrPair = executeHiveCommandFile(HIVE_BATCH2_COMMANDS);
     assertStdOutContains(stdOutErrPair, "| symbol  |         _c1          |\n+---------+----------------------+\n"
-        + "| GOOG    | 2018-08-31 10:59:00  |", 2);
+        + "| GOOG    | 2018-08-31 10:59:00  |", 4);
     assertStdOutContains(stdOutErrPair,
         "| symbol  |          ts          | volume  |    open    |   close   |\n"
             + "+---------+----------------------+---------+------------+-----------+\n"
             + "| GOOG    | 2018-08-31 09:59:00  | 6330    | 1230.5     | 1230.02   |\n"
             + "| GOOG    | 2018-08-31 10:59:00  | 9021    | 1227.1993  | 1227.215  |",
-        2);
+        4);
   }
 
   private void testPrestoAfterSecondBatchAfterCompaction() throws Exception {
@@ -302,13 +331,13 @@ private void testPrestoAfterSecondBatchAfterCompaction() throws Exception {
   private void testSparkSQLAfterSecondBatch() throws Exception {
     Pair<String, String> stdOutErrPair = executeSparkSQLCommand(SPARKSQL_BATCH2_COMMANDS, true);
     assertStdOutContains(stdOutErrPair,
-        "+------+-------------------+\n|GOOG  |2018-08-31 10:59:00|\n+------+-------------------+", 2);
+        "+------+-------------------+\n|GOOG  |2018-08-31 10:59:00|\n+------+-------------------+", 4);
 
-    assertStdOutContains(stdOutErrPair, "|GOOG  |2018-08-31 09:59:00|6330  |1230.5   |1230.02 |", 3);
-    assertStdOutContains(stdOutErrPair, "|GOOG  |2018-08-31 10:59:00|9021  |1227.1993|1227.215|", 2);
+    assertStdOutContains(stdOutErrPair, "|GOOG  |2018-08-31 09:59:00|6330  |1230.5   |1230.02 |", 6);
+    assertStdOutContains(stdOutErrPair, "|GOOG  |2018-08-31 10:59:00|9021  |1227.1993|1227.215|", 4);
     assertStdOutContains(stdOutErrPair,
-        "+------+-------------------+\n|GOOG  |2018-08-31 10:29:00|\n+------+-------------------+");
-    assertStdOutContains(stdOutErrPair, "|GOOG  |2018-08-31 10:29:00|3391  |1230.1899|1230.085|");
+        "+------+-------------------+\n|GOOG  |2018-08-31 10:29:00|\n+------+-------------------+", 2);
+    assertStdOutContains(stdOutErrPair, "|GOOG  |2018-08-31 10:29:00|3391  |1230.1899|1230.085|", 2);
   }
 
   private void testIncrementalHiveQuery(String minCommitTimeScript, String incrementalCommandsFile,
@@ -324,36 +353,40 @@ private void testIncrementalHiveQueryBeforeCompaction() throws Exception {
     String expectedOutput = "| GOOG    | 2018-08-31 10:59:00  | 9021    | 1227.1993  | 1227.215  |";
 
     // verify that 10:59 is present in COW table because there is no compaction process for COW
-    testIncrementalHiveQuery(MIN_COMMIT_TIME_COW_SCRIPT, HIVE_INCREMENTAL_COW_COMMANDS, expectedOutput, 1);
+    testIncrementalHiveQuery(MIN_COMMIT_TIME_COW_SCRIPT, HIVE_INCREMENTAL_COW_COMMANDS, expectedOutput, 2);
 
     // verify that 10:59 is NOT present in RO table because of pending compaction
     testIncrementalHiveQuery(MIN_COMMIT_TIME_MOR_SCRIPT, HIVE_INCREMENTAL_MOR_RO_COMMANDS, expectedOutput, 0);
 
     // verify that 10:59 is present in RT table even with pending compaction
-    testIncrementalHiveQuery(MIN_COMMIT_TIME_MOR_SCRIPT, HIVE_INCREMENTAL_MOR_RT_COMMANDS, expectedOutput, 1);
+    testIncrementalHiveQuery(MIN_COMMIT_TIME_MOR_SCRIPT, HIVE_INCREMENTAL_MOR_RT_COMMANDS, expectedOutput, 2);
   }
 
   private void testIncrementalHiveQueryAfterCompaction() throws Exception {
     String expectedOutput = "| symbol  |          ts          | volume  |    open    |   close   |\n"
-            + "+---------+----------------------+---------+------------+-----------+\n"
-            + "| GOOG    | 2018-08-31 10:59:00  | 9021    | 1227.1993  | 1227.215  |";
+        + "+---------+----------------------+---------+------------+-----------+\n"
+        + "| GOOG    | 2018-08-31 10:59:00  | 9021    | 1227.1993  | 1227.215  |";
 
     // verify that 10:59 is present for all views because compaction is complete
-    testIncrementalHiveQuery(MIN_COMMIT_TIME_COW_SCRIPT, HIVE_INCREMENTAL_COW_COMMANDS, expectedOutput, 1);
-    testIncrementalHiveQuery(MIN_COMMIT_TIME_MOR_SCRIPT, HIVE_INCREMENTAL_MOR_RO_COMMANDS, expectedOutput, 1);
-    testIncrementalHiveQuery(MIN_COMMIT_TIME_MOR_SCRIPT, HIVE_INCREMENTAL_MOR_RT_COMMANDS, expectedOutput, 1);
+    testIncrementalHiveQuery(MIN_COMMIT_TIME_COW_SCRIPT, HIVE_INCREMENTAL_COW_COMMANDS, expectedOutput, 2);
+    testIncrementalHiveQuery(MIN_COMMIT_TIME_MOR_SCRIPT, HIVE_INCREMENTAL_MOR_RO_COMMANDS, expectedOutput, 2);
+    testIncrementalHiveQuery(MIN_COMMIT_TIME_MOR_SCRIPT, HIVE_INCREMENTAL_MOR_RT_COMMANDS, expectedOutput, 2);
   }
 
   private void testIncrementalSparkSQLQuery() throws Exception {
     Pair<String, String> stdOutErrPair = executeSparkSQLCommand(SPARKSQL_INCREMENTAL_COMMANDS, true);
-    assertStdOutContains(stdOutErrPair, "|GOOG  |2018-08-31 10:59:00|9021  |1227.1993|1227.215|");
-    assertStdOutContains(stdOutErrPair, "|default |stock_ticks_cow           |false      |\n"
-        + "|default |stock_ticks_derived_mor_ro|false      |\n"
-        + "|default |stock_ticks_derived_mor_rt|false      |\n"
-        + "|default |stock_ticks_mor_ro        |false      |\n"
-        + "|default |stock_ticks_mor_rt        |false      |\n"
-        + "|        |stock_ticks_cow_incr      |true       |");
-    assertStdOutContains(stdOutErrPair, "|count(1)|\n+--------+\n|99     |", 2);
+    assertStdOutContains(stdOutErrPair, "|GOOG  |2018-08-31 10:59:00|9021  |1227.1993|1227.215|", 2);
+    assertStdOutContains(stdOutErrPair, "|default |stock_ticks_cow              |false      |\n"
+        + "|default |stock_ticks_cow_bs           |false      |\n"
+        + "|default |stock_ticks_derived_mor_bs_ro|false      |\n"
+        + "|default |stock_ticks_derived_mor_bs_rt|false      |\n"
+        + "|default |stock_ticks_derived_mor_ro   |false      |\n"
+        + "|default |stock_ticks_derived_mor_rt   |false      |\n"
+        + "|default |stock_ticks_mor_bs_ro        |false      |\n"
+        + "|default |stock_ticks_mor_bs_rt        |false      |\n"
+        + "|default |stock_ticks_mor_ro           |false      |\n"
+        + "|default |stock_ticks_mor_rt           |false      |");
+    assertStdOutContains(stdOutErrPair, "|count(1)|\n+--------+\n|99     |", 4);
   }
 
   private void scheduleAndRunCompaction() throws Exception {
diff --git a/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java b/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java
index b51805f91b0e7..e80c66e9522a3 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/DataSourceUtils.java
@@ -18,13 +18,18 @@
 
 package org.apache.hudi;
 
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hudi.client.HoodieReadClient;
 import org.apache.hudi.client.HoodieWriteClient;
 import org.apache.hudi.client.WriteStatus;
 import org.apache.hudi.common.config.TypedProperties;
 import org.apache.hudi.common.model.HoodieKey;
+import org.apache.hudi.common.model.HoodiePartitionMetadata;
 import org.apache.hudi.common.model.HoodieRecord;
 import org.apache.hudi.common.model.HoodieRecordPayload;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.ReflectionUtils;
 import org.apache.hudi.common.util.StringUtils;
@@ -34,6 +39,7 @@
 import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.exception.HoodieNotSupportedException;
 import org.apache.hudi.exception.TableNotFoundException;
+import org.apache.hudi.hadoop.HoodieHiveUtil;
 import org.apache.hudi.hive.HiveSyncConfig;
 import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor;
 import org.apache.hudi.index.HoodieIndex;
@@ -41,6 +47,8 @@
 import org.apache.hudi.table.UserDefinedBulkInsertPartitioner;
 
 import org.apache.avro.generic.GenericRecord;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaRDD;
 import org.apache.spark.api.java.JavaSparkContext;
 
@@ -55,6 +63,8 @@
  */
 public class DataSourceUtils {
 
+  private static final Logger LOG = LogManager.getLogger(DataSourceUtils.class);
+
   /**
    * Create a key generator class via reflection, passing in any configs needed.
    * <p>
@@ -212,4 +222,81 @@ public static HiveSyncConfig buildHiveSyncConfig(TypedProperties props, String b
             DataSourceWriteOptions.DEFAULT_HIVE_USE_JDBC_OPT_VAL()));
     return hiveSyncConfig;
   }
+
+  /**
+   * Resolves the base path of the Hudi table that contains the given paths.
+   *
+   * <p>Paths are examined in order; the first one that yields a table path decides the result.
+   *
+   * @param fs file system used to stat the paths and to probe for Hudi metadata
+   * @param paths files or directories expected to live inside a Hudi table
+   * @return the table base path, as a string
+   * @throws IOException if the file system cannot be queried
+   * @throws TableNotFoundException if no table path can be derived from any of the paths
+   */
+  public static String getTablePath(FileSystem fs, Path[] paths) throws IOException {
+    LOG.info("Getting table path..");
+    for (Path path : paths) {
+      FileStatus fileStatus = fs.getFileStatus(path);
+      Option<Path> tablePath = fileStatus.isFile()
+          ? getTablePathFromFile(fs, fileStatus)
+          : getTablePathFromDir(fs, fileStatus);
+
+      if (tablePath.isPresent()) {
+        return tablePath.get().toString();
+      }
+    }
+
+    throw new TableNotFoundException("Cannot find Hudi table for the path provided");
+  }
+
+  /**
+   * Derives the table base path from a file that lies either under the metadata folder or in a
+   * directory that has Hudi partition metadata.
+   *
+   * @return the table base path, or empty when the file matches neither layout
+   */
+  private static Option<Path> getTablePathFromFile(FileSystem fs, FileStatus fileStatus) throws IOException {
+    Path filePath = fileStatus.getPath();
+    LOG.info("Getting table path from file path : " + filePath);
+
+    if (filePath.toString().contains("/" + HoodieTableMetaClient.METAFOLDER_NAME + "/")) {
+      // File inside the metadata folder: climb to the folder itself. The component name is compared
+      // exactly, so a file or directory whose name merely ends with the metadata folder name is not
+      // mistaken for it. The table base path is the parent of the metadata folder.
+      Path metaFolder = filePath.getParent();
+      while (metaFolder != null && !HoodieTableMetaClient.METAFOLDER_NAME.equals(metaFolder.getName())) {
+        metaFolder = metaFolder.getParent();
+      }
+      return metaFolder == null ? Option.empty() : Option.of(metaFolder.getParent());
+    }
+
+    Path partitionPath = filePath.getParent();
+    if (HoodiePartitionMetadata.hasPartitionMetadata(fs, partitionPath)) {
+      // File inside a partition directory: walk up from the partition by the depth that the
+      // partition metadata reports.
+      HoodiePartitionMetadata metadata = new HoodiePartitionMetadata(fs, partitionPath);
+      metadata.readFromFS();
+      return Option.of(HoodieHiveUtil.getNthParent(partitionPath, metadata.getPartitionDepth()));
+    }
+
+    return Option.empty();
+  }
+
+  /**
+   * Derives the table base path from a directory by walking up from it until a directory that
+   * contains the metadata folder is found.
+   *
+   * @return the table base path, or empty when neither the directory nor any ancestor qualifies
+   */
+  private static Option<Path> getTablePathFromDir(FileSystem fs, FileStatus fileStatus) throws IOException {
+    Path tablePath = fileStatus.getPath();
+    LOG.info("Getting table path from directory path : " + tablePath);
+
+    while (tablePath != null && !fs.exists(new Path(tablePath, HoodieTableMetaClient.METAFOLDER_NAME))) {
+      tablePath = tablePath.getParent();
+    }
+
+    return tablePath == null ? Option.empty() : Option.of(tablePath);
+  }
 }
diff --git a/hudi-spark/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java
index e810ff1779dcb..af4504573ffb4 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java
@@ -38,6 +38,8 @@ public class ComplexKeyGenerator extends KeyGenerator {
 
   protected final boolean hiveStylePartitioning;
 
+  protected final boolean encodePartitionPath;
+
   public ComplexKeyGenerator(TypedProperties props) {
     super(props);
     this.recordKeyFields = Arrays.asList(props.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY()).split(","))
@@ -47,6 +49,8 @@ public ComplexKeyGenerator(TypedProperties props) {
                 .stream().map(String::trim).collect(Collectors.toList());
     this.hiveStylePartitioning = props.getBoolean(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING_OPT_KEY(),
         Boolean.parseBoolean(DataSourceWriteOptions.DEFAULT_HIVE_STYLE_PARTITIONING_OPT_VAL()));
+    this.encodePartitionPath = props.getBoolean(DataSourceWriteOptions.URL_ENCODE_PARTITIONING_OPT_KEY(),
+        Boolean.parseBoolean(DataSourceWriteOptions.DEFAULT_URL_ENCODE_PARTITIONING_OPT_VAL()));
   }
 
   @Override
diff --git a/hudi-spark/src/main/java/org/apache/hudi/keygen/SimpleKeyGenerator.java b/hudi-spark/src/main/java/org/apache/hudi/keygen/SimpleKeyGenerator.java
index a9df3ee751db6..8568d4eaccecf 100644
--- a/hudi-spark/src/main/java/org/apache/hudi/keygen/SimpleKeyGenerator.java
+++ b/hudi-spark/src/main/java/org/apache/hudi/keygen/SimpleKeyGenerator.java
@@ -37,12 +37,16 @@ public class SimpleKeyGenerator extends KeyGenerator {
 
   protected final boolean hiveStylePartitioning;
 
+  protected final boolean encodePartitionPath;
+
   public SimpleKeyGenerator(TypedProperties props) {
     super(props);
     this.recordKeyField = props.getString(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY());
     this.partitionPathField = props.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY());
     this.hiveStylePartitioning = props.getBoolean(DataSourceWriteOptions.HIVE_STYLE_PARTITIONING_OPT_KEY(),
         Boolean.parseBoolean(DataSourceWriteOptions.DEFAULT_HIVE_STYLE_PARTITIONING_OPT_VAL()));
+    this.encodePartitionPath = props.getBoolean(DataSourceWriteOptions.URL_ENCODE_PARTITIONING_OPT_KEY(),
+        Boolean.parseBoolean(DataSourceWriteOptions.DEFAULT_URL_ENCODE_PARTITIONING_OPT_VAL()));
   }
 
   @Override
diff --git a/hudi-spark/src/main/scala/org/apache/hudi/DataSourceOptions.scala b/hudi-spark/src/main/scala/org/apache/hudi/DataSourceOptions.scala
index 3d1172f0f6fa8..d7431473f9581 100644
--- a/hudi-spark/src/main/scala/org/apache/hudi/DataSourceOptions.scala
+++ b/hudi-spark/src/main/scala/org/apache/hudi/DataSourceOptions.scala
@@ -50,6 +50,8 @@ object DataSourceReadOptions {
   val QUERY_TYPE_INCREMENTAL_OPT_VAL = "incremental"
   val DEFAULT_QUERY_TYPE_OPT_VAL: String = QUERY_TYPE_SNAPSHOT_OPT_VAL
 
+  val READ_PATHS_OPT_KEY = "hoodie.datasource.read.paths"
+
   @Deprecated
   val VIEW_TYPE_OPT_KEY = "hoodie.datasource.view.type"
   @Deprecated
@@ -129,6 +131,7 @@ object DataSourceWriteOptions {
   val INSERT_OPERATION_OPT_VAL = "insert"
   val UPSERT_OPERATION_OPT_VAL = "upsert"
   val DELETE_OPERATION_OPT_VAL = "delete"
+  val BOOTSTRAP_OPERATION_OPT_VAL = "bootstrap"
   val DEFAULT_OPERATION_OPT_VAL = UPSERT_OPERATION_OPT_VAL
 
   /**
@@ -207,7 +210,8 @@ object DataSourceWriteOptions {
     */
   val HIVE_STYLE_PARTITIONING_OPT_KEY = "hoodie.datasource.write.hive_style_partitioning"
   val DEFAULT_HIVE_STYLE_PARTITIONING_OPT_VAL = "false"
-
+  val URL_ENCODE_PARTITIONING_OPT_KEY = "hoodie.datasource.write.partitionpath.urlencode"
+  val DEFAULT_URL_ENCODE_PARTITIONING_OPT_VAL = "false"
   /**
     * Key generator class, that implements will extract the key out of incoming record
     *
diff --git a/hudi-spark/src/main/scala/org/apache/hudi/DefaultSource.scala b/hudi-spark/src/main/scala/org/apache/hudi/DefaultSource.scala
index fbdd4ea9cfb1b..e003232c838a3 100644
--- a/hudi-spark/src/main/scala/org/apache/hudi/DefaultSource.scala
+++ b/hudi-spark/src/main/scala/org/apache/hudi/DefaultSource.scala
@@ -17,10 +17,16 @@
 
 package org.apache.hudi
 
+import org.apache.hadoop.fs.{FileSystem, Path}
 import org.apache.hudi.DataSourceReadOptions._
+import org.apache.hudi.DataSourceWriteOptions.{BOOTSTRAP_OPERATION_OPT_VAL, OPERATION_OPT_KEY}
+import org.apache.hudi.common.bootstrap.index.BootstrapIndex
+import org.apache.hudi.common.fs.FSUtils
+import org.apache.hudi.common.table.HoodieTableMetaClient
 import org.apache.hudi.exception.HoodieException
 import org.apache.hudi.hadoop.HoodieROTablePathFilter
 import org.apache.log4j.LogManager
+import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.sql.execution.datasources.DataSource
 import org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand
 import org.apache.spark.sql.execution.streaming.Sink
@@ -54,29 +60,54 @@ class DefaultSource extends RelationProvider
     val parameters = Map(QUERY_TYPE_OPT_KEY -> DEFAULT_QUERY_TYPE_OPT_VAL) ++ translateViewTypesToQueryTypes(optParams)
 
     val path = parameters.get("path")
-    if (path.isEmpty) {
-      throw new HoodieException("'path' must be specified.")
-    }
 
     if (parameters(QUERY_TYPE_OPT_KEY).equals(QUERY_TYPE_SNAPSHOT_OPT_VAL)) {
-      // this is just effectively RO view only, where `path` can contain a mix of
-      // non-hoodie/hoodie path files. set the path filter up
-      sqlContext.sparkContext.hadoopConfiguration.setClass(
-        "mapreduce.input.pathFilter.class",
-        classOf[HoodieROTablePathFilter],
-        classOf[org.apache.hadoop.fs.PathFilter])
-
-      log.info("Constructing hoodie (as parquet) data source with options :" + parameters)
-      log.warn("Snapshot view not supported yet via data source, for MERGE_ON_READ tables. " +
-        "Please query the Hive table registered using Spark SQL.")
-      // simply return as a regular parquet relation
-      DataSource.apply(
-        sparkSession = sqlContext.sparkSession,
-        userSpecifiedSchema = Option(schema),
-        className = "parquet",
-        options = parameters)
-        .resolveRelation()
+      val readPathsStr = parameters.get(DataSourceReadOptions.READ_PATHS_OPT_KEY)
+      if (path.isEmpty && readPathsStr.isEmpty) {
+        throw new HoodieException(s"'path' or '$READ_PATHS_OPT_KEY' or both must be specified.")
+      }
+
+      val readPaths = readPathsStr.map(p => p.split(",").toSeq).getOrElse(Seq())
+      val allPaths = path.map(p => Seq(p)).getOrElse(Seq()) ++ readPaths
+
+      val fs = FSUtils.getFs(allPaths.head, sqlContext.sparkContext.hadoopConfiguration)
+      val globPaths = checkAndGlobPathIfNecessary(allPaths, fs)
+
+      val tablePath = DataSourceUtils.getTablePath(fs, globPaths.toArray)
+      log.info("Obtained hudi table path: " + tablePath)
+
+      val metaClient = new HoodieTableMetaClient(fs.getConf, tablePath)
+      val bootstrapIndex = BootstrapIndex.getBootstrapIndex(metaClient)
+      log.info("Bootstrap Index Available: " + bootstrapIndex.isIndexAvailable)
+
+      if (bootstrapIndex.isIndexAvailable) {
+        // For bootstrapped tables, use our custom Spark relation for querying
+        new HudiBootstrapRelation(sqlContext, schema, globPaths, metaClient, optParams)
+      } else {
+        // this is just effectively RO view only, where `path` can contain a mix of
+        // non-hoodie/hoodie path files. set the path filter up
+        sqlContext.sparkContext.hadoopConfiguration.setClass(
+          "mapreduce.input.pathFilter.class",
+          classOf[HoodieROTablePathFilter],
+          classOf[org.apache.hadoop.fs.PathFilter])
+
+        log.info("Constructing hoodie (as parquet) data source with options :" + parameters)
+        log.warn("Snapshot view not supported yet via data source, for MERGE_ON_READ tables. " +
+          "Please query the Hive table registered using Spark SQL.")
+        // simply return as a regular parquet relation
+        DataSource.apply(
+          sparkSession = sqlContext.sparkSession,
+          paths = readPaths,
+          userSpecifiedSchema = Option(schema),
+          className = "parquet",
+          options = parameters)
+          .resolveRelation()
+      }
     } else if (parameters(QUERY_TYPE_OPT_KEY).equals(QUERY_TYPE_INCREMENTAL_OPT_VAL)) {
+      if (path.isEmpty) {
+        throw new HoodieException("'path' must be specified for incremental query.")
+      }
+
       new IncrementalRelation(sqlContext, path.get, optParams, schema)
     } else {
       throw new HoodieException("Invalid query type :" + parameters(QUERY_TYPE_OPT_KEY))
@@ -105,7 +136,12 @@ class DefaultSource extends RelationProvider
                               df: DataFrame): BaseRelation = {
 
     val parameters = HoodieSparkSqlWriter.parametersWithWriteDefaults(optParams)
-    HoodieSparkSqlWriter.write(sqlContext, mode, parameters, df)
+
+    if (parameters(OPERATION_OPT_KEY).equals(BOOTSTRAP_OPERATION_OPT_VAL)) {
+      HoodieSparkSqlWriter.bootstrap(sqlContext, mode, parameters, df)
+    } else {
+      HoodieSparkSqlWriter.write(sqlContext, mode, parameters, df)
+    }
 
     new HudiEmptyRelation(sqlContext, df.schema)
   }
@@ -122,5 +158,13 @@ class DefaultSource extends RelationProvider
       outputMode)
   }
 
+  private def checkAndGlobPathIfNecessary(paths: Seq[String], fs: FileSystem): Seq[Path] = {
+    // Qualify every input path against the file system, then pass it to Spark's glob helper.
+    paths.flatMap { rawPath =>
+      val qualifiedPath = new Path(rawPath).makeQualified(fs.getUri, fs.getWorkingDirectory)
+      SparkHadoopUtil.get.globPathIfNecessary(fs, qualifiedPath)
+    }
+  }
+
   override def shortName(): String = "hudi"
 }
diff --git a/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala b/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
index e1bfe877559c5..b7e7bf063afee 100644
--- a/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
+++ b/hudi-spark/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
@@ -24,6 +24,7 @@ import org.apache.avro.generic.GenericRecord
 import org.apache.hadoop.fs.{FileSystem, Path}
 import org.apache.hadoop.hive.conf.HiveConf
 import org.apache.hudi.DataSourceWriteOptions._
+import org.apache.hudi.avro.HoodieAvroUtils
 import org.apache.hudi.client.{HoodieWriteClient, WriteStatus}
 import org.apache.hudi.common.config.TypedProperties
 import org.apache.hudi.common.fs.FSUtils
@@ -34,6 +35,7 @@ import org.apache.hudi.config.HoodieWriteConfig
 import org.apache.hudi.exception.HoodieException
 import org.apache.hudi.hive.{HiveSyncConfig, HiveSyncTool}
 import org.apache.log4j.LogManager
+import org.apache.spark.SparkContext
 import org.apache.spark.api.java.{JavaRDD, JavaSparkContext}
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}
@@ -60,7 +62,6 @@ private[hudi] object HoodieSparkSqlWriter {
       case Some(ser) if ser.equals("org.apache.spark.serializer.KryoSerializer") =>
       case _ => throw new HoodieException("hoodie only support org.apache.spark.serializer.KryoSerializer as spark.serializer")
     }
-    val tableType = parameters(TABLE_TYPE_OPT_KEY)
     val operation =
     // It does not make sense to allow upsert() operation if INSERT_DROP_DUPS_OPT_KEY is true
     // Auto-correct the operation to "insert" if OPERATION_OPT_KEY is set to "upsert" wrongly
@@ -112,25 +113,7 @@ private[hudi] object HoodieSparkSqlWriter {
           orderingVal, keyGenerator.getKey(gr), parameters(PAYLOAD_CLASS_OPT_KEY))
       }).toJavaRDD()
 
-      // Handle various save modes
-      if (mode == SaveMode.ErrorIfExists && exists) {
-        throw new HoodieException(s"hoodie table at $basePath already exists.")
-      }
-      if (mode == SaveMode.Ignore && exists) {
-        log.warn(s"hoodie table at $basePath already exists. Ignoring & not performing actual writes.")
-        (true, common.util.Option.empty())
-      }
-      if (mode == SaveMode.Overwrite && exists) {
-        log.warn(s"hoodie table at $basePath already exists. Deleting existing data & overwriting with new data.")
-        fs.delete(basePath, true)
-        exists = false
-      }
-
-      // Create the table if not present
-      if (!exists) {
-        HoodieTableMetaClient.initTableType(sparkContext.hadoopConfiguration, path.get, tableType,
-          tblName.get, "archived", parameters(PAYLOAD_CLASS_OPT_KEY))
-      }
+      initTable(mode, basePath, fs, exists, sparkContext, parameters)
 
       // Create a HoodieWriteClient & issue the write.
       val client = DataSourceUtils.createHoodieClient(jsc, schema.toString, path.get, tblName.get,
@@ -190,6 +173,37 @@ private[hudi] object HoodieSparkSqlWriter {
     (writeSuccessful, common.util.Option.ofNullable(instantTime))
   }
 
+  // Bootstraps the table at parameters("path"): applies the save mode via initTable, runs the write
+  // client's bootstrap and then the optional Hive sync. The client is closed even if a step fails.
+  def bootstrap(sqlContext: SQLContext,
+                mode: SaveMode,
+                parameters: Map[String, String],
+                df: DataFrame): Unit = {
+    val sparkContext = sqlContext.sparkContext
+    val tableName = parameters.get(HoodieWriteConfig.TABLE_NAME)
+    // An empty DataFrame schema falls back to the Avro null schema.
+    val schema: String = if (df.schema.nonEmpty) {
+      val structName = s"${tableName.get}_record"
+      val nameSpace = s"hoodie.${tableName.get}"
+      AvroConversionUtils.convertStructTypeToAvroSchema(df.schema, structName, nameSpace).toString
+    } else {
+      HoodieAvroUtils.getNullSchema.toString
+    }
+    val basePath = new Path(parameters("path"))
+    val fs = basePath.getFileSystem(sparkContext.hadoopConfiguration)
+    val exists = fs.exists(new Path(basePath, HoodieTableMetaClient.METAFOLDER_NAME))
+    initTable(mode, basePath, fs, exists, sparkContext, parameters)
+    val jsc = new JavaSparkContext(sparkContext)
+    val writeClient = DataSourceUtils.createHoodieClient(jsc, schema, parameters("path"), tableName.get,
+      mapAsJavaMap(parameters))
+    try {
+      writeClient.bootstrap(org.apache.hudi.common.util.Option.empty())
+      syncHiveIfEnabled(basePath, jsc, parameters)
+    } finally {
+      writeClient.close() // was never closed before; the regular write path closes its client too
+    }
+  }
+
   /**
     * Add default options for unspecified write options keys.
     *
@@ -228,6 +242,42 @@ private[hudi] object HoodieSparkSqlWriter {
     props
   }
 
+  // Applies the save mode to an existing table, then creates the table metadata if it is absent.
+  private def initTable(mode: SaveMode, basePath: Path, fs: FileSystem, tableExists: Boolean,
+                        sparkContext: SparkContext, parameters: Map[String, String]): Unit = {
+    val tableName = parameters.get(HoodieWriteConfig.TABLE_NAME)
+    val tableType = parameters(TABLE_TYPE_OPT_KEY)
+    var exists = tableExists // flips to false once Overwrite has deleted the existing table
+    if (mode == SaveMode.ErrorIfExists && exists) {
+      throw new HoodieException(s"hoodie table at $basePath already exists.")
+    }
+    if (mode == SaveMode.Ignore && exists) {
+      // Only logs: the Unit return type gives callers no signal that their writes should be skipped.
+      log.warn(s"hoodie table at $basePath already exists. Ignoring & not performing actual writes.")
+    }
+    if (mode == SaveMode.Overwrite && exists) {
+      log.warn(s"hoodie table at $basePath already exists. Deleting existing data & overwriting with new data.")
+      fs.delete(basePath, true)
+      exists = false
+    }
+    // Create the table if not present (it never existed, or Overwrite just deleted it)
+    if (!exists) {
+      HoodieTableMetaClient.initTableType(sparkContext.hadoopConfiguration, basePath.toString, tableType,
+        tableName.get, "archived", parameters(PAYLOAD_CLASS_OPT_KEY))
+    }
+  }
+
+  // Runs Hive sync when HIVE_SYNC_ENABLED_OPT_KEY parses to true; yields true when sync is disabled.
+  private def syncHiveIfEnabled(basePath: Path, jsc: JavaSparkContext, parameters: Map[String, String]): Boolean = {
+    if (parameters.get(HIVE_SYNC_ENABLED_OPT_KEY).exists(_.toBoolean)) {
+      log.info("Syncing to Hive Metastore (URL: " + parameters(HIVE_URL_OPT_KEY) + ")")
+      val hiveFs = FSUtils.getFs(basePath.toString, jsc.hadoopConfiguration)
+      syncHive(basePath, hiveFs, parameters)
+    } else {
+      true
+    }
+  }
+
   private def syncHive(basePath: Path, fs: FileSystem, parameters: Map[String, String]): Boolean = {
     val hiveSyncConfig: HiveSyncConfig = buildSyncConfig(basePath, parameters)
     val hiveConf: HiveConf = new HiveConf()
@@ -279,16 +329,9 @@ private[hudi] object HoodieSparkSqlWriter {
         log.info("Commit " + instantTime + " failed!")
       }
 
-      val hiveSyncEnabled = parameters.get(HIVE_SYNC_ENABLED_OPT_KEY).exists(r => r.toBoolean)
-      val syncHiveSucess = if (hiveSyncEnabled) {
-        log.info("Syncing to Hive Metastore (URL: " + parameters(HIVE_URL_OPT_KEY) + ")")
-        val fs = FSUtils.getFs(basePath.toString, jsc.hadoopConfiguration)
-        syncHive(basePath, fs, parameters)
-      } else {
-        true
-      }
+      val syncHiveSuccess = syncHiveIfEnabled(basePath, jsc, parameters)
       client.close()
-      commitSuccess && syncHiveSucess
+      commitSuccess && syncHiveSuccess
     } else {
       log.error(s"$operation failed with $errorCount errors :")
       if (log.isTraceEnabled) {
diff --git a/hudi-spark/src/main/scala/org/apache/hudi/HudiBootstrapRDD.scala b/hudi-spark/src/main/scala/org/apache/hudi/HudiBootstrapRDD.scala
new file mode 100644
index 0000000000000..05c66821859e8
--- /dev/null
+++ b/hudi-spark/src/main/scala/org/apache/hudi/HudiBootstrapRDD.scala
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi
+
+import org.apache.spark.{Partition, TaskContext}
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.execution.datasources.PartitionedFile
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.vectorized.ColumnarBatch
+
+/**
+  * Reads a bootstrapped table, one [[HudiBootstrapSplit]] per partition: skeleton and data file are
+  * merged row by row when the split has a skeleton file, else the data file is read on its own.
+  * @param spark                session whose SparkContext backs this RDD (marked transient)
+  * @param dataReadFunction     reader applied to the data file of a split that has a skeleton file
+  * @param skeletonReadFunction reader applied to the skeleton file of a split
+  * @param regularReadFunction  reader applied to the data file of a split without a skeleton file
+  * @param dataSchema           schema used to decode the rows read from the data file
+  * @param skeletonSchema       schema used to decode the rows read from the skeleton file
+  * @param requiredColumns      output columns, in the order they must appear in merged rows
+  * @param tableState           the splits to read, one per partition
+  */
+class HudiBootstrapRDD(@transient spark: SparkSession,
+                       dataReadFunction: PartitionedFile => Iterator[Any],
+                       skeletonReadFunction: PartitionedFile => Iterator[Any],
+                       regularReadFunction: PartitionedFile => Iterator[Any],
+                       dataSchema: StructType,
+                       skeletonSchema: StructType,
+                       requiredColumns: Array[String],
+                       tableState: HudiBootstrapTableState)
+  extends RDD[InternalRow](spark.sparkContext, Nil) {
+
+  // For each required column, in requested order: (true, i) = field i of the skeleton row,
+  // (false, i) = field i of the data row. Resolved once per RDD instance instead of once per row.
+  @transient private lazy val columnSources: Array[(Boolean, Int)] = {
+    val skeletonFields = skeletonSchema.fieldNames.toSet
+    requiredColumns.map { col =>
+      if (skeletonFields.contains(col)) (true, skeletonSchema.fieldIndex(col))
+      else (false, dataSchema.fieldIndex(col))
+    }
+  }
+
+  /** Reads the split behind `split`, merging skeleton and data rows when a skeleton file exists. */
+  override def compute(split: Partition, context: TaskContext): Iterator[InternalRow] = {
+    val bootstrapPartition = split.asInstanceOf[HudiBootstrapPartition]
+    val dataFile = bootstrapPartition.split.dataFile
+    bootstrapPartition.split.skeletonFile match {
+      case Some(skeletonFile) =>
+        logInfo("Got Split => Index: " + bootstrapPartition.index + ", Data File: "
+          + dataFile.filePath + ", Skeleton File: " + skeletonFile.filePath)
+        val dataFileIterator = read(dataFile, dataReadFunction)
+        val skeletonFileIterator = read(skeletonFile, skeletonReadFunction)
+        merge(skeletonFileIterator, dataFileIterator)
+      case None =>
+        logInfo("Got Split => Index: " + bootstrapPartition.index + ", Data File: " + dataFile.filePath)
+        read(dataFile, regularReadFunction)
+    }
+  }
+
+  // Zips the two iterators into merged rows. Iteration stops as soon as either side is exhausted,
+  // so any surplus rows on the other side are dropped without an error.
+  def merge(skeletonFileIterator: Iterator[InternalRow], dataFileIterator: Iterator[InternalRow])
+  : Iterator[InternalRow] = {
+    new Iterator[InternalRow] {
+      override def hasNext: Boolean = dataFileIterator.hasNext && skeletonFileIterator.hasNext
+      override def next(): InternalRow = {
+        mergeInternalRow(skeletonFileIterator.next(), dataFileIterator.next())
+      }
+    }
+  }
+
+  /** Builds one row holding requiredColumns, in that order; skeleton columns take precedence. */
+  def mergeInternalRow(skeletonRow: InternalRow, dataRow: InternalRow): InternalRow = {
+    val skeletonArr = skeletonRow.copy().toSeq(skeletonSchema)
+    val dataArr = dataRow.copy().toSeq(dataSchema)
+    val mergedArr = columnSources.map { case (fromSkeleton, idx) =>
+      if (fromSkeleton) skeletonArr(idx) else dataArr(idx)
+    }
+    logDebug("Merged data and skeleton values => " + mergedArr.mkString(","))
+    InternalRow.fromSeq(mergedArr)
+  }
+
+  /** Applies readFileFunction to the file and flattens any ColumnarBatch it yields into rows. */
+  def read(partitionedFile: PartitionedFile, readFileFunction: PartitionedFile => Iterator[Any])
+    : Iterator[InternalRow] = {
+    import scala.collection.JavaConverters._
+    readFileFunction(partitionedFile).flatMap {
+      case r: InternalRow => Iterator.single(r)
+      case b: ColumnarBatch => b.rowIterator().asScala
+    }
+  }
+
+  /** Creates one partition per split in tableState, indexed by the split's position. */
+  override protected def getPartitions: Array[Partition] = {
+    logInfo("Getting partitions..")
+    tableState.files.zipWithIndex.map { case (bootstrapSplit, idx) =>
+      val skeletonSuffix = bootstrapSplit.skeletonFile.map("," + _.filePath).getOrElse("")
+      logInfo("Forming partition with => " + idx + "," + bootstrapSplit.dataFile.filePath + skeletonSuffix)
+      HudiBootstrapPartition(idx, bootstrapSplit)
+    }.toArray
+  }
+}
+
+case class HudiBootstrapPartition(index: Int, split: HudiBootstrapSplit) extends Partition
diff --git a/hudi-spark/src/main/scala/org/apache/hudi/HudiBootstrapRelation.scala b/hudi-spark/src/main/scala/org/apache/hudi/HudiBootstrapRelation.scala
new file mode 100644
index 0000000000000..b7aa438129e66
--- /dev/null
+++ b/hudi-spark/src/main/scala/org/apache/hudi/HudiBootstrapRelation.scala
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi
+
+import org.apache.hadoop.fs.Path
+import org.apache.hudi.common.model.{HoodieBaseFile, HoodieRecord}
+import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver}
+import org.apache.hudi.common.table.view.HoodieTableFileSystemView
+import org.apache.spark.internal.Logging
+import org.apache.spark.rdd.RDD
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.execution.datasources.{FileStatusCache, InMemoryFileIndex, PartitionedFile}
+import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
+import org.apache.spark.sql.{Row, SQLContext}
+import org.apache.spark.sql.sources.{BaseRelation, Filter, PrunedFilteredScan}
+import org.apache.spark.sql.types.{StringType, StructField, StructType}
+
+import scala.collection.JavaConverters._
+
+/**
+  * Relation over a bootstrapped Hudi table. Base files that reference an external base file are
+  * served by merging the skeleton file (Hudi metadata columns) with the external data file; all
+  * other base files are read directly with the parquet reader.
+  * @param _sqlContext SQL context used to build the parquet readers and to list files
+  * @param userSchema  schema supplied by the caller; not used within this class
+  * @param globPaths   paths whose files are listed to build the file index
+  * @param metaClient  meta client of the table, used for the timeline and the table schema
+  * @param optParams   options supplied by the caller; not used within this class
+  */
+class HudiBootstrapRelation(@transient val _sqlContext: SQLContext,
+                            val userSchema: StructType,
+                            val globPaths: Seq[Path],
+                            val metaClient: HoodieTableMetaClient,
+                            val optParams: Map[String, String]) extends BaseRelation
+  with PrunedFilteredScan with Logging {
+
+  val fileIndex: HudiBootstrapFileIndex = buildFileIndex()
+
+  val skeletonSchema: StructType = StructType(Seq(
+    StructField(HoodieRecord.COMMIT_TIME_METADATA_FIELD, StringType, nullable = true),
+    StructField(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, StringType, nullable = true),
+    StructField(HoodieRecord.RECORD_KEY_METADATA_FIELD, StringType, nullable = true),
+    StructField(HoodieRecord.PARTITION_PATH_METADATA_FIELD, StringType, nullable = true),
+    StructField(HoodieRecord.FILENAME_METADATA_FIELD, StringType, nullable = true)
+  ))
+
+  // Populated by inferFullSchema(): the table schema without the Hudi metadata fields.
+  var dataSchema: StructType = _
+  // Populated by inferFullSchema(): skeletonSchema followed by dataSchema.
+  var completeSchema: StructType = _
+
+  override def sqlContext: SQLContext = _sqlContext
+
+  // buildScan returns an RDD of InternalRow cast to RDD[Row], so rows must not be converted again.
+  override val needConversion: Boolean = false
+
+  override def schema: StructType = inferFullSchema()
+
+  /**
+    * Builds the scan with column pruning: only the required columns are read from the parquet files.
+    *
+    * Filters are pushed down only to the reader for files without an external base file.
+    * TODO: push filters to the skeleton and external data readers too. This is tricky because the two
+    * readers may then return an unequal number of rows, which makes merging them much more complicated.
+    *
+    * @param requiredColumns This contains the columns user has passed in select() or filter() operations on the
+    *                        dataframe
+    * @param filters         filters offered for push down; logged and handed to the regular reader only
+    * @return the [[HudiBootstrapRDD]] for this scan, exposed as RDD[Row]
+    */
+  override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = {
+    logInfo("Starting scan..")
+    filters.foreach(filter => logInfo("Obtained filter: " + filter.references.mkString(",") + " "
+      + filter.getClass))
+
+    // dataSchema and completeSchema are only set by inferFullSchema(); resolve them here so the scan
+    // does not dereference a null schema when it runs before `schema` was ever requested.
+    inferFullSchema()
+
+    // Compute splits
+    val bootstrapSplits = fileIndex.files.map(hoodieBaseFile => {
+      val baseFile = PartitionedFile(InternalRow.empty, hoodieBaseFile.getPath, 0, hoodieBaseFile.getFileLen)
+      if (hoodieBaseFile.getExternalBaseFile.isPresent) {
+        // The Hudi base file acts as the skeleton; the data is read from the external base file.
+        val externalFile = hoodieBaseFile.getExternalBaseFile.get()
+        val dataFile = PartitionedFile(InternalRow.empty, externalFile.getPath, 0, externalFile.getFileLen)
+        HudiBootstrapSplit(dataFile, Option(baseFile))
+      } else {
+        HudiBootstrapSplit(baseFile, Option.empty)
+      }
+    })
+    val tableState = HudiBootstrapTableState(bootstrapSplits)
+
+    // Get required schemas for column pruning
+    val requiredDataSchema = StructType(dataSchema.filter(field => requiredColumns.contains(field.name)))
+    val requiredSkeletonSchema = StructType(skeletonSchema.filter(field => requiredColumns.contains(field.name)))
+    val requiredRegularSchema = StructType(requiredColumns.map(col => {
+      completeSchema.find(_.name == col).get
+    }))
+
+    // Prepare readers for the data, skeleton and regular files; only the regular reader gets filters
+    val dataReadFunction = buildParquetReader(dataSchema, requiredDataSchema, Nil)
+    val skeletonReadFunction = buildParquetReader(skeletonSchema, requiredSkeletonSchema, Nil)
+    val regularReadFunction = buildParquetReader(completeSchema, requiredRegularSchema, filters)
+
+    val rdd = new HudiBootstrapRDD(_sqlContext.sparkSession, dataReadFunction, skeletonReadFunction,
+      regularReadFunction, requiredDataSchema, requiredSkeletonSchema, requiredColumns, tableState)
+
+    logInfo("Number of partitions for HudiBootstrapRDD => " + rdd.partitions.length)
+    rdd.asInstanceOf[RDD[Row]]
+  }
+
+  // Builds a parquet reader (no partition columns, fresh Hadoop conf) reading prunedSchema of fullSchema.
+  private def buildParquetReader(fullSchema: StructType, prunedSchema: StructType, pushedFilters: Seq[Filter])
+    : PartitionedFile => Iterator[InternalRow] = {
+    new ParquetFileFormat().buildReaderWithPartitionValues(
+      sparkSession = _sqlContext.sparkSession,
+      dataSchema = fullSchema,
+      partitionSchema = StructType(Seq.empty),
+      requiredSchema = prunedSchema,
+      filters = pushedFilters,
+      options = Map.empty,
+      hadoopConf = _sqlContext.sparkSession.sessionState.newHadoopConf())
+  }
+
+  // Resolves dataSchema and completeSchema on the first call and returns the cached value afterwards.
+  def inferFullSchema(): StructType = {
+    if (completeSchema == null) {
+      logInfo("Inferring schema..")
+      val schemaResolver = new TableSchemaResolver(metaClient)
+      val tableSchema = schemaResolver.getTableAvroSchemaWithoutMetadataFields
+      dataSchema = AvroConversionUtils.convertAvroSchemaToStructType(tableSchema)
+      completeSchema = StructType(skeletonSchema.fields ++ dataSchema.fields)
+    }
+    completeSchema
+  }
+
+  // Lists the files under globPaths and keeps the latest base files seen through completed commits.
+  def buildFileIndex(): HudiBootstrapFileIndex = {
+    logInfo("Building file index..")
+    val inMemoryFileIndex = createInMemoryFileIndex(globPaths)
+    val fileStatuses = inMemoryFileIndex.allFiles()
+
+    if (fileStatuses.isEmpty) {
+      throw new RuntimeException("No files found for reading.")
+    }
+
+    val fsView = new HoodieTableFileSystemView(metaClient, metaClient.getActiveTimeline.getCommitsTimeline
+      .filterCompletedInstants, fileStatuses.toArray)
+    val latestFiles: List[HoodieBaseFile] = fsView.getLatestBaseFiles.iterator().asScala.toList
+    latestFiles.foreach(file => logInfo("Skeleton file path: " + file.getPath))
+    latestFiles.filter(_.getExternalBaseFile.isPresent).foreach(file => {
+      logInfo("External data file path: " + file.getExternalBaseFile.get().getPath)
+    })
+
+    HudiBootstrapFileIndex(latestFiles)
+  }
+
+  private def createInMemoryFileIndex(globbedPaths: Seq[Path]): InMemoryFileIndex = {
+    val fileStatusCache = FileStatusCache.getOrCreate(_sqlContext.sparkSession)
+    new InMemoryFileIndex(_sqlContext.sparkSession, globbedPaths, Map(), Option.empty, fileStatusCache)
+  }
+}
+
+case class HudiBootstrapFileIndex(files: List[HoodieBaseFile])
+
+case class HudiBootstrapTableState(files: List[HudiBootstrapSplit])
+
+case class HudiBootstrapSplit(dataFile: PartitionedFile, skeletonFile: Option[PartitionedFile])
diff --git a/hudi-spark/src/main/scala/org/apache/hudi/IncrementalRelation.scala b/hudi-spark/src/main/scala/org/apache/hudi/IncrementalRelation.scala
index 436895bda3499..e796900544e49 100644
--- a/hudi-spark/src/main/scala/org/apache/hudi/IncrementalRelation.scala
+++ b/hudi-spark/src/main/scala/org/apache/hudi/IncrementalRelation.scala
@@ -17,19 +17,24 @@
 
 package org.apache.hudi
 
+import com.google.common.collect.Lists
+import org.apache.avro.Schema
 import org.apache.hadoop.fs.GlobPattern
 import org.apache.hadoop.fs.Path
 import org.apache.hudi.avro.HoodieAvroUtils
+import org.apache.hudi.common.bootstrap.index.BootstrapIndex
 import org.apache.hudi.common.model.{HoodieCommitMetadata, HoodieRecord, HoodieTableType}
 import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver}
+import org.apache.hudi.common.table.timeline.HoodieTimeline
+import org.apache.hudi.common.util.ParquetUtils
 import org.apache.hudi.config.HoodieWriteConfig
 import org.apache.hudi.exception.HoodieException
 import org.apache.hudi.table.HoodieTable
 import org.apache.log4j.LogManager
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.sources.{BaseRelation, TableScan}
-import org.apache.spark.sql.types.StructType
-import org.apache.spark.sql.{Row, SQLContext}
+import org.apache.spark.sql.types.{StringType, StructField, StructType}
+import org.apache.spark.sql.{DataFrame, Row, SQLContext}
 
 import scala.collection.JavaConversions._
 import scala.collection.mutable
@@ -47,6 +52,14 @@ class IncrementalRelation(val sqlContext: SQLContext,
 
   private val log = LogManager.getLogger(classOf[IncrementalRelation])
 
+  val skeletonSchema: StructType = StructType(Seq(
+    StructField(HoodieRecord.COMMIT_TIME_METADATA_FIELD, StringType, nullable = true),
+    StructField(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, StringType, nullable = true),
+    StructField(HoodieRecord.RECORD_KEY_METADATA_FIELD, StringType, nullable = true),
+    StructField(HoodieRecord.PARTITION_PATH_METADATA_FIELD, StringType, nullable = true),
+    StructField(HoodieRecord.FILENAME_METADATA_FIELD, StringType, nullable = true)
+  ))
+
   private val metaClient = new HoodieTableMetaClient(sqlContext.sparkContext.hadoopConfiguration, basePath, true)
   // MOR tables not supported yet
   if (metaClient.getTableType.equals(HoodieTableType.MERGE_ON_READ)) {
@@ -71,13 +84,16 @@ class IncrementalRelation(val sqlContext: SQLContext,
     optParams.getOrElse(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY, lastInstant.getTimestamp))
     .getInstants.iterator().toList
 
-  // use schema from latest metadata, if not present, read schema from the data file
-  private val latestSchema = {
-    val schemaUtil = new TableSchemaResolver(metaClient)
-    val tableSchema = HoodieAvroUtils.createHoodieWriteSchema(schemaUtil.getTableAvroSchemaWithoutMetadataFields);
-    AvroConversionUtils.convertAvroSchemaToStructType(tableSchema)
+  // use schema from a file produced in the latest instant
+  val latestSchema: StructType = {
+    log.info("Inferring schema..")
+    val schemaResolver = new TableSchemaResolver(metaClient)
+    val tableSchema = schemaResolver.getTableAvroSchemaWithoutMetadataFields
+    val dataSchema = AvroConversionUtils.convertAvroSchemaToStructType(tableSchema)
+    StructType(skeletonSchema.fields ++ dataSchema.fields)
   }
 
+
   private val filters = {
     if (optParams.contains(DataSourceReadOptions.PUSH_DOWN_INCR_FILTERS_OPT_KEY)) {
       val filterStr = optParams.getOrElse(
@@ -92,36 +108,69 @@ class IncrementalRelation(val sqlContext: SQLContext,
   override def schema: StructType = latestSchema
 
   override def buildScan(): RDD[Row] = {
-    val fileIdToFullPath = mutable.HashMap[String, String]()
+    val regularFileIdToFullPath = mutable.HashMap[String, String]()
+    var metaBootstrapFileIdToFullPath = mutable.HashMap[String, String]()
+
     for (commit <- commitsToReturn) {
       val metadata: HoodieCommitMetadata = HoodieCommitMetadata.fromBytes(commitTimeline.getInstantDetails(commit)
         .get, classOf[HoodieCommitMetadata])
-      fileIdToFullPath ++= metadata.getFileIdAndFullPaths(basePath).toMap
+
+      if (HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS == commit.getTimestamp) {
+        metaBootstrapFileIdToFullPath ++= metadata.getFileIdAndFullPaths(basePath).toMap
+      } else {
+        regularFileIdToFullPath ++= metadata.getFileIdAndFullPaths(basePath).toMap
+      }
+    }
+
+    if (metaBootstrapFileIdToFullPath.nonEmpty) {
+      // filter out meta bootstrap files that have had more commits since metadata bootstrap
+      metaBootstrapFileIdToFullPath = metaBootstrapFileIdToFullPath
+        .filterNot(fileIdFullPath => regularFileIdToFullPath.contains(fileIdFullPath._1))
     }
+
     val pathGlobPattern = optParams.getOrElse(
       DataSourceReadOptions.INCR_PATH_GLOB_OPT_KEY,
       DataSourceReadOptions.DEFAULT_INCR_PATH_GLOB_OPT_VAL)
-    val filteredFullPath = if(!pathGlobPattern.equals(DataSourceReadOptions.DEFAULT_INCR_PATH_GLOB_OPT_VAL)) {
-      val globMatcher = new GlobPattern("*" + pathGlobPattern)
-      fileIdToFullPath.filter(p => globMatcher.matches(p._2))
-    } else {
-      fileIdToFullPath
+    val (filteredRegularFullPaths, filteredMetaBootstrapFullPaths) = {
+      if(!pathGlobPattern.equals(DataSourceReadOptions.DEFAULT_INCR_PATH_GLOB_OPT_VAL)) {
+        val globMatcher = new GlobPattern("*" + pathGlobPattern)
+        (regularFileIdToFullPath.filter(p => globMatcher.matches(p._2)).values,
+          metaBootstrapFileIdToFullPath.filter(p => globMatcher.matches(p._2)).values)
+      } else {
+        (regularFileIdToFullPath.values, metaBootstrapFileIdToFullPath.values)
+      }
     }
     // unset the path filter, otherwise if end_instant_time is not the latest instant, path filter set for RO view
     // will filter out all the files incorrectly.
     sqlContext.sparkContext.hadoopConfiguration.unset("mapreduce.input.pathFilter.class")
     val sOpts = optParams.filter(p => !p._1.equalsIgnoreCase("path"))
-    if (filteredFullPath.isEmpty) {
+    if (filteredRegularFullPaths.isEmpty && filteredMetaBootstrapFullPaths.isEmpty) {
       sqlContext.sparkContext.emptyRDD[Row]
     } else {
       log.info("Additional Filters to be applied to incremental source are :" + filters)
-      filters.foldLeft(sqlContext.read.options(sOpts)
-        .schema(latestSchema)
-        .parquet(filteredFullPath.values.toList: _*)
-        .filter(String.format("%s >= '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitsToReturn.head.getTimestamp))
-        .filter(String.format("%s <= '%s'",
-          HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitsToReturn.last.getTimestamp)))((e, f) => e.filter(f))
-        .toDF().rdd
+
+      var df: DataFrame = sqlContext.createDataFrame(sqlContext.sparkContext.emptyRDD[Row], latestSchema)
+      // Guard on the glob-filtered paths: the glob may have excluded every metadata-bootstrap file.
+      if (filteredMetaBootstrapFullPaths.nonEmpty) {
+        df = sqlContext.sparkSession.read
+               .format("hudi")
+               .schema(latestSchema)
+               .option(DataSourceReadOptions.READ_PATHS_OPT_KEY, filteredMetaBootstrapFullPaths.mkString(","))
+               .load()
+      }
+
+      // Same guard for regular files, so parquet() is never called with an empty path list.
+      if (filteredRegularFullPaths.nonEmpty) {
+        df = df.union(sqlContext.read.options(sOpts)
+                        .schema(latestSchema)
+                        .parquet(filteredRegularFullPaths.toList: _*)
+                        .filter(String.format("%s >= '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD,
+                          commitsToReturn.head.getTimestamp))
+                        .filter(String.format("%s <= '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD,
+                          commitsToReturn.last.getTimestamp)))
+      }
+
+      filters.foldLeft(df)((e, f) => e.filter(f)).rdd
     }
   }
 }
diff --git a/hudi-spark/src/test/java/org/apache/hudi/client/TestBootstrap.java b/hudi-spark/src/test/java/org/apache/hudi/client/TestBootstrap.java
index 495e8b057897b..e8eefdcdd61ee 100644
--- a/hudi-spark/src/test/java/org/apache/hudi/client/TestBootstrap.java
+++ b/hudi-spark/src/test/java/org/apache/hudi/client/TestBootstrap.java
@@ -114,6 +114,11 @@ public void setUp() throws Exception {
 
     srcPath = tmpFolder.toAbsolutePath().toString() + "/data";
 
+    // initialize parquet input format
+    reloadInputFormats();
+  }
+
+  private void reloadInputFormats() {
     // initialize parquet input format
     roInputFormat = new HoodieParquetInputFormat();
     roJobConf = new JobConf(jsc.hadoopConfiguration());
@@ -165,7 +170,7 @@ public void testMetadataBootstrapUnpartitionedCOW() throws Exception {
         .withSchema(schema.toString())
         .withBootstrapModeSelector(MetadataOnlyBootstrapModeSelector.class.getName()).build();
     HoodieWriteClient client = new HoodieWriteClient(jsc, config);
-    client.bootstrap();
+    client.bootstrap(Option.empty());
     checkBootstrapResults(totalRecords, schema, HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS, true, 1, timestamp,
         timestamp, false);
 
@@ -183,7 +188,7 @@ public void testMetadataBootstrapUnpartitionedCOW() throws Exception {
 
     // Run bootstrap again
     client = new HoodieWriteClient(jsc, config);
-    client.bootstrap();
+    client.bootstrap(Option.empty());
 
     metaClient.reloadActiveTimeline();
     index = BootstrapIndex.getBootstrapIndex(metaClient);
@@ -223,7 +228,7 @@ public void testMetadataBootstrapWithUpdatesCOW() throws Exception {
         .withBootstrapModeSelector(MetadataOnlyBootstrapModeSelector.class.getName())
         .build();
     HoodieWriteClient client = new HoodieWriteClient(jsc, config);
-    client.bootstrap();
+    client.bootstrap(Option.empty());
     checkBootstrapResults(totalRecords, schema, HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS, true, 1, timestamp,
         timestamp, false);
 
@@ -241,7 +246,7 @@ public void testMetadataBootstrapWithUpdatesCOW() throws Exception {
 
     // Run bootstrap again
     client = new HoodieWriteClient(jsc, config);
-    client.bootstrap();
+    client.bootstrap(Option.empty());
 
     metaClient.reloadActiveTimeline();
     index = BootstrapIndex.getBootstrapIndex(metaClient);
@@ -282,7 +287,7 @@ public void testMetadataBootstrapWithUpdatesMOR() throws Exception {
         .withBootstrapModeSelector(MetadataOnlyBootstrapModeSelector.class.getName()).build();
     System.out.println("Config Props :" + config.getProps().getProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY()));
     HoodieWriteClient client = new HoodieWriteClient(jsc, config);
-    client.bootstrap();
+    client.bootstrap(Option.empty());
     checkBootstrapResults(totalRecords, schema, HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS, true, 1,
         timestamp, timestamp, false);
     // Rollback Bootstrap
@@ -299,7 +304,7 @@ public void testMetadataBootstrapWithUpdatesMOR() throws Exception {
 
     // Run bootstrap again
     client = new HoodieWriteClient(jsc, config);
-    client.bootstrap();
+    client.bootstrap(Option.empty());
 
     metaClient.reloadActiveTimeline();
     index = BootstrapIndex.getBootstrapIndex(metaClient);
@@ -341,7 +346,7 @@ public void testFullBoostrapOnlyCOW() throws Exception {
         .withFullBootstrapInputProvider(FullTestBootstrapInputProvider.class.getName())
         .withBootstrapModeSelector(FullBootstrapModeSelector.class.getName()).build();
     HoodieWriteClient client = new HoodieWriteClient(jsc, config);
-    client.bootstrap();
+    client.bootstrap(Option.empty());
     checkBootstrapResults(totalRecords, schema, HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS, false, 1, timestamp,
         timestamp, false);
     // Rollback Bootstrap
@@ -358,7 +363,7 @@ public void testFullBoostrapOnlyCOW() throws Exception {
 
     // Run bootstrap again
     client = new HoodieWriteClient(jsc, config);
-    client.bootstrap();
+    client.bootstrap(Option.empty());
 
     metaClient.reloadActiveTimeline();
     index = BootstrapIndex.getBootstrapIndex(metaClient);
@@ -395,7 +400,7 @@ public void testFullBootstrapWithUpdatesMOR() throws Exception {
         .withFullBootstrapInputProvider(FullTestBootstrapInputProvider.class.getName())
         .withBootstrapModeSelector(FullBootstrapModeSelector.class.getName()).build();
     HoodieWriteClient client = new HoodieWriteClient(jsc, config);
-    client.bootstrap();
+    client.bootstrap(Option.empty());
     checkBootstrapResults(totalRecords, schema, HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS, false, 1, timestamp,
         timestamp, false);
     // Rollback Bootstrap
@@ -412,7 +417,7 @@ public void testFullBootstrapWithUpdatesMOR() throws Exception {
 
     // Run bootstrap again
     client = new HoodieWriteClient(jsc, config);
-    client.bootstrap();
+    client.bootstrap(Option.empty());
 
     metaClient.reloadActiveTimeline();
     index = BootstrapIndex.getBootstrapIndex(metaClient);
@@ -454,7 +459,7 @@ public void testMetaAndFullBoostrapCOW() throws Exception {
         .withFullBootstrapInputProvider(FullTestBootstrapInputProvider.class.getName())
         .withBootstrapModeSelector(TestRandomBootstapModeSelector.class.getName()).build();
     HoodieWriteClient client = new HoodieWriteClient(jsc, config);
-    client.bootstrap();
+    client.bootstrap(Option.empty());
     checkBootstrapResults(totalRecords, schema, HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS, false, 2, 2,
         timestamp, timestamp, false,
         Arrays.asList(HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS, HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS));
@@ -472,7 +477,7 @@ public void testMetaAndFullBoostrapCOW() throws Exception {
 
     // Run bootstrap again
     client = new HoodieWriteClient(jsc, config);
-    client.bootstrap();
+    client.bootstrap(Option.empty());
 
     metaClient.reloadActiveTimeline();
     index = BootstrapIndex.getBootstrapIndex(metaClient);
@@ -509,7 +514,7 @@ public void testMetadataAndFullBootstrapWithUpdatesMOR() throws Exception {
         .withFullBootstrapInputProvider(FullTestBootstrapInputProvider.class.getName())
         .withBootstrapModeSelector(TestRandomBootstapModeSelector.class.getName()).build();
     HoodieWriteClient client = new HoodieWriteClient(jsc, config);
-    client.bootstrap();
+    client.bootstrap(Option.empty());
     checkBootstrapResults(totalRecords, schema, HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS, false, 2, 2,
         timestamp, timestamp, false,
         Arrays.asList(HoodieTimeline.METADATA_BOOTSTRAP_INSTANT_TS, HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS));
@@ -527,7 +532,7 @@ public void testMetadataAndFullBootstrapWithUpdatesMOR() throws Exception {
 
     // Run bootstrap again
     client = new HoodieWriteClient(jsc, config);
-    client.bootstrap();
+    client.bootstrap(Option.empty());
 
     metaClient.reloadActiveTimeline();
     index = BootstrapIndex.getBootstrapIndex(metaClient);
@@ -593,6 +598,7 @@ private void checkBootstrapResults(int totalRecords, Schema schema, String insta
     }
 
     // RO Input Format Read
+    reloadInputFormats();
     List<GenericRecord> records = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(
         FSUtils.getAllPartitionPaths(metaClient.getFs(), basePath, false).stream()
             .map(f -> basePath + "/" + f).collect(Collectors.toList()),
@@ -609,6 +615,7 @@ private void checkBootstrapResults(int totalRecords, Schema schema, String insta
     assertEquals(totalRecords, seenKeys.size());
 
     //RT Input Format Read
+    reloadInputFormats();
     seenKeys = new HashSet<>();
     records = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(
         FSUtils.getAllPartitionPaths(metaClient.getFs(), basePath, false).stream()
@@ -752,10 +759,8 @@ public Map<BootstrapMode, List<String>> select(List<Pair<String, List<HoodieFile
       partitions.stream().forEach(p -> {
         final BootstrapMode mode;
         if (currIdx == 0) {
-          System.out.println("METADATA bootstrap selected");
           mode = BootstrapMode.METADATA_ONLY_BOOTSTRAP;
         } else {
-          System.out.println("FULL bootstrap selected");
           mode = BootstrapMode.FULL_BOOTSTRAP;
         }
         currIdx = (currIdx + 1) % 2;
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/checkpointing/InitialCheckPointProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/checkpointing/InitialCheckPointProvider.java
index 4cdc01ece6468..7fc8afb548034 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/checkpointing/InitialCheckPointProvider.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/checkpointing/InitialCheckPointProvider.java
@@ -18,12 +18,12 @@
 
 package org.apache.hudi.utilities.checkpointing;
 
-import org.apache.hudi.common.config.TypedProperties;
-import org.apache.hudi.exception.HoodieException;
-
+import java.io.IOException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.exception.HoodieException;
 
 /**
  * Provide the initial checkpoint for delta streamer.
@@ -51,7 +51,13 @@ public InitialCheckPointProvider(TypedProperties props) {
    *
    * @param config Hadoop configuration
    */
-  public abstract void init(Configuration config) throws HoodieException;
+  public void init(Configuration config) throws HoodieException {
+    try {
+      this.fs = FileSystem.get(config);
+    } catch (IOException e) {
+      throw new HoodieException("CheckpointProvider initialization failed", e); // keep the root cause
+    }
+  }
 
   /**
    * Get checkpoint string recognizable for delta streamer.
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/checkpointing/InitialCheckpointFromAnotherHoodieTimelineProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/checkpointing/InitialCheckpointFromAnotherHoodieTimelineProvider.java
new file mode 100644
index 0000000000000..17058da7fddc6
--- /dev/null
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/checkpointing/InitialCheckpointFromAnotherHoodieTimelineProvider.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.utilities.checkpointing;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hudi.common.config.TypedProperties;
+import org.apache.hudi.common.model.HoodieCommitMetadata;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
+import org.apache.hudi.exception.HoodieException;
+
+import java.io.IOException;
+import java.util.Objects;
+
+import static org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.CHECKPOINT_KEY;
+
+/**
+ * Initial checkpoint provider that reuses the checkpoint recorded in the commit metadata of
+ * another (mirror) Hudi dataset. Used by integration tests.
+ */
+public class InitialCheckpointFromAnotherHoodieTimelineProvider extends InitialCheckPointProvider {
+
+  /** Meta client of the other dataset; created in {@link #init(Configuration)}. */
+  private HoodieTableMetaClient anotherDsHoodieMetaclient;
+
+  public InitialCheckpointFromAnotherHoodieTimelineProvider(TypedProperties props) {
+    super(props);
+  }
+
+  /**
+   * Runs the base initialization and builds a meta client from the inherited {@code path}.
+   *
+   * @param config Hadoop configuration
+   */
+  @Override
+  public void init(Configuration config) throws HoodieException {
+    super.init(config);
+    this.anotherDsHoodieMetaclient = new HoodieTableMetaClient(config, path.toString());
+  }
+
+  /**
+   * Walks the completed commits of the other dataset in reverse timeline order and returns the
+   * first checkpoint found in their commit metadata.
+   *
+   * @return the value stored under {@code CHECKPOINT_KEY} in the first commit that has one
+   * @throws HoodieException if no completed commit carries a checkpoint
+   */
+  @Override
+  public String getCheckpoint() throws HoodieException {
+    return anotherDsHoodieMetaclient.getCommitsTimeline().filterCompletedInstants().getReverseOrderedInstants()
+        .map(instant -> {
+          try {
+            HoodieCommitMetadata commitMetadata = HoodieCommitMetadata
+                .fromBytes(anotherDsHoodieMetaclient.getActiveTimeline().getInstantDetails(instant).get(),
+                    HoodieCommitMetadata.class);
+            return commitMetadata.getMetadata(CHECKPOINT_KEY);
+          } catch (IOException e) {
+            // NOTE(review): unreadable commit metadata is skipped, so a checkpoint from an earlier
+            // commit may be returned instead - confirm this fallback is intended.
+            return null;
+          }
+        })
+        .filter(Objects::nonNull)
+        .findFirst()
+        .orElseThrow(() -> new HoodieException(
+            "Unable to find a checkpoint in any completed commit of the hoodie table at " + path));
+  }
+}
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/checkpointing/KafkaConnectHdfsProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/checkpointing/KafkaConnectHdfsProvider.java
index 8e8af55a3c563..654836c2a68e3 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/checkpointing/KafkaConnectHdfsProvider.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/checkpointing/KafkaConnectHdfsProvider.java
@@ -21,9 +21,7 @@
 import org.apache.hudi.common.config.TypedProperties;
 import org.apache.hudi.exception.HoodieException;
 
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 
@@ -44,15 +42,6 @@ public KafkaConnectHdfsProvider(TypedProperties props) {
     super(props);
   }
 
-  @Override
-  public void init(Configuration config) throws HoodieException {
-    try {
-      this.fs = FileSystem.get(config);
-    } catch (IOException e) {
-      throw new HoodieException("KafkaConnectHdfsProvider initialization failed");
-    }
-  }
-
   /**
    * PathFilter for Kafka-Connect-HDFS.
    * Directory format: /partition1=xxx/partition2=xxx
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java
index 8a8d6780f4246..c5f5e70b7c934 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java
@@ -75,6 +75,8 @@
 
 import scala.collection.JavaConversions;
 
+import static org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.CHECKPOINT_KEY;
+import static org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.CHECKPOINT_RESET_KEY;
 import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_NAMESPACE;
 import static org.apache.hudi.utilities.schema.RowBasedSchemaProvider.HOODIE_RECORD_STRUCT_NAME;
 
@@ -85,8 +87,6 @@ public class DeltaSync implements Serializable {
 
   private static final long serialVersionUID = 1L;
   private static final Logger LOG = LogManager.getLogger(DeltaSync.class);
-  public static final String CHECKPOINT_KEY = "deltastreamer.checkpoint.key";
-  public static final String CHECKPOINT_RESET_KEY = "deltastreamer.checkpoint.reset_key";
 
   /**
    * Delta Sync Config.
@@ -260,7 +260,8 @@ private Pair<SchemaProvider, Pair<String, JavaRDD<HoodieRecord>>> readFromSource
           resumeCheckpointStr = Option.of(cfg.checkpoint);
         } else if (commitMetadata.getMetadata(CHECKPOINT_KEY) != null) {
           resumeCheckpointStr = Option.of(commitMetadata.getMetadata(CHECKPOINT_KEY));
-        } else {
+        } else if (HoodieTimeline.compareTimestamps(HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS,
+            HoodieTimeline.LESSER_THAN, lastCommit.get().getTimestamp())) {
           throw new HoodieDeltaStreamerException(
               "Unable to find previous checkpoint. Please double check if this table "
                   + "was indeed built via delta streamer. Last Commit :" + lastCommit + ", Instants :"
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java
index 725edd5e1d8dc..6f78e29a611f5 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java
@@ -18,7 +18,7 @@
 
 package org.apache.hudi.utilities.deltastreamer;
 
-import org.apache.hadoop.hive.conf.HiveConf;
+import java.util.HashMap;
 import org.apache.hudi.DataSourceUtils;
 import org.apache.hudi.DataSourceWriteOptions;
 import org.apache.hudi.client.HoodieWriteClient;
@@ -55,6 +55,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.spark.api.java.JavaSparkContext;
@@ -89,13 +90,14 @@ public class HoodieDeltaStreamer implements Serializable {
   private static final long serialVersionUID = 1L;
   private static final Logger LOG = LogManager.getLogger(HoodieDeltaStreamer.class);
 
-  public static String CHECKPOINT_KEY = "deltastreamer.checkpoint.key";
+  public static final String CHECKPOINT_KEY = "deltastreamer.checkpoint.key";
+  public static final String CHECKPOINT_RESET_KEY = "deltastreamer.checkpoint.reset_key";
 
   private final transient Config cfg;
 
   private final TypedProperties properties;
 
-  private transient DeltaSyncService deltaSyncService;
+  private transient Option<DeltaSyncService> deltaSyncService;
 
   private final Option<BootstrapExecutor> bootstrapExecutor;
 
@@ -114,22 +116,27 @@ public HoodieDeltaStreamer(Config cfg, JavaSparkContext jssc, FileSystem fs, Con
   }
 
   public HoodieDeltaStreamer(Config cfg, JavaSparkContext jssc, FileSystem fs, Configuration conf,
-                             TypedProperties properties) throws IOException {
+                             TypedProperties props) throws IOException {
+    // Resolving the properties first in a consistent way
+    this.properties = props != null ? props : UtilHelpers.readConfig(
+        FSUtils.getFs(cfg.propsFilePath, jssc.hadoopConfiguration()),
+        new Path(cfg.propsFilePath), cfg.configs).getConfig();
+
     if (cfg.initialCheckpointProvider != null && cfg.checkpoint == null) {
       InitialCheckPointProvider checkPointProvider =
-          UtilHelpers.createInitialCheckpointProvider(cfg.initialCheckpointProvider, properties);
+          UtilHelpers.createInitialCheckpointProvider(cfg.initialCheckpointProvider, this.properties);
       checkPointProvider.init(conf);
       cfg.checkpoint = checkPointProvider.getCheckpoint();
     }
     this.cfg = cfg;
-    this.deltaSyncService = new DeltaSyncService(cfg, jssc, fs, conf, properties);
-    this.properties = properties;
     this.bootstrapExecutor = Option.ofNullable(
-        cfg.runBootstrap ? new BootstrapExecutor(cfg, jssc, fs, conf, properties) : null);
+        cfg.runBootstrap ? new BootstrapExecutor(cfg, jssc, fs, conf, this.properties) : null);
+    this.deltaSyncService = Option.ofNullable(
+        cfg.runBootstrap ? null : new DeltaSyncService(cfg, jssc, fs, conf, this.properties));
   }
 
   public void shutdownGracefully() {
-    deltaSyncService.shutdown(false);
+    deltaSyncService.ifPresent(ds -> ds.shutdown(false));
   }
 
   /**
@@ -143,18 +150,30 @@ public void sync() throws Exception {
       bootstrapExecutor.get().execute();
     } else {
       if (cfg.continuousMode) {
-        deltaSyncService.start(this::onDeltaSyncShutdown);
-        deltaSyncService.waitForShutdown();
+        deltaSyncService.ifPresent(ds -> {
+          ds.start(this::onDeltaSyncShutdown);
+          try {
+            ds.waitForShutdown();
+          } catch (Exception e) {
+            throw new HoodieException(e.getMessage(), e);
+          }
+        });
         LOG.info("Delta Sync shutting down");
       } else {
         LOG.info("Delta Streamer running only single round");
         try {
-          deltaSyncService.getDeltaSync().syncOnce();
+          deltaSyncService.ifPresent(ds -> {
+            try {
+              ds.getDeltaSync().syncOnce();
+            } catch (Exception e) {
+              throw new HoodieException(e.getMessage(), e);
+            }
+          });
         } catch (Exception ex) {
           LOG.error("Got error running delta sync once. Shutting down", ex);
           throw ex;
         } finally {
-          deltaSyncService.close();
+          deltaSyncService.ifPresent(DeltaSyncService::close);
           LOG.info("Shut down delta streamer");
         }
       }
@@ -167,7 +186,7 @@ public Config getConfig() {
 
   private boolean onDeltaSyncShutdown(boolean error) {
     LOG.info("DeltaSync shutdown. Closing write client. Error?" + error);
-    deltaSyncService.close();
+    deltaSyncService.ifPresent(DeltaSyncService::close);
     return true;
   }
 
@@ -410,9 +429,7 @@ public DeltaSyncService(Config cfg, JavaSparkContext jssc, FileSystem fs, Config
       ValidationUtils.checkArgument(!cfg.filterDupes || cfg.operation != Operation.UPSERT,
           "'--filter-dupes' needs to be disabled when '--op' is 'UPSERT' to ensure updates are not missed.");
 
-      this.props = properties != null ? properties : UtilHelpers.readConfig(
-          FSUtils.getFs(cfg.propsFilePath, jssc.hadoopConfiguration()),
-          new Path(cfg.propsFilePath), cfg.configs).getConfig();
+      this.props = properties;
       LOG.info("Creating delta streamer with configs : " + props.toString());
       this.schemaProvider = UtilHelpers.createSchemaProvider(cfg.schemaProviderClassName, props, jssc);
 
@@ -685,9 +702,7 @@ public BootstrapExecutor(Config cfg, JavaSparkContext jssc, FileSystem fs, Confi
       this.jssc = jssc;
       this.fs = fs;
       this.configuration = conf;
-      this.props = properties != null ? properties : UtilHelpers.readConfig(
-          FSUtils.getFs(cfg.propsFilePath, jssc.hadoopConfiguration()),
-          new Path(cfg.propsFilePath), cfg.configs).getConfig();
+      this.props = properties;
       // Add more defaults if full bootstrap requested
       this.props.putIfAbsent(DataSourceWriteOptions.PAYLOAD_CLASS_OPT_KEY(),
           DataSourceWriteOptions.DEFAULT_PAYLOAD_OPT_VAL());
@@ -713,8 +728,18 @@ public BootstrapExecutor(Config cfg, JavaSparkContext jssc, FileSystem fs, Confi
     public void execute() throws IOException {
       initializeTable();
       HoodieWriteClient bootstrapClient = new HoodieWriteClient(jssc, bootstrapConfig, true);
-      bootstrapClient.bootstrap();
-      syncHive();
+
+      try {
+        HashMap<String, String> checkpointCommitMetadata = new HashMap<>();
+        checkpointCommitMetadata.put(CHECKPOINT_KEY, cfg.checkpoint);
+        if (cfg.checkpoint != null) {
+          checkpointCommitMetadata.put(CHECKPOINT_RESET_KEY, cfg.checkpoint);
+        }
+        bootstrapClient.bootstrap(Option.of(checkpointCommitMetadata));
+        syncHive();
+      } finally {
+        bootstrapClient.close();
+      }
     }
 
     /**
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java
index e8718558ccfe0..176661b0fe356 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/keygen/TimestampBasedKeyGenerator.java
@@ -18,6 +18,9 @@
 
 package org.apache.hudi.utilities.keygen;
 
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
+import java.nio.charset.StandardCharsets;
 import org.apache.hudi.DataSourceUtils;
 import org.apache.hudi.DataSourceWriteOptions;
 import org.apache.hudi.avro.HoodieAvroUtils;
@@ -62,6 +65,8 @@ enum TimestampType implements Serializable {
   // https://docs.oracle.com/javase/8/docs/api/java/util/TimeZone.html
   private final TimeZone timeZone;
 
+  protected final boolean encodePartitionPath;
+
   /**
    * Supported configs.
    */
@@ -108,6 +113,9 @@ public TimestampBasedKeyGenerator(TypedProperties config) {
       default:
         timeUnit = null;
     }
+
+    this.encodePartitionPath = config.getBoolean(DataSourceWriteOptions.URL_ENCODE_PARTITIONING_OPT_KEY(),
+        Boolean.parseBoolean(DataSourceWriteOptions.DEFAULT_URL_ENCODE_PARTITIONING_OPT_VAL()));
   }
 
   @Override
diff --git a/packaging/hudi-hadoop-mr-bundle/pom.xml b/packaging/hudi-hadoop-mr-bundle/pom.xml
index c759d0d0769a1..0da29359ea9a5 100644
--- a/packaging/hudi-hadoop-mr-bundle/pom.xml
+++ b/packaging/hudi-hadoop-mr-bundle/pom.xml
@@ -104,6 +104,12 @@
                 <relocation>
                   <pattern> org.apache.hadoop.hbase.</pattern>
                   <shadedPattern>org.apache.hudi.org.apache.hadoop.hbase.</shadedPattern>
+                  <excludes>
+                    <!-- Hive 2.3.3 brings in HBase 1.1.1, which conflicts with our HBase version even with shading. -->
+                    <!-- The HBase version check is implemented using a global runtime configuration, hbase.defaults.for.version. -->
+                    <!-- That configuration is set automatically at load time by the HBase version used by Hive, which conflicts with our version. Hence the exclusion below, to skip this check. -->
+                    <exclude>org.apache.hadoop.hbase.util.VersionInfo</exclude>
+                  </excludes>
                 </relocation>
                 <relocation>
                   <pattern> org.apache.htrace.</pattern>