diff --git a/.gitignore b/.gitignore index f883fa99..2d3b91cf 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,21 @@ dependency-reduced-pom.xml target .flattened-pom.xml +spark-load/.idea/ +spark-load/target +spark-load/spark-load-core/dependency-reduced-pom.xml +spark-load/spark-load-core/output/ +spark-load/spark-load-core/target/ +spark-load/spark-load-core/.idea/ +spark-load/spark-load-dist/dependency-reduced-pom.xml +spark-load/spark-load-dist/target/ +spark-load/spark-load-dpp/dependency-reduced-pom.xml +spark-load/spark-load-dpp/.flattened-pom.xml +spark-load/spark-load-dpp/target/ +spark-load/spark-load-common/dependency-reduced-pom.xml +spark-load/spark-load-common/target/ + + ### Java template # Compiled class file *.class diff --git a/spark-doris-connector/spark-doris-connector-it/src/test/java/org/apache/doris/spark/sql/TestSparkConnector.scala b/spark-doris-connector/spark-doris-connector-it/src/test/java/org/apache/doris/spark/sql/TestSparkConnector.scala index 1242a9ba..a5e756c1 100644 --- a/spark-doris-connector/spark-doris-connector-it/src/test/java/org/apache/doris/spark/sql/TestSparkConnector.scala +++ b/spark-doris-connector/spark-doris-connector-it/src/test/java/org/apache/doris/spark/sql/TestSparkConnector.scala @@ -19,7 +19,8 @@ package org.apache.doris.spark.sql import org.apache.spark.sql.SparkSession import org.apache.spark.{SparkConf, SparkContext} -import org.junit.{Ignore, Test} +import org.junit.Ignore +import org.junit.Test // This test need real connect info to run. // Set the connect info before comment out this @Ignore diff --git a/spark-load/build.sh b/spark-load/build.sh new file mode 100755 index 00000000..a8ca1c73 --- /dev/null +++ b/spark-load/build.sh @@ -0,0 +1,175 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +############################################################## +# This script is used to compile Spark-Load +# Usage: +# sh build.sh +# +############################################################## + +# Bugzilla 37848: When no TTY is available, don't output to console +have_tty=0 +# shellcheck disable=SC2006 +if [[ "`tty`" != "not a tty" ]]; then + have_tty=1 +fi + +# Bugzilla 37848: When no TTY is available, don't output to console +have_tty=0 +# shellcheck disable=SC2006 +if [[ "`tty`" != "not a tty" ]]; then + have_tty=1 +fi + + # Only use colors if connected to a terminal +if [[ ${have_tty} -eq 1 ]]; then + PRIMARY=$(printf '\033[38;5;082m') + RED=$(printf '\033[31m') + GREEN=$(printf '\033[32m') + YELLOW=$(printf '\033[33m') + BLUE=$(printf '\033[34m') + BOLD=$(printf '\033[1m') + RESET=$(printf '\033[0m') +else + PRIMARY="" + RED="" + GREEN="" + YELLOW="" + BLUE="" + BOLD="" + RESET="" +fi + +echo_r () { + # Color red: Error, Failed + [[ $# -ne 1 ]] && return 1 + # shellcheck disable=SC2059 + printf "[%sDoris%s] %s$1%s\n" $BLUE $RESET $RED $RESET +} + +echo_g () { + # Color green: Success + [[ $# -ne 1 ]] && return 1 + # shellcheck disable=SC2059 + printf "[%sDoris%s] %s$1%s\n" $BLUE $RESET $GREEN $RESET +} + +echo_y () { + # Color yellow: Warning + [[ $# -ne 1 ]] && return 1 + # shellcheck disable=SC2059 + printf "[%sDoris%s] %s$1%s\n" $BLUE $RESET $YELLOW $RESET +} + +echo_w () { + # Color yellow: White + [[ $# -ne 1 ]] && return 1 + # shellcheck disable=SC2059 + printf "[%sDoris%s] %s$1%s\n" $BLUE $RESET $WHITE $RESET +} + +# OS specific support. $var _must_ be set to either true or false. +cygwin=false +os400=false +# shellcheck disable=SC2006 +case "`uname`" in +CYGWIN*) cygwin=true;; +OS400*) os400=true;; +esac + +# resolve links - $0 may be a softlink +PRG="$0" + +while [[ -h "$PRG" ]]; do + # shellcheck disable=SC2006 + ls=`ls -ld "$PRG"` + # shellcheck disable=SC2006 + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + # shellcheck disable=SC2006 + PRG=`dirname "$PRG"`/"$link" + fi +done + +# Get standard environment variables +# shellcheck disable=SC2006 +ROOT=$(cd "$(dirname "$PRG")" &>/dev/null && pwd) +export DORIS_HOME=$(cd "$ROOT/../" &>/dev/null && pwd) + +. "${DORIS_HOME}"/env.sh + +# include custom environment variables +if [[ -f ${DORIS_HOME}/custom_env.sh ]]; then + . "${DORIS_HOME}"/custom_env.sh +fi + +selectSpark() { + echo 'Spark-Load supports multiple versions of spark. Which version do you need ?' + select spark in "2.x" "3.x" "other" + do + case $spark in + "2.x") + return 1 + ;; + "3.x") + return 2 + ;; + *) + echo "invalid selected, exit.." + exit 1 + ;; + esac + done +} + +SPARK_VERSION=0 +selectSpark +SparkVer=$? +if [ ${SparkVer} -eq 1 ]; then + SPARK_VERSION="spark2" + SCALA_VERSION="scala_2.11" +elif [ ${SparkVer} -eq 2 ]; then + SPARK_VERSION="spark3" + SCALA_VERSION="scala_2.12" +fi + +echo_g " spark load run based on : ${SPARK_VERSION} and ${SCALA_VERSION}" +echo_g " build starting..." + +${MVN_BIN} clean package -P${SPARK_VERSION},${SCALA_VERSION} "$@" + +EXIT_CODE=$? +if [ $EXIT_CODE -eq 0 ]; then + DIST_DIR=${DORIS_HOME}/dist + [ ! 
-d "$DIST_DIR" ] && mkdir "$DIST_DIR" + dist_jar=$(ls "${ROOT}"/target | grep "spark-load-") + rm -rf "${DIST_DIR}"/"${dist_jar}" + cp "${ROOT}"/target/"${dist_jar}" "$DIST_DIR" + + echo_g "*****************************************************************" + echo_g "Successfully build Spark-Load" + echo_g "dist: $DIST_DIR/$dist_jar " + echo_g "*****************************************************************" + exit 0; +else + echo_r "Failed build Spark-Load" + exit $EXIT_CODE; +fi diff --git a/spark-load/pom.xml b/spark-load/pom.xml new file mode 100644 index 00000000..480d4f92 --- /dev/null +++ b/spark-load/pom.xml @@ -0,0 +1,418 @@ + + + + 4.0.0 + + org.apache.doris + spark-load + ${revision} + pom + + spark-load-common + spark-load-core + spark-load-dpp + spark-load-dist + + + + 1.8 + 1.8 + UTF-8 + 24.0.0-SNAPSHOT + 1.13 + 3.9 + 3.3.6 + 4.1.104.Final + 1.13.1 + 3.2.2 + 4.0.2 + 32.1.2-jre + 2.14.2 + 1.18.30 + 1.4 + 4.5.13 + 5.8.2 + 1.49 + 2.17.1 + 2.0.7 + 1.2 + 1.12.669 + 0.8.13 + 2.9.1 + + + + + + + commons-codec + commons-codec + ${commons-codec.version} + + + + org.apache.commons + commons-lang3 + ${commons-lang3.version} + + + + + + + + + + + org.apache.spark + spark-core_${scala.major.version} + ${spark.version} + provided + + + org.apache.logging.log4j + log4j-1.2-api + + + org.apache.logging.log4j + log4j-api + + + org.apache.logging.log4j + log4j-core + + + commons-logging + commons-logging + + + org.slf4j + slf4j-api + + + + + + io.netty + netty-all + ${netty-all.version} + + + + + org.apache.spark + spark-sql_${scala.major.version} + ${spark.version} + provided + + + org.apache.hadoop + hadoop-common + ${hadoop.version} + + + org.slf4j + slf4j-api + + + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + + + org.slf4j + slf4j-api + + + + + org.apache.hadoop + hadoop-aws + ${hadoop.version} + + + org.slf4j + slf4j-log4j12 + + + log4j + log4j + + + servlet-api + javax.servlet + + + + com.amazonaws + aws-java-sdk-s3 + + + com.amazonaws + aws-java-sdk-bundle + + + + + com.amazonaws + aws-java-sdk-s3 + ${aws-java-sdk.version} + + + com.amazonaws + aws-java-sdk-glue + ${aws-java-sdk.version} + + + com.amazonaws + aws-java-sdk-dynamodb + ${aws-java-sdk.version} + + + + com.amazonaws + aws-java-sdk-logs + ${aws-java-sdk.version} + + + org.apache.parquet + parquet-column + ${parquet.version} + + + org.apache.parquet + parquet-hadoop + ${parquet.version} + + + org.apache.parquet + parquet-common + ${parquet.version} + + + commons-collections + commons-collections + ${commons-collections.version} + + + org.scala-lang + scala-library + ${scala.version} + provided + + + com.esotericsoftware + kryo-shaded + ${kryo.version} + + + org.apache.spark + spark-catalyst_${scala.major.version} + ${spark.version} + + + org.slf4j + slf4j-api + + + provided + + + com.google.guava + guava + ${guava.version} + + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + + + + org.projectlombok + lombok + ${lombok.veresion} + provided + + + + commons-cli + commons-cli + ${commons-cli.version} + + + org.apache.spark + spark-launcher_${scala.major.version} + ${spark.version} + + + + org.apache.httpcomponents + httpclient + ${httpclient.version} + + + + + org.junit.jupiter + junit-jupiter-engine + ${junit.version} + test + + + + org.junit.jupiter + junit-jupiter-params + ${junit.version} + test + + + + org.jmockit + jmockit + ${jmockit.version} + test + + + + + org.apache.logging.log4j + log4j-core + ${log4j.version} + + + + org.apache.logging.log4j + log4j-api + ${log4j.version} + + + 
+ org.apache.logging.log4j + log4j-slf4j-impl + ${log4j.version} + + + + + org.slf4j + slf4j-api + ${slf4j.version} + + + + commons-logging + commons-logging + ${commons-logging.version} + + + org.roaringbitmap + RoaringBitmap + ${RoaringBitmap.version} + + + + + com.google.code.gson + gson + ${gson.version} + + + ${project.groupId} + spark-load-common + ${project.version} + + + + + + + spark2 + + false + + + 2.4.8 + + + + spark3 + + true + + + 3.4.1 + + + + scala_2.11 + + false + + + 2.11.8 + 2.11 + + + + scala_2.12 + + true + + + 2.12.10 + 2.12 + + + + + + + + + org.apache.maven.plugins + maven-assembly-plugin + 3.1.1 + + + org.apache.maven.plugins + maven-shade-plugin + 3.2.1 + + + org.apache.maven.plugins + maven-surefire-plugin + 2.22.2 + + + org.codehaus.mojo + flatten-maven-plugin + 1.4.1 + + + + + + \ No newline at end of file diff --git a/spark-load/spark-load-common/pom.xml b/spark-load/spark-load-common/pom.xml new file mode 100644 index 00000000..4a0e96b7 --- /dev/null +++ b/spark-load/spark-load-common/pom.xml @@ -0,0 +1,67 @@ + + + + + 4.0.0 + + org.apache.doris + spark-load + ${revision} + + + spark-load-common + jar + + 8 + 8 + UTF-8 + + + + + com.fasterxml.jackson.core + jackson-databind + + + com.google.code.gson + gson + + + com.google.guava + guava + + + org.roaringbitmap + RoaringBitmap + + + commons-codec + commons-codec + + + org.junit.jupiter + junit-jupiter-engine + test + + + + \ No newline at end of file diff --git a/spark-load/spark-load-common/src/main/java/org/apache/doris/common/DppResult.java b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/DppResult.java new file mode 100644 index 00000000..7a2a9cb4 --- /dev/null +++ b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/DppResult.java @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.common; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +import java.io.Serializable; + +/** + * Copied from Apache Doris org.apache.doris.sparkdpp.DppResult + */ +public class DppResult implements Serializable { + + public boolean isSuccess; + + public String failedReason; + + public long scannedRows; + + public long fileNumber; + + public long fileSize; + + public long normalRows; + + public long abnormalRows; + + public long unselectRows; + + // only part of abnormal rows will be returned + public String partialAbnormalRows; + + public long scannedBytes; + + public DppResult() { + isSuccess = true; + failedReason = ""; + scannedRows = 0; + fileNumber = 0; + fileSize = 0; + normalRows = 0; + abnormalRows = 0; + unselectRows = 0; + partialAbnormalRows = ""; + scannedBytes = 0; + } + + @JsonCreator + public DppResult(@JsonProperty(value = "is_success", required = true) boolean isSuccess, + @JsonProperty(value = "failed_reason", required = true) String failedReason, + @JsonProperty(value = "scanned_rows", required = true) long scannedRows, + @JsonProperty(value = "file_number", required = true) long fileNumber, + @JsonProperty(value = "file_size", required = true) long fileSize, + @JsonProperty(value = "normal_rows", required = true) long normalRows, + @JsonProperty(value = "abnormal_rows", required = true) long abnormalRows, + @JsonProperty(value = "unselect_rows", required = true) long unselectRows, + @JsonProperty("partial_abnormal_rows") String partialAbnormalRows, + @JsonProperty("scanned_bytes") long scannedBytes) { + this.isSuccess = isSuccess; + this.failedReason = failedReason; + this.scannedRows = scannedRows; + this.fileNumber = fileNumber; + this.fileSize = fileSize; + this.normalRows = normalRows; + this.abnormalRows = abnormalRows; + this.unselectRows = unselectRows; + this.partialAbnormalRows = partialAbnormalRows; + this.scannedBytes = scannedBytes; + } + +} diff --git a/spark-load/spark-load-common/src/main/java/org/apache/doris/common/io/BitmapValue.java b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/io/BitmapValue.java new file mode 100644 index 00000000..db4a65c2 --- /dev/null +++ b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/io/BitmapValue.java @@ -0,0 +1,423 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.common.io; + +import org.roaringbitmap.Util; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Copied from Apache Doris + */ +public class BitmapValue { + + public static final int EMPTY = 0; + public static final int SINGLE32 = 1; + public static final int BITMAP32 = 2; + public static final int SINGLE64 = 3; + public static final int BITMAP64 = 4; + + public static final int SINGLE_VALUE = 1; + public static final int BITMAP_VALUE = 2; + + public static final long UNSIGNED_32BIT_INT_MAX_VALUE = 4294967295L; + + private int bitmapType; + private long singleValue; + private Roaring64Map bitmap; + + public BitmapValue() { + bitmapType = EMPTY; + } + + public void add(int value) { + add(Util.toUnsignedLong(value)); + } + + public void add(long value) { + switch (bitmapType) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case EMPTY: + singleValue = value; + bitmapType = SINGLE_VALUE; + break; + case SINGLE_VALUE: + if (this.singleValue != value) { + bitmap = new Roaring64Map(); + bitmap.add(value); + bitmap.add(singleValue); + bitmapType = BITMAP_VALUE; + } + break; + case BITMAP_VALUE: + bitmap.addLong(value); + break; + } + } + + public boolean contains(int value) { + return contains(Util.toUnsignedLong(value)); + } + + public boolean contains(long value) { + switch (bitmapType) { + case EMPTY: + return false; + case SINGLE_VALUE: + return singleValue == value; + case BITMAP_VALUE: + return bitmap.contains(value); + default: + return false; + } + } + + public long cardinality() { + switch (bitmapType) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case EMPTY: + return 0; + case SINGLE_VALUE: + return 1; + case BITMAP_VALUE: + return bitmap.getLongCardinality(); + } + return 0; + } + + public void serialize(DataOutput output) throws IOException { + switch (bitmapType) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case EMPTY: + output.writeByte(EMPTY); + break; + case SINGLE_VALUE: + // is 32-bit enough + // FE is big end but BE is little end. + if (isLongValue32bitEnough(singleValue)) { + output.write(SINGLE32); + output.writeInt(Integer.reverseBytes((int) singleValue)); + } else { + output.writeByte(SINGLE64); + output.writeLong(Long.reverseBytes(singleValue)); + } + break; + case BITMAP_VALUE: + bitmap.serialize(output); + break; + } + } + + public void deserialize(DataInput input) throws IOException { + clear(); + int bitmapType = input.readByte(); + switch (bitmapType) { + case EMPTY: + break; + case SINGLE32: + singleValue = Util.toUnsignedLong(Integer.reverseBytes(input.readInt())); + this.bitmapType = SINGLE_VALUE; + break; + case SINGLE64: + singleValue = Long.reverseBytes(input.readLong()); + this.bitmapType = SINGLE_VALUE; + break; + case BITMAP32: + case BITMAP64: + bitmap = bitmap == null ? new Roaring64Map() : bitmap; + bitmap.deserialize(input, bitmapType); + this.bitmapType = BITMAP_VALUE; + break; + default: + throw new RuntimeException(String.format("unknown bitmap type %s ", bitmapType)); + } + } + + // In-place bitwise AND (intersection) operation. The current bitmap is modified. 
+ public void and(BitmapValue other) { + switch (other.bitmapType) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case EMPTY: + clear(); + break; + case SINGLE_VALUE: + switch (this.bitmapType) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case EMPTY: + break; + case SINGLE_VALUE: + if (this.singleValue != other.singleValue) { + clear(); + } + break; + case BITMAP_VALUE: + if (!this.bitmap.contains(other.singleValue)) { + clear(); + } else { + clear(); + this.singleValue = other.singleValue; + this.bitmapType = SINGLE_VALUE; + } + break; + } + break; + case BITMAP_VALUE: + switch (this.bitmapType) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case EMPTY: + break; + case SINGLE_VALUE: + if (!other.bitmap.contains(this.singleValue)) { + clear(); + } + break; + case BITMAP_VALUE: + this.bitmap.and(other.bitmap); + convertToSmallerType(); + break; + } + break; + } + } + + // In-place bitwise OR (union) operation. The current bitmap is modified. + public void or(BitmapValue other) { + switch (other.bitmapType) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case EMPTY: + break; + case SINGLE_VALUE: + add(other.singleValue); + break; + case BITMAP_VALUE: + switch (this.bitmapType) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case EMPTY: + // deep copy the bitmap in case of multi-rollups update the bitmap repeatedly + this.bitmap = new Roaring64Map(); + this.bitmap.or(other.bitmap); + this.bitmapType = BITMAP_VALUE; + break; + case SINGLE_VALUE: + this.bitmap = new Roaring64Map(); + this.bitmap.or(other.bitmap); + this.bitmap.add(this.singleValue); + this.bitmapType = BITMAP_VALUE; + break; + case BITMAP_VALUE: + this.bitmap.or(other.bitmap); + break; + } + break; + } + } + + public void remove(long value) { + switch (this.bitmapType) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case EMPTY: + break; + case SINGLE_VALUE: + if (this.singleValue == value) { + clear(); + } + break; + case BITMAP_VALUE: + this.bitmap.removeLong(value); + convertToSmallerType(); + break; + } + } + + // In-place bitwise ANDNOT (difference) operation. The current bitmap is modified + public void not(BitmapValue other) { + switch (other.bitmapType) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case EMPTY: + break; + case SINGLE_VALUE: + remove(other.singleValue); + break; + case BITMAP_VALUE: + switch (this.bitmapType) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case EMPTY: + break; + case SINGLE_VALUE: + if (other.bitmap.contains(this.singleValue)) { + clear(); + } + break; + case BITMAP_VALUE: + this.bitmap.andNot(other.bitmap); + convertToSmallerType(); + break; + } + break; + } + } + + // In-place bitwise XOR (symmetric difference) operation. 
The current bitmap is modified + public void xor(BitmapValue other) { + switch (other.bitmapType) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case EMPTY: + break; + case SINGLE_VALUE: + switch (this.bitmapType) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case EMPTY: + add(other.singleValue); + break; + case SINGLE_VALUE: + if (this.singleValue != other.singleValue) { + add(other.singleValue); + } else { + clear(); + } + break; + case BITMAP_VALUE: + if (!this.bitmap.contains(other.singleValue)) { + this.bitmap.add(other.singleValue); + } else { + this.bitmap.removeLong(other.singleValue); + convertToSmallerType(); + } + break; + } + break; + case BITMAP_VALUE: + switch (this.bitmapType) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case EMPTY: + this.bitmap = other.bitmap; + this.bitmapType = BITMAP_VALUE; + break; + case SINGLE_VALUE: + this.bitmap = other.bitmap; + this.bitmapType = BITMAP_VALUE; + if (this.bitmap.contains(this.singleValue)) { + this.bitmap.removeLong(this.singleValue); + } else { + this.bitmap.add(this.bitmapType); + } + break; + case BITMAP_VALUE: + this.bitmap.xor(other.bitmap); + convertToSmallerType(); + break; + } + break; + } + } + + @Override + public boolean equals(Object other) { + if (other == null || !(other instanceof BitmapValue)) { + return false; + } + boolean ret = false; + if (this.bitmapType != ((BitmapValue) other).bitmapType) { + return false; + } + switch (((BitmapValue) other).bitmapType) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case EMPTY: + ret = true; + break; + case SINGLE_VALUE: + ret = this.singleValue == ((BitmapValue) other).singleValue; + break; + case BITMAP_VALUE: + ret = bitmap.equals(((BitmapValue) other).bitmap); + } + return ret; + } + + /** + * usage note: + * now getSizeInBytes is different from be' impl + * The reason is that java's roaring didn't implement method #shrinkToFit but be's getSizeInBytes need it + * Implementing java's shrinkToFit means refactor roaring whose fields are all unaccess in Doris Fe's package + * That would be an another big project + */ + // TODO(wb): keep getSizeInBytes consistent with be and refactor roaring + public long getSizeInBytes() { + long size = 0; + switch (bitmapType) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case EMPTY: + size = 1; + break; + case SINGLE_VALUE: + if (isLongValue32bitEnough(singleValue)) { + size = 1 + 4; + } else { + size = 1 + 8; + } + break; + case BITMAP_VALUE: + size = 1 + bitmap.getSizeInBytes(); + } + return size; + } + + @Override + public String toString() { + String toStringStr = "{}"; + switch (bitmapType) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case EMPTY: + break; + case SINGLE_VALUE: + toStringStr = String.format("{%s}", singleValue); + break; + case BITMAP_VALUE: + toStringStr = this.bitmap.toString(); + break; + } + return toStringStr; + } + + public void clear() { + this.bitmapType = EMPTY; + this.singleValue = -1; + this.bitmap = null; + } + + private void convertToSmallerType() { + if (bitmapType == BITMAP_VALUE) { + if (bitmap.getLongCardinality() == 0) { + this.bitmap = null; + this.bitmapType = EMPTY; + } else if (bitmap.getLongCardinality() == 1) { + this.singleValue = bitmap.select(0); + this.bitmapType = SINGLE_VALUE; + this.bitmap = null; + } + } + } + + private boolean isLongValue32bitEnough(long value) { + return value <= UNSIGNED_32BIT_INT_MAX_VALUE; + } + + // just for ut + public int getBitmapType() { + return bitmapType; + } + + // just for ut + 
public boolean is32BitsEnough() { + switch (bitmapType) { + case EMPTY: + return true; + case SINGLE_VALUE: + return isLongValue32bitEnough(singleValue); + case BITMAP_VALUE: + return bitmap.is32BitsEnough(); + default: + return false; + } + } +} diff --git a/spark-load/spark-load-common/src/main/java/org/apache/doris/common/io/Codec.java b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/io/Codec.java new file mode 100644 index 00000000..3c57a0f1 --- /dev/null +++ b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/io/Codec.java @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.common.io; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Copied from Apache Doris + */ +public class Codec { + + // not support encode negative value now + public static void encodeVarint64(long source, DataOutput out) throws IOException { + assert source >= 0; + short B = 128; // CHECKSTYLE IGNORE THIS LINE + + while (source >= B) { + out.write((int) (source & (B - 1) | B)); + source = source >> 7; + } + out.write((int) (source & (B - 1))); + } + + // not support decode negative value now + public static long decodeVarint64(DataInput in) throws IOException { + long result = 0; + int shift = 0; + short B = 128; // CHECKSTYLE IGNORE THIS LINE + + while (true) { + int oneByte = in.readUnsignedByte(); + boolean isEnd = (oneByte & B) == 0; + result = result | ((long) (oneByte & B - 1) << (shift * 7)); + if (isEnd) { + break; + } + shift++; + } + + return result; + } +} diff --git a/spark-load/spark-load-common/src/main/java/org/apache/doris/common/io/Hll.java b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/io/Hll.java new file mode 100644 index 00000000..a28ea1d8 --- /dev/null +++ b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/io/Hll.java @@ -0,0 +1,394 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.common.io; + +import org.apache.commons.codec.binary.StringUtils; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.math.BigInteger; +import java.util.HashSet; +import java.util.Set; + +/** + * Copied from Apache Doris + */ +public class Hll { + + public static final byte HLL_DATA_EMPTY = 0; + public static final byte HLL_DATA_EXPLICIT = 1; + public static final byte HLL_DATA_SPARSE = 2; + public static final byte HLL_DATA_FULL = 3; + + public static final int HLL_COLUMN_PRECISION = 14; + public static final int HLL_ZERO_COUNT_BITS = (64 - HLL_COLUMN_PRECISION); + public static final int HLL_EXPLICIT_INT64_NUM = 160; + public static final int HLL_SPARSE_THRESHOLD = 4096; + public static final int HLL_REGISTERS_COUNT = 16 * 1024; + public static final long M64 = 0xc6a4a7935bd1e995L; + public static final int R64 = 47; + public static final int SEED = 0xadc83b19; + private int type; + private Set hashSet; + private byte[] registers; + + public Hll() { + type = HLL_DATA_EMPTY; + this.hashSet = new HashSet<>(); + } + + public static byte getLongTailZeroNum(long hashValue) { + if (hashValue == 0) { + return 0; + } + long value = 1L; + byte idx = 0; + for (; ; idx++) { + if ((value & hashValue) != 0) { + return idx; + } + value = value << 1; + if (idx == 62) { + break; + } + } + return idx; + } + + private static long getLittleEndianLong(final byte[] data, final int index) { + return (((long) data[index] & 0xff)) + | (((long) data[index + 1] & 0xff) << 8) + | (((long) data[index + 2] & 0xff) << 16) + | (((long) data[index + 3] & 0xff) << 24) + | (((long) data[index + 4] & 0xff) << 32) + | (((long) data[index + 5] & 0xff) << 40) + | (((long) data[index + 6] & 0xff) << 48) + | (((long) data[index + 7] & 0xff) << 56); + } + + public static long hash64(final byte[] data, final int length, final int seed) { + long h = (seed & 0xffffffffL) ^ (length * M64); + final int nblocks = length >> 3; + + // body + for (int i = 0; i < nblocks; i++) { + final int index = (i << 3); + long k = getLittleEndianLong(data, index); + + k *= M64; + k ^= k >>> R64; + k *= M64; + + h ^= k; + h *= M64; + } + + final int index = (nblocks << 3); + switch (length - index) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case 7: + h ^= ((long) data[index + 6] & 0xff) << 48; + case 6: // CHECKSTYLE IGNORE THIS LINE: fall through + h ^= ((long) data[index + 5] & 0xff) << 40; + case 5: // CHECKSTYLE IGNORE THIS LINE: fall through + h ^= ((long) data[index + 4] & 0xff) << 32; + case 4: // CHECKSTYLE IGNORE THIS LINE: fall through + h ^= ((long) data[index + 3] & 0xff) << 24; + case 3: // CHECKSTYLE IGNORE THIS LINE: fall through + h ^= ((long) data[index + 2] & 0xff) << 16; + case 2: // CHECKSTYLE IGNORE THIS LINE: fall through + h ^= ((long) data[index + 1] & 0xff) << 8; + case 1: // CHECKSTYLE IGNORE THIS LINE: fall through + h ^= ((long) data[index] & 0xff); + h *= M64; + } + + h ^= h >>> R64; + h *= M64; + h ^= h >>> R64; + + return h; + } + + private void convertExplicitToRegister() { + assert this.type == HLL_DATA_EXPLICIT; + registers = new byte[HLL_REGISTERS_COUNT]; + for (Long value : hashSet) { + updateRegisters(value); + } + hashSet.clear(); + } + + private void updateRegisters(long hashValue) { + int idx; + // hash value less than zero means we get a unsigned long + // so need to transfer to BigInter to mod + if (hashValue < 0) { + BigInteger unint64HashValue = new BigInteger(Long.toUnsignedString(hashValue)); + unint64HashValue = 
unint64HashValue.mod(new BigInteger(Long.toUnsignedString(HLL_REGISTERS_COUNT))); + idx = unint64HashValue.intValue(); + } else { + idx = (int) (hashValue % HLL_REGISTERS_COUNT); + } + + hashValue >>>= HLL_COLUMN_PRECISION; + hashValue |= (1L << HLL_ZERO_COUNT_BITS); + byte firstOneBit = (byte) (getLongTailZeroNum(hashValue) + 1); + registers[idx] = registers[idx] > firstOneBit ? registers[idx] : firstOneBit; + } + + private void mergeRegisters(byte[] other) { + for (int i = 0; i < HLL_REGISTERS_COUNT; i++) { + this.registers[i] = this.registers[i] > other[i] ? this.registers[i] : other[i]; + } + } + + public void updateWithHash(Object value) { + byte[] v = StringUtils.getBytesUtf8(String.valueOf(value)); + update(hash64(v, v.length, SEED)); + } + + public void update(long hashValue) { + switch (this.type) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case HLL_DATA_EMPTY: + hashSet.add(hashValue); + type = HLL_DATA_EXPLICIT; + break; + case HLL_DATA_EXPLICIT: + if (hashSet.size() < HLL_EXPLICIT_INT64_NUM) { + hashSet.add(hashValue); + break; + } + convertExplicitToRegister(); + type = HLL_DATA_FULL; + case HLL_DATA_SPARSE: // CHECKSTYLE IGNORE THIS LINE: fall through + case HLL_DATA_FULL: + updateRegisters(hashValue); + break; + } + } + + public void merge(Hll other) { + if (other.type == HLL_DATA_EMPTY) { + return; + } + switch (this.type) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case HLL_DATA_EMPTY: + this.type = other.type; + switch (other.type) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case HLL_DATA_EXPLICIT: + this.hashSet.addAll(other.hashSet); + break; + case HLL_DATA_SPARSE: + case HLL_DATA_FULL: + this.registers = new byte[HLL_REGISTERS_COUNT]; + System.arraycopy(other.registers, 0, this.registers, 0, HLL_REGISTERS_COUNT); + break; + } + break; + case HLL_DATA_EXPLICIT: + switch (other.type) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case HLL_DATA_EXPLICIT: + this.hashSet.addAll(other.hashSet); + if (this.hashSet.size() > HLL_EXPLICIT_INT64_NUM) { + convertExplicitToRegister(); + this.type = HLL_DATA_FULL; + } + break; + case HLL_DATA_SPARSE: + case HLL_DATA_FULL: + convertExplicitToRegister(); + mergeRegisters(other.registers); + this.type = HLL_DATA_FULL; + break; + } + break; + case HLL_DATA_SPARSE: + case HLL_DATA_FULL: + switch (other.type) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case HLL_DATA_EXPLICIT: + for (long value : other.hashSet) { + update(value); + } + break; + case HLL_DATA_SPARSE: + case HLL_DATA_FULL: + mergeRegisters(other.registers); + break; + } + break; + } + } + + public void serialize(DataOutput output) throws IOException { + switch (type) { // CHECKSTYLE IGNORE THIS LINE: missing switch default + case HLL_DATA_EMPTY: + output.writeByte(type); + break; + case HLL_DATA_EXPLICIT: + output.writeByte(type); + output.writeByte(hashSet.size()); + for (long value : hashSet) { + output.writeLong(Long.reverseBytes(value)); + } + break; + case HLL_DATA_SPARSE: + case HLL_DATA_FULL: + int nonZeroRegisterNum = 0; + for (int i = 0; i < HLL_REGISTERS_COUNT; i++) { + if (registers[i] != 0) { + nonZeroRegisterNum++; + } + } + if (nonZeroRegisterNum > HLL_SPARSE_THRESHOLD) { + output.writeByte(HLL_DATA_FULL); + for (byte value : registers) { + output.writeByte(value); + } + } else { + output.writeByte(HLL_DATA_SPARSE); + output.writeInt(Integer.reverseBytes(nonZeroRegisterNum)); + for (int i = 0; i < HLL_REGISTERS_COUNT; i++) { + if (registers[i] != 0) { + 
output.writeShort(Short.reverseBytes((short) i)); + output.writeByte(registers[i]); + } + } + } + break; + } + } + + public boolean deserialize(DataInput input) throws IOException { + assert type == HLL_DATA_EMPTY; + + if (input == null) { + return false; + } + + this.type = input.readByte(); + switch (this.type) { + case HLL_DATA_EMPTY: + break; + case HLL_DATA_EXPLICIT: + int hashSetSize = input.readUnsignedByte(); + for (int i = 0; i < hashSetSize; i++) { + update(Long.reverseBytes(input.readLong())); + } + assert this.type == HLL_DATA_EXPLICIT; + break; + case HLL_DATA_SPARSE: + int sparseDataSize = Integer.reverseBytes(input.readInt()); + this.registers = new byte[HLL_REGISTERS_COUNT]; + for (int i = 0; i < sparseDataSize; i++) { + int idx = Short.reverseBytes(input.readShort()); + byte value = input.readByte(); + registers[idx] = value; + } + break; + case HLL_DATA_FULL: + this.registers = new byte[HLL_REGISTERS_COUNT]; + for (int i = 0; i < HLL_REGISTERS_COUNT; i++) { + registers[i] = input.readByte(); + } + break; + default: + return false; + } + + return true; + } + + // use strictfp to force java follow IEEE 754 to deal float point strictly + public strictfp long estimateCardinality() { + if (type == HLL_DATA_EMPTY) { + return 0; + } + if (type == HLL_DATA_EXPLICIT) { + return hashSet.size(); + } + + int numStreams = HLL_REGISTERS_COUNT; + float alpha = 0; + + if (numStreams == 16) { + alpha = 0.673f; + } else if (numStreams == 32) { + alpha = 0.697f; + } else if (numStreams == 64) { + alpha = 0.709f; + } else { + alpha = 0.7213f / (1 + 1.079f / numStreams); + } + + float harmonicMean = 0; + int numZeroRegisters = 0; + + for (int i = 0; i < HLL_REGISTERS_COUNT; i++) { + harmonicMean += Math.pow(2.0f, -registers[i]); + + if (registers[i] == 0) { + numZeroRegisters++; + } + } + + harmonicMean = 1.0f / harmonicMean; + double estimate = alpha * numStreams * numStreams * harmonicMean; + + if (estimate <= numStreams * 2.5 && numZeroRegisters != 0) { + estimate = numStreams * Math.log(((float) numStreams) / ((float) numZeroRegisters)); + } else if (numStreams == 16384 && estimate < 72000) { + double bias = 5.9119 * 1.0e-18 * (estimate * estimate * estimate * estimate) + - 1.4253 * 1.0e-12 * (estimate * estimate * estimate) + + 1.2940 * 1.0e-7 * (estimate * estimate) + - 5.2921 * 1.0e-3 * estimate + + 83.3216; + estimate -= estimate * (bias / 100); + } + + return (long) (estimate + 0.5); + } + + public int maxSerializedSize() { + switch (type) { + case HLL_DATA_EMPTY: + default: + return 1; + case HLL_DATA_EXPLICIT: + return 2 + hashSet.size() * 8; + case HLL_DATA_SPARSE: + case HLL_DATA_FULL: + return 1 + HLL_REGISTERS_COUNT; + } + } + + // just for ut + public int getType() { + return type; + } + + // For convert to statistics used Hll128 + public byte[] getRegisters() { + return registers; + } + + // For convert to statistics used Hll128 + public Set getHashSet() { + return hashSet; + } +} diff --git a/spark-load/spark-load-common/src/main/java/org/apache/doris/common/io/Roaring64Map.java b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/io/Roaring64Map.java new file mode 100644 index 00000000..33237983 --- /dev/null +++ b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/io/Roaring64Map.java @@ -0,0 +1,1432 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.common.io; + +import org.roaringbitmap.BitmapDataProvider; +import org.roaringbitmap.BitmapDataProviderSupplier; +import org.roaringbitmap.IntConsumer; +import org.roaringbitmap.IntIterator; +import org.roaringbitmap.InvalidRoaringFormat; +import org.roaringbitmap.RoaringBitmap; +import org.roaringbitmap.RoaringBitmapSupplier; +import org.roaringbitmap.Util; +import org.roaringbitmap.buffer.MutableRoaringBitmap; +import org.roaringbitmap.longlong.ImmutableLongBitmapDataProvider; +import org.roaringbitmap.longlong.LongConsumer; +import org.roaringbitmap.longlong.LongIterator; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.math.BigInteger; +import java.util.AbstractMap; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Iterator; +import java.util.Map; +import java.util.NavigableMap; +import java.util.Objects; +import java.util.SortedMap; +import java.util.TreeMap; + +/** + * Copied from Apache Doris + */ +public class Roaring64Map { + + private static final boolean DEFAULT_ORDER_IS_SIGNED = false; + private static final boolean DEFAULT_CARDINALITIES_ARE_CACHED = true; + /** + * the constant 2^64 + */ + private static final BigInteger TWO_64 = BigInteger.ONE.shiftLeft(64); + // Not final to enable initialization in Externalizable.readObject + private NavigableMap highToBitmap; + // If true, we handle longs a plain java longs: -1 if right before 0 + // If false, we handle longs as unsigned longs: 0 has no predecessor and Long.MAX_VALUE + 1L is + // expressed as a + // negative long + private boolean signedLongs = false; + private BitmapDataProviderSupplier supplier; + // By default, we cache cardinalities + private transient boolean doCacheCardinalities = true; + // Prevent recomputing all cardinalities when requesting consecutive ranks + private transient int firstHighNotValid = highestHigh() + 1; + // This boolean needs firstHighNotValid == Integer.MAX_VALUE to be allowed to be true + // If false, it means nearly all cumulated cardinalities are valid, except high=Integer.MAX_VALUE + // If true, it means all cumulated cardinalities are valid, even high=Integer.MAX_VALUE + private transient boolean allValid = false; + // TODO: I would prefer not managing arrays myself + private transient long[] sortedCumulatedCardinality = new long[0]; + private transient int[] sortedHighs = new int[0]; + // We guess consecutive .addLong will be on proximate longs: we remember the bitmap attached to + // this bucket in order + // to skip the indirection + private transient Map.Entry latestAddedHigh = null; + + /** + * By default, we consider longs are unsigned longs: normal longs: 0 is the lowest possible long. + * Long.MAX_VALUE is followed by Long.MIN_VALUE. 
-1L is the highest possible value + */ + public Roaring64Map() { + this(DEFAULT_ORDER_IS_SIGNED); + } + + /** + * By default, use RoaringBitmap as underlyings {@link BitmapDataProvider} + * + * @param signedLongs true if longs has to be ordered as plain java longs. False to handle them as + * unsigned 64bits long (as RoaringBitmap with unsigned integers) + */ + public Roaring64Map(boolean signedLongs) { + this(signedLongs, DEFAULT_CARDINALITIES_ARE_CACHED); + } + + /** + * By default, use RoaringBitmap as underlyings {@link BitmapDataProvider} + * + * @param signedLongs true if longs has to be ordered as plain java longs. False to handle them as + * unsigned 64bits long (as RoaringBitmap with unsigned integers) + * @param cacheCardinalities true if cardinalities have to be cached. It will prevent many + * iteration along the NavigableMap + */ + public Roaring64Map(boolean signedLongs, boolean cacheCardinalities) { + this(signedLongs, cacheCardinalities, new RoaringBitmapSupplier()); + } + + /** + * By default, longs are managed as unsigned longs and cardinalities are cached. + * + * @param supplier provide the logic to instantiate new {@link BitmapDataProvider}, typically + * instantiated once per high. + */ + public Roaring64Map(BitmapDataProviderSupplier supplier) { + this(DEFAULT_ORDER_IS_SIGNED, DEFAULT_CARDINALITIES_ARE_CACHED, supplier); + } + + /** + * By default, we activating cardinalities caching. + * + * @param signedLongs true if longs has to be ordered as plain java longs. False to handle them as + * unsigned 64bits long (as RoaringBitmap with unsigned integers) + * @param supplier provide the logic to instantiate new {@link BitmapDataProvider}, typically + * instantiated once per high. + */ + public Roaring64Map(boolean signedLongs, BitmapDataProviderSupplier supplier) { + this(signedLongs, DEFAULT_CARDINALITIES_ARE_CACHED, supplier); + } + + /** + * @param signedLongs true if longs has to be ordered as plain java longs. False to handle them as + * unsigned 64bits long (as RoaringBitmap with unsigned integers) + * @param cacheCardinalities true if cardinalities have to be cached. It will prevent many + * iteration along the NavigableMap + * @param supplier provide the logic to instantiate new {@link BitmapDataProvider}, typically + * instantiated once per high. + */ + public Roaring64Map(boolean signedLongs, boolean cacheCardinalities, + BitmapDataProviderSupplier supplier) { + this.signedLongs = signedLongs; + this.supplier = supplier; + + if (signedLongs) { + highToBitmap = new TreeMap<>(); + } else { + highToBitmap = new TreeMap<>(unsignedComparator()); + } + + this.doCacheCardinalities = cacheCardinalities; + resetPerfHelpers(); + } + + // From Arrays.binarySearch (Comparator). Check with org.roaringbitmap.Util.unsignedBinarySearch + private static int unsignedBinarySearch(int[] a, int fromIndex, int toIndex, int key, + Comparator c) { + int low = fromIndex; + int high = toIndex - 1; + + while (low <= high) { + int mid = (low + high) >>> 1; + int midVal = a[mid]; + int cmp = c.compare(midVal, key); + if (cmp < 0) { + low = mid + 1; + } else if (cmp > 0) { + high = mid - 1; + } else { + return mid; // key found + } + } + return -(low + 1); // key not found. + } + + /** + * Generate a bitmap with the specified values set to true. The provided longs values don't have + * to be in sorted order, but it may be preferable to sort them from a performance point of view. + * + * @param dat set values + * @return a new bitmap + */ + public static Roaring64Map bitmapOf(final long... 
dat) { + final Roaring64Map ans = new Roaring64Map(); + ans.add(dat); + return ans; + } + + /** + * @param id any long, positive or negative + * @return an int holding the 32 highest order bits of information of the input long + */ + public static int high(long id) { + return (int) (id >> 32); + } + + /** + * @param id any long, positive or negative + * @return an int holding the 32 lowest order bits of information of the input long + */ + public static int low(long id) { + return (int) id; + } + + /** + * @param high an integer representing the highest order bits of the output long + * @param low an integer representing the lowest order bits of the output long + * @return a long packing together the integers as computed by + * {@link #high(long)} and {@link #low(long)} + */ + // https://stackoverflow.com/questions/12772939/java-storing-two-ints-in-a-long + public static long pack(int high, int low) { + return (((long) high) << 32) | (low & 0xffffffffL); + } + + /** + * @param signedLongs true if long put in a {@link Roaring64Map} should be considered as + * signed long. + * @return the int representing the highest value which can be set as high value in a + */ + public static int highestHigh(boolean signedLongs) { + if (signedLongs) { + return Integer.MAX_VALUE; + } else { + return -1; + } + } + + /** + * @return A comparator for unsigned longs: a negative long is a long greater than Long.MAX_VALUE + */ + public static Comparator unsignedComparator() { + return new Comparator() { + + @Override + public int compare(Integer o1, Integer o2) { + return compareUnsigned(o1, o2); + } + }; + } + + /** + * Compares two {@code int} values numerically treating the values as unsigned. + * + * @param x the first {@code int} to compare + * @param y the second {@code int} to compare + * @return the value {@code 0} if {@code x == y}; a value less than {@code 0} if {@code x < y} as + * unsigned values; and a value greater than {@code 0} if {@code x > y} as unsigned values + * @since 1.8 + */ + // Duplicated from jdk8 Integer.compareUnsigned + public static int compareUnsigned(int x, int y) { + return Integer.compare(x + Integer.MIN_VALUE, y + Integer.MIN_VALUE); + } + + /** + * JDK8 Long.toUnsignedString was too complex to backport. Go for a slow version relying on + * BigInteger + */ + // https://stackoverflow.com/questions/7031198/java-signed-long-to-unsigned-long-string + static String toUnsignedString(long l) { + BigInteger b = BigInteger.valueOf(l); + if (b.signum() < 0) { + b = b.add(TWO_64); + } + return b.toString(); + } + + private void resetPerfHelpers() { + firstHighNotValid = highestHigh(signedLongs) + 1; + allValid = false; + + sortedCumulatedCardinality = new long[0]; + sortedHighs = new int[0]; + + latestAddedHigh = null; + } + + // Package-friendly: for the sake of unit-testing + // @VisibleForTesting + NavigableMap getHighToBitmap() { + return highToBitmap; + } + + // Package-friendly: for the sake of unit-testing + // @VisibleForTesting + int getLowestInvalidHigh() { + return firstHighNotValid; + } + + // Package-friendly: for the sake of unit-testing + // @VisibleForTesting + long[] getSortedCumulatedCardinality() { + return sortedCumulatedCardinality; + } + + /** + * Add the value to the container (set the value to "true"), whether it already appears or not. + *

+ * Java lacks native unsigned longs but the x argument is considered to be unsigned. Within + * bitmaps, numbers are ordered according to {@link Long#compareUnsigned}. We order the numbers + * like 0, 1, ..., 9223372036854775807, -9223372036854775808, -9223372036854775807,..., -1. + * + * @param x long value + */ + public void addLong(long x) { + int high = high(x); + int low = low(x); + + // Copy the reference to prevent race-condition + Map.Entry local = latestAddedHigh; + + BitmapDataProvider bitmap; + if (local != null && local.getKey().intValue() == high) { + bitmap = local.getValue(); + } else { + bitmap = highToBitmap.get(high); + if (bitmap == null) { + bitmap = newRoaringBitmap(); + pushBitmapForHigh(high, bitmap); + } + latestAddedHigh = new AbstractMap.SimpleImmutableEntry<>(high, bitmap); + } + bitmap.add(low); + + invalidateAboveHigh(high); + } + + /** + * Add the integer value to the container (set the value to "true"), whether it already appears or + * not. + *

+ * Javac lacks native unsigned integers but the x argument is considered to be unsigned. Within + * bitmaps, numbers are ordered according to {@link Integer#compareUnsigned}. We order the numbers + * like 0, 1, ..., 2147483647, -2147483648, -2147483647,..., -1. + * + * @param x integer value + */ + public void addInt(int x) { + addLong(Util.toUnsignedLong(x)); + } + + private BitmapDataProvider newRoaringBitmap() { + return supplier.newEmpty(); + } + + private void invalidateAboveHigh(int high) { + // The cardinalities after this bucket may not be valid anymore + if (compare(firstHighNotValid, high) > 0) { + // High was valid up to now + firstHighNotValid = high; + + int indexNotValid = binarySearch(sortedHighs, firstHighNotValid); + + final int indexAfterWhichToReset; + if (indexNotValid >= 0) { + indexAfterWhichToReset = indexNotValid; + } else { + // We have invalidate a high not already present: added a value for a brand new high + indexAfterWhichToReset = -indexNotValid - 1; + } + + // This way, sortedHighs remains sorted, without making a new/shorter array + Arrays.fill(sortedHighs, indexAfterWhichToReset, sortedHighs.length, highestHigh()); + } + allValid = false; + } + + private int compare(int x, int y) { + if (signedLongs) { + return Integer.compare(x, y); + } else { + return compareUnsigned(x, y); + } + } + + private void pushBitmapForHigh(int high, BitmapDataProvider bitmap) { + // TODO .size is too slow + // int nbHighBefore = highToBitmap.headMap(high).size(); + + BitmapDataProvider previous = highToBitmap.put(high, bitmap); + assert previous == null : "Should push only not-existing high"; + } + + /** + * Returns the number of distinct integers added to the bitmap (e.g., number of bits set). + * + * @return the cardinality + */ + public long getLongCardinality() { + if (doCacheCardinalities) { + if (highToBitmap.isEmpty()) { + return 0L; + } + int indexOk = ensureCumulatives(highestHigh()); + + // ensureCumulatives may have removed empty bitmaps + if (highToBitmap.isEmpty()) { + return 0L; + } + + + return sortedCumulatedCardinality[indexOk - 1]; + } else { + long cardinality = 0L; + for (BitmapDataProvider bitmap : highToBitmap.values()) { + cardinality += bitmap.getLongCardinality(); + } + return cardinality; + } + } + + /** + * @return the cardinality as an int + * @throws UnsupportedOperationException if the cardinality does not fit in an int + */ + public int getIntCardinality() throws UnsupportedOperationException { + long cardinality = getLongCardinality(); + + if (cardinality > Integer.MAX_VALUE) { + // TODO: we should handle cardinality fitting in an unsigned int + throw new UnsupportedOperationException( + "Can not call .getIntCardinality as the cardinality is bigger than Integer.MAX_VALUE"); + } + + return (int) cardinality; + } + + /** + * Return the jth value stored in this bitmap. 
+ * + * @param j index of the value + * @return the value + * @throws IllegalArgumentException if j is out of the bounds of the bitmap cardinality + */ + public long select(final long j) throws IllegalArgumentException { + if (!doCacheCardinalities) { + return selectNoCache(j); + } + + // Ensure all cumulatives as we we have straightforward way to know in advance the high of the + // j-th value + int indexOk = ensureCumulatives(highestHigh()); + + if (highToBitmap.isEmpty()) { + return throwSelectInvalidIndex(j); + } + + // Use normal binarySearch as cardinality does not depends on considering longs signed or + // unsigned + // We need sortedCumulatedCardinality not to contain duplicated, else binarySearch may return + // any of the duplicates: we need to ensure it holds no high associated to an empty bitmap + int position = Arrays.binarySearch(sortedCumulatedCardinality, 0, indexOk, j); + + if (position >= 0) { + if (position == indexOk - 1) { + // .select has been called on this.getCardinality + return throwSelectInvalidIndex(j); + } + + // There is a bucket leading to this cardinality: the j-th element is the first element of + // next bucket + int high = sortedHighs[position + 1]; + BitmapDataProvider nextBitmap = highToBitmap.get(high); + return pack(high, nextBitmap.select(0)); + } else { + // There is no bucket with this cardinality + int insertionPoint = -position - 1; + + final long previousBucketCardinality; + if (insertionPoint == 0) { + previousBucketCardinality = 0L; + } else if (insertionPoint >= indexOk) { + return throwSelectInvalidIndex(j); + } else { + previousBucketCardinality = sortedCumulatedCardinality[insertionPoint - 1]; + } + + // We get a 'select' query for a single bitmap: should fit in an int + final int givenBitmapSelect = (int) (j - previousBucketCardinality); + + int high = sortedHighs[insertionPoint]; + BitmapDataProvider lowBitmap = highToBitmap.get(high); + int low = lowBitmap.select(givenBitmapSelect); + + return pack(high, low); + } + } + + // For benchmarks: compute without using cardinalities cache + // https://github.com/RoaringBitmap/CRoaring/blob/master/cpp/roaring64map.hh + private long selectNoCache(long j) { + long left = j; + + for (Map.Entry entry : highToBitmap.entrySet()) { + long lowCardinality = entry.getValue().getCardinality(); + + if (left >= lowCardinality) { + left -= lowCardinality; + } else { + // It is legit for left to be negative + int leftAsUnsignedInt = (int) left; + return pack(entry.getKey(), entry.getValue().select(leftAsUnsignedInt)); + } + } + + return throwSelectInvalidIndex(j); + } + + private long throwSelectInvalidIndex(long j) { + // see org.roaringbitmap.buffer.ImmutableRoaringBitmap.select(int) + throw new IllegalArgumentException( + "select " + j + " when the cardinality is " + this.getLongCardinality()); + } + + /** + * For better performance, consider the Use the {@link #forEach forEach} method. + * + * @return a custom iterator over set bits, the bits are traversed in ascending sorted order + */ + public Iterator iterator() { + final LongIterator it = getLongIterator(); + + return new Iterator() { + + @Override + public boolean hasNext() { + return it.hasNext(); + } + + @Override + public Long next() { + return it.next(); + } + + @Override + public void remove() { + // TODO? 
+ throw new UnsupportedOperationException(); + } + }; + } + + public void forEach(final LongConsumer lc) { + for (final Map.Entry highEntry : highToBitmap.entrySet()) { + highEntry.getValue().forEach(new IntConsumer() { + + @Override + public void accept(int low) { + lc.accept(pack(highEntry.getKey(), low)); + } + }); + } + } + + public long rankLong(long id) { + int high = high(id); + int low = low(id); + + if (!doCacheCardinalities) { + return rankLongNoCache(high, low); + } + + int indexOk = ensureCumulatives(high); + + int highPosition = binarySearch(sortedHighs, 0, indexOk, high); + + if (highPosition >= 0) { + // There is a bucket holding this item + + final long previousBucketCardinality; + if (highPosition == 0) { + previousBucketCardinality = 0; + } else { + previousBucketCardinality = sortedCumulatedCardinality[highPosition - 1]; + } + + BitmapDataProvider lowBitmap = highToBitmap.get(sortedHighs[highPosition]); + + // Rank is previous cardinality plus rank in current bitmap + return previousBucketCardinality + lowBitmap.rankLong(low); + } else { + // There is no bucket holding this item: insertionPoint is previous bitmap + int insertionPoint = -highPosition - 1; + + if (insertionPoint == 0) { + // this key is before all inserted keys + return 0; + } else { + // The rank is the cardinality of this previous bitmap + return sortedCumulatedCardinality[insertionPoint - 1]; + } + } + } + + // https://github.com/RoaringBitmap/CRoaring/blob/master/cpp/roaring64map.hh + private long rankLongNoCache(int high, int low) { + long result = 0L; + + BitmapDataProvider lastBitmap = highToBitmap.get(high); + if (lastBitmap == null) { + // There is no value with same high: the rank is a sum of cardinalities + for (Map.Entry bitmap : highToBitmap.entrySet()) { + if (bitmap.getKey().intValue() > high) { + break; + } else { + result += bitmap.getValue().getLongCardinality(); + } + } + } else { + for (BitmapDataProvider bitmap : highToBitmap.values()) { + if (bitmap == lastBitmap) { + result += bitmap.rankLong(low); + break; + } else { + result += bitmap.getLongCardinality(); + } + } + } + + return result; + } + + /** + * @param high for which high bucket should we compute the cardinality + * @return the highest validatedIndex + */ + protected int ensureCumulatives(int high) { + if (allValid) { + // the whole array is valid (up-to its actual length, not its capacity) + return highToBitmap.size(); + } else if (compare(high, firstHighNotValid) < 0) { + // The high is strictly below the first not valid: it is valid + + // sortedHighs may have only a subset of valid values on the right. 
However, these invalid + // values have been set to maxValue, and we are here as high < firstHighNotValid ==> high < + // maxHigh() + int position = binarySearch(sortedHighs, high); + + if (position >= 0) { + // This high has a bitmap: +1 as this index will be used as right (excluded) bound in a + // binary-search + return position + 1; + } else { + // This high has no bitmap: it could be between 2 highs with bitmaps + int insertionPosition = -position - 1; + return insertionPosition; + } + } else { + + // For each deprecated buckets + SortedMap tailMap = + highToBitmap.tailMap(firstHighNotValid, true); + + // TODO .size on tailMap make an iterator: arg + int indexOk = highToBitmap.size() - tailMap.size(); + + // TODO: It should be possible to compute indexOk based on sortedHighs array + // assert indexOk == binarySearch(sortedHighs, firstHighNotValid); + + Iterator> it = tailMap.entrySet().iterator(); + while (it.hasNext()) { + Map.Entry e = it.next(); + int currentHigh = e.getKey(); + + if (compare(currentHigh, high) > 0) { + // No need to compute more than needed + break; + } else if (e.getValue().isEmpty()) { + // highToBitmap can not be modified as we iterate over it + if (latestAddedHigh != null && latestAddedHigh.getKey().intValue() == currentHigh) { + // Dismiss the cached bitmap as it is removed from the NavigableMap + latestAddedHigh = null; + } + it.remove(); + } else { + ensureOne(e, currentHigh, indexOk); + + // We have added one valid cardinality + indexOk++; + } + + } + + if (highToBitmap.isEmpty() || indexOk == highToBitmap.size()) { + // We have compute all cardinalities + allValid = true; + } + + return indexOk; + } + } + + private int binarySearch(int[] array, int key) { + if (signedLongs) { + return Arrays.binarySearch(array, key); + } else { + return unsignedBinarySearch(array, 0, array.length, key, + unsignedComparator()); + } + } + + private int binarySearch(int[] array, int from, int to, int key) { + if (signedLongs) { + return Arrays.binarySearch(array, from, to, key); + } else { + return unsignedBinarySearch(array, from, to, key, unsignedComparator()); + } + } + + private void ensureOne(Map.Entry e, int currentHigh, int indexOk) { + // sortedHighs are valid only up to some index + assert indexOk <= sortedHighs.length : indexOk + " is bigger than " + sortedHighs.length; + + final int index; + if (indexOk == 0) { + if (sortedHighs.length == 0) { + index = -1; + // } else if (sortedHighs[0] == currentHigh) { + // index = 0; + } else { + index = -1; + } + } else if (indexOk < sortedHighs.length) { + index = -indexOk - 1; + } else { + index = -sortedHighs.length - 1; + } + assert index == binarySearch(sortedHighs, 0, indexOk, currentHigh) : "Computed " + index + + " differs from dummy binary-search index: " + + binarySearch(sortedHighs, 0, indexOk, currentHigh); + + if (index >= 0) { + // This would mean calling .ensureOne is useless: should never got here at the first time + throw new IllegalStateException("Unexpectedly found " + currentHigh + " in " + + Arrays.toString(sortedHighs) + " strictly before index" + indexOk); + } else { + int insertionPosition = -index - 1; + + // This is a new key + if (insertionPosition >= sortedHighs.length) { + int previousSize = sortedHighs.length; + + // TODO softer growing factor + int newSize = Math.min(Integer.MAX_VALUE, sortedHighs.length * 2 + 1); + + // Insertion at the end + sortedHighs = Arrays.copyOf(sortedHighs, newSize); + sortedCumulatedCardinality = Arrays.copyOf(sortedCumulatedCardinality, newSize); + + // Not actually 
needed. But simplify the reading of array content + Arrays.fill(sortedHighs, previousSize, sortedHighs.length, highestHigh()); + Arrays.fill(sortedCumulatedCardinality, previousSize, sortedHighs.length, Long.MAX_VALUE); + } + sortedHighs[insertionPosition] = currentHigh; + + final long previousCardinality; + if (insertionPosition >= 1) { + previousCardinality = sortedCumulatedCardinality[insertionPosition - 1]; + } else { + previousCardinality = 0; + } + + sortedCumulatedCardinality[insertionPosition] = + previousCardinality + e.getValue().getLongCardinality(); + + if (currentHigh == highestHigh()) { + // We are already on the highest high. Do not set allValid as it is set anyway out of the + // loop + firstHighNotValid = currentHigh; + } else { + // The first not valid is the next high + // TODO: The entry comes from a NavigableMap: it may be quite cheap to know the next high + firstHighNotValid = currentHigh + 1; + } + } + } + + private int highestHigh() { + return highestHigh(signedLongs); + } + + /** + * In-place bitwise OR (union) operation. The current bitmap is modified. + * + * @param x2 other bitmap + */ + public void or(final Roaring64Map x2) { + boolean firstBucket = true; + + for (Map.Entry e2 : x2.highToBitmap.entrySet()) { + // Keep object to prevent auto-boxing + Integer high = e2.getKey(); + + BitmapDataProvider lowBitmap1 = this.highToBitmap.get(high); + + BitmapDataProvider lowBitmap2 = e2.getValue(); + + // TODO Reviewers: is it a good idea to rely on BitmapDataProvider except in methods + // expecting an actual MutableRoaringBitmap? + // TODO This code may lead to closing a buffer Bitmap in current Navigable even if current is + // not on buffer + if ((lowBitmap1 == null || lowBitmap1 instanceof RoaringBitmap) + && lowBitmap2 instanceof RoaringBitmap) { + if (lowBitmap1 == null) { + // Clone to prevent future modification of this modifying the input Bitmap + RoaringBitmap lowBitmap2Clone = ((RoaringBitmap) lowBitmap2).clone(); + + pushBitmapForHigh(high, lowBitmap2Clone); + } else { + ((RoaringBitmap) lowBitmap1).or((RoaringBitmap) lowBitmap2); + } + } else if ((lowBitmap1 == null || lowBitmap1 instanceof MutableRoaringBitmap) + && lowBitmap2 instanceof MutableRoaringBitmap) { + if (lowBitmap1 == null) { + // Clone to prevent future modification of this modifying the input Bitmap + BitmapDataProvider lowBitmap2Clone = ((MutableRoaringBitmap) lowBitmap2).clone(); + pushBitmapForHigh(high, lowBitmap2Clone); + } else { + ((MutableRoaringBitmap) lowBitmap1).or((MutableRoaringBitmap) lowBitmap2); + } + } else { + throw new UnsupportedOperationException( + ".or is not between " + this.getClass() + " and " + lowBitmap2.getClass()); + } + + if (firstBucket) { + firstBucket = false; + + // Invalidate the lowest high as lowest not valid + firstHighNotValid = Math.min(firstHighNotValid, high); + allValid = false; + } + } + } + + /** + * In-place bitwise XOR (symmetric difference) operation. The current bitmap is modified. + * + * @param x2 other bitmap + */ + public void xor(final Roaring64Map x2) { + boolean firstBucket = true; + + for (Map.Entry e2 : x2.highToBitmap.entrySet()) { + // Keep object to prevent auto-boxing + Integer high = e2.getKey(); + + BitmapDataProvider lowBitmap1 = this.highToBitmap.get(high); + + BitmapDataProvider lowBitmap2 = e2.getValue(); + + // TODO Reviewers: is it a good idea to rely on BitmapDataProvider except in methods + // expecting an actual MutableRoaringBitmap? 
+ // TODO This code may lead to closing a buffer Bitmap in current Navigable even if current is + // not on buffer + if ((lowBitmap1 == null || lowBitmap1 instanceof RoaringBitmap) + && lowBitmap2 instanceof RoaringBitmap) { + if (lowBitmap1 == null) { + // Clone to prevent future modification of this modifying the input Bitmap + RoaringBitmap lowBitmap2Clone = ((RoaringBitmap) lowBitmap2).clone(); + + pushBitmapForHigh(high, lowBitmap2Clone); + } else { + ((RoaringBitmap) lowBitmap1).xor((RoaringBitmap) lowBitmap2); + } + } else if ((lowBitmap1 == null || lowBitmap1 instanceof MutableRoaringBitmap) + && lowBitmap2 instanceof MutableRoaringBitmap) { + if (lowBitmap1 == null) { + // Clone to prevent future modification of this modifying the input Bitmap + BitmapDataProvider lowBitmap2Clone = ((MutableRoaringBitmap) lowBitmap2).clone(); + + pushBitmapForHigh(high, lowBitmap2Clone); + } else { + ((MutableRoaringBitmap) lowBitmap1).xor((MutableRoaringBitmap) lowBitmap2); + } + } else { + throw new UnsupportedOperationException( + ".or is not between " + this.getClass() + " and " + lowBitmap2.getClass()); + } + + if (firstBucket) { + firstBucket = false; + + // Invalidate the lowest high as lowest not valid + firstHighNotValid = Math.min(firstHighNotValid, high); + allValid = false; + } + } + } + + /** + * In-place bitwise AND (intersection) operation. The current bitmap is modified. + * + * @param x2 other bitmap + */ + public void and(final Roaring64Map x2) { + boolean firstBucket = true; + + Iterator> thisIterator = highToBitmap.entrySet().iterator(); + while (thisIterator.hasNext()) { + Map.Entry e1 = thisIterator.next(); + + // Keep object to prevent auto-boxing + Integer high = e1.getKey(); + + BitmapDataProvider lowBitmap2 = x2.highToBitmap.get(high); + + if (lowBitmap2 == null) { + // None of given high values are present in x2 + thisIterator.remove(); + } else { + BitmapDataProvider lowBitmap1 = e1.getValue(); + + if (lowBitmap2 instanceof RoaringBitmap && lowBitmap1 instanceof RoaringBitmap) { + ((RoaringBitmap) lowBitmap1).and((RoaringBitmap) lowBitmap2); + } else if (lowBitmap2 instanceof MutableRoaringBitmap + && lowBitmap1 instanceof MutableRoaringBitmap) { + ((MutableRoaringBitmap) lowBitmap1).and((MutableRoaringBitmap) lowBitmap2); + } else { + throw new UnsupportedOperationException( + ".and is not between " + this.getClass() + " and " + lowBitmap1.getClass()); + } + } + + if (firstBucket) { + firstBucket = false; + + // Invalidate the lowest high as lowest not valid + firstHighNotValid = Math.min(firstHighNotValid, high); + allValid = false; + } + } + } + + /** + * In-place bitwise ANDNOT (difference) operation. The current bitmap is modified. 
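A small sketch of how the in-place set operations above compose (illustration only; both operands must be backed by the same low-level bitmap type, RoaringBitmap or MutableRoaringBitmap, otherwise an UnsupportedOperationException is thrown):

Roaring64Map a = new Roaring64Map();        // assumed constructor
a.add(1L, 2L, 3L);                          // varargs add(long...)
Roaring64Map b = new Roaring64Map();
b.addLong(3L);
b.addLong(4L);

a.or(b);        // a = {1, 2, 3, 4}
a.andNot(b);    // a = {1, 2}
a.xor(b);       // a = {1, 2, 3, 4}
a.and(b);       // a = {3, 4}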
+ * + * @param x2 other bitmap + */ + public void andNot(final Roaring64Map x2) { + boolean firstBucket = true; + + Iterator> thisIterator = highToBitmap.entrySet().iterator(); + while (thisIterator.hasNext()) { + Map.Entry e1 = thisIterator.next(); + + // Keep object to prevent auto-boxing + Integer high = e1.getKey(); + + BitmapDataProvider lowBitmap2 = x2.highToBitmap.get(high); + + if (lowBitmap2 != null) { + BitmapDataProvider lowBitmap1 = e1.getValue(); + + if (lowBitmap2 instanceof RoaringBitmap && lowBitmap1 instanceof RoaringBitmap) { + ((RoaringBitmap) lowBitmap1).andNot((RoaringBitmap) lowBitmap2); + } else if (lowBitmap2 instanceof MutableRoaringBitmap + && lowBitmap1 instanceof MutableRoaringBitmap) { + ((MutableRoaringBitmap) lowBitmap1).andNot((MutableRoaringBitmap) lowBitmap2); + } else { + throw new UnsupportedOperationException( + ".and is not between " + this.getClass() + " and " + lowBitmap1.getClass()); + } + } + + if (firstBucket) { + firstBucket = false; + + // Invalidate the lowest high as lowest not valid + firstHighNotValid = Math.min(firstHighNotValid, high); + allValid = false; + } + } + } + + /** + * A string describing the bitmap. + * + * @return the string + */ + @Override + public String toString() { + final StringBuilder answer = new StringBuilder(); + final LongIterator i = this.getLongIterator(); + answer.append("{"); + if (i.hasNext()) { + if (signedLongs) { + answer.append(i.next()); + } else { + answer.append(toUnsignedString(i.next())); + } + } + while (i.hasNext()) { + answer.append(","); + // to avoid using too much memory, we limit the size + if (answer.length() > 0x80000) { + answer.append("..."); + break; + } + if (signedLongs) { + answer.append(i.next()); + } else { + answer.append(toUnsignedString(i.next())); + } + + } + answer.append("}"); + return answer.toString(); + } + + /** + * For better performance, consider the Use the {@link #forEach forEach} method. + * + * @return a custom iterator over set bits, the bits are traversed in ascending sorted order + */ + public LongIterator getLongIterator() { + final Iterator> it = highToBitmap.entrySet().iterator(); + + return toIterator(it, false); + } + + protected LongIterator toIterator(final Iterator> it, + final boolean reversed) { + return new LongIterator() { + + protected int currentKey; + protected IntIterator currentIt; + + @Override + public boolean hasNext() { + if (currentIt == null) { + // Were initially empty + if (!moveToNextEntry(it)) { + return false; + } + } + + while (true) { + if (currentIt.hasNext()) { + return true; + } else { + if (!moveToNextEntry(it)) { + return false; + } + } + } + } + + /** + * + * @param it the underlying iterator which has to be moved to next long + * @return true if we MAY have more entries. 
false if there is definitely nothing more + */ + private boolean moveToNextEntry(Iterator> it) { + if (it.hasNext()) { + Map.Entry next = it.next(); + currentKey = next.getKey(); + if (reversed) { + currentIt = next.getValue().getReverseIntIterator(); + } else { + currentIt = next.getValue().getIntIterator(); + } + + // We may have more long + return true; + } else { + // We know there is nothing more + return false; + } + } + + @Override + public long next() { + if (hasNext()) { + return pack(currentKey, currentIt.next()); + } else { + throw new IllegalStateException("empty"); + } + } + + @Override + public LongIterator clone() { + throw new UnsupportedOperationException("TODO"); + } + }; + } + + public boolean contains(long x) { + int high = high(x); + BitmapDataProvider lowBitmap = highToBitmap.get(high); + if (lowBitmap == null) { + return false; + } + + int low = low(x); + return lowBitmap.contains(low); + } + + public int getSizeInBytes() { + return (int) getLongSizeInBytes(); + } + + public long getLongSizeInBytes() { + long size = 8; + + // Size of containers + size += highToBitmap.values().stream().mapToLong(p -> p.getLongSizeInBytes()).sum(); + + // Size of Map data-structure: we consider each TreeMap entry costs 40 bytes + // http://java-performance.info/memory-consumption-of-java-data-types-2/ + size += 8 + 40 * highToBitmap.size(); + + // Size of (boxed) Integers used as keys + size += 16 * highToBitmap.size(); + + // The cache impacts the size in heap + size += 8 * sortedCumulatedCardinality.length; + size += 4 * sortedHighs.length; + + return size; + } + + public boolean isEmpty() { + return getLongCardinality() == 0L; + } + + public ImmutableLongBitmapDataProvider limit(long x) { + throw new UnsupportedOperationException("TODO"); + } + + /** + * Use a run-length encoding where it is estimated as more space efficient + * + * @return whether a change was applied + */ + public boolean runOptimize() { + boolean hasChanged = false; + for (BitmapDataProvider lowBitmap : highToBitmap.values()) { + if (lowBitmap instanceof RoaringBitmap) { + hasChanged |= ((RoaringBitmap) lowBitmap).runOptimize(); + } else if (lowBitmap instanceof MutableRoaringBitmap) { + hasChanged |= ((MutableRoaringBitmap) lowBitmap).runOptimize(); + } + } + return hasChanged; + } + + public long serializedSizeInBytes() { + long nbBytes = 0L; + + // .writeBoolean for signedLongs boolean + nbBytes += 1; + + // .writeInt for number of different high values + nbBytes += 4; + + for (Map.Entry entry : highToBitmap.entrySet()) { + // .writeInt for high + nbBytes += 4; + + // The low bitmap size in bytes + nbBytes += entry.getValue().serializedSizeInBytes(); + } + + return nbBytes; + } + + /** + * reset to an empty bitmap; result occupies as much space a newly created bitmap. + */ + public void clear() { + this.highToBitmap.clear(); + resetPerfHelpers(); + } + + /** + * Return the set values as an array, if the cardinality is smaller than 2147483648. The long + * values are in sorted order. + * + * @return array representing the set values. 
+ */ + public long[] toArray() { + long cardinality = this.getLongCardinality(); + if (cardinality > Integer.MAX_VALUE) { + throw new IllegalStateException("The cardinality does not fit in an array"); + } + + final long[] array = new long[(int) cardinality]; + + int pos = 0; + LongIterator it = getLongIterator(); + + while (it.hasNext()) { + array[pos++] = it.next(); + } + return array; + } + + /* ------------------ method below from Roaring64NavigableMap and being overwritten ----------------------------- */ + + /** + * Set all the specified values to true. This can be expected to be slightly faster than calling + * "add" repeatedly. The provided integers values don't have to be in sorted order, but it may be + * preferable to sort them from a performance point of view. + * + * @param dat set values + */ + public void add(long... dat) { + for (long oneLong : dat) { + addLong(oneLong); + } + } + + /** + * Add to the current bitmap all longs in [rangeStart,rangeEnd). + * + * @param rangeStart inclusive beginning of range + * @param rangeEnd exclusive ending of range + */ + public void add(final long rangeStart, final long rangeEnd) { + int startHigh = high(rangeStart); + int startLow = low(rangeStart); + + int endHigh = high(rangeEnd); + int endLow = low(rangeEnd); + + for (int high = startHigh; high <= endHigh; high++) { + final int currentStartLow; + if (startHigh == high) { + // The whole range starts in this bucket + currentStartLow = startLow; + } else { + // Add the bucket from the beginning + currentStartLow = 0; + } + + long startLowAsLong = Util.toUnsignedLong(currentStartLow); + + final long endLowAsLong; + if (endHigh == high) { + // The whole range ends in this bucket + endLowAsLong = Util.toUnsignedLong(endLow); + } else { + // Add the bucket until the end: we have a +1 as, in RoaringBitmap.add(long,long), the end + // is excluded + endLowAsLong = Util.toUnsignedLong(-1) + 1; + } + + if (endLowAsLong > startLowAsLong) { + // Initialize the bitmap only if there is access data to write + BitmapDataProvider bitmap = highToBitmap.get(high); + if (bitmap == null) { + bitmap = new MutableRoaringBitmap(); + pushBitmapForHigh(high, bitmap); + } + + if (bitmap instanceof RoaringBitmap) { + ((RoaringBitmap) bitmap).add(startLowAsLong, endLowAsLong); + } else if (bitmap instanceof MutableRoaringBitmap) { + ((MutableRoaringBitmap) bitmap).add(startLowAsLong, endLowAsLong); + } else { + throw new UnsupportedOperationException("TODO. 
Not for " + bitmap.getClass()); + } + } + } + + invalidateAboveHigh(startHigh); + } + + + + /*---------------------------- method below is new written for doris's own bitmap --------------------------------*/ + + public LongIterator getReverseLongIterator() { + return toIterator(highToBitmap.descendingMap().entrySet().iterator(), true); + } + + /*--------------- method below fetched from org.roaringbitmap.longlong RoaringIntPacking -----------------------*/ + + public void removeLong(long x) { + int high = high(x); + + BitmapDataProvider bitmap = highToBitmap.get(high); + + if (bitmap != null) { + int low = low(x); + bitmap.remove(low); + + // Invalidate only if actually modified + invalidateAboveHigh(high); + } + + } + + public void trim() { + for (BitmapDataProvider bitmap : highToBitmap.values()) { + bitmap.trim(); + } + } + + @Override + public int hashCode() { + return highToBitmap.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + Roaring64Map other = (Roaring64Map) obj; + return Objects.equals(highToBitmap, other.highToBitmap); + } + + /** + * Add the value if it is not already present, otherwise remove it. + * + * @param x long value + */ + public void flip(final long x) { + int high = high(x); + BitmapDataProvider lowBitmap = highToBitmap.get(high); + if (lowBitmap == null) { + // The value is not added: add it without any flip specific code + addLong(x); + } else { + int low = low(x); + + // .flip is not in BitmapDataProvider contract + // TODO Is it relevant to calling .flip with a cast? + if (lowBitmap instanceof RoaringBitmap) { + ((RoaringBitmap) lowBitmap).flip(low); + } else if (lowBitmap instanceof MutableRoaringBitmap) { + ((MutableRoaringBitmap) lowBitmap).flip(low); + } else { + // Fallback to a manual flip + if (lowBitmap.contains(low)) { + lowBitmap.remove(low); + } else { + lowBitmap.add(low); + } + } + } + + invalidateAboveHigh(high); + } + + /** + * Serialize this bitmap. + *
+ * Unlike RoaringBitmap, there is no specification for now: it may change from one Java version + * to another, and from one RoaringBitmap version to another. + *
+ * Consider calling {@link #runOptimize} before serialization to improve compression. + *
+ * The current bitmap is not modified. + * + * @param out the DataOutput stream + * @throws IOException Signals that an I/O exception has occurred. + */ + public void serialize(DataOutput out) throws IOException { + if (highToBitmap.size() == 0) { + return; + } + if (is32BitsEnough()) { + out.write(BitmapValue.BITMAP32); + highToBitmap.get(0).serialize(out); + return; + } + + out.write(BitmapValue.BITMAP64); + Codec.encodeVarint64(highToBitmap.size(), out); + + for (Map.Entry entry : highToBitmap.entrySet()) { + // serialized in little end for BE cpp read in case of bugs when the value is larger than 32bits + out.writeInt(Integer.reverseBytes(entry.getKey().intValue())); + entry.getValue().serialize(out); + } + } + + /** + * Deserialize (retrieve) this bitmap. + *
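A round-trip sketch of the format written by serialize (illustration only, using plain java.io streams): the method emits a type byte followed by the payload, and deserialize expects the caller to have consumed that type byte already. An empty bitmap writes nothing, so the sketch assumes bitmap is non-empty.

java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream();
bitmap.serialize(new java.io.DataOutputStream(bos));

java.io.DataInputStream in = new java.io.DataInputStream(new java.io.ByteArrayInputStream(bos.toByteArray()));
int bitmapType = in.readUnsignedByte();     // BitmapValue.BITMAP32 or BitmapValue.BITMAP64
Roaring64Map copy = new Roaring64Map();     // assumed constructor
copy.deserialize(in, bitmapType);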
+ * Unlike RoaringBitmap, there is no specification for now: it may change from one Java version to + * another, and from one RoaringBitmap version to another. + *
+ * The current bitmap is overwritten. + * + * @param in the DataInput stream + * @throws IOException Signals that an I/O exception has occurred. + */ + public void deserialize(DataInput in, int bitmapType) throws IOException { + this.clear(); + highToBitmap = new TreeMap<>(); + + if (bitmapType == BitmapValue.BITMAP32) { + RoaringBitmap provider = new RoaringBitmap(); + provider.deserialize(in); + highToBitmap.put(0, provider); + return; + } + + if (bitmapType != BitmapValue.BITMAP64) { + throw new InvalidRoaringFormat("invalid bitmap type"); + } + + long nbHighs = Codec.decodeVarint64(in); + for (int i = 0; i < nbHighs; i++) { + // keep the same behavior with little-end serialize + int high = Integer.reverseBytes(in.readInt()); + RoaringBitmap provider = new RoaringBitmap(); + provider.deserialize(in); + highToBitmap.put(high, provider); + } + + resetPerfHelpers(); + } + + public boolean is32BitsEnough() { + return highToBitmap.size() == 1 && highToBitmap.get(0) != null; + } + +} diff --git a/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/AutoType.java b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/AutoType.java new file mode 100644 index 00000000..f65a9fdf --- /dev/null +++ b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/AutoType.java @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2006 JMockit developers + * This file is subject to the terms of the MIT license (see LICENSE.txt). + */ + +package org.apache.doris.common.jmockit; + +import java.util.HashMap; +import java.util.Map; + +/** + * Helper class to convert type between Java's wrapper type and primitive type + * There are 8 wrapper/primitive types in Java: + * |Wrapped Type |Primitive Type + * -------------------------------------- + * |Boolean |boolean + * |Character |char + * |Byte |byte + * |Short |short + * |Integer |int + * |Float |float + * |Long |longFieldReflection + * |Double |double + *
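A few illustrative expectations for the accessors defined below (not part of the patch):

AutoType.isWrapperOfPrimitiveType(Integer.class);   // true
AutoType.getPrimitiveType(Integer.class);           // int.class
AutoType.getWrapperType(long.class);                // Long.class
AutoType.getPrimitiveType(String.class);            // null, String is not a wrapper type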
+ * Copied from Apache Doris + */ +public class AutoType { + private static final Map, Class> PRIMITIVE_TO_WRAPPER = new HashMap(); + private static final Map, Class> WRAPPER_TO_PRIMITIVE = new HashMap(); + + static { + WRAPPER_TO_PRIMITIVE.put(Boolean.class, Boolean.TYPE); + WRAPPER_TO_PRIMITIVE.put(Character.class, Character.TYPE); + WRAPPER_TO_PRIMITIVE.put(Byte.class, Byte.TYPE); + WRAPPER_TO_PRIMITIVE.put(Short.class, Short.TYPE); + WRAPPER_TO_PRIMITIVE.put(Integer.class, Integer.TYPE); + WRAPPER_TO_PRIMITIVE.put(Float.class, Float.TYPE); + WRAPPER_TO_PRIMITIVE.put(Long.class, Long.TYPE); + WRAPPER_TO_PRIMITIVE.put(Double.class, Double.TYPE); + + PRIMITIVE_TO_WRAPPER.put(Boolean.TYPE, Boolean.class); + PRIMITIVE_TO_WRAPPER.put(Character.TYPE, Character.class); + PRIMITIVE_TO_WRAPPER.put(Byte.TYPE, Byte.class); + PRIMITIVE_TO_WRAPPER.put(Short.TYPE, Short.class); + PRIMITIVE_TO_WRAPPER.put(Integer.TYPE, Integer.class); + PRIMITIVE_TO_WRAPPER.put(Float.TYPE, Float.class); + PRIMITIVE_TO_WRAPPER.put(Long.TYPE, Long.class); + PRIMITIVE_TO_WRAPPER.put(Double.TYPE, Double.class); + } + + public static boolean isWrapperOfPrimitiveType(Class type) { + return WRAPPER_TO_PRIMITIVE.containsKey(type); + } + + public static Class getPrimitiveType(Class wrapperType) { + return WRAPPER_TO_PRIMITIVE.get(wrapperType); + } + + public static Class getWrapperType(Class primitiveType) { + return PRIMITIVE_TO_WRAPPER.get(primitiveType); + } +} diff --git a/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/ConstructorReflection.java b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/ConstructorReflection.java new file mode 100644 index 00000000..4b437ce4 --- /dev/null +++ b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/ConstructorReflection.java @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2006 JMockit developers + * This file is subject to the terms of the MIT license (see LICENSE.txt). + */ + +package org.apache.doris.common.jmockit; + +import java.lang.reflect.AccessibleObject; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; + +/** + * Modify from mockit.internal.util.ConstructorReflection JMockit v1.13 + * Util class to invoke constructor of specified class. + *
+ * Copied from Apache Doris + */ +public final class ConstructorReflection { + + private ConstructorReflection() { + } + + /** + * invoke the {@constructor} with parameters {@initArgs}. + */ + public static T invoke(Constructor constructor, Object... initArgs) { + if (constructor == null || initArgs == null) { + throw new IllegalArgumentException(); + } + makeAccessible(constructor); + + try { + return constructor.newInstance(initArgs); + } catch (InstantiationException e) { + throw new RuntimeException(e); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } catch (InvocationTargetException e) { + Throwable cause = e.getCause(); + if (cause instanceof Error) { + throw (Error) cause; + } else if (cause instanceof RuntimeException) { + throw (RuntimeException) cause; + } else { + throw new IllegalStateException("Should never get here", cause); + } + } + } + + /** + * invoke the constructor with parameters {@nonNullArgs Object...}. + */ + public static T newInstance(Class aClass, Object... nonNullArgs) { + if (aClass == null || nonNullArgs == null) { + throw new IllegalArgumentException(); + } else { + Class[] argTypes = ParameterReflection.getArgumentTypesFromArgumentValues(nonNullArgs); + Constructor constructor = findCompatibleConstructor(aClass, argTypes); + return invoke(constructor, nonNullArgs); + } + } + + /** + * invoke the constructor with no parameters of {@aClass Class}. + */ + private static T newInstance(Class aClass) { + return (T) newInstance((Class) aClass, ParameterReflection.NO_PARAMETERS); + } + + /** + * invoke the default constructor of {@aClass Class}. + * if the default constructor is not available, try to invoke the one constructor with no parameters. + */ + public static T newInstanceUsingDefaultConstructor(Class aClass) { + if (aClass == null) { + throw new IllegalArgumentException(); + } + try { + return aClass.newInstance(); + } catch (InstantiationException e) { + throw new RuntimeException(e); + } catch (IllegalAccessException e) { + return newInstance(aClass); + } + } + + /** + * invoke the default constructor of {@aClass Class}. + */ + public static T newInstanceUsingDefaultConstructorIfAvailable(Class aClass) { + if (aClass == null) { + throw new IllegalArgumentException(); + } + try { + return aClass.newInstance(); + } catch (InstantiationException e) { + return null; + } catch (IllegalAccessException e) { + return null; + } + } + + /** + * invoke inner-class constructor with outer-class instance {@outerInstance} and parameters {@nonNullArgs}. + */ + public static T newInnerInstance(Class innerClass, Object outerInstance, Object... nonNullArgs) { + if (innerClass == null || outerInstance == null || nonNullArgs == null) { + throw new IllegalArgumentException(); + } else { + Object[] initArgs = ParameterReflection.argumentsWithExtraFirstValue(nonNullArgs, outerInstance); + return newInstance(innerClass, initArgs); + } + } + + /** + * Get non-inner-class constructor with {@argTypes Class[]}. + * if more than one constructor was found, choose the more specific one. (i.e. constructor with parameters that have more concrete types is more specific) + * if no constructor was found, will check if {@theClass} is a inner class. Then a IllegalArgumentException exception will be thrown. 
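A self-contained sketch of the constructor-resolution behaviour described above; the Widget class is hypothetical and exists only for illustration:

import org.apache.doris.common.jmockit.ConstructorReflection;

public class ConstructorReflectionSketch {
    static class Widget {
        Widget() {}
        Widget(Object o) {}
        Widget(String s) {}   // more specific when a String argument is supplied
    }

    public static void main(String[] args) {
        // Both Widget(Object) and Widget(String) accept a String argument;
        // the more specific Widget(String) is chosen.
        Widget byArgs = ConstructorReflection.newInstance(Widget.class, "name");
        // Falls back to the accessible no-arg constructor.
        Widget byDefault = ConstructorReflection.newInstanceUsingDefaultConstructor(Widget.class);
    }
}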
+ */ + private static Constructor findCompatibleConstructor(Class theClass, Class[] argTypes) { + if (theClass == null || argTypes == null) { + throw new IllegalArgumentException(); + } + Constructor found = null; + Class[] foundParameters = null; + Constructor[] declaredConstructors = theClass.getDeclaredConstructors(); + Constructor[] declaredConstructorsArray = declaredConstructors; + + for (Constructor declaredConstructor : declaredConstructorsArray) { + Class[] declaredParamTypes = declaredConstructor.getParameterTypes(); + int gap = declaredParamTypes.length - argTypes.length; + if (gap == 0 && (ParameterReflection.matchesParameterTypes(declaredParamTypes, argTypes) + || ParameterReflection.acceptsArgumentTypes(declaredParamTypes, argTypes)) + && + (found == null || ParameterReflection.hasMoreSpecificTypes(declaredParamTypes, foundParameters))) { + found = (Constructor) declaredConstructor; + foundParameters = declaredParamTypes; + } + } + + if (found != null) { + return found; + } else { + Class declaringClass = theClass.getDeclaringClass(); + Class[] paramTypes = declaredConstructors[0].getParameterTypes(); + // check if this constructor is belong to a inner class + // the parameter[0] of inner class's constructor is a instance of outer class + if (paramTypes[0] == declaringClass && paramTypes.length > argTypes.length) { + throw new IllegalArgumentException( + "Invalid instantiation of inner class; use newInnerInstance instead"); + } else { + String argTypesDesc = ParameterReflection.getParameterTypesDescription(argTypes); + throw new IllegalArgumentException( + "No compatible constructor found: " + theClass.getSimpleName() + argTypesDesc); + } + } + } + + // ensure that field is accessible + public static void makeAccessible(AccessibleObject classMember) { + if (!classMember.isAccessible()) { + classMember.setAccessible(true); + } + } +} diff --git a/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/Deencapsulation.java b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/Deencapsulation.java new file mode 100644 index 00000000..74362e0c --- /dev/null +++ b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/Deencapsulation.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2006 JMockit developers + * This file is subject to the terms of the MIT license (see LICENSE.txt). + */ + +package org.apache.doris.common.jmockit; + +/** + * Modify from mockit.internal.util.Deencapsulation JMockit ver1.13 + *
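Typical test-side usage of this facade looks roughly like the sketch below; the Counter class is hypothetical and only illustrates the accessors defined in this file:

import org.apache.doris.common.jmockit.Deencapsulation;

public class DeencapsulationSketch {
    static class Counter {
        private int count = 1;
        private int bump(int by) { return count += by; }
    }

    public static void main(String[] args) {
        Counter c = new Counter();
        int before = Deencapsulation.getField(c, "count");   // reads the private field: 1
        Deencapsulation.setField(c, "count", 41);            // writes the private field
        int after = Deencapsulation.invoke(c, "bump", 1);    // calls the private method: 42
    }
}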
+ * Copied from Apache Doris + */ +public final class Deencapsulation { + private Deencapsulation() { + } + + public static T getField(Object objectWithField, String fieldName) { + return FieldReflection.getField(objectWithField.getClass(), fieldName, objectWithField); + } + + public static T getField(Object objectWithField, Class fieldType) { + return FieldReflection.getField(objectWithField.getClass(), fieldType, objectWithField); + } + + public static T getField(Class classWithStaticField, String fieldName) { + return FieldReflection.getField(classWithStaticField, fieldName, null); + } + + public static T getField(Class classWithStaticField, Class fieldType) { + return FieldReflection.getField(classWithStaticField, fieldType, null); + } + + public static void setField(Object objectWithField, String fieldName, Object fieldValue) { + FieldReflection.setField(objectWithField.getClass(), objectWithField, fieldName, fieldValue); + } + + public static void setField(Object objectWithField, Object fieldValue) { + FieldReflection.setField(objectWithField.getClass(), objectWithField, null, fieldValue); + } + + public static void setField(Class classWithStaticField, String fieldName, Object fieldValue) { + FieldReflection.setField(classWithStaticField, null, fieldName, fieldValue); + } + + public static void setField(Class classWithStaticField, Object fieldValue) { + FieldReflection.setField(classWithStaticField, null, null, fieldValue); + } + + public static T invoke(Object objectWithMethod, String methodName, Object... nonNullArgs) { + Class theClass = objectWithMethod.getClass(); + return MethodReflection.invoke(theClass, objectWithMethod, methodName, nonNullArgs); + } + + public static T invoke(Class classWithStaticMethod, String methodName, Object... nonNullArgs) { + return MethodReflection.invoke(classWithStaticMethod, null, methodName, nonNullArgs); + } + + public static T newInstance(Class classToInstantiate, Object... nonNullArgs) { + return ConstructorReflection.newInstance(classToInstantiate, nonNullArgs); + } + + public static T newInnerInstance(Class innerClassToInstantiate, Object outerClassInstance, Object... nonNullArgs) { + return ConstructorReflection.newInnerInstance(innerClassToInstantiate, outerClassInstance, nonNullArgs); + } +} diff --git a/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/FieldReflection.java b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/FieldReflection.java new file mode 100644 index 00000000..04c6d9cd --- /dev/null +++ b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/FieldReflection.java @@ -0,0 +1,288 @@ +/* + * Copyright (c) 2006 JMockit developers + * This file is subject to the terms of the MIT license (see LICENSE.txt). + */ + +package org.apache.doris.common.jmockit; + +import java.lang.reflect.AccessibleObject; +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.lang.reflect.ParameterizedType; +import java.lang.reflect.Type; +import java.lang.reflect.TypeVariable; + +/** + * Modify from mockit.internal.util.FieldReflection JMockit v1.13 + * Util class to set and get the value of specified field. + *
+ * Copied from Apache Doris + */ +public final class FieldReflection { + private FieldReflection() { + } + + /** + * Get field's value with field's name. + */ + public static T getField(Class theClass, String fieldName, Object targetObject) { + if (theClass == null || fieldName == null || targetObject == null) { + throw new IllegalStateException(); + } + Field field = getDeclaredField(theClass, fieldName, targetObject != null); + return getFieldValue(field, targetObject); + } + + /** + * Get field's value with field's type. + */ + public static T getField(Class theClass, Class fieldType, Object targetObject) { + if (theClass == null || fieldType == null) { + throw new IllegalStateException(); + } + Field field = getDeclaredField(theClass, fieldType, targetObject != null, false); + return getFieldValue(field, targetObject); + } + + /** + * Get field's value with field's type. + */ + public static T getField(Class theClass, Type fieldType, Object targetObject) { + if (theClass == null || fieldType == null) { + throw new IllegalStateException(); + } + Field field = getDeclaredField(theClass, fieldType, targetObject != null, false); + return getFieldValue(field, targetObject); + } + + /** + * Modify field's value in targetObject. + * If {@fieldName String} is null, will try to set field with field's type. + */ + public static Field setField(Class theClass, Object targetObject, String fieldName, Object fieldValue) { + if (theClass == null) { + throw new IllegalArgumentException(); + } + boolean instanceField = targetObject != null; + Field field; + if (fieldName != null) { + field = getDeclaredField(theClass, fieldName, instanceField); + } else { + if (fieldValue == null) { + throw new IllegalArgumentException("Missing field value when setting field by type"); + } + + field = getDeclaredField(theClass, fieldValue.getClass(), instanceField, true); + } + + setFieldValue(field, targetObject, fieldValue); + return field; + } + + /** + * Get field by field's name. + * If no field is found in this class, it will continue to look up its super class. + * If {@instanceField boolean} is true, will only search for the non-static field. + */ + private static Field getDeclaredField(Class theClass, String fieldName, boolean instanceField) { + if (theClass == null || fieldName == null) { + throw new IllegalStateException(); + } + try { + return theClass.getDeclaredField(fieldName); + } catch (NoSuchFieldException e) { + Class superClass = theClass.getSuperclass(); + if (superClass != null && superClass != Object.class) { + return getDeclaredField(superClass, fieldName, instanceField); + } else { + String kind = instanceField ? "instance" : "static"; + throw new IllegalArgumentException("No " + kind + " field of name \"" + fieldName + "\" found in " + theClass); + } + } + } + + /** + * Get field by field's type. + * If no field is found in this class, it will continue to look up its super class. + * If {@instanceField boolean} is true, will only search for the non-static field. + * If {@forAssignment boolean} is true, will compare its super type with desiredType. 
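A sketch of the two lookup modes described above, by name and by type, using hypothetical classes:

import org.apache.doris.common.jmockit.FieldReflection;

public class FieldReflectionSketch {
    static class Base { private String name = "base"; }
    static class Derived extends Base { private int size = 3; }

    public static void main(String[] args) {
        Derived d = new Derived();
        // By name: searches Derived first, then walks up to Base.
        String name = FieldReflection.getField(Derived.class, "name", d);
        // By type: finds the single int field declared in Derived
        // (more than one matching field in the same class is rejected).
        Integer size = FieldReflection.getField(Derived.class, int.class, d);
        // Writes go through the same lookup.
        FieldReflection.setField(Derived.class, d, "size", 42);
    }
}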
+ */ + private static Field getDeclaredField(Class theClass, Type desiredType, boolean instanceField, boolean forAssignment) { + if (theClass == null || desiredType == null) { + throw new IllegalStateException(); + } + Field found = getDeclaredFieldInSingleClass(theClass, desiredType, instanceField, forAssignment); + if (found == null) { + Class superClass = theClass.getSuperclass(); + if (superClass != null && superClass != Object.class) { + return getDeclaredField(superClass, desiredType, instanceField, forAssignment); + } else { + StringBuilder errorMsg = new StringBuilder(instanceField ? "Instance" : "Static"); + String typeName = getTypeName(desiredType); + errorMsg.append(" field of type ").append(typeName).append(" not found in ").append(theClass); + throw new IllegalArgumentException(errorMsg.toString()); + } + } else { + return found; + } + } + + /** + * Get field by field's type. + * There is only one field is expected to be found in a single class. + * If {@instanceField boolean} is true, will only search for the non-static field. + * If {@forAssignment boolean} is true, will compare its super type with desiredType. + * If more than one field are found, a IllegalArgumentException will be thrown. + */ + private static Field getDeclaredFieldInSingleClass(Class theClass, Type desiredType, boolean instanceField, boolean forAssignment) { + if (theClass == null || desiredType == null) { + throw new IllegalStateException(); + } + Field found = null; + Field[] fields = theClass.getDeclaredFields(); + + for (Field field : fields) { + if (!field.isSynthetic()) { + Type fieldType = field.getGenericType(); + if (instanceField != Modifier.isStatic(field.getModifiers()) && isCompatibleFieldType(fieldType, desiredType, forAssignment)) { + if (found != null) { + String message = errorMessageForMoreThanOneFieldFound(desiredType, instanceField, forAssignment, found, field); + throw new IllegalArgumentException(message); + } + + found = field; + } + } + } + + return found; + } + + /** + * return true if the {@fieldType} is compatible with {@desiredType}. + * If {@forAssignment} is true, will compare its super type with desiredType. + * If {@forAssignment} is false, will also compare it with desiredType's super type. + */ + private static boolean isCompatibleFieldType(Type fieldType, Type desiredType, boolean forAssignment) { + if (fieldType == null || desiredType == null) { + throw new IllegalStateException(); + } + Class fieldClass = getClassType(fieldType); + Class desiredClass = getClassType(desiredType); + if (isSameType(desiredClass, fieldClass)) { + return true; + } else if (forAssignment) { + return fieldClass.isAssignableFrom(desiredClass); + } else { + return desiredClass.isAssignableFrom(fieldClass) || fieldClass.isAssignableFrom(desiredClass); + } + } + + private static String errorMessageForMoreThanOneFieldFound(Type desiredFieldType, boolean instanceField, boolean forAssignment, Field firstField, Field secondField) { + return "More than one " + (instanceField ? "instance" : "static") + " field " + (forAssignment ? "to" : "from") + + " which a value of type " + + getTypeName(desiredFieldType) + (forAssignment ? 
" can be assigned" : " can be read") + " exists in " + + secondField.getDeclaringClass() + ": " + firstField.getName() + ", " + secondField.getName(); + } + + private static String getTypeName(Type type) { + if (type == null) { + throw new IllegalStateException(); + } + Class classType = getClassType(type); + Class primitiveType = AutoType.getPrimitiveType(classType); + if (primitiveType != null) { + return primitiveType + " or " + classType.getSimpleName(); + } else { + String name = classType.getName(); + return name.startsWith("java.lang.") ? name.substring(10) : name; + } + } + + /** + * Get field in {@targetObject Object}. + */ + private static T getFieldValue(Field field, Object targetObject) { + if (field == null) { + throw new IllegalStateException(); + } + makeAccessible(field); + + try { + return (T) field.get(targetObject); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + /** + * Modify field with value in {@targetObject Object}. + */ + public static void setFieldValue(Field field, Object targetObject, Object value) { + if (field == null) { + throw new IllegalStateException(); + } + try { + if (Modifier.isStatic(field.getModifiers()) && Modifier.isFinal(field.getModifiers())) { + throw new IllegalArgumentException("Do not allow to set static final field"); + } else { + makeAccessible(field); + field.set(targetObject, value); + } + + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } + } + + /* + private static void setStaticFinalField(Field field, Object value) throws IllegalAccessException { + if (field == null) { + throw new IllegalStateException(); + } + Field modifiersField; + try { + modifiersField = Field.class.getDeclaredField("modifiers"); + } catch (NoSuchFieldException e) { + throw new RuntimeException(e); + } + + modifiersField.setAccessible(true); + int nonFinalModifiers = modifiersField.getInt(field) - 16; + modifiersField.setInt(field, nonFinalModifiers); + FieldAccessor accessor = ReflectionFactory.getReflectionFactory().newFieldAccessor(field, false); + accessor.set((Object)null, value); + } + */ + + public static Class getClassType(Type declaredType) { + while (!(declaredType instanceof Class)) { + if (declaredType instanceof ParameterizedType) { + return (Class) ((ParameterizedType) declaredType).getRawType(); + } + + if (!(declaredType instanceof TypeVariable)) { + throw new IllegalArgumentException("Type of unexpected kind: " + declaredType); + } + + declaredType = ((TypeVariable) declaredType).getBounds()[0]; + } + + return (Class) declaredType; + } + + // ensure that field is accessible + public static void makeAccessible(AccessibleObject classMember) { + if (!classMember.isAccessible()) { + classMember.setAccessible(true); + } + } + + // return true if the two types are same type. 
+ private static boolean isSameType(Class firstType, Class secondType) { + return firstType == secondType + || firstType.isPrimitive() && firstType == AutoType.getPrimitiveType(secondType) + || secondType.isPrimitive() && secondType == AutoType.getPrimitiveType(firstType); + } + +} diff --git a/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/GeneratedClasses.java b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/GeneratedClasses.java new file mode 100644 index 00000000..1281f4ed --- /dev/null +++ b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/GeneratedClasses.java @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2006 JMockit developers + * This file is subject to the terms of the MIT license (see LICENSE.txt). + */ + +package org.apache.doris.common.jmockit; + +import java.lang.reflect.Proxy; + +/** + * Modify from mockit.internal.util.GeneratedClasses JMockit v1.13 + * Helper class to return type of mocked-object + *
+ * Copied from Apache Doris + */ +public final class GeneratedClasses { + private static final String IMPLCLASS_PREFIX = "$Impl_"; + private static final String SUBCLASS_PREFIX = "$Subclass_"; + + private GeneratedClasses() { + } + + static boolean isGeneratedImplementationClass(Class mockedType) { + return isGeneratedImplementationClass(mockedType.getName()); + } + + static boolean isGeneratedImplementationClass(String className) { + return className.contains(IMPLCLASS_PREFIX); + } + + static boolean isGeneratedSubclass(String className) { + return className.contains(SUBCLASS_PREFIX); + } + + static boolean isGeneratedClass(String className) { + return isGeneratedSubclass(className) || isGeneratedImplementationClass(className); + } + + static Class getMockedClassOrInterfaceType(Class aClass) { + if (!Proxy.isProxyClass(aClass) && !isGeneratedImplementationClass(aClass)) { + return isGeneratedSubclass(aClass.getName()) ? aClass.getSuperclass() : aClass; + } else { + return aClass.getInterfaces()[0]; + } + } + + static Class getMockedClass(Object mock) { + return getMockedClassOrInterfaceType(mock.getClass()); + } +} diff --git a/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/MethodReflection.java b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/MethodReflection.java new file mode 100644 index 00000000..293e9816 --- /dev/null +++ b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/MethodReflection.java @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2006 JMockit developers + * This file is subject to the terms of the MIT license (see LICENSE.txt). + */ + +package org.apache.doris.common.jmockit; + +import java.lang.reflect.AccessibleObject; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; + +/** + * Modify from mockit.internal.util.MethodReflection JMockit v1.13 + * Util class to get and invoke method from specified class. + *
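A sketch of private-method invocation with this helper; overloads are resolved toward the most specific parameter types, as findCompatibleMethodInClass below describes (the Greeter class is hypothetical):

import org.apache.doris.common.jmockit.MethodReflection;

public class MethodReflectionSketch {
    static class Greeter {
        private String greet(Object who) { return "hello " + who; }
        private String greet(String who) { return "hi " + who; }   // more specific for a String argument
    }

    public static void main(String[] args) {
        // Private methods are made accessible; greet(String) wins over greet(Object)
        // because its parameter types are more specific for the supplied argument.
        String s = MethodReflection.invoke(Greeter.class, new Greeter(), "greet", "doris");
        // s == "hi doris"
    }
}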
+ * Copied from Apache Doris + */ +public final class MethodReflection { + private MethodReflection() { + } + + public static T invoke(Class theClass, Object targetInstance, String methodName, Object... methodArgs) { + if (theClass == null || methodName == null) { + throw new IllegalArgumentException(); + } + boolean staticMethod = targetInstance == null; + Class[] argTypes = ParameterReflection.getArgumentTypesFromArgumentValues(methodArgs); + Method method = staticMethod ? findCompatibleStaticMethod(theClass, methodName, argTypes) : + findCompatibleMethod(theClass, methodName, argTypes); + if (staticMethod && !Modifier.isStatic(method.getModifiers())) { + throw new IllegalArgumentException( + "Attempted to invoke non-static method without an instance to invoke it on"); + } else { + T result = invoke(targetInstance, method, methodArgs); + return result; + } + } + + public static T invoke(Object targetInstance, Method method, Object... methodArgs) { + if (method == null || methodArgs == null) { + throw new IllegalArgumentException(); + } + makeAccessible(method); + + try { + return (T) method.invoke(targetInstance, methodArgs); + } catch (IllegalAccessException e) { + throw new RuntimeException(e); + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException("Failure to invoke method: " + method, e); + } catch (InvocationTargetException e) { + Throwable cause = e.getCause(); + if (cause instanceof Error) { + throw (Error) cause; + } else if (cause instanceof RuntimeException) { + throw (RuntimeException) cause; + } else { + ThrowOfCheckedException.doThrow((Exception) cause); + return null; + } + } + } + + /** + * Get a static method with {@methodName String} and {@argTypes Class[]}. + * If no method was found, a IllegalArgumentException will be thrown. + */ + private static Method findCompatibleStaticMethod(Class theClass, String methodName, Class[] argTypes) { + if (theClass == null || methodName == null || argTypes == null) { + throw new IllegalArgumentException(); + } + Method methodFound = findCompatibleMethodInClass(theClass, methodName, argTypes); + if (methodFound != null) { + return methodFound; + } else { + String argTypesDesc = ParameterReflection.getParameterTypesDescription(argTypes); + throw new IllegalArgumentException("No compatible static method found: " + methodName + argTypesDesc); + } + } + + /** + * Get a non-static method with {@methodName String} and {@argTypes Class[]}. + */ + public static Method findCompatibleMethod(Class theClass, String methodName, Class[] argTypes) { + if (theClass == null || methodName == null || argTypes == null) { + throw new IllegalArgumentException(); + } + Method methodFound = findCompatibleMethodIfAvailable(theClass, methodName, argTypes); + if (methodFound != null) { + return methodFound; + } else { + String argTypesDesc = ParameterReflection.getParameterTypesDescription(argTypes); + throw new IllegalArgumentException("No compatible method found: " + methodName + argTypesDesc); + } + } + + /** + * Get method with {@methodName String} and {@argTypes Class[]} from {@theClass Class}. + * If more than one method is found, choose the more specific one. (i.e. 
method with parameters that have more concrete types is more specific) + */ + private static Method findCompatibleMethodInClass(Class theClass, String methodName, Class[] argTypes) { + if (theClass == null || methodName == null || argTypes == null) { + throw new IllegalArgumentException(); + } + Method found = null; + Class[] foundParamTypes = null; + Method[] methods = theClass.getDeclaredMethods(); + + for (Method declaredMethod : methods) { + if (declaredMethod.getName().equals(methodName)) { + Class[] declaredParamTypes = declaredMethod.getParameterTypes(); + int gap = declaredParamTypes.length - argTypes.length; + if (gap == 0 && (ParameterReflection.matchesParameterTypes(declaredParamTypes, argTypes) + || ParameterReflection.acceptsArgumentTypes(declaredParamTypes, argTypes)) + && (foundParamTypes == null + || ParameterReflection.hasMoreSpecificTypes(declaredParamTypes, foundParamTypes))) { + found = declaredMethod; + foundParamTypes = declaredParamTypes; + } + } + } + + return found; + } + + /** + * Get method with {@methodName String} and {@argTypes Class[]} from {@theClass Class} as well as its super class. + * If more than one method is found, choose the more specify one. (i.e. choose the method with parameters that have more concrete types) + */ + private static Method findCompatibleMethodIfAvailable(Class theClass, String methodName, Class[] argTypes) { + if (theClass == null || methodName == null || argTypes == null) { + throw new IllegalArgumentException(); + } + Method methodFound = null; + + while (true) { + Method compatibleMethod = findCompatibleMethodInClass(theClass, methodName, argTypes); + if (compatibleMethod != null && (methodFound == null || + ParameterReflection.hasMoreSpecificTypes(compatibleMethod.getParameterTypes(), + methodFound.getParameterTypes()))) { + methodFound = compatibleMethod; + } + + Class superClass = theClass.getSuperclass(); + if (superClass == null || superClass == Object.class) { + return methodFound; + } + + theClass = superClass; + } + } + + + // ensure that field is accessible + public static void makeAccessible(AccessibleObject classMember) { + if (!classMember.isAccessible()) { + classMember.setAccessible(true); + } + } + + // return true if the two types are same type. + private static boolean isSameType(Class firstType, Class secondType) { + return firstType == secondType + || firstType.isPrimitive() && firstType == AutoType.getPrimitiveType(secondType) + || secondType.isPrimitive() && secondType == AutoType.getPrimitiveType(firstType); + } +} diff --git a/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/ParameterReflection.java b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/ParameterReflection.java new file mode 100644 index 00000000..6a6efc11 --- /dev/null +++ b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/ParameterReflection.java @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2006 JMockit developers + * This file is subject to the terms of the MIT license (see LICENSE.txt). + */ + +package org.apache.doris.common.jmockit; + +import java.util.regex.Pattern; + +/** + * Modify from mockit.internal.util.ParameterReflection JMockit v1.13 + * Util class to verify parameter of methods. + *
+ * Copied from Apache Doris + */ +public final class ParameterReflection { + public static final Class[] NO_PARAMETERS = new Class[0]; + + public static final Pattern JAVA_LANG = Pattern.compile("java.lang.", 16); + + private ParameterReflection() { + } + + /** + * check if every member in {@declaredTypes} is completely equal to the corresponding member {@specifiedTypes}. + */ + static boolean matchesParameterTypes(Class[] declaredTypes, Class[] specifiedTypes) { + if (declaredTypes == null || specifiedTypes == null) { + throw new IllegalArgumentException(); + } + for (int i = 0; i < declaredTypes.length; ++i) { + Class declaredType = declaredTypes[i]; + Class specifiedType = specifiedTypes[i]; + if (!isSameType(declaredType, specifiedType)) { + return false; + } + } + + return true; + } + + /** + * check if every member in {@paramTypes} is acceptable to the corresponding member in {@argTypes}. + */ + static boolean acceptsArgumentTypes(Class[] paramTypes, Class[] argTypes) { + if (paramTypes == null || argTypes == null) { + throw new IllegalArgumentException(); + } + for (int i = 0; i < paramTypes.length; ++i) { + Class parType = paramTypes[i]; + Class argType = argTypes[i]; + if (!isSameType(parType, argType) && !parType.isAssignableFrom(argType)) { + return false; + } + } + + return true; + } + + /** + * Get all types from objects {@args}. + */ + static Class[] getArgumentTypesFromArgumentValues(Object... args) { + if (args == null) { + throw new IllegalArgumentException(); + } + if (args.length == 0) { + return NO_PARAMETERS; + } else { + Class[] argTypes = new Class[args.length]; + + for (int i = 0; i < args.length; ++i) { + argTypes[i] = getArgumentTypeFromArgumentValue(i, args); + } + + return argTypes; + } + } + + /** + * Get type from {@args} by index. + */ + static Class getArgumentTypeFromArgumentValue(int i, Object[] args) { + Object arg = args[i]; + if (arg == null) { + throw new IllegalArgumentException("Invalid null value passed as argument " + i); + } else { + Class argType; + if (arg instanceof Class) { + argType = (Class) arg; + args[i] = null; + } else { + argType = GeneratedClasses.getMockedClass(arg); + } + + return argType; + } + } + + /** + * return true if {@currentTypes} is more specific than {@previousTypes}. + */ + static boolean hasMoreSpecificTypes(Class[] currentTypes, Class[] previousTypes) { + if (currentTypes == null || previousTypes == null) { + throw new IllegalArgumentException(); + } + for (int i = 0; i < currentTypes.length; ++i) { + Class current = wrappedIfPrimitive(currentTypes[i]); + Class previous = wrappedIfPrimitive(previousTypes[i]); + if (current != previous && previous.isAssignableFrom(current)) { + return true; + } + } + + return false; + } + + /** + * return the type names of {@paramTypes} wrapped in brackets. + */ + static String getParameterTypesDescription(Class[] paramTypes) { + if (paramTypes == null) { + throw new IllegalArgumentException(); + } + StringBuilder paramTypesDesc = new StringBuilder(200); + paramTypesDesc.append('('); + String sep = ""; + + for (Class paramType : paramTypes) { + String typeName = JAVA_LANG.matcher(paramType.getCanonicalName()).replaceAll(""); + paramTypesDesc.append(sep).append(typeName); + sep = ", "; + } + + paramTypesDesc.append(')'); + return paramTypesDesc.toString(); + } + + /** + * return real parameters array of inner-class belong to the outer-class instance {@firstValue Object}. + * the parameter[0] of a inner-class constructor is always the instance of its outer-class. 
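This helper backs ConstructorReflection.newInnerInstance; a hypothetical sketch of that path:

import org.apache.doris.common.jmockit.ConstructorReflection;

public class InnerInstanceSketch {
    static class Outer {
        class Inner {                         // non-static: the compiled constructor is Inner(Outer, String)
            private final String tag;
            Inner(String tag) { this.tag = tag; }
        }
    }

    public static void main(String[] args) {
        Outer outer = new Outer();
        // argumentsWithExtraFirstValue prepends `outer`, matching Inner's (Outer, String) constructor.
        Outer.Inner inner = ConstructorReflection.newInnerInstance(Outer.Inner.class, outer, "t1");
    }
}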
+ */ + static Object[] argumentsWithExtraFirstValue(Object[] args, Object firstValue) { + Object[] args2 = new Object[1 + args.length]; + args2[0] = firstValue; + System.arraycopy(args, 0, args2, 1, args.length); + return args2; + } + + // return wrapped type if its type is primitive. + private static Class wrappedIfPrimitive(Class parameterType) { + if (parameterType.isPrimitive()) { + Class wrapperType = AutoType.getWrapperType(parameterType); + + assert wrapperType != null; + + return wrapperType; + } else { + return parameterType; + } + } + + // return true if the two types are same type. + private static boolean isSameType(Class firstType, Class secondType) { + return firstType == secondType + || firstType.isPrimitive() && firstType == AutoType.getPrimitiveType(secondType) + || secondType.isPrimitive() && secondType == AutoType.getPrimitiveType(firstType); + } +} diff --git a/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/ThrowOfCheckedException.java b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/ThrowOfCheckedException.java new file mode 100644 index 00000000..4dfc44ae --- /dev/null +++ b/spark-load/spark-load-common/src/main/java/org/apache/doris/common/jmockit/ThrowOfCheckedException.java @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2006 JMockit developers + * This file is subject to the terms of the MIT license (see LICENSE.txt). + */ + +package org.apache.doris.common.jmockit; + +/** + * Modify from mockit.internal.reflection.ThrowOfCheckedException JMockit v1.13 + */ +public final class ThrowOfCheckedException { + private static Exception exceptionToThrow; + + ThrowOfCheckedException() throws Exception { + throw exceptionToThrow; + } + + public static synchronized void doThrow(Exception checkedException) { + exceptionToThrow = checkedException; + ConstructorReflection.newInstanceUsingDefaultConstructor(ThrowOfCheckedException.class); + } +} diff --git a/spark-load/spark-load-common/src/main/java/org/apache/doris/config/EtlJobConfig.java b/spark-load/spark-load-common/src/main/java/org/apache/doris/config/EtlJobConfig.java new file mode 100644 index 00000000..9cca8650 --- /dev/null +++ b/spark-load/spark-load-common/src/main/java/org/apache/doris/config/EtlJobConfig.java @@ -0,0 +1,513 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
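The EtlJobConfig class that follows defines the ETL output layout; as a quick reference, a sketch of how its path helpers combine (all literal values below are hypothetical):

import org.apache.doris.config.EtlJobConfig;

public class EtlPathSketch {
    public static void main(String[] args) throws Exception {
        String outputPath = EtlJobConfig.getOutputPath("hdfs://ns1/doris_etl", 10001L, "label_20240601", 12345L);
        // -> hdfs://ns1/doris_etl/jobs/10001/label_20240601/12345

        String filePattern = EtlJobConfig.getOutputFilePattern("label_20240601", EtlJobConfig.FilePatternVersion.V1);
        // -> V1.label_20240601.%d.%d.%d.%d.%d.parquet (tableId.partitionId.indexId.bucket.schemaHash)

        String dppResult = EtlJobConfig.getDppResultFilePath(outputPath);
        // -> hdfs://ns1/doris_etl/jobs/10001/label_20240601/12345/dpp_result.json

        String tabletMeta = EtlJobConfig.getTabletMetaStr(
                outputPath + "/V1.label_20240601.10002.10003.10004.0.12345.parquet");
        // -> 10002.10003.10004.0.12345
    }
}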
+ +package org.apache.doris.config; + + +import com.google.common.base.Strings; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.gson.ExclusionStrategy; +import com.google.gson.FieldAttributes; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.annotations.SerializedName; + +import java.io.Serializable; +import java.util.Comparator; +import java.util.List; +import java.util.Map; + +/** + * Copied from Apache Doris org.apache.doris.sparkdpp.EtlJobConfig + */ +public class EtlJobConfig implements Serializable { + // global dict + public static final String GLOBAL_DICT_TABLE_NAME = "doris_global_dict_table_%d"; + public static final String DISTINCT_KEY_TABLE_NAME = "doris_distinct_key_table_%d_%s"; + public static final String DORIS_INTERMEDIATE_HIVE_TABLE_NAME = "doris_intermediate_hive_table_%d_%s"; + // tableId.partitionId.indexId.bucket.schemaHash + public static final String TABLET_META_FORMAT = "%d.%d.%d.%d.%d"; + public static final String ETL_OUTPUT_FILE_FORMAT = "parquet"; + // dpp result + public static final String DPP_RESULT_NAME = "dpp_result.json"; + // hdfsEtlPath/jobs/dbId/loadLabel/PendingTaskSignature + private static final String ETL_OUTPUT_PATH_FORMAT = "%s/jobs/%d/%s/%d"; + private static final String ETL_OUTPUT_FILE_NAME_DESC_V1 = + "version.label.tableId.partitionId.indexId.bucket.schemaHash.parquet"; + @SerializedName(value = "tables") + public Map tables; + @SerializedName(value = "outputPath") + public String outputPath; + @SerializedName(value = "outputFilePattern") + public String outputFilePattern; + @SerializedName(value = "label") + public String label; + @SerializedName(value = "properties") + public EtlJobProperty properties; + @SerializedName(value = "configVersion") + public ConfigVersion configVersion; + + /** + * for json deserialize + */ + public EtlJobConfig() { + } + + public EtlJobConfig(Map tables, String outputFilePattern, String label, EtlJobProperty properties) { + this.tables = tables; + // set outputPath when submit etl job + this.outputPath = null; + this.outputFilePattern = outputFilePattern; + this.label = label; + this.properties = properties; + this.configVersion = ConfigVersion.V1; + } + + public static String getOutputPath(String hdfsEtlPath, long dbId, String loadLabel, long taskSignature) { + return String.format(ETL_OUTPUT_PATH_FORMAT, hdfsEtlPath, dbId, loadLabel, taskSignature); + } + + public static String getOutputFilePattern(String loadLabel, FilePatternVersion filePatternVersion) { + return String.format("%s.%s.%s.%s", filePatternVersion.name(), loadLabel, TABLET_META_FORMAT, + ETL_OUTPUT_FILE_FORMAT); + } + + public static String getDppResultFilePath(String outputPath) { + return outputPath + "/" + DPP_RESULT_NAME; + } + + public static String getTabletMetaStr(String filePath) throws Exception { + String fileName = filePath.substring(filePath.lastIndexOf("/") + 1); + String[] fileNameArr = fileName.split("\\."); + // check file version + switch (FilePatternVersion.valueOf(fileNameArr[0])) { + case V1: + // version.label.tableId.partitionId.indexId.bucket.schemaHash.parquet + if (fileNameArr.length != ETL_OUTPUT_FILE_NAME_DESC_V1.split("\\.").length) { + throw new Exception( + "etl output file name error, format: " + ETL_OUTPUT_FILE_NAME_DESC_V1 + ", name: " + + fileName); + } + long tableId = Long.parseLong(fileNameArr[2]); + long partitionId = Long.parseLong(fileNameArr[3]); + long indexId = Long.parseLong(fileNameArr[4]); + int 
bucket = Integer.parseInt(fileNameArr[5]); + int schemaHash = Integer.parseInt(fileNameArr[6]); + // tableId.partitionId.indexId.bucket.schemaHash + return String.format(TABLET_META_FORMAT, tableId, partitionId, indexId, bucket, schemaHash); + default: + throw new Exception("etl output file version error. version: " + fileNameArr[0]); + } + } + + public static EtlJobConfig configFromJson(String jsonConfig) { + return new Gson().fromJson(jsonConfig, EtlJobConfig.class); + } + + public String configToJson() { + Gson gson = + new GsonBuilder().addDeserializationExclusionStrategy(new HiddenAnnotationExclusionStrategy()).create(); + return gson.toJson(this); + } + + @Override + public String toString() { + return "EtlJobConfig{" + "tables=" + tables + ", outputPath='" + outputPath + '\'' + ", outputFilePattern='" + + outputFilePattern + '\'' + ", label='" + label + '\'' + ", properties=" + properties + ", version=" + + configVersion + '}'; + } + + public String getOutputPath() { + return outputPath; + } + + public enum ConfigVersion { + V1 + } + + public enum FilePatternVersion { + V1 + } + + public enum SourceType { + FILE, HIVE + } + + public static class EtlJobProperty implements Serializable { + @SerializedName(value = "strictMode") + public boolean strictMode; + @SerializedName(value = "timezone") + public String timezone; + + @Override + public String toString() { + return "EtlJobProperty{" + "strictMode=" + strictMode + ", timezone='" + timezone + '\'' + '}'; + } + } + + public static class EtlTable implements Serializable { + @SerializedName(value = "indexes") + public List indexes; + @SerializedName(value = "partitionInfo") + public EtlPartitionInfo partitionInfo; + @SerializedName(value = "fileGroups") + public List fileGroups; + + /** + * for json deserialize + */ + public EtlTable() { + } + + public EtlTable(List etlIndexes, EtlPartitionInfo etlPartitionInfo) { + this.indexes = etlIndexes; + this.partitionInfo = etlPartitionInfo; + this.fileGroups = Lists.newArrayList(); + } + + public void addFileGroup(EtlFileGroup etlFileGroup) { + fileGroups.add(etlFileGroup); + } + + @Override + public String toString() { + return "EtlTable{" + "indexes=" + indexes + ", partitionInfo=" + partitionInfo + ", fileGroups=" + + fileGroups + '}'; + } + } + + public static class EtlColumn implements Serializable { + @SerializedName(value = "columnName") + public String columnName; + @SerializedName(value = "columnType") + public String columnType; + @SerializedName(value = "isAllowNull") + public boolean isAllowNull; + @SerializedName(value = "isKey") + public boolean isKey; + @SerializedName(value = "aggregationType") + public String aggregationType; + @SerializedName(value = "defaultValue") + public String defaultValue; + @SerializedName(value = "stringLength") + public int stringLength; + @SerializedName(value = "precision") + public int precision; + @SerializedName(value = "scale") + public int scale; + @SerializedName(value = "defineExpr") + public String defineExpr; + + // for unit test + public EtlColumn() { + } + + public EtlColumn(String columnName, String columnType, boolean isAllowNull, boolean isKey, + String aggregationType, String defaultValue, int stringLength, int precision, int scale) { + this.columnName = columnName; + this.columnType = columnType; + this.isAllowNull = isAllowNull; + this.isKey = isKey; + this.aggregationType = aggregationType; + this.defaultValue = defaultValue; + this.stringLength = stringLength; + this.precision = precision; + this.scale = scale; + this.defineExpr 
= null; + } + + @Override + public String toString() { + return "EtlColumn{" + "columnName='" + columnName + '\'' + ", columnType='" + columnType + '\'' + + ", isAllowNull=" + isAllowNull + ", isKey=" + isKey + ", aggregationType='" + aggregationType + + '\'' + ", defaultValue='" + defaultValue + '\'' + ", stringLength=" + stringLength + + ", precision=" + precision + ", scale=" + scale + ", defineExpr='" + defineExpr + '\'' + '}'; + } + } + + public static class EtlIndexComparator implements Comparator { + @Override + public int compare(EtlIndex a, EtlIndex b) { + int diff = a.columns.size() - b.columns.size(); + if (diff == 0) { + return 0; + } else if (diff > 0) { + return 1; + } else { + return -1; + } + } + } + + public static class EtlIndex implements Serializable { + @SerializedName(value = "indexId") + public long indexId; + @SerializedName(value = "columns") + public List columns; + @SerializedName(value = "schemaHash") + public int schemaHash; + @SerializedName(value = "indexType") + public String indexType; + @SerializedName(value = "isBaseIndex") + public boolean isBaseIndex; + @SerializedName(value = "schemaVersion") + public int schemaVersion; + + /** + * for json deserialize + */ + public EtlIndex() { + } + + public EtlIndex(long indexId, List etlColumns, int schemaHash, String indexType, boolean isBaseIndex, + int schemaVersion) { + this.indexId = indexId; + this.columns = etlColumns; + this.schemaHash = schemaHash; + this.indexType = indexType; + this.isBaseIndex = isBaseIndex; + this.schemaVersion = schemaVersion; + } + + public EtlColumn getColumn(String name) { + for (EtlColumn column : columns) { + if (column.columnName.equals(name)) { + return column; + } + } + return null; + } + + @Override + public String toString() { + return "EtlIndex{" + "indexId=" + indexId + ", columns=" + columns + ", schemaHash=" + schemaHash + + ", indexType='" + indexType + '\'' + ", isBaseIndex=" + isBaseIndex + ", schemaVersion=" + + schemaVersion + '}'; + } + } + + public static class EtlPartitionInfo implements Serializable { + @SerializedName(value = "partitionType") + public String partitionType; + @SerializedName(value = "partitionColumnRefs") + public List partitionColumnRefs; + @SerializedName(value = "distributionColumnRefs") + public List distributionColumnRefs; + @SerializedName(value = "partitions") + public List partitions; + + /** + * for json deserialize + */ + public EtlPartitionInfo() { + } + + public EtlPartitionInfo(String partitionType, List partitionColumnRefs, + List distributionColumnRefs, List etlPartitions) { + this.partitionType = partitionType; + this.partitionColumnRefs = partitionColumnRefs; + this.distributionColumnRefs = distributionColumnRefs; + this.partitions = etlPartitions; + } + + @Override + public String toString() { + return "EtlPartitionInfo{" + "partitionType='" + partitionType + '\'' + ", partitionColumnRefs=" + + partitionColumnRefs + ", distributionColumnRefs=" + distributionColumnRefs + ", partitions=" + + partitions + '}'; + } + } + + public static class EtlPartition implements Serializable { + @SerializedName(value = "partitionId") + public long partitionId; + @SerializedName(value = "startKeys") + public List startKeys; + @SerializedName(value = "endKeys") + public List endKeys; + @SerializedName(value = "isMaxPartition") + public boolean isMaxPartition; + @SerializedName(value = "bucketNum") + public int bucketNum; + + /** + * for json deserialize + */ + public EtlPartition() { + } + + public EtlPartition(long partitionId, List startKeys, 
List endKeys, boolean isMaxPartition, + int bucketNum) { + this.partitionId = partitionId; + this.startKeys = startKeys; + this.endKeys = endKeys; + this.isMaxPartition = isMaxPartition; + this.bucketNum = bucketNum; + } + + @Override + public String toString() { + return "EtlPartition{" + "partitionId=" + partitionId + ", startKeys=" + startKeys + ", endKeys=" + + endKeys + ", isMaxPartition=" + isMaxPartition + ", bucketNum=" + bucketNum + '}'; + } + } + + public static class EtlFileGroup implements Serializable { + @SerializedName(value = "sourceType") + public SourceType sourceType = SourceType.FILE; + @SerializedName(value = "filePaths") + public List filePaths; + @SerializedName(value = "fileFieldNames") + public List fileFieldNames; + @SerializedName(value = "columnsFromPath") + public List columnsFromPath; + @SerializedName(value = "columnSeparator") + public String columnSeparator; + @SerializedName(value = "lineDelimiter") + public String lineDelimiter; + @SerializedName(value = "isNegative") + public boolean isNegative; + @SerializedName(value = "fileFormat") + public String fileFormat; + @SerializedName(value = "columnMappings") + public Map columnMappings; + @SerializedName(value = "where") + public String where; + @SerializedName(value = "partitions") + public List partitions; + @SerializedName(value = "hiveDbTableName") + public String hiveDbTableName; + @SerializedName(value = "hiveTableProperties") + public Map hiveTableProperties; + + // hive db table used in dpp, not serialized + // set with hiveDbTableName (no bitmap column) or IntermediateHiveTable (created by global dict builder) + // in spark etl job + public String dppHiveDbTableName; + + // for data infile path + public EtlFileGroup(SourceType sourceType, List filePaths, List fileFieldNames, + List columnsFromPath, String columnSeparator, String lineDelimiter, + boolean isNegative, String fileFormat, Map columnMappings, + String where, List partitions) { + this.sourceType = sourceType; + this.filePaths = filePaths; + this.fileFieldNames = fileFieldNames; + this.columnsFromPath = columnsFromPath; + this.columnSeparator = Strings.isNullOrEmpty(columnSeparator) ? 
"\t" : columnSeparator; + this.lineDelimiter = lineDelimiter; + this.isNegative = isNegative; + this.fileFormat = fileFormat; + this.columnMappings = columnMappings; + this.where = where; + this.partitions = partitions; + } + + // for data from table + public EtlFileGroup(SourceType sourceType, String hiveDbTableName, Map hiveTableProperties, + boolean isNegative, Map columnMappings, String where, + List partitions) { + this.sourceType = sourceType; + this.hiveDbTableName = hiveDbTableName; + this.hiveTableProperties = hiveTableProperties; + this.isNegative = isNegative; + this.columnMappings = columnMappings; + this.where = where; + this.partitions = partitions; + } + + @Override + public String toString() { + return "EtlFileGroup{" + "sourceType=" + sourceType + ", filePaths=" + filePaths + ", fileFieldNames=" + + fileFieldNames + ", columnsFromPath=" + columnsFromPath + ", columnSeparator='" + columnSeparator + + '\'' + ", lineDelimiter='" + lineDelimiter + '\'' + ", isNegative=" + isNegative + + ", fileFormat='" + fileFormat + '\'' + ", columnMappings=" + columnMappings + ", where='" + where + + '\'' + ", partitions=" + partitions + ", hiveDbTableName='" + hiveDbTableName + '\'' + + ", hiveTableProperties=" + hiveTableProperties + '}'; + } + } + + /** + * FunctionCallExpr = functionName(args) + * For compatibility with old designed functions used in Hadoop MapReduce etl + *

+ * expr is more general, like k1 + 1, not just FunctionCall + */ + public static class EtlColumnMapping implements Serializable { + + private static Map functionMap = + new ImmutableMap.Builder().put("md5sum", "md5").build(); + + @SerializedName(value = "functionName") + public String functionName; + @SerializedName(value = "args") + public List args; + @SerializedName(value = "expr") + public String expr; + + public EtlColumnMapping(String functionName, List args) { + this.functionName = functionName; + this.args = args; + } + + public EtlColumnMapping(String expr) { + this.expr = expr; + } + + public String toDescription() { + StringBuilder sb = new StringBuilder(); + if (functionName == null) { + sb.append(expr); + } else { + if (functionMap.containsKey(functionName)) { + sb.append(functionMap.get(functionName)); + } else { + sb.append(functionName); + } + sb.append("("); + if (args != null) { + for (String arg : args) { + sb.append(arg); + sb.append(","); + } + } + sb.deleteCharAt(sb.length() - 1); + sb.append(")"); + } + return sb.toString(); + } + + @Override + public String toString() { + return "EtlColumnMapping{" + "functionName='" + functionName + '\'' + ", args=" + args + ", expr=" + expr + + '}'; + } + } + + public static class HiddenAnnotationExclusionStrategy implements ExclusionStrategy { + public boolean shouldSkipField(FieldAttributes f) { + return f.getAnnotation(SerializedName.class) == null; + } + + @Override + public boolean shouldSkipClass(Class clazz) { + return false; + } + } + +} diff --git a/spark-load/spark-load-common/src/main/java/org/apache/doris/util/JsonUtils.java b/spark-load/spark-load-common/src/main/java/org/apache/doris/util/JsonUtils.java new file mode 100644 index 00000000..3d33e85b --- /dev/null +++ b/spark-load/spark-load-common/src/main/java/org/apache/doris/util/JsonUtils.java @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
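To make the V1 naming convention above concrete, a small sketch (the ids and the path prefix are made-up values, not taken from the patch) showing how getOutputFilePattern() and getTabletMetaStr() mirror each other:

package org.apache.doris.config;

public class EtlOutputNamingExample {

    public static void main(String[] args) throws Exception {
        // "V1.label1.%d.%d.%d.%d.%d.parquet"
        String pattern = EtlJobConfig.getOutputFilePattern("label1", EtlJobConfig.FilePatternVersion.V1);

        // Fill in tableId.partitionId.indexId.bucket.schemaHash to get a concrete file name.
        String fileName = String.format(pattern, 10001, 10002, 10003, 0, 123);

        // getTabletMetaStr() recovers tableId.partitionId.indexId.bucket.schemaHash from the name.
        String tabletMeta = EtlJobConfig.getTabletMetaStr("/spark-load/jobs/1/label1/9/" + fileName);
        System.out.println(tabletMeta); // 10001.10002.10003.0.123
    }
}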
+ +package org.apache.doris.util; + +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.MapperFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.json.JsonMapper; + +import java.io.File; +import java.io.IOException; + +/** + * json utilities + */ +public class JsonUtils { + + private static final ObjectMapper MAPPER = + JsonMapper.builder().enable(MapperFeature.ACCEPT_CASE_INSENSITIVE_ENUMS).build(); + + public static T readValue(String s, Class clazz) throws JsonProcessingException { + return MAPPER.readValue(s, clazz); + } + + public static T readValue(String s, TypeReference ref) throws JsonProcessingException { + return MAPPER.readValue(s, ref); + } + + public static T readValue(File file, Class clazz) throws IOException { + return MAPPER.readValue(file, clazz); + } + + public static T readValue(JsonParser parser, Class clazz) throws IOException { + return MAPPER.readValue(parser, clazz); + } + + public static T readValue(JsonParser parser, TypeReference ref) throws IOException { + return MAPPER.readValue(parser, ref); + } + + public static String writeValueAsString(Object o) throws JsonProcessingException { + return MAPPER.writeValueAsString(o); + } + + public static byte[] writeValueAsBytes(Object o) throws JsonProcessingException { + return MAPPER.writeValueAsBytes(o); + } + +} diff --git a/spark-load/spark-load-common/src/test/java/org/apache/doris/config/EtlJobConfigTest.java b/spark-load/spark-load-common/src/test/java/org/apache/doris/config/EtlJobConfigTest.java new file mode 100644 index 00000000..7d82e65f --- /dev/null +++ b/spark-load/spark-load-common/src/test/java/org/apache/doris/config/EtlJobConfigTest.java @@ -0,0 +1,113 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
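The JsonUtils wrapper above is the (de)serialization entry point for the rest of spark-load-core, e.g. when building FE request bodies. A short sketch with illustrative map contents; the generic type parameters are spelled out here as an assumption about the intended signatures.

package org.apache.doris.util;

import com.fasterxml.jackson.core.type.TypeReference;

import java.util.HashMap;
import java.util.Map;

public class JsonUtilsExample {

    public static void main(String[] args) throws Exception {
        Map<String, Object> params = new HashMap<>();
        params.put("label", "test_label");
        params.put("loadId", 10001L);

        // Object -> JSON string.
        String json = JsonUtils.writeValueAsString(params);

        // JSON string -> typed object; the shared mapper also accepts case-insensitive enum values.
        Map<String, Object> back = JsonUtils.readValue(json, new TypeReference<Map<String, Object>>() {});
        System.out.println(back.get("label"));
    }
}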
+ +package org.apache.doris.config; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +class EtlJobConfigTest { + + @Test + void getOutputPath() { + String outputPath = EtlJobConfig.getOutputPath("hdfs://127.0.0.1/spark-load", 10001L, "test", 123L); + Assertions.assertEquals("hdfs://127.0.0.1/spark-load/jobs/10001/test/123", outputPath); + } + + @Test + void getOutputFilePattern() { + String outputFilePattern = EtlJobConfig.getOutputFilePattern("test", EtlJobConfig.FilePatternVersion.V1); + Assertions.assertEquals("V1.test.%d.%d.%d.%d.%d.parquet", outputFilePattern); + } + + @Test + void configFromJson() { + List etlIndexes = new ArrayList<>(); + List etlColumns = new ArrayList<>(); + EtlJobConfig.EtlColumn etlColumn0 = new EtlJobConfig.EtlColumn("c0", "INT", false, true, "NONE", "0", 0, 0, 0); + EtlJobConfig.EtlColumn etlColumn1 = + new EtlJobConfig.EtlColumn("c1", "VARCHAR", true, false, "NONE", "\\N", 10, 0, 0); + etlColumns.add(etlColumn0); + etlColumns.add(etlColumn1); + EtlJobConfig.EtlIndex etlIndex = new EtlJobConfig.EtlIndex(1L, etlColumns, 123, "DUPLICATE", true, 0); + etlIndexes.add(etlIndex); + EtlJobConfig.EtlPartitionInfo etlPartitionInfo = + new EtlJobConfig.EtlPartitionInfo("UNPARTITIONED", Collections.emptyList(), + Collections.singletonList("c0"), Collections.singletonList( + new EtlJobConfig.EtlPartition(0, Collections.emptyList(), Collections.emptyList(), true, 0))); + EtlJobConfig.EtlTable table = new EtlJobConfig.EtlTable(etlIndexes, etlPartitionInfo); + Map tables = new HashMap<>(); + tables.put(123L, table); + String outputFilePattern = EtlJobConfig.getOutputFilePattern("test", EtlJobConfig.FilePatternVersion.V1); + EtlJobConfig.EtlJobProperty properties = new EtlJobConfig.EtlJobProperty(); + EtlJobConfig jobConfig = new EtlJobConfig(tables, outputFilePattern, "test", properties); + Assertions.assertEquals(jobConfig.configToJson(), + EtlJobConfig.configFromJson("{\"tables\":{\"123\":{\"indexes\":[{\"indexId\":1,\"columns\":[{\"columnName\":\"c0\"," + + "\"columnType\":\"INT\",\"isAllowNull\":false,\"isKey\":true,\"aggregationType\":\"NONE\"," + + "\"defaultValue\":\"0\",\"stringLength\":0,\"precision\":0,\"scale\":0}," + + "{\"columnName\":\"c1\",\"columnType\":\"VARCHAR\",\"isAllowNull\":true,\"isKey\":false," + + "\"aggregationType\":\"NONE\",\"defaultValue\":\"\\\\N\",\"stringLength\":10,\"precision\":0," + + "\"scale\":0}],\"schemaHash\":123,\"indexType\":\"DUPLICATE\",\"isBaseIndex\":true," + + "\"schemaVersion\":0}],\"partitionInfo\":{\"partitionType\":\"UNPARTITIONED\"," + + "\"partitionColumnRefs\":[],\"distributionColumnRefs\":[\"c0\"],\"partitions\":" + + "[{\"partitionId\":0,\"startKeys\":[],\"endKeys\":[],\"isMaxPartition\":true,\"bucketNum\":0}]}," + + "\"fileGroups\":[]}},\"outputFilePattern\":\"V1.test.%d.%d.%d.%d.%d.parquet\"," + + "\"label\":\"test\",\"properties\":{\"strictMode\":false},\"configVersion\":\"V1\"}").configToJson()); + } + + @Test + void configToJson() { + List etlIndexes = new ArrayList<>(); + List etlColumns = new ArrayList<>(); + EtlJobConfig.EtlColumn etlColumn0 = new EtlJobConfig.EtlColumn("c0", "INT", false, true, "NONE", "0", 0, 0, 0); + EtlJobConfig.EtlColumn etlColumn1 = + new EtlJobConfig.EtlColumn("c1", "VARCHAR", true, false, "NONE", "\\N", 10, 0, 0); + etlColumns.add(etlColumn0); + etlColumns.add(etlColumn1); + EtlJobConfig.EtlIndex etlIndex = new 
EtlJobConfig.EtlIndex(1L, etlColumns, 123, "DUPLICATE", true, 0); + etlIndexes.add(etlIndex); + EtlJobConfig.EtlPartitionInfo etlPartitionInfo = + new EtlJobConfig.EtlPartitionInfo("UNPARTITIONED", Collections.emptyList(), + Collections.singletonList("c0"), Collections.singletonList( + new EtlJobConfig.EtlPartition(0, Collections.emptyList(), Collections.emptyList(), true, 0))); + EtlJobConfig.EtlTable table = new EtlJobConfig.EtlTable(etlIndexes, etlPartitionInfo); + Map tables = new HashMap<>(); + tables.put(123L, table); + String outputFilePattern = EtlJobConfig.getOutputFilePattern("test", EtlJobConfig.FilePatternVersion.V1); + EtlJobConfig.EtlJobProperty properties = new EtlJobConfig.EtlJobProperty(); + EtlJobConfig jobConfig = new EtlJobConfig(tables, outputFilePattern, "test", properties); + Assertions.assertEquals( + "{\"tables\":{\"123\":{\"indexes\":[{\"indexId\":1,\"columns\":[{\"columnName\":\"c0\"," + + "\"columnType\":\"INT\",\"isAllowNull\":false,\"isKey\":true,\"aggregationType\":\"NONE\"," + + "\"defaultValue\":\"0\",\"stringLength\":0,\"precision\":0,\"scale\":0}," + + "{\"columnName\":\"c1\",\"columnType\":\"VARCHAR\",\"isAllowNull\":true,\"isKey\":false," + + "\"aggregationType\":\"NONE\",\"defaultValue\":\"\\\\N\",\"stringLength\":10,\"precision\":0," + + "\"scale\":0}],\"schemaHash\":123,\"indexType\":\"DUPLICATE\",\"isBaseIndex\":true," + + "\"schemaVersion\":0}],\"partitionInfo\":{\"partitionType\":\"UNPARTITIONED\"," + + "\"partitionColumnRefs\":[],\"distributionColumnRefs\":[\"c0\"],\"partitions\":" + + "[{\"partitionId\":0,\"startKeys\":[],\"endKeys\":[],\"isMaxPartition\":true,\"bucketNum\":0}]}," + + "\"fileGroups\":[]}},\"outputFilePattern\":\"V1.test.%d.%d.%d.%d.%d.parquet\"," + + "\"label\":\"test\",\"properties\":{\"strictMode\":false},\"configVersion\":\"V1\"}", + jobConfig.configToJson()); + } +} \ No newline at end of file diff --git a/spark-load/spark-load-core/pom.xml b/spark-load/spark-load-core/pom.xml new file mode 100644 index 00000000..fbe3edaf --- /dev/null +++ b/spark-load/spark-load-core/pom.xml @@ -0,0 +1,187 @@ + + + + 4.0.0 + + org.apache.doris + spark-load + ${revision} + + + spark-load-core + + + 8 + 8 + UTF-8 + 1 + -Xmx512m + + + + + org.apache.doris + spark-load-common + ${project.version} + + + com.fasterxml.jackson.core + jackson-databind + + + org.projectlombok + lombok + provided + + + commons-cli + commons-cli + + + com.google.guava + guava + + + org.apache.spark + spark-launcher_${scala.major.version} + + + org.apache.spark + spark-core_${scala.major.version} + + + org.apache.hadoop + hadoop-client + + + + + org.apache.spark + spark-catalyst_${scala.major.version} + + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-client + + + org.apache.logging.log4j + log4j-core + + + + org.apache.logging.log4j + log4j-api + + + + org.apache.logging.log4j + log4j-slf4j-impl + + + + org.slf4j + slf4j-api + + + org.junit.jupiter + junit-jupiter-engine + test + + + org.jmockit + jmockit + test + + + + + + + + org.apache.httpcomponents + httpclient + + + org.apache.hadoop + hadoop-aws + + + org.slf4j + slf4j-log4j12 + + + log4j + log4j + + + servlet-api + javax.servlet + + + + com.amazonaws + aws-java-sdk-s3 + + + com.amazonaws + aws-java-sdk-bundle + + + + + com.amazonaws + aws-java-sdk-s3 + + + com.amazonaws + aws-java-sdk-glue + + + com.amazonaws + aws-java-sdk-dynamodb + + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + + set larger, eg, 3, to reduce the time or running FE unit tests<--> + ${fe_ut_parallel} + 
not reuse forked jvm, so that each unit test will run in separate jvm. to avoid singleton confict<--> + false + + -javaagent:${settings.localRepository}/org/jmockit/jmockit/${jmockit.version}/jmockit-${jmockit.version}.jar @{argLine} + + + + + + + \ No newline at end of file diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/SparkLoadRunner.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/SparkLoadRunner.java new file mode 100644 index 00000000..f7920879 --- /dev/null +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/SparkLoadRunner.java @@ -0,0 +1,149 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris; + +import org.apache.commons.lang3.exception.ExceptionUtils; +import org.apache.doris.common.CommandLineOptions; +import org.apache.doris.config.JobConfig; +import org.apache.doris.load.LoaderFactory; +import org.apache.doris.load.job.Loader; +import org.apache.doris.load.job.Recoverable; +import org.apache.doris.util.JsonUtils; + +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import io.netty.util.internal.logging.InternalLoggerFactory; +import io.netty.util.internal.logging.Log4JLoggerFactory; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.DefaultParser; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.lang3.StringUtils; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +import java.io.File; +import java.io.IOException; + +public class SparkLoadRunner { + + private static final Logger LOG = LogManager.getLogger(SparkLoadRunner.class); + + public static final String SPARK_LOAD_HOME = System.getenv("SPARK_LOAD_HOME"); + + static { + InternalLoggerFactory.setDefaultFactory(Log4JLoggerFactory.INSTANCE); + } + + public static void main(String[] args) { + + if (StringUtils.isBlank(SPARK_LOAD_HOME)) { + System.err.println("env SPARK_LOAD_HOME is not set."); + System.exit(-1); + } + + CommandLineOptions cmdOptions = parseArgs(args); + if (Strings.isNullOrEmpty(cmdOptions.getConfigPath())) { + System.err.println("config path is empty"); + System.exit(-1); + } + + JobConfig jobConfig = readConfig(cmdOptions.getConfigPath()); + try { + checkConfig(jobConfig); + } catch (IllegalArgumentException e) { + System.err.println("check config failed, msg: " + ExceptionUtils.getStackTrace(e)); + System.exit(-1); + } + + Loader loader = LoaderFactory.createLoader(jobConfig, cmdOptions.getRecovery()); + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + LOG.info("Shutting down..."); + loader.cancel(); + })); + try { + + loader.prepare(); + do { + if (loader instanceof 
Recoverable) { + if (((Recoverable) loader).canBeRecovered()) { + LOG.info("recovery check passed, start prepare recovery."); + ((Recoverable) loader).prepareRecover(); + break; + } + } + loader.execute(); + } while (false); + + loader.afterFinished(); + + } catch (Exception e) { + loader.afterFailed(e); + LOG.error("start load failed", e); + System.err.println("start load failed, exit."); + System.exit(-1); + } + + } + + private static CommandLineOptions parseArgs(String[] args) { + CommandLineParser parser = new DefaultParser(); + Options options = new Options(); + options.addOption("c", "config", true, "Spark load config file"); + options.addOption("r", "recovery", false, "Recovery mode"); + CommandLine cmd = null; + try { + cmd = parser.parse(options, args); + } catch (ParseException e) { + System.err.println("failed to parse argument, exit."); + System.exit(-1); + } + + String configPath = cmd.getOptionValue("config"); + boolean recovery = cmd.hasOption('r') || cmd.hasOption("recovery"); + return new CommandLineOptions(configPath, recovery); + + } + + private static JobConfig readConfig(String path) { + JobConfig jobConfig = null; + try { + jobConfig = JsonUtils.readValue(new File(path), JobConfig.class); + } catch (IOException e) { + LOG.error("failed to read config file", e); + System.err.println("failed to read config file, exit."); + System.exit(-1); + } + return jobConfig; + } + + private static void checkConfig(JobConfig jobConfig) { + jobConfig.checkFeAddress(); + Preconditions.checkArgument(StringUtils.isNoneBlank(jobConfig.getLabel()), "label is empty"); + Preconditions.checkArgument(StringUtils.isNoneBlank(jobConfig.getUser()), "user is empty"); + Preconditions.checkArgument(jobConfig.getPassword() != null, "password cannot be null"); + Preconditions.checkArgument(StringUtils.isNoneBlank(jobConfig.getDatabase()), "database is empty"); + Preconditions.checkArgument(StringUtils.isNoneBlank(jobConfig.getWorkingDir()), + "spark config item workingDir is empty"); + jobConfig.checkTaskInfo(); + jobConfig.checkSparkInfo(); + jobConfig.checkHadoopProperties(); + } + +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/client/DorisClient.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/client/DorisClient.java new file mode 100644 index 00000000..124fd0fe --- /dev/null +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/client/DorisClient.java @@ -0,0 +1,248 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
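SparkLoadRunner above loads its job description via JsonUtils.readValue(file, JobConfig.class). A sketch of a minimal config expressed inline for readability: the field names follow the JobConfig/TaskInfo classes later in this patch, while the addresses, paths and the empty "spark" block are placeholders (a real job would still have to satisfy checkConfig()).

package org.apache.doris;

import org.apache.doris.config.JobConfig;
import org.apache.doris.util.JsonUtils;

public class JobConfigExample {

    public static void main(String[] args) throws Exception {
        String json = "{"
                + "\"feAddresses\":\"127.0.0.1:8030\","
                + "\"label\":\"test_label\","
                + "\"user\":\"root\","
                + "\"password\":\"\","
                + "\"database\":\"test_db\","
                + "\"workingDir\":\"hdfs://127.0.0.1/spark-load\","
                + "\"loadTasks\":{\"tbl1\":{\"type\":\"FILE\","
                + "\"paths\":[\"hdfs://127.0.0.1/data/part-*\"],"
                + "\"format\":\"csv\",\"columns\":\"c0,c1\"}},"
                + "\"spark\":{}"
                + "}";

        // Same deserialization path as SparkLoadRunner.readConfig(), only from a string
        // instead of a file.
        JobConfig jobConfig = JsonUtils.readValue(json, JobConfig.class);
        System.out.println(jobConfig.getLabel() + " -> " + jobConfig.getDatabase());
    }
}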
+ +package org.apache.doris.client; + +import org.apache.doris.common.Constants; +import org.apache.doris.common.LoadInfo; +import org.apache.doris.common.ResponseEntity; +import org.apache.doris.common.meta.LoadInfoResponse; +import org.apache.doris.common.meta.LoadMeta; +import org.apache.doris.exception.SparkLoadException; +import org.apache.doris.util.HttpUtils; +import org.apache.doris.util.JsonUtils; + +import org.apache.commons.lang3.StringUtils; +import org.apache.http.HttpHeaders; +import org.apache.http.HttpStatus; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.client.methods.HttpRequestBase; +import org.apache.http.client.utils.URIBuilder; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.CloseableHttpClient; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Base64; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class DorisClient { + + private static volatile FeClient FE; + private static BeClient BE; + + public static FeClient getFeClient(String feAddresses, String user, String password) { + if (FE == null) { + synchronized (FeClient.class) { + if (FE == null) { + FE = new FeClient(feAddresses, user, password); + } + } + } + return FE; + } + + public static class FeClient { + + public static final String BASE_URL = "http://%s%s"; + + public static final String INGESTION_LOAD_URL_PATTERN = "/api/ingestion_load/%s/%s/%s"; + + public static final String CREATE_ACTION = "_create"; + + public static final String UPDATE_ACTION = "_update"; + + public static final String GET_LOAD_INFO = "/api/%s/_load_info"; + + public static final String GET_DDL = "/api/_get_ddl"; + + private final List feNodes; + + private final String auth; + + public FeClient(String feAddresses, String user, String password) { + this.feNodes = parseFeNodes(feAddresses); + this.auth = parseAuth(user, password); + } + + private List parseFeNodes(String feAddresses) { + if (StringUtils.isBlank(feAddresses)) { + throw new IllegalArgumentException("feAddresses is empty"); + } + String[] feArr = feAddresses.split(","); + if (Arrays.stream(feArr).map(x -> x.split(":")) + .anyMatch(x -> x.length != 2 || x[0].isEmpty() || x[1].isEmpty())) { + throw new IllegalArgumentException("feAddresses contains invalid format, " + feAddresses); + } + return Arrays.stream(feArr).collect(Collectors.toList()); + } + + private String parseAuth(String user, String password) { + return Base64.getEncoder().encodeToString((user + ":" + password).getBytes(StandardCharsets.UTF_8)); + } + + public LoadMeta createIngestionLoad(String db, Map> tableToPartition, String label, + Map properties) throws SparkLoadException { + try { + String path = String.format(INGESTION_LOAD_URL_PATTERN, Constants.DEFAULT_CATALOG, db, CREATE_ACTION); + HttpPost httpPost = new HttpPost(); + addCommonHeaders(httpPost); + Map params = new HashMap<>(); + params.put("label", label); + params.put("tableToPartition", tableToPartition); + params.put("properties", properties); + httpPost.setEntity(new StringEntity(JsonUtils.writeValueAsString(params))); + String content = executeRequest(httpPost, path, null); + if (StringUtils.isBlank(content)) { + throw new SparkLoadException(String.format("request create load 
failed, path: %s", path)); + } + ResponseEntity res = JsonUtils.readValue(content, ResponseEntity.class); + if (res.getCode() != 0) { + throw new SparkLoadException(String.format("create load failed, code: %d, msg: %s, reason: %s", + res.getCode(), res.getMsg(), res.getData().isNull() ? null : res.getData().asText())); + } + return JsonUtils.readValue(res.getData().traverse(), LoadMeta.class); + } catch (IOException | URISyntaxException e) { + throw new SparkLoadException("create spark load failed", e); + } + } + + private void addCommonHeaders(HttpRequestBase req) { + req.setHeader(HttpHeaders.AUTHORIZATION, "Basic " + auth); + } + + private String executeRequest(HttpRequestBase req, String apiPath, Map params) + throws IOException, URISyntaxException { + IOException ex = null; + try (CloseableHttpClient client = HttpUtils.getClient()) { + for (String feNode : feNodes) { + String url = String.format(BASE_URL, feNode, apiPath); + URIBuilder uriBuilder = new URIBuilder(URI.create(url)); + if (params != null && !params.isEmpty()) { + params.forEach(uriBuilder::addParameter); + } + req.setURI(uriBuilder.build()); + addCommonHeaders(req); + CloseableHttpResponse res; + try { + res = client.execute(req); + } catch (IOException e) { + ex = e; + continue; + } + if (res.getStatusLine().getStatusCode() != HttpStatus.SC_OK) { + continue; + } + return HttpUtils.getEntityContent(res.getEntity()); + } + } + if (ex != null) { + throw ex; + } + return null; + } + + public void updateIngestionLoad(String db, Long loadId, Map statusInfo) + throws SparkLoadException { + + String path = String.format(INGESTION_LOAD_URL_PATTERN, Constants.DEFAULT_CATALOG, db, UPDATE_ACTION); + HttpPost httpPost = new HttpPost(); + addCommonHeaders(httpPost); + Map params = new HashMap<>(); + params.put("loadId", loadId); + params.put("statusInfo", statusInfo); + try { + httpPost.setEntity(new StringEntity(JsonUtils.writeValueAsString(params))); + String content = executeRequest(httpPost, path, null); + if (StringUtils.isBlank(content)) { + throw new SparkLoadException(String.format("request update load failed, path: %s", path)); + } + ResponseEntity res = JsonUtils.readValue(content, ResponseEntity.class); + if (res.getCode() != 0) { + throw new SparkLoadException(String.format("update load failed, code: %d, msg: %s, reason: %s", + res.getCode(), res.getMsg(), res.getData().isNull() ? 
null : res.getData().asText())); + } + } catch (IOException | URISyntaxException e) { + throw new SparkLoadException("update load failed", e); + } + + } + + public LoadInfo getLoadInfo(String db, String label) throws SparkLoadException { + + String path = String.format(GET_LOAD_INFO, db); + HttpGet httpGet = new HttpGet(); + addCommonHeaders(httpGet); + try { + Map params = new HashMap<>(); + params.put("label", label); + String content = executeRequest(httpGet, path, params); + if (StringUtils.isBlank(content)) { + throw new SparkLoadException(String.format("request get load info failed, path: %s", path)); + } + LoadInfoResponse res = JsonUtils.readValue(content, LoadInfoResponse.class); + if (!"ok".equalsIgnoreCase(res.getStatus())) { + throw new SparkLoadException(String.format("get load info failed, status: %s, msg: %s, jobInfo: %s", + res.getStatus(), res.getMsg(), JsonUtils.writeValueAsString(res.getJobInfo()))); + } + return res.getJobInfo(); + } catch (IOException | URISyntaxException e) { + throw new SparkLoadException("get load info failed", e); + } + + } + + public String getDDL(String db, String table) throws SparkLoadException { + + HttpGet httpGet = new HttpGet(); + addCommonHeaders(httpGet); + try { + Map params = new HashMap<>(); + params.put("db", db); + params.put("table", table); + String content = executeRequest(httpGet, GET_DDL, params); + if (StringUtils.isBlank(content)) { + throw new SparkLoadException(String.format("request get ddl failed, path: %s", GET_DDL)); + } + ResponseEntity res = JsonUtils.readValue(content, ResponseEntity.class); + if (res.getCode() != 0 || !res.getData().has("create_table") + || res.getData().get("create_table").isEmpty()) { + throw new SparkLoadException(String.format("get ddl failed, status: %s, msg: %s, data: %s", + res.getCode(), res.getMsg(), JsonUtils.writeValueAsString(res.getData()))); + } + return res.getData().get("create_table").get(0).asText(); + } catch (IOException | URISyntaxException e) { + throw new SparkLoadException("get ddl failed", e); + } + + } + + } + + private static class BeClient { + + } + +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/common/CommandLineOptions.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/CommandLineOptions.java new file mode 100644 index 00000000..8c66abcb --- /dev/null +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/CommandLineOptions.java @@ -0,0 +1,33 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
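A usage sketch for the FeClient above. Host, credentials and table names are placeholders, the generic map types are assumptions about the intended signatures, and the call performs real HTTP against the FE; LoadMeta is defined later in this patch.

package org.apache.doris.client;

import org.apache.doris.common.meta.LoadMeta;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class FeClientExample {

    public static void main(String[] args) throws Exception {
        // FE addresses are comma-separated "host:httpPort" pairs, as enforced by parseFeNodes().
        DorisClient.FeClient fe = DorisClient.getFeClient("127.0.0.1:8030", "root", "");

        // Ask the FE to create an ingestion (spark) load covering one table; an empty
        // partition list means all partitions.
        Map<String, List<String>> tableToPartition = new HashMap<>();
        tableToPartition.put("tbl1", Collections.emptyList());

        LoadMeta meta = fe.createIngestionLoad("test_db", tableToPartition, "test_label",
                new HashMap<String, String>());
        System.out.println("loadId=" + meta.getLoadId() + ", txnId=" + meta.getTxnId());
    }
}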
+ +package org.apache.doris.common; + +import lombok.Getter; + +@Getter +public class CommandLineOptions { + + private final String configPath; + + private final Boolean recovery; + + public CommandLineOptions(String configPath, Boolean recovery) { + this.configPath = configPath; + this.recovery = recovery; + } +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/common/Constants.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/Constants.java new file mode 100644 index 00000000..a3e4803e --- /dev/null +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/Constants.java @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.common; + +public interface Constants { + + String HIVE_METASTORE_URIS = "hive.metastore.uris"; + String SPARK_STANDALONE_SCHEME = "spark"; + String HADOOP_AUTH_KERBEROS = "kerberos"; + String HADOOP_SECURITY_AUTHENTICATION = "hadoop.security.authentication"; + String HADOOP_KERBEROS_PRINCIPAL = "hadoop.kerberos.principal"; + String HADOOP_KERBEROS_KEYTAB = "hadoop.kerberos.keytab"; + + String DEFAULT_CATALOG = "internal"; + +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/common/LoadInfo.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/LoadInfo.java new file mode 100644 index 00000000..1c7e904c --- /dev/null +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/LoadInfo.java @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
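The Kerberos-related keys in Constants above are what a secured job would put into JobConfig.hadoopProperties. A small sketch: the principal and keytab values are placeholders, and the assumption that checkHadoopProperties() validates exactly these keys comes from the constant names, since that method is outside this excerpt.

package org.apache.doris.common;

import java.util.HashMap;
import java.util.Map;

public class KerberosPropertiesExample {

    public static void main(String[] args) {
        Map<String, String> hadoopProperties = new HashMap<>();
        hadoopProperties.put(Constants.HADOOP_SECURITY_AUTHENTICATION, Constants.HADOOP_AUTH_KERBEROS);
        hadoopProperties.put(Constants.HADOOP_KERBEROS_PRINCIPAL, "doris/_HOST@EXAMPLE.COM");
        hadoopProperties.put(Constants.HADOOP_KERBEROS_KEYTAB, "/etc/doris/doris.keytab");

        hadoopProperties.forEach((k, v) -> System.out.println(k + "=" + v));
    }
}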
+ +package org.apache.doris.common; + +import lombok.Data; + +import java.util.List; + +@Data +public class LoadInfo { + + private String dbName; + private List tblNames; + private String label; + private String clusterName; + private String state; + private String failMsg; + private String trackingUrl; + +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/common/ResponseEntity.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/ResponseEntity.java new file mode 100644 index 00000000..a5a3f149 --- /dev/null +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/ResponseEntity.java @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.common; + +import com.fasterxml.jackson.databind.JsonNode; +import lombok.Data; + +@Data +public class ResponseEntity { + + private Integer code; + private String msg; + private JsonNode data; + private Integer count; + +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/common/enums/JobStatus.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/enums/JobStatus.java new file mode 100644 index 00000000..6493b36b --- /dev/null +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/enums/JobStatus.java @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.common.enums; + +public enum JobStatus { + + RUNNING, + FAILED, + SUCCESS + +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/common/enums/LoadMode.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/enums/LoadMode.java new file mode 100644 index 00000000..d86b0738 --- /dev/null +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/enums/LoadMode.java @@ -0,0 +1,22 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.common.enums; + +public enum LoadMode { + PUSH, PULL; +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/common/enums/TaskType.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/enums/TaskType.java new file mode 100644 index 00000000..e6ebf9e0 --- /dev/null +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/enums/TaskType.java @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.common.enums; + +public enum TaskType { + + HIVE, + FILE + +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/common/meta/LoadInfoResponse.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/meta/LoadInfoResponse.java new file mode 100644 index 00000000..60f28e9f --- /dev/null +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/meta/LoadInfoResponse.java @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.common.meta; + +import org.apache.doris.common.LoadInfo; + +import lombok.Data; + +@Data +public class LoadInfoResponse { + + private String status; + private String msg; + private LoadInfo jobInfo; + +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/common/meta/LoadMeta.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/meta/LoadMeta.java new file mode 100644 index 00000000..6009f092 --- /dev/null +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/meta/LoadMeta.java @@ -0,0 +1,158 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.common.meta; + +import org.apache.doris.common.Constants; +import org.apache.doris.config.EtlJobConfig; +import org.apache.doris.config.JobConfig; +import org.apache.doris.exception.SparkLoadException; + +import com.google.common.annotations.VisibleForTesting; +import lombok.Data; +import org.apache.commons.lang3.StringUtils; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +@Data +public class LoadMeta { + + private Long loadId; + private Long txnId; + private Long dbId; + private Long signature; + private Map tableMeta; + + public EtlJobConfig getEtlJobConfig(JobConfig jobConfig) throws SparkLoadException { + Map tables = new HashMap<>(); + for (Map.Entry entry : getTableMeta().entrySet()) { + String name = entry.getKey(); + TableMeta meta = entry.getValue(); + EtlJobConfig.EtlTable etlTable = new EtlJobConfig.EtlTable(meta.getIndexes().stream().map( + TableMeta.EtlIndex::toEtlIndex).collect(Collectors.toList()), + meta.getPartitionInfo().toEtlPartitionInfo()); + JobConfig.TaskInfo taskInfo = jobConfig.getLoadTasks().get(name); + EtlJobConfig.EtlFileGroup fileGroup; + Map columnMappingMap = taskInfo.toEtlColumnMappingMap(); + checkMapping(etlTable, columnMappingMap); + List partitionIds = meta.getPartitionInfo().partitions.stream() + .map(p -> p.partitionId).collect(Collectors.toList()); + switch (taskInfo.getType()) { + case HIVE: + Map properties = new HashMap<>(jobConfig.getHadoopProperties()); + properties.put(Constants.HIVE_METASTORE_URIS, taskInfo.getHiveMetastoreUris()); + fileGroup = + new EtlJobConfig.EtlFileGroup(EtlJobConfig.SourceType.HIVE, taskInfo.getHiveFullTableName(), + properties, false, columnMappingMap, taskInfo.getWhere(), + partitionIds); + break; + case FILE: + List columnList = Collections.emptyList(); + if (StringUtils.isNoneBlank(taskInfo.getColumns())) { + columnList = Arrays.stream(taskInfo.getColumns().split(",")).collect(Collectors.toList()); + } + 
List columnFromPathList = Collections.emptyList(); + if (StringUtils.isNoneBlank(taskInfo.getColumnFromPath())) { + columnFromPathList = + Arrays.stream(taskInfo.getColumnFromPath().split(",")).collect(Collectors.toList()); + } + fileGroup = + new EtlJobConfig.EtlFileGroup(EtlJobConfig.SourceType.FILE, taskInfo.getPaths(), columnList, + columnFromPathList, taskInfo.getFieldSep(), taskInfo.getLineDelim(), false, + taskInfo.getFormat(), columnMappingMap, taskInfo.getWhere(), partitionIds); + break; + default: + throw new IllegalArgumentException("Unsupported task type: " + taskInfo.getType()); + } + etlTable.addFileGroup(fileGroup); + tables.put(meta.getId(), etlTable); + } + String outputFilePattern = EtlJobConfig.getOutputFilePattern(jobConfig.getLabel(), + EtlJobConfig.FilePatternVersion.V1); + String label = jobConfig.getLabel(); + EtlJobConfig.EtlJobProperty properties = new EtlJobConfig.EtlJobProperty(); + EtlJobConfig etlJobConfig = new EtlJobConfig(tables, outputFilePattern, label, properties); + etlJobConfig.outputPath = + EtlJobConfig.getOutputPath(jobConfig.getWorkingDir(), getDbId(), label, + getSignature()); + return etlJobConfig; + } + + @VisibleForTesting + public void checkMapping(EtlJobConfig.EtlTable etlTable, + Map columnMappingMap) throws SparkLoadException { + Optional baseIdx = etlTable.indexes.stream().filter(idx -> idx.isBaseIndex).findFirst(); + if (baseIdx.isPresent()) { + EtlJobConfig.EtlIndex etlIndex = baseIdx.get(); + for (EtlJobConfig.EtlColumn column : etlIndex.columns) { + if ("HLL".equalsIgnoreCase(column.columnType)) { + EtlJobConfig.EtlColumnMapping mapping = columnMappingMap.get(column.columnName); + checkHllMapping(column.columnName, mapping); + } + if ("BITMAP".equalsIgnoreCase(column.columnType)) { + EtlJobConfig.EtlColumnMapping mapping = columnMappingMap.get(column.columnName); + checkBitmapMapping(column.columnName, mapping); + } + } + } + } + + private void checkHllMapping(String columnName, EtlJobConfig.EtlColumnMapping mapping) throws SparkLoadException { + if (mapping == null) { + throw new SparkLoadException(""); + } + Pattern pattern = Pattern.compile("(\\w+)\\(.*\\)"); + Matcher matcher = pattern.matcher(mapping.expr); + if (matcher.find()) { + if ("hll_hash".equalsIgnoreCase(matcher.group(1)) + || "hll_empty".equalsIgnoreCase(matcher.group(1))) { + return; + } + throw new SparkLoadException("HLL column must use hll function, like " + columnName + "=hll_hash(xxx) or " + + columnName + "=hll_empty()"); + } + } + + private void checkBitmapMapping(String columnName, EtlJobConfig.EtlColumnMapping mapping) + throws SparkLoadException { + if (mapping == null) { + throw new SparkLoadException(""); + } + Pattern pattern = Pattern.compile("(\\w+)\\(.*\\)"); + Matcher matcher = pattern.matcher(mapping.expr); + if (matcher.find()) { + if ("to_bitmap".equalsIgnoreCase(matcher.group(1)) || "bitmap_hash".equalsIgnoreCase(matcher.group(1)) + || "bitmap_dict".equalsIgnoreCase(matcher.group(1)) + || "binary_bitmap".equalsIgnoreCase(matcher.group(1))) { + return; + } + throw new SparkLoadException( + "BITMAP column must use bitmap function, like " + columnName + "=to_bitmap(xxx) or " + + columnName + "=bitmap_hash() or " + columnName + "=bitmap_dict() or " + + columnName + "=binary_bitmap()"); + } + } + +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/common/meta/TableMeta.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/meta/TableMeta.java new file mode 100644 index 00000000..3e97b97a --- /dev/null +++ 
b/spark-load/spark-load-core/src/main/java/org/apache/doris/common/meta/TableMeta.java @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.common.meta; + + +import org.apache.doris.config.EtlJobConfig; + +import lombok.Data; + +import java.io.Serializable; +import java.util.List; +import java.util.stream.Collectors; + +@Data +public class TableMeta { + + private Long id; + private List<EtlIndex> indexes; + private EtlPartitionInfo partitionInfo; + + public static class EtlIndex implements Serializable { + public long indexId; + public List<EtlJobConfig.EtlColumn> columns; + public int schemaHash; + public String indexType; + public boolean isBaseIndex; + public int schemaVersion; + + public EtlIndex() { + + } + + public EtlJobConfig.EtlIndex toEtlIndex() { + return new EtlJobConfig.EtlIndex(indexId, columns, schemaHash, indexType, isBaseIndex, schemaVersion); + } + + } + + public static class EtlPartitionInfo implements Serializable { + public String partitionType; + public List<String> partitionColumnRefs; + public List<String> distributionColumnRefs; + public List<EtlPartition> partitions; + + public EtlPartitionInfo() { + } + + public EtlJobConfig.EtlPartitionInfo toEtlPartitionInfo() { + return new EtlJobConfig.EtlPartitionInfo(partitionType, partitionColumnRefs, distributionColumnRefs, + partitions.stream().map(EtlPartition::toEtlPartition).collect(Collectors.toList())); + } + + } + + public static class EtlPartition implements Serializable { + public long partitionId; + public List<Object> startKeys; + public List<Object> endKeys; + public boolean isMaxPartition; + public int bucketNum; + + public EtlPartition() { + } + + public EtlJobConfig.EtlPartition toEtlPartition() { + return new EtlJobConfig.EtlPartition(partitionId, startKeys, endKeys, isMaxPartition, bucketNum); + } + } + +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/config/JobConfig.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/config/JobConfig.java new file mode 100644 index 00000000..fb2f5ccb --- /dev/null +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/config/JobConfig.java @@ -0,0 +1,267 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.config; + +import org.apache.doris.SparkLoadRunner; +import org.apache.doris.client.DorisClient; +import org.apache.doris.common.Constants; +import org.apache.doris.common.enums.LoadMode; +import org.apache.doris.common.enums.TaskType; +import org.apache.doris.exception.SparkLoadException; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import lombok.Data; +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; + +import java.io.File; +import java.net.URI; +import java.sql.DriverManager; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +@Data +public class JobConfig { + + @JsonProperty(required = true) + private String feAddresses; + + @JsonProperty(required = true) + private String label; + + @JsonProperty(required = true) + private String user; + + @JsonProperty(required = true) + private String password; + + @JsonProperty(required = true) + private String database; + + @JsonProperty(required = true) + private String workingDir; + + @JsonProperty(required = true) + private Map<String, TaskInfo> loadTasks; + + @JsonProperty(required = true) + private SparkInfo spark; + + private LoadMode loadMode = LoadMode.PULL; + + private Map<String, String> hadoopProperties = Collections.emptyMap(); + + private Map<String, String> jobProperties = Collections.emptyMap(); + + private Map<String, String> env = Collections.emptyMap(); + + @Data + public static class TaskInfo { + + private TaskType type; + + private String hiveMetastoreUris; + + private String hiveDatabase; + + private String hiveTable; + + private List<String> paths; + + private String format; + + private String columns; + + private String columnFromPath; + + private String fieldSep = "\t"; + + private String lineDelim = "\n"; + + private List<String> columnMappings = Collections.emptyList(); + + private String where; + + private List<String> targetPartitions = Collections.emptyList(); + + public String getHiveFullTableName() { + return hiveDatabase + "." 
+ hiveTable; + } + + public Map<String, EtlJobConfig.EtlColumnMapping> toEtlColumnMappingMap() { + Map<String, EtlJobConfig.EtlColumnMapping> map = new HashMap<>(); + for (String columnMapping : columnMappings) { + String[] arr = columnMapping.split("="); + map.put(arr[0], new EtlJobConfig.EtlColumnMapping(arr[1])); + } + return map; + } + + } + + @Data + public static class SparkInfo { + + private static final String DEFAULT_DEPLOY_MODE = "client"; + + private static final String DEFAULT_DPP_JAR_PATH = + SparkLoadRunner.SPARK_LOAD_HOME + "/app/spark-load-dpp-1.0-SNAPSHOT.jar"; + + private String sparkHome; + + private String master; + + private String deployMode = DEFAULT_DEPLOY_MODE; + + private Integer numExecutors; + + private Integer executorCores; + + private String executorMemory; + + private String driverMemory; + + private String dppJarPath = DEFAULT_DPP_JAR_PATH; + + private Map<String, String> properties = Collections.emptyMap(); + + } + + public void checkFeAddress() { + Preconditions.checkArgument(StringUtils.isNoneBlank(getFeAddresses()), "feAddresses is empty"); + String[] feAddressArr = getFeAddresses().split(","); + if (feAddressArr.length == 0) { + throw new IllegalArgumentException("feAddresses format is incorrect"); + } + for (String feAddress : feAddressArr) { + String[] arr = feAddress.split(":"); + if (arr.length != 2) { + throw new IllegalArgumentException("feAddresses format is incorrect"); + } + } + } + + public void checkTaskInfo() { + Map<String, TaskInfo> tasks = getLoadTasks(); + Preconditions.checkArgument(!tasks.isEmpty(), "loadTasks is empty"); + for (Map.Entry<String, TaskInfo> entry : tasks.entrySet()) { + String table = entry.getKey(); + try { + DorisClient.FeClient feClient = DorisClient.getFeClient(feAddresses, user, password); + String ddl = feClient.getDDL(database, table); + if (StringUtils.isNoneBlank(ddl) && ddl.contains("\"enable_unique_key_merge_on_write\" = \"true\"")) { + throw new IllegalArgumentException("Merge On Write is not supported"); + } + } catch (SparkLoadException e) { + throw new IllegalArgumentException("check table failed", e); + } + TaskInfo taskInfo = entry.getValue(); + switch (taskInfo.getType()) { + case HIVE: + Preconditions.checkArgument(StringUtils.isNoneBlank(taskInfo.getHiveDatabase()), + "hive database is empty"); + Preconditions.checkArgument(StringUtils.isNoneBlank(taskInfo.getHiveTable()), + "hive table is empty"); + break; + case FILE: + Preconditions.checkArgument(taskInfo.getPaths() != null && !taskInfo.getPaths().isEmpty(), + "file path is empty"); + Preconditions.checkArgument( + StringUtils.equalsAnyIgnoreCase(taskInfo.getFormat(), "parquet", "orc", "csv"), + "format only supports parquet, orc or csv"); + if ("csv".equalsIgnoreCase(taskInfo.getFormat())) { + Preconditions.checkArgument(StringUtils.isNoneEmpty(taskInfo.getFieldSep()), + "field separator is empty"); + } + break; + default: + throw new IllegalArgumentException("task type only supports hive or file"); + } + } + } + + public void checkSparkInfo() { + SparkInfo sparkInfo = getSpark(); + Preconditions.checkArgument(StringUtils.isNoneBlank(sparkInfo.getSparkHome()), + "spark config item sparkHome is empty"); + Preconditions.checkArgument(checkSparkMaster(sparkInfo.getMaster()), + "spark master only supports yarn or standalone or local"); + Preconditions.checkArgument( + StringUtils.equalsAnyIgnoreCase(sparkInfo.getDeployMode(), "cluster", "client"), + "spark deployMode only supports cluster or client"); + if (!"yarn".equalsIgnoreCase(sparkInfo.getMaster())) { + Preconditions.checkArgument("client".equalsIgnoreCase(sparkInfo.getDeployMode()), + "standalone and local master only 
supports client mode"); + } + if (LoadMode.PULL == getLoadMode()) { + if (StringUtils.isBlank(sparkInfo.getDppJarPath())) { + throw new IllegalArgumentException("dpp jar file path is empty"); + } + if (!new File(sparkInfo.getDppJarPath()).exists()) { + throw new IllegalArgumentException("dpp jar file is not exists, path: " + getSpark().getDppJarPath()); + } + } + } + + private boolean checkSparkMaster(String master) { + if (StringUtils.isBlank(master)) { + return false; + } + if ("yarn".equalsIgnoreCase(master) || master.startsWith("local")) { + return true; + } + URI uri = URI.create(master); + return Constants.SPARK_STANDALONE_SCHEME.equalsIgnoreCase(uri.getScheme()) + && StringUtils.isNoneBlank(uri.getHost()) && uri.getPort() != -1; + } + + public void checkHadoopProperties() { + if (hadoopProperties == null || hadoopProperties.isEmpty()) { + return; + } + if (!hadoopProperties.containsKey("fs.defaultFS")) { + throw new IllegalArgumentException("fs.defaultFS is empty"); + } + // check auth + if (hadoopProperties.containsKey("hadoop.security.authentication") + && StringUtils.equalsIgnoreCase(hadoopProperties.get("hadoop.security.authentication"), "kerberos")) { + if (hadoopProperties.containsKey("hadoop.kerberos.principal")) { + if (StringUtils.isBlank(hadoopProperties.get("hadoop.kerberos.principal"))) { + throw new IllegalArgumentException("hadoop kerberos principal is empty"); + } + if (hadoopProperties.containsKey("hadoop.kerberos.keytab")) { + if (!FileUtils.getFile(hadoopProperties.get("hadoop.kerberos.keytab")).exists()) { + throw new IllegalArgumentException("hadoop kerberos keytab file is not exists, path: " + + hadoopProperties.get("hadoop.kerberos.keytab")); + } + return; + } + throw new IllegalArgumentException("hadoop.kerberos.keytab is not set"); + } + throw new IllegalArgumentException("hadoop.kerberos.principal is not set"); + } else { + if (!hadoopProperties.containsKey("hadoop.username")) { + throw new IllegalArgumentException("hadoop username is empty"); + } + } + } + +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/exception/SparkLoadException.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/exception/SparkLoadException.java new file mode 100644 index 00000000..d25aca87 --- /dev/null +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/exception/SparkLoadException.java @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.exception; + +public class SparkLoadException extends Exception { + + public SparkLoadException(String message) { + super(message); + } + + public SparkLoadException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/load/LoaderFactory.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/load/LoaderFactory.java new file mode 100644 index 00000000..0b0fc786 --- /dev/null +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/load/LoaderFactory.java @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load; + +import org.apache.doris.config.JobConfig; +import org.apache.doris.load.job.Loader; +import org.apache.doris.load.job.PullLoader; + +public class LoaderFactory { + + public static Loader createLoader(JobConfig jobConfig, Boolean isRecoveryMode) { + switch (jobConfig.getLoadMode()) { + case PULL: + return new PullLoader(jobConfig, isRecoveryMode); + case PUSH: + default: + throw new UnsupportedOperationException(); + } + } + +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/load/job/Loader.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/load/job/Loader.java new file mode 100644 index 00000000..d80caab0 --- /dev/null +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/load/job/Loader.java @@ -0,0 +1,121 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
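+// Loader is the base class for load jobs: it builds a SparkLauncher from the job config, submits the DPP application, polls the Spark application state until it is final, and maps that state to a JobStatus.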
+ +package org.apache.doris.load.job; + +import org.apache.doris.common.enums.JobStatus; +import org.apache.doris.config.JobConfig; +import org.apache.doris.exception.SparkLoadException; + +import lombok.Getter; +import org.apache.spark.launcher.SparkAppHandle; +import org.apache.spark.launcher.SparkLauncher; + +import java.io.File; +import java.io.IOException; +import java.time.Duration; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.locks.LockSupport; + +public abstract class Loader { + + private static final String SPARK_HADOOP_PREFIX = "spark.hadoop."; + + protected JobConfig jobConfig; + + protected boolean isRecoveryMode = false; + + @Getter + protected SparkAppHandle appHandle; + + @Getter + protected JobStatus jobStatus = JobStatus.RUNNING; + + protected final Map statusInfo = new HashMap<>(); + + public abstract void prepare() throws SparkLoadException; + + public void execute() throws SparkLoadException { + try { + appHandle = submitSparkJob(getMainClass(), getAppArgs(), getLogPath()); + } catch (IOException e) { + throw new SparkLoadException("submit spark job failed", e); + } + do { + if (appHandle.getState().isFinal()) { + if (SparkAppHandle.State.FAILED == appHandle.getState() + || SparkAppHandle.State.KILLED == appHandle.getState()) { + statusInfo.put("msg", + String.format("spark job run failed, appId: %s, state: %s", appHandle.getAppId(), + appHandle.getState())); + jobStatus = JobStatus.FAILED; + } else { + jobStatus = JobStatus.SUCCESS; + } + break; + } + statusInfo.put("appId", appHandle.getAppId()); + LockSupport.parkNanos(Duration.ofSeconds(5).toNanos()); + } while (true); + } + + private SparkAppHandle submitSparkJob(String mainClass, String[] appArgs, String logPath) throws IOException { + File logFile = new File(logPath); + if (!logFile.getParentFile().exists()) { + logFile.getParentFile().mkdir(); + } + JobConfig.SparkInfo sparkInfo = jobConfig.getSpark(); + SparkLauncher launcher = new SparkLauncher(jobConfig.getEnv()) + .setMaster(sparkInfo.getMaster()) + .setDeployMode(sparkInfo.getDeployMode()) + .setAppName("spark-load-" + jobConfig.getLabel()) + .setAppResource(sparkInfo.getDppJarPath()) + .setSparkHome(sparkInfo.getSparkHome()) + .setMainClass(mainClass) + .addAppArgs(appArgs) + .redirectError(logFile); + sparkInfo.getProperties().forEach(launcher::setConf); + jobConfig.getHadoopProperties().forEach((k, v) -> launcher.setConf(SPARK_HADOOP_PREFIX + k, v)); + return launcher.startApplication(); + } + + public void cancel() { + if (jobStatus == JobStatus.RUNNING) { + if (appHandle != null) { + try { + appHandle.stop(); + } catch (Exception e) { + appHandle.kill(); + } + } + } + jobStatus = JobStatus.FAILED; + afterFailed(new SparkLoadException("load client cancelled.")); + } + + protected abstract String getMainClass(); + + protected abstract String[] getAppArgs(); + + protected abstract String getLogPath(); + + public abstract void afterFinished() throws SparkLoadException; + + public abstract void afterFailed(Exception e); + +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/load/job/PullLoader.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/load/job/PullLoader.java new file mode 100644 index 00000000..80491bf3 --- /dev/null +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/load/job/PullLoader.java @@ -0,0 +1,370 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.job; + +import org.apache.doris.SparkLoadRunner; +import org.apache.doris.client.DorisClient; +import org.apache.doris.common.Constants; +import org.apache.doris.common.DppResult; +import org.apache.doris.common.LoadInfo; +import org.apache.doris.common.enums.JobStatus; +import org.apache.doris.common.meta.LoadMeta; +import org.apache.doris.common.meta.TableMeta; +import org.apache.doris.config.EtlJobConfig; +import org.apache.doris.config.JobConfig; +import org.apache.doris.exception.SparkLoadException; +import org.apache.doris.util.DateUtils; +import org.apache.doris.util.FileSystemUtils; +import org.apache.doris.util.JsonUtils; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.FileStatus; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +import java.io.File; +import java.io.IOException; +import java.time.Duration; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.locks.LockSupport; +import java.util.stream.Collectors; + +public class PullLoader extends Loader implements Recoverable { + + private static final Logger LOG = LogManager.getLogger(PullLoader.class); + + private static final String LOAD_META_JSON = "load_meta.json"; + + private static final String DPP_RESULT_JSON = "dpp_result.json"; + + private static final String SPARK_ETL_JOB_CLASS = "org.apache.doris.load.loadv2.etl.SparkEtlJob"; + + private LoadMeta loadMeta; + + private EtlJobConfig etlJobConfig; + + public PullLoader(JobConfig jobConfig, Boolean isRecoveryMode) { + this.jobConfig = jobConfig; + this.isRecoveryMode = isRecoveryMode; + } + + @Override + public void prepare() throws SparkLoadException { + DorisClient.FeClient feClient = DorisClient.getFeClient(jobConfig.getFeAddresses(), jobConfig.getUser(), + jobConfig.getPassword()); + Map<String, List<String>> tableToPartition = jobConfig.getLoadTasks().entrySet().stream() + .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().getTargetPartitions())); + loadMeta = feClient.createIngestionLoad(jobConfig.getDatabase(), tableToPartition, jobConfig.getLabel(), + jobConfig.getJobProperties()); + etlJobConfig = loadMeta.getEtlJobConfig(jobConfig); + if (Constants.HADOOP_AUTH_KERBEROS.equalsIgnoreCase( + jobConfig.getHadoopProperties().get(Constants.HADOOP_SECURITY_AUTHENTICATION))) { + try { + FileSystemUtils.kerberosLogin(jobConfig); + } catch (IOException e) { + throw new SparkLoadException("login with kerberos auth failed", e); + } + } + } + + @Override + public void execute() throws SparkLoadException { + + try { + cleanOutputPath(); + } catch (IOException e) { + throw new SparkLoadException("clean output path failed", e); + } + uploadMetaInfo(loadMeta, etlJobConfig.getOutputPath()); + + String 
etlJobConfPath = etlJobConfig.outputPath + "/configs/jobconfig.json"; + try { + FileSystemUtils.createFile(jobConfig, etlJobConfig.configToJson(), etlJobConfPath, true); + } catch (IOException e) { + throw new SparkLoadException("create job config file failed", e); + } + + JobConfig.SparkInfo spark = jobConfig.getSpark(); + + LOG.info("submit spark job on master: " + spark.getMaster() + ", deployMode: " + spark.getDeployMode()); + + super.execute(); + + if (jobStatus == JobStatus.FAILED) { + throw new SparkLoadException("spark job run failed, msg: " + statusInfo.get("msg")); + } + LOG.info("spark job run finished."); + + } + + @Override + public void afterFinished() throws SparkLoadException { + DorisClient.FeClient feClient = DorisClient.getFeClient(jobConfig.getFeAddresses(), jobConfig.getUser(), + jobConfig.getPassword()); + statusInfo.put("status", jobStatus.name()); + statusInfo.put("msg", ""); + statusInfo.put("appId", appHandle == null ? null : appHandle.getAppId()); + try { + String dppResultStr = null; + int checkCnt = 0; + while (checkCnt < 3) { + try { + dppResultStr = getDppResultString(); + } catch (UnsupportedOperationException e) { + LOG.warn("retry get dpp result", e); + checkCnt++; + LockSupport.parkNanos(Duration.ofMillis(500).toNanos()); + } + if (dppResultStr != null) { + break; + } + } + if (dppResultStr == null) { + throw new SparkLoadException("get dpp result str failed"); + } + statusInfo.put("dppResult", dppResultStr); + statusInfo.put("filePathToSize", JsonUtils.writeValueAsString(getFilePathToSize())); + statusInfo.put("hadoopProperties", JsonUtils.writeValueAsString(jobConfig.getHadoopProperties())); + } catch (IOException e) { + throw new SparkLoadException("update job status failed", e); + } + feClient.updateIngestionLoad(jobConfig.getDatabase(), loadMeta.getLoadId(), statusInfo); + do { + LoadInfo loadInfo = feClient.getLoadInfo(jobConfig.getDatabase(), jobConfig.getLabel()); + switch (loadInfo.getState().toUpperCase(Locale.ROOT)) { + case "FINISHED": + LOG.info("load job finished."); + try { + cleanOutputPath(); + } catch (IOException e) { + LOG.warn("clean output path failed", e); + } + return; + case "CANCELLED": + throw new SparkLoadException("load job failed, " + loadInfo.getFailMsg()); + default: + LOG.info("load job unfinished, state: " + loadInfo.getState()); + break; + } + LockSupport.parkNanos(Duration.ofSeconds(15).toNanos()); + } while (true); + } + + @Override + public void afterFailed(Exception e) { + if (loadMeta == null) { + LOG.info("load job not start, skip update."); + return; + } + DorisClient.FeClient feClient = DorisClient.getFeClient(jobConfig.getFeAddresses(), jobConfig.getUser(), + jobConfig.getPassword()); + statusInfo.put("status", jobStatus.name()); + statusInfo.put("msg", e.getMessage()); + statusInfo.put("appId", appHandle == null ? 
null : appHandle.getAppId()); + try { + feClient.updateIngestionLoad(jobConfig.getDatabase(), loadMeta.getLoadId(), statusInfo); + } catch (SparkLoadException ex) { + LOG.warn("update load failed status failed", ex); + } + } + + @Override + public boolean canBeRecovered() throws SparkLoadException { + if (isRecoveryMode) { + String outputPath = etlJobConfig.getOutputPath(); + String parentOutputPath = outputPath.substring(0, StringUtils.lastIndexOf(outputPath, "/")); + try { + if (FileSystemUtils.exists(jobConfig, parentOutputPath)) { + FileStatus[] fileStatuses = FileSystemUtils.list(jobConfig, parentOutputPath); + if (fileStatuses.length != 1) { + return false; + } + fileStatuses = FileSystemUtils.list(jobConfig, fileStatuses[0].getPath().toString()); + boolean hasDppResult = false; + for (FileStatus fileStatus : fileStatuses) { + String fileName = fileStatus.getPath().getName(); + if (DPP_RESULT_JSON.equalsIgnoreCase(fileName)) { + hasDppResult = true; + String content = FileSystemUtils.readFile(jobConfig, fileStatus.getPath().toString()); + if (StringUtils.isBlank(content)) { + return false; + } + DppResult dppResult = JsonUtils.readValue(content, DppResult.class); + if (!checkDppResult(dppResult)) { + LOG.info("previous etl job is failed, cannot be recovered"); + return false; + } + } + // check meta consist + if (LOAD_META_JSON.equalsIgnoreCase(fileName)) { + String content = FileSystemUtils.readFile(jobConfig, fileStatus.getPath().toString()); + if (StringUtils.isBlank(content)) { + return false; + } + LoadMeta oldLoadMeta = JsonUtils.readValue(content, LoadMeta.class); + for (Map.Entry entry : loadMeta.getTableMeta().entrySet()) { + TableMeta tableMeta = entry.getValue(); + TableMeta oldTableMeta = oldLoadMeta.getTableMeta().get(entry.getKey()); + // index count is not consistent + if (oldTableMeta == null + || oldTableMeta.getIndexes().size() != tableMeta.getIndexes().size()) { + LOG.info("index size mismatch, cannot be recovered"); + return false; + } + Map indexMap = tableMeta.getIndexes().stream() + .collect(Collectors.toMap(etlIndex -> etlIndex.indexId, + TableMeta.EtlIndex::toEtlIndex)); + Map oldIndexMap = oldTableMeta.getIndexes().stream() + .collect(Collectors.toMap(etlIndex -> etlIndex.indexId, + TableMeta.EtlIndex::toEtlIndex)); + for (Map.Entry indexEntry : indexMap.entrySet()) { + EtlJobConfig.EtlIndex index = indexEntry.getValue(); + EtlJobConfig.EtlIndex oldIndex = oldIndexMap.get(indexEntry.getKey()); + // index not exists + if (oldIndex == null) { + LOG.info("index " + index.indexId + " is not exists in previous meta"); + return false; + } + // index mismatch + if (oldIndex.schemaHash != index.schemaHash + || oldIndex.schemaVersion != index.schemaVersion) { + LOG.info("index " + index.indexId + " has changed, " + + "old schemaHash: " + oldIndex.schemaHash + " and schemaVersion: " + + oldIndex.schemaVersion + " current schemaHash: " + + index.schemaHash + " and schemaVersion: " + + index.schemaVersion + ", cannot be recovered"); + return false; + } + } + // check partition consistent + Set partitionSet = tableMeta.getPartitionInfo().partitions.stream().map( + p -> p.partitionId).collect(Collectors.toSet()); + Set oldPartitionSet = oldTableMeta.getPartitionInfo().partitions.stream().map( + p -> p.partitionId).collect(Collectors.toSet()); + if (oldPartitionSet.size() != partitionSet.size()) { + LOG.info("partition size mismatch, old partition size: " + oldPartitionSet.size() + + ", now partition size: " + partitionSet.size() + + ", cannot be recovered"); + return 
false; + } + for (Long partitionId : partitionSet) { + if (!oldPartitionSet.contains(partitionId)) { + LOG.info("partition id mismatch, partition id: " + partitionId + + ", cannot be recovered"); + return false; + } + } + } + } + } + return hasDppResult; + } + } catch (IOException e) { + throw new SparkLoadException("check recovery failed", e); + } + } + return false; + } + + @Override + public void prepareRecover() throws SparkLoadException { + String outputPath = etlJobConfig.getOutputPath(); + String parentOutputPath = outputPath.substring(0, StringUtils.lastIndexOf(outputPath, "/")); + try { + FileStatus[] fileStatuses = FileSystemUtils.list(jobConfig, parentOutputPath); + FileSystemUtils.move(jobConfig, fileStatuses[0].getPath().toString(), outputPath); + FileSystemUtils.delete(jobConfig, outputPath + "/load_meta.json"); + uploadMetaInfo(loadMeta, etlJobConfig.getOutputPath()); + jobStatus = JobStatus.SUCCESS; + } catch (IOException e) { + throw new SparkLoadException("prepare recovery failed", e); + } + } + + private boolean checkDppResult(DppResult dppResult) { + if (!dppResult.isSuccess) { + return false; + } + int maxFilterRatio = Integer.parseInt(jobConfig.getJobProperties().getOrDefault("max_filter_ratio", "0")); + return dppResult.abnormalRows <= (dppResult.abnormalRows + dppResult.normalRows) * maxFilterRatio; + } + + private void uploadMetaInfo(LoadMeta metaInfo, String outputPath) throws SparkLoadException { + try { + if (!FileSystemUtils.exists(jobConfig, outputPath)) { + FileSystemUtils.mkdir(jobConfig, outputPath); + } + FileSystemUtils.createFile(jobConfig, JsonUtils.writeValueAsBytes(metaInfo), + outputPath + "/load_meta.json", true); + } catch (IOException e) { + throw new SparkLoadException("upload load meta failed", e); + } + } + + @Override + protected String getMainClass() { + return SPARK_ETL_JOB_CLASS; + } + + @Override + protected String[] getAppArgs() { + return new String[] {etlJobConfig.outputPath + "/configs/jobconfig.json"}; + } + + @Override + protected String getLogPath() { + String formattedNow = DateUtils.getFormattedNow(DateUtils.NUMBER_FORMATER); + return SparkLoadRunner.SPARK_LOAD_HOME + "/logs/" + jobConfig.getLabel() + "-" + formattedNow + ".log"; + } + + public void cleanOutputPath() throws IOException { + if (FileSystemUtils.exists(jobConfig, etlJobConfig.outputPath)) { + LOG.info("clean output: " + etlJobConfig.outputPath); + FileSystemUtils.delete(jobConfig, etlJobConfig.outputPath); + } + } + + private String getDppResultString() throws SparkLoadException { + try { + return FileSystemUtils.readFile(jobConfig, etlJobConfig.outputPath + "/dpp_result.json"); + } catch (IOException e) { + throw new SparkLoadException("get dpp result failed", e); + } + } + + private Map getFilePathToSize() throws SparkLoadException { + Map filePathToSize = new HashMap<>(); + try { + FileStatus[] fileStatuses = FileSystemUtils.list(jobConfig, etlJobConfig.outputPath); + for (FileStatus fileStatus : fileStatuses) { + if (fileStatus.isDirectory()) { + continue; + } + filePathToSize.put(fileStatus.getPath().toString(), fileStatus.getLen()); + } + } catch (IOException e) { + throw new SparkLoadException("get dpp result failed", e); + } + return filePathToSize; + } + +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/load/job/Recoverable.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/load/job/Recoverable.java new file mode 100644 index 00000000..ccfd461a --- /dev/null +++ 
b/spark-load/spark-load-core/src/main/java/org/apache/doris/load/job/Recoverable.java @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.job; + +import org.apache.doris.exception.SparkLoadException; + +public interface Recoverable { + + boolean canBeRecovered() throws SparkLoadException; + + void prepareRecover() throws SparkLoadException; + +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/util/DateUtils.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/util/DateUtils.java new file mode 100644 index 00000000..7305ef76 --- /dev/null +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/util/DateUtils.java @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.util; + +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; + +public class DateUtils { + + public static final DateTimeFormatter NORMAL_FORMATER = + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss").withZone( + ZoneId.systemDefault()); + + public static final DateTimeFormatter NUMBER_FORMATER = + DateTimeFormatter.ofPattern("yyyyMMddHHmmss").withZone( + ZoneId.systemDefault()); + + public static String getFormattedNow(DateTimeFormatter formatter) { + return formatter.format(LocalDateTime.now(ZoneId.systemDefault())); + } + +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/util/FileSystemUtils.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/util/FileSystemUtils.java new file mode 100644 index 00000000..2e6b5880 --- /dev/null +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/util/FileSystemUtils.java @@ -0,0 +1,146 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.util; + +import org.apache.doris.common.Constants; +import org.apache.doris.config.JobConfig; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; + +public class FileSystemUtils { + + private static final Logger LOG = LogManager.getLogger(FileSystemUtils.class); + + private static FileSystem getFs(JobConfig config, Path path) throws IOException { + return FileSystem.get(path.toUri(), getConf(config)); + } + + public static void createFile(JobConfig config, String content, String path, Boolean overwrite) throws IOException { + Path p = new Path(path); + try (FileSystem fs = getFs(config, p)) { + FSDataOutputStream outputStream = fs.create(p, overwrite); + outputStream.write(content.getBytes(StandardCharsets.UTF_8)); + outputStream.close(); + } + } + + public static void createFile(JobConfig config, byte[] contentBytes, String path, Boolean overwrite) + throws IOException { + Path p = new Path(path); + try (FileSystem fs = getFs(config, p)) { + FSDataOutputStream outputStream = fs.create(p, overwrite); + outputStream.write(contentBytes); + outputStream.close(); + } + } + + public static void delete(JobConfig config, String path) throws IOException { + Path p = new Path(path); + try (FileSystem fs = getFs(config, p)) { + fs.delete(p, true); + } + } + + public static boolean exists(JobConfig config, String path) throws IOException { + Path p = new Path(path); + try (FileSystem fs = getFs(config, p)) { + return fs.exists(p); + } + } + + public static FileStatus[] list(JobConfig config, String path) throws IOException { + Path p = new Path(path); + try (FileSystem fs = getFs(config, p)) { + return fs.listStatus(p); + } + } + + public static String readFile(JobConfig config, String path) throws IOException { + Path p = new Path(path); + try (FileSystem fs = getFs(config, p)) { + if (fs.exists(p) && fs.getFileStatus(p).isFile()) { + FSDataInputStream inputStream = fs.open(p); + BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); + StringBuilder sb = new StringBuilder(); + String line; + while ((line = reader.readLine()) != null) { + sb.append(line); + } + return sb.toString(); + } + throw new UnsupportedOperationException("read file is not exist or is not a file, path: " + path); + } + } + + public static void move(JobConfig config, 
String src, String dst) throws IOException { + Path srcPath = new Path(src); + Path dstpath = new Path(dst); + try (FileSystem fs = getFs(config, srcPath)) { + fs.rename(srcPath, dstpath); + } + } + + public static void mkdir(JobConfig config, String path) throws IOException { + Path p = new Path(path); + try (FileSystem fs = getFs(config, p)) { + fs.mkdirs(p, new FsPermission(644)); + } + } + + public static void kerberosLogin(JobConfig jobConfig) throws IOException { + Configuration conf = getConf(jobConfig); + conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, "true"); + conf.set(CommonConfigurationKeysPublic.HADOOP_KERBEROS_KEYTAB_LOGIN_AUTORENEWAL_ENABLED, "true"); + UserGroupInformation.setConfiguration(conf); + String keytab = jobConfig.getHadoopProperties().get(Constants.HADOOP_KERBEROS_KEYTAB); + String principal = jobConfig.getHadoopProperties().get(Constants.HADOOP_KERBEROS_PRINCIPAL); + try { + UserGroupInformation ugi = UserGroupInformation.getLoginUser(); + if (ugi.hasKerberosCredentials() && StringUtils.equals(ugi.getUserName(), principal)) { + ugi.checkTGTAndReloginFromKeytab(); + return; + } + } catch (IOException e) { + LOG.warn("A SecurityException occurs with kerberos, do login immediately.", e); + } + UserGroupInformation.loginUserFromKeytab(principal, keytab); + } + + private static Configuration getConf(JobConfig jobConfig) { + Configuration conf = new Configuration(); + jobConfig.getHadoopProperties().forEach(conf::set); + return conf; + } + +} diff --git a/spark-load/spark-load-core/src/main/java/org/apache/doris/util/HttpUtils.java b/spark-load/spark-load-core/src/main/java/org/apache/doris/util/HttpUtils.java new file mode 100644 index 00000000..d1da38d3 --- /dev/null +++ b/spark-load/spark-load-core/src/main/java/org/apache/doris/util/HttpUtils.java @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
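+// HttpUtils is a small helper around Apache HttpClient: it builds clients with default connect/socket timeouts and reads an HttpEntity's content into a string.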
+ +package org.apache.doris.util; + +import org.apache.http.HttpEntity; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; + +public class HttpUtils { + + public static final int DEFAULT_CONN_TIMEOUT = 60 * 1000; + public static final int DEFAULT_SO_TIMEOUT = 60 * 1000; + + public static CloseableHttpClient getClient() { + return getClient(DEFAULT_CONN_TIMEOUT, DEFAULT_SO_TIMEOUT); + } + + public static CloseableHttpClient getClient(int connectionTimeout, int socketTimeout) { + RequestConfig requestConfig = RequestConfig.custom() + .setConnectTimeout(connectionTimeout) + .setSocketTimeout(socketTimeout) + .build(); + return HttpClients.custom().setDefaultRequestConfig(requestConfig).build(); + } + + public static String getEntityContent(HttpEntity entity) throws IOException { + StringBuilder sb = new StringBuilder(); + try (InputStream is = entity.getContent(); + BufferedReader reader = new BufferedReader(new InputStreamReader(is))) { + String line; + while ((line = reader.readLine()) != null) { + sb.append(line); + } + } + return sb.toString(); + } + +} diff --git a/spark-load/spark-load-core/src/main/resources/log4j.properties b/spark-load/spark-load-core/src/main/resources/log4j.properties new file mode 100644 index 00000000..c1e97855 --- /dev/null +++ b/spark-load/spark-load-core/src/main/resources/log4j.properties @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +log4j.rootLogger=INFO,console +log4j.additivity.org.apache=true +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.Threshold=INFO +log4j.appender.console.ImmediateFlush=true +log4j.appender.console.Target=System.out +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %p (%t|%tid) [%C{1}.%M():%L] %m%n \ No newline at end of file diff --git a/spark-load/spark-load-core/src/test/java/org/apache/doris/client/DorisClientTest.java b/spark-load/spark-load-core/src/test/java/org/apache/doris/client/DorisClientTest.java new file mode 100644 index 00000000..4f53a368 --- /dev/null +++ b/spark-load/spark-load-core/src/test/java/org/apache/doris/client/DorisClientTest.java @@ -0,0 +1,473 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.client; + +import org.apache.doris.common.meta.LoadMeta; +import org.apache.doris.common.meta.TableMeta; +import org.apache.doris.config.EtlJobConfig; +import org.apache.doris.exception.SparkLoadException; +import org.apache.doris.util.JsonUtils; + +import com.fasterxml.jackson.core.JsonProcessingException; +import mockit.Mock; +import mockit.MockUp; +import org.apache.http.Header; +import org.apache.http.HeaderIterator; +import org.apache.http.HttpEntity; +import org.apache.http.HttpStatus; +import org.apache.http.HttpVersion; +import org.apache.http.ProtocolVersion; +import org.apache.http.StatusLine; +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpUriRequest; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.message.BasicStatusLine; +import org.apache.http.params.HttpParams; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +class DorisClientTest { + + @Test + public void getFeClient() { + IllegalArgumentException e1 = + Assertions.assertThrows(IllegalArgumentException.class, () -> DorisClient.getFeClient("", "", "")); + Assertions.assertEquals("feAddresses is empty", e1.getMessage()); + IllegalArgumentException e2 = Assertions.assertThrows(IllegalArgumentException.class, + () -> DorisClient.getFeClient("127.0.0.1", "", "")); + Assertions.assertEquals("feAddresses contains invalid format, 127.0.0.1", e2.getMessage()); + IllegalArgumentException e3 = Assertions.assertThrows(IllegalArgumentException.class, + () -> DorisClient.getFeClient("127.0.0.1:", "", "")); + Assertions.assertEquals("feAddresses contains invalid format, 127.0.0.1:", e3.getMessage()); + IllegalArgumentException e4 = + Assertions.assertThrows(IllegalArgumentException.class, () -> DorisClient.getFeClient(":8030", "", "")); + Assertions.assertEquals("feAddresses contains invalid format, :8030", e4.getMessage()); + Assertions.assertDoesNotThrow(() -> DorisClient.getFeClient("127.0.0.1:8030", "", "")); + } + + @Test + public void createIngestionLoad() throws SparkLoadException, JsonProcessingException { + + DorisClient.FeClient feClient = new DorisClient.FeClient("127.0.0.1:8030", "", ""); + + new MockUp(CloseableHttpClient.class) { + @Mock + public CloseableHttpResponse execute( + final HttpUriRequest request) throws IOException, ClientProtocolException { + MockedCloseableHttpResponse response = new MockedCloseableHttpResponse(); + response.setStatusCode(HttpStatus.SC_BAD_REQUEST); + return response; + } + }; + Assertions.assertThrows(SparkLoadException.class, () -> feClient.createIngestionLoad("db", new 
HashMap<>(), "test", new HashMap<>())); + + new MockUp(CloseableHttpClient.class) { + @Mock + public CloseableHttpResponse execute( + final HttpUriRequest request) throws IOException, ClientProtocolException { + MockedCloseableHttpResponse response = new MockedCloseableHttpResponse(); + response.setStatusCode(HttpStatus.SC_OK); + response.setEntity(new StringEntity("{\"code\":1,\"msg\":\"\",\"data\":{},\"count\":0}")); + return response; + } + }; + Assertions.assertThrows(SparkLoadException.class, () -> feClient.createIngestionLoad("db", new HashMap<>(), "test", new HashMap<>())); + + new MockUp(CloseableHttpClient.class) { + @Mock + public CloseableHttpResponse execute( + final HttpUriRequest request) throws IOException, ClientProtocolException { + MockedCloseableHttpResponse response = new MockedCloseableHttpResponse(); + response.setStatusCode(HttpStatus.SC_OK); + response.setEntity(new StringEntity("{\"code\":0,\"msg\":\"\",\"data\":{\"loadId\":1,\"txnId\":1," + + "\"dbId\":1,\"signature\":1,\"tableMeta\":{\"tbl1\":{\"id\":1," + + "\"indexes\":[{\"indexId\":0,\"columns\":[{\"columnName\":\"c0\",\"columnType\":\"INT\"," + + "\"isAllowNull\":true,\"isKey\":true,\"aggregationType\":\"NONE\",\"defaultValue\":\"0\"," + + "\"stringLength\":0,\"precision\":0,\"scale\":0,\"defineExpr\":null}],\"schemaHash\":0," + + "\"indexType\":null,\"isBaseIndex\":false,\"schemaVersion\":0}],\"partitionInfo\":" + + "{\"partitionType\":\"UNPARTITIONED\",\"partitionColumnRefs\":[],\"distributionColumnRefs\":[]," + + "\"partitions\":[{\"partitionId\":1,\"startKeys\":[],\"endKeys\":[],\"isMaxPartition\":true," + + "\"bucketNum\":1}]}}}},\"count\":0}")); + return response; + } + }; + + LoadMeta loadMeta = new LoadMeta(); + loadMeta.setLoadId(1L); + loadMeta.setTxnId(1L); + loadMeta.setDbId(1L); + loadMeta.setSignature(1L); + Map tableMetaMap = new HashMap<>(); + TableMeta tableMeta = new TableMeta(); + tableMeta.setId(1L); + List indexList = new ArrayList<>(); + TableMeta.EtlIndex index = new TableMeta.EtlIndex(); + List columnList = new ArrayList<>(); + EtlJobConfig.EtlColumn column = new EtlJobConfig.EtlColumn(); + column.columnName = "c0"; + column.columnType = "INT"; + column.defaultValue = "0"; + column.isAllowNull = true; + column.aggregationType = "NONE"; + column.isKey = true; + columnList.add(column); + index.columns = columnList; + indexList.add(index); + tableMeta.setIndexes(indexList); + TableMeta.EtlPartitionInfo partitionInfo = new TableMeta.EtlPartitionInfo(); + TableMeta.EtlPartition partition = new TableMeta.EtlPartition(); + partition.partitionId = 1; + partition.bucketNum = 1; + partition.startKeys = Collections.emptyList(); + partition.endKeys = Collections.emptyList(); + partition.isMaxPartition = true; + partitionInfo.partitions = Collections.singletonList(partition); + partitionInfo.partitionType = "UNPARTITIONED"; + partitionInfo.partitionColumnRefs = new ArrayList<>(); + partitionInfo.distributionColumnRefs = new ArrayList<>(); + tableMeta.setPartitionInfo(partitionInfo); + tableMetaMap.put("tbl1", tableMeta); + loadMeta.setTableMeta(tableMetaMap); + Assertions.assertEquals(JsonUtils.writeValueAsString(loadMeta), + JsonUtils.writeValueAsString(feClient.createIngestionLoad("db", new HashMap<>(), "test", new HashMap<>()))); + + } + + @Test + public void updateIngestionLoad() { + + DorisClient.FeClient feClient = new DorisClient.FeClient("127.0.0.1:8030", "", ""); + + new MockUp(CloseableHttpClient.class) { + @Mock + public CloseableHttpResponse execute( + final HttpUriRequest request) 
throws IOException, ClientProtocolException { + MockedCloseableHttpResponse response = new MockedCloseableHttpResponse(); + response.setStatusCode(HttpStatus.SC_BAD_REQUEST); + return response; + } + }; + Assertions.assertThrows(SparkLoadException.class, () -> feClient.updateIngestionLoad("db", 1L, new HashMap<>())); + + new MockUp(CloseableHttpClient.class) { + @Mock + public CloseableHttpResponse execute( + final HttpUriRequest request) throws IOException, ClientProtocolException { + MockedCloseableHttpResponse response = new MockedCloseableHttpResponse(); + response.setStatusCode(HttpStatus.SC_OK); + response.setEntity(new StringEntity("{\"code\":1,\"msg\":\"\",\"data\":{},\"count\":0}")); + return response; + } + }; + Assertions.assertThrows(SparkLoadException.class, () -> feClient.updateIngestionLoad("db", 1L, new HashMap<>())); + + new MockUp(CloseableHttpClient.class) { + @Mock + public CloseableHttpResponse execute( + final HttpUriRequest request) throws IOException, ClientProtocolException { + MockedCloseableHttpResponse response = new MockedCloseableHttpResponse(); + response.setStatusCode(HttpStatus.SC_OK); + response.setEntity(new StringEntity("{\"code\":0,\"msg\":\"\",\"data\":{},\"count\":0}")); + return response; + } + }; + Assertions.assertDoesNotThrow(() -> feClient.updateIngestionLoad("db", 1L, new HashMap<>())); + + } + + @Test + public void getLoadInfo() throws SparkLoadException, JsonProcessingException { + + DorisClient.FeClient feClient = new DorisClient.FeClient("127.0.0.1:8030", "", ""); + + new MockUp(CloseableHttpClient.class) { + @Mock + public CloseableHttpResponse execute( + final HttpUriRequest request) throws IOException, ClientProtocolException { + MockedCloseableHttpResponse response = new MockedCloseableHttpResponse(); + response.setStatusCode(HttpStatus.SC_BAD_REQUEST); + return response; + } + }; + Assertions.assertThrows(SparkLoadException.class, () -> feClient.getLoadInfo("db", "test")); + + new MockUp(CloseableHttpClient.class) { + @Mock + public CloseableHttpResponse execute( + final HttpUriRequest request) throws IOException, ClientProtocolException { + MockedCloseableHttpResponse response = new MockedCloseableHttpResponse(); + response.setStatusCode(HttpStatus.SC_OK); + response.setEntity(new StringEntity("{\"status\":\"err\",\"msg\":\"\",\"jobInfo\":{\"dbName\":\"db\"," + + "\"tblNames\":[\"tbl1\"],\"label\":\"test\",\"clusterName\":\"default\",\"state\":\"FINISHED\"," + + "\"failMsg\":\"\",\"trackingUrl\":\"\"}}")); + return response; + } + }; + Assertions.assertThrows(SparkLoadException.class, () -> feClient.getLoadInfo("db", "test")); + + new MockUp(CloseableHttpClient.class) { + @Mock + public CloseableHttpResponse execute( + final HttpUriRequest request) throws IOException, ClientProtocolException { + MockedCloseableHttpResponse response = new MockedCloseableHttpResponse(); + response.setStatusCode(HttpStatus.SC_OK); + response.setEntity(new StringEntity("{\"status\":\"ok\",\"msg\":\"\",\"jobInfo\":{\"dbName\":\"db\"," + + "\"tblNames\":[\"tbl1\"],\"label\":\"test\",\"clusterName\":\"default\",\"state\":\"FINISHED\"," + + "\"failMsg\":\"\",\"trackingUrl\":\"\"}}")); + return response; + } + }; + Assertions.assertEquals("{\"dbName\":\"db\",\"tblNames\":[\"tbl1\"],\"label\":\"test\"," + + "\"clusterName\":\"default\",\"state\":\"FINISHED\",\"failMsg\":\"\",\"trackingUrl\":\"\"}", + JsonUtils.writeValueAsString(feClient.getLoadInfo("db", "test"))); + + } + + @Test + public void getDDL() { + + DorisClient.FeClient feClient = new 
DorisClient.FeClient("127.0.0.1:8030", "", ""); + + new MockUp(CloseableHttpClient.class) { + @Mock + public CloseableHttpResponse execute( + final HttpUriRequest request) throws IOException, ClientProtocolException { + MockedCloseableHttpResponse response = new MockedCloseableHttpResponse(); + response.setStatusCode(HttpStatus.SC_BAD_REQUEST); + return response; + } + }; + SparkLoadException e1 = + Assertions.assertThrows(SparkLoadException.class, () -> feClient.getDDL("db", "test")); + Assertions.assertEquals("request get ddl failed, path: /api/_get_ddl", e1.getMessage()); + + new MockUp(CloseableHttpClient.class) { + @Mock + public CloseableHttpResponse execute( + final HttpUriRequest request) throws IOException, ClientProtocolException { + MockedCloseableHttpResponse response = new MockedCloseableHttpResponse(); + response.setStatusCode(HttpStatus.SC_OK); + response.setEntity(new StringEntity("{\"code\":1,\"msg\":\"\",\"data\":{},\"count\":0}")); + return response; + } + }; + SparkLoadException e2 = + Assertions.assertThrows(SparkLoadException.class, () -> feClient.getDDL("db", "test")); + Assertions.assertEquals("get ddl failed, status: 1, msg: , data: {}", e2.getMessage()); + + new MockUp(CloseableHttpClient.class) { + @Mock + public CloseableHttpResponse execute( + final HttpUriRequest request) throws IOException, ClientProtocolException { + MockedCloseableHttpResponse response = new MockedCloseableHttpResponse(); + response.setStatusCode(HttpStatus.SC_OK); + response.setEntity(new StringEntity("{\"code\":0,\"msg\":\"\",\"data\":{},\"count\":0}")); + return response; + } + }; + SparkLoadException e3 = + Assertions.assertThrows(SparkLoadException.class, () -> feClient.getDDL("db", "test")); + Assertions.assertEquals("get ddl failed, status: 0, msg: , data: {}", e3.getMessage()); + + new MockUp(CloseableHttpClient.class) { + @Mock + public CloseableHttpResponse execute( + final HttpUriRequest request) throws IOException, ClientProtocolException { + MockedCloseableHttpResponse response = new MockedCloseableHttpResponse(); + response.setStatusCode(HttpStatus.SC_OK); + response.setEntity(new StringEntity("{\"code\":0,\"msg\":\"\"," + + "\"data\":{\"create_table\": [\"CREATE TABLE `tbl1` (\\n `k1` int(11) NULL " + + "COMMENT \\\"\\\",\\n `k2` int(11) NULL COMMENT \\\"\\\"\\n) ENGINE=OLAP\\n" + + "DUPLICATE KEY(`k1`, `k2`)\\nCOMMENT \\\"OLAP\\\"\\nDISTRIBUTED BY HASH(`k1`) BUCKETS 1\\n" + + "PROPERTIES (\\n\\\"replication_num\\\" = \\\"1\\\",\\n\\\"version_info\\\" = \\\"1,0\\\",\\n" + + "\\\"in_memory\\\" = \\\"false\\\",\\n\\\"storage_format\\\" = \\\"DEFAULT\\\"\\n);\"]\n}," + + "\"count\":0}")); + return response; + } + }; + Assertions.assertDoesNotThrow(() -> feClient.getDDL("db", "test")); + + + } + + private class MockedCloseableHttpResponse implements CloseableHttpResponse { + + private StatusLine statusLine; + private HttpEntity entity; + + @Override + public void close() throws IOException { + + } + + @Override + public StatusLine getStatusLine() { + return statusLine; + } + + @Override + public void setStatusLine(StatusLine statusline) { + this.statusLine = statusline; + } + + @Override + public void setStatusLine(ProtocolVersion ver, int code) { + this.statusLine = new BasicStatusLine(ver, code, ""); + } + + @Override + public void setStatusLine(ProtocolVersion ver, int code, String reason) { + this.statusLine = new BasicStatusLine(ver, code, reason); + } + + @Override + public void setStatusCode(int code) throws IllegalStateException { + if (this.statusLine == null) { + 
this.statusLine = new BasicStatusLine(HttpVersion.HTTP_1_1, code, ""); + } else { + this.statusLine = new BasicStatusLine(statusLine.getProtocolVersion(), code, statusLine.getReasonPhrase()); + } + } + + @Override + public void setReasonPhrase(String reason) throws IllegalStateException { + if (this.statusLine == null) { + this.statusLine = new BasicStatusLine(HttpVersion.HTTP_1_1, HttpStatus.SC_OK, reason); + } else { + this.statusLine = new BasicStatusLine(statusLine.getProtocolVersion(), statusLine.getStatusCode(), reason); + } + } + + @Override + public HttpEntity getEntity() { + return entity; + } + + @Override + public void setEntity(HttpEntity entity) { + this.entity = entity; + } + + @Override + public Locale getLocale() { + return null; + } + + @Override + public void setLocale(Locale loc) { + + } + + @Override + public ProtocolVersion getProtocolVersion() { + return HttpVersion.HTTP_1_1; + } + + @Override + public boolean containsHeader(String name) { + return false; + } + + @Override + public Header[] getHeaders(String name) { + return new Header[0]; + } + + @Override + public Header getFirstHeader(String name) { + return null; + } + + @Override + public Header getLastHeader(String name) { + return null; + } + + @Override + public Header[] getAllHeaders() { + return new Header[0]; + } + + @Override + public void addHeader(Header header) { + + } + + @Override + public void addHeader(String name, String value) { + + } + + @Override + public void setHeader(Header header) { + + } + + @Override + public void setHeader(String name, String value) { + + } + + @Override + public void setHeaders(Header[] headers) { + + } + + @Override + public void removeHeader(Header header) { + + } + + @Override + public void removeHeaders(String name) { + + } + + @Override + public HeaderIterator headerIterator() { + return null; + } + + @Override + public HeaderIterator headerIterator(String name) { + return null; + } + + @Override + public HttpParams getParams() { + return null; + } + + @Override + public void setParams(HttpParams params) { + + } + } + + +} \ No newline at end of file diff --git a/spark-load/spark-load-core/src/test/java/org/apache/doris/common/meta/LoadMetaTest.java b/spark-load/spark-load-core/src/test/java/org/apache/doris/common/meta/LoadMetaTest.java new file mode 100644 index 00000000..0c1bceaa --- /dev/null +++ b/spark-load/spark-load-core/src/test/java/org/apache/doris/common/meta/LoadMetaTest.java @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
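Every case in the FeClient tests above uses the same stubbing skeleton: a JMockit MockUp installed over CloseableHttpClient so that execute(...) returns a canned MockedCloseableHttpResponse instead of hitting a real FE. A minimal sketch of that skeleton follows; the helper name stubFeResponse is hypothetical, and it assumes it is declared inside the test class so it can reuse the private MockedCloseableHttpResponse helper and the existing imports.

    // Hypothetical helper, assumed to live inside the test class above.
    private void stubFeResponse(int statusCode, String jsonBody) {
        new MockUp<CloseableHttpClient>() {
            @Mock
            public CloseableHttpResponse execute(HttpUriRequest request) throws IOException {
                MockedCloseableHttpResponse response = new MockedCloseableHttpResponse();
                response.setStatusCode(statusCode);                 // non-OK status makes FeClient throw SparkLoadException
                if (jsonBody != null) {
                    response.setEntity(new StringEntity(jsonBody)); // JSON body that FeClient will parse
                }
                return response;
            }
        };
    }

With that in place, stubFeResponse(HttpStatus.SC_OK, "{\"code\":0,\"msg\":\"\",\"data\":{},\"count\":0}") reproduces the happy-path cases above, and stubFeResponse(HttpStatus.SC_BAD_REQUEST, null) the failure cases.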
+ +package org.apache.doris.common.meta; + + +import org.apache.doris.config.EtlJobConfig; +import org.apache.doris.config.JobConfig; +import org.apache.doris.exception.SparkLoadException; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class LoadMetaTest { + + @Test + public void checkMapping() throws SparkLoadException { + + List columns = new ArrayList<>(); + columns.add(new EtlJobConfig.EtlColumn("id", "BIGINT", false, true, "NONE", null, 0, 10, 0)); + columns.add(new EtlJobConfig.EtlColumn("c1", "HLL", true, false, "NONE", null, 0, 10, 0)); + columns.add(new EtlJobConfig.EtlColumn("c2", "BITMAP", true, false, "NONE", null, 0, 10, 0)); + + EtlJobConfig.EtlIndex etlIndex = new EtlJobConfig.EtlIndex(1, columns, 1, "DUPLICATE", true, 1); + EtlJobConfig.EtlPartition etlPartition = + new EtlJobConfig.EtlPartition(1L, Collections.singletonList(0), Collections.singletonList(1), true, 1); + EtlJobConfig.EtlPartitionInfo etlPartitionInfo = + new EtlJobConfig.EtlPartitionInfo("RANGE", Collections.singletonList("id"), + Collections.singletonList("id"), Collections.singletonList(etlPartition)); + + EtlJobConfig.EtlTable etlTable = new EtlJobConfig.EtlTable(Collections.singletonList(etlIndex), + etlPartitionInfo); + + LoadMeta loadMeta = new LoadMeta(); + + Map columnMappingMap = new HashMap<>(); + columnMappingMap.put("c2", new EtlJobConfig.EtlColumnMapping("to_bitmap(c1)")); + Assertions.assertThrows(SparkLoadException.class, () -> loadMeta.checkMapping(etlTable, columnMappingMap)); + + Map columnMappingMap1 = new HashMap<>(); + columnMappingMap1.put("c1", new EtlJobConfig.EtlColumnMapping("hll_hash(c1)")); + Assertions.assertThrows(SparkLoadException.class, () -> loadMeta.checkMapping(etlTable, columnMappingMap1)); + + Map columnMappingMap2 = new HashMap<>(); + columnMappingMap2.put("c1", new EtlJobConfig.EtlColumnMapping("hll_hash(c1)")); + columnMappingMap2.put("c2", new EtlJobConfig.EtlColumnMapping("to_bitmap(c1)")); + loadMeta.checkMapping(etlTable, columnMappingMap2); + + } + +} \ No newline at end of file diff --git a/spark-load/spark-load-core/src/test/java/org/apache/doris/config/JobConfigTest.java b/spark-load/spark-load-core/src/test/java/org/apache/doris/config/JobConfigTest.java new file mode 100644 index 00000000..c4e6f00f --- /dev/null +++ b/spark-load/spark-load-core/src/test/java/org/apache/doris/config/JobConfigTest.java @@ -0,0 +1,222 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
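checkMapping above encodes a rule for aggregate-only column types: an HLL target column and a BITMAP target column must each carry an explicit column mapping (the tests exercise hll_hash(...) and to_bitmap(...) respectively), and a missing mapping raises SparkLoadException. The sketch below shows the shape of a mapping that passes, assuming the etlTable/loadMeta setup from the test; the constructor argument labels are inferred from field names used elsewhere in these tests and should be treated as assumptions.

    // Sketch only. Inferred EtlColumn argument order:
    // (columnName, columnType, isAllowNull, isKey, aggregationType, defaultValue, stringLength, precision, scale)
    List<EtlJobConfig.EtlColumn> columns = new ArrayList<>();
    columns.add(new EtlJobConfig.EtlColumn("id", "BIGINT", false, true, "NONE", null, 0, 10, 0));
    columns.add(new EtlJobConfig.EtlColumn("c1", "HLL", true, false, "NONE", null, 0, 10, 0));
    columns.add(new EtlJobConfig.EtlColumn("c2", "BITMAP", true, false, "NONE", null, 0, 10, 0));

    Map<String, EtlJobConfig.EtlColumnMapping> mappings = new HashMap<>();
    mappings.put("c1", new EtlJobConfig.EtlColumnMapping("hll_hash(c1)"));  // HLL column needs a mapping
    mappings.put("c2", new EtlJobConfig.EtlColumnMapping("to_bitmap(c1)")); // BITMAP column needs a mapping
    loadMeta.checkMapping(etlTable, mappings);                              // passes; dropping either entry throws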
+ +package org.apache.doris.config; + +import org.apache.doris.client.DorisClient; +import org.apache.doris.common.enums.TaskType; +import org.apache.doris.exception.SparkLoadException; + +import mockit.Mock; +import mockit.MockUp; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +public class JobConfigTest { + + @Test + public void checkFeAddress() { + + JobConfig jobConfig = new JobConfig(); + jobConfig.setFeAddresses(""); + IllegalArgumentException e1 = + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkFeAddress); + Assertions.assertEquals("feAddress is empty", e1.getMessage()); + + jobConfig.setFeAddresses("127.0.0.1"); + IllegalArgumentException e2 = + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkFeAddress, + "feAddress format is incorrect"); + Assertions.assertEquals("feAddress format is incorrect", e2.getMessage()); + + jobConfig.setFeAddresses("127.0.0.1,127.0.0.2"); + IllegalArgumentException e3 = + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkFeAddress, + "feAddress format is incorrect"); + Assertions.assertEquals("feAddress format is incorrect", e3.getMessage()); + + jobConfig.setFeAddresses("127.0.0.1:8030"); + Assertions.assertDoesNotThrow(jobConfig::checkFeAddress); + + } + + @Test + public void checkTaskInfo() { + + JobConfig jobConfig = new JobConfig(); + jobConfig.setFeAddresses("127.0.0.1:8030"); + + jobConfig.setLoadTasks(new HashMap<>()); + IllegalArgumentException e1 = + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkTaskInfo); + Assertions.assertEquals("loadTasks is empty", e1.getMessage()); + + new MockUp(DorisClient.FeClient.class) { + @Mock + public String getDDL(String db, String table) throws SparkLoadException { + return "create table tbl1 (col1 int, col2 int, col3 int, col4 int) unique key (col1) properties (" + + "\"enable_unique_key_merge_on_write\" = \"false\")"; + } + }; + + Map loadTasks1 = new HashMap<>(); + JobConfig.TaskInfo taskInfo1 = new JobConfig.TaskInfo(); + taskInfo1.setType(TaskType.FILE); + loadTasks1.put("task1", taskInfo1); + jobConfig.setLoadTasks(loadTasks1); + IllegalArgumentException e2 = + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkTaskInfo); + Assertions.assertEquals("file path is empty", e2.getMessage()); + + Map loadTasks2 = new HashMap<>(); + JobConfig.TaskInfo taskInfo2 = new JobConfig.TaskInfo(); + taskInfo2.setType(TaskType.FILE); + taskInfo2.setPaths(Collections.singletonList("test")); + taskInfo2.setFormat("sequence"); + loadTasks2.put("task2", taskInfo2); + jobConfig.setLoadTasks(loadTasks2); + IllegalArgumentException e3 = + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkTaskInfo); + Assertions.assertEquals("format only support parquet or orc or csv", e3.getMessage()); + + taskInfo2.setFormat("csv"); + Assertions.assertDoesNotThrow(jobConfig::checkTaskInfo); + + Map loadTasks3 = new HashMap<>(); + JobConfig.TaskInfo taskInfo3 = new JobConfig.TaskInfo(); + taskInfo3.setType(TaskType.HIVE); + loadTasks3.put("task3", taskInfo3); + jobConfig.setLoadTasks(loadTasks3); + IllegalArgumentException e4 = + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkTaskInfo); + Assertions.assertEquals("hive database is empty", e4.getMessage()); + + 
taskInfo3.setHiveDatabase("db"); + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkTaskInfo, "hive table is empty"); + + taskInfo3.setHiveTable("tbl"); + Assertions.assertDoesNotThrow(jobConfig::checkTaskInfo); + + new MockUp(DorisClient.FeClient.class) { + @Mock + public String getDDL(String db, String table) throws SparkLoadException { + return "create table tbl1 (col1 int, col2 int, col3 int, col4 int) unique key (col1) properties (" + + "\"enable_unique_key_merge_on_write\" = \"true\")"; + } + }; + IllegalArgumentException e5 = + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkTaskInfo); + + } + + @Test + public void checkSparkInfo() throws IOException { + + JobConfig jobConfig = new JobConfig(); + JobConfig.SparkInfo sparkInfo = new JobConfig.SparkInfo(); + jobConfig.setSpark(sparkInfo); + IllegalArgumentException e1 = + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkSparkInfo); + Assertions.assertEquals("spark config item sparkHome is empty", e1.getMessage()); + + sparkInfo.setSparkHome("test"); + IllegalArgumentException e2 = + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkSparkInfo); + Assertions.assertEquals("spark master only supports yarn or standalone or local", e2.getMessage()); + + sparkInfo.setMaster("local"); + sparkInfo.setDeployMode("abc"); + IllegalArgumentException e3 = + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkSparkInfo); + Assertions.assertEquals("spark deployMode only supports cluster or client", e3.getMessage()); + + sparkInfo.setMaster("spark://127.0.0.1:7077"); + sparkInfo.setDeployMode("cluster"); + IllegalArgumentException e4 = + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkSparkInfo); + Assertions.assertEquals("standalone and local master only supports client mode", e4.getMessage()); + + sparkInfo.setMaster("yarn"); + sparkInfo.setDeployMode("cluster"); + IllegalArgumentException e5 = + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkSparkInfo); + Assertions.assertEquals("dpp jar file is not exists, path: null/app/spark-load-dpp-1.0-SNAPSHOT.jar", e5.getMessage()); + + sparkInfo.setDppJarPath(""); + IllegalArgumentException e6 = + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkSparkInfo); + Assertions.assertEquals("dpp jar file path is empty", e6.getMessage()); + + Path path = Files.createTempFile(null, null); + sparkInfo.setDppJarPath(path.toAbsolutePath().toString()); + Assertions.assertDoesNotThrow(jobConfig::checkSparkInfo); + + } + + @Test + public void checkHadoopProperties() throws IOException { + + JobConfig jobConfig = new JobConfig(); + Map hadoopProperties = new HashMap<>(); + jobConfig.setHadoopProperties(hadoopProperties); + + hadoopProperties.put("abc", "123"); + IllegalArgumentException e1 = + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkHadoopProperties); + Assertions.assertEquals("fs.defaultFS is empty", e1.getMessage()); + + hadoopProperties.put("fs.defaultFS", "test"); + IllegalArgumentException e2 = + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkHadoopProperties); + Assertions.assertEquals("hadoop username is empty", e2.getMessage()); + + hadoopProperties.put("hadoop.username", "hadoop"); + Assertions.assertDoesNotThrow(jobConfig::checkHadoopProperties); + + hadoopProperties.put("hadoop.security.authentication", "kerberos"); + IllegalArgumentException e3 = + 
Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkHadoopProperties); + Assertions.assertEquals("hadoop.kerberos.principal is not set", e3.getMessage()); + + hadoopProperties.put("hadoop.kerberos.principal", ""); + IllegalArgumentException e4 = + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkHadoopProperties); + Assertions.assertEquals("hadoop kerberos principal is empty", e4.getMessage()); + + hadoopProperties.put("hadoop.kerberos.principal", "spark@DORIS.ORG"); + IllegalArgumentException e5 = + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkHadoopProperties); + Assertions.assertEquals("hadoop.kerberos.keytab is not set", e5.getMessage()); + + hadoopProperties.put("hadoop.kerberos.keytab", "test"); + IllegalArgumentException e6 = + Assertions.assertThrows(IllegalArgumentException.class, jobConfig::checkHadoopProperties); + Assertions.assertEquals("hadoop kerberos keytab file is not exists, path: test", e6.getMessage()); + + Path path = Files.createTempFile("spark", ".keytab"); + hadoopProperties.put("hadoop.kerberos.keytab", path.toAbsolutePath().toString()); + Assertions.assertDoesNotThrow(jobConfig::checkHadoopProperties); + + } +} \ No newline at end of file diff --git a/spark-load/spark-load-core/src/test/java/org/apache/doris/load/LoaderFactoryTest.java b/spark-load/spark-load-core/src/test/java/org/apache/doris/load/LoaderFactoryTest.java new file mode 100644 index 00000000..28cb230c --- /dev/null +++ b/spark-load/spark-load-core/src/test/java/org/apache/doris/load/LoaderFactoryTest.java @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
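Read together, checkSparkInfo and checkHadoopProperties above define the minimum viable configuration: a non-empty sparkHome, a master of yarn, standalone (spark://...) or local with cluster deploy mode reserved for yarn, a dpp jar that exists on disk, fs.defaultFS plus hadoop.username, and, once hadoop.security.authentication is kerberos, a principal and an existing keytab file. A configuration sketch that satisfies both checks; all paths and addresses are placeholders.

    // Sketch of a configuration accepted by the checks above; values are placeholders.
    JobConfig jobConfig = new JobConfig();

    JobConfig.SparkInfo spark = new JobConfig.SparkInfo();
    spark.setSparkHome("/opt/spark");                 // must be non-empty
    spark.setMaster("yarn");                          // yarn, spark://host:port or local
    spark.setDeployMode("cluster");                   // cluster is only accepted with yarn
    spark.setDppJarPath("/opt/spark-load/app/spark-load-dpp-1.0-SNAPSHOT.jar"); // file must exist
    jobConfig.setSpark(spark);
    jobConfig.checkSparkInfo();

    Map<String, String> hadoopProps = new HashMap<>();
    hadoopProps.put("fs.defaultFS", "hdfs://nameservice1");
    hadoopProps.put("hadoop.username", "hadoop");
    // Only required when Kerberos authentication is enabled:
    hadoopProps.put("hadoop.security.authentication", "kerberos");
    hadoopProps.put("hadoop.kerberos.principal", "spark@DORIS.ORG");
    hadoopProps.put("hadoop.kerberos.keytab", "/etc/security/keytabs/spark.keytab"); // file must exist
    jobConfig.setHadoopProperties(hadoopProps);
    jobConfig.checkHadoopProperties();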
+ +package org.apache.doris.load; + +import org.apache.doris.common.enums.LoadMode; +import org.apache.doris.config.JobConfig; +import org.apache.doris.load.job.Loader; +import org.apache.doris.load.job.PullLoader; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class LoaderFactoryTest { + + @Test + void createLoader() { + + JobConfig jobConfig = new JobConfig(); + jobConfig.setLoadMode(null); + Assertions.assertThrows(NullPointerException.class, () -> LoaderFactory.createLoader(jobConfig, false)); + + jobConfig.setLoadMode(LoadMode.PUSH); + Assertions.assertThrows(UnsupportedOperationException.class, () -> LoaderFactory.createLoader(jobConfig, false)); + + jobConfig.setLoadMode(LoadMode.PULL); + Assertions.assertDoesNotThrow(() -> LoaderFactory.createLoader(jobConfig, false)); + Loader loader = LoaderFactory.createLoader(jobConfig, false);; + Assertions.assertInstanceOf(PullLoader.class, loader); + + } +} \ No newline at end of file diff --git a/spark-load/spark-load-core/src/test/java/org/apache/doris/load/job/PullLoaderTest.java b/spark-load/spark-load-core/src/test/java/org/apache/doris/load/job/PullLoaderTest.java new file mode 100644 index 00000000..a0c56a60 --- /dev/null +++ b/spark-load/spark-load-core/src/test/java/org/apache/doris/load/job/PullLoaderTest.java @@ -0,0 +1,236 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
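LoaderFactoryTest above pins down the dispatch contract of LoaderFactory.createLoader: LoadMode.PULL yields a PullLoader, LoadMode.PUSH is rejected with UnsupportedOperationException, and a null load mode fails with NullPointerException. A minimal usage sketch; the meaning of the boolean flag is an assumption read off PullLoaderTest below, which passes true when it wants to exercise recovery.

    JobConfig jobConfig = new JobConfig();
    jobConfig.setLoadMode(LoadMode.PULL);                          // PULL is the only mode supported here
    Loader loader = LoaderFactory.createLoader(jobConfig, false);  // boolean flag: assumed recovery switch
    assert loader instanceof PullLoader;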
+ +package org.apache.doris.load.job; + +import org.apache.doris.client.DorisClient; +import org.apache.doris.common.enums.TaskType; +import org.apache.doris.common.meta.LoadMeta; +import org.apache.doris.common.meta.TableMeta; +import org.apache.doris.config.EtlJobConfig; +import org.apache.doris.config.JobConfig; +import org.apache.doris.exception.SparkLoadException; +import org.apache.doris.load.LoaderFactory; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.json.JsonMapper; +import mockit.Mock; +import mockit.MockUp; +import org.apache.commons.io.FileUtils; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +class PullLoaderTest { + + @Test + void canBeRecovered() throws SparkLoadException, IOException { + + JobConfig jobConfig = new JobConfig(); + jobConfig.setFeAddresses("127.0.0.1:8080"); + Map loadTasks = new HashMap<>(); + JobConfig.TaskInfo taskInfo = new JobConfig.TaskInfo(); + taskInfo.setType(TaskType.FILE); + taskInfo.setPaths(Collections.singletonList("test")); + loadTasks.put("tbl1", taskInfo); + jobConfig.setLoadTasks(loadTasks); + jobConfig.setLabel("test"); + File file = new File(System.getProperty("java.io.tmpdir")); + jobConfig.setWorkingDir(file.getAbsolutePath()); + + new MockUp() { + @Mock + public LoadMeta createIngestionLoad(String db, Map> tableToPartition, String label, + Map properties) { + LoadMeta loadMeta = new LoadMeta(); + loadMeta.setLoadId(1L); + loadMeta.setTxnId(1L); + loadMeta.setDbId(1L); + loadMeta.setSignature(1L); + Map tableMetaMap = new HashMap<>(); + TableMeta tableMeta = new TableMeta(); + tableMeta.setId(1L); + List indexList = new ArrayList<>(); + TableMeta.EtlIndex index = new TableMeta.EtlIndex(); + List columnList = new ArrayList<>(); + EtlJobConfig.EtlColumn column = new EtlJobConfig.EtlColumn(); + column.columnName = "c0"; + column.columnType = "INT"; + column.defaultValue = "0"; + column.isAllowNull = true; + column.aggregationType = "NONE"; + column.isKey = true; + columnList.add(column); + index.columns = columnList; + indexList.add(index); + tableMeta.setIndexes(indexList); + TableMeta.EtlPartitionInfo partitionInfo = new TableMeta.EtlPartitionInfo(); + TableMeta.EtlPartition partition = new TableMeta.EtlPartition(); + partition.partitionId = 1; + partition.bucketNum = 1; + partition.startKeys = Collections.emptyList(); + partition.endKeys = Collections.emptyList(); + partition.isMaxPartition = true; + partitionInfo.partitions = Collections.singletonList(partition); + partitionInfo.partitionType = "UNPARTITIONED"; + partitionInfo.partitionColumnRefs = new ArrayList<>(); + partitionInfo.distributionColumnRefs = new ArrayList<>(); + tableMeta.setPartitionInfo(partitionInfo); + tableMetaMap.put("tbl1", tableMeta); + loadMeta.setTableMeta(tableMetaMap); + try { + System.out.println(JsonMapper.builder().build().writeValueAsString(loadMeta)); + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + return loadMeta; + } + }; + Loader loader = LoaderFactory.createLoader(jobConfig, true); + assertInstanceOf(Recoverable.class, 
loader); + loader.prepare(); + assertFalse(((Recoverable)loader).canBeRecovered()); + + File file1 = new File(System.getProperty("java.io.tmpdir") + "/jobs/1/test"); + try { + + file1.mkdirs(); + assertFalse(((Recoverable)loader).canBeRecovered()); + + File file2 = new File(System.getProperty("java.io.tmpdir") + "/jobs/1/test/1"); + file2.mkdirs(); + assertFalse(((Recoverable)loader).canBeRecovered()); + + File file3 = new File(System.getProperty("java.io.tmpdir") + "/jobs/1/test/1/dpp_result.json"); + Files.write(file3.toPath(), Collections.singletonList("")); + assertFalse(((Recoverable)loader).canBeRecovered()); + + Files.write(file3.toPath(), Collections.singletonList("test")); + assertThrows(SparkLoadException.class, () -> ((Recoverable)loader).canBeRecovered()); + + Files.write(file3.toPath(), Collections.singletonList("{}")); + assertThrows(SparkLoadException.class, () -> ((Recoverable)loader).canBeRecovered()); + + Files.write(file3.toPath(), Collections.singletonList("{\"is_success\":false,\"failed_reason\":\"\"," + + "\"scanned_rows\":0,\"file_number\":0,\"file_size\":0,\"normal_rows\":0,\"abnormal_rows\":0," + + "\"unselect_rows\":0,\"partial_abnormal_rows\":\"\",\"scanned_bytes\":0}\n")); + assertFalse(((Recoverable)loader).canBeRecovered()); + + Files.write(file3.toPath(), Collections.singletonList("{\"is_success\":true,\"failed_reason\":\"\"," + + "\"scanned_rows\":0,\"file_number\":0,\"file_size\":0,\"normal_rows\":0,\"abnormal_rows\":0," + + "\"unselect_rows\":0,\"partial_abnormal_rows\":\"\",\"scanned_bytes\":0}\n")); + + File file4 = new File(System.getProperty("java.io.tmpdir") + "/jobs/1/test/1/load_meta.json"); + Files.write(file4.toPath(), Collections.singletonList("")); + assertFalse(((Recoverable)loader).canBeRecovered()); + + Files.write(file4.toPath(), Collections.singletonList("{\"loadId\":1,\"txnId\":1,\"dbId\":1,\"signature\":1," + + "\"tableMeta\":{\"tbl1\":{\"id\":1,\"indexes\":[],\"partitionInfo\":{\"partitionType\":" + + "\"UNPARTITIONED\",\"partitionColumnRefs\":[],\"distributionColumnRefs\":[],\"partitions\":" + + "[{\"partitionId\":1,\"startKeys\":[],\"endKeys\":[],\"isMaxPartition\":true,\"bucketNum\":1}]}" + + "}}}\n")); + assertFalse(((Recoverable)loader).canBeRecovered()); + + Files.write(file4.toPath(), Collections.singletonList("{\"loadId\":1,\"txnId\":1,\"dbId\":1,\"signature\":1," + + "\"tableMeta\":{\"tbl2\":{\"id\":1,\"indexes\":[{\"indexId\":0,\"columns\":[{\"columnName\":\"c0\"," + + "\"columnType\":\"INT\",\"isAllowNull\":true,\"isKey\":true,\"aggregationType\":\"NONE\"," + + "\"defaultValue\":\"0\",\"stringLength\":0,\"precision\":0,\"scale\":0,\"defineExpr\":null}]," + + "\"schemaHash\":0,\"indexType\":null,\"isBaseIndex\":false,\"schemaVersion\":0}],\"partitionInfo\":" + + "{\"partitionType\":\"UNPARTITIONED\",\"partitionColumnRefs\":[],\"distributionColumnRefs\":[]," + + "\"partitions\":[{\"partitionId\":1,\"startKeys\":[],\"endKeys\":[],\"isMaxPartition\":true," + + "\"bucketNum\":1}]}}}}")); + assertFalse(((Recoverable)loader).canBeRecovered()); + + Files.write(file4.toPath(), Collections.singletonList("{\"loadId\":1,\"txnId\":1,\"dbId\":1,\"signature\":1," + + "\"tableMeta\":{\"tbl1\":{\"id\":1,\"indexes\":[{\"indexId\":1,\"columns\":[{\"columnName\":\"c0\"," + + "\"columnType\":\"INT\",\"isAllowNull\":true,\"isKey\":true,\"aggregationType\":\"NONE\"," + + "\"defaultValue\":\"0\",\"stringLength\":0,\"precision\":0,\"scale\":0,\"defineExpr\":null}]," + + 
"\"schemaHash\":0,\"indexType\":null,\"isBaseIndex\":false,\"schemaVersion\":0}],\"partitionInfo\":" + + "{\"partitionType\":\"UNPARTITIONED\",\"partitionColumnRefs\":[],\"distributionColumnRefs\":[]," + + "\"partitions\":[{\"partitionId\":1,\"startKeys\":[],\"endKeys\":[],\"isMaxPartition\":true," + + "\"bucketNum\":1}]}}}}")); + assertFalse(((Recoverable)loader).canBeRecovered()); + + Files.write(file4.toPath(), Collections.singletonList("{\"loadId\":1,\"txnId\":1,\"dbId\":1,\"signature\":1," + + "\"tableMeta\":{\"tbl1\":{\"id\":1,\"indexes\":[{\"indexId\":0,\"columns\":[{\"columnName\":\"c0\"," + + "\"columnType\":\"INT\",\"isAllowNull\":true,\"isKey\":true,\"aggregationType\":\"NONE\"," + + "\"defaultValue\":\"0\",\"stringLength\":0,\"precision\":0,\"scale\":0,\"defineExpr\":null}]," + + "\"schemaHash\":1,\"indexType\":null,\"isBaseIndex\":false,\"schemaVersion\":0}],\"partitionInfo\":" + + "{\"partitionType\":\"UNPARTITIONED\",\"partitionColumnRefs\":[],\"distributionColumnRefs\":[]," + + "\"partitions\":[{\"partitionId\":1,\"startKeys\":[],\"endKeys\":[],\"isMaxPartition\":true," + + "\"bucketNum\":1}]}}}}")); + assertFalse(((Recoverable)loader).canBeRecovered()); + + Files.write(file4.toPath(), Collections.singletonList("{\"loadId\":1,\"txnId\":1,\"dbId\":1,\"signature\":1," + + "\"tableMeta\":{\"tbl1\":{\"id\":1,\"indexes\":[{\"indexId\":0,\"columns\":[{\"columnName\":\"c0\"," + + "\"columnType\":\"INT\",\"isAllowNull\":true,\"isKey\":true,\"aggregationType\":\"NONE\"," + + "\"defaultValue\":\"0\",\"stringLength\":0,\"precision\":0,\"scale\":0,\"defineExpr\":null}]," + + "\"schemaHash\":0,\"indexType\":null,\"isBaseIndex\":false,\"schemaVersion\":1}],\"partitionInfo\":" + + "{\"partitionType\":\"UNPARTITIONED\",\"partitionColumnRefs\":[],\"distributionColumnRefs\":[]," + + "\"partitions\":[{\"partitionId\":1,\"startKeys\":[],\"endKeys\":[],\"isMaxPartition\":true," + + "\"bucketNum\":1}]}}}}")); + assertFalse(((Recoverable)loader).canBeRecovered()); + + Files.write(file4.toPath(), Collections.singletonList("{\"loadId\":1,\"txnId\":1,\"dbId\":1,\"signature\":1," + + "\"tableMeta\":{\"tbl1\":{\"id\":1,\"indexes\":[{\"indexId\":0,\"columns\":[{\"columnName\":\"c0\"," + + "\"columnType\":\"INT\",\"isAllowNull\":true,\"isKey\":true,\"aggregationType\":\"NONE\"," + + "\"defaultValue\":\"0\",\"stringLength\":0,\"precision\":0,\"scale\":0,\"defineExpr\":null}]," + + "\"schemaHash\":0,\"indexType\":null,\"isBaseIndex\":false,\"schemaVersion\":0}],\"partitionInfo\":" + + "{\"partitionType\":\"UNPARTITIONED\",\"partitionColumnRefs\":[],\"distributionColumnRefs\":[]," + + "\"partitions\":[{\"partitionId\":1,\"startKeys\":[],\"endKeys\":[],\"isMaxPartition\":true," + + "\"bucketNum\":1},{\"partitionId\":2,\"startKeys\":[],\"endKeys\":[],\"isMaxPartition\":true," + + "\"bucketNum\":1}]}}}}")); + assertFalse(((Recoverable)loader).canBeRecovered()); + + Files.write(file4.toPath(), Collections.singletonList("{\"loadId\":1,\"txnId\":1,\"dbId\":1,\"signature\":1," + + "\"tableMeta\":{\"tbl1\":{\"id\":1,\"indexes\":[{\"indexId\":0,\"columns\":[{\"columnName\":\"c0\"," + + "\"columnType\":\"INT\",\"isAllowNull\":true,\"isKey\":true,\"aggregationType\":\"NONE\"," + + "\"defaultValue\":\"0\",\"stringLength\":0,\"precision\":0,\"scale\":0,\"defineExpr\":null}]," + + "\"schemaHash\":0,\"indexType\":null,\"isBaseIndex\":false,\"schemaVersion\":0}],\"partitionInfo\":" + + "{\"partitionType\":\"UNPARTITIONED\",\"partitionColumnRefs\":[],\"distributionColumnRefs\":[]," + + 
"\"partitions\":[{\"partitionId\":2,\"startKeys\":[],\"endKeys\":[],\"isMaxPartition\":true," + + "\"bucketNum\":1}]}}}}")); + assertFalse(((Recoverable)loader).canBeRecovered()); + + Files.write(file4.toPath(), Collections.singletonList("{\"loadId\":1,\"txnId\":1,\"dbId\":1,\"signature\":1," + + "\"tableMeta\":{\"tbl1\":{\"id\":1,\"indexes\":[{\"indexId\":0,\"columns\":[{\"columnName\":\"c0\"," + + "\"columnType\":\"INT\",\"isAllowNull\":true,\"isKey\":true,\"aggregationType\":\"NONE\"," + + "\"defaultValue\":\"0\",\"stringLength\":0,\"precision\":0,\"scale\":0,\"defineExpr\":null}]," + + "\"schemaHash\":0,\"indexType\":null,\"isBaseIndex\":false,\"schemaVersion\":0}],\"partitionInfo\":" + + "{\"partitionType\":\"UNPARTITIONED\",\"partitionColumnRefs\":[],\"distributionColumnRefs\":[]," + + "\"partitions\":[{\"partitionId\":1,\"startKeys\":[],\"endKeys\":[],\"isMaxPartition\":true," + + "\"bucketNum\":1}]}}}}")); + assertTrue(((Recoverable)loader).canBeRecovered()); + + } finally { + // delete ${java.io.tmpdir}/jobs on exit + FileUtils.deleteDirectory(file1.getParentFile().getParentFile()); + } + + } +} \ No newline at end of file diff --git a/spark-load/spark-load-core/src/test/java/org/apache/doris/util/DateUtilsTest.java b/spark-load/spark-load-core/src/test/java/org/apache/doris/util/DateUtilsTest.java new file mode 100644 index 00000000..d6d10ce8 --- /dev/null +++ b/spark-load/spark-load-core/src/test/java/org/apache/doris/util/DateUtilsTest.java @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.util; + +import mockit.Mock; +import mockit.MockUp; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.time.LocalDateTime; +import java.time.ZoneId; + +class DateUtilsTest { + + @Test + void getFormattedNow() { + new MockUp() { + @Mock + public LocalDateTime now(ZoneId zoneId) { + return LocalDateTime.of(2024,8,1,12,34,56); + } + }; + Assertions.assertEquals("2024-08-01 12:34:56", DateUtils.getFormattedNow(DateUtils.NORMAL_FORMATER)); + Assertions.assertEquals("20240801123456", DateUtils.getFormattedNow(DateUtils.NUMBER_FORMATER)); + } +} \ No newline at end of file diff --git a/spark-load/spark-load-dist/pom.xml b/spark-load/spark-load-dist/pom.xml new file mode 100644 index 00000000..01dcad98 --- /dev/null +++ b/spark-load/spark-load-dist/pom.xml @@ -0,0 +1,103 @@ + + + + 4.0.0 + + org.apache.doris + spark-load + ${revision} + + + pom + + spark-load-dist + + + 8 + 8 + UTF-8 + + + + + org.apache.doris + spark-load-core + ${project.version} + + + org.apache.doris + spark-load-dpp + ${project.version} + + + + + + + org.apache.maven.plugins + maven-dependency-plugin + 3.0.2 + + false + false + + + org.apache.doris + spark-load-dpp + ${project.version} + ${project.build.directory}/app + + + + + + + copy + + package + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + bin + + single + + package + + ${project.parent.artifactId}-${project.version} + + src/main/assembly/assembly.xml + + ${project.parent.build.directory} + + + + + + + + \ No newline at end of file diff --git a/spark-load/spark-load-dist/src/main/assembly/assembly.xml b/spark-load/spark-load-dist/src/main/assembly/assembly.xml new file mode 100644 index 00000000..71b9a3ae --- /dev/null +++ b/spark-load/spark-load-dist/src/main/assembly/assembly.xml @@ -0,0 +1,72 @@ + + + + bin + + tar.gz + + true + ${project.parent.artifactId}-${project.version}-bin + + + + false + runtime + true + lib + + org.apache.doris:spark-load-dpp + + + + + + + ./src/main/bin + bin + + spark-load.sh + + unix + 0755 + + + ${project.build.directory}/lib + lib + 0755 + + + ${project.build.directory}/app + app + 0755 + + + ${project.build.directory}/../src/main/resources + conf + unix + 0755 + + *.yml + *.properties + logback*.xml + + + + \ No newline at end of file diff --git a/spark-load/spark-load-dist/src/main/bin/spark-load.sh b/spark-load/spark-load-dist/src/main/bin/spark-load.sh new file mode 100644 index 00000000..9097dd24 --- /dev/null +++ b/spark-load/spark-load-dist/src/main/bin/spark-load.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +if [ -z ${SPARK_LOAD_HOME} ]; then + cur_dir=$(dirname "$0")/../ + SPARK_LOAD_HOME=$(readlink -f ${cur_dir}) +fi + +export SPARK_LOAD_HOME + +if [[ -z "${JAVA_HOME}" ]]; then + if ! command -v java &>/dev/null; then + JAVA="" + else + JAVA="$(command -v java)" + fi +else + JAVA="${JAVA_HOME}/bin/java" +fi + +if [[ ! -x "${JAVA}" ]]; then + echo "The JAVA_HOME environment variable is not set correctly" + echo "This environment variable is required to run this program" + echo "Note: JAVA_HOME should point to a JDK and not a JRE" + echo "You can set JAVA_HOME in the fe.conf configuration file" + exit 1 +fi + +SPARK_LOAD_CORE_JAR= +for f in "${SPARK_LOAD_HOME}/lib"/*.jar; do + if [[ $(basename "${f}") == "spark-load-core"*".jar" ]]; then + SPARK_LOAD_CORE_JAR="${f}" + continue + fi + CLASSPATH="${f}:${CLASSPATH}" +done +CLASSPATH="${SPARK_LOAD_CORE_JAR}:${CLASSPATH}" +export CLASSPATH="${SPARK_LOAD_CORE_JAR}/conf:${CLASSPATH}:${SPARK_LOAD_CORE_JAR}/lib" + +${JAVA} org.apache.doris.SparkLoadRunner "$@" \ No newline at end of file diff --git a/spark-load/spark-load-dpp/pom.xml b/spark-load/spark-load-dpp/pom.xml new file mode 100644 index 00000000..81254e04 --- /dev/null +++ b/spark-load/spark-load-dpp/pom.xml @@ -0,0 +1,340 @@ + + + + 4.0.0 + + org.apache.doris + ${revision} + spark-load + + spark-load-dpp + jar + + 1 + -Xmx512m + + + + org.apache.doris + spark-load-common + ${project.version} + + + + commons-codec + commons-codec + + + + org.apache.commons + commons-lang3 + + + + + + org.apache.spark + spark-core_${scala.major.version} + + + + io.netty + netty-all + + + + + org.apache.spark + spark-sql_${scala.major.version} + + + org.apache.hadoop + hadoop-common + + + org.apache.parquet + parquet-column + + + org.apache.parquet + parquet-hadoop + + + org.apache.parquet + parquet-common + + + commons-collections + commons-collections + + + org.scala-lang + scala-library + + + com.esotericsoftware + kryo-shaded + + + org.apache.spark + spark-catalyst_${scala.major.version} + + + com.google.guava + guava + + + org.junit.jupiter + junit-jupiter-engine + test + + + + + + + + + + org.junit.jupiter + junit-jupiter-params + test + + + org.jmockit + jmockit + test + + + + org.apache.logging.log4j + log4j-core + + + + org.apache.logging.log4j + log4j-api + + + + org.apache.logging.log4j + log4j-slf4j-impl + + + + org.slf4j + slf4j-api + + + org.apache.hadoop + hadoop-aws + + + org.slf4j + slf4j-log4j12 + + + log4j + log4j + + + servlet-api + javax.servlet + + + + com.amazonaws + aws-java-sdk-s3 + + + com.amazonaws + aws-java-sdk-bundle + + + + + com.amazonaws + aws-java-sdk-s3 + + + com.amazonaws + aws-java-sdk-glue + + + com.amazonaws + aws-java-sdk-dynamodb + + + com.google.code.gson + gson + + + + spark-load-dpp-${project.version} + + + + org.apache.maven.plugins + maven-surefire-plugin + + set larger, eg, 3, to reduce the time or running FE unit tests<--> + ${fe_ut_parallel} + not reuse forked jvm, so that each unit test will run in separate jvm. 
to avoid singleton confict<--> + false + + -javaagent:${settings.localRepository}/org/jmockit/jmockit/${jmockit.version}/jmockit-${jmockit.version}.jar @{argLine} + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy-dependencies + package + + copy-dependencies + + + ${project.build.directory}/lib + false + false + true + runtime + ${skip.plugin} + + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + + org.apache.doris.load.loadv2.etl.SparkEtlJob + + + + jar-with-dependencies + + + + + make-assembly + + package + + + single + + + + + + org.codehaus.mojo + cobertura-maven-plugin + 2.7 + + + 1024m + + + + + + org.apache.maven.plugins + maven-clean-plugin + 3.1.0 + + + auto-clean + initialize + + clean + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + + com.google.code.findbugs:* + org.slf4j:* + + + + + org.roaringbitmap + org.apache.doris.shaded.org.roaringbitmap + com.google.guava + org.apache.doris.shaded.com.google.guava + + + + + + package + + shade + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + true + + + + org.codehaus.mojo + flatten-maven-plugin + + true + resolveCiFriendliesOnly + + + + flatten + process-resources + + flatten + + + + flatten.clean + clean + + clean + + + + + + + diff --git a/spark-load/spark-load-dpp/src/main/java/org/apache/doris/common/SparkDppException.java b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/common/SparkDppException.java new file mode 100644 index 00000000..66547461 --- /dev/null +++ b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/common/SparkDppException.java @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.common; + +import com.google.common.base.Strings; + +// Exception for Spark DPP process +public class SparkDppException extends Exception { + public SparkDppException(String msg, Throwable cause) { + super(Strings.nullToEmpty(msg), cause); + } + + public SparkDppException(Throwable cause) { + super(cause); + } + + public SparkDppException(String msg, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(Strings.nullToEmpty(msg), cause, enableSuppression, writableStackTrace); + } + + public SparkDppException(String msg) { + super(Strings.nullToEmpty(msg)); + } +} diff --git a/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/ColumnParser.java b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/ColumnParser.java new file mode 100644 index 00000000..d639b31f --- /dev/null +++ b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/ColumnParser.java @@ -0,0 +1,296 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.loadv2.dpp; + +import org.apache.doris.common.SparkDppException; +import org.apache.doris.config.EtlJobConfig; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Serializable; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; + + +// Parser to validate value for different type +public abstract class ColumnParser implements Serializable { + + // thread safe formatter + public static final DateTimeFormatter DATE_FORMATTER = new DateTimeFormatterBuilder() + .appendPattern("uuuu-MM-dd") + .toFormatter(); + public static final DateTimeFormatter DATE_TIME_FORMATTER = new DateTimeFormatterBuilder() + .appendPattern("uuuu-MM-dd HH:mm:ss") + .toFormatter(); + protected static final Logger LOG = LoggerFactory.getLogger(ColumnParser.class); + + public static ColumnParser create(EtlJobConfig.EtlColumn etlColumn) throws SparkDppException { + String columnType = etlColumn.columnType; + if (columnType.equalsIgnoreCase("TINYINT")) { + return new TinyIntParser(); + } else if (columnType.equalsIgnoreCase("SMALLINT")) { + return new SmallIntParser(); + } else if (columnType.equalsIgnoreCase("INT")) { + return new IntParser(); + } else if (columnType.equalsIgnoreCase("BIGINT")) { + return new BigIntParser(); + } else if (columnType.equalsIgnoreCase("FLOAT")) { + return new FloatParser(); + } else if (columnType.equalsIgnoreCase("DOUBLE")) { + return new DoubleParser(); + } else if (columnType.equalsIgnoreCase("BOOLEAN")) { + return new BooleanParser(); + } else if (columnType.equalsIgnoreCase("DATE") + || columnType.equalsIgnoreCase("DATEV2")) { + return new DateParser(); + } else if (columnType.equalsIgnoreCase("DATETIME") + || columnType.equalsIgnoreCase("DATETIMEV2")) { + return new DatetimeParser(); + } else if (columnType.equalsIgnoreCase("STRING") + || columnType.equalsIgnoreCase("TEXT")) { + return new StringTypeParser(etlColumn); + } else if (columnType.equalsIgnoreCase("VARCHAR") + || columnType.equalsIgnoreCase("CHAR") + || columnType.equalsIgnoreCase("BITMAP") + || columnType.equalsIgnoreCase("HLL")) { + return new StringParser(etlColumn); + } else if (columnType.equalsIgnoreCase("DECIMALV2") + || columnType.equalsIgnoreCase("DECIMAL32") + || columnType.equalsIgnoreCase("DECIMAL64") + || columnType.equalsIgnoreCase("DECIMAL128")) { + return new DecimalParser(etlColumn); + } else if (columnType.equalsIgnoreCase("LARGEINT")) { + return new LargeIntParser(); + } else { + throw new SparkDppException("unsupported type:" + columnType); + } + } + + public abstract boolean parse(String value); +} + +class TinyIntParser extends ColumnParser { + @Override + public boolean parse(String value) { + try { + Byte.parseByte(value); + } catch 
(NumberFormatException e) { + return false; + } + return true; + } +} + +class SmallIntParser extends ColumnParser { + @Override + public boolean parse(String value) { + try { + Short.parseShort(value); + } catch (NumberFormatException e) { + return false; + } + return true; + } +} + +class IntParser extends ColumnParser { + @Override + public boolean parse(String value) { + try { + Integer.parseInt(value); + } catch (NumberFormatException e) { + return false; + } + return true; + } +} + +class BigIntParser extends ColumnParser { + @Override + public boolean parse(String value) { + try { + Long.parseLong(value); + } catch (NumberFormatException e) { + return false; + } + return true; + } +} + +class FloatParser extends ColumnParser { + @Override + public boolean parse(String value) { + try { + Float ret = Float.parseFloat(value); + return !ret.isNaN() && !ret.isInfinite(); + } catch (NumberFormatException e) { + return false; + } + } +} + +class DoubleParser extends ColumnParser { + @Override + public boolean parse(String value) { + try { + Double ret = Double.parseDouble(value); + return !ret.isInfinite() && !ret.isNaN(); + } catch (NumberFormatException e) { + return false; + } + } +} + +class BooleanParser extends ColumnParser { + @Override + public boolean parse(String value) { + if (value.equalsIgnoreCase("true") + || value.equalsIgnoreCase("false") + || value.equals("0") || value.equals("1")) { + return true; + } + return false; + } +} + +class DateParser extends ColumnParser { + @Override + public boolean parse(String value) { + try { + DATE_FORMATTER.parse(value); + } catch (Exception e) { + return false; + } + return true; + } +} + +class DatetimeParser extends ColumnParser { + @Override + public boolean parse(String value) { + try { + DATE_TIME_FORMATTER.parse(value); + } catch (Exception e) { + return false; + } + return true; + } +} + +class StringParser extends ColumnParser { + + private EtlJobConfig.EtlColumn etlColumn; + + public StringParser(EtlJobConfig.EtlColumn etlColumn) { + this.etlColumn = etlColumn; + } + + @Override + public boolean parse(String value) { + try { + return value.getBytes("UTF-8").length <= etlColumn.stringLength; + } catch (Exception e) { + throw new RuntimeException("string check failed ", e); + } + } +} + +class StringTypeParser extends ColumnParser { + + private EtlJobConfig.EtlColumn etlColumn; + + public StringTypeParser(EtlJobConfig.EtlColumn etlColumn) { + this.etlColumn = etlColumn; + } + + @Override + public boolean parse(String value) { + try { + return value.getBytes("UTF-8").length <= DppUtils.STRING_LENGTH_LIMIT; + } catch (Exception e) { + throw new RuntimeException("string check failed ", e); + } + } +} + + +class DecimalParser extends ColumnParser { + + public static int PRECISION = 27; + public static int SCALE = 9; + + private BigDecimal maxValue; + private BigDecimal minValue; + + public DecimalParser(EtlJobConfig.EtlColumn etlColumn) { + StringBuilder precisionStr = new StringBuilder(); + for (int i = 0; i < etlColumn.precision - etlColumn.scale; i++) { + precisionStr.append("9"); + } + StringBuilder scaleStr = new StringBuilder(); + for (int i = 0; i < etlColumn.scale; i++) { + scaleStr.append("9"); + } + maxValue = new BigDecimal(precisionStr.toString() + "." + scaleStr.toString()); + minValue = new BigDecimal("-" + precisionStr.toString() + "." 
+ scaleStr.toString()); + } + + @Override + public boolean parse(String value) { + try { + BigDecimal bigDecimal = new BigDecimal(value); + return bigDecimal.precision() - bigDecimal.scale() <= PRECISION - SCALE && bigDecimal.scale() <= SCALE; + } catch (NumberFormatException e) { + return false; + } catch (Exception e) { + throw new RuntimeException("decimal parse failed ", e); + } + } + + public BigDecimal getMaxValue() { + return maxValue; + } + + public BigDecimal getMinValue() { + return minValue; + } +} + +class LargeIntParser extends ColumnParser { + + private BigInteger maxValue = new BigInteger("170141183460469231731687303715884105727"); + private BigInteger minValue = new BigInteger("-170141183460469231731687303715884105728"); + + @Override + public boolean parse(String value) { + try { + BigInteger inputValue = new BigInteger(value); + return inputValue.compareTo(maxValue) < 0 && inputValue.compareTo(minValue) > 0; + } catch (NumberFormatException e) { + return false; + } catch (ArithmeticException e) { + LOG.warn("int value is too big even for java BigInteger,value={}" + value); + return false; + } catch (Exception e) { + throw new RuntimeException("large int parse failed:" + value, e); + } + } +} diff --git a/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/DorisKryoRegistrator.java b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/DorisKryoRegistrator.java new file mode 100644 index 00000000..c873f5af --- /dev/null +++ b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/DorisKryoRegistrator.java @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.loadv2.dpp; + +import org.apache.doris.common.io.BitmapValue; +import org.apache.doris.common.io.Roaring64Map; + +import com.esotericsoftware.kryo.Kryo; +import org.apache.spark.serializer.KryoRegistrator; + +/** + * register etl classes with Kryo when using Kryo serialization. + */ +public class DorisKryoRegistrator implements KryoRegistrator { + + @Override + public void registerClasses(Kryo kryo) { + kryo.register(Roaring64Map.class); + kryo.register(BitmapValue.class); + } +} diff --git a/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/DorisRangePartitioner.java b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/DorisRangePartitioner.java new file mode 100644 index 00000000..9fd413db --- /dev/null +++ b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/DorisRangePartitioner.java @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.loadv2.dpp; + +import org.apache.doris.config.EtlJobConfig; + +import org.apache.spark.Partitioner; + +import java.io.Serializable; +import java.util.List; + +public class DorisRangePartitioner extends Partitioner { + private static final String UNPARTITIONED_TYPE = "UNPARTITIONED"; + private EtlJobConfig.EtlPartitionInfo partitionInfo; + private List partitionRangeKeys; + List partitionKeyIndexes; + + public DorisRangePartitioner(EtlJobConfig.EtlPartitionInfo partitionInfo, + List partitionKeyIndexes, + List partitionRangeKeys) { + this.partitionInfo = partitionInfo; + this.partitionKeyIndexes = partitionKeyIndexes; + this.partitionRangeKeys = partitionRangeKeys; + } + + public int numPartitions() { + if (partitionInfo == null) { + return 0; + } + if (partitionInfo.partitionType.equalsIgnoreCase(UNPARTITIONED_TYPE)) { + return 1; + } + return partitionInfo.partitions.size(); + } + + public int getPartition(Object var1) { + if (partitionInfo.partitionType != null + && partitionInfo.partitionType.equalsIgnoreCase(UNPARTITIONED_TYPE)) { + return 0; + } + DppColumns key = (DppColumns) var1; + // get the partition columns from key as partition key + DppColumns partitionKey = new DppColumns(key, partitionKeyIndexes); + // TODO: optimize this by use binary search + for (int i = 0; i < partitionRangeKeys.size(); ++i) { + if (partitionRangeKeys.get(i).isRowContained(partitionKey)) { + return i; + } + } + return -1; + } + + public static class PartitionRangeKey implements Serializable { + public boolean isMaxPartition; + public DppColumns startKeys; + public DppColumns endKeys; + + public boolean isRowContained(DppColumns row) { + if (isMaxPartition) { + return startKeys.compareTo(row) <= 0; + } else { + return startKeys.compareTo(row) <= 0 && endKeys.compareTo(row) > 0; + } + } + + public String toString() { + return "PartitionRangeKey{" + + "isMaxPartition=" + isMaxPartition + + ", startKeys=" + startKeys + + ", endKeys=" + endKeys + + '}'; + } + } +} diff --git a/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/DppColumns.java b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/DppColumns.java new file mode 100644 index 00000000..5b5e3f5d --- /dev/null +++ b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/DppColumns.java @@ -0,0 +1,108 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.loadv2.dpp; + +import com.google.common.base.Preconditions; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Objects; + +// DppColumns is used to store the +class DppColumns implements Comparable, Serializable { + public List columns = new ArrayList(); + + public DppColumns(List keys) { + this.columns = keys; + } + + public DppColumns(DppColumns key, List indexes) { + for (int i = 0; i < indexes.size(); ++i) { + columns.add(key.columns.get(indexes.get(i))); + } + } + + @Override + public int compareTo(DppColumns other) { + Preconditions.checkState(columns.size() == other.columns.size()); + + int cmp = 0; + for (int i = 0; i < columns.size(); i++) { + Object columnObj = columns.get(i); + Object otherColumn = other.columns.get(i); + if (columnObj == null && otherColumn == null) { + return 0; + } else if (columnObj == null || otherColumn == null) { + if (columnObj == null) { + return -1; + } else { + return 1; + } + } + if (columns.get(i) instanceof Integer) { + cmp = ((Integer) (columns.get(i))).compareTo((Integer) (other.columns.get(i))); + } else if (columns.get(i) instanceof Long) { + cmp = ((Long) (columns.get(i))).compareTo((Long) (other.columns.get(i))); + } else if (columns.get(i) instanceof Boolean) { + cmp = ((Boolean) (columns.get(i))).compareTo((Boolean) (other.columns.get(i))); + } else if (columns.get(i) instanceof Short) { + cmp = ((Short) (columns.get(i))).compareTo((Short) (other.columns.get(i))); + } else if (columns.get(i) instanceof Float) { + cmp = ((Float) (columns.get(i))).compareTo((Float) (other.columns.get(i))); + } else if (columns.get(i) instanceof Double) { + cmp = ((Double) (columns.get(i))).compareTo((Double) (other.columns.get(i))); + } else if (columns.get(i) instanceof Date) { + cmp = ((Date) (columns.get(i))).compareTo((Date) (other.columns.get(i))); + } else if (columns.get(i) instanceof java.sql.Timestamp) { + cmp = ((java.sql.Timestamp) columns.get(i)).compareTo((java.sql.Timestamp) other.columns.get(i)); + } else { + cmp = ((String) (columns.get(i))).compareTo((String) (other.columns.get(i))); + } + if (cmp != 0) { + return cmp; + } + } + return cmp; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + DppColumns dppColumns = (DppColumns) o; + return Objects.equals(columns, dppColumns.columns); + } + + @Override + public int hashCode() { + return Objects.hash(columns); + } + + @Override + public String toString() { + return "dppColumns{" + + "columns=" + columns + + '}'; + } +} diff --git a/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/DppUtils.java b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/DppUtils.java new file mode 100644 index 00000000..bf190408 --- /dev/null +++ b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/DppUtils.java @@ -0,0 +1,299 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor 
license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.loadv2.dpp; + +import org.apache.doris.common.SparkDppException; +import org.apache.doris.config.EtlJobConfig; + +import com.google.common.collect.Lists; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.DecimalType; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; +import java.util.List; +import java.util.Set; +import java.util.zip.CRC32; + +public class DppUtils { + public static final String BUCKET_ID = "__bucketId__"; + + public static final int STRING_LENGTH_LIMIT = 1048576; + + public static Class getClassFromDataType(DataType dataType) { + if (dataType == null) { + return null; + } + if (dataType.equals(DataTypes.BooleanType)) { + return Boolean.class; + } else if (dataType.equals(DataTypes.ShortType)) { + return Short.class; + } else if (dataType.equals(DataTypes.IntegerType)) { + return Integer.class; + } else if (dataType.equals(DataTypes.LongType)) { + return Long.class; + } else if (dataType.equals(DataTypes.FloatType)) { + return Float.class; + } else if (dataType.equals(DataTypes.DoubleType)) { + return Double.class; + } else if (dataType.equals(DataTypes.DateType)) { + return Date.class; + } else if (dataType.equals(DataTypes.StringType)) { + return String.class; + } else if (dataType instanceof DecimalType) { + DecimalType decimalType = (DecimalType) dataType; + return BigDecimal.valueOf(decimalType.precision(), decimalType.scale()).getClass(); + } else if (dataType.equals(DataTypes.TimestampType)) { + return Long.class; + } + return null; + } + + public static Class getClassFromColumn(EtlJobConfig.EtlColumn column) throws SparkDppException { + switch (column.columnType) { + case "BOOLEAN": + return Boolean.class; + case "TINYINT": + case "SMALLINT": + return Short.class; + case "INT": + return Integer.class; + case "DATETIME": + case "DATETIMEV2": + return java.sql.Timestamp.class; + case "BIGINT": + return Long.class; + case "LARGEINT": + throw new SparkDppException("LARGEINT is not supported now"); + case "FLOAT": + return Float.class; + case "DOUBLE": + return Double.class; + case "DATE": + case "DATEV2": + return Date.class; + case "HLL": + case "CHAR": + case "VARCHAR": + case "STRING": + case "TEXT": + case "BITMAP": + case "OBJECT": + return String.class; + case "DECIMALV2": + case "DECIMAL32": + case "DECIMAL64": + case "DECIMAL128": + return BigDecimal.valueOf(column.precision, column.scale).getClass(); + default: + return String.class; + } + } + + public static DataType 
getDataTypeFromColumn(EtlJobConfig.EtlColumn column, boolean regardDistinctColumnAsBinary) { + DataType dataType = DataTypes.StringType; + switch (column.columnType) { + case "BOOLEAN": + dataType = DataTypes.StringType; + break; + case "TINYINT": + dataType = DataTypes.ByteType; + break; + case "SMALLINT": + dataType = DataTypes.ShortType; + break; + case "INT": + dataType = DataTypes.IntegerType; + break; + case "DATETIME": + case "DATETIMEV2": + dataType = DataTypes.TimestampType; + break; + case "BIGINT": + dataType = DataTypes.LongType; + break; + case "LARGEINT": + dataType = DataTypes.StringType; + break; + case "FLOAT": + dataType = DataTypes.FloatType; + break; + case "DOUBLE": + dataType = DataTypes.DoubleType; + break; + case "DATE": + case "DATEV2": + dataType = DataTypes.DateType; + break; + case "CHAR": + case "VARCHAR": + case "STRING": + case "TEXT": + case "OBJECT": + dataType = DataTypes.StringType; + break; + case "HLL": + case "BITMAP": + dataType = regardDistinctColumnAsBinary ? DataTypes.BinaryType : DataTypes.StringType; + break; + case "DECIMALV2": + case "DECIMAL32": + case "DECIMAL64": + case "DECIMAL128": + dataType = DecimalType.apply(column.precision, column.scale); + break; + default: + throw new RuntimeException("Reason: invalid column type:" + column); + } + return dataType; + } + + public static ByteBuffer getHashValue(Object o, DataType type) { + ByteBuffer buffer = ByteBuffer.allocate(8); + buffer.order(ByteOrder.LITTLE_ENDIAN); + if (o == null) { + buffer.putInt(0); + return buffer; + } + if (type.equals(DataTypes.ByteType)) { + buffer.put((byte) o); + } else if (type.equals(DataTypes.ShortType)) { + buffer.putShort((Short) o); + } else if (type.equals(DataTypes.IntegerType)) { + buffer.putInt((Integer) o); + } else if (type.equals(DataTypes.LongType)) { + buffer.putLong((Long) o); + } else if (type.equals(DataTypes.StringType)) { + try { + String str = String.valueOf(o); + buffer = ByteBuffer.wrap(str.getBytes("UTF-8")); + } catch (Exception e) { + throw new RuntimeException(e); + } + } else if (type.equals(DataTypes.BooleanType)) { + Boolean b = (Boolean) o; + byte value = (byte) (b ? 
1 : 0); + buffer.put(value); + } + // do not flip buffer when the buffer was created by wrap() + if (!type.equals(DataTypes.StringType)) { + buffer.flip(); + } + return buffer; + } + + public static long getHashValue(Row row, List distributeColumns, StructType dstTableSchema) { + CRC32 hashValue = new CRC32(); + for (String distColumn : distributeColumns) { + Object columnObject = row.get(row.fieldIndex(distColumn)); + ByteBuffer buffer = getHashValue(columnObject, dstTableSchema.apply(distColumn).dataType()); + hashValue.update(buffer.array(), 0, buffer.limit()); + } + return hashValue.getValue(); + } + + public static StructType replaceBinaryColsInSchema(Set binaryColumns, StructType dstSchema) { + List fields = new ArrayList<>(); + for (StructField originField : dstSchema.fields()) { + if (binaryColumns.contains(originField.name())) { + fields.add(DataTypes.createStructField(originField.name(), + DataTypes.BinaryType, originField.nullable())); + } else { + fields.add(DataTypes.createStructField(originField.name(), + originField.dataType(), originField.nullable())); + } + } + StructType ret = DataTypes.createStructType(fields); + return ret; + } + + public static StructType createDstTableSchema(List columns, + boolean addBucketIdColumn, boolean regardDistinctColumnAsBinary) { + List fields = new ArrayList<>(); + if (addBucketIdColumn) { + StructField bucketIdField = DataTypes.createStructField(BUCKET_ID, DataTypes.StringType, true); + fields.add(bucketIdField); + } + for (EtlJobConfig.EtlColumn column : columns) { + DataType structColumnType = getDataTypeFromColumn(column, regardDistinctColumnAsBinary); + StructField field = DataTypes.createStructField(column.columnName, structColumnType, column.isAllowNull); + fields.add(field); + } + StructType dstSchema = DataTypes.createStructType(fields); + return dstSchema; + } + + public static List parseColumnsFromPath(String filePath, List columnsFromPath) + throws SparkDppException { + if (columnsFromPath == null || columnsFromPath.isEmpty()) { + return Collections.emptyList(); + } + String[] strings = filePath.split("/"); + if (strings.length < 2) { + System.err.println("Fail to parse columnsFromPath, expected: " + columnsFromPath + + ", filePath: " + filePath); + throw new SparkDppException("Reason: Fail to parse columnsFromPath, expected: " + + columnsFromPath + ", filePath: " + filePath); + } + String[] columns = new String[columnsFromPath.size()]; + int size = 0; + for (int i = strings.length - 2; i >= 0; i--) { + String str = strings[i]; + if (str != null && str.isEmpty()) { + continue; + } + if (str == null || !str.contains("=")) { + System.err.println("Fail to parse columnsFromPath, expected: " + columnsFromPath + + ", filePath: " + filePath); + throw new SparkDppException("Reason: Fail to parse columnsFromPath, expected: " + + columnsFromPath + ", filePath: " + filePath); + } + String[] pair = str.split("=", 2); + if (pair.length != 2) { + System.err.println("Fail to parse columnsFromPath, expected: " + columnsFromPath + + ", filePath: " + filePath); + throw new SparkDppException("Reason: Fail to parse columnsFromPath, expected: " + + columnsFromPath + ", filePath: " + filePath); + } + int index = columnsFromPath.indexOf(pair[0]); + if (index == -1) { + continue; + } + columns[index] = pair[1]; + size++; + if (size >= columnsFromPath.size()) { + break; + } + } + if (size != columnsFromPath.size()) { + System.err.println("Fail to parse columnsFromPath, expected: " + columnsFromPath + + ", filePath: " + filePath); + throw new 
SparkDppException("Reason: Fail to parse columnsFromPath, expected: " + + columnsFromPath + ", filePath: " + filePath); + } + return Lists.newArrayList(columns); + } +} diff --git a/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/GlobalDictBuilder.java b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/GlobalDictBuilder.java new file mode 100644 index 00000000..e19cfae8 --- /dev/null +++ b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/GlobalDictBuilder.java @@ -0,0 +1,432 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.loadv2.dpp; + +import org.apache.commons.collections.map.MultiValueMap; +import org.apache.commons.lang3.StringUtils; +import org.apache.spark.sql.AnalysisException; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.catalog.Column; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.stream.Collectors; + +/** + * used to build the hive global dict and encode the source hive table + * + * input: a source hive table + * output: an intermediate hive table whose distinct columns are encoded with int values + * + * usage example + * step 1, create an intermediate hive table + * GlobalDictBuilder.createHiveIntermediateTable() + * step 2, get the distinct column's values + * GlobalDictBuilder.extractDistinctColumn() + * step 3, build the global dict + * GlobalDictBuilder.buildGlobalDict() + * step 4, encode the intermediate hive table with the global dict + * GlobalDictBuilder.encodeDorisIntermediateHiveTable() + */ + +public class GlobalDictBuilder { + + protected static final Logger LOG = LoggerFactory.getLogger(GlobalDictBuilder.class); + + // names of the columns in the doris table which need a global dict + // for example: some dict columns a,b,c + // case 1: the dict columns have no relation, then the map is as below + // [a=null, b=null, c=null] + // case 2: column a's values can reuse column b's values, which means column a's values are a subset of column b's values + // [b=a,c=null] + private MultiValueMap dictColumn; + // target doris table columns in current spark load job + private List dorisOlapTableColumnList; 
+ // distinct columns which need to use map join to solve data skew in encodeDorisIntermediateHiveTable() + // we don't need to specify it unless data skew happens + private List mapSideJoinColumns; + + // hive table data source, format is db.table + private String sourceHiveDBTableName; + // user-specified filter applied when querying sourceHiveDBTable + private String sourceHiveFilter; + // intermediate hive table to store the distinct values of the distinct columns + private String distinctKeyTableName; + // current doris table's global dict hive table + private String globalDictTableName; + + // read by the next step + private String dorisIntermediateHiveTable; + private SparkSession spark; + + // key = doris column name, value = column type + private Map dorisColumnNameTypeMap = new HashMap<>(); + + // columns in this list have their distinct values split and then encoded separately + // to avoid the performance bottleneck of mapping origin values to dict values + private List veryHighCardinalityColumn; + // determines how many splits the new distinct values are divided into + private int veryHighCardinalityColumnSplitNum; + + private ExecutorService pool; + + private StructType distinctValueSchema; + + public GlobalDictBuilder(MultiValueMap dictColumn, + List dorisOlapTableColumnList, + List mapSideJoinColumns, + String sourceHiveDBTableName, + String sourceHiveFilter, + String dorisHiveDB, + String distinctKeyTableName, + String globalDictTableName, + String dorisIntermediateHiveTable, + int buildConcurrency, + List veryHighCardinalityColumn, + int veryHighCardinalityColumnSplitNum, + SparkSession spark) { + this.dictColumn = dictColumn; + this.dorisOlapTableColumnList = dorisOlapTableColumnList; + this.mapSideJoinColumns = mapSideJoinColumns; + this.sourceHiveDBTableName = sourceHiveDBTableName; + this.sourceHiveFilter = sourceHiveFilter; + this.distinctKeyTableName = distinctKeyTableName; + this.globalDictTableName = globalDictTableName; + this.dorisIntermediateHiveTable = dorisIntermediateHiveTable; + this.spark = spark; + // guard against a zero-sized thread pool when buildConcurrency is not positive + this.pool = Executors.newFixedThreadPool(buildConcurrency <= 0 ? 
1 : buildConcurrency); + this.veryHighCardinalityColumn = veryHighCardinalityColumn; + this.veryHighCardinalityColumnSplitNum = veryHighCardinalityColumnSplitNum; + + spark.sql("use " + dorisHiveDB); + } + + public void createHiveIntermediateTable() throws AnalysisException { + Map sourceHiveTableColumn = spark.catalog() + .listColumns(sourceHiveDBTableName) + .collectAsList() + .stream().collect(Collectors.toMap(Column::name, Column::dataType)); + + Map sourceHiveTableColumnInLowercase = new HashMap<>(); + for (Map.Entry entry : sourceHiveTableColumn.entrySet()) { + sourceHiveTableColumnInLowercase.put(entry.getKey().toLowerCase(), entry.getValue().toLowerCase()); + } + + // check and get doris column type in hive + dorisOlapTableColumnList.stream().map(String::toLowerCase).forEach(columnName -> { + String columnType = sourceHiveTableColumnInLowercase.get(columnName); + if (StringUtils.isEmpty(columnType)) { + throw new RuntimeException(String.format("doris column %s not in source hive table", columnName)); + } + dorisColumnNameTypeMap.put(columnName, columnType); + }); + + spark.sql(String.format("drop table if exists %s ", dorisIntermediateHiveTable)); + // create IntermediateHiveTable + spark.sql(getCreateIntermediateHiveTableSql()); + + // insert data to IntermediateHiveTable + spark.sql(getInsertIntermediateHiveTableSql()); + } + + public void extractDistinctColumn() { + // create distinct tables + spark.sql(getCreateDistinctKeyTableSql()); + + // extract distinct column + List workerList = new ArrayList<>(); + // For the column in dictColumns's valueSet, their value is a subset of column in keyset, + // so we don't need to extract distinct value of column in valueSet + for (Object column : dictColumn.keySet()) { + workerList.add( + () -> spark.sql(getInsertDistinctKeyTableSql(column.toString(), dorisIntermediateHiveTable))); + } + + submitWorker(workerList); + } + + public void buildGlobalDict() throws ExecutionException, InterruptedException { + // create global dict hive table + spark.sql(getCreateGlobalDictHiveTableSql()); + + List globalDictBuildWorkers = new ArrayList<>(); + for (Object distinctColumnNameOrigin : dictColumn.keySet()) { + String distinctColumnNameTmp = distinctColumnNameOrigin.toString(); + globalDictBuildWorkers.add(() -> { + // get global dict max value + List maxGlobalDictValueRow + = spark.sql(getMaxGlobalDictValueSql(distinctColumnNameTmp)).collectAsList(); + if (maxGlobalDictValueRow.size() == 0) { + throw new RuntimeException(String.format("get max dict value failed: %s", distinctColumnNameTmp)); + } + + long maxDictValue = 0; + long minDictValue = 0; + Row row = maxGlobalDictValueRow.get(0); + if (row != null && row.get(0) != null) { + maxDictValue = (long) row.get(0); + minDictValue = (long) row.get(1); + } + LOG.info(" column " + distinctColumnNameTmp + " 's max value in dict is " + + maxDictValue + ", min value is " + minDictValue); + // maybe never happened, but we need detect it + if (minDictValue < 0) { + throw new RuntimeException(String.format(" column %s 's cardinality has exceed bigint's max value", + distinctColumnNameTmp)); + } + + if (veryHighCardinalityColumn.contains(distinctColumnNameTmp) + && veryHighCardinalityColumnSplitNum > 1) { + // split distinct key first and then encode with count + buildGlobalDictBySplit(maxDictValue, distinctColumnNameTmp); + } else { + // build global dict directly + spark.sql(getBuildGlobalDictSql(maxDictValue, distinctColumnNameTmp)); + } + + }); + } + submitWorker(globalDictBuildWorkers); + } + + // encode 
dorisIntermediateHiveTable's distinct column + public void encodeDorisIntermediateHiveTable() { + for (Object distinctColumnObj : dictColumn.keySet()) { + spark.sql(getEncodeDorisIntermediateHiveTableSql(distinctColumnObj.toString(), + (ArrayList) dictColumn.get(distinctColumnObj.toString()))); + } + } + + private String getCreateIntermediateHiveTableSql() { + StringBuilder sql = new StringBuilder(); + sql.append("create table if not exists ").append(dorisIntermediateHiveTable).append(" ( "); + + Set allDictColumn = new HashSet<>(); + allDictColumn.addAll(dictColumn.keySet()); + allDictColumn.addAll(dictColumn.values()); + dorisOlapTableColumnList.forEach(columnName -> { + sql.append(columnName).append(" "); + if (allDictColumn.contains(columnName)) { + sql.append(" string ,"); + } else { + sql.append(dorisColumnNameTypeMap.get(columnName)).append(" ,"); + } + }); + return sql.deleteCharAt(sql.length() - 1).append(" )").append(" stored as sequencefile ").toString(); + } + + private String getInsertIntermediateHiveTableSql() { + StringBuilder sql = new StringBuilder(); + sql.append("insert overwrite table ").append(dorisIntermediateHiveTable).append(" select "); + dorisOlapTableColumnList.forEach(columnName -> { + sql.append(columnName).append(" ,"); + }); + sql.deleteCharAt(sql.length() - 1) + .append(" from ").append(sourceHiveDBTableName); + if (!StringUtils.isEmpty(sourceHiveFilter)) { + sql.append(" where ").append(sourceHiveFilter); + } + return sql.toString(); + } + + private String getCreateDistinctKeyTableSql() { + return "create table if not exists " + distinctKeyTableName + + "(dict_key string) partitioned by (dict_column string) stored as sequencefile "; + } + + private String getInsertDistinctKeyTableSql(String distinctColumnName, String sourceHiveTable) { + StringBuilder sql = new StringBuilder(); + sql.append("insert overwrite table ").append(distinctKeyTableName) + .append(" partition(dict_column='").append(distinctColumnName).append("')") + .append(" select ").append(distinctColumnName) + .append(" from ").append(sourceHiveTable) + .append(" group by ").append(distinctColumnName); + return sql.toString(); + } + + private String getCreateGlobalDictHiveTableSql() { + return "create table if not exists " + globalDictTableName + + "(dict_key string, dict_value bigint) partitioned by(dict_column string) stored as sequencefile "; + } + + private String getMaxGlobalDictValueSql(String distinctColumnName) { + return "select max(dict_value) as max_value,min(dict_value) as min_value from " + + globalDictTableName + " where dict_column='" + distinctColumnName + "'"; + } + + private void buildGlobalDictBySplit(long maxGlobalDictValue, String distinctColumnName) { + // 1. get distinct value + Dataset newDistinctValue = spark.sql(getNewDistinctValue(distinctColumnName)); + + // 2. 
split the newDistinctValue to avoid window functions' single node bottleneck + Dataset[] splitedDistinctValue = newDistinctValue.randomSplit(getRandomSplitWeights()); + long currentMaxDictValue = maxGlobalDictValue; + Map distinctKeyMap = new HashMap<>(); + + for (int i = 0; i < splitedDistinctValue.length; i++) { + long currentDatasetStartDictValue = currentMaxDictValue; + long splitDistinctValueCount = splitedDistinctValue[i].count(); + currentMaxDictValue += splitDistinctValueCount; + String tmpDictTableName = String.format("%s_%s_tmp_dict_%s", i, + currentDatasetStartDictValue, distinctColumnName); + distinctKeyMap.put(tmpDictTableName, currentDatasetStartDictValue); + Dataset distinctValueFrame = spark.createDataFrame( + splitedDistinctValue[i].toJavaRDD(), getDistinctValueSchema()); + distinctValueFrame.createOrReplaceTempView(tmpDictTableName); + } + + spark.sql(getSplitBuildGlobalDictSql(distinctKeyMap, distinctColumnName)); + + } + + private String getSplitBuildGlobalDictSql(Map distinctKeyMap, String distinctColumnName) { + StringBuilder sql = new StringBuilder(); + sql.append("insert overwrite table ").append(globalDictTableName) + .append(" partition(dict_column='").append(distinctColumnName).append("') ") + .append(" select dict_key,dict_value from ").append(globalDictTableName) + .append(" where dict_column='").append(distinctColumnName).append("' "); + for (Map.Entry entry : distinctKeyMap.entrySet()) { + sql.append(" union all select dict_key, CAST((row_number() over(order by dict_key)) as BIGINT) ") + .append(String.format("+ CAST(%s as BIGINT) as dict_value from %s", + entry.getValue(), entry.getKey())); + } + return sql.toString(); + } + + private StructType getDistinctValueSchema() { + if (distinctValueSchema == null) { + List fieldList = new ArrayList<>(); + fieldList.add(DataTypes.createStructField("dict_key", DataTypes.StringType, false)); + distinctValueSchema = DataTypes.createStructType(fieldList); + } + return distinctValueSchema; + } + + private double[] getRandomSplitWeights() { + double[] weights = new double[veryHighCardinalityColumnSplitNum]; + double weight = 1 / Double.parseDouble(String.valueOf(veryHighCardinalityColumnSplitNum)); + Arrays.fill(weights, weight); + return weights; + } + + private String getBuildGlobalDictSql(long maxGlobalDictValue, String distinctColumnName) { + return "insert overwrite table " + globalDictTableName + " partition(dict_column='" + distinctColumnName + "') " + + " select dict_key,dict_value from " + globalDictTableName + + " where dict_column='" + distinctColumnName + "' " + + " union all select t1.dict_key as dict_key," + + "CAST((row_number() over(order by t1.dict_key)) as BIGINT) + " + + "CAST(" + maxGlobalDictValue + " as BIGINT) as dict_value from " + + "(select dict_key from " + distinctKeyTableName + + " where dict_column='" + distinctColumnName + "' and dict_key is not null)t1 left join " + + " (select dict_key,dict_value from " + globalDictTableName + + " where dict_column='" + distinctColumnName + "' )t2 " + + "on t1.dict_key = t2.dict_key where t2.dict_value is null"; + } + + private String getNewDistinctValue(String distinctColumnName) { + return "select t1.dict_key from " + + " (select dict_key from " + distinctKeyTableName + + " where dict_column='" + distinctColumnName + + "' and dict_key is not null)t1 left join " + + " (select dict_key,dict_value from " + globalDictTableName + + " where dict_column='" + distinctColumnName + "' )t2 " + + "on t1.dict_key = t2.dict_key where t2.dict_value is null"; + + } + + 
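// replaces each dict column's original value with its bigint dict_value by left joining the global dict table on dict_key, + // then overwrites dorisIntermediateHiveTable with the encoded result +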
private String getEncodeDorisIntermediateHiveTableSql(String dictColumn, List childColumn) { + StringBuilder sql = new StringBuilder(); + sql.append("insert overwrite table ").append(dorisIntermediateHiveTable).append(" select "); + // using map join to solve distinct column data skew + // here is a spark sql hint + if (mapSideJoinColumns.size() != 0 && mapSideJoinColumns.contains(dictColumn)) { + sql.append(" /*+ BROADCAST (t) */ "); + } + dorisOlapTableColumnList.forEach(columnName -> { + if (dictColumn.equals(columnName)) { + sql.append("t.dict_value").append(" ,"); + // means the dictColumn is reused + } else if (childColumn != null && childColumn.contains(columnName)) { + sql.append(String.format(" if(%s is null, null, t.dict_value) ", columnName)).append(" ,"); + } else { + sql.append(dorisIntermediateHiveTable).append(".").append(columnName).append(" ,"); + } + }); + sql.deleteCharAt(sql.length() - 1) + .append(" from ") + .append(dorisIntermediateHiveTable) + .append(" LEFT OUTER JOIN ( select dict_key,dict_value from ").append(globalDictTableName) + .append(" where dict_column='").append(dictColumn).append("' ) t on ") + .append(dorisIntermediateHiveTable).append(".").append(dictColumn) + .append(" = t.dict_key "); + return sql.toString(); + } + + private void submitWorker(List workerList) { + try { + List> futureList = new ArrayList<>(); + for (GlobalDictBuildWorker globalDictBuildWorker : workerList) { + futureList.add(pool.submit(new Callable() { + @Override + public Boolean call() throws Exception { + try { + globalDictBuildWorker.work(); + return true; + } catch (Exception e) { + LOG.error("BuildGlobalDict failed", e); + return false; + } + } + })); + } + + LOG.info("begin to fetch worker result"); + for (Future future : futureList) { + if (!future.get()) { + throw new RuntimeException("detect one worker failed"); + } + } + LOG.info("fetch worker result complete"); + } catch (Exception e) { + LOG.error("submit worker failed", e); + throw new RuntimeException("submit worker failed", e); + } + } + + private interface GlobalDictBuildWorker { + void work(); + } +} diff --git a/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/MinimumCoverageRollupTreeBuilder.java b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/MinimumCoverageRollupTreeBuilder.java new file mode 100644 index 00000000..ca89ab8d --- /dev/null +++ b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/MinimumCoverageRollupTreeBuilder.java @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.load.loadv2.dpp; + +import org.apache.doris.config.EtlJobConfig; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +// Build the RollupTree using a minimum coverage strategy: +// for each rollup index, choose as its parent the index with the fewest columns +// that still contains all of the rollup index's columns. +// Eg: +// There are five indexes: +// index1(c1, c2, c3, c4, c5) +// index2(c1, c2, c4) +// index3(c1, c2) +// index4(c3, c4) +// index5(c1, c2, c5) +// then the result tree is: +// index1 +// | \ \ +// index2 index4 index5 +// | +// index3 +// If more than one index meets the column coverage requirement and they +// have the same column count (eg: index2 vs index5), the child rollup is preferably +// built from the earlier index (eg: index3 is the child of index2). This can be +// further optimized based on the row number of the index. +public class MinimumCoverageRollupTreeBuilder implements RollupTreeBuilder { + public RollupTreeNode build(EtlJobConfig.EtlTable tableMeta) { + List indexes = tableMeta.indexes; + List indexMetas = new ArrayList<>(); + EtlJobConfig.EtlIndex baseIndex = null; + for (EtlJobConfig.EtlIndex indexMeta : indexes) { + if (indexMeta.isBaseIndex) { + baseIndex = indexMeta; + continue; + } + indexMetas.add(indexMeta); + } + List baseIndexColumns = baseIndex.columns; + List baseKeyColumns = new ArrayList<>(); + List baseValueColumns = new ArrayList<>(); + for (EtlJobConfig.EtlColumn columnMeta : baseIndexColumns) { + if (columnMeta.isKey) { + baseKeyColumns.add(columnMeta.columnName); + } else { + baseValueColumns.add(columnMeta.columnName); + } + } + RollupTreeNode root = new RollupTreeNode(); + root.parent = null; + root.keyColumnNames = baseKeyColumns; + root.valueColumnNames = baseValueColumns; + root.indexId = baseIndex.indexId; + root.indexMeta = baseIndex; + + // sort the index metas so that the column count decreases + Collections.sort(indexMetas, new EtlJobConfig.EtlIndexComparator().reversed()); + for (int i = 0; i < indexMetas.size(); ++i) { + List keyColumns = new ArrayList<>(); + List valueColumns = new ArrayList<>(); + for (EtlJobConfig.EtlColumn column : indexMetas.get(i).columns) { + if (column.isKey) { + keyColumns.add(column.columnName); + } else { + valueColumns.add(column.columnName); + } + } + if (!insertIndex(root, indexMetas.get(i), keyColumns, valueColumns)) { + throw new RuntimeException(String.format("can't find a parent rollup for rollup %s," + + " rollup tree is %s", indexMetas.get(i).toString(), root)); + } + } + return root; + } + + // DFS traversal to build the rollup tree; + // returns true if a parent rollup is found for the current rollup index + private boolean insertIndex(RollupTreeNode root, EtlJobConfig.EtlIndex indexMeta, + List keyColumns, + List valueColumns) { + // find a suitable parent rollup among the current node's children + if (root.children != null) { + for (int i = root.children.size() - 1; i >= 0; i--) { + if (insertIndex(root.children.get(i), indexMeta, keyColumns, valueColumns)) { + return true; + } + } + } + + // check whether the current node itself can be the parent rollup + if (root.keyColumnNames.containsAll(keyColumns) && root.valueColumnNames.containsAll(valueColumns)) { + if (root.children == null) { + root.children = new ArrayList<>(); + } + RollupTreeNode newChild = new RollupTreeNode(); + newChild.keyColumnNames = keyColumns; + newChild.valueColumnNames = valueColumns; + newChild.indexMeta = indexMeta; + newChild.indexId = indexMeta.indexId; + newChild.parent = root; + 
newChild.level = root.level + 1; + root.children.add(newChild); + return true; + } + + return false; + } +} diff --git a/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/RollupTreeBuilder.java b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/RollupTreeBuilder.java new file mode 100644 index 00000000..16ce92b8 --- /dev/null +++ b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/RollupTreeBuilder.java @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.loadv2.dpp; + +import org.apache.doris.config.EtlJobConfig; + +// RollupTreeBuilder is used to build the RollupTree from the TableMeta +public abstract interface RollupTreeBuilder { + public RollupTreeNode build(EtlJobConfig.EtlTable tableMeta); +} diff --git a/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/RollupTreeNode.java b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/RollupTreeNode.java new file mode 100644 index 00000000..ec3129f3 --- /dev/null +++ b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/RollupTreeNode.java @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.loadv2.dpp; + +import org.apache.doris.config.EtlJobConfig; + +import java.util.List; + +// Base and rollup indexes are managed as a RollupTree in order to +// produce the rollup index data from the best-fit index to get better performance. 
+// The calculation will be done through preorder traversal +public class RollupTreeNode { + public RollupTreeNode parent; + public List children; + public long indexId; + public List keyColumnNames; + public List valueColumnNames; + public int level; + public EtlJobConfig.EtlIndex indexMeta; + + public String toString() { + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < level; ++i) { + builder.append("-"); + } + builder.append("indexid: " + indexId + "\n"); + if (children != null && !children.isEmpty()) { + for (int i = 0; i < level; ++i) { + builder.append("-"); + } + builder.append("children:\n"); + for (RollupTreeNode child : children) { + builder.append(child.toString()); + } + } + return builder.toString(); + } +} diff --git a/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java new file mode 100644 index 00000000..6746e80e --- /dev/null +++ b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkDpp.java @@ -0,0 +1,1205 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.load.loadv2.dpp; + +import org.apache.doris.common.DppResult; +import org.apache.doris.common.SparkDppException; +import org.apache.doris.config.EtlJobConfig; +import org.apache.doris.util.JsonUtils; + +import com.google.common.base.Strings; +import com.google.common.collect.Maps; +import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.collections.IteratorUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.parquet.column.ParquetProperties.WriterVersion; +import org.apache.parquet.hadoop.ParquetWriter; +import org.apache.parquet.hadoop.metadata.CompressionCodecName; +import org.apache.spark.Partitioner; +import org.apache.spark.TaskContext; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.function.FlatMapFunction; +import org.apache.spark.api.java.function.PairFlatMapFunction; +import org.apache.spark.api.java.function.VoidFunction; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.catalyst.CatalystTypeConverters; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport; +import org.apache.spark.sql.functions; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.storage.StorageLevel; +import org.apache.spark.util.LongAccumulator; +import org.apache.spark.util.SerializableConfiguration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import scala.Tuple2; +import scala.collection.JavaConverters; + +import java.io.IOException; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import java.util.Set; +import java.util.stream.Collectors; +// This class is a Spark-based data preprocessing program, +// which will make use of the distributed compute framework of spark to +// do ETL job/sort/preaggregate jobs in spark job +// to boost the process of large amount of data load. +// the process steps are as following: +// 1. load data +// 1.1 load data from path/hive table +// 1.2 do the etl process +// 2. repartition data by using doris data model(partition and bucket) +// 3. process aggregation if needed +// 4. 
write data to parquet file + +public final class SparkDpp implements java.io.Serializable { + private static final Logger LOG = LoggerFactory.getLogger(SparkDpp.class); + + private static final String NULL_FLAG = "\\N"; + private static final String DPP_RESULT_FILE = "dpp_result.json"; + private static final String BITMAP_TYPE = "bitmap"; + Map> tableToBitmapDictColumns = new HashMap<>(); + Map> tableToBinaryBitmapColumns = new HashMap<>(); + private SparkSession spark = null; + private EtlJobConfig etlJobConfig = null; + private LongAccumulator abnormalRowAcc = null; + private LongAccumulator scannedRowsAcc = null; + private LongAccumulator fileNumberAcc = null; + private LongAccumulator fileSizeAcc = null; + private Map bucketKeyMap = new HashMap<>(); + // accumulator to collect invalid rows + private StringAccumulator invalidRows = new StringAccumulator(); + // save the hadoop configuration from spark session. + // because hadoop configuration is not serializable, + // we need to wrap it so that we can use it in executor. + private SerializableConfiguration serializableHadoopConf; + private DppResult dppResult = new DppResult(); + + // just for ut + public SparkDpp() { + } + + public SparkDpp(SparkSession spark, EtlJobConfig etlJobConfig, Map> tableToBitmapDictColumns, + Map> tableToBinaryBitmapColumns) { + this.spark = spark; + this.etlJobConfig = etlJobConfig; + if (tableToBitmapDictColumns != null) { + this.tableToBitmapDictColumns = tableToBitmapDictColumns; + } + if (tableToBinaryBitmapColumns != null) { + this.tableToBinaryBitmapColumns = tableToBinaryBitmapColumns; + } + } + + public void init() { + abnormalRowAcc = spark.sparkContext().longAccumulator("abnormalRowAcc"); + scannedRowsAcc = spark.sparkContext().longAccumulator("scannedRowsAcc"); + fileNumberAcc = spark.sparkContext().longAccumulator("fileNumberAcc"); + fileSizeAcc = spark.sparkContext().longAccumulator("fileSizeAcc"); + spark.sparkContext().register(invalidRows, "InvalidRowsAccumulator"); + this.serializableHadoopConf = new SerializableConfiguration(spark.sparkContext().hadoopConfiguration()); + } + + private JavaPairRDD, Object[]> processRDDAggregate(JavaPairRDD, Object[]> currentPairRDD, + RollupTreeNode curNode, + SparkRDDAggregator[] sparkRDDAggregators) + throws SparkDppException { + final boolean isDuplicateTable = !StringUtils.equalsIgnoreCase(curNode.indexMeta.indexType, "AGGREGATE") + && !StringUtils.equalsIgnoreCase(curNode.indexMeta.indexType, "UNIQUE"); + // Aggregate/UNIQUE table + if (!isDuplicateTable) { + int idx = 0; + for (int i = 0; i < curNode.indexMeta.columns.size(); i++) { + if (!curNode.indexMeta.columns.get(i).isKey) { + sparkRDDAggregators[idx] = SparkRDDAggregator.buildAggregator(curNode.indexMeta.columns.get(i)); + idx++; + } + } + + if (curNode.indexMeta.isBaseIndex) { + return currentPairRDD.mapToPair( + new EncodeBaseAggregateTableFunction(sparkRDDAggregators)) + .reduceByKey(new AggregateReduceFunction(sparkRDDAggregators)); + } else { + return currentPairRDD + .mapToPair(new EncodeRollupAggregateTableFunction( + getColumnIndexInParentRollup(curNode.keyColumnNames, curNode.valueColumnNames, + curNode.parent.keyColumnNames, curNode.parent.valueColumnNames))) + .reduceByKey(new AggregateReduceFunction(sparkRDDAggregators)); + } + // Duplicate Table + } else { + int idx = 0; + for (int i = 0; i < curNode.indexMeta.columns.size(); i++) { + if (!curNode.indexMeta.columns.get(i).isKey) { + // duplicate table doesn't need aggregator + // init a aggregator here just for keeping interface 
compatibility when writing data to HDFS + sparkRDDAggregators[idx] = new DefaultSparkRDDAggregator(); + idx++; + } + } + if (curNode.indexMeta.isBaseIndex) { + return currentPairRDD; + } else { + return currentPairRDD.mapToPair(new EncodeRollupAggregateTableFunction( + getColumnIndexInParentRollup(curNode.keyColumnNames, curNode.valueColumnNames, + curNode.parent.keyColumnNames, curNode.parent.valueColumnNames))); + } + } + } + + // write data to parquet file by using writing the parquet scheme of spark. + private void writeRepartitionAndSortedRDDToParquet(JavaPairRDD, Object[]> resultRDD, + String pathPattern, long tableId, + EtlJobConfig.EtlIndex indexMeta, + SparkRDDAggregator[] sparkRDDAggregators) { + // TODO(wb) should deal largeint as BigInteger instead of string when using biginteger as key, + // data type may affect sorting logic + StructType dstSchema = DppUtils.createDstTableSchema(indexMeta.columns, false, true); + + resultRDD.repartitionAndSortWithinPartitions(new BucketPartitioner(bucketKeyMap), new BucketComparator()) + .foreachPartition((VoidFunction, Object[]>>>) t -> { + // write the data to dst file + Configuration conf = new Configuration(serializableHadoopConf.value()); + FileSystem fs = FileSystem.get(new Path(etlJobConfig.outputPath).toUri(), conf); + String lastBucketKey = null; + ParquetWriter parquetWriter = null; + TaskContext taskContext = TaskContext.get(); + long taskAttemptId = taskContext.taskAttemptId(); + String dstPath = ""; + String tmpPath = ""; + + while (t.hasNext()) { + Tuple2, Object[]> pair = t.next(); + List keyColumns = pair._1(); + Object[] valueColumns = pair._2(); + if ((keyColumns.size() + valueColumns.length) <= 1) { + LOG.warn("invalid row:" + pair); + continue; + } + + String curBucketKey = keyColumns.get(0).toString(); + List columnObjects = new ArrayList<>(); + for (int i = 1; i < keyColumns.size(); ++i) { + columnObjects.add(keyColumns.get(i)); + } + for (int i = 0; i < valueColumns.length; ++i) { + columnObjects.add(sparkRDDAggregators[i].finalize(valueColumns[i])); + } + + // if the bucket key is new, it will belong to a new tablet + if (!curBucketKey.equals(lastBucketKey)) { + if (parquetWriter != null) { + parquetWriter.close(); + // rename tmpPath to path + try { + fs.rename(new Path(tmpPath), new Path(dstPath)); + } catch (IOException ioe) { + LOG.warn("rename from tmpPath" + tmpPath + " to dstPath:" + dstPath + + " failed. exception:" + ioe); + throw ioe; + } + } + // flush current writer and create a new writer + String[] bucketKey = curBucketKey.split("_"); + if (bucketKey.length != 2) { + LOG.warn("invalid bucket key:" + curBucketKey); + continue; + } + long partitionId = Long.parseLong(bucketKey[0]); + int bucketId = Integer.parseInt(bucketKey[1]); + dstPath = String.format(pathPattern, tableId, partitionId, indexMeta.indexId, bucketId, + indexMeta.schemaHash); + tmpPath = dstPath + "." 
+ taskAttemptId; + conf.setBoolean("spark.sql.parquet.writeLegacyFormat", false); + conf.setBoolean("spark.sql.parquet.int64AsTimestampMillis", false); + conf.setBoolean("spark.sql.parquet.int96AsTimestamp", true); + conf.setBoolean("spark.sql.parquet.binaryAsString", false); + conf.setBoolean("spark.sql.parquet.fieldId.write.enabled", true); + conf.set("spark.sql.parquet.outputTimestampType", "INT96"); + ParquetWriteSupport.setSchema(dstSchema, conf); + ParquetWriteSupport parquetWriteSupport = new ParquetWriteSupport(); + parquetWriter = new ParquetWriter<>(new Path(tmpPath), parquetWriteSupport, + CompressionCodecName.SNAPPY, 256 * 1024 * 1024, 16 * 1024, 1024 * 1024, true, false, + WriterVersion.PARQUET_1_0, conf); + LOG.info("[HdfsOperate]>> initialize writer succeed! path:" + tmpPath); + lastBucketKey = curBucketKey; + } + Object[] array = columnObjects.toArray(); + Object[] catalystArr = new Object[array.length]; + for (int i = 0; i < array.length; i++) { + catalystArr[i] = CatalystTypeConverters.createToCatalystConverter(dstSchema.apply(i).dataType()).apply(array[i]); + } + InternalRow internalRow = InternalRow.apply( + JavaConverters.asScalaBufferConverter(Arrays.asList(catalystArr)).asScala() + .toSeq()); + parquetWriter.write(internalRow); + } + if (parquetWriter != null) { + parquetWriter.close(); + try { + fs.rename(new Path(tmpPath), new Path(dstPath)); + } catch (IOException ioe) { + LOG.warn("rename from tmpPath" + tmpPath + " to dstPath:" + dstPath + " failed. exception:" + + ioe); + throw ioe; + } + } + + }); + } + + // TODO(wb) one shuffle to calculate the rollup in the same level + private void processRollupTree(RollupTreeNode rootNode, + JavaPairRDD, Object[]> rootRDD, + long tableId, EtlJobConfig.EtlIndex baseIndex) throws SparkDppException { + Queue nodeQueue = new LinkedList<>(); + nodeQueue.offer(rootNode); + int currentLevel = 0; + // level travel the tree + Map, Object[]>> parentRDDMap = new HashMap<>(); + parentRDDMap.put(baseIndex.indexId, rootRDD); + Map, Object[]>> childrenRDDMap = new HashMap<>(); + String pathPattern = etlJobConfig.outputPath + "/" + etlJobConfig.outputFilePattern; + while (!nodeQueue.isEmpty()) { + RollupTreeNode curNode = nodeQueue.poll(); + LOG.info("start to process index:" + curNode.indexId); + if (curNode.children != null) { + for (RollupTreeNode child : curNode.children) { + nodeQueue.offer(child); + } + } + JavaPairRDD, Object[]> curRDD = null; + // column select for rollup + if (curNode.level != currentLevel) { + for (JavaPairRDD, Object[]> rdd : parentRDDMap.values()) { + rdd.unpersist(); + } + currentLevel = curNode.level; + parentRDDMap.clear(); + parentRDDMap = childrenRDDMap; + childrenRDDMap = new HashMap<>(); + } + + long parentIndexId = baseIndex.indexId; + if (curNode.parent != null) { + parentIndexId = curNode.parent.indexId; + } + + JavaPairRDD, Object[]> parentRDD = parentRDDMap.get(parentIndexId); + + // aggregate + SparkRDDAggregator[] sparkRDDAggregators = new SparkRDDAggregator[curNode.valueColumnNames.size()]; + curRDD = processRDDAggregate(parentRDD, curNode, sparkRDDAggregators); + + childrenRDDMap.put(curNode.indexId, curRDD); + + if (curNode.children != null && curNode.children.size() > 1) { + // if the children number larger than 1, persist the dataframe for performance + curRDD.persist(StorageLevel.MEMORY_AND_DISK()); + } + // repartition and write to hdfs + writeRepartitionAndSortedRDDToParquet(curRDD, pathPattern, tableId, curNode.indexMeta, sparkRDDAggregators); + } + } + + // get column index map from 
parent rollup to child rollup + // not consider bucketId here + private Pair getColumnIndexInParentRollup(List childRollupKeyColumns, + List childRollupValueColumns, + List parentRollupKeyColumns, + List parentRollupValueColumns) + throws SparkDppException { + List keyMap = new ArrayList<>(); + List valueMap = new ArrayList<>(); + // find column index in parent rollup schema + for (String childRollupKeyColumn : childRollupKeyColumns) { + for (int j = 0; j < parentRollupKeyColumns.size(); j++) { + if (StringUtils.equalsIgnoreCase(childRollupKeyColumn, parentRollupKeyColumns.get(j))) { + keyMap.add(j); + break; + } + } + } + + for (String childRollupValueColumn : childRollupValueColumns) { + for (int j = 0; j < parentRollupValueColumns.size(); j++) { + if (StringUtils.equalsIgnoreCase(childRollupValueColumn, parentRollupValueColumns.get(j))) { + valueMap.add(j); + break; + } + } + } + + if (keyMap.size() != childRollupKeyColumns.size() || valueMap.size() != childRollupValueColumns.size()) { + throw new SparkDppException(String.format("column map index from child to parent has error," + + " key size src: %s, dst: %s; value size src: %s, dst: %s", + childRollupKeyColumns.size(), keyMap.size(), childRollupValueColumns.size(), valueMap.size())); + } + + return Pair.of(keyMap.toArray(new Integer[0]), valueMap.toArray(new Integer[0])); + } + + /** + * check decimal,char/varchar + */ + public boolean validateData(Object srcValue, EtlJobConfig.EtlColumn etlColumn, ColumnParser columnParser, Row row) { + + switch (etlColumn.columnType.toUpperCase()) { + case "DECIMALV2": + case "DECIMAL32": + case "DECIMAL64": + case "DECIMAL128": + // TODO(wb): support decimal round; see be DecimalV2Value::round + DecimalParser decimalParser = (DecimalParser) columnParser; + BigDecimal srcBigDecimal = (BigDecimal) srcValue; + if (srcValue != null && (decimalParser.getMaxValue().compareTo(srcBigDecimal) < 0 + || decimalParser.getMinValue().compareTo(srcBigDecimal) > 0)) { + LOG.warn(String.format("decimal value is not valid for defination, column=%s," + + " value=%s,precision=%s,scale=%s", + etlColumn.columnName, srcValue, srcBigDecimal.precision(), srcBigDecimal.scale())); + return false; + } + break; + case "CHAR": + case "VARCHAR": + // TODO(wb) padding char type + int strSize = 0; + if (srcValue != null && (strSize = srcValue.toString().getBytes(StandardCharsets.UTF_8).length) + > etlColumn.stringLength) { + LOG.warn(String.format("the length of input is too long than schema." + + " column_name:%s,input_str[%s],schema length:%s,actual length:%s", + etlColumn.columnName, row.toString(), etlColumn.stringLength, strSize)); + return false; + } + break; + case "STRING": + case "TEXT": + // TODO(zjf) padding string type + int strDataSize = 0; + if (srcValue != null && (strDataSize = srcValue.toString().getBytes(StandardCharsets.UTF_8).length) + > DppUtils.STRING_LENGTH_LIMIT) { + LOG.warn(String.format("The string type is limited to a maximum of %s bytes." 
+ + " column_name:%s,input_str[%s],actual length:%s", + DppUtils.STRING_LENGTH_LIMIT, etlColumn.columnName, row.toString(), strDataSize)); + return false; + } + break; + default: + return true; + } + return true; + } + + /** + * 1 project column and reorder column + * 2 validate data + * 3 fill tuple with partition column + */ + private JavaPairRDD, Object[]> fillTupleWithPartitionColumn(Dataset dataframe, + EtlJobConfig.EtlPartitionInfo partitionInfo, + List partitionKeyIndex, + List partitionRangeKeys, + List keyAndPartitionColumnNames, + List valueColumnNames, + StructType dstTableSchema, + EtlJobConfig.EtlIndex baseIndex, + List validPartitionIds) + throws SparkDppException { + List distributeColumns = partitionInfo.distributionColumnRefs; + Partitioner partitioner = new DorisRangePartitioner(partitionInfo, partitionKeyIndex, partitionRangeKeys); + Set validPartitionIndex = new HashSet<>(); + if (validPartitionIds == null) { + for (int i = 0; i < partitionInfo.partitions.size(); ++i) { + validPartitionIndex.add(i); + } + } else { + for (int i = 0; i < partitionInfo.partitions.size(); ++i) { + if (validPartitionIds.contains(partitionInfo.partitions.get(i).partitionId)) { + validPartitionIndex.add(i); + } + } + } + + Map parsers = Maps.newHashMap(); + for (EtlJobConfig.EtlColumn column : baseIndex.columns) { + parsers.put(column.columnName, ColumnParser.create(column)); + } + + // use PairFlatMapFunction instead of PairMapFunction because the there will be + // 0 or 1 output row for 1 input row + JavaPairRDD, Object[]> resultPairRDD = dataframe.toJavaRDD().flatMapToPair( + (PairFlatMapFunction, Object[]>) row -> { + List, Object[]>> result = new ArrayList<>(); + List keyAndPartitionColumns = new ArrayList<>(); + List keyColumns = new ArrayList<>(); + List valueColumns = new ArrayList<>(valueColumnNames.size()); + for (String columnName : keyAndPartitionColumnNames) { + Object columnObject = row.get(row.fieldIndex(columnName)); + if (!validateData(columnObject, baseIndex.getColumn(columnName), + parsers.get(columnName), row)) { + LOG.info("invalid row: " + row); + abnormalRowAcc.add(1); + return IteratorUtils.emptyIterator(); + } + keyAndPartitionColumns.add(columnObject); + + if (baseIndex.getColumn(columnName).isKey) { + keyColumns.add(columnObject); + } + } + + for (String columnName : valueColumnNames) { + Object columnObject = row.get(row.fieldIndex(columnName)); + if (!validateData(columnObject, baseIndex.getColumn(columnName), + parsers.get(columnName), row)) { + abnormalRowAcc.add(1); + return IteratorUtils.emptyIterator(); + } + valueColumns.add(columnObject); + } + + DppColumns key = new DppColumns(keyAndPartitionColumns); + int pid = partitioner.getPartition(key); + if (!validPartitionIndex.contains(pid)) { + LOG.warn("invalid partition for row:" + row + ", pid:" + pid); + abnormalRowAcc.add(1); + LOG.info("abnormalRowAcc:" + abnormalRowAcc); + if (abnormalRowAcc.value() < 5) { + LOG.info("add row to invalidRows:" + row.toString()); + invalidRows.add(row.toString()); + LOG.info("invalid rows contents:" + invalidRows.value()); + } + } else { + // TODO(wb) support lagreint for hash + long hashValue = DppUtils.getHashValue(row, distributeColumns, dstTableSchema); + int bucketId = (int) ((hashValue & 0xffffffffL) % partitionInfo.partitions.get(pid).bucketNum); + long partitionId = partitionInfo.partitions.get(pid).partitionId; + // bucketKey is partitionId_bucketId + String bucketKey = partitionId + "_" + bucketId; + + List tuple = new ArrayList<>(); + tuple.add(bucketKey); + 
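// the bucket key is placed first in the composite key so that BucketPartitioner can route each row to its tablet; the real key columns follow +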
tuple.addAll(keyColumns); + result.add(new Tuple2<>(tuple, valueColumns.toArray())); + } + return result.iterator(); + }); + + // use bucket number as the parallel number + int reduceNum = 0; + for (EtlJobConfig.EtlPartition partition : partitionInfo.partitions) { + for (int i = 0; i < partition.bucketNum; i++) { + bucketKeyMap.put(partition.partitionId + "_" + i, reduceNum); + reduceNum++; + } + } + + // print to system.out for easy to find log info + System.out.println("print bucket key map:" + bucketKeyMap.toString()); + + return resultPairRDD; + } + + // do the etl process + private Dataset convertSrcDataframeToDstDataframe(EtlJobConfig.EtlIndex baseIndex, + Dataset srcDataframe, StructType dstTableSchema, + EtlJobConfig.EtlFileGroup fileGroup) + throws SparkDppException { + + Dataset dataframe = srcDataframe; + StructType srcSchema = dataframe.schema(); + Set srcColumnNames = new HashSet<>(); + for (StructField field : srcSchema.fields()) { + srcColumnNames.add(field.name()); + } + Map columnMappings = fileGroup.columnMappings; + // 1. process simple columns + Set mappingColumns = null; + if (columnMappings != null) { + mappingColumns = columnMappings.keySet(); + } + List dstColumnNames = new ArrayList<>(); + for (StructField dstField : dstTableSchema.fields()) { + dstColumnNames.add(dstField.name()); + EtlJobConfig.EtlColumn column = baseIndex.getColumn(dstField.name()); + if (!srcColumnNames.contains(dstField.name())) { + if (mappingColumns != null && mappingColumns.contains(dstField.name())) { + // mapping columns will be processed in next step + continue; + } + if (column.defaultValue != null) { + if (column.defaultValue.equals(NULL_FLAG)) { + dataframe = dataframe.withColumn(dstField.name(), functions.lit(null)); + } else { + dataframe = dataframe.withColumn(dstField.name(), functions.lit(column.defaultValue)); + } + } else if (column.isAllowNull) { + dataframe = dataframe.withColumn(dstField.name(), functions.lit(null)); + } else { + throw new SparkDppException("Reason: no data for column:" + dstField.name()); + } + } + if (column.columnType.equalsIgnoreCase("DATE") || column.columnType.equalsIgnoreCase("DATEV2")) { + dataframe = dataframe.withColumn(dstField.name(), + dataframe.col(dstField.name()).cast(DataTypes.DateType)); + } else if (column.columnType.equalsIgnoreCase("DATETIME") + || column.columnType.equalsIgnoreCase("DATETIMEV2")) { + dataframe = dataframe.withColumn(dstField.name(), + dataframe.col(dstField.name()).cast(DataTypes.TimestampType)); + } else if (column.columnType.equalsIgnoreCase("BOOLEAN")) { + dataframe = dataframe.withColumn(dstField.name(), + functions.when(functions.lower(dataframe.col(dstField.name())).equalTo("true"), "1") + .when(dataframe.col(dstField.name()).equalTo("1"), "1") + .otherwise("0")); + } else if (!column.columnType.equalsIgnoreCase(BITMAP_TYPE) + && !dstField.dataType().equals(DataTypes.StringType)) { + dataframe = dataframe.withColumn(dstField.name(), + dataframe.col(dstField.name()).cast(dstField.dataType())); + } else if (column.columnType.equalsIgnoreCase(BITMAP_TYPE) + && dstField.dataType().equals(DataTypes.BinaryType)) { + dataframe = dataframe.withColumn(dstField.name(), + dataframe.col(dstField.name()).cast(DataTypes.BinaryType)); + } + if (fileGroup.isNegative && !column.isKey) { + // negative load + // value will be convert te -1 * value + dataframe = dataframe.withColumn(dstField.name(), functions.expr("-1 *" + dstField.name())); + } + } + // 2. 
process the mapping columns + for (String mappingColumn : mappingColumns) { + String mappingDescription = columnMappings.get(mappingColumn).toDescription(); + if (mappingDescription.toLowerCase().contains("hll_hash")) { + continue; + } + // here should cast data type to dst column type + dataframe = dataframe.withColumn(mappingColumn, + functions.expr(mappingDescription).cast(dstTableSchema.apply(mappingColumn).dataType())); + } + return dataframe; + } + + private Dataset loadDataFromPath(SparkSession spark, + EtlJobConfig.EtlFileGroup fileGroup, + String fileUrl, + EtlJobConfig.EtlIndex baseIndex, + List columns) throws SparkDppException { + List columnValueFromPath = DppUtils.parseColumnsFromPath(fileUrl, fileGroup.columnsFromPath); + List dataSrcColumns = fileGroup.fileFieldNames; + if (dataSrcColumns == null) { + // if there is no source columns info + // use base index columns as source columns + dataSrcColumns = new ArrayList<>(); + for (EtlJobConfig.EtlColumn column : baseIndex.columns) { + dataSrcColumns.add(column.columnName); + } + } + // for getting schema to check source data + Map dstColumnNameToIndex = new HashMap(); + for (int i = 0; i < baseIndex.columns.size(); i++) { + dstColumnNameToIndex.put(baseIndex.columns.get(i).columnName, i); + } + List srcColumnsWithColumnsFromPath = new ArrayList<>(dataSrcColumns); + if (fileGroup.columnsFromPath != null) { + srcColumnsWithColumnsFromPath.addAll(fileGroup.columnsFromPath); + } + + if ("parquet".equalsIgnoreCase(fileGroup.fileFormat)) { + // parquet had its own schema, just use it; perhaps we could add some validation in future. + Dataset dataFrame = spark.read().parquet(fileUrl); + if (!CollectionUtils.isEmpty(columnValueFromPath)) { + for (int k = 0; k < columnValueFromPath.size(); k++) { + dataFrame = dataFrame.withColumn( + fileGroup.columnsFromPath.get(k), functions.lit(columnValueFromPath.get(k))); + } + } + if (!Strings.isNullOrEmpty(fileGroup.where)) { + dataFrame = dataFrame.where(fileGroup.where); + } + return dataFrame; + } + + if ("orc".equalsIgnoreCase(fileGroup.fileFormat)) { + Dataset dataFrame = spark.read().orc(fileUrl); + if (!CollectionUtils.isEmpty(columnValueFromPath)) { + for (int k = 0; k < columnValueFromPath.size(); k++) { + dataFrame = dataFrame.withColumn( + fileGroup.columnsFromPath.get(k), functions.lit(columnValueFromPath.get(k))); + } + } + if (!Strings.isNullOrEmpty(fileGroup.where)) { + dataFrame = dataFrame.where(fileGroup.where); + } + return dataFrame; + } + + StructType srcSchema = createScrSchema(srcColumnsWithColumnsFromPath); + JavaRDD sourceDataRdd = spark.read().textFile(fileUrl).toJavaRDD(); + int columnSize = dataSrcColumns.size(); + List parsers = new ArrayList<>(); + for (EtlJobConfig.EtlColumn column : baseIndex.columns) { + parsers.add(ColumnParser.create(column)); + } + char separator = (char) fileGroup.columnSeparator.getBytes(StandardCharsets.UTF_8)[0]; + JavaRDD rowRDD = sourceDataRdd.flatMap( + record -> { + scannedRowsAcc.add(1); + String[] attributes = splitLine(record, separator); + List result = new ArrayList<>(); + boolean validRow = true; + if (attributes.length != columnSize) { + LOG.warn("invalid src schema, data columns:" + + attributes.length + ", file group columns:" + + columnSize + ", row:" + record); + validRow = false; + } else { + for (int i = 0; i < attributes.length; ++i) { + StructField field = srcSchema.apply(i); + String srcColumnName = field.name(); + if (attributes[i].equals(NULL_FLAG) && dstColumnNameToIndex.containsKey(srcColumnName)) { + if 
(baseIndex.columns.get(dstColumnNameToIndex.get(srcColumnName)).isAllowNull) { + attributes[i] = null; + } else { + LOG.warn("column name:" + srcColumnName + ", attribute: " + i + + " can not be null. row:" + record); + validRow = false; + break; + } + } + boolean isStrictMode = etlJobConfig.properties.strictMode; + if (isStrictMode) { + if (dstColumnNameToIndex.containsKey(srcColumnName)) { + int index = dstColumnNameToIndex.get(srcColumnName); + String type = columns.get(index).columnType; + if (type.equalsIgnoreCase("CHAR") + || type.equalsIgnoreCase("VARCHAR") + || fileGroup.columnMappings.containsKey(field.name())) { + continue; + } + ColumnParser parser = parsers.get(index); + boolean valid = parser.parse(attributes[i]); + if (!valid) { + validRow = false; + LOG.warn("invalid row:" + record + + ", attribute " + i + ": " + attributes[i] + " parsed failed"); + break; + } + } + } + } + } + if (validRow) { + Row row = null; + if (fileGroup.columnsFromPath == null) { + row = RowFactory.create(attributes); + } else { + // process columns from path + // append columns from path to the tail + List columnAttributes = new ArrayList<>(); + columnAttributes.addAll(Arrays.asList(attributes)); + columnAttributes.addAll(columnValueFromPath); + row = RowFactory.create(columnAttributes.toArray()); + } + result.add(row); + } else { + abnormalRowAcc.add(1); + // at most add 5 rows to invalidRows + if (abnormalRowAcc.value() <= 5) { + invalidRows.add(record); + } + } + return result.iterator(); + } + ); + + Dataset dataframe = spark.createDataFrame(rowRDD, srcSchema); + if (!Strings.isNullOrEmpty(fileGroup.where)) { + dataframe = dataframe.where(fileGroup.where); + } + return dataframe; + } + + private StructType createScrSchema(List srcColumns) { + List fields = new ArrayList<>(); + for (String srcColumn : srcColumns) { + // user StringType to load source data + StructField field = DataTypes.createStructField(srcColumn, DataTypes.StringType, true); + fields.add(field); + } + return DataTypes.createStructType(fields); + } + + // This method is to keep the splitting consistent with broker load / mini load + private String[] splitLine(String line, char sep) { + if (line == null || line.isEmpty()) { + return new String[0]; + } + int index = 0; + int lastIndex = 0; + // line-begin char and line-end char are considered to be 'delimeter' + List values = new ArrayList<>(); + for (int i = 0; i < line.length(); i++, index++) { + if (line.charAt(index) == sep) { + values.add(line.substring(lastIndex, index)); + lastIndex = index + 1; + } + } + values.add(line.substring(lastIndex, index)); + return values.toArray(new String[0]); + } + + // partition keys will be parsed into double from json + // so need to convert it to partition columns' type + private Object convertPartitionKey(Object srcValue, Class dstClass, boolean isV2Type) throws SparkDppException { + if (dstClass.equals(Float.class) || dstClass.equals(Double.class)) { + return null; + } + if (srcValue instanceof Double) { + if (dstClass.equals(Short.class)) { + return ((Double) srcValue).shortValue(); + } else if (dstClass.equals(Integer.class)) { + return ((Double) srcValue).intValue(); + } else if (dstClass.equals(Long.class)) { + return ((Double) srcValue).longValue(); + } else if (dstClass.equals(BigInteger.class)) { + // TODO(wb) gson will cast origin value to double by default + // when the partition column is largeint, this will cause error data + // need fix it thoroughly + return new BigInteger(srcValue.toString()); + } else if 
(dstClass.equals(java.sql.Date.class) || dstClass.equals(java.util.Date.class)) { + double srcValueDouble = (double) srcValue; + return convertToJavaDate((int) srcValueDouble); + } else if (dstClass.equals(java.sql.Timestamp.class)) { + double srcValueDouble = (double) srcValue; + if (isV2Type) { + return convertV2ToJavaDatetime((long) srcValueDouble); + } + return convertToJavaDatetime((long) srcValueDouble); + } else { + // dst type is string + return srcValue.toString(); + } + } else { + LOG.warn("unsupport partition key:" + srcValue); + throw new SparkDppException("unsupport partition key:" + srcValue); + } + } + + private java.sql.Timestamp convertToJavaDatetime(long src) { + String dateTimeStr = Long.valueOf(src).toString(); + if (dateTimeStr.length() != 14) { + throw new RuntimeException("invalid input date format for SparkDpp, src: " + dateTimeStr); + } + + String year = dateTimeStr.substring(0, 4); + String month = dateTimeStr.substring(4, 6); + String day = dateTimeStr.substring(6, 8); + String hour = dateTimeStr.substring(8, 10); + String min = dateTimeStr.substring(10, 12); + String sec = dateTimeStr.substring(12, 14); + + return java.sql.Timestamp.valueOf(String.format("%s-%s-%s %s:%s:%s", year, month, day, hour, min, sec)); + } + + private java.sql.Timestamp convertV2ToJavaDatetime(long src) { + String dateTimeStr = Long.valueOf(src).toString(); + if (dateTimeStr.length() != 18) { + throw new RuntimeException("invalid input date format for SparkDpp, src: " + dateTimeStr); + } + + long year = (src >> 46); + long month = (src >> 42) & ((1L << 4) - 1); + long day = (src >> 37) & ((1L << 5) - 1); + long hour = (src >> 32) & ((1L << 5) - 1); + long min = (src >> 26) & ((1L << 6) - 1); + long sec = (src >> 20) & ((1L << 6) - 1); + long ms = src & ((1L << 20) - 1); + + return java.sql.Timestamp.valueOf( + String.format("%d-%02d-%02d %02d:%02d:%02d.%d", year, month, day, hour, min, sec, ms)); + } + + private java.sql.Date convertToJavaDate(int originDate) { + int day = originDate & 0x1f; + originDate >>= 5; + int month = originDate & 0x0f; + originDate >>= 4; + int year = originDate; + return java.sql.Date.valueOf(String.format("%04d-%02d-%02d", year, month, day)); + } + + private List createPartitionRangeKeys( + EtlJobConfig.EtlPartitionInfo partitionInfo, List> partitionKeySchema, + Map partitionKeyIndexToType) throws SparkDppException { + List partitionRangeKeys = new ArrayList<>(); + for (EtlJobConfig.EtlPartition partition : partitionInfo.partitions) { + DorisRangePartitioner.PartitionRangeKey partitionRangeKey = new DorisRangePartitioner.PartitionRangeKey(); + List startKeyColumns = new ArrayList<>(); + for (int i = 0; i < partition.startKeys.size(); i++) { + Object value = partition.startKeys.get(i); + boolean isV2Type = + partitionKeyIndexToType.get(i) != null && partitionKeyIndexToType.get(i).endsWith("V2"); + startKeyColumns.add(convertPartitionKey(value, partitionKeySchema.get(i), isV2Type)); + } + partitionRangeKey.startKeys = new DppColumns(startKeyColumns); + if (!partition.isMaxPartition) { + partitionRangeKey.isMaxPartition = false; + List endKeyColumns = new ArrayList<>(); + for (int i = 0; i < partition.endKeys.size(); i++) { + Object value = partition.endKeys.get(i); + boolean isV2Type = + partitionKeyIndexToType.get(i) != null && partitionKeyIndexToType.get(i).endsWith("V2"); + endKeyColumns.add(convertPartitionKey(value, partitionKeySchema.get(i), isV2Type)); + } + partitionRangeKey.endKeys = new DppColumns(endKeyColumns); + } else { + 
partitionRangeKey.isMaxPartition = true; + } + partitionRangeKeys.add(partitionRangeKey); + } + return partitionRangeKeys; + } + + private Dataset loadDataFromFilePaths(SparkSession spark, + EtlJobConfig.EtlIndex baseIndex, + List filePaths, + EtlJobConfig.EtlFileGroup fileGroup, + StructType dstTableSchema) + throws SparkDppException, IOException { + Dataset fileGroupDataframe = null; + for (String filePath : filePaths) { + try { + FileSystem fs = FileSystem.get(new Path(filePath).toUri(), serializableHadoopConf.value()); + FileStatus[] fileStatuses = fs.globStatus(new Path(filePath)); + if (fileStatuses == null) { + throw new SparkDppException("fs list status failed: " + filePath); + } + for (FileStatus fileStatus : fileStatuses) { + if (fileStatus.isDirectory()) { + continue; + } + fileNumberAcc.add(1); + fileSizeAcc.add(fileStatus.getLen()); + } + } catch (Exception e) { + LOG.warn("parse path failed:" + filePath); + throw e; + } + if (fileGroup.columnSeparator == null) { + LOG.warn("invalid null column separator!"); + throw new SparkDppException("Reason: invalid null column separator!"); + } + Dataset dataframe = null; + + dataframe = loadDataFromPath(spark, fileGroup, filePath, baseIndex, baseIndex.columns); + dataframe = convertSrcDataframeToDstDataframe(baseIndex, dataframe, dstTableSchema, fileGroup); + if (fileGroupDataframe == null) { + fileGroupDataframe = dataframe; + } else { + fileGroupDataframe.union(dataframe); + } + } + return fileGroupDataframe; + } + + private Dataset loadDataFromHiveTable(SparkSession spark, + String hiveDbTableName, + EtlJobConfig.EtlIndex baseIndex, + EtlJobConfig.EtlFileGroup fileGroup, + StructType dstTableSchema, + Set dictBitmapColumnSet, + Set binaryBitmapColumnsSet) throws SparkDppException { + // select base index columns from hive table + StringBuilder sql = new StringBuilder(); + sql.append("select "); + baseIndex.columns.forEach(column -> { + sql.append(column.columnName).append(","); + }); + sql.deleteCharAt(sql.length() - 1).append(" from ").append(hiveDbTableName); + if (!Strings.isNullOrEmpty(fileGroup.where)) { + sql.append(" where ").append(fileGroup.where); + } + + Dataset dataframe = spark.sql(sql.toString()); + dataframe.show(); + // Note(wb): in current spark load implementation, spark load can't be consistent with doris BE; + // The reason is as follows + // For stream load in doris BE, it runs as follow steps: + // step 1: type check + // step 2: expression calculation + // step 3: strict mode check + // step 4: nullable column check + // BE can do the four steps row by row + // but spark load relies on spark to do step2, so it can only do step 1 for whole dataset + // and then do step 2 for whole dataset and so on; + // So in spark load, we first do step 1,3,4,and then do step 2. 
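+ // checkDataFromHiveWithStrictMode below covers step 1 (type check), step 3 (strict mode check)
+ // and step 4 (nullable column check) over the whole dataset; convertSrcDataframeToDstDataframe
+ // then applies the column mapping expressions, i.e. step 2.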
+ dataframe = checkDataFromHiveWithStrictMode(dataframe, baseIndex, fileGroup.columnMappings.keySet(), + etlJobConfig.properties.strictMode, dstTableSchema, dictBitmapColumnSet, binaryBitmapColumnsSet); + dataframe = convertSrcDataframeToDstDataframe(baseIndex, dataframe, dstTableSchema, fileGroup); + return dataframe; + } + + private Dataset checkDataFromHiveWithStrictMode(Dataset dataframe, EtlJobConfig.EtlIndex baseIndex, + Set mappingColKeys, boolean isStrictMode, + StructType dstTableSchema, + Set dictBitmapColumnSet, + Set binaryBitmapColumnsSet) throws SparkDppException { + List columnNameNeedCheckArrayList = new ArrayList<>(); + List columnParserArrayList = new ArrayList<>(); + for (EtlJobConfig.EtlColumn column : baseIndex.columns) { + // note(wb): there are three data source for bitmap column + // case 1: global dict and binary data; needn't check + // case 2: bitmap hash function; this func is not supported in spark load now, so ignore it here + // case 3: origin value is a integer value; it should be checked use LongParser + if (StringUtils.equalsIgnoreCase(column.columnType, "bitmap")) { + if (dictBitmapColumnSet.contains(column.columnName.toLowerCase())) { + continue; + } + if (binaryBitmapColumnsSet.contains(column.columnName.toLowerCase())) { + continue; + } + columnNameNeedCheckArrayList.add(column); + columnParserArrayList.add(new BigIntParser()); + } else if (!StringUtils.equalsIgnoreCase(column.columnType, "varchar") + && !StringUtils.equalsIgnoreCase(column.columnType, "char") + && !mappingColKeys.contains(column.columnName)) { + columnNameNeedCheckArrayList.add(column); + columnParserArrayList.add(ColumnParser.create(column)); + } + } + + ColumnParser[] columnParserArray = columnParserArrayList.toArray(new ColumnParser[0]); + EtlJobConfig.EtlColumn[] columnNameArray = columnNameNeedCheckArrayList.toArray(new EtlJobConfig.EtlColumn[0]); + + StructType srcSchema = dataframe.schema(); + JavaRDD result = dataframe.toJavaRDD().flatMap(new FlatMapFunction() { + @Override + public Iterator call(Row row) throws Exception { + List result = new ArrayList<>(); + Set columnIndexNeedToRepalceNull = new HashSet(); + boolean validRow = true; + for (int i = 0; i < columnNameArray.length; i++) { + EtlJobConfig.EtlColumn column = columnNameArray[i]; + int fieldIndex = row.fieldIndex(column.columnName); + Object value = row.get(fieldIndex); + if (value == null && !column.isAllowNull) { + validRow = false; + LOG.warn("column:" + i + " can not be null. row:" + row.toString()); + break; + } + if (value != null && !columnParserArray[i].parse(value.toString())) { + if (isStrictMode) { + validRow = false; + LOG.warn(String.format("row parsed failed in strict mode, column name %s, src row %s", + column.columnName, row.toString())); + } else if (!column.isAllowNull) { + // a column parsed failed would be filled null, + // but if doris column is not allowed null, we should skip this row + validRow = false; + LOG.warn("column:" + i + " can not be null. 
row:" + row.toString()); + break; + } else { + columnIndexNeedToRepalceNull.add(fieldIndex); + } + } + } + if (!validRow) { + abnormalRowAcc.add(1); + // at most add 5 rows to invalidRows + if (abnormalRowAcc.value() <= 5) { + invalidRows.add(row.toString()); + } + } else if (!columnIndexNeedToRepalceNull.isEmpty()) { + scannedRowsAcc.add(1); + Object[] newRow = new Object[row.size()]; + for (int i = 0; i < row.size(); i++) { + if (columnIndexNeedToRepalceNull.contains(i)) { + newRow[i] = null; + } else { + newRow[i] = row.get(i); + } + } + result.add(RowFactory.create(newRow)); + } else { + scannedRowsAcc.add(1); + result.add(row); + } + return result.iterator(); + } + }); + + // here we just check data but not do cast, + // so data type should be same with src schema which is hive table schema + return spark.createDataFrame(result, srcSchema); + } + + private void process() throws Exception { + try { + for (Map.Entry entry : etlJobConfig.tables.entrySet()) { + Long tableId = entry.getKey(); + EtlJobConfig.EtlTable etlTable = entry.getValue(); + LOG.info("etlTable:" + etlTable); + Set dictBitmapColumnSet = tableToBitmapDictColumns.getOrDefault(tableId, new HashSet<>()); + Set binaryBitmapColumnSet = tableToBinaryBitmapColumns.getOrDefault(tableId, new HashSet<>()); + + // get the base index meta + EtlJobConfig.EtlIndex baseIndex = null; + for (EtlJobConfig.EtlIndex indexMeta : etlTable.indexes) { + if (indexMeta.isBaseIndex) { + baseIndex = indexMeta; + break; + } + } + + // get key and partition column names and value column names separately + List keyAndPartitionColumnNames = new ArrayList<>(); + List valueColumnNames = new ArrayList<>(); + for (EtlJobConfig.EtlColumn etlColumn : baseIndex.columns) { + if (etlColumn.isKey) { + keyAndPartitionColumnNames.add(etlColumn.columnName); + } else { + if (etlTable.partitionInfo.partitionColumnRefs.contains(etlColumn.columnName)) { + keyAndPartitionColumnNames.add(etlColumn.columnName); + } + valueColumnNames.add(etlColumn.columnName); + } + } + + EtlJobConfig.EtlPartitionInfo partitionInfo = etlTable.partitionInfo; + List partitionKeyIndex = new ArrayList(); + List> partitionKeySchema = new ArrayList<>(); + for (String key : partitionInfo.partitionColumnRefs) { + for (int i = 0; i < baseIndex.columns.size(); ++i) { + EtlJobConfig.EtlColumn column = baseIndex.columns.get(i); + if (column.columnName.equals(key)) { + partitionKeyIndex.add(keyAndPartitionColumnNames.indexOf(key)); + partitionKeySchema.add(DppUtils.getClassFromColumn(column)); + break; + } + } + } + Map columnToType = baseIndex.columns.stream().collect( + Collectors.toMap(etlColumn -> etlColumn.columnName, etlColumn -> etlColumn.columnType)); + Map partitionKeyIndexToType = new HashMap<>(); + for (int i = 0; i < partitionInfo.partitionColumnRefs.size(); i++) { + String partitionColumn = partitionInfo.partitionColumnRefs.get(i); + partitionKeyIndexToType.put(i, columnToType.get(partitionColumn)); + } + List partitionRangeKeys + = createPartitionRangeKeys(partitionInfo, partitionKeySchema, partitionKeyIndexToType); + StructType dstTableSchema = DppUtils.createDstTableSchema(baseIndex.columns, false, false); + dstTableSchema = DppUtils.replaceBinaryColsInSchema(binaryBitmapColumnSet, dstTableSchema); + RollupTreeBuilder rollupTreeParser = new MinimumCoverageRollupTreeBuilder(); + RollupTreeNode rootNode = rollupTreeParser.build(etlTable); + LOG.info("Start to process rollup tree:" + rootNode); + + JavaPairRDD, Object[]> tablePairRDD = null; + for (EtlJobConfig.EtlFileGroup fileGroup : 
etlTable.fileGroups) { + List filePaths = fileGroup.filePaths; + Dataset fileGroupDataframe = null; + EtlJobConfig.SourceType sourceType = fileGroup.sourceType; + if (sourceType == EtlJobConfig.SourceType.FILE) { + fileGroupDataframe = loadDataFromFilePaths( + spark, baseIndex, filePaths, fileGroup, dstTableSchema); + } else if (sourceType == EtlJobConfig.SourceType.HIVE) { + fileGroupDataframe = loadDataFromHiveTable(spark, fileGroup.dppHiveDbTableName, + baseIndex, fileGroup, dstTableSchema, dictBitmapColumnSet, binaryBitmapColumnSet); + } else { + throw new RuntimeException("Unknown source type: " + sourceType.name()); + } + if (fileGroupDataframe == null) { + LOG.info("no data for file file group:" + fileGroup); + continue; + } + + JavaPairRDD, Object[]> ret = fillTupleWithPartitionColumn( + fileGroupDataframe, + partitionInfo, partitionKeyIndex, + partitionRangeKeys, + keyAndPartitionColumnNames, valueColumnNames, + dstTableSchema, baseIndex, fileGroup.partitions); + if (tablePairRDD == null) { + tablePairRDD = ret; + } else { + tablePairRDD.union(ret); + } + } + processRollupTree(rootNode, tablePairRDD, tableId, baseIndex); + } + LOG.info("invalid rows contents:" + invalidRows.value()); + dppResult.isSuccess = true; + dppResult.failedReason = ""; + } catch (Exception exception) { + LOG.warn("spark dpp failed for exception:" + exception); + dppResult.isSuccess = false; + dppResult.failedReason = exception.getMessage(); + throw exception; + } finally { + spark.stop(); + dppResult.normalRows = scannedRowsAcc.value() - abnormalRowAcc.value(); + dppResult.scannedRows = scannedRowsAcc.value(); + dppResult.fileNumber = fileNumberAcc.value(); + dppResult.fileSize = fileSizeAcc.value(); + dppResult.abnormalRows = abnormalRowAcc.value(); + dppResult.partialAbnormalRows = invalidRows.value(); + } + } + + private void writeDppResult(DppResult dppResult) throws Exception { + String outputPath = etlJobConfig.getOutputPath(); + String resultFilePath = outputPath + "/" + DPP_RESULT_FILE; + FileSystem fs = FileSystem.get(new Path(outputPath).toUri(), serializableHadoopConf.value()); + Path filePath = new Path(resultFilePath); + FSDataOutputStream outputStream = fs.create(filePath); + outputStream.write(JsonUtils.writeValueAsBytes(dppResult)); + outputStream.write('\n'); + outputStream.close(); + } + + public void doDpp() throws Exception { + try { + process(); + } finally { + // write dpp result to file in outputPath + writeDppResult(dppResult); + } + } +} diff --git a/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkRDDAggregator.java b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkRDDAggregator.java new file mode 100644 index 00000000..e06dc2df --- /dev/null +++ b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/SparkRDDAggregator.java @@ -0,0 +1,607 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.loadv2.dpp; + +import org.apache.doris.common.SparkDppException; +import org.apache.doris.common.io.BitmapValue; +import org.apache.doris.common.io.Hll; +import org.apache.doris.config.EtlJobConfig; + +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.spark.Partitioner; +import org.apache.spark.api.java.function.Function2; +import org.apache.spark.api.java.function.PairFunction; +import scala.Tuple2; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.Serializable; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Map; + +// contains all class about spark aggregate + +public abstract class SparkRDDAggregator implements Serializable { + + T init(Object value) { + return (T) value; + } + + abstract T update(T v1, T v2); + + Object finalize(Object value) { + return value; + } + + public static SparkRDDAggregator buildAggregator(EtlJobConfig.EtlColumn column) throws SparkDppException { + String aggType = StringUtils.lowerCase(column.aggregationType); + String columnType = StringUtils.lowerCase(column.columnType); + switch (aggType) { + case "bitmap_union": + return new BitmapUnionAggregator(); + case "hll_union": + return new HllUnionAggregator(); + case "max": + switch (columnType) { + case "tinyint": + case "smallint": + case "int": + case "bigint": + case "float": + case "double": + case "decimalv2": + case "decimal32": + case "decimal64": + case "decimal128": + case "date": + case "datetime": + case "datev2": + case "datetimev2": + return new NumberMaxAggregator(); + case "char": + case "varchar": + return new StringMaxAggregator(); + case "largeint": + return new LargeIntMaxAggregator(); + default: + throw new SparkDppException( + String.format("unsupported max aggregator for column type:%s", columnType)); + } + case "min": + switch (columnType) { + case "tinyint": + case "smallint": + case "int": + case "bigint": + case "float": + case "double": + case "decimalv2": + case "decimal32": + case "decimal64": + case "decimal128": + case "date": + case "datetime": + case "datev2": + case "datetimev2": + return new NumberMinAggregator(); + case "char": + case "varchar": + return new StringMinAggregator(); + case "largeint": + return new LargeIntMinAggregator(); + default: + throw new SparkDppException( + String.format("unsupported min aggregator for column type:%s", columnType)); + } + case "sum": + switch (columnType) { + case "tinyint": + return new ByteSumAggregator(); + case "smallint": + return new ShortSumAggregator(); + case "int": + return new IntSumAggregator(); + case "bigint": + return new LongSumAggregator(); + case "float": + return new FloatSumAggregator(); + case "double": + return new DoubleSumAggregator(); + case "largeint": + return new 
LargeIntSumAggregator(); + case "decimalv2": + case "decimal32": + case "decimal64": + case "decimal128": + return new BigDecimalSumAggregator(); + default: + throw new SparkDppException( + String.format("unsupported sum aggregator for column type:%s", columnType)); + } + case "replace_if_not_null": + return new ReplaceIfNotNullAggregator(); + case "replace": + return new ReplaceAggregator(); + default: + throw new SparkDppException(String.format("unsupported aggregate type %s", aggType)); + } + } + +} + +// just used for duplicate table, default logic is enough +class DefaultSparkRDDAggregator extends SparkRDDAggregator { + + @Override + Object update(Object v1, Object v2) { + return null; + } +} + +// just encode value column,used for base rollup +class EncodeBaseAggregateTableFunction implements PairFunction, Object[]>, List, Object[]> { + + private SparkRDDAggregator[] valueAggregators; + + public EncodeBaseAggregateTableFunction(SparkRDDAggregator[] valueAggregators) { + this.valueAggregators = valueAggregators; + } + + + @Override + public Tuple2, Object[]> call(Tuple2, Object[]> srcPair) throws Exception { + for (int i = 0; i < srcPair._2().length; i++) { + srcPair._2()[i] = valueAggregators[i].init(srcPair._2()[i]); + } + return srcPair; + } +} + +// just map column from parent rollup index to child rollup index,used for child rollup +class EncodeRollupAggregateTableFunction + implements PairFunction, Object[]>, List, Object[]> { + + Pair columnIndexInParentRollup; + + public EncodeRollupAggregateTableFunction(Pair columnIndexInParentRollup) { + this.columnIndexInParentRollup = columnIndexInParentRollup; + } + + @Override + public Tuple2, Object[]> call(Tuple2, Object[]> parentRollupKeyValuePair) + throws Exception { + Integer[] keyColumnIndexMap = columnIndexInParentRollup.getKey(); + Integer[] valueColumnIndexMap = columnIndexInParentRollup.getValue(); + + List keys = new ArrayList(); + Object[] values = new Object[valueColumnIndexMap.length]; + + // deal bucket_id column + keys.add(parentRollupKeyValuePair._1().get(0)); + for (int i = 0; i < keyColumnIndexMap.length; i++) { + keys.add(parentRollupKeyValuePair._1().get(keyColumnIndexMap[i] + 1)); + } + + for (int i = 0; i < valueColumnIndexMap.length; i++) { + values[i] = parentRollupKeyValuePair._2()[valueColumnIndexMap[i]]; + } + return new Tuple2<>(keys, values); + } +} + +class AggregateReduceFunction implements Function2 { + + private SparkRDDAggregator[] valueAggregators; + + public AggregateReduceFunction(SparkRDDAggregator[] sparkDppAggregators) { + this.valueAggregators = sparkDppAggregators; + } + + @Override + public Object[] call(Object[] v1, Object[] v2) throws Exception { + Object[] result = new Object[valueAggregators.length]; + for (int i = 0; i < v1.length; i++) { + result[i] = valueAggregators[i].update(v1[i], v2[i]); + } + return result; + } +} + +class ReplaceAggregator extends SparkRDDAggregator { + + @Override + Object update(Object dst, Object src) { + return src; + } +} + +class ReplaceIfNotNullAggregator extends SparkRDDAggregator { + + @Override + Object update(Object dst, Object src) { + return src == null ? 
dst : src; + } +} + +class BitmapUnionAggregator extends SparkRDDAggregator { + private static final Logger LOG = LogManager.getLogger(BitmapUnionAggregator.class); + + @Override + BitmapValue init(Object value) { + try { + BitmapValue bitmapValue = new BitmapValue(); + if (value instanceof byte[]) { + bitmapValue.deserialize(new DataInputStream(new ByteArrayInputStream((byte[]) value))); + } else if (value != null) { + bitmapValue.add(Long.parseLong(value.toString())); + } + return bitmapValue; + } catch (Exception e) { + throw new RuntimeException("build bitmap value failed", e); + } + } + + @Override + BitmapValue update(BitmapValue v1, BitmapValue v2) { + BitmapValue newBitmapValue = new BitmapValue(); + if (v1 != null) { + newBitmapValue.or(v1); + } + if (v2 != null) { + newBitmapValue.or(v2); + } + return newBitmapValue; + } + + @Override + byte[] finalize(Object value) { + try { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream outputStream = new DataOutputStream(bos); + ((BitmapValue) value).serialize(outputStream); + return bos.toByteArray(); + } catch (IOException ioException) { + LOG.warn("", ioException); + throw new RuntimeException(ioException); + } + } + +} + +class HllUnionAggregator extends SparkRDDAggregator { + private static final Logger LOG = LogManager.getLogger(HllUnionAggregator.class); + + @Override + Hll init(Object value) { + try { + Hll hll = new Hll(); + if (value instanceof byte[]) { + hll.deserialize(new DataInputStream(new ByteArrayInputStream((byte[]) value))); + } else if (value != null) { + hll.updateWithHash(value); + } + return hll; + } catch (Exception e) { + throw new RuntimeException("build hll value failed", e); + } + } + + @Override + Hll update(Hll v1, Hll v2) { + Hll newHll = new Hll(); + if (v1 != null) { + newHll.merge(v1); + } + if (v2 != null) { + newHll.merge(v2); + } + return newHll; + } + + @Override + byte[] finalize(Object value) { + try { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + DataOutputStream outputStream = new DataOutputStream(bos); + ((Hll) value).serialize(outputStream); + return bos.toByteArray(); + } catch (IOException ioException) { + LOG.warn("", ioException); + throw new RuntimeException(ioException); + } + } + +} + +class LargeIntMaxAggregator extends SparkRDDAggregator { + + BigInteger init(Object value) { + if (value == null) { + return null; + } + return new BigInteger(value.toString()); + } + + @Override + BigInteger update(BigInteger dst, BigInteger src) { + if (src == null) { + return dst; + } + if (dst == null) { + return src; + } + return dst.compareTo(src) > 0 ? dst : src; + } + + @Override + String finalize(Object value) { + BigInteger bigInteger = (BigInteger) value; + return bigInteger.toString(); + } +} + +class LargeIntMinAggregator extends LargeIntMaxAggregator { + + @Override + BigInteger update(BigInteger dst, BigInteger src) { + if (src == null) { + return dst; + } + if (dst == null) { + return src; + } + return dst.compareTo(src) < 0 ? dst : src; + } +} + +class LargeIntSumAggregator extends LargeIntMaxAggregator { + + @Override + BigInteger update(BigInteger dst, BigInteger src) { + if (src == null) { + return dst; + } + if (dst == null) { + return src; + } + return dst.add(src); + } +} + + +class NumberMaxAggregator extends SparkRDDAggregator { + + @Override + Object update(Object dst, Object src) { + if (src == null) { + return dst; + } + if (dst == null) { + return src; + } + return ((Comparable) dst).compareTo(src) > 0 ? 
dst : src; + } +} + + +class NumberMinAggregator extends SparkRDDAggregator { + + @Override + Object update(Object dst, Object src) { + if (src == null) { + return dst; + } + if (dst == null) { + return src; + } + return ((Comparable) dst).compareTo(src) < 0 ? dst : src; + } +} + +class LongSumAggregator extends SparkRDDAggregator { + + @Override + Long update(Long dst, Long src) { + if (src == null) { + return dst; + } + if (dst == null) { + return src; + } + return dst + src; + } +} + +class ShortSumAggregator extends SparkRDDAggregator { + + @Override + Short update(Short dst, Short src) { + if (src == null) { + return dst; + } + if (dst == null) { + return src; + } + int ret = dst + src; + // here may overflow, just keep the same logic with be + return (short) ret; + } +} + +class IntSumAggregator extends SparkRDDAggregator { + + @Override + Integer update(Integer dst, Integer src) { + if (src == null) { + return dst; + } + if (dst == null) { + return src; + } + long ret = Long.sum(dst, src); + // here may overflow, just keep the same logic with be + return (int) ret; + } +} + +class ByteSumAggregator extends SparkRDDAggregator { + + @Override + Byte update(Byte dst, Byte src) { + if (src == null) { + return dst; + } + if (dst == null) { + return src; + } + int ret = dst + src; + // here may overflow, just keep the same logic with be + return (byte) ret; + } +} + +class DoubleSumAggregator extends SparkRDDAggregator { + + @Override + strictfp Double update(Double dst, Double src) { + if (src == null) { + return dst; + } + if (dst == null) { + return src; + } + return dst + src; + } +} + +class FloatSumAggregator extends SparkRDDAggregator { + + @Override + strictfp Float update(Float dst, Float src) { + if (src == null) { + return dst; + } + if (dst == null) { + return src; + } + return dst + src; + } +} + +class StringMaxAggregator extends SparkRDDAggregator { + + @Override + String update(String dst, String src) { + if (src == null) { + return dst; + } + if (dst == null) { + return src; + } + return dst.compareTo(src) > 0 ? dst : src; + } +} + +class StringMinAggregator extends SparkRDDAggregator { + + @Override + String update(String dst, String src) { + if (src == null) { + return dst; + } + if (dst == null) { + return src; + } + return dst.compareTo(src) < 0 ? dst : src; + } +} + +class BigDecimalSumAggregator extends SparkRDDAggregator { + + + @Override + BigDecimal update(BigDecimal src, BigDecimal dst) { + if (src == null) { + return dst; + } + if (dst == null) { + return src; + } + return src.add(dst); + } +} + + +class BucketComparator implements Comparator>, Serializable { + + @Override + public int compare(List keyArray1, List keyArray2) { + int cmp = 0; + + for (int i = 0; i < keyArray1.size(); i++) { + Object key1 = keyArray1.get(i); + Object key2 = keyArray2.get(i); + if (key1 == key2) { + continue; + } + if (key1 == null || key2 == null) { + return key1 == null ? 
-1 : 1; + } + if (key1 instanceof Comparable && key2 instanceof Comparable) { + cmp = ((Comparable) key1).compareTo(key2); + } else { + throw new RuntimeException(String.format("uncomparable column type %s", key1.getClass().toString())); + } + if (cmp != 0) { + return cmp; + } + } + + return cmp; + } +} + +class BucketPartitioner extends Partitioner { + + private Map bucketKeyMap; + + public BucketPartitioner(Map bucketKeyMap) { + this.bucketKeyMap = bucketKeyMap; + } + + @Override + public int numPartitions() { + return bucketKeyMap.size(); + } + + @Override + public int getPartition(Object key) { + List rddKey = (List) key; + return bucketKeyMap.get(String.valueOf(rddKey.get(0))); + } +} diff --git a/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/StringAccumulator.java b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/StringAccumulator.java new file mode 100644 index 00000000..428a9d42 --- /dev/null +++ b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/dpp/StringAccumulator.java @@ -0,0 +1,65 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.loadv2.dpp; + +import org.apache.spark.util.AccumulatorV2; + +import java.util.ArrayList; +import java.util.List; + +// This class is a accumulator of string based on AccumulatorV2 +// (https://spark.apache.org/docs/latest/api/java/org/apache/spark/util/AccumulatorV2.html). +// Spark does not provide string accumulator. +// +// This class is used to collect the invalid rows when doing etl. +public class StringAccumulator extends AccumulatorV2 { + private List strs = new ArrayList<>(); + + @Override + public boolean isZero() { + return strs.isEmpty(); + } + + @Override + public AccumulatorV2 copy() { + StringAccumulator newAccumulator = new StringAccumulator(); + newAccumulator.strs.addAll(this.strs); + return newAccumulator; + } + + @Override + public void reset() { + strs.clear(); + } + + @Override + public void add(String v) { + strs.add(v); + } + + @Override + public void merge(AccumulatorV2 other) { + StringAccumulator o = (StringAccumulator) other; + strs.addAll(o.strs); + } + + @Override + public String value() { + return strs.toString(); + } +} diff --git a/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/etl/SparkEtlJob.java b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/etl/SparkEtlJob.java new file mode 100644 index 00000000..03300014 --- /dev/null +++ b/spark-load/spark-load-dpp/src/main/java/org/apache/doris/load/loadv2/etl/SparkEtlJob.java @@ -0,0 +1,288 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.loadv2.etl; + +import org.apache.doris.common.SparkDppException; +import org.apache.doris.config.EtlJobConfig; +import org.apache.doris.config.EtlJobConfig.EtlColumn; +import org.apache.doris.config.EtlJobConfig.EtlColumnMapping; +import org.apache.doris.config.EtlJobConfig.EtlFileGroup; +import org.apache.doris.config.EtlJobConfig.EtlIndex; +import org.apache.doris.config.EtlJobConfig.EtlTable; +import org.apache.doris.load.loadv2.dpp.GlobalDictBuilder; +import org.apache.doris.load.loadv2.dpp.SparkDpp; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import com.google.common.io.CharStreams; +import org.apache.commons.collections.map.MultiValueMap; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.spark.SparkConf; +import org.apache.spark.deploy.SparkHadoopUtil; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.functions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * SparkEtlJob is responsible for global dict building, data partition, data sort and data aggregation. + * 1. init job config + * 2. check if job has bitmap_dict function columns + * 3. build global dict if step 2 is true + * 4. 
dpp (data partition, data sort and data aggregation) + */ +public class SparkEtlJob { + private static final Logger LOG = LoggerFactory.getLogger(SparkEtlJob.class); + + private static final String BITMAP_DICT_FUNC = "bitmap_dict"; + private static final String TO_BITMAP_FUNC = "to_bitmap"; + private static final String BITMAP_HASH = "bitmap_hash"; + private static final String BINARY_BITMAP = "binary_bitmap"; + + private String jobConfigFilePath; + private EtlJobConfig etlJobConfig; + private Set hiveSourceTables; + private Map> tableToBitmapDictColumns; + private Map> tableToBinaryBitmapColumns; + private final SparkConf conf; + private SparkSession spark; + + private SparkEtlJob(String jobConfigFilePath) { + this.jobConfigFilePath = jobConfigFilePath; + this.etlJobConfig = null; + this.hiveSourceTables = Sets.newHashSet(); + this.tableToBitmapDictColumns = Maps.newHashMap(); + this.tableToBinaryBitmapColumns = Maps.newHashMap(); + conf = new SparkConf(); + } + + private void initSpark() { + //serialization conf + conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + conf.set("spark.kryo.registrator", "org.apache.doris.load.loadv2.dpp.DorisKryoRegistrator"); + conf.set("spark.kryo.registrationRequired", "false"); + spark = SparkSession.builder().enableHiveSupport().config(conf).getOrCreate(); + } + + private void initSparkConfigs(Map configs) { + if (configs == null) { + return; + } + for (Map.Entry entry : configs.entrySet()) { + conf.set(entry.getKey(), entry.getValue()); + conf.set("spark.hadoop." + entry.getKey(), entry.getValue()); + } + } + + private void initConfig() throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("job config file path: " + jobConfigFilePath); + } + Configuration hadoopConf = SparkHadoopUtil.get().newConfiguration(this.conf); + String jsonConfig; + Path path = new Path(jobConfigFilePath); + try (FileSystem fs = path.getFileSystem(hadoopConf); DataInputStream in = fs.open(path)) { + jsonConfig = CharStreams.toString(new InputStreamReader(in)); + } + if (LOG.isDebugEnabled()) { + LOG.debug("rdd read json config: " + jsonConfig); + } + etlJobConfig = EtlJobConfig.configFromJson(jsonConfig); + if (LOG.isDebugEnabled()) { + LOG.debug("etl job config: " + etlJobConfig); + } + } + + /* + * 1. check bitmap column + * 2. fill tableToBitmapDictColumns + * 3. 
remove bitmap_dict and to_bitmap mapping from columnMappings + */ + private void checkConfig() throws Exception { + for (Map.Entry entry : etlJobConfig.tables.entrySet()) { + boolean isHiveSource = false; + Set bitmapDictColumns = Sets.newHashSet(); + Set binaryBitmapColumns = Sets.newHashSet(); + + for (EtlFileGroup fileGroup : entry.getValue().fileGroups) { + if (fileGroup.sourceType == EtlJobConfig.SourceType.HIVE) { + isHiveSource = true; + } + Map newColumnMappings = Maps.newHashMap(); + for (Map.Entry mappingEntry : fileGroup.columnMappings.entrySet()) { + String columnName = mappingEntry.getKey(); + String exprStr = mappingEntry.getValue().toDescription(); + String funcName = functions.expr(exprStr).expr().prettyName(); + if (funcName.equalsIgnoreCase(BITMAP_HASH)) { + throw new SparkDppException("spark load not support bitmap_hash now"); + } + if (funcName.equalsIgnoreCase(BINARY_BITMAP)) { + binaryBitmapColumns.add(columnName.toLowerCase()); + } else if (funcName.equalsIgnoreCase(BITMAP_DICT_FUNC)) { + bitmapDictColumns.add(columnName.toLowerCase()); + } else if (!funcName.equalsIgnoreCase(TO_BITMAP_FUNC)) { + newColumnMappings.put(mappingEntry.getKey(), mappingEntry.getValue()); + } + } + // reset new columnMappings + fileGroup.columnMappings = newColumnMappings; + } + if (isHiveSource) { + hiveSourceTables.add(entry.getKey()); + } + if (!bitmapDictColumns.isEmpty()) { + tableToBitmapDictColumns.put(entry.getKey(), bitmapDictColumns); + } + if (!binaryBitmapColumns.isEmpty()) { + tableToBinaryBitmapColumns.put(entry.getKey(), binaryBitmapColumns); + } + } + LOG.info("init hiveSourceTables: " + hiveSourceTables + + ",tableToBitmapDictColumns: " + tableToBitmapDictColumns); + + // spark etl must have only one table with bitmap type column to process. 
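+ // the check below enforces that constraint: at most one hive source table and at most one
+ // table with bitmap_dict or binary_bitmap columns per etl job.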
+ if (hiveSourceTables.size() > 1 + || tableToBitmapDictColumns.size() > 1 + || tableToBinaryBitmapColumns.size() > 1) { + throw new Exception("spark etl job must have only one hive table with bitmap type column to process"); + } + } + + private void processDpp() throws Exception { + SparkDpp sparkDpp = new SparkDpp(spark, etlJobConfig, tableToBitmapDictColumns, tableToBinaryBitmapColumns); + sparkDpp.init(); + sparkDpp.doDpp(); + } + + private String buildGlobalDictAndEncodeSourceTable(EtlTable table, long tableId) { + // dict column map + MultiValueMap dictColumnMap = new MultiValueMap(); + for (String dictColumn : tableToBitmapDictColumns.get(tableId)) { + dictColumnMap.put(dictColumn, null); + } + + // doris schema + List dorisOlapTableColumnList = Lists.newArrayList(); + for (EtlIndex etlIndex : table.indexes) { + if (etlIndex.isBaseIndex) { + for (EtlColumn column : etlIndex.columns) { + dorisOlapTableColumnList.add(column.columnName); + } + } + } + + // hive db and tables + EtlFileGroup fileGroup = table.fileGroups.get(0); + String sourceHiveDBTableName = fileGroup.hiveDbTableName; + String dorisHiveDB = sourceHiveDBTableName.split("\\.")[0]; + String taskId = etlJobConfig.outputPath.substring(etlJobConfig.outputPath.lastIndexOf("/") + 1); + String globalDictTableName = String.format(EtlJobConfig.GLOBAL_DICT_TABLE_NAME, tableId); + String distinctKeyTableName = String.format(EtlJobConfig.DISTINCT_KEY_TABLE_NAME, tableId, taskId); + String dorisIntermediateHiveTable = String.format( + EtlJobConfig.DORIS_INTERMEDIATE_HIVE_TABLE_NAME, tableId, taskId); + String sourceHiveFilter = fileGroup.where; + + // others + List mapSideJoinColumns = Lists.newArrayList(); + int buildConcurrency = 1; + List veryHighCardinalityColumn = Lists.newArrayList(); + int veryHighCardinalityColumnSplitNum = 1; + + LOG.info("global dict builder args, dictColumnMap: " + dictColumnMap + + ", dorisOlapTableColumnList: " + dorisOlapTableColumnList + + ", sourceHiveDBTableName: " + sourceHiveDBTableName + + ", sourceHiveFilter: " + sourceHiveFilter + + ", distinctKeyTableName: " + distinctKeyTableName + + ", globalDictTableName: " + globalDictTableName + + ", dorisIntermediateHiveTable: " + dorisIntermediateHiveTable); + try { + GlobalDictBuilder globalDictBuilder = new GlobalDictBuilder(dictColumnMap, dorisOlapTableColumnList, + mapSideJoinColumns, sourceHiveDBTableName, sourceHiveFilter, dorisHiveDB, distinctKeyTableName, + globalDictTableName, dorisIntermediateHiveTable, buildConcurrency, veryHighCardinalityColumn, + veryHighCardinalityColumnSplitNum, spark); + globalDictBuilder.createHiveIntermediateTable(); + globalDictBuilder.extractDistinctColumn(); + globalDictBuilder.buildGlobalDict(); + globalDictBuilder.encodeDorisIntermediateHiveTable(); + } catch (Exception e) { + throw new RuntimeException(e); + } + + return String.format("%s.%s", dorisHiveDB, dorisIntermediateHiveTable); + } + + private void processData() throws Exception { + if (!hiveSourceTables.isEmpty()) { + // only one table + long tableId = -1; + EtlTable table = null; + for (Map.Entry entry : etlJobConfig.tables.entrySet()) { + tableId = entry.getKey(); + table = entry.getValue(); + break; + } + + // init hive configs like metastore service + EtlFileGroup fileGroup = table.fileGroups.get(0); + initSparkConfigs(fileGroup.hiveTableProperties); + fileGroup.dppHiveDbTableName = fileGroup.hiveDbTableName; + + // build global dict and encode source hive table if has bitmap dict columns + if (!tableToBitmapDictColumns.isEmpty() && 
tableToBitmapDictColumns.containsKey(tableId)) { + String dorisIntermediateHiveDbTableName = buildGlobalDictAndEncodeSourceTable(table, tableId); + // set with dorisIntermediateHiveDbTable + fileGroup.dppHiveDbTableName = dorisIntermediateHiveDbTableName; + } + } + + initSpark(); + // data partition sort and aggregation + processDpp(); + } + + private void run() throws Exception { + initConfig(); + checkConfig(); + processData(); + } + + public static void main(String[] args) { + if (args.length < 1) { + System.err.println("missing job config file path arg"); + System.exit(-1); + } + + try { + new SparkEtlJob(args[0]).run(); + } catch (Exception e) { + System.err.println("spark etl job run failed"); + LOG.warn("", e); + System.exit(-1); + } + } +} diff --git a/spark-load/spark-load-dpp/src/test/java/org/apache/doris/load/loadv2/dpp/ColumnParserTest.java b/spark-load/spark-load-dpp/src/test/java/org/apache/doris/load/loadv2/dpp/ColumnParserTest.java new file mode 100644 index 00000000..9c219a14 --- /dev/null +++ b/spark-load/spark-load-dpp/src/test/java/org/apache/doris/load/loadv2/dpp/ColumnParserTest.java @@ -0,0 +1,135 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.load.loadv2.dpp; + +import org.apache.doris.config.EtlJobConfig; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class ColumnParserTest { + + // TODO(wb) try to keep ut consistent with be's ut + @Test + public void testBoundCheck() { + // tinyint + TinyIntParser tinyIntParser = new TinyIntParser(); + // 1 normal + String tinyint = "100"; + Assertions.assertTrue(tinyIntParser.parse(tinyint)); + // 2 upper + String tinyintUpper = "128"; + Assertions.assertFalse(tinyIntParser.parse(tinyintUpper)); + // 3 lower + String tinyintLower = "-129"; + Assertions.assertFalse(tinyIntParser.parse(tinyintLower)); + + // smallint + SmallIntParser smallIntParser = new SmallIntParser(); + // 1 normal + String smallint = "100"; + Assertions.assertTrue(smallIntParser.parse(smallint)); + // 2 upper + String smallintUpper = "32768"; + Assertions.assertFalse(smallIntParser.parse(smallintUpper)); + // 3 lower + String smallintLower = "-32769"; + Assertions.assertFalse(smallIntParser.parse(smallintLower)); + + // int + IntParser intParser = new IntParser(); + // 1 normal + String intValue = "100"; + Assertions.assertTrue(intParser.parse(intValue)); + // 2 upper + String intUpper = "2147483648"; + Assertions.assertFalse(intParser.parse(intUpper)); + // 3 lower + String intLower = "-2147483649"; + Assertions.assertFalse(intParser.parse(intLower)); + + // bigint + BigIntParser bigIntParser = new BigIntParser(); + // 1 normal + String bigint = "100"; + Assertions.assertTrue(bigIntParser.parse(bigint)); + // 2 upper + String bigintUpper = "9223372036854775808"; + Assertions.assertFalse(bigIntParser.parse(bigintUpper)); + // 3 lower + String bigintLower = "-9223372036854775809"; + Assertions.assertFalse(bigIntParser.parse(bigintLower)); + + // largeint + LargeIntParser largeIntParser = new LargeIntParser(); + // 1 normal + String largeint = "100"; + Assertions.assertTrue(largeIntParser.parse(largeint)); + // 2 upper + String largeintUpper = "170141183460469231731687303715884105728"; + Assertions.assertFalse(largeIntParser.parse(largeintUpper)); + // 3 lower + String largeintLower = "-170141183460469231731687303715884105729"; + Assertions.assertFalse(largeIntParser.parse(largeintLower)); + + // float + FloatParser floatParser = new FloatParser(); + // normal + String floatValue = "1.1"; + Assertions.assertTrue(floatParser.parse(floatValue)); + // inf + String inf = "Infinity"; + Assertions.assertFalse(floatParser.parse(inf)); + // nan + String nan = "NaN"; + // failed + Assertions.assertFalse(floatParser.parse(nan)); + + // double + DoubleParser doubleParser = new DoubleParser(); + // normal + Assertions.assertTrue(doubleParser.parse(floatValue)); + // inf + Assertions.assertFalse(doubleParser.parse(inf)); + // nan + Assertions.assertFalse(doubleParser.parse(nan)); + + // decimal + EtlJobConfig.EtlColumn etlColumn = new EtlJobConfig.EtlColumn(); + etlColumn.precision = 5; + etlColumn.scale = 3; + DecimalParser decimalParser = new DecimalParser(etlColumn); + // normal + String decimalValue = "10.333"; + Assertions.assertTrue(decimalParser.parse(decimalValue)); + // overflow + String decimalOverflow = "1000.3333333333"; + Assertions.assertFalse(decimalParser.parse(decimalOverflow)); + + // string + EtlJobConfig.EtlColumn stringColumn = new EtlJobConfig.EtlColumn(); + stringColumn.stringLength = 3; + StringParser stringParser = new StringParser(stringColumn); + // normal + String stringnormal = "a"; + Assertions.assertTrue(stringParser.parse(stringnormal)); 
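+ // "中文" is 2 characters but 6 bytes in UTF-8, so it should exceed the 3-byte string length
+ // limit configured above (the length check presumably applies to the encoded byte length).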
+ // overflow + String stringoverflow = "中文"; + Assertions.assertFalse(stringParser.parse(stringoverflow)); + } + +} diff --git a/spark-load/spark-load-dpp/src/test/java/org/apache/doris/load/loadv2/dpp/DorisRangePartitionerTest.java b/spark-load/spark-load-dpp/src/test/java/org/apache/doris/load/loadv2/dpp/DorisRangePartitionerTest.java new file mode 100644 index 00000000..28eba87f --- /dev/null +++ b/spark-load/spark-load-dpp/src/test/java/org/apache/doris/load/loadv2/dpp/DorisRangePartitionerTest.java @@ -0,0 +1,135 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.loadv2.dpp; + +import org.apache.doris.config.EtlJobConfig; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + + +import java.util.ArrayList; +import java.util.List; + +public class DorisRangePartitionerTest { + + @Test + public void testRangePartitioner() { + List startKeys = new ArrayList<>(); + startKeys.add(new Integer(0)); + List endKeys = new ArrayList<>(); + endKeys.add(new Integer(100)); + EtlJobConfig.EtlPartition partition1 = new EtlJobConfig.EtlPartition( + 10000, startKeys, endKeys, false, 3); + + List startKeys2 = new ArrayList<>(); + startKeys2.add(new Integer(100)); + List endKeys2 = new ArrayList<>(); + endKeys2.add(new Integer(200)); + EtlJobConfig.EtlPartition partition2 = new EtlJobConfig.EtlPartition( + 10001, startKeys2, endKeys2, false, 4); + + List startKeys3 = new ArrayList<>(); + startKeys3.add(new Integer(200)); + List endKeys3 = new ArrayList<>(); + endKeys3.add(new Integer(300)); + EtlJobConfig.EtlPartition partition3 = new EtlJobConfig.EtlPartition( + 10002, startKeys3, endKeys3, false, 5); + + List partitions = new ArrayList<>(); + partitions.add(partition1); + partitions.add(partition2); + partitions.add(partition3); + + List partitionColumns = new ArrayList<>(); + partitionColumns.add("id"); + List bucketColumns = new ArrayList<>(); + bucketColumns.add("key"); + EtlJobConfig.EtlPartitionInfo partitionInfo = new EtlJobConfig.EtlPartitionInfo( + "RANGE", partitionColumns, bucketColumns, partitions); + List partitionRangeKeys = new ArrayList<>(); + for (EtlJobConfig.EtlPartition partition : partitions) { + DorisRangePartitioner.PartitionRangeKey partitionRangeKey = new DorisRangePartitioner.PartitionRangeKey(); + partitionRangeKey.isMaxPartition = false; + partitionRangeKey.startKeys = new DppColumns(partition.startKeys); + partitionRangeKey.endKeys = new DppColumns(partition.endKeys); + partitionRangeKeys.add(partitionRangeKey); + } + List partitionKeyIndexes = new ArrayList<>(); + partitionKeyIndexes.add(0); + DorisRangePartitioner rangePartitioner = new DorisRangePartitioner(partitionInfo, partitionKeyIndexes, partitionRangeKeys); + int num = rangePartitioner.numPartitions(); + 
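+ // three range partitions were registered above, so the partitioner should report 3 partitions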
Assertions.assertEquals(3, num); + + List fields1 = new ArrayList<>(); + fields1.add(-100); + fields1.add("name"); + DppColumns record1 = new DppColumns(fields1); + int id1 = rangePartitioner.getPartition(record1); + Assertions.assertEquals(-1, id1); + + List fields2 = new ArrayList<>(); + fields2.add(10); + fields2.add("name"); + DppColumns record2 = new DppColumns(fields2); + int id2 = rangePartitioner.getPartition(record2); + Assertions.assertEquals(0, id2); + + List fields3 = new ArrayList<>(); + fields3.add(110); + fields3.add("name"); + DppColumns record3 = new DppColumns(fields3); + int id3 = rangePartitioner.getPartition(record3); + Assertions.assertEquals(1, id3); + + List fields4 = new ArrayList<>(); + fields4.add(210); + fields4.add("name"); + DppColumns record4 = new DppColumns(fields4); + int id4 = rangePartitioner.getPartition(record4); + Assertions.assertEquals(2, id4); + + List fields5 = new ArrayList<>(); + fields5.add(310); + fields5.add("name"); + DppColumns record5 = new DppColumns(fields5); + int id5 = rangePartitioner.getPartition(record5); + Assertions.assertEquals(-1, id5); + } + + @Test + public void testUnpartitionedPartitioner() { + List bucketColumns = new ArrayList<>(); + bucketColumns.add("key"); + EtlJobConfig.EtlPartitionInfo partitionInfo = new EtlJobConfig.EtlPartitionInfo( + "UNPARTITIONED", null, bucketColumns, null); + List partitionSchema = new ArrayList<>(); + partitionSchema.add(Integer.class); + List partitionKeyIndexes = new ArrayList<>(); + partitionKeyIndexes.add(0); + DorisRangePartitioner rangePartitioner = new DorisRangePartitioner(partitionInfo, partitionKeyIndexes, null); + int num = rangePartitioner.numPartitions(); + Assertions.assertEquals(1, num); + + List fields = new ArrayList<>(); + fields.add(100); + fields.add("name"); + DppColumns record = new DppColumns(fields); + int id = rangePartitioner.getPartition(record); + Assertions.assertEquals(0, id); + } +} diff --git a/spark-load/spark-load-dpp/src/test/java/org/apache/doris/load/loadv2/dpp/DppUtilsTest.java b/spark-load/spark-load-dpp/src/test/java/org/apache/doris/load/loadv2/dpp/DppUtilsTest.java new file mode 100644 index 00000000..4b47e14d --- /dev/null +++ b/spark-load/spark-load-dpp/src/test/java/org/apache/doris/load/loadv2/dpp/DppUtilsTest.java @@ -0,0 +1,239 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.load.loadv2.dpp; + +import org.apache.doris.config.EtlJobConfig; + +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructType; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + + +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; + +public class DppUtilsTest { + + @Test + public void testGetClassFromDataType() { + DppUtils dppUtils = new DppUtils(); + + Class stringResult = dppUtils.getClassFromDataType(DataTypes.StringType); + Assertions.assertEquals(String.class, stringResult); + + Class booleanResult = dppUtils.getClassFromDataType(DataTypes.BooleanType); + Assertions.assertEquals(Boolean.class, booleanResult); + + Class shortResult = dppUtils.getClassFromDataType(DataTypes.ShortType); + Assertions.assertEquals(Short.class, shortResult); + + Class integerResult = dppUtils.getClassFromDataType(DataTypes.IntegerType); + Assertions.assertEquals(Integer.class, integerResult); + + Class longResult = dppUtils.getClassFromDataType(DataTypes.LongType); + Assertions.assertEquals(Long.class, longResult); + + Class floatResult = dppUtils.getClassFromDataType(DataTypes.FloatType); + Assertions.assertEquals(Float.class, floatResult); + + Class doubleResult = dppUtils.getClassFromDataType(DataTypes.DoubleType); + Assertions.assertEquals(Double.class, doubleResult); + + Class dateResult = dppUtils.getClassFromDataType(DataTypes.DateType); + Assertions.assertEquals(Date.class, dateResult); + } + + @Test + public void testGetClassFromColumn() { + DppUtils dppUtils = new DppUtils(); + + try { + EtlJobConfig.EtlColumn column = new EtlJobConfig.EtlColumn(); + column.columnType = "CHAR"; + Class charResult = dppUtils.getClassFromColumn(column); + Assertions.assertEquals(String.class, charResult); + + column.columnType = "HLL"; + Class hllResult = dppUtils.getClassFromColumn(column); + Assertions.assertEquals(String.class, hllResult); + + column.columnType = "OBJECT"; + Class objectResult = dppUtils.getClassFromColumn(column); + Assertions.assertEquals(String.class, objectResult); + + column.columnType = "BOOLEAN"; + Class booleanResult = dppUtils.getClassFromColumn(column); + Assertions.assertEquals(Boolean.class, booleanResult); + + column.columnType = "TINYINT"; + Class tinyResult = dppUtils.getClassFromColumn(column); + Assertions.assertEquals(Short.class, tinyResult); + + column.columnType = "SMALLINT"; + Class smallResult = dppUtils.getClassFromColumn(column); + Assertions.assertEquals(Short.class, smallResult); + + column.columnType = "INT"; + Class integerResult = dppUtils.getClassFromColumn(column); + Assertions.assertEquals(Integer.class, integerResult); + + column.columnType = "DATETIME"; + Class datetimeResult = dppUtils.getClassFromColumn(column); + Assertions.assertEquals(java.sql.Timestamp.class, datetimeResult); + + column.columnType = "FLOAT"; + Class floatResult = dppUtils.getClassFromColumn(column); + Assertions.assertEquals(Float.class, floatResult); + + column.columnType = "DOUBLE"; + Class doubleResult = dppUtils.getClassFromColumn(column); + Assertions.assertEquals(Double.class, doubleResult); + + column.columnType = "DATE"; + Class dateResult = dppUtils.getClassFromColumn(column); + Assertions.assertEquals(Date.class, dateResult); + + column.columnType = "DECIMALV2"; + column.precision = 10; + column.scale = 2; + Class decimalResult = dppUtils.getClassFromColumn(column); + 
Assertions.assertEquals(BigDecimal.valueOf(10, 2).getClass(), decimalResult);
+        } catch (Exception e) {
+            Assertions.assertTrue(false);
+        }
+
+    }
+
+    @Test
+    public void testGetDataTypeFromColumn() {
+        DppUtils dppUtils = new DppUtils();
+
+        try {
+            EtlJobConfig.EtlColumn column = new EtlJobConfig.EtlColumn();
+            column.columnType = "VARCHAR";
+            DataType stringResult = dppUtils.getDataTypeFromColumn(column, false);
+            Assertions.assertEquals(DataTypes.StringType, stringResult);
+
+            column.columnType = "CHAR";
+            DataType charResult = dppUtils.getDataTypeFromColumn(column, false);
+            Assertions.assertEquals(DataTypes.StringType, charResult);
+
+            column.columnType = "HLL";
+            DataType hllResult = dppUtils.getDataTypeFromColumn(column, false);
+            Assertions.assertEquals(DataTypes.StringType, hllResult);
+
+            column.columnType = "OBJECT";
+            DataType objectResult = dppUtils.getDataTypeFromColumn(column, false);
+            Assertions.assertEquals(DataTypes.StringType, objectResult);
+
+            column.columnType = "BOOLEAN";
+            DataType booleanResult = dppUtils.getDataTypeFromColumn(column, false);
+            Assertions.assertEquals(DataTypes.StringType, booleanResult);
+
+            column.columnType = "TINYINT";
+            DataType tinyResult = dppUtils.getDataTypeFromColumn(column, false);
+            Assertions.assertEquals(DataTypes.ByteType, tinyResult);
+
+            column.columnType = "SMALLINT";
+            DataType smallResult = dppUtils.getDataTypeFromColumn(column, false);
+            Assertions.assertEquals(DataTypes.ShortType, smallResult);
+
+            column.columnType = "INT";
+            DataType integerResult = dppUtils.getDataTypeFromColumn(column, false);
+            Assertions.assertEquals(DataTypes.IntegerType, integerResult);
+
+            column.columnType = "BIGINT";
+            DataType longResult = dppUtils.getDataTypeFromColumn(column, false);
+            Assertions.assertEquals(DataTypes.LongType, longResult);
+
+            column.columnType = "DATETIME";
+            DataType datetimeResult = dppUtils.getDataTypeFromColumn(column, false);
+            Assertions.assertEquals(DataTypes.TimestampType, datetimeResult);
+
+            column.columnType = "FLOAT";
+            DataType floatResult = dppUtils.getDataTypeFromColumn(column, false);
+            Assertions.assertEquals(DataTypes.FloatType, floatResult);
+
+            column.columnType = "DOUBLE";
+            DataType doubleResult = dppUtils.getDataTypeFromColumn(column, false);
+            Assertions.assertEquals(DataTypes.DoubleType, doubleResult);
+
+            column.columnType = "DATE";
+            DataType dateResult = dppUtils.getDataTypeFromColumn(column, false);
+            Assertions.assertEquals(DataTypes.DateType, dateResult);
+        } catch (Exception e) {
+            Assertions.assertTrue(false);
+        }
+    }
+
+    @Test
+    public void testCreateDstTableSchema() {
+        DppUtils dppUtils = new DppUtils();
+
+        EtlJobConfig.EtlColumn column1 = new EtlJobConfig.EtlColumn(
+                "column1", "INT",
+                true, true,
+                "NONE", "0",
+                0, 0, 0);
+        EtlJobConfig.EtlColumn column2 = new EtlJobConfig.EtlColumn(
+                "column2", "SMALLINT",
+                true, true,
+                "NONE", "0",
+                0, 0, 0);
+        List<EtlJobConfig.EtlColumn> columns = new ArrayList<>();
+        columns.add(column1);
+        columns.add(column2);
+
+        try {
+            StructType schema = dppUtils.createDstTableSchema(columns, false, false);
+            Assertions.assertEquals(2, schema.fieldNames().length);
+            Assertions.assertEquals("column1", schema.fieldNames()[0]);
+            Assertions.assertEquals("column2", schema.fieldNames()[1]);
+
+            StructType schema2 = dppUtils.createDstTableSchema(columns, true, false);
+            Assertions.assertEquals(3, schema2.fieldNames().length);
+            Assertions.assertEquals("__bucketId__", schema2.fieldNames()[0]);
+            Assertions.assertEquals("column1", schema2.fieldNames()[1]);
+            Assertions.assertEquals("column2", schema2.fieldNames()[2]);
+        } catch (Exception e) {
+            Assertions.assertTrue(false);
+        }
+    }
+
+    @Test
+    public void testParseColumnsFromPath() {
+        DppUtils dppUtils = new DppUtils();
+
+        String path = "/path/to/file/city=beijing/date=2020-04-10/data";
+        List<String> columnFromPaths = new ArrayList<>();
+        columnFromPaths.add("city");
+        columnFromPaths.add("date");
+        try {
+            List<String> columnFromPathValues = dppUtils.parseColumnsFromPath(path, columnFromPaths);
+            Assertions.assertEquals(2, columnFromPathValues.size());
+            Assertions.assertEquals("beijing", columnFromPathValues.get(0));
+            Assertions.assertEquals("2020-04-10", columnFromPathValues.get(1));
+        } catch (Exception e) {
+            Assertions.assertTrue(false);
+        }
+    }
+}
diff --git a/spark-load/spark-load-dpp/src/test/java/org/apache/doris/load/loadv2/dpp/MinimumCoverageRollupTreeBuilderTest.java b/spark-load/spark-load-dpp/src/test/java/org/apache/doris/load/loadv2/dpp/MinimumCoverageRollupTreeBuilderTest.java
new file mode 100644
index 00000000..fc57abe3
--- /dev/null
+++ b/spark-load/spark-load-dpp/src/test/java/org/apache/doris/load/loadv2/dpp/MinimumCoverageRollupTreeBuilderTest.java
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+ +package org.apache.doris.load.loadv2.dpp; + +import org.apache.doris.config.EtlJobConfig; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + + +import java.util.ArrayList; +import java.util.List; + +public class MinimumCoverageRollupTreeBuilderTest { + + @Test + public void testBuild() { + EtlJobConfig.EtlColumn column1 = new EtlJobConfig.EtlColumn( + "column1", "INT", + true, true, + "NONE", "0", + 0, 0, 0); + EtlJobConfig.EtlColumn column2 = new EtlJobConfig.EtlColumn( + "column2", "SMALLINT", + true, true, + "NONE", "0", + 0, 0, 0); + EtlJobConfig.EtlColumn column3 = new EtlJobConfig.EtlColumn( + "column3", "VARCHAR", + true, true, + "NONE", "", + 0, 0, 0); + EtlJobConfig.EtlColumn column4 = new EtlJobConfig.EtlColumn( + "column4", "INT", + true, false, + "SUM", "", + 0, 0, 0); + List baseColumns = new ArrayList<>(); + baseColumns.add(column1); + baseColumns.add(column2); + baseColumns.add(column3); + baseColumns.add(column4); + EtlJobConfig.EtlIndex baseIndex = new EtlJobConfig.EtlIndex(10000, + baseColumns, 12345, "DUPLICATE", true, 1); + List roll1Columns = new ArrayList<>(); + roll1Columns.add(column1); + roll1Columns.add(column2); + roll1Columns.add(column4); + EtlJobConfig.EtlIndex roll1Index = new EtlJobConfig.EtlIndex(10001, + roll1Columns, 12346, "AGGREGATE", false, 1); + List roll2Columns = new ArrayList<>(); + roll2Columns.add(column1); + roll2Columns.add(column4); + EtlJobConfig.EtlIndex roll2Index = new EtlJobConfig.EtlIndex(10002, + roll2Columns, 12347, "AGGREGATE", false, 1); + + List roll3Columns = new ArrayList<>(); + roll3Columns.add(column3); + roll3Columns.add(column4); + EtlJobConfig.EtlIndex roll3Index = new EtlJobConfig.EtlIndex(10003, + roll3Columns, 12348, "AGGREGATE", false, 1); + + List indexes = new ArrayList<>(); + indexes.add(baseIndex); + indexes.add(roll1Index); + indexes.add(roll2Index); + indexes.add(roll3Index); + EtlJobConfig.EtlTable table = new EtlJobConfig.EtlTable(indexes, null); + + MinimumCoverageRollupTreeBuilder builder = new MinimumCoverageRollupTreeBuilder(); + RollupTreeNode resultNode = builder.build(table); + Assertions.assertEquals(resultNode.parent, null); + Assertions.assertEquals(resultNode.indexId, 10000); + Assertions.assertEquals(resultNode.level, 0); + Assertions.assertEquals(resultNode.children.size(), 2); + + RollupTreeNode index1Node = resultNode.children.get(0); + Assertions.assertEquals(index1Node.parent.indexId, 10000); + Assertions.assertEquals(index1Node.indexId, 10001); + Assertions.assertEquals(index1Node.level, 1); + Assertions.assertEquals(index1Node.children.size(), 1); + + RollupTreeNode index3Node = resultNode.children.get(1); + Assertions.assertEquals(index3Node.parent.indexId, 10000); + Assertions.assertEquals(index3Node.indexId, 10003); + Assertions.assertEquals(index3Node.level, 1); + Assertions.assertEquals(index3Node.children, null); + + RollupTreeNode index2Node = index1Node.children.get(0); + Assertions.assertEquals(index2Node.parent.indexId, 10001); + Assertions.assertEquals(index2Node.indexId, 10002); + Assertions.assertEquals(index2Node.level, 2); + Assertions.assertEquals(index2Node.children, null); + } +} diff --git a/spark-load/spark-load-dpp/src/test/java/org/apache/doris/load/loadv2/dpp/SparkDppTest.java b/spark-load/spark-load-dpp/src/test/java/org/apache/doris/load/loadv2/dpp/SparkDppTest.java new file mode 100644 index 00000000..20039092 --- /dev/null +++ b/spark-load/spark-load-dpp/src/test/java/org/apache/doris/load/loadv2/dpp/SparkDppTest.java @@ -0,0 +1,67 @@ 
+// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.load.loadv2.dpp; + +import org.apache.doris.config.EtlJobConfig; + +import org.apache.spark.sql.RowFactory; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.math.BigDecimal; + +public class SparkDppTest { + + @Test + public void testValidateData() { + SparkDpp sparkDpp = new SparkDpp(); + + // decimal + EtlJobConfig.EtlColumn etlColumn = new EtlJobConfig.EtlColumn(); + etlColumn.columnType = "DECIMALV2"; + etlColumn.precision = 3; + etlColumn.scale = 2; + + DecimalParser decimalParser = new DecimalParser(etlColumn); + // test max/min + Assertions.assertEquals(decimalParser.getMaxValue().toString(), "9.99"); + Assertions.assertEquals(decimalParser.getMinValue().toString(), "-9.99"); + // normal + BigDecimal bigDecimal = new BigDecimal("1.21"); + Assertions.assertTrue(sparkDpp.validateData(bigDecimal, etlColumn, decimalParser, RowFactory.create(bigDecimal))); + // failed + BigDecimal bigDecimalFailed = new BigDecimal("10"); + Assertions.assertFalse(sparkDpp.validateData(bigDecimalFailed, etlColumn, decimalParser, RowFactory.create(bigDecimalFailed))); + + // string + EtlJobConfig.EtlColumn stringColumn = new EtlJobConfig.EtlColumn(); + stringColumn.stringLength = 3; + stringColumn.columnType = "VARCHAR"; + StringParser stringParser = new StringParser(stringColumn); + // normal + String normalString = "a1"; + Assertions.assertTrue(sparkDpp.validateData(normalString, stringColumn, stringParser, RowFactory.create(normalString))); + // cn normal + String normalStringCN = "中"; + Assertions.assertTrue(sparkDpp.validateData(normalStringCN, stringColumn, stringParser, RowFactory.create(normalStringCN))); + // cn failed + String failedStringCN = "中a"; + Assertions.assertFalse(sparkDpp.validateData(failedStringCN, stringColumn, stringParser, RowFactory.create(failedStringCN))); + } + +} diff --git a/spark-load/spark-load-dpp/src/test/java/org/apache/doris/load/loadv2/etl/SparkEtlJobTest.java b/spark-load/spark-load-dpp/src/test/java/org/apache/doris/load/loadv2/etl/SparkEtlJobTest.java new file mode 100644 index 00000000..676a2139 --- /dev/null +++ b/spark-load/spark-load-dpp/src/test/java/org/apache/doris/load/loadv2/etl/SparkEtlJobTest.java @@ -0,0 +1,195 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.load.loadv2.etl;
+
+import org.apache.doris.common.jmockit.Deencapsulation;
+import org.apache.doris.config.EtlJobConfig;
+import org.apache.doris.config.EtlJobConfig.EtlColumn;
+import org.apache.doris.config.EtlJobConfig.EtlColumnMapping;
+import org.apache.doris.config.EtlJobConfig.EtlFileGroup;
+import org.apache.doris.config.EtlJobConfig.EtlIndex;
+import org.apache.doris.config.EtlJobConfig.EtlJobProperty;
+import org.apache.doris.config.EtlJobConfig.EtlPartition;
+import org.apache.doris.config.EtlJobConfig.EtlPartitionInfo;
+import org.apache.doris.config.EtlJobConfig.EtlTable;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import mockit.Expectations;
+import mockit.Mocked;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PositionedReadable;
+import org.apache.hadoop.fs.Seekable;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+
+import java.io.ByteArrayInputStream;
+import java.io.EOFException;
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+public class SparkEtlJobTest {
+    private long tableId;
+    private long index1Id;
+    private long index2Id;
+    private long partition1Id;
+    private long partition2Id;
+    private EtlJobConfig etlJobConfig;
+
+    @BeforeEach
+    public void setUp() {
+        tableId = 0L;
+        index1Id = 1L;
+        index2Id = 2L;
+        partition1Id = 3L;
+        partition2Id = 4L;
+
+        // indexes
+        EtlColumn k1 = new EtlColumn("k1", "INT", false, true, "NONE", "0", 0, 0, 0);
+        EtlColumn k2 = new EtlColumn("k2", "VARCHAR", false, true, "NONE", "0", 10, 0, 0);
+        EtlColumn v1 = new EtlColumn("v1", "BIGINT", false, false, "NONE", "0", 0, 0, 0);
+        EtlIndex index1 = new EtlIndex(index1Id, Lists.newArrayList(k1, k2, v1), 666666, "DUPLICATE", true, 1);
+        v1 = new EtlColumn("v1", "BIGINT", false, false, "SUM", "0", 0, 0, 0);
+        EtlIndex index2 = new EtlIndex(index2Id, Lists.newArrayList(k1, v1), 888888, "AGGREGATE", true, 1);
+        List<EtlIndex> indexes = Lists.newArrayList(index1, index2);
+        // partition info
+        List<EtlPartition> partitions = Lists.newArrayList();
+        partitions.add(new EtlPartition(partition1Id, Lists.newArrayList(0), Lists.newArrayList(100), false, 2));
+        partitions.add(new EtlPartition(partition2Id, Lists.newArrayList(100), Lists.newArrayList(), true, 3));
+        EtlPartitionInfo partitionInfo = new EtlPartitionInfo("RANGE", Lists.newArrayList("k1"), Lists.newArrayList("k2"), partitions);
+        EtlTable table = new EtlTable(indexes, partitionInfo);
+        // file group
+        Map<String, EtlColumnMapping> columnMappings = Maps.newHashMap();
+        columnMappings.put("k1", new EtlColumnMapping("k1 + 1"));
+        table.addFileGroup(new EtlFileGroup(EtlJobConfig.SourceType.FILE, Lists.newArrayList("hdfs://127.0.0.1:10000/file"),
+                Lists.newArrayList(), Lists.newArrayList(), "\t", "\n", false, null,
+                Maps.newHashMap(), "", Lists.newArrayList(partition1Id, partition2Id)));
+        // tables
+        Map<Long, EtlTable> tables = Maps.newHashMap();
+        tables.put(tableId, table);
+        // others
+        String outputFilePattern = "V1.label0.%d.%d.%d.%d.%d.parquet";
+        String label = "label0";
+        EtlJobProperty properties = new EtlJobProperty();
+        properties.strictMode = false;
+        properties.timezone = "Asia/Shanghai";
+        etlJobConfig = new EtlJobConfig(tables, outputFilePattern, label, properties);
+    }
+
+    @Test
+    public void testInitConfig(@Mocked FileSystem fs) throws IOException {
+        new Expectations() {
+            {
+                fs.open(new Path("hdfs://127.0.0.1:10000/jobconfig.json"));
+                result = new FSDataInputStream(new SeekableByteArrayInputStream(etlJobConfig.configToJson().getBytes()));
+            }
+        };
+
+        SparkEtlJob job = Deencapsulation.newInstance(SparkEtlJob.class, "hdfs://127.0.0.1:10000/jobconfig.json");
+        Deencapsulation.invoke(job, "initConfig");
+        EtlJobConfig parsedConfig = Deencapsulation.getField(job, "etlJobConfig");
+        Assertions.assertTrue(parsedConfig.tables.containsKey(tableId));
+        EtlTable table = parsedConfig.tables.get(tableId);
+        Assertions.assertEquals(2, table.indexes.size());
+        Assertions.assertEquals(2, table.partitionInfo.partitions.size());
+        Assertions.assertEquals(false, parsedConfig.properties.strictMode);
+        Assertions.assertEquals("label0", parsedConfig.label);
+    }
+
+    @Test
+    public void testCheckConfigWithoutBitmapDictColumns() {
+        SparkEtlJob job = Deencapsulation.newInstance(SparkEtlJob.class, "hdfs://127.0.0.1:10000/jobconfig.json");
+        Deencapsulation.setField(job, "etlJobConfig", etlJobConfig);
+        Deencapsulation.invoke(job, "checkConfig");
+        Map<Long, Set<String>> tableToBitmapDictColumns = Deencapsulation.getField(job, "tableToBitmapDictColumns");
+        // check bitmap dict columns empty
+        Assertions.assertTrue(tableToBitmapDictColumns.isEmpty());
+    }
+
+    @Test
+    public void testCheckConfigWithBitmapDictColumns() {
+        SparkEtlJob job = Deencapsulation.newInstance(SparkEtlJob.class, "hdfs://127.0.0.1:10000/jobconfig.json");
+        EtlTable table = etlJobConfig.tables.get(tableId);
+        table.indexes.get(0).columns.add(
+                new EtlColumn("v2", "BITMAP", false, false, "BITMAP_UNION", "0", 0, 0, 0)
+        );
+        EtlFileGroup fileGroup = table.fileGroups.get(0);
+        fileGroup.sourceType = EtlJobConfig.SourceType.HIVE;
+        fileGroup.columnMappings.put(
+                "v2", new EtlColumnMapping("bitmap_dict", Lists.newArrayList("v2"))
+        );
+        Deencapsulation.setField(job, "etlJobConfig", etlJobConfig);
+        Deencapsulation.invoke(job, "checkConfig");
+        // check hive source
+        Set<Long> hiveSourceTables = Deencapsulation.getField(job, "hiveSourceTables");
+        Assertions.assertTrue(hiveSourceTables.contains(tableId));
+        // check bitmap dict columns has v2
+        Map<Long, Set<String>> tableToBitmapDictColumns = Deencapsulation.getField(job, "tableToBitmapDictColumns");
+        Assertions.assertTrue(tableToBitmapDictColumns.containsKey(tableId));
+        Assertions.assertTrue(tableToBitmapDictColumns.get(tableId).contains("v2"));
+        // check remove v2 bitmap_dict func mapping from file group column mappings
+        Assertions.assertFalse(table.fileGroups.get(0).columnMappings.containsKey("v2"));
+    }
+
+    private static class SeekableByteArrayInputStream extends ByteArrayInputStream implements Seekable, PositionedReadable {
+        public SeekableByteArrayInputStream(byte[] buf) {
+            super(buf);
+        }
+
+        public void seek(long position) {
+            if (position < 0 || position >= buf.length) {
+                throw new IllegalArgumentException("pos = " + position + " length = " + buf.length);
+            }
+            this.pos = (int) position;
+        }
+
+        public long getPos() {
+            return this.pos;
+        }
+
+        @Override
+        public boolean seekToNewSource(long targetPos) throws IOException {
+            return false;
+        }
+
+        @Override
+        public int read(long position, byte[] buffer, int offset, int length) throws IOException {
+            this.seek(position);
+            return this.read(buffer, offset, length);
+        }
+
+        @Override
+        public void readFully(long position, byte[] buffer, int offset, int length) throws IOException {
+            if (position + length > buf.length) {
+                throw new EOFException("End of file reached before reading fully.");
+            }
+            System.arraycopy(buf, (int) position, buffer, offset, length);
+        }
+
+        @Override
+        public void readFully(long position, byte[] buffer) throws IOException {
+            readFully(position, buffer, 0, buffer.length);
+        }
+    }
+}