# Skip to content
#
# [VL] Add uniffle integration #9838
#
# [VL] Add uniffle integration
#
# [VL] Add uniffle integration #9838
#
# Workflow file for this run

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Display name of this workflow.
name: Velox backend

# Trigger: pull requests that touch at least one of the listed paths.
on:
  pull_request:
    paths:
      # CI configuration and the root Maven build file.
      - '.github/**'
      - 'pom.xml'
      # Gluten modules.
      - 'backends-velox/**'
      - 'gluten-celeborn/**'
      - 'gluten-uniffle/**'
      - 'gluten-core/**'
      - 'gluten-data/**'
      - 'gluten-delta/**'
      - 'gluten-iceberg/**'
      - 'gluten-ut/**'
      - 'shims/**'
      # Integration-test and test-environment tooling.
      - 'tools/gluten-it/**'
      - 'tools/gluten-te/**'
      # Velox build scripts and native sources.
      - 'ep/build-velox/**'
      # `*` does not cross `/`: only files directly under cpp/ match here;
      # the cpp sub-directories that matter are listed explicitly below.
      - 'cpp/*'
      - 'cpp/CMake/**'
      - 'cpp/velox/**'
      - 'cpp/core/**'
      - 'dev/**'
# Workflow-level environment variables, visible to every job and step.
# Only PATH_TO_GLUTEN_TE is referenced directly by the steps in this file;
# the others are presumably consumed by the gluten-te scripts or by Maven
# (NOTE(review): confirm against tools/gluten-te).
env:
  HTTP_PROXY_HOST: proxy-shz.intel.com
  # Quoted so the value is loaded as a string rather than a YAML integer.
  HTTP_PROXY_PORT: '911'
  # Root of the test-environment scripts used by every `run` step.
  PATH_TO_GLUTEN_TE: ./tools/gluten-te
  # Quoted: a digits/dots/colon host:port value should never be left to
  # implicit typing.
  DOCKER_PULL_REGISTRY: '10.1.0.25:5000'
  MAVEN_OPTS: -Dmaven.wagon.http.retryHandler.count=3
# Runs are grouped by repository + PR head branch (falling back to the commit
# SHA when head_ref is empty) + workflow name. Starting a new run in a group
# cancels the run of that group that is still in progress.
concurrency:
  group: '${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}'
  cancel-in-progress: true
jobs:
  # Builds Velox and Gluten for Spark 3.2 and runs the gluten-it TPC
  # benchmarks, first with Uniffle, then without a remote shuffle service,
  # then with Celeborn 0.4.0 and 0.3.2.
  #
  # Every command goes through the gluten-te gha-checkout helper scripts
  # (checkout.sh / exec.sh / clean.sh); exec.sh receives the whole command
  # line as a single-quoted argument. Presumably it executes that command
  # inside the docker container created by checkout.sh - confirm against
  # tools/gluten-te.
  ubuntu2204-test:
    runs-on: velox-self-hosted
    env:
      # Used to select the per-OS script directory under $PATH_TO_GLUTEN_TE.
      OS_IMAGE_NAME: ubuntu
      # Quoted so the version is loaded as a string, not as a float.
      OS_IMAGE_TAG: '22.04'
    steps:
      - uses: actions/checkout@v4

      - name: Setup docker container
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh

      # Fetches and builds Velox with HDFS, S3, GCS and ABFS enabled and the
      # ep cache disabled.
      - name: Build Gluten velox third party
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten/ep/build-velox/src && \
          ./get_velox.sh --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON && \
          ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON'

      # Compiles the Gluten native library with the same storage options.
      - name: Build Gluten CPP library
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten/cpp && \
          ./compile.sh --build_velox_backend=ON --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON'

      # Maven build with the rss, rss-uniffle, iceberg and delta profiles;
      # tests are skipped here and exercised by the gluten-it steps below.
      - name: Build for Spark 3.2.2
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten && \
          mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Prss-uniffle -Piceberg -Pdelta -DskipTests'

      # Downloads Uniffle 0.8.0 and Hadoop 2.8.5, writes a minimal
      # coordinator/server configuration, starts both services, waits 50 s,
      # builds gluten-it with the rss-uniffle profile and runs TPC-H SF1.0
      # with the velox-with-uniffle preset. Only TPC-H is run in this step.
      #
      # The log dumps after the benchmark use absolute paths: by then the
      # working directory is /opt/gluten/tools/gluten-it, so paths relative
      # to /opt/uniffle would not resolve, the `&&` chain would stop there
      # and the stop scripts would never be reached.
      - name: TPC-H SF1.0 Parquet local spark3.2 with uniffle 0.8.0
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh \
          'wget -nv https://archive.apache.org/dist/incubator/uniffle/0.8.0/apache-uniffle-0.8.0-incubating-bin.tar.gz && \
          tar xzf apache-uniffle-0.8.0-incubating-bin.tar.gz -C /opt/ && mv /opt/rss-0.8.0-hadoop2.8 /opt/uniffle && \
          wget -nv https://archive.apache.org/dist/hadoop/common/hadoop-2.8.5/hadoop-2.8.5.tar.gz && \
          tar xzf hadoop-2.8.5.tar.gz -C /opt/ && \
          cd /opt/uniffle && mkdir shuffle_data && \
          echo -e "XMX_SIZE=16g\nHADOOP_HOME=/opt/hadoop-2.8.5\nJAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" > ./bin/rss-env.sh && \
          echo -e "rss.coordinator.server.periodic.output.interval.times 1\nrss.coordinator.shuffle.nodes.max 1" > ./conf/coordinator.conf && \
          echo -e "rss.server.heartbeat.delay 3000\nrss.rpc.server.port 19997\nrss.jetty.http.port 19996\nrss.server.netty.port 19995\nrss.storage.basePath /opt/uniffle/shuffle_data\nrss.storage.type MEMORY_LOCALFILE\nrss.coordinator.quorum localhost:19999\nrss.server.flush.thread.alive 5\nrss.server.single.buffer.flush.threshold 64m" > ./conf/server.conf && \
          bash ./bin/start-coordinator.sh && bash ./bin/start-shuffle-server.sh && \
          sleep 50 && \
          echo "=====show uniffle startup====" && \
          cat ./logs/coordinator.log && \
          cat ./logs/shuffle_server.log && \
          cd /opt/gluten/tools/gluten-it && mvn clean install -Pspark-3.2,rss-uniffle && \
          tail -n 200 /opt/uniffle/logs/coordinator.log && \
          tail -n 200 /opt/uniffle/logs/shuffle_server.log && \
          echo "=====show uniffle end====" && \
          GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox-with-uniffle --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 && \
          tail -n 200 /opt/uniffle/logs/coordinator.log && \
          tail -n 200 /opt/uniffle/logs/shuffle_server.log && \
          bash /opt/uniffle/bin/stop-coordinator.sh && bash /opt/uniffle/bin/stop-shuffle-server.sh'

      # Baseline without a remote shuffle service: TPC-H at SF1.0, then
      # TPC-DS at SF10.0 with a larger heap, off-heap size and thread count.
      - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.2
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \
          mvn clean install -Pspark-3.2 \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
          && GLUTEN_IT_JVM_ARGS=-Xmx20G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=40g -s=10.0 --threads=32 --iterations=1'

      # Unpacks the Celeborn 0.4.0 tarball, which is expected to already be
      # present in /opt (nothing is downloaded here), starts a master and a
      # worker, runs TPC-H and TPC-DS with the velox-with-celeborn preset,
      # stops the services and removes /opt/celeborn so the next Celeborn
      # step extracts into an empty directory.
      # NOTE(review): the step name says TPC-DS SF10.0, but the ds run below
      # passes -s=1.0.
      - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.2 with Celeborn 0.4.0
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh \
          'cd /opt && mkdir -p celeborn && \
          tar xzf apache-celeborn-0.4.0-incubating-bin.tgz -C /opt/celeborn --strip-components=1 && cd celeborn && \
          mv ./conf/celeborn-env.sh.template ./conf/celeborn-env.sh && \
          echo -e "CELEBORN_MASTER_MEMORY=4g\nCELEBORN_WORKER_MEMORY=4g\nCELEBORN_WORKER_OFFHEAP_MEMORY=8g" > ./conf/celeborn-env.sh && \
          echo -e "celeborn.worker.commitFiles.threads 128\nceleborn.worker.sortPartition.threads 64" > ./conf/celeborn-defaults.conf \
          && bash ./sbin/start-master.sh && bash ./sbin/start-worker.sh && \
          cd /opt/gluten/tools/gluten-it && mvn clean install -Pspark-3.2,rss,celeborn-0.4 \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox-with-celeborn --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox-with-celeborn --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 && \
          bash /opt/celeborn/sbin/stop-worker.sh \
          && bash /opt/celeborn/sbin/stop-master.sh && rm -rf /opt/celeborn'

      # Same sequence with the Celeborn 0.3.2 tarball. gluten-it is built
      # with -Pspark-3.2,rss only (no celeborn-0.4 profile) and /opt/celeborn
      # is left in place afterwards.
      # NOTE(review): the step name says TPC-DS SF10.0, but the ds run below
      # passes -s=1.0.
      - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.2 with Celeborn 0.3.2
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh \
          'cd /opt && mkdir -p celeborn && \
          tar xzf apache-celeborn-0.3.2-incubating-bin.tgz -C /opt/celeborn --strip-components=1 && cd celeborn && \
          mv ./conf/celeborn-env.sh.template ./conf/celeborn-env.sh && \
          echo -e "CELEBORN_MASTER_MEMORY=4g\nCELEBORN_WORKER_MEMORY=4g\nCELEBORN_WORKER_OFFHEAP_MEMORY=8g" > ./conf/celeborn-env.sh && \
          echo -e "celeborn.worker.commitFiles.threads 128\nceleborn.worker.sortPartition.threads 64" > ./conf/celeborn-defaults.conf \
          && bash ./sbin/start-master.sh && bash ./sbin/start-worker.sh && \
          cd /opt/gluten/tools/gluten-it && mvn clean install -Pspark-3.2,rss \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox-with-celeborn --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox-with-celeborn --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 && \
          bash /opt/celeborn/sbin/stop-worker.sh \
          && bash /opt/celeborn/sbin/stop-master.sh'

      # always(): run the cleanup script even when an earlier step failed.
      - name: Exit docker container
        if: ${{ always() }}
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh