From efffd0eca78cdc1e504ba85829d9d1b9f7839673 Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Wed, 10 Apr 2024 14:53:00 +0800 Subject: [PATCH 1/3] Initial commit --- .../backendsapi/velox/ListenerApiImpl.scala | 10 +++++----- cpp/CMake/ConfigArrow.cmake | 8 ++++---- ep/build-velox/src/get_velox.sh | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/ListenerApiImpl.scala b/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/ListenerApiImpl.scala index 4a7dbc187342..77039493663d 100644 --- a/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/ListenerApiImpl.scala +++ b/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/ListenerApiImpl.scala @@ -35,7 +35,7 @@ import org.apache.commons.lang3.StringUtils import scala.sys.process._ class ListenerApiImpl extends ListenerApi { - private val ARROW_VERSION = "1500" + private val ARROW_VERSION = "1400" override def onDriverStart(conf: SparkConf): Unit = { // sql table cache serializer @@ -111,9 +111,9 @@ class ListenerApiImpl extends ListenerApi { } loader .newTransaction() - .loadAndCreateLink(s"libarrow.so.$ARROW_VERSION.0.0", s"libarrow.so.$ARROW_VERSION", false) + .loadAndCreateLink(s"libarrow.so.$ARROW_VERSION.1.0", s"libarrow.so.$ARROW_VERSION", false) .loadAndCreateLink( - s"libparquet.so.$ARROW_VERSION.0.0", + s"libparquet.so.$ARROW_VERSION.1.0", s"libparquet.so.$ARROW_VERSION", false) .commit() @@ -123,11 +123,11 @@ class ListenerApiImpl extends ListenerApi { loader .newTransaction() .loadAndCreateLink( - s"libarrow.$ARROW_VERSION.0.0.dylib", + s"libarrow.$ARROW_VERSION.1.0.dylib", s"libarrow.$ARROW_VERSION.dylib", false) .loadAndCreateLink( - s"libparquet.$ARROW_VERSION.0.0.dylib", + s"libparquet.$ARROW_VERSION.1.0.dylib", s"libparquet.$ARROW_VERSION.dylib", false) .commit() diff --git a/cpp/CMake/ConfigArrow.cmake b/cpp/CMake/ConfigArrow.cmake index 1ae4ece1b8ef..d9633b27cc81 100644 --- a/cpp/CMake/ConfigArrow.cmake +++ b/cpp/CMake/ConfigArrow.cmake @@ -16,11 +16,11 @@ # under the License. 
if (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") - set(ARROW_SHARED_LIBRARY_SUFFIX ".1500.dylib") - set(ARROW_SHARED_LIBRARY_PARENT_SUFFIX ".1500.1.0.dylib") + set(ARROW_SHARED_LIBRARY_SUFFIX ".1400.dylib") + set(ARROW_SHARED_LIBRARY_PARENT_SUFFIX ".1400.1.0.dylib") else() - set(ARROW_SHARED_LIBRARY_SUFFIX ".so.1500") - set(ARROW_SHARED_LIBRARY_PARENT_SUFFIX ".so.1500.1.0") + set(ARROW_SHARED_LIBRARY_SUFFIX ".so.1400") + set(ARROW_SHARED_LIBRARY_PARENT_SUFFIX ".so.1400.1.0") endif() set(ARROW_LIB_NAME "arrow") diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index 3cd22e732368..cc9d6cc070dd 100755 --- a/ep/build-velox/src/get_velox.sh +++ b/ep/build-velox/src/get_velox.sh @@ -17,7 +17,7 @@ set -exu VELOX_REPO=https://github.com/oap-project/velox.git -VELOX_BRANCH=2024_02_29 +VELOX_BRANCH=2024_02_29-v1 VELOX_HOME="" #Set on run gluten on HDFS From 93a6c3c5ee7c1d4ed1fa19ad4cd334b9afce7b3f Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Wed, 10 Apr 2024 15:39:45 +0800 Subject: [PATCH 2/3] Add yml file just to trigger new CI --- .github/workflows/velox_be.yml | 618 ------------------------- .github/workflows/velox_docker.yml | 700 +++++++++++++++++++++++++++++ dev/ci-velox-buildstatic.sh | 9 + 3 files changed, 709 insertions(+), 618 deletions(-) delete mode 100644 .github/workflows/velox_be.yml create mode 100644 .github/workflows/velox_docker.yml create mode 100755 dev/ci-velox-buildstatic.sh diff --git a/.github/workflows/velox_be.yml b/.github/workflows/velox_be.yml deleted file mode 100644 index 41348331fef1..000000000000 --- a/.github/workflows/velox_be.yml +++ /dev/null @@ -1,618 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -name: Velox backend - -on: - pull_request: - paths: - - '.github/**' - - 'pom.xml' - - 'backends-velox/**' - - 'gluten-celeborn/**' - - 'gluten-core/**' - - 'gluten-data/**' - - 'gluten-delta/**' - - 'gluten-iceberg/**' - - 'gluten-ut/**' - - 'shims/**' - - 'tools/gluten-it/**' - - 'tools/gluten-te/**' - - 'ep/build-velox/**' - - 'cpp/*' - - 'cpp/CMake/**' - - 'cpp/velox/**' - - 'cpp/core/**' - - 'dev/**' - -env: - HTTP_PROXY_HOST: proxy-shz.intel.com - HTTP_PROXY_PORT: 911 - PATH_TO_GLUTEN_TE: ./tools/gluten-te - DOCKER_PULL_REGISTRY: 10.1.0.25:5000 - MAVEN_OPTS: -Dmaven.wagon.http.retryHandler.count=3 - -concurrency: - group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} - cancel-in-progress: true - -jobs: - ubuntu2004-test-spark32: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: ubuntu - OS_IMAGE_TAG: 20.04 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Gluten velox third party - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/ep/build-velox/src && \ - ./get_velox.sh && \ - ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF --build_test_utils=ON' - - name: Build Gluten CPP library - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/cpp && \ - ./compile.sh --build_velox_backend=ON --build_tests=ON --build_examples=ON --build_benchmarks=ON' - - name: Run CPP unit test - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/cpp/build && \ - ctest -V' - - name: Run HBM CPP unit test - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/cpp/build && \ - cmake -DBUILD_TESTS=ON -DENABLE_HBM=ON .. && \ - cmake --build . --target hbw_allocator_test -- -j && \ - ctest -V -R TestHbw' - - name: Build and run unit test for Spark 3.2.2 (other tests) - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten && \ - mvn clean install -Pspark-3.2 -Pspark-ut -Pbackends-velox -Prss -DargLine="-Dspark.test.home=/opt/spark322" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,io.glutenproject.tags.UDFTest,io.glutenproject.tags.SkipTestTags && \ - mvn test -Pspark-3.2 -Pbackends-velox -DtagsToExclude=None -DtagsToInclude=io.glutenproject.tags.UDFTest' - # Cpp micro benchmarks will use generated files from unit test in backends-velox module. 
- - name: Run micro benchmarks - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/cpp/build/velox/benchmarks && \ - ./generic_benchmark --run-example --with-shuffle --threads 1 --iterations 1' - - name: Copy golden files from container to host - if: failure() - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/cp.sh /tmp/tpch-approved-plan/ /tmp/$GITHUB_RUN_ID/tpch-approved-plan - - name: Upload golden files - if: failure() - uses: actions/upload-artifact@v4 - with: - name: golden-files-spark32 - path: | - /tmp/${{ github.run_id }}/tpch-approved-plan/** - - name: Clean temp golden files - if: failure() - run: | - rm -rf /tmp/$GITHUB_RUN_ID/tpch-approved-plan - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - - ubuntu2004-test-spark32-slow: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: ubuntu - OS_IMAGE_TAG: 20.04 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Gluten velox third party - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/ep/build-velox/src && \ - ./get_velox.sh && \ - ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF' - - name: Build Gluten CPP library - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/cpp && \ - ./compile.sh --build_velox_backend=ON' - - name: Build and run unit test for Spark 3.2.2 (slow tests) - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten && \ - mvn clean install -Pspark-3.2 -Pspark-ut -Pbackends-velox -Prss -Piceberg -Pdelta -DargLine="-Dspark.test.home=/opt/spark322" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest' - - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.2 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1' - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - - ubuntu2004-test-spark33-slow: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: ubuntu - OS_IMAGE_TAG: 20.04 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Gluten velox third party - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/ep/build-velox/src && \ - ./get_velox.sh && \ - ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF' - - name: Build Gluten CPP library - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/cpp && \ - ./compile.sh --build_velox_backend=ON' - - name: Build and Run unit test for Spark 3.3.1 (slow tests) - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten && \ - mvn clean install -Pspark-3.3 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/spark331" 
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest' - - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.3 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.3 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1' - - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.3 Q38 flush - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 --queries=q38 \ - --disable-bhj \ - --extra-conf=spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.1 \ - --extra-conf=spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.2 \ - --extra-conf=spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100 \ - --extra-conf=spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0' - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - - ubuntu2004-test-spark33: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: ubuntu - OS_IMAGE_TAG: 20.04 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Gluten velox third party - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/ep/build-velox/src && \ - ./get_velox.sh && \ - ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF' - - name: Build Gluten CPP library - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/cpp && \ - ./compile.sh --build_velox_backend=ON --build_examples=ON' - - name: Build and Run unit test for Spark 3.3.1 (other tests) - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten && \ - mvn clean install -Pspark-3.3 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/spark331" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,io.glutenproject.tags.UDFTest,io.glutenproject.tags.SkipTestTags && \ - mvn test -Pspark-3.3 -Pbackends-velox -DtagsToExclude=None -DtagsToInclude=io.glutenproject.tags.UDFTest' - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - - ubuntu2004-test-spark34-slow: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: ubuntu - OS_IMAGE_TAG: 20.04 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Gluten velox third party - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/ep/build-velox/src && \ - ./get_velox.sh && \ - ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF' - - name: Build Gluten CPP library - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/cpp && \ - ./compile.sh --build_velox_backend=ON ' - - name: Build 
and Run unit test for Spark 3.4.2 (slow tests) - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten && \ - mvn clean install -Pspark-3.4 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/spark342" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest' - - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.4 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.4 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1' - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - - ubuntu2004-test-spark34: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: ubuntu - OS_IMAGE_TAG: 20.04 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Gluten velox third party - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/ep/build-velox/src && \ - ./get_velox.sh && \ - ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF' - - name: Build Gluten CPP library - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/cpp && \ - ./compile.sh --build_velox_backend=ON --build_examples=ON' - - name: Build and Run unit test for Spark 3.4.2 (other tests) - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten && \ - mvn clean install -Pspark-3.4 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/spark342" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,io.glutenproject.tags.UDFTest,io.glutenproject.tags.SkipTestTags && \ - mvn test -Pspark-3.4 -Pbackends-velox -DtagsToExclude=None -DtagsToInclude=io.glutenproject.tags.UDFTest' - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - - ubuntu2204-test-spark33-spark34: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: ubuntu - OS_IMAGE_TAG: 22.04 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Gluten velox third party - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/ep/build-velox/src && \ - ./get_velox.sh --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON && \ - ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON' - - name: Build Gluten CPP library - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/cpp && \ - ./compile.sh --build_velox_backend=ON --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON' - - name: Build for Spark 3.3.1 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten && \ - mvn clean install -Pspark-3.3 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests' - - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.3 - run: | - 
$PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.3 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx20G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=30g -s=10.0 --threads=32 --iterations=1' - - name: Build for Spark 3.4.2 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten && \ - mvn clean install -Pspark-3.4 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests' - - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.4 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.4 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx20G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=30g -s=10.0 --threads=32 --iterations=1' - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - - ubuntu2204-test: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: ubuntu - OS_IMAGE_TAG: 22.04 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Gluten velox third party - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/ep/build-velox/src && \ - ./get_velox.sh --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON && \ - ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON' - - name: Build Gluten CPP library - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/cpp && \ - ./compile.sh --build_velox_backend=ON --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON' - - name: Build for Spark 3.2.2 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten && \ - mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests' - - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.2 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.2 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx20G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=40g -s=10.0 --threads=32 --iterations=1' - - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.2 with Celeborn - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh \ - 'wget https://dlcdn.apache.org/incubator/celeborn/celeborn-0.3.0-incubating/apache-celeborn-0.3.0-incubating-bin.tgz && \ - tar xzf apache-celeborn-0.3.0-incubating-bin.tgz && cd apache-celeborn-0.3.0-incubating-bin && \ - mv ./conf/celeborn-env.sh.template ./conf/celeborn-env.sh && \ - echo -e 
"CELEBORN_MASTER_MEMORY=4g\nCELEBORN_WORKER_MEMORY=4g\nCELEBORN_WORKER_OFFHEAP_MEMORY=8g" > ./conf/celeborn-env.sh && \ - echo -e "celeborn.worker.commitFiles.threads 128\nceleborn.worker.sortPartition.threads 64" > ./conf/celeborn-defaults.conf \ - && bash ./sbin/start-master.sh && bash ./sbin/start-worker.sh && \ - cd /opt/gluten/tools/gluten-it && mvn clean install -Pspark-3.2,rss \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox-with-celeborn --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox-with-celeborn --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 && \ - bash /opt/apache-celeborn-0.3.0-incubating-bin/sbin/stop-worker.sh \ - && bash /opt/apache-celeborn-0.3.0-incubating-bin/sbin/stop-master.sh' - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - - centos8-test: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: centos - OS_IMAGE_TAG: 8 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Gluten velox third party - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - source /env.sh && \ - sudo yum -y install patch && \ - cd /opt/gluten/ep/build-velox/src && \ - ./get_velox.sh --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON && \ - ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON' - - name: Build Gluten CPP library - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - source /env.sh && \ - cd /opt/gluten/cpp && \ - ./compile.sh --build_velox_backend=ON --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON' - - name: Build for Spark 3.2.2 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten && \ - mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests' - - name: TPC-H SF1.0 && TPC-DS SF30.0 Parquet local spark3.2 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.2 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=50g -s=30.0 --threads=32 --iterations=1' - - name: TPC-H SF1.0 && TPC-DS SF30.0 Parquet local spark3.2 random kill tasks - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.2 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries \ - --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 --skip-data-gen --random-kill-tasks \ - && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh queries \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=50g -s=30.0 --threads=32 --iterations=1 --skip-data-gen --random-kill-tasks' - - name: Exit docker container - if: ${{ always() }} - run: | - 
$PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - - centos7-test: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: centos - OS_IMAGE_TAG: 7 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Gluten velox third party - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - yum -y install epel-release centos-release-scl patch sudo && \ - cd /opt/gluten/ep/build-velox/src && \ - source /opt/rh/devtoolset-9/enable && \ - ./get_velox.sh --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON && \ - ./build_velox.sh --run_setup_script=ON --enable_ep_cache=OFF --enable_s3=ON --enable_gcs=ON --enable_abfs=ON --enable_hdfs=ON' - - name: Build Gluten CPP library - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten/cpp && \ - source /opt/rh/devtoolset-9/enable && \ - ./compile.sh --build_velox_backend=ON --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON' - - name: Build for Spark 3.2.2 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - cd /opt/gluten && \ - mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests' - - name: TPC-H SF1.0 && TPC-DS SF30.0 Parquet local spark3.2 - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.2 \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=50g -s=30.0 --threads=32 --iterations=1' - - name: TPC-DS SF30.0 Parquet local spark3.2 Q67/Q95 low memory, memory isolation off - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.2 \ - && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ - --skip-data-gen -m=OffHeapExecutionMemory \ - -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \ - -d=OFFHEAP_SIZE:5g,spark.memory.offHeap.size=5g \ - -d=OFFHEAP_SIZE:3g,spark.memory.offHeap.size=3g \ - -d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \ - -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5' - - name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q67/Q95 low memory, memory isolation on - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.2 \ - && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ - --skip-data-gen -m=OffHeapExecutionMemory \ - -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ - -d=OFFHEAP_SIZE:5g,spark.memory.offHeap.size=5g \ - -d=OFFHEAP_SIZE:3g,spark.memory.offHeap.size=3g \ - -d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \ - -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5' || true - - name: TPC-DS SF30.0 Parquet local spark3.2 Q23A/Q23B low memory - 
run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q23a,q23b -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ - --skip-data-gen -m=OffHeapExecutionMemory \ - -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \ - -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ - -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \ - -d=FLUSH_MODE:DISABLED,spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \ - -d=FLUSH_MODE:ABANDONED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \ - -d=FLUSH_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0' - - name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q97 low memory # The case currently causes crash with "free: invalid size". 
- run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ - GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q97 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ - --skip-data-gen -m=OffHeapExecutionMemory \ - -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \ - -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ - -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \ - -d=OFFHEAP_SIZE:1g,spark.memory.offHeap.size=1g' || true - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh - - - static-build-centos7-test: - runs-on: velox-self-hosted - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ - -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID -e NUM_THREADS=30 -detach $DOCKER_PULL_REGISTRY/gluten-te/gluten-buildenv-centos:7 \ - bash -c 'cd /opt/gluten && sleep 14400' - - name: Build Gluten CPP library - run: | - docker exec -i static-build-test-$GITHUB_RUN_ID bash -c ' - source /env.sh && \ - sudo yum -y install patch && \ - cd /opt/gluten && \ - sudo -E ./dev/vcpkg/setup-build-depends.sh && \ - source ./dev/vcpkg/env.sh && \ - ./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_tests=OFF --build_benchmarks=OFF --enable_s3=ON \ - --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON --build_type=Debug' - - name: Build for Spark 3.2.2 - run: | - docker exec static-build-test-$GITHUB_RUN_ID bash -c ' - cd /opt/gluten && \ - mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests && \ - cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.2' - - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 (ubuntu 20.04) - run: | - docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ - -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID-tpc -e NUM_THREADS=30 ubuntu:20.04 \ - bash -c 'apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install openjdk-8-jre -y \ - && cd /opt/gluten/tools/gluten-it \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx10G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=20g -s=1.0 --threads=32 --iterations=1' - - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 (ubuntu 22.04) - run: | - docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ - -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID-tpc -e NUM_THREADS=30 ubuntu:22.04 \ - bash -c 'apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install openjdk-8-jre -y \ - && cd /opt/gluten/tools/gluten-it \ - && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ - && GLUTEN_IT_JVM_ARGS=-Xmx10G sbin/gluten-it.sh queries-compare \ - --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=20g -s=1.0 --threads=32 
--iterations=1' - - name: Exit docker container - if: ${{ always() }} - run: | - docker stop static-build-test-$GITHUB_RUN_ID || true - - build-script-test: - runs-on: velox-self-hosted - env: - OS_IMAGE_NAME: centos - OS_IMAGE_TAG: 8 - steps: - - uses: actions/checkout@v4 - - name: Setup docker container - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh - - name: Build Script Test - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh ' - source /env.sh && \ - sudo yum -y install patch && \ - cd /opt/gluten/ && \ - ./dev/package.sh' - - name: Exit docker container - if: ${{ always() }} - run: | - $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh diff --git a/.github/workflows/velox_docker.yml b/.github/workflows/velox_docker.yml new file mode 100644 index 000000000000..4d65df5b2bcb --- /dev/null +++ b/.github/workflows/velox_docker.yml @@ -0,0 +1,700 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Velox backend Github Runner + +on: + pull_request: + paths: + - '.github/workflows/velox_docker.yml' + - 'pom.xml' + - 'backends-velox/**' + - 'gluten-celeborn/common/**' + - 'gluten-celeborn/package/**' + - 'gluten-celeborn/velox/**' + - 'gluten-ras/**' + - 'gluten-core/**' + - 'gluten-data/**' + - 'gluten-delta/**' + - 'gluten-iceberg/**' + - 'gluten-ut/**' + - 'shims/**' + - 'tools/gluten-it/**' + - 'tools/gluten-te/**' + - 'ep/build-velox/**' + - 'cpp/*' + - 'cpp/CMake/**' + - 'cpp/velox/**' + - 'cpp/core/**' + - 'dev/**' + + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +jobs: + build-native-lib: + runs-on: ubuntu-20.04 + container: apache/gluten:gluten-vcpkg-builder_2024_03_17 # centos7 with dependencies installed + steps: + - uses: actions/checkout@v2 + - name: Generate cache key + run: | + echo ${{ hashFiles('./ep/build-velox/src/**', './dev/**', './cpp/*', './github/workflows/*') }} > cache-key + - name: Cache + id: cache + uses: actions/cache/restore@v3 + with: + path: ./cpp/build/releases/ + key: cache-velox-build-${{ hashFiles('./cache-key') }} + - name: Build Gluten Velox third party + if: ${{ steps.cache.outputs.cache-hit != 'true' }} + run: | + source dev/ci-velox-buildstatic.sh + - uses: actions/upload-artifact@v2 + with: + path: ./cpp/build/releases/ + name: velox-native-lib-${{github.sha}} + + run-tpc-test-ubuntu: + needs: build-native-lib + strategy: + fail-fast: false + matrix: + os: [ "ubuntu:20.04", "ubuntu:22.04" ] + spark: [ "spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5" ] + java: [ "java-8", "java-17" ] + # Spark supports JDK17 since 3.3 and later, see https://issues.apache.org/jira/browse/SPARK-33772 + exclude: + - spark: spark-3.2 + java: java-17 + - spark: spark-3.4 + java: java-17 + - 
spark: spark-3.5 + java: java-17 + - os: ubuntu:22.04 + java: java-17 + runs-on: ubuntu-20.04 + container: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + - name: Download All Artifacts + uses: actions/download-artifact@v2 + with: + name: velox-native-lib-${{github.sha}} + path: ./cpp/build/releases + - name: Setup java and maven + run: | + if [ "${{ matrix.java }}" = "java-17" ]; then + apt-get update && apt-get install -y openjdk-17-jdk maven + else + apt-get update && apt-get install -y openjdk-8-jdk maven + fi + apt remove openjdk-11* -y + - name: Build and run TPCH/DS + run: | + cd $GITHUB_WORKSPACE/ + export JAVA_HOME=/usr/lib/jvm/${{ matrix.java }}-openjdk-amd64 + echo "JAVA_HOME: $JAVA_HOME" + mvn -ntp clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pbackends-velox -DskipTests + cd $GITHUB_WORKSPACE/tools/gluten-it + mvn -ntp clean install -P${{ matrix.spark }} -P${{ matrix.java }} \ + && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ + --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ + && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ + --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 + + run-tpc-test-centos: + needs: build-native-lib + strategy: + fail-fast: false + matrix: + os: [ "centos:7", "centos:8" ] + spark: [ "spark-3.2", "spark-3.3", "spark-3.4", "spark-3.5" ] + java: [ "java-8", "java-17" ] + # Spark supports JDK17 since 3.3 and later, see https://issues.apache.org/jira/browse/SPARK-33772 + exclude: + - spark: spark-3.2 + java: java-17 + - spark: spark-3.4 + java: java-17 + - spark: spark-3.5 + java: java-17 + - os: centos:7 + java: java-17 + runs-on: ubuntu-20.04 + container: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + - name: Download All Artifacts + uses: actions/download-artifact@v2 + with: + name: velox-native-lib-${{github.sha}} + path: ./cpp/build/releases + - name: Update mirror list + if: matrix.os == 'centos:8' + run: | + sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true + sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* || true + - name: Setup java and maven + run: | + if [ "${{ matrix.java }}" = "java-17" ]; then + yum update -y && yum install -y java-17-openjdk-devel wget + else + yum update -y && yum install -y java-1.8.0-openjdk-devel wget + fi + wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz + tar -xvf apache-maven-3.8.8-bin.tar.gz + mv apache-maven-3.8.8 /usr/lib/maven + - name: Set environment variables + run: | + echo "MAVEN_HOME=/usr/lib/maven" >> $GITHUB_ENV + echo "PATH=${PATH}:/usr/lib/maven/bin" >> $GITHUB_ENV + if [ "${{ matrix.java }}" = "java-17" ]; then + echo "JAVA_HOME=/usr/lib/jvm/java-17-openjdk" >> $GITHUB_ENV + else + echo "JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk" >> $GITHUB_ENV + fi + - name: Build gluten-it + run: | + echo "JAVA_HOME: $JAVA_HOME" + cd $GITHUB_WORKSPACE/ + mvn -ntp clean install -P${{ matrix.spark }} -P${{ matrix.java }} -Pbackends-velox -DskipTests + cd $GITHUB_WORKSPACE/tools/gluten-it + mvn -ntp clean install -P${{ matrix.spark }} -P${{ matrix.java }} + - name: Run TPC-H / TPC-DS + run: | + echo "JAVA_HOME: $JAVA_HOME" + cd $GITHUB_WORKSPACE/tools/gluten-it + GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ + --local --preset=velox --benchmark-type=h --error-on-memleak 
--off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ + && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ + --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 + - name: Run TPC-H / TPC-DS with RAS + run: | + echo "JAVA_HOME: $JAVA_HOME" + cd $GITHUB_WORKSPACE/tools/gluten-it + GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ + --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ + --extra-conf=spark.gluten.sql.ras.enabled=true \ + && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ + --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ + --extra-conf=spark.gluten.sql.ras.enabled=true + + run-tpc-test-ubuntu-oom: + needs: build-native-lib + strategy: + fail-fast: false + matrix: + spark: [ "spark-3.2" ] + runs-on: ubuntu-20.04 + steps: + - name: Maximize build disk space + shell: bash + run: | + df -h + set -euo pipefail + echo "Removing unwanted software... " + sudo rm -rf /usr/share/dotnet + sudo rm -rf /usr/local/lib/android + sudo rm -rf /opt/ghc + sudo rm -rf /opt/hostedtoolcache/CodeQL + sudo docker image prune --all --force > /dev/null + df -h + - uses: actions/checkout@v2 + - name: Download All Artifacts + uses: actions/download-artifact@v2 + with: + name: velox-native-lib-${{github.sha}} + path: ./cpp/build/releases + - name: Setup java and maven + run: | + sudo apt-get update + sudo apt-get install -y openjdk-8-jdk maven + - name: Set environment variables + run: | + echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> $GITHUB_ENV + - name: Build for Spark ${{ matrix.spark }} + run: | + cd $GITHUB_WORKSPACE/ + mvn -ntp clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests + cd $GITHUB_WORKSPACE/tools/gluten-it + mvn -ntp clean install -P${{ matrix.spark }} + GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh data-gen-only --local --benchmark-type=ds -s=30.0 --threads=12 + - name: TPC-DS SF30.0 Parquet local spark3.2 Q67/Q95 low memory, memory isolation off + run: | + cd tools/gluten-it \ + && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh parameterized \ + --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ + --skip-data-gen -m=OffHeapExecutionMemory \ + -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \ + -d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \ + -d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \ + -d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \ + -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5 + - name: TPC-DS SF30.0 Parquet local spark3.2 Q67 low memory, memory isolation on + run: | + cd tools/gluten-it \ + && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh parameterized \ + --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ + --skip-data-gen -m=OffHeapExecutionMemory \ + -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ + -d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \ + -d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \ + -d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \ + -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5 + - name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q95 low memory, memory isolation on + run: | + cd 
tools/gluten-it \ + && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh parameterized \ + --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ + --skip-data-gen -m=OffHeapExecutionMemory \ + -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ + -d=OFFHEAP_SIZE:6g,spark.memory.offHeap.size=6g \ + -d=OFFHEAP_SIZE:4g,spark.memory.offHeap.size=4g \ + -d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \ + -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5 || true + - name: TPC-DS SF30.0 Parquet local spark3.2 Q23A/Q23B low memory + run: | + cd tools/gluten-it \ + && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh parameterized \ + --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q23a,q23b -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ + --skip-data-gen -m=OffHeapExecutionMemory \ + -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \ + -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ + -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \ + -d=FLUSH_MODE:DISABLED,spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \ + -d=FLUSH_MODE:ABANDONED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \ + -d=FLUSH_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 + - name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q97 low memory # The case currently causes crash with "free: invalid size". + run: | + cd tools/gluten-it \ + && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh parameterized \ + --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q97 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ + --skip-data-gen -m=OffHeapExecutionMemory \ + -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \ + -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \ + -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \ + -d=OFFHEAP_SIZE:1g,spark.memory.offHeap.size=1g || true + + run-tpc-test-ubuntu-randomkill: + needs: build-native-lib + strategy: + fail-fast: false + matrix: + spark: [ "spark-3.2" ] + runs-on: ubuntu-20.04 + steps: + - name: Maximize build disk space + shell: bash + run: | + df -h + set -euo pipefail + echo "Removing unwanted software... 
" + sudo rm -rf /usr/share/dotnet + sudo rm -rf /usr/local/lib/android + sudo rm -rf /opt/ghc + sudo rm -rf /opt/hostedtoolcache/CodeQL + sudo docker image prune --all --force > /dev/null + df -h + - uses: actions/checkout@v2 + - name: Download All Artifacts + uses: actions/download-artifact@v2 + with: + name: velox-native-lib-${{github.sha}} + path: ./cpp/build/releases + - name: Setup java and maven + run: | + sudo apt-get update + sudo apt-get install -y openjdk-8-jdk maven + - name: Set environment variables + run: | + echo "JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64" >> $GITHUB_ENV + - name: Build for Spark ${{ matrix.spark }} + run: | + cd $GITHUB_WORKSPACE/ + mvn -ntp clean install -P${{ matrix.spark }} -Pbackends-velox -DskipTests + cd $GITHUB_WORKSPACE/tools/gluten-it + mvn -ntp clean install -P${{ matrix.spark }} + GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh data-gen-only --local --benchmark-type=ds -s=30.0 --threads=12 + - name: TPC-DS SF30.0 Parquet local spark3.2 random kill tasks + run: | + cd tools/gluten-it \ + && GLUTEN_IT_JVM_ARGS=-Xmx6G sbin/gluten-it.sh queries \ + --local --preset=velox --benchmark-type=ds --error-on-memleak -s=30.0 --off-heap-size=8g --threads=12 --shuffle-partitions=72 --iterations=1 \ + --skip-data-gen --random-kill-tasks + + run-tpc-test-ubuntu-2204-celeborn: + needs: build-native-lib + strategy: + fail-fast: false + matrix: + spark: ["spark-3.2"] + celeborn: ["celeborn-0.4.0", "celeborn-0.3.2"] + runs-on: ubuntu-20.04 + container: ubuntu:22.04 + steps: + - uses: actions/checkout@v2 + - name: Download All Artifacts + uses: actions/download-artifact@v2 + with: + name: velox-native-lib-${{github.sha}} + path: ./cpp/build/releases + - name: Setup java and maven + run: | + apt-get update && apt-get install -y openjdk-8-jdk maven wget + - name: Build for Spark ${{ matrix.spark }} + run: | + cd $GITHUB_WORKSPACE/ + mvn clean install -P${{ matrix.spark }} -Pbackends-velox,rss -DskipTests + - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 with ${{ matrix.celeborn }} + run: | + export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64 + EXTRA_PROFILE="" + if [ "${{ matrix.celeborn }}" = "celeborn-0.4.0" ]; then + EXTRA_PROFILE="-Pceleborn-0.4" + fi + echo "EXTRA_PROFILE: ${EXTRA_PROFILE}" + cd /opt && mkdir -p celeborn && \ + wget https://archive.apache.org/dist/incubator/celeborn/${{ matrix.celeborn }}-incubating/apache-${{ matrix.celeborn }}-incubating-bin.tgz && \ + tar xzf apache-${{ matrix.celeborn }}-incubating-bin.tgz -C /opt/celeborn --strip-components=1 && cd celeborn && \ + mv ./conf/celeborn-env.sh.template ./conf/celeborn-env.sh && \ + bash -c "echo -e 'CELEBORN_MASTER_MEMORY=4g\nCELEBORN_WORKER_MEMORY=4g\nCELEBORN_WORKER_OFFHEAP_MEMORY=8g' > ./conf/celeborn-env.sh" && \ + bash -c "echo -e 'celeborn.worker.commitFiles.threads 128\nceleborn.worker.sortPartition.threads 64' > ./conf/celeborn-defaults.conf" && \ + bash ./sbin/start-master.sh && bash ./sbin/start-worker.sh && \ + cd $GITHUB_WORKSPACE/tools/gluten-it && mvn clean install -Pspark-3.2,rss ${EXTRA_PROFILE} && \ + GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ + --local --preset=velox-with-celeborn --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=8 --iterations=1 && \ + GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ + --local --preset=velox-with-celeborn --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=8 --iterations=1 + + run-spark-test-spark32: + runs-on: ubuntu-20.04 + container: 
ghcr.io/facebookincubator/velox-dev:circleci-avx
+    steps:
+      - uses: actions/checkout@v2
+      - name: Build Gluten velox third party
+        run: |
+          yum install sudo patch java-1.8.0-openjdk-devel wget numactl-devel -y && \
+          cd ep/build-velox/src && \
+          ./get_velox.sh && \
+          source /opt/rh/gcc-toolset-9/enable && \
+          ./build_velox.sh --run_setup_script=OFF --enable_ep_cache=OFF --build_test_utils=ON
+      - name: Build Gluten CPP library
+        run: |
+          cd $GITHUB_WORKSPACE/cpp && \
+          source /opt/rh/gcc-toolset-9/enable && \
+          ./compile.sh --build_velox_backend=ON --build_protobuf=ON --build_tests=ON --build_examples=ON --build_benchmarks=ON
+      - name: Gluten CPP Test
+        run: |
+          cd $GITHUB_WORKSPACE/cpp/build && \
+          ctest -V
+      - name: Setup java and maven
+        run: |
+          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+          tar -xvf apache-maven-3.8.8-bin.tar.gz
+          mv apache-maven-3.8.8 /usr/lib/maven
+      - name: Prepare spark.test.home for Spark 3.2.2 (other tests)
+        run: |
+          cd $GITHUB_WORKSPACE/ && \
+          wget https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz && \
+          tar --strip-components=1 -xf spark-3.2.2-bin-hadoop3.2.tgz spark-3.2.2-bin-hadoop3.2/jars/ && \
+          rm -rf spark-3.2.2-bin-hadoop3.2.tgz && \
+          mkdir -p $GITHUB_WORKSPACE//shims/spark32/spark_home/assembly/target/scala-2.12 && \
+          mv jars $GITHUB_WORKSPACE//shims/spark32/spark_home/assembly/target/scala-2.12 && \
+          cd $GITHUB_WORKSPACE// && \
+          wget https://github.com/apache/spark/archive/refs/tags/v3.2.2.tar.gz && \
+          tar --strip-components=1 -xf v3.2.2.tar.gz spark-3.2.2/sql/core/src/test/resources/ && \
+          mkdir -p shims/spark32/spark_home/ && \
+          mv sql shims/spark32/spark_home/
+      - name: Build and run unit test for Spark 3.2.2 (other tests)
+        run: |
+          cd $GITHUB_WORKSPACE/
+          export SPARK_SCALA_VERSION=2.12
+          export MAVEN_HOME=/usr/lib/maven
+          export PATH=${PATH}:${MAVEN_HOME}/bin
+          mvn -ntp clean install -Pspark-3.2 -Pspark-ut -Pbackends-velox -Prss -Piceberg -Pdelta -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark32/spark_home/" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags && \
+          mvn -ntp test -Pspark-3.2 -Pbackends-velox -Piceberg -Pdelta -DtagsToExclude=None -DtagsToInclude=org.apache.gluten.tags.UDFTest
+      - name: Upload golden files
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: golden-files-spark32
+          path: /tmp/tpch-approved-plan/**
+      - name: Gluten CPP Benchmark Test
+        run: |
+          # This test depends on example.json generated by the above mvn test.
+          cd $GITHUB_WORKSPACE/cpp/build/velox/benchmarks && \
+          ./generic_benchmark --run-example --with-shuffle --threads 1 --iterations 1
+
+  run-spark-test-spark32-slow:
+    runs-on: ubuntu-20.04
+    container: ghcr.io/facebookincubator/velox-dev:circleci-avx
+    steps:
+      - uses: actions/checkout@v2
+      - name: Build Gluten velox third party
+        run: |
+          yum install sudo patch java-1.8.0-openjdk-devel wget -y && \
+          cd ep/build-velox/src && \
+          ./get_velox.sh && \
+          source /opt/rh/gcc-toolset-9/enable && \
+          ./build_velox.sh --run_setup_script=OFF --enable_ep_cache=OFF --build_test_utils=ON
+      - name: Build Gluten CPP library
+        run: |
+          cd $GITHUB_WORKSPACE/cpp && \
+          source /opt/rh/gcc-toolset-9/enable && \
+          ./compile.sh --build_velox_backend=ON --build_protobuf=ON
+      - name: Setup java and maven
+        run: |
+          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+          tar -xvf apache-maven-3.8.8-bin.tar.gz
+          mv apache-maven-3.8.8 /usr/lib/maven
+      - name: Prepare spark.test.home for Spark 3.2.2 (slow tests)
+        run: |
+          cd $GITHUB_WORKSPACE// && \
+          wget https://github.com/apache/spark/archive/refs/tags/v3.2.2.tar.gz && \
+          tar --strip-components=1 -xf v3.2.2.tar.gz spark-3.2.2/sql/core/src/test/resources/ && \
+          mkdir -p shims/spark32/spark_home/ && \
+          mv sql shims/spark32/spark_home/
+      - name: Build and run unit test for Spark 3.2.2 (slow tests)
+        run: |
+          cd $GITHUB_WORKSPACE/ && \
+          export MAVEN_HOME=/usr/lib/maven
+          export PATH=${PATH}:${MAVEN_HOME}/bin
+          mvn -ntp clean install -Pspark-3.2 -Pspark-ut -Pbackends-velox -Prss -Piceberg -Pdelta -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark32/spark_home/" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
+
+  run-spark-test-spark33:
+    runs-on: ubuntu-20.04
+    container: ghcr.io/facebookincubator/velox-dev:circleci-avx
+    steps:
+      - uses: actions/checkout@v2
+      - name: Build Gluten velox third party
+        run: |
+          yum install sudo patch java-1.8.0-openjdk-devel wget -y && \
+          cd ep/build-velox/src && \
+          ./get_velox.sh && \
+          source /opt/rh/gcc-toolset-9/enable && \
+          ./build_velox.sh --run_setup_script=OFF --enable_ep_cache=OFF --build_test_utils=ON
+      - name: Build Gluten CPP library
+        run: |
+          cd $GITHUB_WORKSPACE/cpp && \
+          source /opt/rh/gcc-toolset-9/enable && \
+          ./compile.sh --build_velox_backend=ON --build_protobuf=ON --build_tests=ON --build_examples=ON --build_benchmarks=ON
+      - name: Setup java and maven
+        run: |
+          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+          tar -xvf apache-maven-3.8.8-bin.tar.gz
+          mv apache-maven-3.8.8 /usr/lib/maven
+      - name: Prepare spark.test.home for Spark 3.3.1 (other tests)
+        run: |
+          cd $GITHUB_WORKSPACE/ && \
+          wget https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz && \
+          tar --strip-components=1 -xf spark-3.3.1-bin-hadoop3.tgz spark-3.3.1-bin-hadoop3/jars/ && \
+          rm -rf spark-3.3.1-bin-hadoop3.tgz && \
+          mkdir -p $GITHUB_WORKSPACE//shims/spark33/spark_home/assembly/target/scala-2.12 && \
+          mv jars $GITHUB_WORKSPACE//shims/spark33/spark_home/assembly/target/scala-2.12 && \
+          cd $GITHUB_WORKSPACE// && \
+          wget https://github.com/apache/spark/archive/refs/tags/v3.3.1.tar.gz && \
+          tar --strip-components=1 -xf v3.3.1.tar.gz spark-3.3.1/sql/core/src/test/resources/ && \
+          mkdir -p shims/spark33/spark_home/ && \
+          mv sql shims/spark33/spark_home/
+      - name: Build and Run unit test for Spark 3.3.1 (other tests)
+        run: |
+          cd $GITHUB_WORKSPACE/ && \
+          export SPARK_SCALA_VERSION=2.12 && \
+          export MAVEN_HOME=/usr/lib/maven
+          export PATH=${PATH}:${MAVEN_HOME}/bin
+          mvn -ntp clean install -Pspark-3.3 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark33/spark_home/" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags && \
+          mvn -ntp test -Pspark-3.3 -Pbackends-velox -Piceberg -Pdelta -DtagsToExclude=None -DtagsToInclude=org.apache.gluten.tags.UDFTest
+      - name: Upload golden files
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: golden-files-spark33
+          path: /tmp/tpch-approved-plan/**
+
+
+  run-spark-test-spark33-slow:
+    runs-on: ubuntu-20.04
+    container: ghcr.io/facebookincubator/velox-dev:circleci-avx
+    steps:
+      - uses: actions/checkout@v2
+      - name: Build Gluten velox third party
+        run: |
+          yum install sudo patch java-1.8.0-openjdk-devel wget -y && \
+          cd ep/build-velox/src && \
+          ./get_velox.sh && \
+          source /opt/rh/gcc-toolset-9/enable && \
+          ./build_velox.sh --run_setup_script=OFF --enable_ep_cache=OFF --build_test_utils=ON
+      - name: Build Gluten CPP library
+        run: |
+          cd $GITHUB_WORKSPACE/cpp && \
+          source /opt/rh/gcc-toolset-9/enable && \
+          ./compile.sh --build_velox_backend=ON --build_protobuf=ON
+      - name: Setup java and maven
+        run: |
+          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+          tar -xvf apache-maven-3.8.8-bin.tar.gz
+          mv apache-maven-3.8.8 /usr/lib/maven
+      - name: Prepare spark.test.home for Spark 3.3.1 (slow tests)
+        run: |
+          cd $GITHUB_WORKSPACE// && \
+          wget https://github.com/apache/spark/archive/refs/tags/v3.3.1.tar.gz && \
+          tar --strip-components=1 -xf v3.3.1.tar.gz spark-3.3.1/sql/core/src/test/resources/ && \
+          mkdir -p shims/spark33/spark_home/ && \
+          mv sql shims/spark33/spark_home/
+      - name: Build and Run unit test for Spark 3.3.1 (slow tests)
+        run: |
+          cd $GITHUB_WORKSPACE/ && \
+          export MAVEN_HOME=/usr/lib/maven
+          export PATH=${PATH}:${MAVEN_HOME}/bin
+          mvn -ntp clean install -Pspark-3.3 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark33/spark_home/" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
+
+  run-spark-test-spark34:
+    runs-on: ubuntu-20.04
+    container: ghcr.io/facebookincubator/velox-dev:circleci-avx
+    steps:
+      - uses: actions/checkout@v2
+      - name: Build Gluten velox third party
+        run: |
+          yum install sudo patch java-1.8.0-openjdk-devel wget -y && \
+          cd ep/build-velox/src && \
+          ./get_velox.sh && \
+          source /opt/rh/gcc-toolset-9/enable && \
+          ./build_velox.sh --run_setup_script=OFF --enable_ep_cache=OFF --build_test_utils=ON
+      - name: Build Gluten CPP library
+        run: |
+          cd $GITHUB_WORKSPACE/cpp && \
+          source /opt/rh/gcc-toolset-9/enable && \
+          ./compile.sh --build_velox_backend=ON --build_protobuf=ON --build_tests=ON --build_examples=ON --build_benchmarks=ON
+      - name: Setup java and maven
+        run: |
+          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+          tar -xvf apache-maven-3.8.8-bin.tar.gz
+          mv apache-maven-3.8.8 /usr/lib/maven
+      - name: Prepare spark.test.home for Spark 3.4.2 (other tests)
+        run: |
+          cd $GITHUB_WORKSPACE/ && \
+          wget https://archive.apache.org/dist/spark/spark-3.4.2/spark-3.4.2-bin-hadoop3.tgz && \
+          tar --strip-components=1 -xf spark-3.4.2-bin-hadoop3.tgz spark-3.4.2-bin-hadoop3/jars/ && \
+          rm -rf spark-3.4.2-bin-hadoop3.tgz && \
+          mkdir -p $GITHUB_WORKSPACE//shims/spark34/spark_home/assembly/target/scala-2.12 && \
+          mv jars $GITHUB_WORKSPACE//shims/spark34/spark_home/assembly/target/scala-2.12 && \
+          cd $GITHUB_WORKSPACE// && \
+          wget https://github.com/apache/spark/archive/refs/tags/v3.4.2.tar.gz && \
+          tar --strip-components=1 -xf v3.4.2.tar.gz spark-3.4.2/sql/core/src/test/resources/ && \
+          mkdir -p shims/spark34/spark_home/ && \
+          mv sql shims/spark34/spark_home/
+      - name: Build and Run unit test for Spark 3.4.2 (other tests)
+        run: |
+          cd $GITHUB_WORKSPACE/ && \
+          export SPARK_SCALA_VERSION=2.12 && \
+          export MAVEN_HOME=/usr/lib/maven
+          export PATH=${PATH}:${MAVEN_HOME}/bin
+          mvn -ntp clean install -Pspark-3.4 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark34/spark_home/" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags && \
+          mvn -ntp test -Pspark-3.4 -Pbackends-velox -Piceberg -Pdelta -DtagsToExclude=None -DtagsToInclude=org.apache.gluten.tags.UDFTest
+      - name: Upload golden files
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: golden-files-spark34
+          path: /tmp/tpch-approved-plan/**
+
+
+  run-spark-test-spark34-slow:
+    runs-on: ubuntu-20.04
+    container: ghcr.io/facebookincubator/velox-dev:circleci-avx
+    steps:
+      - uses: actions/checkout@v2
+      - name: Build Gluten velox third party
+        run: |
+          yum install sudo patch java-1.8.0-openjdk-devel wget -y && \
+          cd ep/build-velox/src && \
+          ./get_velox.sh && \
+          source /opt/rh/gcc-toolset-9/enable && \
+          ./build_velox.sh --run_setup_script=OFF --enable_ep_cache=OFF --build_test_utils=ON
+      - name: Build Gluten CPP library
+        run: |
+          cd $GITHUB_WORKSPACE/cpp && \
+          source /opt/rh/gcc-toolset-9/enable && \
+          ./compile.sh --build_velox_backend=ON --build_protobuf=ON
+      - name: Setup java and maven
+        run: |
+          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+          tar -xvf apache-maven-3.8.8-bin.tar.gz
+          mv apache-maven-3.8.8 /usr/lib/maven
+      - name: Prepare spark.test.home for Spark 3.4.2 (slow tests)
+        run: |
+          cd $GITHUB_WORKSPACE// && \
+          wget https://github.com/apache/spark/archive/refs/tags/v3.4.2.tar.gz && \
+          tar --strip-components=1 -xf v3.4.2.tar.gz spark-3.4.2/sql/core/src/test/resources/ && \
+          mkdir -p shims/spark34/spark_home/ && \
+          mv sql shims/spark34/spark_home/
+      - name: Build and Run unit test for Spark 3.4.2 (slow tests)
+        run: |
+          cd $GITHUB_WORKSPACE/
+          export MAVEN_HOME=/usr/lib/maven
+          export PATH=${PATH}:${MAVEN_HOME}/bin
+          mvn -ntp clean install -Pspark-3.4 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark34/spark_home/" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
+
+  run-spark-test-spark35:
+    runs-on: ubuntu-20.04
+    container: ghcr.io/facebookincubator/velox-dev:circleci-avx
+    steps:
+      - uses: actions/checkout@v2
+      - name: Build Gluten velox third party
+        run: |
+          yum install sudo patch java-1.8.0-openjdk-devel wget -y && \
+          cd ep/build-velox/src && \
+          ./get_velox.sh && \
+          source /opt/rh/gcc-toolset-9/enable && \
+          ./build_velox.sh --run_setup_script=OFF --enable_ep_cache=OFF --build_test_utils=ON
+      - name: Build Gluten CPP library
+        run: |
+          cd $GITHUB_WORKSPACE/cpp && \
+          source /opt/rh/gcc-toolset-9/enable && \
+          ./compile.sh --build_velox_backend=ON --build_protobuf=ON --build_tests=ON --build_examples=ON --build_benchmarks=ON
+      - name: Setup java and maven
+        run: |
+          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+          tar -xvf apache-maven-3.8.8-bin.tar.gz
+          mv apache-maven-3.8.8 /usr/lib/maven
+      - name: Prepare spark.test.home for Spark 3.5.1 (other tests)
+        run: |
+          cd $GITHUB_WORKSPACE/ && \
+          wget https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz && \
+          tar --strip-components=1 -xf spark-3.5.1-bin-hadoop3.tgz spark-3.5.1-bin-hadoop3/jars/ && \
+          rm -rf spark-3.5.1-bin-hadoop3.tgz && \
+          mkdir -p $GITHUB_WORKSPACE//shims/spark35/spark_home/assembly/target/scala-2.12 && \
+          mv jars $GITHUB_WORKSPACE//shims/spark35/spark_home/assembly/target/scala-2.12 && \
+          cd $GITHUB_WORKSPACE// && \
+          wget https://github.com/apache/spark/archive/refs/tags/v3.5.1.tar.gz && \
+          tar --strip-components=1 -xf v3.5.1.tar.gz spark-3.5.1/sql/core/src/test/resources/ && \
+          mkdir -p shims/spark35/spark_home/ && \
+          mv sql shims/spark35/spark_home/
+      - name: Build for Spark 3.5.1 (other tests)
+        run: |
+          cd $GITHUB_WORKSPACE/ && \
+          export SPARK_SCALA_VERSION=2.12 && \
+          export MAVEN_HOME=/usr/lib/maven
+          export PATH=${PATH}:${MAVEN_HOME}/bin
+          mvn -ntp clean install -Pspark-3.5 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/" -DskipTests
+      - name: Upload golden files
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: golden-files-spark35
+          path: /tmp/tpch-approved-plan/**
\ No newline at end of file
diff --git a/dev/ci-velox-buildstatic.sh b/dev/ci-velox-buildstatic.sh
new file mode 100755
index 000000000000..a9b9d2c3fcc7
--- /dev/null
+++ b/dev/ci-velox-buildstatic.sh
@@ -0,0 +1,9 @@
+yum install sudo patch java-1.8.0-openjdk-devel -y
+cd $GITHUB_WORKSPACE/ep/build-velox/src
+./get_velox.sh
+source /opt/rh/devtoolset-9/enable
+source $GITHUB_WORKSPACE//dev/vcpkg/env.sh
+cd $GITHUB_WORKSPACE/
+sed -i '/^headers/d' ep/build-velox/build/velox_ep/CMakeLists.txt
+export NUM_THREADS=4
+./dev/builddeps-veloxbe.sh --build_tests=OFF --build_benchmarks=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON

From 8b52556cf9212026cf53c2096388dc880cbbcfd1 Mon Sep 17 00:00:00 2001
From: PHILO-HE
Date: Thu, 11 Apr 2024 15:44:34 +0800
Subject: [PATCH 3/3] Remove previous build

---
 .github/workflows/velox_docker.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/velox_docker.yml b/.github/workflows/velox_docker.yml
index 4d65df5b2bcb..cd1b530cdbfd 100644
--- a/.github/workflows/velox_docker.yml
+++ b/.github/workflows/velox_docker.yml
@@ -63,6 +63,7 @@ jobs:
       - name: Build Gluten Velox third party
         if: ${{ steps.cache.outputs.cache-hit != 'true' }}
         run: |
+          sudo rm -rf ep/build-velox/build/velox_ep/
           source dev/ci-velox-buildstatic.sh
       - uses: actions/upload-artifact@v2
         with: