From 19511bb5381f5ea5dc86dd51eaa0c22a5e90ffc9 Mon Sep 17 00:00:00 2001
From: Yufei Gu
Date: Tue, 20 Aug 2024 18:24:49 -0700
Subject: [PATCH 1/3] Update Spark version

---
 regtests/run.sh   |  5 ++++-
 regtests/setup.sh | 14 +++++++-------
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/regtests/run.sh b/regtests/run.sh
index eef12b622..aa5e9ebfe 100755
--- a/regtests/run.sh
+++ b/regtests/run.sh
@@ -16,8 +16,11 @@
 #
 # Run without args to run all tests, or single arg for single test.
 
+export SPARK_VERSION=spark-3.5.2
+export SPARK_DISTRIBUTION=${SPARK_VERSION}-bin-hadoop3-scala2.13
+
 if [ -z "${SPARK_HOME}"]; then
-  export SPARK_HOME=$(realpath ~/spark-3.5.1-bin-hadoop3-scala2.13)
+  export SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION})
 fi
 export PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH"
 
diff --git a/regtests/setup.sh b/regtests/setup.sh
index e28a6fe05..93fb450e3 100755
--- a/regtests/setup.sh
+++ b/regtests/setup.sh
@@ -24,7 +24,7 @@ set -x
 SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 
 if [ -z "${SPARK_HOME}" ]; then
-  SPARK_HOME=$(realpath ~/spark-3.5.1-bin-hadoop3-scala2.13)
+  SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION})
 fi
 SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf"
 export PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH"
@@ -33,22 +33,22 @@ export PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-
 echo 'Verifying Spark binaries...'
 if ! [ -f ${SPARK_HOME}/bin/spark-sql ]; then
   echo 'Setting up Spark...'
-  if ! [ -f ~/spark-3.5.1-bin-hadoop3-scala2.13.tgz ]; then
+  if ! [ -f ~/${SPARK_DISTRIBUTION}.tgz ]; then
     echo 'Downloading spark distro...'
-    wget -O ~/spark-3.5.1-bin-hadoop3-scala2.13.tgz https://dlcdn.apache.org/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3-scala2.13.tgz
-    if ! [ -f ~/spark-3.5.1-bin-hadoop3-scala2.13.tgz ]; then
+    wget -O ~/${SPARK_DISTRIBUTION}.tgz https://dlcdn.apache.org/spark/${SPARK_VERSION}/${SPARK_DISTRIBUTION}.tgz
+    if ! [ -f ~/${SPARK_DISTRIBUTION}.tgz ]; then
       if [[ "${OSTYPE}" == "darwin"* ]]; then
         echo "Detected OS: mac. Running 'brew install wget' to try again."
         brew install wget
-        wget -O ~/spark-3.5.1-bin-hadoop3-scala2.13.tgz https://dlcdn.apache.org/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3-scala2.13.tgz
+        wget -O ~/${SPARK_DISTRIBUTION}.tgz https://dlcdn.apache.org/spark/${SPARK_VERSION}/${SPARK_DISTRIBUTION}.tgz
       fi
     fi
   else
     echo 'Found existing Spark tarball'
   fi
-  tar xzvf ~/spark-3.5.1-bin-hadoop3-scala2.13.tgz -C ~
+  tar xzvf ~/${SPARK_DISTRIBUTION}.tgz -C ~
   echo 'Done!'
-  SPARK_HOME=$(realpath ~/spark-3.5.1-bin-hadoop3-scala2.13)
+  SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION})
   SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf"
 else
   echo 'Verified Spark distro already installed.'

From 85fef998c1b9a86d00e5f5ef45f18334500c6778 Mon Sep 17 00:00:00 2001
From: Yufei Gu
Date: Tue, 20 Aug 2024 18:38:11 -0700
Subject: [PATCH 2/3] setup.sh won't work independently

---
 regtests/README.md | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/regtests/README.md b/regtests/README.md
index 590201ffa..d4c8b92c6 100644
--- a/regtests/README.md
+++ b/regtests/README.md
@@ -74,13 +74,6 @@ AZURE_BLOB_TEST_BASE=abfss://@.blob.core.w
 into the `credentials` folder. Then specify the name of the file in your .env file - do not change the
 path, as `/tmp/credentials` is the folder on the container where the credentials file will be mounted.
 
-## Setup without running tests
-
-Setup is idempotent.
-
-```
-./setup.sh
-```
 
 ## Experiment with failed test
 

From 2bc8333f904baec455a75214b4599d6e2bd5b913 Mon Sep 17 00:00:00 2001
From: Yufei Gu
Date: Wed, 21 Aug 2024 11:13:53 -0700
Subject: [PATCH 3/3] Set the env variables in run_spark_sql.sh

---
 regtests/run_spark_sql.sh | 5 ++++-
 regtests/setup.sh         | 4 ++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/regtests/run_spark_sql.sh b/regtests/run_spark_sql.sh
index 5451e2bd9..ba0c05a77 100755
--- a/regtests/run_spark_sql.sh
+++ b/regtests/run_spark_sql.sh
@@ -22,10 +22,13 @@
 REGTEST_HOME=$(dirname $(realpath $0))
 cd ${REGTEST_HOME}
 
+export SPARK_VERSION=spark-3.5.2
+export SPARK_DISTRIBUTION=${SPARK_VERSION}-bin-hadoop3-scala2.13
+
 ./setup.sh
 
 if [ -z "${SPARK_HOME}"]; then
-  export SPARK_HOME=$(realpath ~/spark-3.5.1-bin-hadoop3-scala2.13)
+  export SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION})
 fi
 
 SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN:-principal:root;realm:default-realm}"
diff --git a/regtests/setup.sh b/regtests/setup.sh
index 93fb450e3..039725f92 100755
--- a/regtests/setup.sh
+++ b/regtests/setup.sh
@@ -33,6 +33,10 @@ export PYTHONPATH="${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.9.7-
 echo 'Verifying Spark binaries...'
 if ! [ -f ${SPARK_HOME}/bin/spark-sql ]; then
   echo 'Setting up Spark...'
+  if [ -z "${SPARK_VERSION}" ] || [ -z "${SPARK_DISTRIBUTION}" ]; then
+    echo 'SPARK_VERSION or SPARK_DISTRIBUTION not set. Please set SPARK_VERSION and SPARK_DISTRIBUTION to the desired version.'
+    exit 1
+  fi
   if ! [ -f ~/${SPARK_DISTRIBUTION}.tgz ]; then
     echo 'Downloading spark distro...'
     wget -O ~/${SPARK_DISTRIBUTION}.tgz https://dlcdn.apache.org/spark/${SPARK_VERSION}/${SPARK_DISTRIBUTION}.tgz