From 5db28433db2ed67704b06c0aeab7948403688d52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 11 Dec 2024 10:23:54 +0100 Subject: [PATCH] GH-44980: [CI] Remove retrieval of Arrow version from Java on Spark integration and update test structure for PySpark (#44981) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change The job is currently failing. ### What changes are included in this PR? Remove unnecessary check on Java code and refactor pyspark test modules to follow new test structure: https://github.com/apache/spark/pull/49104 ### Are these changes tested? Via archery ### Are there any user-facing changes? No * GitHub Issue: #44980 Authored-by: Raúl Cumplido Signed-off-by: Raúl Cumplido --- ci/scripts/integration_spark.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ci/scripts/integration_spark.sh b/ci/scripts/integration_spark.sh index 4272e2e776d6e..f7ef87a8b8f29 100755 --- a/ci/scripts/integration_spark.sh +++ b/ci/scripts/integration_spark.sh @@ -32,11 +32,6 @@ if [ "${SPARK_VERSION:1:2}" == "2." ]; then export ARROW_PRE_0_15_IPC_FORMAT=1 fi -# Get Arrow Java version -pushd ${source_dir}/java - arrow_version=`mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | sed -n -e '/^\[.*\]/ !{ /^[0-9]/ { p; q } }'` -popd - export MAVEN_OPTS="-Xss256m -Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=warn" export MAVEN_OPTS="${MAVEN_OPTS} -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn" @@ -47,8 +42,13 @@ pushd ${spark_dir} build/mvn -B -DskipTests package # Run pyarrow related Python tests only + # "pyspark.sql.tests.arrow.test_arrow_grouped_map" and + # "pyspark.sql.tests.arrow.test_arrow_cogrouped_map" currently fail. + # See: https://github.com/apache/arrow/issues/44986 spark_python_tests=( - "pyspark.sql.tests.test_arrow") + "pyspark.sql.tests.arrow.test_arrow" + "pyspark.sql.tests.arrow.test_arrow_map" + "pyspark.sql.tests.arrow.test_arrow_python_udf") case "${SPARK_VERSION}" in v1.*|v2.*|v3.0.*|v3.1.*|v3.2.*|v3.3.*)