From ca4afaf67953058d4c77ba950612575678273755 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Mon, 27 Jan 2025 17:22:45 -0800 Subject: [PATCH 1/4] use 1.7.2 --- dev/Dockerfile | 6 +++--- tests/conftest.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/Dockerfile b/dev/Dockerfile index b55be39e9d..982ddc0cfe 100644 --- a/dev/Dockerfile +++ b/dev/Dockerfile @@ -39,7 +39,7 @@ WORKDIR ${SPARK_HOME} # Remember to also update `tests/conftest`'s spark setting ENV SPARK_VERSION=3.5.3 ENV ICEBERG_SPARK_RUNTIME_VERSION=3.5_2.12 -ENV ICEBERG_VERSION=1.6.0 +ENV ICEBERG_VERSION=1.7.2 ENV PYICEBERG_VERSION=0.8.1 RUN curl --retry 5 -s -C - https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop3.tgz -o spark-${SPARK_VERSION}-bin-hadoop3.tgz \ @@ -47,11 +47,11 @@ RUN curl --retry 5 -s -C - https://archive.apache.org/dist/spark/spark-${SPARK_V && rm -rf spark-${SPARK_VERSION}-bin-hadoop3.tgz # Download iceberg spark runtime -RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar -Lo iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar \ +RUN curl --retry 5 -s https://repository.apache.org/content/repositories/orgapacheiceberg-1180/org/apache/iceberg/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}/${ICEBERG_VERSION}/iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar -Lo iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar \ && mv iceberg-spark-runtime-${ICEBERG_SPARK_RUNTIME_VERSION}-${ICEBERG_VERSION}.jar /opt/spark/jars # Download AWS bundle -RUN curl --retry 5 -s https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar -Lo /opt/spark/jars/iceberg-aws-bundle-${ICEBERG_VERSION}.jar +RUN curl --retry 5 -s https://repository.apache.org/content/repositories/orgapacheiceberg-1180/org/apache/iceberg/iceberg-aws-bundle/${ICEBERG_VERSION}/iceberg-aws-bundle-${ICEBERG_VERSION}.jar -Lo /opt/spark/jars/iceberg-aws-bundle-${ICEBERG_VERSION}.jar COPY spark-defaults.conf /opt/spark/conf ENV PATH="/opt/spark/sbin:/opt/spark/bin:${PATH}" diff --git a/tests/conftest.py b/tests/conftest.py index cfd9796312..1904230adc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2420,7 +2420,7 @@ def spark() -> "SparkSession": # Remember to also update `dev/Dockerfile` spark_version = ".".join(importlib.metadata.version("pyspark").split(".")[:2]) scala_version = "2.12" - iceberg_version = "1.6.0" + iceberg_version = "1.7.2" os.environ["PYSPARK_SUBMIT_ARGS"] = ( f"--packages org.apache.iceberg:iceberg-spark-runtime-{spark_version}_{scala_version}:{iceberg_version}," From 3793477b62d4bc28e723c00f97be698c21729cdf Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Tue, 28 Jan 2025 08:17:21 -0800 Subject: [PATCH 2/4] limit parallelism --- tests/conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 1904230adc..137029c1a5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2433,6 +2433,8 @@ def spark() -> "SparkSession": spark = ( SparkSession.builder.appName("PyIceberg integration test") .config("spark.sql.session.timeZone", "UTC") + .config("spark.sql.shuffle.partitions", "1") + .config("spark.default.parallelism", "1") .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions") .config("spark.sql.catalog.integration", "org.apache.iceberg.spark.SparkCatalog") .config("spark.sql.catalog.integration.catalog-impl", "org.apache.iceberg.rest.RESTCatalog") From 044687b4cdc6f84de285c66a98c09714326d5cd9 Mon Sep 17 00:00:00 2001 From: Kevin Liu Date: Tue, 28 Jan 2025 09:08:06 -0800 Subject: [PATCH 3/4] use rc repo --- tests/conftest.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 137029c1a5..f3a8166165 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2432,9 +2432,8 @@ def spark() -> "SparkSession": spark = ( SparkSession.builder.appName("PyIceberg integration test") + .config('spark.jars.repositories', 'https://repository.apache.org/content/repositories/orgapacheiceberg-1180/') .config("spark.sql.session.timeZone", "UTC") - .config("spark.sql.shuffle.partitions", "1") - .config("spark.default.parallelism", "1") .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions") .config("spark.sql.catalog.integration", "org.apache.iceberg.spark.SparkCatalog") .config("spark.sql.catalog.integration.catalog-impl", "org.apache.iceberg.rest.RESTCatalog") From fe2eab0df2d2eadfd7c997de2fe0e7fd11fc76c5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 28 Jan 2025 17:08:51 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index f3a8166165..b455b57995 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2432,7 +2432,7 @@ def spark() -> "SparkSession": spark = ( SparkSession.builder.appName("PyIceberg integration test") - .config('spark.jars.repositories', 'https://repository.apache.org/content/repositories/orgapacheiceberg-1180/') + .config("spark.jars.repositories", "https://repository.apache.org/content/repositories/orgapacheiceberg-1180/") .config("spark.sql.session.timeZone", "UTC") .config("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions") .config("spark.sql.catalog.integration", "org.apache.iceberg.spark.SparkCatalog")