From 5a6c1391b4266f7613217784dd52c9630654a239 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Mon, 29 Sep 2025 14:24:20 -0700 Subject: [PATCH 1/2] Add delta examples --- examples/TableFormat-Examples/delta/README.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 examples/TableFormat-Examples/delta/README.md diff --git a/examples/TableFormat-Examples/delta/README.md b/examples/TableFormat-Examples/delta/README.md new file mode 100644 index 000000000..e69de29bb From 40d77bb2e2019b71fdf102fdc300f4d44aaa1588 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Wed, 1 Oct 2025 15:02:11 -0700 Subject: [PATCH 2/2] Delta microbenchmarks --- .../notebooks/delta-benchmarks-gpu.ipynb | 2849 +++++++++++++++++ examples/TableFormat-Examples/delta/README.md | 0 2 files changed, 2849 insertions(+) create mode 100644 examples/SQL+DF-Examples/micro-benchmarks/notebooks/delta-benchmarks-gpu.ipynb delete mode 100644 examples/TableFormat-Examples/delta/README.md diff --git a/examples/SQL+DF-Examples/micro-benchmarks/notebooks/delta-benchmarks-gpu.ipynb b/examples/SQL+DF-Examples/micro-benchmarks/notebooks/delta-benchmarks-gpu.ipynb new file mode 100644 index 000000000..052776cf5 --- /dev/null +++ b/examples/SQL+DF-Examples/micro-benchmarks/notebooks/delta-benchmarks-gpu.ipynb @@ -0,0 +1,2849 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "62787244", + "metadata": {}, + "source": [ + "# Microbenchmarks on GPU\n", + "This is a notebook for microbenchmarks running on GPU. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c3a15d7", + "metadata": {}, + "outputs": [], + "source": [ + "from pyspark.sql import SparkSession\n", + "from pyspark.conf import SparkConf\n", + "from time import time\n", + "import os\n", + "# Change to your cluster ip:port and directories\n", + "SPARK_MASTER_URL = os.getenv(\"SPARK_MASTER_URL\", \"spark:your-ip:port\")\n", + "RAPIDS_JAR = os.getenv(\"RAPIDS_JAR\", \"/your-path/rapids-4-spark_2.12-25.08.0.jar\")\n", + "DATA_ROOT = os.getenv(\"DATA_ROOT\", \"/data\")" + ] + }, + { + "cell_type": "markdown", + "id": "b10a2ad1", + "metadata": {}, + "source": [ + "Run the example with retryTimes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c3536ad", + "metadata": {}, + "outputs": [], + "source": [ + "def runMicroBenchmark(spark, appName, query, retryTimes):\n", + " count = 0\n", + " total_time = 0\n", + " # You can print the physical plan of each query\n", + " # spark.sql(query).explain()\n", + " while count < retryTimes:\n", + " start = time()\n", + " spark.sql(query).show(5)\n", + " end = time()\n", + " total_time += round(end - start, 2)\n", + " count = count + 1\n", + " print(\"Retry times : {}, \".format(count) + appName + \" microbenchmark takes {} seconds\".format(round(end - start, 2)))\n", + " print(appName + \" microbenchmark takes average {} seconds after {} retries\".format(round(total_time/retryTimes, 2),retryTimes))\n", + " with open('result.txt', 'a') as file:\n", + " file.write(\"{},{},{}\\n\".format(appName, round(total_time/retryTimes, 2), retryTimes))" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "975717da", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "25/09/29 17:10:42 WARN RapidsPluginUtils: spark.rapids.sql.multiThreadedRead.numThreads is set to 20.\n", + "25/09/29 17:10:42 WARN RapidsPluginUtils: RAPIDS Accelerator is enabled, to disable GPU support set 
`spark.rapids.sql.enabled` to false.\n", + "25/09/29 17:10:42 WARN RapidsPluginUtils: spark.rapids.sql.explain is set to `NOT_ON_GPU`. Set it to 'NONE' to suppress the diagnostics logging about the query placement on the GPU.\n", + "25/09/29 17:10:42 WARN GpuDeviceManager: spark.rapids.memory.host.offHeapLimit.size is not set; we used memory limit derived from (0.8 * (estimated available host memory / device count) - spark.executor.memory - spark.executor.pyspark.memory - spark.memory.offHeap.size) = (0.8 * (106798522368 / 1) - 17179869184 - 0 - 0) = 71694922547\n", + "25/09/29 17:10:42 WARN GpuDeviceManager: Off Heap Host Memory configured to be 8192.0 MiB pinned, 59941.60546875 MiB non-pinned, and 240.0 MiB of untracked overhead.\n", + "25/09/29 17:10:42 WARN GpuDeviceManager: The default cuDF host pool was already configured\n", + "25/09/29 17:10:42 WARN SpillFramework: both spark.rapids.memory.host.offHeapLimit.enabled and spark.rapids.memory.host.spillStorageSize are set; using spark.rapids.memory.host.offHeapLimit.size and ignoring spark.rapids.memory.host.spillStorageSize\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# You need to update with your real hardware resource \n", + "driverMem = os.getenv(\"DRIVER_MEM\", \"50g\")\n", + "executorMem = os.getenv(\"EXECUTOR_MEM\", \"16g\")\n", + "maxPartionBytes = os.getenv(\"MAX_PARTITION_BYTES\", \"1g\")\n", + "pinnedPoolSize = os.getenv(\"PINNED_POOL_SIZE\", \"8g\")\n", + "concurrentGpuTasks = os.getenv(\"CONCURRENT_GPU_TASKS\", \"3\")\n", + "executorCores = int(os.getenv(\"EXECUTOR_CORES\", \"16\"))\n", + "gpuPerTask = 1/executorCores\n", + "# Common spark settings\n", + "conf = SparkConf()\n", + "conf.setMaster(SPARK_MASTER_URL)\n", + "conf.setAppName(\"Delta examples\")\n", + "conf.set(\"spark.driver.memory\", driverMem)\n", + "## The tasks will run on GPU memory, so there is no need to set a high 
host memory\n", + "conf.set(\"spark.executor.memory\", executorMem)\n", + "## The tasks will run on GPU cores, so there is no need to use many cpu cores\n", + "conf.set(\"spark.executor.cores\", executorCores)\n", + "conf.set(\"spark.locality.wait\", \"0\")\n", + "conf.set(\"spark.sql.files.maxPartitionBytes\", maxPartionBytes) \n", + "conf.set(\"spark.dynamicAllocation.enabled\", \"false\") \n", + "conf.set(\"spark.sql.adaptive.enabled\", \"true\") \n", + "\n", + "# Delta settings\n", + "conf.set(\"spark.jars.packages\", \"io.delta:delta-spark_2.12:3.3.0\")\n", + "conf.set(\"spark.sql.extensions\", \"io.delta.sql.DeltaSparkSessionExtension\")\n", + "conf.set(\"spark.sql.catalog.spark_catalog\", \"org.apache.spark.sql.delta.catalog.DeltaCatalog\")\n", + "\n", + "# Plugin settings\n", + "conf.set(\"spark.executor.resource.gpu.amount\", \"0\")\n", + "# CONCURRENT_GPU_TASKS tasks (3 by default) will run concurrently per GPU\n", + "conf.set(\"spark.rapids.sql.concurrentGpuTasks\", concurrentGpuTasks)\n", + "# Pinned 8g host memory to transfer data between GPU and host memory\n", + "conf.set(\"spark.rapids.memory.pinnedPool.size\", pinnedPoolSize)\n", + "# 16 tasks will run concurrently per executor, as we set spark.executor.cores=16\n", + "conf.set(\"spark.task.resource.gpu.amount\", \"0\") \n", + "conf.set(\"spark.rapids.sql.enabled\", \"true\") \n", + "conf.set(\"spark.plugins\", \"com.nvidia.spark.SQLPlugin\")\n", + "conf.set(\"spark.rapids.sql.variableFloatAgg.enabled\", \"true\")\n", + "conf.set(\"spark.driver.extraClassPath\", RAPIDS_JAR)\n", + "conf.set(\"spark.executor.extraClassPath\", RAPIDS_JAR)\n", + "conf.set(\"spark.jars\", RAPIDS_JAR)\n", + "# Create spark session\n", + "spark = SparkSession.builder.config(conf=conf).getOrCreate()\n", + "# Load dataframe and create tempView\n", + "spark.read.format(\"delta\").load(DATA_ROOT + \"/customer\").createOrReplaceTempView(\"customer\")\n", + "spark.read.format(\"delta\").load(DATA_ROOT + 
\"/store_sales\").createOrReplaceTempView(\"store_sales\")\n", + "spark.read.format(\"delta\").load(DATA_ROOT + \"/catalog_sales\").createOrReplaceTempView(\"catalog_sales\")\n", + "spark.read.format(\"delta\").load(DATA_ROOT + \"/web_sales\").createOrReplaceTempView(\"web_sales\")\n", + "spark.read.format(\"delta\").load(DATA_ROOT + \"/item\").createOrReplaceTempView(\"item\")\n", + "spark.read.format(\"delta\").load(DATA_ROOT + \"/date_dim\").createOrReplaceTempView(\"date_dim\")\n", + "print(\"-\"*50)" + ] + }, + { + "cell_type": "markdown", + "id": "7136eb63", + "metadata": {}, + "source": [ + "### Update\n", + "This is a microbenchmark about the update running on the GPU. TBD." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd12d749", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "25/09/29 17:13:13 WARN GpuOverrides: \n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.SerializeFromObjectExec\n", + " ! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.MapElementsExec\n", + " !Expression obj#4877 cannot run on GPU because expression AttributeReference obj#4877 produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n", + " ! 
newInstance(class scala.Tuple1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " !Expression if (isnull(add#4812)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because expression If if (isnull(add#4812)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.AddFile) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported); trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " @Expression isnull(add#4812) could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#4812.path.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.path could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! 
catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -1), lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -2), lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString, add#4812.partitionValues, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#4812.partitionValues could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! 
assertnotnull(add#4812.size) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.size could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! assertnotnull(add#4812.modificationTime) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.modificationTime could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! assertnotnull(add#4812.dataChange) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.dataChange could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! add#4812.stats.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.stats could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -3), lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -4), lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString, add#4812.tags, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! 
lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#4812.tags could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " !Expression if (isnull(add#4812.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); expression If if (isnull(add#4812.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " @Expression isnull(add#4812.deletionVector) could run on 
GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#4812.deletionVector.storageType.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.deletionVector.storageType could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! add#4812.deletionVector.pathOrInlineDv.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.deletionVector.pathOrInlineDv could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.deletionVector.offset, IntegerType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#4812.deletionVector.offset could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! 
assertnotnull(add#4812.deletionVector.sizeInBytes) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.deletionVector.sizeInBytes could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! assertnotnull(add#4812.deletionVector.cardinality) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.deletionVector.cardinality could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.deletionVector.maxRowIndex, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#4812.deletionVector.maxRowIndex could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.baseRowId, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#4812.baseRowId could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.defaultRowCommitVersion, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#4812.defaultRowCommitVersion could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.clusteringProvider.toString, ObjectType(class java.lang.String)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " ! 
add#4812.clusteringProvider.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.clusteringProvider could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " !Expression obj#4876 cannot run on GPU because expression AttributeReference obj#4876 produces an unsupported type ObjectType(class scala.Tuple1)\n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.RDDScanExec\n", + " @Expression txn#4811 could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " @Expression remove#4813 could run on GPU\n", + " @Expression metaData#4814 could run on GPU\n", + " @Expression protocol#4815 could run on GPU\n", + " @Expression cdc#4816 could run on GPU\n", + " @Expression checkpointMetadata#4817 could run on GPU\n", + " @Expression sidecar#4818 could run on GPU\n", + " @Expression domainMetadata#4819 could run on GPU\n", + " @Expression commitInfo#4820 could run on GPU\n", + "\n", + "25/09/29 17:13:13 WARN GpuOverrides: \n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.SerializeFromObjectExec\n", + " ! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.MapElementsExec\n", + " !Expression obj#4877 cannot run on GPU because expression AttributeReference obj#4877 produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n", + " ! 
newInstance(class scala.Tuple1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " !Expression if (isnull(add#4812)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because expression If if (isnull(add#4812)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.AddFile) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported); trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " @Expression isnull(add#4812) could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#4812.path.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.path could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! 
catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -1), lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -2), lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString, add#4812.partitionValues, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#4812.partitionValues could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! 
assertnotnull(add#4812.size) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.size could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! assertnotnull(add#4812.modificationTime) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.modificationTime could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! assertnotnull(add#4812.dataChange) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.dataChange could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! add#4812.stats.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.stats could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -3), lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -4), lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString, add#4812.tags, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! 
lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#4812.tags could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " !Expression if (isnull(add#4812.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); expression If if (isnull(add#4812.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " @Expression isnull(add#4812.deletionVector) could run on 
GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#4812.deletionVector.storageType.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.deletionVector.storageType could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! add#4812.deletionVector.pathOrInlineDv.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.deletionVector.pathOrInlineDv could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.deletionVector.offset, IntegerType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#4812.deletionVector.offset could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! 
assertnotnull(add#4812.deletionVector.sizeInBytes) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.deletionVector.sizeInBytes could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! assertnotnull(add#4812.deletionVector.cardinality) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.deletionVector.cardinality could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.deletionVector.maxRowIndex, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#4812.deletionVector.maxRowIndex could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.baseRowId, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#4812.baseRowId could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.defaultRowCommitVersion, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#4812.defaultRowCommitVersion could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.clusteringProvider.toString, ObjectType(class java.lang.String)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " ! 
add#4812.clusteringProvider.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.clusteringProvider could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " !Expression obj#4876 cannot run on GPU because expression AttributeReference obj#4876 produces an unsupported type ObjectType(class scala.Tuple1)\n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.RDDScanExec\n", + " @Expression txn#4811 could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " @Expression remove#4813 could run on GPU\n", + " @Expression metaData#4814 could run on GPU\n", + " @Expression protocol#4815 could run on GPU\n", + " @Expression cdc#4816 could run on GPU\n", + " @Expression checkpointMetadata#4817 could run on GPU\n", + " @Expression sidecar#4818 could run on GPU\n", + " @Expression domainMetadata#4819 could run on GPU\n", + " @Expression commitInfo#4820 could run on GPU\n", + "\n", + "25/09/29 17:13:13 WARN GpuOverrides: \n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.SerializeFromObjectExec\n", + " ! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.MapElementsExec\n", + " !Expression obj#4877 cannot run on GPU because expression AttributeReference obj#4877 produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n", + " ! 
newInstance(class scala.Tuple1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " !Expression if (isnull(add#4812)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because expression If if (isnull(add#4812)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.AddFile) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported); trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " @Expression isnull(add#4812) could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#4812.path.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.path could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! 
catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -1), lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -2), lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString, add#4812.partitionValues, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#4812.partitionValues could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! 
assertnotnull(add#4812.size) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.size could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! assertnotnull(add#4812.modificationTime) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.modificationTime could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! assertnotnull(add#4812.dataChange) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.dataChange could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! add#4812.stats.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.stats could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -3), lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -4), lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString, add#4812.tags, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! 
lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#4812.tags could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " !Expression if (isnull(add#4812.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); expression If if (isnull(add#4812.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " @Expression isnull(add#4812.deletionVector) could run on 
GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#4812.deletionVector.storageType.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.deletionVector.storageType could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! add#4812.deletionVector.pathOrInlineDv.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.deletionVector.pathOrInlineDv could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.deletionVector.offset, IntegerType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#4812.deletionVector.offset could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! 
assertnotnull(add#4812.deletionVector.sizeInBytes) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.deletionVector.sizeInBytes could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! assertnotnull(add#4812.deletionVector.cardinality) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.deletionVector.cardinality could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.deletionVector.maxRowIndex, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#4812.deletionVector.maxRowIndex could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.baseRowId, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#4812.baseRowId could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.defaultRowCommitVersion, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#4812.defaultRowCommitVersion could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.clusteringProvider.toString, ObjectType(class java.lang.String)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " ! 
add#4812.clusteringProvider.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.clusteringProvider could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " !Expression obj#4876 cannot run on GPU because expression AttributeReference obj#4876 produces an unsupported type ObjectType(class scala.Tuple1)\n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.RDDScanExec\n", + " @Expression txn#4811 could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " @Expression remove#4813 could run on GPU\n", + " @Expression metaData#4814 could run on GPU\n", + " @Expression protocol#4815 could run on GPU\n", + " @Expression cdc#4816 could run on GPU\n", + " @Expression checkpointMetadata#4817 could run on GPU\n", + " @Expression sidecar#4818 could run on GPU\n", + " @Expression domainMetadata#4819 could run on GPU\n", + " @Expression commitInfo#4820 could run on GPU\n", + "\n", + "25/09/29 17:13:13 WARN GpuOverrides: \n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.SerializeFromObjectExec\n", + " ! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.MapElementsExec\n", + " !Expression obj#4877 cannot run on GPU because expression AttributeReference obj#4877 produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n", + " ! 
newInstance(class scala.Tuple1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " !Expression if (isnull(add#4812)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because expression If if (isnull(add#4812)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.AddFile) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported); trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " @Expression isnull(add#4812) could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#4812.path.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.path could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! 
catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -1), lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -2), lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString, add#4812.partitionValues, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#4812.partitionValues could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! 
assertnotnull(add#4812.size) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.size could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! assertnotnull(add#4812.modificationTime) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.modificationTime could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! assertnotnull(add#4812.dataChange) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.dataChange could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! add#4812.stats.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.stats could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -3), lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -4), lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString, add#4812.tags, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! 
lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#4812.tags could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " !Expression if (isnull(add#4812.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); expression If if (isnull(add#4812.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " @Expression isnull(add#4812.deletionVector) could run on 
GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#4812.deletionVector.storageType.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.deletionVector.storageType could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! add#4812.deletionVector.pathOrInlineDv.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.deletionVector.pathOrInlineDv could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.deletionVector.offset, IntegerType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#4812.deletionVector.offset could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! 
assertnotnull(add#4812.deletionVector.sizeInBytes) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.deletionVector.sizeInBytes could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! assertnotnull(add#4812.deletionVector.cardinality) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#4812.deletionVector.cardinality could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.deletionVector.maxRowIndex, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#4812.deletionVector.maxRowIndex could run on GPU\n", + " @Expression add#4812.deletionVector could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.baseRowId, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#4812.baseRowId could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.defaultRowCommitVersion, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#4812.defaultRowCommitVersion could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " ! wrapoption(add#4812.clusteringProvider.toString, ObjectType(class java.lang.String)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " ! 
add#4812.clusteringProvider.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#4812.clusteringProvider could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " !Expression obj#4876 cannot run on GPU because expression AttributeReference obj#4876 produces an unsupported type ObjectType(class scala.Tuple1)\n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.RDDScanExec\n", + " @Expression txn#4811 could run on GPU\n", + " @Expression add#4812 could run on GPU\n", + " @Expression remove#4813 could run on GPU\n", + " @Expression metaData#4814 could run on GPU\n", + " @Expression protocol#4815 could run on GPU\n", + " @Expression cdc#4816 could run on GPU\n", + " @Expression checkpointMetadata#4817 could run on GPU\n", + " @Expression sidecar#4818 could run on GPU\n", + " @Expression domainMetadata#4819 could run on GPU\n", + " @Expression commitInfo#4820 could run on GPU\n", + "\n", + "25/09/29 17:13:13 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:13 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:13 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:13 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:13 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:13 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:13 WARN GpuOverrides: Can't replace any part of 
this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:13 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:13 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:14 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:14 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:14 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:14 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:14 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:14 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:14 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:14 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:14 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:14 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:14 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:14 WARN GpuOverrides: Can't replace any part of this plan due to: Delta 
Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:14 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:14 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n" + ] + } + ], + "source": [ + "spark.sql(f\"CREATE TABLE delta.`{DATA_ROOT}/store_sales_update` SHALLOW CLONE delta.`{DATA_ROOT}/store_sales`\")\n", + "sql = f\"\"\"\n", + "UPDATE delta.`{DATA_ROOT}/store_sales_update` SET \n", + "ss_wholesale_cost = ss_wholesale_cost * 2,\n", + "ss_list_price = ss_list_price * 2,\n", + "ss_sales_price = ss_sales_price * 2,\n", + "ss_ext_sales_price = ss_ext_sales_price * 2,\n", + "ss_ext_wholesale_cost = ss_ext_wholesale_cost * 2,\n", + "ss_ext_list_price = ss_ext_list_price * 2,\n", + "ss_ext_discount_amt = ss_ext_discount_amt * 2\n", + "WHERE ss_store_sk <= 40\n", + "\"\"\"\n", + "print(\"-\"*50)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "2e105bf8", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "25/09/29 17:13:18 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:18 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:18 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:18 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:18 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:18 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + 
"25/09/29 17:13:18 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:19 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:19 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:19 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:19 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:19 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:19 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:19 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:19 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:19 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:19 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:19 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:20 WARN GpuOverrides: \n", + "! 
cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec\n", + " @Expression path#6938 could run on GPU\n", + " @Expression partitionValues#6939 could run on GPU\n", + " @Expression size#6940L could run on GPU\n", + " @Expression modificationTime#6941L could run on GPU\n", + " @Expression dataChange#6942 could run on GPU\n", + " @Expression stats#6943 could run on GPU\n", + " @Expression tags#6944 could run on GPU\n", + " @Expression deletionVector#6945 could run on GPU\n", + " @Expression baseRowId#6946L could run on GPU\n", + " @Expression defaultRowCommitVersion#6947L could run on GPU\n", + " @Expression clusteringProvider#6948 could run on GPU\n", + "\n", + "25/09/29 17:13:27 WARN GpuOverrides: \n", + " !Exec cannot run on GPU because not all expressions can be replaced\n", + " @Expression ss_sold_time_sk#5966 could run on GPU\n", + " @Expression ss_item_sk#5967 could run on GPU\n", + " @Expression ss_customer_sk#5968 could run on GPU\n", + " @Expression ss_cdemo_sk#5969 could run on GPU\n", + " @Expression ss_hdemo_sk#5970 could run on GPU\n", + " @Expression ss_addr_sk#5971 could run on GPU\n", + " @Expression ss_store_sk#5972 could run on GPU\n", + " @Expression ss_promo_sk#5973 could run on GPU\n", + " @Expression ss_ticket_number#5974L could run on GPU\n", + " @Expression ss_quantity#5975L could run on GPU\n", + " @Expression if (__condition__#7018) cast((ss_wholesale_cost#5976 * 2) as decimal(7,2)) else ss_wholesale_cost#5976 AS ss_wholesale_cost#7054 could run on GPU\n", + " @Expression if (__condition__#7018) cast((ss_wholesale_cost#5976 * 2) as decimal(7,2)) else ss_wholesale_cost#5976 could run on GPU\n", + " @Expression __condition__#7018 could run on GPU\n", + " ! 
cast((ss_wholesale_cost#5976 * 2) as decimal(7,2)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.CheckOverflowInTableWrite\n", + " @Expression cast((ss_wholesale_cost#5976 * 2) as decimal(7,2)) could run on GPU\n", + " @Expression (ss_wholesale_cost#5976 * 2) could run on GPU\n", + " @Expression ss_wholesale_cost#5976 could run on GPU\n", + " @Expression 2 could run on GPU\n", + " @Expression ss_wholesale_cost#5976 could run on GPU\n", + " @Expression if (__condition__#7018) cast((ss_list_price#5977 * 2) as decimal(7,2)) else ss_list_price#5977 AS ss_list_price#7055 could run on GPU\n", + " @Expression if (__condition__#7018) cast((ss_list_price#5977 * 2) as decimal(7,2)) else ss_list_price#5977 could run on GPU\n", + " @Expression __condition__#7018 could run on GPU\n", + " ! cast((ss_list_price#5977 * 2) as decimal(7,2)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.CheckOverflowInTableWrite\n", + " @Expression cast((ss_list_price#5977 * 2) as decimal(7,2)) could run on GPU\n", + " @Expression (ss_list_price#5977 * 2) could run on GPU\n", + " @Expression ss_list_price#5977 could run on GPU\n", + " @Expression 2 could run on GPU\n", + " @Expression ss_list_price#5977 could run on GPU\n", + " @Expression if (__condition__#7018) cast((ss_sales_price#5978 * 2) as decimal(7,2)) else ss_sales_price#5978 AS ss_sales_price#7056 could run on GPU\n", + " @Expression if (__condition__#7018) cast((ss_sales_price#5978 * 2) as decimal(7,2)) else ss_sales_price#5978 could run on GPU\n", + " @Expression __condition__#7018 could run on GPU\n", + " ! 
cast((ss_sales_price#5978 * 2) as decimal(7,2)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.CheckOverflowInTableWrite\n", + " @Expression cast((ss_sales_price#5978 * 2) as decimal(7,2)) could run on GPU\n", + " @Expression (ss_sales_price#5978 * 2) could run on GPU\n", + " @Expression ss_sales_price#5978 could run on GPU\n", + " @Expression 2 could run on GPU\n", + " @Expression ss_sales_price#5978 could run on GPU\n", + " @Expression if (__condition__#7018) cast((ss_ext_discount_amt#5979 * 2) as decimal(7,2)) else ss_ext_discount_amt#5979 AS ss_ext_discount_amt#7057 could run on GPU\n", + " @Expression if (__condition__#7018) cast((ss_ext_discount_amt#5979 * 2) as decimal(7,2)) else ss_ext_discount_amt#5979 could run on GPU\n", + " @Expression __condition__#7018 could run on GPU\n", + " ! cast((ss_ext_discount_amt#5979 * 2) as decimal(7,2)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.CheckOverflowInTableWrite\n", + " @Expression cast((ss_ext_discount_amt#5979 * 2) as decimal(7,2)) could run on GPU\n", + " @Expression (ss_ext_discount_amt#5979 * 2) could run on GPU\n", + " @Expression ss_ext_discount_amt#5979 could run on GPU\n", + " @Expression 2 could run on GPU\n", + " @Expression ss_ext_discount_amt#5979 could run on GPU\n", + " @Expression if (__condition__#7018) cast((ss_ext_sales_price#5980 * 2) as decimal(7,2)) else ss_ext_sales_price#5980 AS ss_ext_sales_price#7058 could run on GPU\n", + " @Expression if (__condition__#7018) cast((ss_ext_sales_price#5980 * 2) as decimal(7,2)) else ss_ext_sales_price#5980 could run on GPU\n", + " @Expression __condition__#7018 could run on GPU\n", + " ! 
cast((ss_ext_sales_price#5980 * 2) as decimal(7,2)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.CheckOverflowInTableWrite\n", + " @Expression cast((ss_ext_sales_price#5980 * 2) as decimal(7,2)) could run on GPU\n", + " @Expression (ss_ext_sales_price#5980 * 2) could run on GPU\n", + " @Expression ss_ext_sales_price#5980 could run on GPU\n", + " @Expression 2 could run on GPU\n", + " @Expression ss_ext_sales_price#5980 could run on GPU\n", + " @Expression if (__condition__#7018) cast((ss_ext_wholesale_cost#5981 * 2) as decimal(7,2)) else ss_ext_wholesale_cost#5981 AS ss_ext_wholesale_cost#7059 could run on GPU\n", + " @Expression if (__condition__#7018) cast((ss_ext_wholesale_cost#5981 * 2) as decimal(7,2)) else ss_ext_wholesale_cost#5981 could run on GPU\n", + " @Expression __condition__#7018 could run on GPU\n", + " ! cast((ss_ext_wholesale_cost#5981 * 2) as decimal(7,2)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.CheckOverflowInTableWrite\n", + " @Expression cast((ss_ext_wholesale_cost#5981 * 2) as decimal(7,2)) could run on GPU\n", + " @Expression (ss_ext_wholesale_cost#5981 * 2) could run on GPU\n", + " @Expression ss_ext_wholesale_cost#5981 could run on GPU\n", + " @Expression 2 could run on GPU\n", + " @Expression ss_ext_wholesale_cost#5981 could run on GPU\n", + " @Expression if (__condition__#7018) cast((ss_ext_list_price#5982 * 2) as decimal(7,2)) else ss_ext_list_price#5982 AS ss_ext_list_price#7060 could run on GPU\n", + " @Expression if (__condition__#7018) cast((ss_ext_list_price#5982 * 2) as decimal(7,2)) else ss_ext_list_price#5982 could run on GPU\n", + " @Expression __condition__#7018 could run on GPU\n", + " ! 
cast((ss_ext_list_price#5982 * 2) as decimal(7,2)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.CheckOverflowInTableWrite\n", + " @Expression cast((ss_ext_list_price#5982 * 2) as decimal(7,2)) could run on GPU\n", + " @Expression (ss_ext_list_price#5982 * 2) could run on GPU\n", + " @Expression ss_ext_list_price#5982 could run on GPU\n", + " @Expression 2 could run on GPU\n", + " @Expression ss_ext_list_price#5982 could run on GPU\n", + " @Expression ss_ext_tax#5983 could run on GPU\n", + " @Expression ss_coupon_amt#5984 could run on GPU\n", + " @Expression ss_net_paid#5985 could run on GPU\n", + " @Expression ss_net_paid_inc_tax#5986 could run on GPU\n", + " @Expression ss_net_profit#5987 could run on GPU\n", + " @Expression ss_sold_date_sk#5988 could run on GPU\n", + "\n", + "25/09/29 17:13:28 WARN GpuOverrides: \n", + "! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec\n", + " @Expression path#7401 could run on GPU\n", + " @Expression partitionValues#7402 could run on GPU\n", + " @Expression size#7403L could run on GPU\n", + " @Expression modificationTime#7404L could run on GPU\n", + " @Expression dataChange#7405 could run on GPU\n", + " @Expression stats#7406 could run on GPU\n", + " @Expression tags#7407 could run on GPU\n", + " @Expression deletionVector#7408 could run on GPU\n", + " @Expression baseRowId#7409L could run on GPU\n", + " @Expression defaultRowCommitVersion#7410L could run on GPU\n", + " @Expression clusteringProvider#7411 could run on GPU\n", + "\n", + "25/09/29 17:13:28 WARN GpuDeltaInvariantCheckerExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric\n", + "25/09/29 17:13:28 WARN GpuSortExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric\n", + "25/09/29 17:13:28 WARN GpuDeltaInvariantCheckerExec: GpuRapidsDeltaWriteExec returned empty metrics in 
getOpTimeNewMetric\n", + "25/09/29 17:13:51 WARN GpuOverrides: \n", + "! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec\n", + " @Expression toprettystring(num_affected_rows)#9603 could run on GPU\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------------+\n", + "|num_affected_rows|\n", + "+-----------------+\n", + "| 11338477|\n", + "+-----------------+\n", + "\n", + "Retry times : 1, update microbenchmark takes 32.98 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "25/09/29 17:13:51 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:51 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:51 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:51 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:51 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:51 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:51 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:52 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:52 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:52 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake 
metadata queries are not efficient on GPU\n", + "25/09/29 17:13:52 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:52 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:52 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:52 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:52 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:52 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:13:52 WARN GpuOverrides: \n", + "! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec\n", + " @Expression path#10629 could run on GPU\n", + " @Expression partitionValues#10630 could run on GPU\n", + " @Expression size#10631L could run on GPU\n", + " @Expression modificationTime#10632L could run on GPU\n", + " @Expression dataChange#10633 could run on GPU\n", + " @Expression stats#10634 could run on GPU\n", + " @Expression tags#10635 could run on GPU\n", + " @Expression deletionVector#10636 could run on GPU\n", + " @Expression baseRowId#10637L could run on GPU\n", + " @Expression defaultRowCommitVersion#10638L could run on GPU\n", + " @Expression clusteringProvider#10639 could run on GPU\n", + "\n", + "25/09/29 17:13:58 WARN GpuOverrides: \n", + " !Exec cannot run on GPU because not all expressions can be replaced\n", + " @Expression ss_sold_time_sk#9607 could run on GPU\n", + " @Expression ss_item_sk#9608 could run on GPU\n", + " @Expression ss_customer_sk#9609 could run on GPU\n", + " @Expression 
ss_cdemo_sk#9610 could run on GPU\n", + " @Expression ss_hdemo_sk#9611 could run on GPU\n", + " @Expression ss_addr_sk#9612 could run on GPU\n", + " @Expression ss_store_sk#9613 could run on GPU\n", + " @Expression ss_promo_sk#9614 could run on GPU\n", + " @Expression ss_ticket_number#9615L could run on GPU\n", + " @Expression ss_quantity#9616L could run on GPU\n", + " @Expression if (__condition__#10709) cast((ss_wholesale_cost#9617 * 2) as decimal(7,2)) else ss_wholesale_cost#9617 AS ss_wholesale_cost#10745 could run on GPU\n", + " @Expression if (__condition__#10709) cast((ss_wholesale_cost#9617 * 2) as decimal(7,2)) else ss_wholesale_cost#9617 could run on GPU\n", + " @Expression __condition__#10709 could run on GPU\n", + " ! cast((ss_wholesale_cost#9617 * 2) as decimal(7,2)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.CheckOverflowInTableWrite\n", + " @Expression cast((ss_wholesale_cost#9617 * 2) as decimal(7,2)) could run on GPU\n", + " @Expression (ss_wholesale_cost#9617 * 2) could run on GPU\n", + " @Expression ss_wholesale_cost#9617 could run on GPU\n", + " @Expression 2 could run on GPU\n", + " @Expression ss_wholesale_cost#9617 could run on GPU\n", + " @Expression if (__condition__#10709) cast((ss_list_price#9618 * 2) as decimal(7,2)) else ss_list_price#9618 AS ss_list_price#10746 could run on GPU\n", + " @Expression if (__condition__#10709) cast((ss_list_price#9618 * 2) as decimal(7,2)) else ss_list_price#9618 could run on GPU\n", + " @Expression __condition__#10709 could run on GPU\n", + " ! 
cast((ss_list_price#9618 * 2) as decimal(7,2)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.CheckOverflowInTableWrite\n", + " @Expression cast((ss_list_price#9618 * 2) as decimal(7,2)) could run on GPU\n", + " @Expression (ss_list_price#9618 * 2) could run on GPU\n", + " @Expression ss_list_price#9618 could run on GPU\n", + " @Expression 2 could run on GPU\n", + " @Expression ss_list_price#9618 could run on GPU\n", + " @Expression if (__condition__#10709) cast((ss_sales_price#9619 * 2) as decimal(7,2)) else ss_sales_price#9619 AS ss_sales_price#10747 could run on GPU\n", + " @Expression if (__condition__#10709) cast((ss_sales_price#9619 * 2) as decimal(7,2)) else ss_sales_price#9619 could run on GPU\n", + " @Expression __condition__#10709 could run on GPU\n", + " ! cast((ss_sales_price#9619 * 2) as decimal(7,2)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.CheckOverflowInTableWrite\n", + " @Expression cast((ss_sales_price#9619 * 2) as decimal(7,2)) could run on GPU\n", + " @Expression (ss_sales_price#9619 * 2) could run on GPU\n", + " @Expression ss_sales_price#9619 could run on GPU\n", + " @Expression 2 could run on GPU\n", + " @Expression ss_sales_price#9619 could run on GPU\n", + " @Expression if (__condition__#10709) cast((ss_ext_discount_amt#9620 * 2) as decimal(7,2)) else ss_ext_discount_amt#9620 AS ss_ext_discount_amt#10748 could run on GPU\n", + " @Expression if (__condition__#10709) cast((ss_ext_discount_amt#9620 * 2) as decimal(7,2)) else ss_ext_discount_amt#9620 could run on GPU\n", + " @Expression __condition__#10709 could run on GPU\n", + " ! 
cast((ss_ext_discount_amt#9620 * 2) as decimal(7,2)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.CheckOverflowInTableWrite\n", + " @Expression cast((ss_ext_discount_amt#9620 * 2) as decimal(7,2)) could run on GPU\n", + " @Expression (ss_ext_discount_amt#9620 * 2) could run on GPU\n", + " @Expression ss_ext_discount_amt#9620 could run on GPU\n", + " @Expression 2 could run on GPU\n", + " @Expression ss_ext_discount_amt#9620 could run on GPU\n", + " @Expression if (__condition__#10709) cast((ss_ext_sales_price#9621 * 2) as decimal(7,2)) else ss_ext_sales_price#9621 AS ss_ext_sales_price#10749 could run on GPU\n", + " @Expression if (__condition__#10709) cast((ss_ext_sales_price#9621 * 2) as decimal(7,2)) else ss_ext_sales_price#9621 could run on GPU\n", + " @Expression __condition__#10709 could run on GPU\n", + " ! cast((ss_ext_sales_price#9621 * 2) as decimal(7,2)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.CheckOverflowInTableWrite\n", + " @Expression cast((ss_ext_sales_price#9621 * 2) as decimal(7,2)) could run on GPU\n", + " @Expression (ss_ext_sales_price#9621 * 2) could run on GPU\n", + " @Expression ss_ext_sales_price#9621 could run on GPU\n", + " @Expression 2 could run on GPU\n", + " @Expression ss_ext_sales_price#9621 could run on GPU\n", + " @Expression if (__condition__#10709) cast((ss_ext_wholesale_cost#9622 * 2) as decimal(7,2)) else ss_ext_wholesale_cost#9622 AS ss_ext_wholesale_cost#10750 could run on GPU\n", + " @Expression if (__condition__#10709) cast((ss_ext_wholesale_cost#9622 * 2) as decimal(7,2)) else ss_ext_wholesale_cost#9622 could run on GPU\n", + " @Expression __condition__#10709 could run on GPU\n", + " ! 
cast((ss_ext_wholesale_cost#9622 * 2) as decimal(7,2)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.CheckOverflowInTableWrite\n", + " @Expression cast((ss_ext_wholesale_cost#9622 * 2) as decimal(7,2)) could run on GPU\n", + " @Expression (ss_ext_wholesale_cost#9622 * 2) could run on GPU\n", + " @Expression ss_ext_wholesale_cost#9622 could run on GPU\n", + " @Expression 2 could run on GPU\n", + " @Expression ss_ext_wholesale_cost#9622 could run on GPU\n", + " @Expression if (__condition__#10709) cast((ss_ext_list_price#9623 * 2) as decimal(7,2)) else ss_ext_list_price#9623 AS ss_ext_list_price#10751 could run on GPU\n", + " @Expression if (__condition__#10709) cast((ss_ext_list_price#9623 * 2) as decimal(7,2)) else ss_ext_list_price#9623 could run on GPU\n", + " @Expression __condition__#10709 could run on GPU\n", + " ! cast((ss_ext_list_price#9623 * 2) as decimal(7,2)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.CheckOverflowInTableWrite\n", + " @Expression cast((ss_ext_list_price#9623 * 2) as decimal(7,2)) could run on GPU\n", + " @Expression (ss_ext_list_price#9623 * 2) could run on GPU\n", + " @Expression ss_ext_list_price#9623 could run on GPU\n", + " @Expression 2 could run on GPU\n", + " @Expression ss_ext_list_price#9623 could run on GPU\n", + " @Expression ss_ext_tax#9624 could run on GPU\n", + " @Expression ss_coupon_amt#9625 could run on GPU\n", + " @Expression ss_net_paid#9626 could run on GPU\n", + " @Expression ss_net_paid_inc_tax#9627 could run on GPU\n", + " @Expression ss_net_profit#9628 could run on GPU\n", + " @Expression ss_sold_date_sk#9629 could run on GPU\n", + "\n", + "25/09/29 17:13:58 WARN GpuOverrides: \n", + "! 
cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec\n", + " @Expression path#11092 could run on GPU\n", + " @Expression partitionValues#11093 could run on GPU\n", + " @Expression size#11094L could run on GPU\n", + " @Expression modificationTime#11095L could run on GPU\n", + " @Expression dataChange#11096 could run on GPU\n", + " @Expression stats#11097 could run on GPU\n", + " @Expression tags#11098 could run on GPU\n", + " @Expression deletionVector#11099 could run on GPU\n", + " @Expression baseRowId#11100L could run on GPU\n", + " @Expression defaultRowCommitVersion#11101L could run on GPU\n", + " @Expression clusteringProvider#11102 could run on GPU\n", + "\n", + "25/09/29 17:13:58 WARN GpuDeltaInvariantCheckerExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric\n", + "25/09/29 17:13:58 WARN GpuSortExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric\n", + "25/09/29 17:13:58 WARN GpuDeltaInvariantCheckerExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric\n", + "[Stage 71:====================================================> (15 + 1) / 16]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------------+\n", + "|num_affected_rows|\n", + "+-----------------+\n", + "| 11338477|\n", + "+-----------------+\n", + "\n", + "Retry times : 2, update microbenchmark takes 29.63 seconds\n", + "update microbenchmark takes average 31 seconds after 2 retries\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "25/09/29 17:14:21 WARN GpuOverrides: \n", + "! 
cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec\n", + " @Expression toprettystring(num_affected_rows)#13294 could run on GPU\n", + "\n" + ] + } + ], + "source": [ + "# Run microbenchmark with n retry time\n", + "runMicroBenchmark(spark,\"update\",sql,2)" + ] + }, + { + "cell_type": "markdown", + "id": "f50ec183", + "metadata": {}, + "source": [ + "### Delete\n", + "This is a microbenchmark for the Delta Lake `DELETE` command running in GPU mode. It measures the time taken to delete rows from a Delta table." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "31bd0635", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "25/09/29 17:37:20 WARN GpuOverrides: \n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.SerializeFromObjectExec\n", + " ! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.MapElementsExec\n", + " !Expression obj#13403 cannot run on GPU because expression AttributeReference obj#13403 produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n", + " ! 
newInstance(class scala.Tuple1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " !Expression if (isnull(add#13338)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.AddFile) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported); trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported); expression If if (isnull(add#13338)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression isnull(add#13338) could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#13338.path.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.path could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! 
catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -1), lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -2), lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString, add#13338.partitionValues, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#13338.partitionValues could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! 
assertnotnull(add#13338.size) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.size could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! assertnotnull(add#13338.modificationTime) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.modificationTime could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! assertnotnull(add#13338.dataChange) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.dataChange could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! add#13338.stats.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.stats could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -3), lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -4), lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString, add#13338.tags, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! 
lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#13338.tags could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " !Expression if (isnull(add#13338.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); expression If if (isnull(add#13338.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported)\n", + " @Expression isnull(add#13338.deletionVector) could 
run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#13338.deletionVector.storageType.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.deletionVector.storageType could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! add#13338.deletionVector.pathOrInlineDv.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.deletionVector.pathOrInlineDv could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! wrapoption(add#13338.deletionVector.offset, IntegerType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#13338.deletionVector.offset could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! 
assertnotnull(add#13338.deletionVector.sizeInBytes) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.deletionVector.sizeInBytes could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! assertnotnull(add#13338.deletionVector.cardinality) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.deletionVector.cardinality could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! wrapoption(add#13338.deletionVector.maxRowIndex, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#13338.deletionVector.maxRowIndex could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! wrapoption(add#13338.baseRowId, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#13338.baseRowId could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! wrapoption(add#13338.defaultRowCommitVersion, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#13338.defaultRowCommitVersion could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! 
wrapoption(add#13338.clusteringProvider.toString, ObjectType(class java.lang.String)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " ! add#13338.clusteringProvider.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.clusteringProvider could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " !Expression obj#13402 cannot run on GPU because expression AttributeReference obj#13402 produces an unsupported type ObjectType(class scala.Tuple1)\n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.RDDScanExec\n", + " @Expression txn#13337 could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " @Expression remove#13339 could run on GPU\n", + " @Expression metaData#13340 could run on GPU\n", + " @Expression protocol#13341 could run on GPU\n", + " @Expression cdc#13342 could run on GPU\n", + " @Expression checkpointMetadata#13343 could run on GPU\n", + " @Expression sidecar#13344 could run on GPU\n", + " @Expression domainMetadata#13345 could run on GPU\n", + " @Expression commitInfo#13346 could run on GPU\n", + "\n", + "25/09/29 17:37:20 WARN GpuOverrides: \n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.SerializeFromObjectExec\n", + " ! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.MapElementsExec\n", + " !Expression obj#13403 cannot run on GPU because expression AttributeReference obj#13403 produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! 
cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n", + " ! newInstance(class scala.Tuple1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " !Expression if (isnull(add#13338)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.AddFile) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported); trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported); expression If if (isnull(add#13338)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression isnull(add#13338) could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#13338.path.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.path could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! 
catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -1), lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -2), lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString, add#13338.partitionValues, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#13338.partitionValues could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! 
assertnotnull(add#13338.size) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.size could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! assertnotnull(add#13338.modificationTime) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.modificationTime could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! assertnotnull(add#13338.dataChange) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.dataChange could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! add#13338.stats.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.stats could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -3), lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -4), lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString, add#13338.tags, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! 
lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#13338.tags could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " !Expression if (isnull(add#13338.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); expression If if (isnull(add#13338.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported)\n", + " @Expression isnull(add#13338.deletionVector) could 
run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#13338.deletionVector.storageType.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.deletionVector.storageType could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! add#13338.deletionVector.pathOrInlineDv.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.deletionVector.pathOrInlineDv could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! wrapoption(add#13338.deletionVector.offset, IntegerType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#13338.deletionVector.offset could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! 
assertnotnull(add#13338.deletionVector.sizeInBytes) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.deletionVector.sizeInBytes could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! assertnotnull(add#13338.deletionVector.cardinality) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.deletionVector.cardinality could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! wrapoption(add#13338.deletionVector.maxRowIndex, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#13338.deletionVector.maxRowIndex could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! wrapoption(add#13338.baseRowId, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#13338.baseRowId could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! wrapoption(add#13338.defaultRowCommitVersion, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#13338.defaultRowCommitVersion could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! 
wrapoption(add#13338.clusteringProvider.toString, ObjectType(class java.lang.String)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " ! add#13338.clusteringProvider.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.clusteringProvider could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " !Expression obj#13402 cannot run on GPU because expression AttributeReference obj#13402 produces an unsupported type ObjectType(class scala.Tuple1)\n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.RDDScanExec\n", + " @Expression txn#13337 could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " @Expression remove#13339 could run on GPU\n", + " @Expression metaData#13340 could run on GPU\n", + " @Expression protocol#13341 could run on GPU\n", + " @Expression cdc#13342 could run on GPU\n", + " @Expression checkpointMetadata#13343 could run on GPU\n", + " @Expression sidecar#13344 could run on GPU\n", + " @Expression domainMetadata#13345 could run on GPU\n", + " @Expression commitInfo#13346 could run on GPU\n", + "\n", + "25/09/29 17:37:20 WARN GpuOverrides: \n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.SerializeFromObjectExec\n", + " ! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.MapElementsExec\n", + " !Expression obj#13403 cannot run on GPU because expression AttributeReference obj#13403 produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! 
cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n", + " ! newInstance(class scala.Tuple1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " !Expression if (isnull(add#13338)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.AddFile) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported); trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported); expression If if (isnull(add#13338)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression isnull(add#13338) could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#13338.path.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.path could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! 
catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -1), lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -2), lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString, add#13338.partitionValues, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#13338.partitionValues could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! 
assertnotnull(add#13338.size) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.size could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! assertnotnull(add#13338.modificationTime) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.modificationTime could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! assertnotnull(add#13338.dataChange) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.dataChange could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! add#13338.stats.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.stats could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -3), lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -4), lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString, add#13338.tags, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! 
lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#13338.tags could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " !Expression if (isnull(add#13338.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); expression If if (isnull(add#13338.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported)\n", + " @Expression isnull(add#13338.deletionVector) could 
run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#13338.deletionVector.storageType.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.deletionVector.storageType could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! add#13338.deletionVector.pathOrInlineDv.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.deletionVector.pathOrInlineDv could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! wrapoption(add#13338.deletionVector.offset, IntegerType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#13338.deletionVector.offset could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! 
assertnotnull(add#13338.deletionVector.sizeInBytes) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.deletionVector.sizeInBytes could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! assertnotnull(add#13338.deletionVector.cardinality) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.deletionVector.cardinality could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! wrapoption(add#13338.deletionVector.maxRowIndex, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#13338.deletionVector.maxRowIndex could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! wrapoption(add#13338.baseRowId, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#13338.baseRowId could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! wrapoption(add#13338.defaultRowCommitVersion, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#13338.defaultRowCommitVersion could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! 
wrapoption(add#13338.clusteringProvider.toString, ObjectType(class java.lang.String)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " ! add#13338.clusteringProvider.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.clusteringProvider could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " !Expression obj#13402 cannot run on GPU because expression AttributeReference obj#13402 produces an unsupported type ObjectType(class scala.Tuple1)\n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.RDDScanExec\n", + " @Expression txn#13337 could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " @Expression remove#13339 could run on GPU\n", + " @Expression metaData#13340 could run on GPU\n", + " @Expression protocol#13341 could run on GPU\n", + " @Expression cdc#13342 could run on GPU\n", + " @Expression checkpointMetadata#13343 could run on GPU\n", + " @Expression sidecar#13344 could run on GPU\n", + " @Expression domainMetadata#13345 could run on GPU\n", + " @Expression commitInfo#13346 could run on GPU\n", + "\n", + "25/09/29 17:37:20 WARN GpuOverrides: \n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.SerializeFromObjectExec\n", + " ! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.MapElementsExec\n", + " !Expression obj#13403 cannot run on GPU because expression AttributeReference obj#13403 produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! 
cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n", + " ! newInstance(class scala.Tuple1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " !Expression if (isnull(add#13338)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.AddFile) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported); trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported); expression If if (isnull(add#13338)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression isnull(add#13338) could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#13338.path.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.path could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! 
catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -1), lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -2), lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString, add#13338.partitionValues, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#13338.partitionValues could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! 
assertnotnull(add#13338.size) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.size could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! assertnotnull(add#13338.modificationTime) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.modificationTime could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! assertnotnull(add#13338.dataChange) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.dataChange could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! add#13338.stats.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.stats could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -3), lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -4), lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString, add#13338.tags, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! 
lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#13338.tags could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " !Expression if (isnull(add#13338.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); expression If if (isnull(add#13338.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported)\n", + " @Expression isnull(add#13338.deletionVector) could 
run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#13338.deletionVector.storageType.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.deletionVector.storageType could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! add#13338.deletionVector.pathOrInlineDv.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.deletionVector.pathOrInlineDv could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! wrapoption(add#13338.deletionVector.offset, IntegerType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#13338.deletionVector.offset could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! 
assertnotnull(add#13338.deletionVector.sizeInBytes) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.deletionVector.sizeInBytes could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! assertnotnull(add#13338.deletionVector.cardinality) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#13338.deletionVector.cardinality could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! wrapoption(add#13338.deletionVector.maxRowIndex, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#13338.deletionVector.maxRowIndex could run on GPU\n", + " @Expression add#13338.deletionVector could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! wrapoption(add#13338.baseRowId, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#13338.baseRowId could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! wrapoption(add#13338.defaultRowCommitVersion, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#13338.defaultRowCommitVersion could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " ! 
wrapoption(add#13338.clusteringProvider.toString, ObjectType(class java.lang.String)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " ! add#13338.clusteringProvider.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#13338.clusteringProvider could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " !Expression obj#13402 cannot run on GPU because expression AttributeReference obj#13402 produces an unsupported type ObjectType(class scala.Tuple1)\n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.RDDScanExec\n", + " @Expression txn#13337 could run on GPU\n", + " @Expression add#13338 could run on GPU\n", + " @Expression remove#13339 could run on GPU\n", + " @Expression metaData#13340 could run on GPU\n", + " @Expression protocol#13341 could run on GPU\n", + " @Expression cdc#13342 could run on GPU\n", + " @Expression checkpointMetadata#13343 could run on GPU\n", + " @Expression sidecar#13344 could run on GPU\n", + " @Expression domainMetadata#13345 could run on GPU\n", + " @Expression commitInfo#13346 could run on GPU\n", + "\n", + "25/09/29 17:37:20 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake 
metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not 
efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:21 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "spark.sql(f\"CREATE TABLE delta.`{DATA_ROOT}/store_sales_delete` SHALLOW CLONE delta.`{DATA_ROOT}/store_sales`\")\n", + "sql = f\"\"\"\n", + "DELETE FROM delta.`{DATA_ROOT}/store_sales_delete` WHERE ss_store_sk <= 40\n", + "\"\"\"\n", + "print(\"-\"*50)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9e93983", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are 
not efficient on GPU\n", + "25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:36 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:36 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:36 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:36 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:36 WARN GpuOverrides: \n", + "! 
cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec\n", + " @Expression path#15457 could run on GPU\n", + " @Expression partitionValues#15458 could run on GPU\n", + " @Expression size#15459L could run on GPU\n", + " @Expression modificationTime#15460L could run on GPU\n", + " @Expression dataChange#15461 could run on GPU\n", + " @Expression stats#15462 could run on GPU\n", + " @Expression tags#15463 could run on GPU\n", + " @Expression deletionVector#15464 could run on GPU\n", + " @Expression baseRowId#15465L could run on GPU\n", + " @Expression defaultRowCommitVersion#15466L could run on GPU\n", + " @Expression clusteringProvider#15467 could run on GPU\n", + "\n", + "25/09/29 17:37:42 WARN GpuOverrides: \n", + "! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec\n", + " @Expression path#15826 could run on GPU\n", + " @Expression partitionValues#15827 could run on GPU\n", + " @Expression size#15828L could run on GPU\n", + " @Expression modificationTime#15829L could run on GPU\n", + " @Expression dataChange#15830 could run on GPU\n", + " @Expression stats#15831 could run on GPU\n", + " @Expression tags#15832 could run on GPU\n", + " @Expression deletionVector#15833 could run on GPU\n", + " @Expression baseRowId#15834L could run on GPU\n", + " @Expression defaultRowCommitVersion#15835L could run on GPU\n", + " @Expression clusteringProvider#15836 could run on GPU\n", + "\n", + "25/09/29 17:37:42 WARN GpuDeltaInvariantCheckerExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric\n", + "25/09/29 17:37:42 WARN GpuSortExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric\n", + "25/09/29 17:37:42 WARN GpuDeltaInvariantCheckerExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric\n", + "25/09/29 17:37:57 WARN GpuOverrides: \n", + "! 
cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec\n", + " @Expression toprettystring(num_affected_rows)#18028 could run on GPU\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------------+\n", + "|num_affected_rows|\n", + "+-----------------+\n", + "| 11338477|\n", + "+-----------------+\n", + "\n", + "Retry times : 1, delete microbenchmark takes 21.76 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:57 WARN 
GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:58 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------------+\n", + "|num_affected_rows|\n", + "+-----------------+\n", + "| 0|\n", + "+-----------------+\n", + "\n", + "Retry times : 2, delete microbenchmark takes 0.99 seconds\n", + "delete microbenchmark takes average 11 seconds after 2 retries\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "25/09/29 17:37:58 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:37:58 WARN GpuOverrides: \n", + "! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec\n", + " @Expression toprettystring(num_affected_rows)#19037 could run on GPU\n", + "\n" + ] + } + ], + "source": [ + "# Run microbenchmark with n retry time\n", + "runMicroBenchmark(spark,\"delete\",sql,1)" + ] + }, + { + "cell_type": "markdown", + "id": "dcf08e47", + "metadata": {}, + "source": [ + "### Merge\n", + "Data skew is caused by many null values in the ss_customer_sk column. You will see about 80x speedups in this query. 
The more heavily skewed a query's tasks are, the larger the performance improvement will be, because the GPU parallelizes the computation while the CPU is limited to a single core because of how the algorithms are written." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2b9d223c", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "25/09/29 17:39:43 WARN GpuOverrides: \n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.SerializeFromObjectExec\n", + " ! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.MapElementsExec\n", + " !Expression obj#19146 cannot run on GPU because expression AttributeReference obj#19146 produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n", + " ! 
newInstance(class scala.Tuple1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " !Expression if (isnull(add#19081)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because expression If if (isnull(add#19081)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.AddFile) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported); trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " @Expression isnull(add#19081) could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#19081.path.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.path could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! 
catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -1), lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -2), lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString, add#19081.partitionValues, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#19081.partitionValues could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! 
assertnotnull(add#19081.size) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.size could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! assertnotnull(add#19081.modificationTime) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.modificationTime could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! assertnotnull(add#19081.dataChange) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.dataChange could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! add#19081.stats.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.stats could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -3), lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -4), lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString, add#19081.tags, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! 
lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#19081.tags could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " !Expression if (isnull(add#19081.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); expression If if (isnull(add#19081.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " @Expression isnull(add#19081.deletionVector) could 
run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#19081.deletionVector.storageType.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.deletionVector.storageType could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! add#19081.deletionVector.pathOrInlineDv.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.deletionVector.pathOrInlineDv could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! wrapoption(add#19081.deletionVector.offset, IntegerType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#19081.deletionVector.offset could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! 
assertnotnull(add#19081.deletionVector.sizeInBytes) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.deletionVector.sizeInBytes could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! assertnotnull(add#19081.deletionVector.cardinality) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.deletionVector.cardinality could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! wrapoption(add#19081.deletionVector.maxRowIndex, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#19081.deletionVector.maxRowIndex could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! wrapoption(add#19081.baseRowId, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#19081.baseRowId could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! wrapoption(add#19081.defaultRowCommitVersion, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#19081.defaultRowCommitVersion could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! 
wrapoption(add#19081.clusteringProvider.toString, ObjectType(class java.lang.String)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " ! add#19081.clusteringProvider.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.clusteringProvider could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " !Expression obj#19145 cannot run on GPU because expression AttributeReference obj#19145 produces an unsupported type ObjectType(class scala.Tuple1)\n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.RDDScanExec\n", + " @Expression txn#19080 could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " @Expression remove#19082 could run on GPU\n", + " @Expression metaData#19083 could run on GPU\n", + " @Expression protocol#19084 could run on GPU\n", + " @Expression cdc#19085 could run on GPU\n", + " @Expression checkpointMetadata#19086 could run on GPU\n", + " @Expression sidecar#19087 could run on GPU\n", + " @Expression domainMetadata#19088 could run on GPU\n", + " @Expression commitInfo#19089 could run on GPU\n", + "\n", + "25/09/29 17:39:43 WARN GpuOverrides: \n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.SerializeFromObjectExec\n", + " ! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.MapElementsExec\n", + " !Expression obj#19146 cannot run on GPU because expression AttributeReference obj#19146 produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! 
cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n", + " ! newInstance(class scala.Tuple1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " !Expression if (isnull(add#19081)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because expression If if (isnull(add#19081)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.AddFile) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported); trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " @Expression isnull(add#19081) could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#19081.path.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.path could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! 
catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -1), lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -2), lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString, add#19081.partitionValues, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#19081.partitionValues could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! 
assertnotnull(add#19081.size) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.size could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! assertnotnull(add#19081.modificationTime) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.modificationTime could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! assertnotnull(add#19081.dataChange) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.dataChange could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! add#19081.stats.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.stats could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -3), lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -4), lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString, add#19081.tags, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! 
lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#19081.tags could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " !Expression if (isnull(add#19081.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); expression If if (isnull(add#19081.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " @Expression isnull(add#19081.deletionVector) could 
run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#19081.deletionVector.storageType.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.deletionVector.storageType could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! add#19081.deletionVector.pathOrInlineDv.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.deletionVector.pathOrInlineDv could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! wrapoption(add#19081.deletionVector.offset, IntegerType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#19081.deletionVector.offset could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! 
assertnotnull(add#19081.deletionVector.sizeInBytes) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.deletionVector.sizeInBytes could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! assertnotnull(add#19081.deletionVector.cardinality) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.deletionVector.cardinality could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! wrapoption(add#19081.deletionVector.maxRowIndex, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#19081.deletionVector.maxRowIndex could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! wrapoption(add#19081.baseRowId, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#19081.baseRowId could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! wrapoption(add#19081.defaultRowCommitVersion, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#19081.defaultRowCommitVersion could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! 
wrapoption(add#19081.clusteringProvider.toString, ObjectType(class java.lang.String)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " ! add#19081.clusteringProvider.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.clusteringProvider could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " !Expression obj#19145 cannot run on GPU because expression AttributeReference obj#19145 produces an unsupported type ObjectType(class scala.Tuple1)\n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.RDDScanExec\n", + " @Expression txn#19080 could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " @Expression remove#19082 could run on GPU\n", + " @Expression metaData#19083 could run on GPU\n", + " @Expression protocol#19084 could run on GPU\n", + " @Expression cdc#19085 could run on GPU\n", + " @Expression checkpointMetadata#19086 could run on GPU\n", + " @Expression sidecar#19087 could run on GPU\n", + " @Expression domainMetadata#19088 could run on GPU\n", + " @Expression commitInfo#19089 could run on GPU\n", + "\n", + "25/09/29 17:39:43 WARN GpuOverrides: \n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.SerializeFromObjectExec\n", + " ! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.MapElementsExec\n", + " !Expression obj#19146 cannot run on GPU because expression AttributeReference obj#19146 produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! 
cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n", + " ! newInstance(class scala.Tuple1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " !Expression if (isnull(add#19081)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because expression If if (isnull(add#19081)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.AddFile) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported); trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " @Expression isnull(add#19081) could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#19081.path.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.path could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! 
catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -1), lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -2), lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString, add#19081.partitionValues, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#19081.partitionValues could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! 
assertnotnull(add#19081.size) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.size could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! assertnotnull(add#19081.modificationTime) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.modificationTime could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! assertnotnull(add#19081.dataChange) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.dataChange could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! add#19081.stats.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.stats could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -3), lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -4), lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString, add#19081.tags, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! 
lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#19081.tags could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " !Expression if (isnull(add#19081.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); expression If if (isnull(add#19081.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " @Expression isnull(add#19081.deletionVector) could 
run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#19081.deletionVector.storageType.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.deletionVector.storageType could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! add#19081.deletionVector.pathOrInlineDv.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.deletionVector.pathOrInlineDv could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! wrapoption(add#19081.deletionVector.offset, IntegerType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#19081.deletionVector.offset could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! 
assertnotnull(add#19081.deletionVector.sizeInBytes) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.deletionVector.sizeInBytes could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! assertnotnull(add#19081.deletionVector.cardinality) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.deletionVector.cardinality could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! wrapoption(add#19081.deletionVector.maxRowIndex, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#19081.deletionVector.maxRowIndex could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! wrapoption(add#19081.baseRowId, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#19081.baseRowId could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! wrapoption(add#19081.defaultRowCommitVersion, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#19081.defaultRowCommitVersion could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! 
wrapoption(add#19081.clusteringProvider.toString, ObjectType(class java.lang.String)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " ! add#19081.clusteringProvider.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.clusteringProvider could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " !Expression obj#19145 cannot run on GPU because expression AttributeReference obj#19145 produces an unsupported type ObjectType(class scala.Tuple1)\n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.RDDScanExec\n", + " @Expression txn#19080 could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " @Expression remove#19082 could run on GPU\n", + " @Expression metaData#19083 could run on GPU\n", + " @Expression protocol#19084 could run on GPU\n", + " @Expression cdc#19085 could run on GPU\n", + " @Expression checkpointMetadata#19086 could run on GPU\n", + " @Expression sidecar#19087 could run on GPU\n", + " @Expression domainMetadata#19088 could run on GPU\n", + " @Expression commitInfo#19089 could run on GPU\n", + "\n", + "25/09/29 17:39:43 WARN GpuOverrides: \n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.SerializeFromObjectExec\n", + " ! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.MapElementsExec\n", + " !Expression obj#19146 cannot run on GPU because expression AttributeReference obj#19146 produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! 
cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n", + " ! newInstance(class scala.Tuple1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " !Expression if (isnull(add#19081)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because expression If if (isnull(add#19081)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.AddFile) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported); trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " @Expression isnull(add#19081) could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#19081.path.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.path could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! 
catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -1), lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -2), lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString, add#19081.partitionValues, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#19081.partitionValues could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! 
assertnotnull(add#19081.size) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.size could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! assertnotnull(add#19081.modificationTime) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.modificationTime could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! assertnotnull(add#19081.dataChange) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.dataChange could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! add#19081.stats.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.stats could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -3), lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -4), lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString, add#19081.tags, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! 
lambdavariable(CatalystToExternalMap_key, StringType, false, -3).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#19081.tags could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " !Expression if (isnull(add#19081.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); expression If if (isnull(add#19081.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " @Expression isnull(add#19081.deletionVector) could 
run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#19081.deletionVector.storageType.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.deletionVector.storageType could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! add#19081.deletionVector.pathOrInlineDv.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.deletionVector.pathOrInlineDv could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! wrapoption(add#19081.deletionVector.offset, IntegerType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#19081.deletionVector.offset could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! 
assertnotnull(add#19081.deletionVector.sizeInBytes) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.deletionVector.sizeInBytes could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! assertnotnull(add#19081.deletionVector.cardinality) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#19081.deletionVector.cardinality could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! wrapoption(add#19081.deletionVector.maxRowIndex, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#19081.deletionVector.maxRowIndex could run on GPU\n", + " @Expression add#19081.deletionVector could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! wrapoption(add#19081.baseRowId, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#19081.baseRowId could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! wrapoption(add#19081.defaultRowCommitVersion, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#19081.defaultRowCommitVersion could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " ! 
wrapoption(add#19081.clusteringProvider.toString, ObjectType(class java.lang.String)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " ! add#19081.clusteringProvider.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#19081.clusteringProvider could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " !Expression obj#19145 cannot run on GPU because expression AttributeReference obj#19145 produces an unsupported type ObjectType(class scala.Tuple1)\n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.RDDScanExec\n", + " @Expression txn#19080 could run on GPU\n", + " @Expression add#19081 could run on GPU\n", + " @Expression remove#19082 could run on GPU\n", + " @Expression metaData#19083 could run on GPU\n", + " @Expression protocol#19084 could run on GPU\n", + " @Expression cdc#19085 could run on GPU\n", + " @Expression checkpointMetadata#19086 could run on GPU\n", + " @Expression sidecar#19087 could run on GPU\n", + " @Expression domainMetadata#19088 could run on GPU\n", + " @Expression commitInfo#19089 could run on GPU\n", + "\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake 
metadata queries are not efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not 
efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:44 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:45 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:45 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "spark.sql(f\"CREATE TABLE delta.`{DATA_ROOT}/store_sales_merge` SHALLOW CLONE delta.`{DATA_ROOT}/store_sales`\")\n", + "sql = f\"\"\"\n", + "MERGE INTO delta.`{DATA_ROOT}/store_sales_merge` AS target\n", + "USING delta.`{DATA_ROOT}/store_sales` AS source\n", + " ON target.ss_ticket_number = source.ss_ticket_number\n", + " AND target.ss_item_sk = source.ss_item_sk\n", + " AND (source.ss_ticket_number * source.ss_item_sk) % 1000 < 400\n", + "WHEN MATCHED THEN\n", + " UPDATE SET target.ss_coupon_amt = source.ss_coupon_amt\n", + "\"\"\"\n", + "print(\"-\"*50)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "0d7c65ee", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "25/09/29 17:39:56 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:56 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:56 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:56 WARN GpuOverrides: Can't replace any part of this 
plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:56 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:56 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:56 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:56 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:56 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake 
metadata queries are not efficient on GPU\n", + "25/09/29 17:39:57 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:39:58 WARN GpuOverrides: \n", + "! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec\n", + " @Expression path#21515 could run on GPU\n", + " @Expression partitionValues#21516 could run on GPU\n", + " @Expression size#21517L could run on GPU\n", + " @Expression modificationTime#21518L could run on GPU\n", + " @Expression dataChange#21519 could run on GPU\n", + " @Expression stats#21520 could run on GPU\n", + " @Expression tags#21521 could run on GPU\n", + " @Expression deletionVector#21522 could run on GPU\n", + " @Expression baseRowId#21523L could run on GPU\n", + " @Expression defaultRowCommitVersion#21524L could run on GPU\n", + " @Expression clusteringProvider#21525 could run on GPU\n", + "\n", + "25/09/29 17:40:06 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:40:06 WARN GpuOverrides: \n", + " !Exec cannot run on GPU because not all expressions can be replaced\n", + " @Expression ss_sold_time_sk#20235 could run on GPU\n", + " @Expression ss_item_sk#20236 could run on GPU\n", + " @Expression ss_customer_sk#20237 could run on GPU\n", + " @Expression ss_cdemo_sk#20238 could run on GPU\n", + " @Expression ss_hdemo_sk#20239 could run on GPU\n", + " @Expression ss_addr_sk#20240 could run on GPU\n", + " @Expression ss_store_sk#20241 could run on GPU\n", + " @Expression ss_promo_sk#20242 could run on GPU\n", + " @Expression ss_ticket_number#20243L could run on GPU\n", + " @Expression ss_quantity#20244L could run on GPU\n", + " @Expression ss_wholesale_cost#20245 could run on GPU\n", + " @Expression ss_list_price#20246 could run on GPU\n", + " @Expression ss_sales_price#20247 could run on GPU\n", + " 
@Expression ss_ext_discount_amt#20248 could run on GPU\n", + " @Expression ss_ext_sales_price#20249 could run on GPU\n", + " @Expression ss_ext_wholesale_cost#20250 could run on GPU\n", + " @Expression ss_ext_list_price#20251 could run on GPU\n", + " @Expression ss_ext_tax#20252 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN ss_coupon_amt#20253 ELSE ss_coupon_amt#20276 END AS ss_coupon_amt#21887 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN ss_coupon_amt#20253 ELSE ss_coupon_amt#20276 END could run on GPU\n", + " @Expression isnull(_source_row_present_#21651) could run on GPU\n", + " @Expression _source_row_present_#21651 could run on GPU\n", + " @Expression ss_coupon_amt#20253 could run on GPU\n", + " @Expression ss_coupon_amt#20276 could run on GPU\n", + " @Expression ss_net_paid#20254 could run on GPU\n", + " @Expression ss_net_paid_inc_tax#20255 could run on GPU\n", + " @Expression ss_net_profit#20256 could run on GPU\n", + " @Expression ss_sold_date_sk#20257 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN false ELSE false END AS _row_dropped_#21892 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN false ELSE false END could run on GPU\n", + " @Expression isnull(_source_row_present_#21651) could run on GPU\n", + " @Expression _source_row_present_#21651 could run on GPU\n", + " ! false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " @Expression false could run on GPU\n", + " ! false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " ! 
false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " @Expression false could run on GPU\n", + " !Exec cannot run on GPU because not all expressions can be replaced\n", + " @Expression ss_item_sk#20259 could run on GPU\n", + " @Expression ss_ticket_number#20266L could run on GPU\n", + " @Expression ss_coupon_amt#20276 could run on GPU\n", + " @Expression true AS _source_row_present_#21651 could run on GPU\n", + " ! true cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " @Expression true could run on GPU\n", + "\n", + "25/09/29 17:40:06 WARN GpuOverrides: \n", + " !Exec cannot run on GPU because not all expressions can be replaced\n", + " @Expression ss_sold_time_sk#20235 could run on GPU\n", + " @Expression ss_item_sk#20236 could run on GPU\n", + " @Expression ss_customer_sk#20237 could run on GPU\n", + " @Expression ss_cdemo_sk#20238 could run on GPU\n", + " @Expression ss_hdemo_sk#20239 could run on GPU\n", + " @Expression ss_addr_sk#20240 could run on GPU\n", + " @Expression ss_store_sk#20241 could run on GPU\n", + " @Expression ss_promo_sk#20242 could run on GPU\n", + " @Expression ss_ticket_number#20243L could run on GPU\n", + " @Expression ss_quantity#20244L could run on GPU\n", + " @Expression ss_wholesale_cost#20245 could run on GPU\n", + " @Expression ss_list_price#20246 could run on GPU\n", + " @Expression ss_sales_price#20247 could run on GPU\n", + " @Expression ss_ext_discount_amt#20248 could run on GPU\n", + " @Expression ss_ext_sales_price#20249 could run on GPU\n", + " @Expression ss_ext_wholesale_cost#20250 could run on GPU\n", + " @Expression ss_ext_list_price#20251 could run on GPU\n", + " @Expression ss_ext_tax#20252 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN ss_coupon_amt#20253 ELSE ss_coupon_amt#20276 END AS ss_coupon_amt#21887 could 
run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN ss_coupon_amt#20253 ELSE ss_coupon_amt#20276 END could run on GPU\n", + " @Expression isnull(_source_row_present_#21651) could run on GPU\n", + " @Expression _source_row_present_#21651 could run on GPU\n", + " @Expression ss_coupon_amt#20253 could run on GPU\n", + " @Expression ss_coupon_amt#20276 could run on GPU\n", + " @Expression ss_net_paid#20254 could run on GPU\n", + " @Expression ss_net_paid_inc_tax#20255 could run on GPU\n", + " @Expression ss_net_profit#20256 could run on GPU\n", + " @Expression ss_sold_date_sk#20257 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN false ELSE false END AS _row_dropped_#21892 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN false ELSE false END could run on GPU\n", + " @Expression isnull(_source_row_present_#21651) could run on GPU\n", + " @Expression _source_row_present_#21651 could run on GPU\n", + " ! false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " @Expression false could run on GPU\n", + " ! false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " ! false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " @Expression false could run on GPU\n", + " !Exec cannot run on GPU because not all expressions can be replaced\n", + " @Expression ss_item_sk#20259 could run on GPU\n", + " @Expression ss_ticket_number#20266L could run on GPU\n", + " @Expression ss_coupon_amt#20276 could run on GPU\n", + " @Expression true AS _source_row_present_#21651 could run on GPU\n", + " ! 
true cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " @Expression true could run on GPU\n", + "\n", + "25/09/29 17:40:06 WARN GpuOverrides: \n", + " !Exec cannot run on GPU because not all expressions can be replaced\n", + " @Expression ss_sold_time_sk#20235 could run on GPU\n", + " @Expression ss_item_sk#20236 could run on GPU\n", + " @Expression ss_customer_sk#20237 could run on GPU\n", + " @Expression ss_cdemo_sk#20238 could run on GPU\n", + " @Expression ss_hdemo_sk#20239 could run on GPU\n", + " @Expression ss_addr_sk#20240 could run on GPU\n", + " @Expression ss_store_sk#20241 could run on GPU\n", + " @Expression ss_promo_sk#20242 could run on GPU\n", + " @Expression ss_ticket_number#20243L could run on GPU\n", + " @Expression ss_quantity#20244L could run on GPU\n", + " @Expression ss_wholesale_cost#20245 could run on GPU\n", + " @Expression ss_list_price#20246 could run on GPU\n", + " @Expression ss_sales_price#20247 could run on GPU\n", + " @Expression ss_ext_discount_amt#20248 could run on GPU\n", + " @Expression ss_ext_sales_price#20249 could run on GPU\n", + " @Expression ss_ext_wholesale_cost#20250 could run on GPU\n", + " @Expression ss_ext_list_price#20251 could run on GPU\n", + " @Expression ss_ext_tax#20252 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN ss_coupon_amt#20253 ELSE ss_coupon_amt#20276 END AS ss_coupon_amt#21887 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN ss_coupon_amt#20253 ELSE ss_coupon_amt#20276 END could run on GPU\n", + " @Expression isnull(_source_row_present_#21651) could run on GPU\n", + " @Expression _source_row_present_#21651 could run on GPU\n", + " @Expression ss_coupon_amt#20253 could run on GPU\n", + " @Expression ss_coupon_amt#20276 could run on GPU\n", + " @Expression ss_net_paid#20254 could run on GPU\n", + " @Expression ss_net_paid_inc_tax#20255 could 
run on GPU\n", + " @Expression ss_net_profit#20256 could run on GPU\n", + " @Expression ss_sold_date_sk#20257 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN false ELSE false END AS _row_dropped_#21892 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN false ELSE false END could run on GPU\n", + " @Expression isnull(_source_row_present_#21651) could run on GPU\n", + " @Expression _source_row_present_#21651 could run on GPU\n", + " ! false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " @Expression false could run on GPU\n", + " ! false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " ! false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " @Expression false could run on GPU\n", + " !Exec cannot run on GPU because not all expressions can be replaced\n", + " @Expression ss_item_sk#20259 could run on GPU\n", + " @Expression ss_ticket_number#20266L could run on GPU\n", + " @Expression ss_coupon_amt#20276 could run on GPU\n", + " @Expression true AS _source_row_present_#21651 could run on GPU\n", + " ! 
true cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " @Expression true could run on GPU\n", + "\n", + "25/09/29 17:40:06 WARN GpuOverrides: \n", + " !Exec cannot run on GPU because not all expressions can be replaced\n", + " @Expression ss_sold_time_sk#20235 could run on GPU\n", + " @Expression ss_item_sk#20236 could run on GPU\n", + " @Expression ss_customer_sk#20237 could run on GPU\n", + " @Expression ss_cdemo_sk#20238 could run on GPU\n", + " @Expression ss_hdemo_sk#20239 could run on GPU\n", + " @Expression ss_addr_sk#20240 could run on GPU\n", + " @Expression ss_store_sk#20241 could run on GPU\n", + " @Expression ss_promo_sk#20242 could run on GPU\n", + " @Expression ss_ticket_number#20243L could run on GPU\n", + " @Expression ss_quantity#20244L could run on GPU\n", + " @Expression ss_wholesale_cost#20245 could run on GPU\n", + " @Expression ss_list_price#20246 could run on GPU\n", + " @Expression ss_sales_price#20247 could run on GPU\n", + " @Expression ss_ext_discount_amt#20248 could run on GPU\n", + " @Expression ss_ext_sales_price#20249 could run on GPU\n", + " @Expression ss_ext_wholesale_cost#20250 could run on GPU\n", + " @Expression ss_ext_list_price#20251 could run on GPU\n", + " @Expression ss_ext_tax#20252 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN ss_coupon_amt#20253 ELSE ss_coupon_amt#20276 END AS ss_coupon_amt#21887 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN ss_coupon_amt#20253 ELSE ss_coupon_amt#20276 END could run on GPU\n", + " @Expression isnull(_source_row_present_#21651) could run on GPU\n", + " @Expression _source_row_present_#21651 could run on GPU\n", + " @Expression ss_coupon_amt#20253 could run on GPU\n", + " @Expression ss_coupon_amt#20276 could run on GPU\n", + " @Expression ss_net_paid#20254 could run on GPU\n", + " @Expression ss_net_paid_inc_tax#20255 could 
run on GPU\n", + " @Expression ss_net_profit#20256 could run on GPU\n", + " @Expression ss_sold_date_sk#20257 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN false ELSE false END AS _row_dropped_#21892 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN false ELSE false END could run on GPU\n", + " @Expression isnull(_source_row_present_#21651) could run on GPU\n", + " @Expression _source_row_present_#21651 could run on GPU\n", + " ! false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " @Expression false could run on GPU\n", + " ! false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " ! false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " @Expression false could run on GPU\n", + " !Exec cannot run on GPU because not all expressions can be replaced\n", + " @Expression ss_item_sk#20259 could run on GPU\n", + " @Expression ss_ticket_number#20266L could run on GPU\n", + " @Expression ss_coupon_amt#20276 could run on GPU\n", + " @Expression true AS _source_row_present_#21651 could run on GPU\n", + " ! true cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " @Expression true could run on GPU\n", + "\n", + "25/09/29 17:40:06 WARN GpuOverrides: \n", + " !Exec cannot run on GPU because not all expressions can be replaced\n", + " @Expression ss_item_sk#20259 could run on GPU\n", + " @Expression ss_ticket_number#20266L could run on GPU\n", + " @Expression ss_coupon_amt#20276 could run on GPU\n", + " @Expression true AS _source_row_present_#21651 could run on GPU\n", + " ! 
true cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " @Expression true could run on GPU\n", + "\n", + "25/09/29 17:40:06 WARN GpuOverrides: \n", + "! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec\n", + " @Expression path#22334 could run on GPU\n", + " @Expression partitionValues#22335 could run on GPU\n", + " @Expression size#22336L could run on GPU\n", + " @Expression modificationTime#22337L could run on GPU\n", + " @Expression dataChange#22338 could run on GPU\n", + " @Expression stats#22339 could run on GPU\n", + " @Expression tags#22340 could run on GPU\n", + " @Expression deletionVector#22341 could run on GPU\n", + " @Expression baseRowId#22342L could run on GPU\n", + " @Expression defaultRowCommitVersion#22343L could run on GPU\n", + " @Expression clusteringProvider#22344 could run on GPU\n", + "\n", + "25/09/29 17:40:15 WARN GpuOverrides: ==> (8 + 8) / 16]\n", + " !Exec cannot run on GPU because not all expressions can be replaced\n", + " @Expression ss_sold_time_sk#20235 could run on GPU\n", + " @Expression ss_item_sk#20236 could run on GPU\n", + " @Expression ss_customer_sk#20237 could run on GPU\n", + " @Expression ss_cdemo_sk#20238 could run on GPU\n", + " @Expression ss_hdemo_sk#20239 could run on GPU\n", + " @Expression ss_addr_sk#20240 could run on GPU\n", + " @Expression ss_store_sk#20241 could run on GPU\n", + " @Expression ss_promo_sk#20242 could run on GPU\n", + " @Expression ss_ticket_number#20243L could run on GPU\n", + " @Expression ss_quantity#20244L could run on GPU\n", + " @Expression ss_wholesale_cost#20245 could run on GPU\n", + " @Expression ss_list_price#20246 could run on GPU\n", + " @Expression ss_sales_price#20247 could run on GPU\n", + " @Expression ss_ext_discount_amt#20248 could run on GPU\n", + " @Expression ss_ext_sales_price#20249 could run on GPU\n", + " @Expression 
ss_ext_wholesale_cost#20250 could run on GPU\n", + " @Expression ss_ext_list_price#20251 could run on GPU\n", + " @Expression ss_ext_tax#20252 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN ss_coupon_amt#20253 ELSE ss_coupon_amt#20276 END AS ss_coupon_amt#21887 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN ss_coupon_amt#20253 ELSE ss_coupon_amt#20276 END could run on GPU\n", + " @Expression isnull(_source_row_present_#21651) could run on GPU\n", + " @Expression _source_row_present_#21651 could run on GPU\n", + " @Expression ss_coupon_amt#20253 could run on GPU\n", + " @Expression ss_coupon_amt#20276 could run on GPU\n", + " @Expression ss_net_paid#20254 could run on GPU\n", + " @Expression ss_net_paid_inc_tax#20255 could run on GPU\n", + " @Expression ss_net_profit#20256 could run on GPU\n", + " @Expression ss_sold_date_sk#20257 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN false ELSE false END AS _row_dropped_#21892 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN false ELSE false END could run on GPU\n", + " @Expression isnull(_source_row_present_#21651) could run on GPU\n", + " @Expression _source_row_present_#21651 could run on GPU\n", + " ! false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " @Expression false could run on GPU\n", + " ! false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " ! 
false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " @Expression false could run on GPU\n", + "\n", + "25/09/29 17:40:22 WARN GpuOverrides: =========================> (15 + 1) / 16]\n", + " !Exec cannot run on GPU because not all expressions can be replaced\n", + " @Expression ss_sold_time_sk#20235 could run on GPU\n", + " @Expression ss_item_sk#20236 could run on GPU\n", + " @Expression ss_customer_sk#20237 could run on GPU\n", + " @Expression ss_cdemo_sk#20238 could run on GPU\n", + " @Expression ss_hdemo_sk#20239 could run on GPU\n", + " @Expression ss_addr_sk#20240 could run on GPU\n", + " @Expression ss_store_sk#20241 could run on GPU\n", + " @Expression ss_promo_sk#20242 could run on GPU\n", + " @Expression ss_ticket_number#20243L could run on GPU\n", + " @Expression ss_quantity#20244L could run on GPU\n", + " @Expression ss_wholesale_cost#20245 could run on GPU\n", + " @Expression ss_list_price#20246 could run on GPU\n", + " @Expression ss_sales_price#20247 could run on GPU\n", + " @Expression ss_ext_discount_amt#20248 could run on GPU\n", + " @Expression ss_ext_sales_price#20249 could run on GPU\n", + " @Expression ss_ext_wholesale_cost#20250 could run on GPU\n", + " @Expression ss_ext_list_price#20251 could run on GPU\n", + " @Expression ss_ext_tax#20252 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN ss_coupon_amt#20253 ELSE ss_coupon_amt#20276 END AS ss_coupon_amt#21887 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN ss_coupon_amt#20253 ELSE ss_coupon_amt#20276 END could run on GPU\n", + " @Expression isnull(_source_row_present_#21651) could run on GPU\n", + " @Expression _source_row_present_#21651 could run on GPU\n", + " @Expression ss_coupon_amt#20253 could run on GPU\n", + " @Expression ss_coupon_amt#20276 could run on GPU\n", + " @Expression ss_net_paid#20254 could run on GPU\n", + " 
@Expression ss_net_paid_inc_tax#20255 could run on GPU\n", + " @Expression ss_net_profit#20256 could run on GPU\n", + " @Expression ss_sold_date_sk#20257 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN false ELSE false END AS _row_dropped_#21892 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN false ELSE false END could run on GPU\n", + " @Expression isnull(_source_row_present_#21651) could run on GPU\n", + " @Expression _source_row_present_#21651 could run on GPU\n", + " ! false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " @Expression false could run on GPU\n", + " ! false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " ! false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " @Expression false could run on GPU\n", + "\n", + "25/09/29 17:40:22 WARN GpuOverrides: \n", + " !Exec cannot run on GPU because not all expressions can be replaced\n", + " @Expression ss_sold_time_sk#20235 could run on GPU\n", + " @Expression ss_item_sk#20236 could run on GPU\n", + " @Expression ss_customer_sk#20237 could run on GPU\n", + " @Expression ss_cdemo_sk#20238 could run on GPU\n", + " @Expression ss_hdemo_sk#20239 could run on GPU\n", + " @Expression ss_addr_sk#20240 could run on GPU\n", + " @Expression ss_store_sk#20241 could run on GPU\n", + " @Expression ss_promo_sk#20242 could run on GPU\n", + " @Expression ss_ticket_number#20243L could run on GPU\n", + " @Expression ss_quantity#20244L could run on GPU\n", + " @Expression ss_wholesale_cost#20245 could run on GPU\n", + " @Expression ss_list_price#20246 could run on GPU\n", + " @Expression ss_sales_price#20247 could run on GPU\n", + " @Expression ss_ext_discount_amt#20248 could run on GPU\n", + " 
@Expression ss_ext_sales_price#20249 could run on GPU\n", + " @Expression ss_ext_wholesale_cost#20250 could run on GPU\n", + " @Expression ss_ext_list_price#20251 could run on GPU\n", + " @Expression ss_ext_tax#20252 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN ss_coupon_amt#20253 ELSE ss_coupon_amt#20276 END AS ss_coupon_amt#21887 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN ss_coupon_amt#20253 ELSE ss_coupon_amt#20276 END could run on GPU\n", + " @Expression isnull(_source_row_present_#21651) could run on GPU\n", + " @Expression _source_row_present_#21651 could run on GPU\n", + " @Expression ss_coupon_amt#20253 could run on GPU\n", + " @Expression ss_coupon_amt#20276 could run on GPU\n", + " @Expression ss_net_paid#20254 could run on GPU\n", + " @Expression ss_net_paid_inc_tax#20255 could run on GPU\n", + " @Expression ss_net_profit#20256 could run on GPU\n", + " @Expression ss_sold_date_sk#20257 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN false ELSE false END AS _row_dropped_#21892 could run on GPU\n", + " @Expression CASE WHEN isnull(_source_row_present_#21651) THEN false ELSE false END could run on GPU\n", + " @Expression isnull(_source_row_present_#21651) could run on GPU\n", + " @Expression _source_row_present_#21651 could run on GPU\n", + " ! false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " @Expression false could run on GPU\n", + " ! false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " ! 
false cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.delta.metric.IncrementMetric\n", + " @Expression false could run on GPU\n", + "\n", + "25/09/29 17:40:37 WARN GpuDeltaInvariantCheckerExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric\n", + "25/09/29 17:40:37 WARN GpuSortExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric\n", + "25/09/29 17:40:37 WARN GpuDeltaInvariantCheckerExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric\n", + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+-----------------+----------------+----------------+-----------------+\n", + "|num_affected_rows|num_updated_rows|num_deleted_rows|num_inserted_rows|\n", + "+-----------------+----------------+----------------+-----------------+\n", + "| 11556035| 11556035| 0| 0|\n", + "+-----------------+----------------+----------------+-----------------+\n", + "\n", + "Retry times : 1, merge microbenchmark takes 56.92 seconds\n", + "merge microbenchmark takes average 57 seconds after 1 retries\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "25/09/29 17:40:53 WARN GpuOverrides: \n", + "! 
cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec\n", + " @Expression toprettystring(num_affected_rows)#27621 could run on GPU\n", + " @Expression toprettystring(num_updated_rows)#27622 could run on GPU\n", + " @Expression toprettystring(num_deleted_rows)#27623 could run on GPU\n", + " @Expression toprettystring(num_inserted_rows)#27624 could run on GPU\n", + "\n" + ] + } + ], + "source": [ + "# Run microbenchmark with n retry time\n", + "runMicroBenchmark(spark,\"merge\",sql,1)" + ] + }, + { + "cell_type": "markdown", + "id": "53c0ed28", + "metadata": {}, + "source": [ + "### Bin packing\n", + "This is a microbenchmark for Delta Lake bin-packing compaction (`OPTIMIZE`) running on GPU. The setup cell below writes a copy of `store_sales`, partitioned by `ss_sold_date_sk`, with `spark.sql.files.maxRecordsPerFile` set to 10000 so that the new table is made up of many small files. The benchmark then runs `OPTIMIZE` on that table to compact the small files into larger ones."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "643c2e8a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "25/09/29 17:50:38 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:50:38 WARN GpuDeltaInvariantCheckerExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric\n", + "25/09/29 17:50:38 WARN GpuSortExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric\n", + "25/09/29 17:50:38 WARN GpuDeltaInvariantCheckerExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric\n", + " \r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "# Set maxRecordsPerFile to a small number to create many small files\n", + "spark.conf.set(\"spark.sql.files.maxRecordsPerFile\", 10000)\n", + "spark.sql(f\"CREATE TABLE delta.`{DATA_ROOT}/store_sales_bin_packing` USING delta PARTITIONED BY (ss_sold_date_sk) AS SELECT * FROM delta.`{DATA_ROOT}/store_sales`\")\n", + "sql = f\"\"\"\n", + "OPTIMIZE delta.`{DATA_ROOT}/store_sales_bin_packing`\n", + "\"\"\"\n", + "spark.conf.unset(\"spark.sql.files.maxRecordsPerFile\")\n", + "print(\"-\"*50)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "61bc2260", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+----------+\n", + "|ss_item_sk|\n", + "+----------+\n", + "| 4323|\n", + "| 4324|\n", + "| 4325|\n", + "| 4327|\n", + "| 4328|\n", + "+----------+\n", + "only showing top 5 rows\n", + "\n", + "Retry times : 1, NDS Q14a subquery microbenchmark takes 6.71 seconds\n", + "+----------+\n", + "|ss_item_sk|\n", + "+----------+\n", + "| 14103|\n", + "| 14104|\n", + "| 14105|\n", + "| 14107|\n", + "| 14108|\n", + "+----------+\n", + "only showing top 5 rows\n", + "\n", + 
"Retry times : 2, NDS Q14a subquery microbenchmark takes 6.11 seconds\n", + "NDS Q14a subquery microbenchmark takes average 6 seconds after 2 retries\n" + ] + } + ], + "source": [ + "# Run microbenchmark with n retry time\n", + "runMicroBenchmark(spark,\"NDS Q14a subquery\",query,1)" + ] + }, + { + "cell_type": "markdown", + "id": "1346d126", + "metadata": {}, + "source": [ + "### Liquid clustering\n", + "This is a microbenchmark for a cross join of a 1-million-row table with itself. You will see about a 10x speedup in this query. The performance improvement mainly comes from converting BroadcastNestedLoopJoin running on CPU to GpuBroadcastNestedLoopJoin running on GPU." + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "286ea45d", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "25/09/29 17:58:13 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:14 WARN GpuDeltaInvariantCheckerExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric\n", + "25/09/29 17:58:14 WARN GpuDeltaInvariantCheckerExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric\n", + "25/09/29 17:58:16 WARN GpuOverrides: \n", + "! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.SerializeFromObjectExec\n", + " @Expression staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).path, true, false, true) AS path#33032 could run on GPU\n", + " !Expression staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).path, true, false, true) cannot run on GPU because StaticInvoke is not supported\n", + " !
knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).path cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) cannot run on GPU because expression KnownNotNull knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); input expression AssertNotNull assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " ! assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " !Expression input[0, org.apache.spark.sql.delta.actions.AddFile, true] cannot run on GPU because expression BoundReference input[0, org.apache.spark.sql.delta.actions.AddFile, true] produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression externalmaptocatalyst(lambdavariable(ExternalMapToCatalyst_key, ObjectType(class java.lang.Object), true, -1), staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(lambdavariable(ExternalMapToCatalyst_key, ObjectType(class java.lang.Object), true, -1), StringType, ObjectType(class java.lang.String)), true, false, true), lambdavariable(ExternalMapToCatalyst_value, ObjectType(class java.lang.Object), true, -2), staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(lambdavariable(ExternalMapToCatalyst_value, ObjectType(class java.lang.Object), true, -2), StringType, ObjectType(class java.lang.String)), true, false, 
true), knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).partitionValues) AS partitionValues#33033 could run on GPU\n", + " ! externalmaptocatalyst(lambdavariable(ExternalMapToCatalyst_key, ObjectType(class java.lang.Object), true, -1), staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(lambdavariable(ExternalMapToCatalyst_key, ObjectType(class java.lang.Object), true, -1), StringType, ObjectType(class java.lang.String)), true, false, true), lambdavariable(ExternalMapToCatalyst_value, ObjectType(class java.lang.Object), true, -2), staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(lambdavariable(ExternalMapToCatalyst_value, ObjectType(class java.lang.Object), true, -2), StringType, ObjectType(class java.lang.String)), true, false, true), knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).partitionValues) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.ExternalMapToCatalyst\n", + " ! lambdavariable(ExternalMapToCatalyst_key, ObjectType(class java.lang.Object), true, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " !Expression staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(lambdavariable(ExternalMapToCatalyst_key, ObjectType(class java.lang.Object), true, -1), StringType, ObjectType(class java.lang.String)), true, false, true) cannot run on GPU because StaticInvoke is not supported\n", + " ! 
validateexternaltype(lambdavariable(ExternalMapToCatalyst_key, ObjectType(class java.lang.Object), true, -1), StringType, ObjectType(class java.lang.String)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.ValidateExternalType\n", + " ! lambdavariable(ExternalMapToCatalyst_key, ObjectType(class java.lang.Object), true, -1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(ExternalMapToCatalyst_value, ObjectType(class java.lang.Object), true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " !Expression staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(lambdavariable(ExternalMapToCatalyst_value, ObjectType(class java.lang.Object), true, -2), StringType, ObjectType(class java.lang.String)), true, false, true) cannot run on GPU because StaticInvoke is not supported\n", + " ! validateexternaltype(lambdavariable(ExternalMapToCatalyst_value, ObjectType(class java.lang.Object), true, -2), StringType, ObjectType(class java.lang.String)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.ValidateExternalType\n", + " ! lambdavariable(ExternalMapToCatalyst_value, ObjectType(class java.lang.Object), true, -2) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! 
knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).partitionValues cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) cannot run on GPU because expression KnownNotNull knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); input expression AssertNotNull assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " ! assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " !Expression input[0, org.apache.spark.sql.delta.actions.AddFile, true] cannot run on GPU because expression BoundReference input[0, org.apache.spark.sql.delta.actions.AddFile, true] produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).size AS size#33034L could run on GPU\n", + " ! 
knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).size cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) cannot run on GPU because expression KnownNotNull knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); input expression AssertNotNull assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " ! assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " !Expression input[0, org.apache.spark.sql.delta.actions.AddFile, true] cannot run on GPU because expression BoundReference input[0, org.apache.spark.sql.delta.actions.AddFile, true] produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).modificationTime AS modificationTime#33035L could run on GPU\n", + " ! 
knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).modificationTime cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) cannot run on GPU because expression KnownNotNull knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); input expression AssertNotNull assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " ! assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " !Expression input[0, org.apache.spark.sql.delta.actions.AddFile, true] cannot run on GPU because expression BoundReference input[0, org.apache.spark.sql.delta.actions.AddFile, true] produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).dataChange AS dataChange#33036 could run on GPU\n", + " ! 
knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).dataChange cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) cannot run on GPU because expression KnownNotNull knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); input expression AssertNotNull assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " ! assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " !Expression input[0, org.apache.spark.sql.delta.actions.AddFile, true] cannot run on GPU because expression BoundReference input[0, org.apache.spark.sql.delta.actions.AddFile, true] produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).stats, true, false, true) AS stats#33037 could run on GPU\n", + " !Expression staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).stats, true, false, true) cannot run on GPU because StaticInvoke is not supported\n", + " ! 
knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).stats cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) cannot run on GPU because expression KnownNotNull knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); input expression AssertNotNull assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " ! assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " !Expression input[0, org.apache.spark.sql.delta.actions.AddFile, true] cannot run on GPU because expression BoundReference input[0, org.apache.spark.sql.delta.actions.AddFile, true] produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression externalmaptocatalyst(lambdavariable(ExternalMapToCatalyst_key, ObjectType(class java.lang.Object), true, -3), staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(lambdavariable(ExternalMapToCatalyst_key, ObjectType(class java.lang.Object), true, -3), StringType, ObjectType(class java.lang.String)), true, false, true), lambdavariable(ExternalMapToCatalyst_value, ObjectType(class java.lang.Object), true, -4), staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(lambdavariable(ExternalMapToCatalyst_value, ObjectType(class java.lang.Object), true, -4), StringType, ObjectType(class java.lang.String)), true, false, 
true), knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).tags) AS tags#33038 could run on GPU\n", + " ! externalmaptocatalyst(lambdavariable(ExternalMapToCatalyst_key, ObjectType(class java.lang.Object), true, -3), staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(lambdavariable(ExternalMapToCatalyst_key, ObjectType(class java.lang.Object), true, -3), StringType, ObjectType(class java.lang.String)), true, false, true), lambdavariable(ExternalMapToCatalyst_value, ObjectType(class java.lang.Object), true, -4), staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(lambdavariable(ExternalMapToCatalyst_value, ObjectType(class java.lang.Object), true, -4), StringType, ObjectType(class java.lang.String)), true, false, true), knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).tags) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.ExternalMapToCatalyst\n", + " ! lambdavariable(ExternalMapToCatalyst_key, ObjectType(class java.lang.Object), true, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " !Expression staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(lambdavariable(ExternalMapToCatalyst_key, ObjectType(class java.lang.Object), true, -3), StringType, ObjectType(class java.lang.String)), true, false, true) cannot run on GPU because StaticInvoke is not supported\n", + " ! 
validateexternaltype(lambdavariable(ExternalMapToCatalyst_key, ObjectType(class java.lang.Object), true, -3), StringType, ObjectType(class java.lang.String)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.ValidateExternalType\n", + " ! lambdavariable(ExternalMapToCatalyst_key, ObjectType(class java.lang.Object), true, -3) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(ExternalMapToCatalyst_value, ObjectType(class java.lang.Object), true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " !Expression staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(lambdavariable(ExternalMapToCatalyst_value, ObjectType(class java.lang.Object), true, -4), StringType, ObjectType(class java.lang.String)), true, false, true) cannot run on GPU because StaticInvoke is not supported\n", + " ! validateexternaltype(lambdavariable(ExternalMapToCatalyst_value, ObjectType(class java.lang.Object), true, -4), StringType, ObjectType(class java.lang.String)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.ValidateExternalType\n", + " ! lambdavariable(ExternalMapToCatalyst_value, ObjectType(class java.lang.Object), true, -4) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! 
knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).tags cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) cannot run on GPU because expression KnownNotNull knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); input expression AssertNotNull assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " ! assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " !Expression input[0, org.apache.spark.sql.delta.actions.AddFile, true] cannot run on GPU because expression BoundReference input[0, org.apache.spark.sql.delta.actions.AddFile, true] produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression if (isnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector)) null else named_struct(storageType, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).storageType, true, false, true), pathOrInlineDv, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).pathOrInlineDv, true, false, true), offset, unwrapoption(IntegerType, knownnotnull(knownnotnull(assertnotnull(input[0, 
org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).offset), sizeInBytes, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).sizeInBytes, cardinality, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).cardinality, maxRowIndex, unwrapoption(LongType, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).maxRowIndex)) AS deletionVector#33039 could run on GPU\n", + " @Expression if (isnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector)) null else named_struct(storageType, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).storageType, true, false, true), pathOrInlineDv, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).pathOrInlineDv, true, false, true), offset, unwrapoption(IntegerType, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).offset), sizeInBytes, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).sizeInBytes, cardinality, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).cardinality, maxRowIndex, unwrapoption(LongType, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).maxRowIndex)) could run on GPU\n", + " !Expression isnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector) cannot 
run on GPU because input expression Invoke knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported)\n", + " ! knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) cannot run on GPU because expression KnownNotNull knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); input expression AssertNotNull assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " ! 
assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " !Expression input[0, org.apache.spark.sql.delta.actions.AddFile, true] cannot run on GPU because expression BoundReference input[0, org.apache.spark.sql.delta.actions.AddFile, true] produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression null could run on GPU\n", + " @Expression named_struct(storageType, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).storageType, true, false, true), pathOrInlineDv, staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).pathOrInlineDv, true, false, true), offset, unwrapoption(IntegerType, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).offset), sizeInBytes, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).sizeInBytes, cardinality, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).cardinality, maxRowIndex, unwrapoption(LongType, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).maxRowIndex)) could run on GPU\n", + " @Expression storageType could run on GPU\n", + " !Expression staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).storageType, true, false, 
true) cannot run on GPU because StaticInvoke is not supported\n", + " ! knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).storageType cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector) cannot run on GPU because input expression Invoke knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); expression KnownNotNull knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " ! knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) cannot run on GPU because expression KnownNotNull knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); input expression AssertNotNull assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " ! 
assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " !Expression input[0, org.apache.spark.sql.delta.actions.AddFile, true] cannot run on GPU because expression BoundReference input[0, org.apache.spark.sql.delta.actions.AddFile, true] produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression pathOrInlineDv could run on GPU\n", + " !Expression staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).pathOrInlineDv, true, false, true) cannot run on GPU because StaticInvoke is not supported\n", + " ! knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).pathOrInlineDv cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector) cannot run on GPU because input expression Invoke knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); expression KnownNotNull knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " ! 
knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) cannot run on GPU because expression KnownNotNull knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); input expression AssertNotNull assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " ! assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " !Expression input[0, org.apache.spark.sql.delta.actions.AddFile, true] cannot run on GPU because expression BoundReference input[0, org.apache.spark.sql.delta.actions.AddFile, true] produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression offset could run on GPU\n", + " ! unwrapoption(IntegerType, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).offset) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.UnwrapOption\n", + " ! 
knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).offset cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector) cannot run on GPU because input expression Invoke knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); expression KnownNotNull knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " ! knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) cannot run on GPU because expression KnownNotNull knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); input expression AssertNotNull assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " ! 
assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " !Expression input[0, org.apache.spark.sql.delta.actions.AddFile, true] cannot run on GPU because expression BoundReference input[0, org.apache.spark.sql.delta.actions.AddFile, true] produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression sizeInBytes could run on GPU\n", + " ! knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).sizeInBytes cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector) cannot run on GPU because input expression Invoke knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); expression KnownNotNull knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " ! 
knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) cannot run on GPU because expression KnownNotNull knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); input expression AssertNotNull assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " ! assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " !Expression input[0, org.apache.spark.sql.delta.actions.AddFile, true] cannot run on GPU because expression BoundReference input[0, org.apache.spark.sql.delta.actions.AddFile, true] produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression cardinality could run on GPU\n", + " ! 
knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).cardinality cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector) cannot run on GPU because input expression Invoke knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); expression KnownNotNull knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " ! knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) cannot run on GPU because expression KnownNotNull knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); input expression AssertNotNull assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " ! 
assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " !Expression input[0, org.apache.spark.sql.delta.actions.AddFile, true] cannot run on GPU because expression BoundReference input[0, org.apache.spark.sql.delta.actions.AddFile, true] produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression maxRowIndex could run on GPU\n", + " ! unwrapoption(LongType, knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).maxRowIndex) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.UnwrapOption\n", + " ! knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector).maxRowIndex cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector) cannot run on GPU because input expression Invoke knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); expression KnownNotNull knownnotnull(knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " ! 
knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).deletionVector cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) cannot run on GPU because expression KnownNotNull knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); input expression AssertNotNull assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " ! assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " !Expression input[0, org.apache.spark.sql.delta.actions.AddFile, true] cannot run on GPU because expression BoundReference input[0, org.apache.spark.sql.delta.actions.AddFile, true] produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression unwrapoption(LongType, knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).baseRowId) AS baseRowId#33040L could run on GPU\n", + " ! unwrapoption(LongType, knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).baseRowId) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.UnwrapOption\n", + " ! 
knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).baseRowId cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) cannot run on GPU because expression KnownNotNull knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); input expression AssertNotNull assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " ! assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " !Expression input[0, org.apache.spark.sql.delta.actions.AddFile, true] cannot run on GPU because expression BoundReference input[0, org.apache.spark.sql.delta.actions.AddFile, true] produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression unwrapoption(LongType, knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).defaultRowCommitVersion) AS defaultRowCommitVersion#33041L could run on GPU\n", + " ! unwrapoption(LongType, knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).defaultRowCommitVersion) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.UnwrapOption\n", + " ! 
knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).defaultRowCommitVersion cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) cannot run on GPU because expression KnownNotNull knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); input expression AssertNotNull assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " ! assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " !Expression input[0, org.apache.spark.sql.delta.actions.AddFile, true] cannot run on GPU because expression BoundReference input[0, org.apache.spark.sql.delta.actions.AddFile, true] produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, unwrapoption(ObjectType(class java.lang.String), knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).clusteringProvider), true, false, true) AS clusteringProvider#33042 could run on GPU\n", + " !Expression staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, unwrapoption(ObjectType(class java.lang.String), knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).clusteringProvider), true, false, true) cannot run on GPU because StaticInvoke is not supported\n", + " ! 
unwrapoption(ObjectType(class java.lang.String), knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).clusteringProvider) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.UnwrapOption\n", + " ! knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])).clusteringProvider cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " !Expression knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) cannot run on GPU because expression KnownNotNull knownnotnull(assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true])) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile); input expression AssertNotNull assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported)\n", + " ! assertnotnull(input[0, org.apache.spark.sql.delta.actions.AddFile, true]) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " !Expression input[0, org.apache.spark.sql.delta.actions.AddFile, true] cannot run on GPU because expression BoundReference input[0, org.apache.spark.sql.delta.actions.AddFile, true] produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.MapElementsExec\n", + " !Expression obj#33031 cannot run on GPU because expression AttributeReference obj#33031 produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! 
cannot run on GPU because not all expressions can be replaced; GPU does not currently support the operator class org.apache.spark.sql.execution.DeserializeToObjectExec\n", + " ! newInstance(class scala.Tuple1) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " !Expression if (isnull(add#32966)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.AddFile) (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported); trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.AddFile) is not supported); expression If if (isnull(add#32966)) null else newInstance(class org.apache.spark.sql.delta.actions.AddFile) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " @Expression isnull(add#32966) could run on GPU\n", + " @Expression add#32966 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.AddFile)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.AddFile) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#32966.path.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#32966.path could run on GPU\n", + " @Expression add#32966 could run on GPU\n", + " ! 
catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -5), lambdavariable(CatalystToExternalMap_key, StringType, false, -5).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -6), lambdavariable(CatalystToExternalMap_value, StringType, true, -6).toString, add#32966.partitionValues, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -5) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -5).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -5) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -6) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -6).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -6) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#32966.partitionValues could run on GPU\n", + " @Expression add#32966 could run on GPU\n", + " ! 
assertnotnull(add#32966.size) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#32966.size could run on GPU\n", + " @Expression add#32966 could run on GPU\n", + " ! assertnotnull(add#32966.modificationTime) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#32966.modificationTime could run on GPU\n", + " @Expression add#32966 could run on GPU\n", + " ! assertnotnull(add#32966.dataChange) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#32966.dataChange could run on GPU\n", + " @Expression add#32966 could run on GPU\n", + " ! add#32966.stats.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#32966.stats could run on GPU\n", + " @Expression add#32966 could run on GPU\n", + " ! catalysttoexternalmap(lambdavariable(CatalystToExternalMap_key, StringType, false, -7), lambdavariable(CatalystToExternalMap_key, StringType, false, -7).toString, lambdavariable(CatalystToExternalMap_value, StringType, true, -8), lambdavariable(CatalystToExternalMap_value, StringType, true, -8).toString, add#32966.tags, interface scala.collection.immutable.Map) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.CatalystToExternalMap\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -7) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! 
lambdavariable(CatalystToExternalMap_key, StringType, false, -7).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_key, StringType, false, -7) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -8) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -8).toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " ! lambdavariable(CatalystToExternalMap_value, StringType, true, -8) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable\n", + " @Expression add#32966.tags could run on GPU\n", + " @Expression add#32966 could run on GPU\n", + " !Expression if (isnull(add#32966.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because trueValue expression Literal null (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); falseValue expression NewInstance newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) (ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) is not supported); expression If if (isnull(add#32966.deletionVector)) null else newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " @Expression isnull(add#32966.deletionVector) could 
run on GPU\n", + " @Expression add#32966.deletionVector could run on GPU\n", + " @Expression add#32966 could run on GPU\n", + " !Expression null cannot run on GPU because expression Literal null produces an unsupported type ObjectType(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor)\n", + " ! newInstance(class org.apache.spark.sql.delta.actions.DeletionVectorDescriptor) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.NewInstance\n", + " ! add#32966.deletionVector.storageType.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#32966.deletionVector.storageType could run on GPU\n", + " @Expression add#32966.deletionVector could run on GPU\n", + " @Expression add#32966 could run on GPU\n", + " ! add#32966.deletionVector.pathOrInlineDv.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#32966.deletionVector.pathOrInlineDv could run on GPU\n", + " @Expression add#32966.deletionVector could run on GPU\n", + " @Expression add#32966 could run on GPU\n", + " ! wrapoption(add#32966.deletionVector.offset, IntegerType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#32966.deletionVector.offset could run on GPU\n", + " @Expression add#32966.deletionVector could run on GPU\n", + " @Expression add#32966 could run on GPU\n", + " ! 
assertnotnull(add#32966.deletionVector.sizeInBytes) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#32966.deletionVector.sizeInBytes could run on GPU\n", + " @Expression add#32966.deletionVector could run on GPU\n", + " @Expression add#32966 could run on GPU\n", + " ! assertnotnull(add#32966.deletionVector.cardinality) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull\n", + " @Expression add#32966.deletionVector.cardinality could run on GPU\n", + " @Expression add#32966.deletionVector could run on GPU\n", + " @Expression add#32966 could run on GPU\n", + " ! wrapoption(add#32966.deletionVector.maxRowIndex, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#32966.deletionVector.maxRowIndex could run on GPU\n", + " @Expression add#32966.deletionVector could run on GPU\n", + " @Expression add#32966 could run on GPU\n", + " ! wrapoption(add#32966.baseRowId, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#32966.baseRowId could run on GPU\n", + " @Expression add#32966 could run on GPU\n", + " ! wrapoption(add#32966.defaultRowCommitVersion, LongType) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " @Expression add#32966.defaultRowCommitVersion could run on GPU\n", + " @Expression add#32966 could run on GPU\n", + " ! 
wrapoption(add#32966.clusteringProvider.toString, ObjectType(class java.lang.String)) cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.WrapOption\n", + " ! add#32966.clusteringProvider.toString cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.catalyst.expressions.objects.Invoke\n", + " @Expression add#32966.clusteringProvider could run on GPU\n", + " @Expression add#32966 could run on GPU\n", + " !Expression obj#33030 cannot run on GPU because expression AttributeReference obj#33030 produces an unsupported type ObjectType(class scala.Tuple1)\n", + " ! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.RDDScanExec\n", + " @Expression txn#32965 could run on GPU\n", + " @Expression add#32966 could run on GPU\n", + " @Expression remove#32967 could run on GPU\n", + " @Expression metaData#32968 could run on GPU\n", + " @Expression protocol#32969 could run on GPU\n", + " @Expression cdc#32970 could run on GPU\n", + " @Expression checkpointMetadata#32971 could run on GPU\n", + " @Expression sidecar#32972 could run on GPU\n", + " @Expression domainMetadata#32973 could run on GPU\n", + " @Expression commitInfo#32974 could run on GPU\n", + "\n", + "25/09/29 17:58:16 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:16 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:16 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:16 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:16 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake 
metadata queries are not efficient on GPU\n", + "25/09/29 17:58:16 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:16 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:16 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------------------------------------\n" + ] + } + ], + "source": [ + "spark.sql(f\"CREATE TABLE delta.`{DATA_ROOT}/store_sales_clustered` USING delta CLUSTER BY (ss_sold_date_sk) AS SELECT * FROM delta.`{DATA_ROOT}/store_sales`\")\n", + "sql = f\"\"\"\n", + "OPTIMIZE delta.`{DATA_ROOT}/store_sales_clustered`\n", + "\"\"\"\n", + "print(\"-\"*50)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "f41b8d54", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "25/09/29 17:58:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:35 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:35 WARN GpuOverrides: Can't replace any part of this 
plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:36 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:36 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:36 WARN GpuOverrides: \n", + "! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec\n", + " @Expression path#34603 could run on GPU\n", + " @Expression partitionValues#34604 could run on GPU\n", + " @Expression size#34605L could run on GPU\n", + " @Expression modificationTime#34606L could run on GPU\n", + " @Expression dataChange#34607 could run on GPU\n", + " @Expression stats#34608 could run on GPU\n", + " @Expression tags#34609 could run on GPU\n", + " @Expression deletionVector#34610 could run on GPU\n", + " @Expression baseRowId#34611L could run on GPU\n", + " @Expression defaultRowCommitVersion#34612L could run on GPU\n", + " @Expression clusteringProvider#34613 could run on GPU\n", + "\n", + "25/09/29 17:58:49 WARN GpuOverrides: \n", + "! 
cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec\n", + " @Expression path#34676 could run on GPU\n", + " @Expression partitionValues#34677 could run on GPU\n", + " @Expression size#34678L could run on GPU\n", + " @Expression modificationTime#34679L could run on GPU\n", + " @Expression dataChange#34680 could run on GPU\n", + " @Expression stats#34681 could run on GPU\n", + " @Expression tags#34682 could run on GPU\n", + " @Expression deletionVector#34683 could run on GPU\n", + " @Expression baseRowId#34684L could run on GPU\n", + " @Expression defaultRowCommitVersion#34685L could run on GPU\n", + " @Expression clusteringProvider#34686 could run on GPU\n", + "\n", + "25/09/29 17:58:50 WARN GpuDeltaInvariantCheckerExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric\n", + "25/09/29 17:58:50 WARN GpuDeltaInvariantCheckerExec: GpuRapidsDeltaWriteExec returned empty metrics in getOpTimeNewMetric\n", + "25/09/29 17:58:55 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:55 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:55 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:55 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:55 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:56 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + "25/09/29 17:58:56 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n", + 
"25/09/29 17:58:56 WARN GpuOverrides: Can't replace any part of this plan due to: Delta Lake metadata queries are not efficient on GPU\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+--------------------+--------------------+\n", + "| path| metrics|\n", + "+--------------------+--------------------+\n", + "|file:/home/jihoon...|{1, 16, {12463571...|\n", + "+--------------------+--------------------+\n", + "\n", + "Retry times : 1, clustering microbenchmark takes 20.67 seconds\n", + "clustering microbenchmark takes average 21 seconds after 1 retries\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "25/09/29 17:58:56 WARN GpuOverrides: \n", + "! cannot run on GPU because GPU does not currently support the operator class org.apache.spark.sql.execution.LocalTableScanExec\n", + " @Expression toprettystring(path)#35138 could run on GPU\n", + " @Expression toprettystring(metrics)#35139 could run on GPU\n", + "\n" + ] + } + ], + "source": [ + "# Run microbenchmark with n retry time\n", + "runMicroBenchmark(spark,\"clustering\",sql,1)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "fc2092e8", + "metadata": {}, + "outputs": [], + "source": [ + "spark.stop()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "spark-rapids-examples", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/TableFormat-Examples/delta/README.md b/examples/TableFormat-Examples/delta/README.md deleted file mode 100644 index e69de29bb..000000000