Skip to content

Commit

Permalink
[VL] Add columnar table cache benchmark (apache#3375)
Browse files Browse the repository at this point in the history
* Add columnar table cache benchmark

* benchmark

---------

Co-authored-by: Kent Yao <[email protected]>
  • Loading branch information
ulysses-you and yaooqinn authored Oct 13, 2023
1 parent c9ec034 commit 003964c
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 0 deletions.
23 changes: 23 additions & 0 deletions backends-velox/benchmark/ColumnarTableCacheBenchmark-results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Mac OS X 13.5
Apple M1 Pro
table cache count: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
disable columnar table cache 16773 17024 401 1.2 838.7 1.0X
enable columnar table cache 9985 10051 65 2.0 499.3 1.0X


OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Mac OS X 13.5
Apple M1 Pro
table cache column pruning: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
disable columnar table cache 16429 16873 688 1.2 821.5 1.0X
enable columnar table cache 15118 15495 456 1.3 755.9 1.0X


OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Mac OS X 13.5
Apple M1 Pro
table cache filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
disable columnar table cache 22895 23527 722 0.9 1144.7 1.0X
enable columnar table cache 16673 17462 765 1.2 833.7 1.0X

Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.execution.benchmark

import io.glutenproject.GlutenConfig

import org.apache.spark.benchmark.Benchmark
import org.apache.spark.storage.StorageLevel

/**
* Benchmark to measure performance for columnar table cache. To run this benchmark:
* {{{
* 1. without sbt:
* bin/spark-submit --class <this class> --jars <spark core test jar> <sql core test jar>
* }}}
*/
object ColumnarTableCacheBenchmark extends SqlBasedBenchmark {
  // 20 million rows — large enough that cache materialization dominates the timings.
  private val numRows: Long = 20000000L

  /**
   * Runs `f` as a single benchmark case (3 iterations), labelling the case with
   * whether columnar table cache is currently enabled so results from separate
   * runs can be compared side by side.
   */
  private def doBenchmark(name: String, cardinality: Long)(f: => Unit): Unit = {
    val columnarCacheEnabled =
      spark.sessionState.conf.getConf(GlutenConfig.COLUMNAR_TABLE_CACHE_ENABLED)
    val flag = if (columnarCacheEnabled) "enable" else "disable"
    val benchmark = new Benchmark(name, cardinality, output = output)
    benchmark.addCase(s"$flag columnar table cache", 3)(_ => f)
    benchmark.run()
  }

  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
    withTempPath {
      dir =>
        val path = dir.getCanonicalPath

        // Materialize a mixed-type dataset (int/double/long/string/uuid) to parquet
        // so each benchmark reads identical input.
        spark
          .range(numRows)
          .selectExpr(
            "cast(id as int) as c0",
            "cast(id as double) as c1",
            "id as c2",
            "cast(id as string) as c3",
            "uuid() as c4")
          .write
          .parquet(path)

        // Full-scan count over a memory-cached table; cache is dropped after
        // each iteration so every run re-materializes it.
        doBenchmark("table cache count", numRows) {
          spark.read.parquet(path).persist(StorageLevel.MEMORY_ONLY).count()
          spark.catalog.clearCache()
        }

        // Two projections over disjoint column subsets of the same cached table,
        // exercising column pruning against the cache.
        doBenchmark("table cache column pruning", numRows) {
          val cached = spark.read
            .parquet(path)
            .persist(StorageLevel.MEMORY_ONLY)
          cached.select("c1", "c2").noop()
          cached.select("c0", "c3").noop()
          spark.catalog.clearCache()
        }

        // Two filter scans over the cached table, exercising predicate
        // evaluation against cached batches.
        doBenchmark("table cache filter", numRows) {
          val cached = spark.read
            .parquet(path)
            .persist(StorageLevel.MEMORY_ONLY)
          cached.where("c1 % 100 > 10").noop()
          cached.where("c1 % 100 > 20").noop()
          spark.catalog.clearCache()
        }
    }
  }
}

0 comments on commit 003964c

Please sign in to comment.