Skip to content

Commit

Permalink
[VL] Add columnar table cache benchmark (apache#3375)
Browse files Browse the repository at this point in the history
* Add columnar table cache benchmark

* benchmark

---------

Co-authored-by: Kent Yao <[email protected]>
  • Loading branch information
ulysses-you and yaooqinn authored Oct 13, 2023
1 parent c9ec034 commit 003964c
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 0 deletions.
23 changes: 23 additions & 0 deletions backends-velox/benchmark/ColumnarTableCacheBenchmark-results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Mac OS X 13.5
Apple M1 Pro
table cache count: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
disable columnar table cache 16773 17024 401 1.2 838.7 1.0X
enable columnar table cache 9985 10051 65 2.0 499.3 1.0X


OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Mac OS X 13.5
Apple M1 Pro
table cache column pruning: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
disable columnar table cache 16429 16873 688 1.2 821.5 1.0X
enable columnar table cache 15118 15495 456 1.3 755.9 1.0X


OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Mac OS X 13.5
Apple M1 Pro
table cache filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
disable columnar table cache 22895 23527 722 0.9 1144.7 1.0X
enable columnar table cache 16673 17462 765 1.2 833.7 1.0X

Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.execution.benchmark

import io.glutenproject.GlutenConfig

import org.apache.spark.benchmark.Benchmark
import org.apache.spark.storage.StorageLevel

/**
* Benchmark to measure performance for columnar table cache. To run this benchmark:
* {{{
* 1. without sbt:
* bin/spark-submit --class <this class> --jars <spark core test jar> <sql core test jar>
* }}}
*/
object ColumnarTableCacheBenchmark extends SqlBasedBenchmark {
  // 20 million rows — large enough that cache materialization dominates the timings.
  private val numRows: Long = 20000000L

  /**
   * Runs `f` as a single benchmark case (3 iterations), labelling the case with
   * whether columnar table cache is currently enabled so results from separate
   * runs can be compared side by side.
   */
  private def doBenchmark(name: String, cardinality: Long)(f: => Unit): Unit = {
    val columnarCacheEnabled =
      spark.sessionState.conf.getConf(GlutenConfig.COLUMNAR_TABLE_CACHE_ENABLED)
    val flag = if (columnarCacheEnabled) "enable" else "disable"
    val benchmark = new Benchmark(name, cardinality, output = output)
    benchmark.addCase(s"$flag columnar table cache", 3)(_ => f)
    benchmark.run()
  }

  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
    withTempPath {
      dir =>
        val path = dir.getCanonicalPath

        // Materialize a mixed-type dataset (int/double/long/string/uuid) to parquet
        // so each benchmark reads identical input.
        spark
          .range(numRows)
          .selectExpr(
            "cast(id as int) as c0",
            "cast(id as double) as c1",
            "id as c2",
            "cast(id as string) as c3",
            "uuid() as c4")
          .write
          .parquet(path)

        // Full-scan count over a memory-cached table; cache is dropped after
        // each iteration so every run re-materializes it.
        doBenchmark("table cache count", numRows) {
          spark.read.parquet(path).persist(StorageLevel.MEMORY_ONLY).count()
          spark.catalog.clearCache()
        }

        // Two projections over disjoint column subsets of the same cached table,
        // exercising column pruning against the cache.
        doBenchmark("table cache column pruning", numRows) {
          val cached = spark.read
            .parquet(path)
            .persist(StorageLevel.MEMORY_ONLY)
          cached.select("c1", "c2").noop()
          cached.select("c0", "c3").noop()
          spark.catalog.clearCache()
        }

        // Two filter scans over the cached table, exercising predicate
        // evaluation against cached batches.
        doBenchmark("table cache filter", numRows) {
          val cached = spark.read
            .parquet(path)
            .persist(StorageLevel.MEMORY_ONLY)
          cached.where("c1 % 100 > 10").noop()
          cached.where("c1 % 100 > 20").noop()
          spark.catalog.clearCache()
        }
    }
  }
}

0 comments on commit 003964c

Please sign in to comment.