Skip to content

Commit

Permalink
[GLUTEN-7385][CH] Add some config parameters to control the cache si…
Browse files Browse the repository at this point in the history
…ze for the mergetree parts (apache#7386)

Add some config parameters to control the cache size for the mergetree parts:

- spark.gluten.sql.columnar.backend.ch.deltascan.cache.size
- spark.gluten.sql.columnar.backend.ch.addfiles.to.mtps.cache.size
- spark.gluten.sql.columnar.backend.ch.table.path.to.mtps.cache.size

Close apache#7385.
  • Loading branch information
zzcclp authored Sep 29, 2024
1 parent 6b20d0e commit 9e3cf51
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,12 @@ object CHBackendSettings extends BackendSettingsApi with Logging {

val GLUTEN_AQE_PROPAGATEEMPTY: String = CHConf.prefixOf("aqe.propagate.empty.relation")

// Config key (built via CHConf.prefixOf) for the maximum size of the delta scan cache.
val GLUTEN_CLICKHOUSE_DELTA_SCAN_CACHE_SIZE: String = CHConf.prefixOf("deltascan.cache.size")
// Config key (built via CHConf.prefixOf) for the maximum size of the cache that maps
// add-files to mergetree parts.
val GLUTEN_CLICKHOUSE_ADDFILES_TO_MTPS_CACHE_SIZE: String =
CHConf.prefixOf("addfiles.to.mtps.cache.size")
// Config key (built via CHConf.prefixOf) for the maximum size of the cache that maps
// table paths to mergetree parts.
val GLUTEN_CLICKHOUSE_TABLE_PATH_TO_MTPS_CACHE_SIZE: String =
CHConf.prefixOf("table.path.to.mtps.cache.size")

def affinityMode: String = {
SparkEnv.get.conf
.get(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
*/
package org.apache.spark.sql.delta

import org.apache.gluten.backendsapi.clickhouse.CHBackendSettings

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BindReferences, Expression, Predicate}
import org.apache.spark.sql.delta.actions.AddFile
import org.apache.spark.sql.delta.stats.DeltaScan
Expand Down Expand Up @@ -82,14 +85,20 @@ case class FilterExprsAsKey(

object ClickhouseSnapshot {
/**
 * Cache of [[DeltaScan]] results keyed by [[FilterExprsAsKey]].
 *
 * The maximum number of entries is read from
 * `CHBackendSettings.GLUTEN_CLICKHOUSE_DELTA_SCAN_CACHE_SIZE` on the active session and
 * defaults to 10000; when no session is active the default is used. Entries expire 7200
 * seconds after their last access, and hit/miss statistics are recorded.
 *
 * Each builder limit is set exactly once: Guava's `CacheBuilder` throws
 * `IllegalStateException` when `maximumSize` or `expireAfterAccess` is configured twice.
 */
val deltaScanCache: Cache[FilterExprsAsKey, DeltaScan] = CacheBuilder.newBuilder
  .maximumSize(
    // Avoid Option.get: this runs during object initialization, where a missing active
    // session would otherwise surface as an initializer failure.
    SparkSession.getActiveSession
      .map(_.conf.get(CHBackendSettings.GLUTEN_CLICKHOUSE_DELTA_SCAN_CACHE_SIZE, "10000"))
      .getOrElse("10000")
      .toLong)
  .expireAfterAccess(7200L, TimeUnit.SECONDS)
  .recordStats()
  .build()

val addFileToAddMTPCache: LoadingCache[AddFileAsKey, AddMergeTreeParts] = CacheBuilder.newBuilder
.maximumSize(1000000)
.expireAfterAccess(3600L, TimeUnit.SECONDS)
.maximumSize(
SparkSession.getActiveSession.get.conf
.get(CHBackendSettings.GLUTEN_CLICKHOUSE_ADDFILES_TO_MTPS_CACHE_SIZE, "1000000")
.toLong)
.expireAfterAccess(7200L, TimeUnit.SECONDS)
.recordStats
.build[AddFileAsKey, AddMergeTreeParts](new CacheLoader[AddFileAsKey, AddMergeTreeParts]() {
@throws[Exception]
Expand All @@ -99,8 +108,11 @@ object ClickhouseSnapshot {
})

/**
 * Cache of [[AddMergeTreeParts]] keyed by a `String` path.
 *
 * The maximum number of entries is read from
 * `CHBackendSettings.GLUTEN_CLICKHOUSE_TABLE_PATH_TO_MTPS_CACHE_SIZE` on the active session
 * and defaults to 1000000; when no session is active the default is used. Entries expire
 * 7200 seconds after their last access, and hit/miss statistics are recorded.
 *
 * Each builder limit is set exactly once: Guava's `CacheBuilder` throws
 * `IllegalStateException` when `maximumSize` or `expireAfterAccess` is configured twice.
 */
val pathToAddMTPCache: Cache[String, AddMergeTreeParts] = CacheBuilder.newBuilder
  .maximumSize(
    // Avoid Option.get: this runs during object initialization, where a missing active
    // session would otherwise surface as an initializer failure.
    SparkSession.getActiveSession
      .map(
        _.conf.get(CHBackendSettings.GLUTEN_CLICKHOUSE_TABLE_PATH_TO_MTPS_CACHE_SIZE, "1000000"))
      .getOrElse("1000000")
      .toLong)
  .expireAfterAccess(7200L, TimeUnit.SECONDS)
  .recordStats()
  .build()

Expand Down

0 comments on commit 9e3cf51

Please sign in to comment.