# [Spark] Skip collecting commit stats to prevent computing Snapshot State
#### Which Delta project/connector is this regarding?

- [x] Spark
- [ ] Standalone
- [ ] Flink
- [ ] Kernel
- [ ] Other (fill in here)

## Description

Before this PR, Delta computes a
[SnapshotState](https://github.com/delta-io/delta/blob/v3.1.0/spark/src/main/scala/org/apache/spark/sql/delta/SnapshotState.scala#L46-L58)
during every commit. Computing a SnapshotState is fairly slow and
expensive, because it involves reading the entire checkpoint, its
sidecars, and the log segment.

For many types of commit, it should be unnecessary to compute the
SnapshotState.

After this PR, a transaction can avoid computing the SnapshotState of
the newly created snapshot. Skipping the computation is enabled via the
Spark configuration option `spark.databricks.delta.commitStats.collect=false`.

This change can have a big performance impact when writing into a Delta
table, especially when the table comprises a large number of underlying
data files.
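For illustration, a minimal sketch of how a user would opt in, assuming a session with Delta already configured (the table path is hypothetical):

```scala
// Turn off commit-stats collection for this session, so commits skip the
// post-commit SnapshotState computation that exists only for logging.
spark.conf.set("spark.databricks.delta.commitStats.collect", "false")

// Any subsequent Delta write now avoids the extra state reconstruction.
spark.range(1000).toDF("id")
  .write.format("delta").mode("append").save("/tmp/example_table")
```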

## How was this patch tested?

- Locally built delta-spark
- Ran a small Spark job to insert rows into a Delta table (sketched below)
- Inspected the log4j output to check whether the snapshot state was computed
- Repeated the run, this time setting `spark.databricks.delta.commitStats.collect=false`
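A minimal sketch of that verification job, assuming a locally built delta-spark on the classpath (the app name and table path are made up for illustration):

```scala
import org.apache.spark.sql.SparkSession

// Local session wired up for Delta; the two configs below are the standard
// Delta SQL extension and catalog settings.
val spark = SparkSession.builder()
  .master("local[*]")
  .appName("commit-stats-check")
  .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
  .config("spark.sql.catalog.spark_catalog",
    "org.apache.spark.sql.delta.catalog.DeltaCatalog")
  // Toggle this line between runs and compare the log4j output.
  .config("spark.databricks.delta.commitStats.collect", "false")
  .getOrCreate()

import spark.implicits._

// A small append; with collection disabled, the commit should not trigger
// snapshot state reconstruction.
Seq((1, "a"), (2, "b")).toDF("id", "value")
  .write.format("delta").mode("append").save("/tmp/commit-stats-check")
```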

## Does this PR introduce _any_ user-facing changes?

Yes. After this PR, users can set the Spark config option
`spark.databricks.delta.commitStats.collect=false` to avoid computing the
SnapshotState after a commit.

Signed-off-by: Ian Streeter <[email protected]>
istreeter committed Mar 26, 2024
1 parent dbed7a9 commit b166e50
Showing 5 changed files with 81 additions and 6 deletions.
spark/src/main/scala/org/apache/spark/sql/delta/OptimisticTransaction.scala
```diff
@@ -1848,6 +1848,37 @@ trait OptimisticTransactionImpl extends TransactionalWrite
     val info = currentTransactionInfo.commitInfo
       .map(_.copy(readVersion = None, isolationLevel = None)).orNull
     setNeedsCheckpoint(attemptVersion, postCommitSnapshot)
+
+    val numFilesTotal =
+      if (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_COLLECT_COMMIT_STATS)) {
+        // Forces snapshot state reconstruction
+        postCommitSnapshot.numOfFiles
+      } else -1L
+
+    val sizeInBytesTotal =
+      if (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_COLLECT_COMMIT_STATS)) {
+        // Forces snapshot state reconstruction
+        postCommitSnapshot.sizeInBytes
+      } else -1L
+
+    val protocol =
+      if (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_COLLECT_COMMIT_STATS)) {
+        // Forces protocolAndMetadata reconstruction
+        postCommitSnapshot.protocol
+      } else currentTransactionInfo.protocol
+
+    val checkpointSizeInBytes =
+      if (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_COLLECT_COMMIT_STATS)) {
+        // This might block waiting on a Future that has not yet completed.
+        postCommitSnapshot.checkpointSizeInBytes()
+      } else -1L
+
+    val numPartitionColumnsInTable =
+      if (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_COLLECT_COMMIT_STATS)) {
+        // Forces protocolAndMetadata reconstruction
+        postCommitSnapshot.metadata.partitionColumns.size
+      } else -1
+
     val stats = CommitStats(
       startVersion = snapshot.version,
       commitVersion = attemptVersion,
@@ -1861,20 +1892,20 @@ trait OptimisticTransactionImpl extends TransactionalWrite
       numRemove = numRemove,
       numSetTransaction = numSetTransaction,
       bytesNew = bytesNew,
-      numFilesTotal = postCommitSnapshot.numOfFiles,
-      sizeInBytesTotal = postCommitSnapshot.sizeInBytes,
+      numFilesTotal = numFilesTotal,
+      sizeInBytesTotal = sizeInBytesTotal,
       numCdcFiles = numCdcFiles,
       cdcBytesNew = cdcBytesNew,
-      protocol = postCommitSnapshot.protocol,
+      protocol = protocol,
       commitSizeBytes = jsonActions.map(_.size).sum,
-      checkpointSizeBytes = postCommitSnapshot.checkpointSizeInBytes(),
+      checkpointSizeBytes = checkpointSizeInBytes,
       totalCommitsSizeSinceLastCheckpoint = postCommitSnapshot.deltaFileSizeInBytes(),
       checkpointAttempt = needsCheckpoint,
       info = info,
       newMetadata = newMetadata,
       numAbsolutePathsInAdd = numAbsolutePaths,
       numDistinctPartitionsInAdd = distinctPartitions.size,
-      numPartitionColumnsInTable = postCommitSnapshot.metadata.partitionColumns.size,
+      numPartitionColumnsInTable = numPartitionColumnsInTable,
       isolationLevel = isolationLevel.toString,
       numOfDomainMetadatas = numOfDomainMetadatas,
       txnId = Some(txnId))
```
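As a side note on the shape of this change: the new code reads the flag once per statistic. A hypothetical consolidation (not part of this commit) could hoist the lookup into a single val:

```scala
// Hypothetical refactor: evaluate the flag once, then guard each statistic
// that would otherwise force snapshot state reconstruction.
val collectCommitStats =
  spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_COLLECT_COMMIT_STATS)
val numFilesTotal = if (collectCommitStats) postCommitSnapshot.numOfFiles else -1L
val sizeInBytesTotal = if (collectCommitStats) postCommitSnapshot.sizeInBytes else -1L
```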
spark/src/main/scala/org/apache/spark/sql/delta/RowId.scala
```diff
@@ -110,7 +110,11 @@ object RowId {
    * Extracts the high watermark of row IDs from a snapshot.
    */
   private[delta] def extractHighWatermark(snapshot: Snapshot): Option[Long] =
-    RowTrackingMetadataDomain.fromSnapshot(snapshot).map(_.rowIdHighWaterMark)
+    if (isEnabled(snapshot.protocol, snapshot.metadata)) {
+      RowTrackingMetadataDomain.fromSnapshot(snapshot).map(_.rowIdHighWaterMark)
+    } else {
+      None
+    }
 
   /** Base Row ID column name */
   val BASE_ROW_ID = "base_row_id"
```
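For context (my reading, not stated in the diff): `RowTrackingMetadataDomain.fromSnapshot` consults the snapshot's domain metadata, which can force state reconstruction, so guarding on `isEnabled` lets tables without row tracking skip that work entirely. A hypothetical call site:

```scala
// Sketch: on a table with row tracking disabled, the guarded method now
// returns None without touching snapshot state.
val highWatermark: Option[Long] = RowId.extractHighWatermark(snapshot)
assert(highWatermark.isEmpty)
```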
spark/src/main/scala/org/apache/spark/sql/delta/sources/DeltaSQLConf.scala
```diff
@@ -83,6 +83,17 @@ trait DeltaSQLConfBase
       .stringConf
       .createOptional
 
+  val DELTA_COLLECT_COMMIT_STATS =
+    buildConf("commitStats.collect")
+      .internal()
+      .doc(
+        """When true, commit statistics are collected for logging purposes.
+          | Enabling this feature might require the Snapshot State to be computed, which is
+          | potentially expensive.
+        """.stripMargin)
+      .booleanConf
+      .createWithDefault(true)
+
   val DELTA_CONVERT_USE_METADATA_LOG =
     buildConf("convert.useMetadataLog")
       .doc(
```
spark/src/test/scala/org/apache/spark/sql/delta/OptimisticTransactionSuite.scala
```diff
@@ -806,4 +806,30 @@ class OptimisticTransactionSuite
       }
     }
   }
+
+  test("Skip computing state of post-commit snapshot") {
+    withTempDir { tableDir =>
+      val df = Seq((1, 0), (2, 1)).toDF("key", "value")
+      df.write.format("delta").mode("append").save(tableDir.getCanonicalPath)
+
+      val deltaLog = DeltaLog.forTable(spark, tableDir)
+      val snapshot = deltaLog.update()
+
+      assert(!snapshot.stateReconstructionTriggered)
+    }
+  }
+
+  test("Skip computing state of pre-commit snapshot") {
+    withTempDir { tableDir =>
+      val df = Seq((1, 0), (2, 1)).toDF("key", "value")
+      df.write.format("delta").mode("append").save(tableDir.getCanonicalPath)
+
+      val deltaLog = DeltaLog.forTable(spark, tableDir)
+      val preCommitSnapshot = deltaLog.update()
+
+      df.write.format("delta").mode("append").save(tableDir.getCanonicalPath)
+
+      assert(!preCommitSnapshot.stateReconstructionTriggered)
+    }
+  }
 }
```
spark/src/test/scala/org/apache/spark/sql/delta/OptimisticTransactionSuiteBase.scala
```diff
@@ -33,6 +33,9 @@ trait OptimisticTransactionSuiteBase
     with SharedSparkSession
     with DeletionVectorsTestUtils {
 
+  override def sparkConf =
+    super.sparkConf.set("spark.databricks.delta.commitStats.collect", "false")
+
 
   /**
    * Check whether the test transaction conflict with the concurrent writes by executing the
```
