Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
richardc-db committed May 15, 2024
1 parent 69fd7e4 commit 756003a
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import org.apache.spark.sql.delta.test.DeltaTestImplicits._
import org.apache.spark.sql.delta.util.JsonUtils
import org.apache.hadoop.fs.Path

import org.apache.spark.sql.Column
import org.apache.spark.sql.{Column, DataFrame}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.test.SharedSparkSession
Expand Down Expand Up @@ -184,15 +184,20 @@ class AutoCompactSuite extends
}
}

private def checkAutoCompactionWorks(dir: String): Unit = {
spark.range(10).write.format("delta").mode("append").save(dir)
/**
* Writes `df` twice to the same location and checks that
* 1. There is only one resultant file.
* 2. The result is equal to `df` unioned with itself.
*/
private def checkAutoCompactionWorks(dir: String, df: DataFrame): Unit = {
df.write.format("delta").mode("append").save(dir)
val deltaLog = DeltaLog.forTable(spark, dir)
val newSnapshot = deltaLog.update()
assert(newSnapshot.version === 1) // 0 is the first commit, 1 is optimize
assert(deltaLog.update().numOfFiles === 1)

val isLogged = checkAutoOptimizeLogging {
spark.range(10).write.format("delta").mode("append").save(dir)
df.write.format("delta").mode("append").save(dir)
}

assert(isLogged)
Expand All @@ -202,17 +207,27 @@ class AutoCompactSuite extends

assert(deltaLog.update().numOfFiles === 1, "Files should be optimized into a single one")
checkAnswer(
spark.range(10).union(spark.range(10)).toDF(),
df.union(df).toDF(),
spark.read.format("delta").load(dir)
)
}

testBothModesViaProperty("auto compact should kick in when enabled - table config") { dir =>
checkAutoCompactionWorks(dir)
checkAutoCompactionWorks(dir, spark.range(10).toDF("id"))
}

testBothModesViaConf("auto compact should kick in when enabled - session config") { dir =>
checkAutoCompactionWorks(dir)
checkAutoCompactionWorks(dir, spark.range(10).toDF("id"))
}

testBothModesViaProperty(
  "variant auto compact should kick in when enabled - table config") { dir =>
  // Same contract as the non-variant test above, but the written column is a
  // VARIANT produced by `parse_json`, exercising auto-compaction over variant files.
  // Note: brace spacing (`{ dir =>`) and the wrapped call keep this consistent with
  // the sibling session-config test and the ~100-char line convention.
  checkAutoCompactionWorks(
    dir, spark.range(10).selectExpr("parse_json(cast(id as string)) as v"))
}

testBothModesViaConf(
  "variant auto compact should kick in when enabled - session config") { dir =>
  // Same scenario as the table-config variant, driven by the session conf instead:
  // write a VARIANT column twice and expect the files to be auto-compacted into one.
  val variantDf = spark.range(10).selectExpr("parse_json(cast(id as string)) as v")
  checkAutoCompactionWorks(dir, variantDf)
}

testBothModesViaProperty("auto compact should not kick in when session config is off") { dir =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ class DeletionVectorsSuite extends QueryTest
}
}

Seq("name", "id").foreach(mode =>
Seq("name", "id").foreach { mode =>
test(s"DELETE with DVs with column mapping mode=$mode") {
withSQLConf("spark.databricks.delta.properties.defaults.columnMapping.mode" -> mode) {
withTempDir { dirName =>
Expand All @@ -286,7 +286,26 @@ class DeletionVectorsSuite extends QueryTest
}
}
}
)

test(s"variant types DELETE with DVs with column mapping mode=$mode") {
  withSQLConf("spark.databricks.delta.properties.defaults.columnMapping.mode" -> mode) {
    withTempDir { dirName =>
      val path = dirName.getAbsolutePath
      // 50 rows spread over 10 partitions; `v` carries the row id as a variant value.
      val source = spark
        .range(0, 50)
        .selectExpr("id % 10 as part", "id", "parse_json(cast(id as string)) as v")
      source.write.format("delta").partitionBy("part").save(path)

      val tableLog = DeltaLog.forTable(spark, path)
      enableDeletionVectorsInTable(tableLog, true)

      // Delete a single row by filtering on the variant column, then verify the row
      // is gone and that exactly one deletion vector was written (no file rewrite).
      spark.sql(s"DELETE FROM delta.`$path` WHERE v::int = 2")
      checkAnswer(spark.sql(s"select * from delta.`$path` WHERE v::int = 2"), Seq())
      verifyDVsExist(tableLog, 1)
    }
  }
}
}

test("DELETE with DVs - existing table already has DVs") {
withSQLConf(DeltaSQLConf.DELETE_USE_PERSISTENT_DELETION_VECTORS.key -> "true") {
Expand Down

0 comments on commit 756003a

Please sign in to comment.