From 9aae7381a919435bf9d8d7d8bff4cc747c4d4d3e Mon Sep 17 00:00:00 2001 From: Tuan Pham Date: Sat, 12 Oct 2024 17:29:53 +1100 Subject: [PATCH] Make metadata test work on spark 3.0/3.1/3.2 --- .../fast/tests/DataFrameComparerTest.scala | 25 ++++++++++--------- .../fast/tests/DatasetComparerTest.scala | 17 +++++++------ 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/core/src/test/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparerTest.scala b/core/src/test/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparerTest.scala index d43f7c7..b5871a0 100644 --- a/core/src/test/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparerTest.scala +++ b/core/src/test/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparerTest.scala @@ -4,6 +4,7 @@ import org.apache.spark.sql.types.{DoubleType, IntegerType, MetadataBuilder, Str import SparkSessionExt._ import com.github.mrpowers.spark.fast.tests.SchemaComparer.DatasetSchemaMismatch import com.github.mrpowers.spark.fast.tests.StringExt.StringOps +import org.apache.spark.sql.functions.col import org.scalatest.freespec.AnyFreeSpec class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with SparkSessionTestWrapper { @@ -320,7 +321,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar ), List(("number", IntegerType, true)) ) - .withMetadata("number", new MetadataBuilder().putString("description", "small int").build()) + .withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small int").build())) val expectedDF = spark .createDF( @@ -330,7 +331,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar ), List(("number", IntegerType, true)) ) - .withMetadata("number", new MetadataBuilder().putString("description", "small number").build()) + .withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small number").build())) assertLargeDataFrameEquality(sourceDF, expectedDF) } @@ -344,7 +345,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar ), List(("number", IntegerType, true)) ) - .withMetadata("number", new MetadataBuilder().putString("description", "small int").build()) + .withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small int").build())) val expectedDF = spark .createDF( @@ -354,7 +355,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar ), List(("number", IntegerType, true)) ) - .withMetadata("number", new MetadataBuilder().putString("description", "small number").build()) + .withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small number").build())) intercept[DatasetSchemaMismatch] { assertLargeDataFrameEquality(sourceDF, expectedDF, ignoreMetadata = false) @@ -517,7 +518,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar ), List(("number", IntegerType, true)) ) - .withMetadata("number", new MetadataBuilder().putString("description", "small int").build()) + .withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small int").build())) val expectedDF = spark .createDF( @@ -527,7 +528,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar ), List(("number", IntegerType, true)) ) - .withMetadata("number", new MetadataBuilder().putString("description", "small number").build()) + .withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small number").build())) assertApproximateDataFrameEquality(sourceDF, expectedDF, precision = 0.0000001) } @@ -541,7 +542,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar ), List(("number", IntegerType, true)) ) - .withMetadata("number", new MetadataBuilder().putString("description", "small int").build()) + .withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small int").build())) val expectedDF = spark .createDF( @@ -551,7 +552,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar ), List(("number", IntegerType, true)) ) - .withMetadata("number", new MetadataBuilder().putString("description", "small number").build()) + .withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small number").build())) intercept[DatasetSchemaMismatch] { assertApproximateDataFrameEquality(sourceDF, expectedDF, precision = 0.0000001, ignoreMetadata = false) @@ -714,7 +715,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar ), List(("number", IntegerType, true)) ) - .withMetadata("number", new MetadataBuilder().putString("description", "small int").build()) + .withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small int").build())) val expectedDF = spark .createDF( @@ -724,7 +725,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar ), List(("number", IntegerType, true)) ) - .withMetadata("number", new MetadataBuilder().putString("description", "small number").build()) + .withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small number").build())) assertApproximateSmallDataFrameEquality(sourceDF, expectedDF, precision = 0.0000001) } @@ -738,7 +739,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar ), List(("number", IntegerType, true)) ) - .withMetadata("number", new MetadataBuilder().putString("description", "small int").build()) + .withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small int").build())) val expectedDF = spark .createDF( @@ -748,7 +749,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar ), List(("number", IntegerType, true)) ) - .withMetadata("number", new MetadataBuilder().putString("description", "small number").build()) + .withColumn("number", col("number").as("number", new MetadataBuilder().putString("description", "small number").build())) intercept[DatasetSchemaMismatch] { assertApproximateSmallDataFrameEquality(sourceDF, expectedDF, precision = 0.0000001, ignoreMetadata = false) diff --git a/core/src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala b/core/src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala index b9fece9..06b0270 100644 --- a/core/src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala +++ b/core/src/test/scala/com/github/mrpowers/spark/fast/tests/DatasetComparerTest.scala @@ -4,6 +4,7 @@ import org.apache.spark.sql.types._ import SparkSessionExt._ import com.github.mrpowers.spark.fast.tests.SchemaComparer.DatasetSchemaMismatch import com.github.mrpowers.spark.fast.tests.StringExt.StringOps +import org.apache.spark.sql.functions.col import org.scalatest.freespec.AnyFreeSpec object Person { @@ -415,7 +416,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes Person("li", 49), Person("alice", 5) ).toDS - .withMetadata("name", new MetadataBuilder().putString("description", "name of the person").build()) + .withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the person").build())) .as[Person] val ds2 = Seq( @@ -424,7 +425,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes Person("li", 49), Person("alice", 5) ).toDS - .withMetadata("name", new MetadataBuilder().putString("description", "name of the individual").build()) + .withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the individual").build())) .as[Person] assertLargeDatasetEquality(ds2, ds1) @@ -437,7 +438,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes Person("li", 49), Person("alice", 5) ).toDS - .withMetadata("name", new MetadataBuilder().putString("description", "name of the person").build()) + .withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the person").build())) .as[Person] val ds2 = Seq( @@ -446,7 +447,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes Person("li", 49), Person("alice", 5) ).toDS - .withMetadata("name", new MetadataBuilder().putString("description", "name of the individual").build()) + .withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the individual").build())) .as[Person] intercept[DatasetSchemaMismatch] { @@ -623,7 +624,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes Person("li", 49), Person("alice", 5) ).toDS - .withMetadata("name", new MetadataBuilder().putString("description", "first name of a person").build()) + .withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the person").build())) .as[Person] val ds2 = Seq( @@ -632,7 +633,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes Person("li", 49), Person("alice", 5) ).toDS - .withMetadata("name", new MetadataBuilder().putString("description", "first name of an individual").build()) + .withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the individual").build())) .as[Person] assertSmallDatasetEquality(ds2, ds1) @@ -645,7 +646,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes Person("li", 49), Person("alice", 5) ).toDS - .withMetadata("name", new MetadataBuilder().putString("description", "name of the person").build()) + .withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the person").build())) .as[Person] val ds2 = Seq( @@ -654,7 +655,7 @@ class DatasetComparerTest extends AnyFreeSpec with DatasetComparer with SparkSes Person("li", 49), Person("alice", 5) ).toDS - .withMetadata("name", new MetadataBuilder().putString("description", "name of the individual").build()) + .withColumn("name", col("name").as("name", new MetadataBuilder().putString("description", "name of the individual").build())) .as[Person] intercept[DatasetSchemaMismatch] {