[SPARK-40283][INFRA] Make MiMa check default exclude private object and bump `previousSparkVersion` to 3.3.0

### What changes were proposed in this pull request?
The main changes of this PR are as follows:
1. Make the MiMa check exclude `private object`s by default (see the sketch after this list)
2. Bump MiMa's `previousSparkVersion` to 3.3.0
3. Add the missing `ProblemFilters` rules to `MimaExcludes`
4. Clean up expired exclude rules and `case` matches
5. Move the misplaced rules added by SPARK-38679 and SPARK-39506 to the correct section
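
To make item 1 concrete, here is a minimal, hypothetical example (not from the Spark codebase) of the kind of symbol the generated ignore list now covers without a hand-written rule:

```scala
object PublicApi {
  // Hypothetical internal helper: a `private object` is invisible to user
  // code, so binary-incompatible changes to it are not real API breaks.
  // GenerateMIMAIgnore now puts such objects on the ignore list by default,
  // instead of requiring a manual ProblemFilters entry in MimaExcludes.
  private object Internal {
    def compute(x: Int): Int = x * 2
  }

  def run(x: Int): Int = Internal.compute(x)
}
```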

### Why are the changes needed?
To ensure that MiMa checks cover new APIs added in Spark 3.3.0.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?

Scala 2.12

```
dev/mima -Pscala-2.12
```

Scala 2.13

```
dev/change-scala-version.sh 2.13
dev/mima -Pscala-2.13
```

Closes apache#37741 from LuciferYang/SPARK-40283.

Lead-authored-by: yangjie01 <[email protected]>
Co-authored-by: YangJie <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
LuciferYang authored and dongjoon-hyun committed Sep 2, 2022
1 parent 339d638 commit 28c8073
Showing 3 changed files with 26 additions and 97 deletions.
2 changes: 1 addition & 1 deletion project/MimaBuild.scala
@@ -86,7 +86,7 @@ object MimaBuild {

def mimaSettings(sparkHome: File, projectRef: ProjectRef): Seq[Setting[_]] = {
val organization = "org.apache.spark"
val previousSparkVersion = "3.2.0"
val previousSparkVersion = "3.3.0"
val project = projectRef.project
val id = "spark-" + project

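For context, `previousSparkVersion` determines the baseline artifacts that sbt-mima-plugin compares the current build against. A hedged sketch of that wiring in `build.sbt` style (the actual `MimaBuild` code derives the artifact per module; `mimaPreviousArtifacts` is the standard plugin key):

```scala
import com.typesafe.tools.mima.plugin.MimaKeys.mimaPreviousArtifacts

// Sketch only: after this change, a module such as spark-core is checked
// against its published 3.3.0 release instead of 3.2.0.
mimaPreviousArtifacts := Set("org.apache.spark" %% "spark-core" % "3.3.0")
```
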
118 changes: 23 additions & 95 deletions project/MimaExcludes.scala
@@ -34,8 +34,8 @@ import com.typesafe.tools.mima.core.ProblemFilters._
*/
object MimaExcludes {

// Exclude rules for 3.4.x
lazy val v34excludes = v33excludes ++ Seq(
// Exclude rules for 3.4.x from 3.3.0
lazy val v34excludes = defaultExcludes ++ Seq(
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.recommendation.ALS.checkedCast"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.recommendation.ALSModel.checkedCast"),

@@ -62,48 +62,34 @@ object MimaExcludes {
ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.deploy.DeployMessages#RequestExecutors.copy"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.deploy.DeployMessages#RequestExecutors.copy$default$2"),
ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.deploy.DeployMessages#RequestExecutors.this"),
ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.deploy.DeployMessages#RequestExecutors.apply")
)

// Exclude rules for 3.3.x from 3.2.0
lazy val v33excludes = v32excludes ++ Seq(
// [SPARK-35672][CORE][YARN] Pass user classpath entries to executors using config instead of command line
// The followings are necessary for Scala 2.13.
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.executor.CoarseGrainedExecutorBackend#Arguments.*"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.executor.CoarseGrainedExecutorBackend#Arguments.*"),
ProblemFilters.exclude[MissingTypesProblem]("org.apache.spark.executor.CoarseGrainedExecutorBackend$Arguments$"),
ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.deploy.DeployMessages#RequestExecutors.apply"),

// [SPARK-37391][SQL] JdbcConnectionProvider tells if it modifies security context
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.jdbc.JdbcConnectionProvider.modifiesSecurityContext"),
// [SPARK-38679][CORE] Expose the number of partitions in a stage to TaskContext
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.TaskContext.numPartitions"),

// [SPARK-37780][SQL] QueryExecutionListener support SQLConf as constructor parameter
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.util.ExecutionListenerManager.this"),
// [SPARK-37786][SQL] StreamingQueryListener support use SQLConf.get to get corresponding SessionState's SQLConf
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.StreamingQueryManager.this"),
// [SPARK-38432][SQL] Refactor framework so as JDBC dialect could compile filter by self way
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.sources.Filter.toV2"),
// [SPARK-39506] In terms of 3 layer namespace effort, add currentCatalog, setCurrentCatalog and listCatalogs API to Catalog interface
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.currentCatalog"),
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.setCurrentCatalog"),
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.listCatalogs"),

// [SPARK-37831][CORE] Add task partition id in TaskInfo and Task Metrics
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.status.api.v1.TaskData.this"),
// [SPARK-39704][SQL] Implement createIndex & dropIndex & indexExists in JDBC (H2 dialect)
ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.sql.jdbc.JdbcDialect.createIndex"),
ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.sql.jdbc.JdbcDialect.dropIndex"),
ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.sql.jdbc.JdbcDialect.indexExists"),

// [SPARK-37600][BUILD] Upgrade to Hadoop 3.3.2
ProblemFilters.exclude[MissingClassProblem]("org.apache.hadoop.shaded.net.jpountz.lz4.LZ4Compressor"),
ProblemFilters.exclude[MissingClassProblem]("org.apache.hadoop.shaded.net.jpountz.lz4.LZ4Factory"),
ProblemFilters.exclude[MissingClassProblem]("org.apache.hadoop.shaded.net.jpountz.lz4.LZ4SafeDecompressor"),
// [SPARK-39759][SQL] Implement listIndexes in JDBC (H2 dialect)
ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.sql.jdbc.JdbcDialect.listIndexes"),

// [SPARK-37377][SQL] Initial implementation of Storage-Partitioned Join
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.connector.read.partitioning.ClusteredDistribution"),
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.connector.read.partitioning.Distribution"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.connector.read.partitioning.Partitioning.*"),
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.connector.read.partitioning.Partitioning.*"),
// [SPARK-38929][SQL] Improve error messages for cast failures in ANSI
ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.sql.types.Decimal.fromStringANSI"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.types.Decimal.fromStringANSI$default$3"),

// [SPARK-38908][SQL] Provide query context in runtime error of Casting from String to
// Number/Date/Timestamp/Boolean
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.types.Decimal.fromStringANSI")
// [SPARK-36511][MINOR][SQL] Remove ColumnIOUtil
ProblemFilters.exclude[MissingClassProblem]("org.apache.parquet.io.ColumnIOUtil")
)

// Exclude rules for 3.2.x from 3.1.1
lazy val v32excludes = Seq(
// Default exclude rules
lazy val defaultExcludes = Seq(
// Spark Internals
ProblemFilters.exclude[Problem]("org.apache.spark.rpc.*"),
ProblemFilters.exclude[Problem]("org.spark-project.jetty.*"),
@@ -124,73 +124,15 @@ object MimaExcludes {
// Avro source implementation is internal.
ProblemFilters.exclude[Problem]("org.apache.spark.sql.v2.avro.*"),

// [SPARK-34848][CORE] Add duration to TaskMetricDistributions
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.status.api.v1.TaskMetricDistributions.this"),

// [SPARK-34488][CORE] Support task Metrics Distributions and executor Metrics Distributions
// in the REST API call for a specified stage
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.status.api.v1.StageData.this"),

// [SPARK-36173][CORE] Support getting CPU number in TaskContext
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.TaskContext.cpus"),

// [SPARK-38679][CORE] Expose the number of partitions in a stage to TaskContext
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.TaskContext.numPartitions"),

// [SPARK-35896] Include more granular metrics for stateful operators in StreamingQueryProgress
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.StateOperatorProgress.this"),

(problem: Problem) => problem match {
case MissingClassProblem(cls) => !cls.fullName.startsWith("org.sparkproject.jpmml") &&
!cls.fullName.startsWith("org.sparkproject.dmg.pmml")
case _ => true
},

// [SPARK-33808][SQL] DataSource V2: Build logical writes in the optimizer
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.connector.write.V1WriteBuilder"),

// [SPARK-33955] Add latest offsets to source progress
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.SourceProgress.this"),

// [SPARK-34862][SQL] Support nested column in ORC vectorized reader
ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.vectorized.ColumnVector.getBoolean"),
ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.vectorized.ColumnVector.getByte"),
ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.vectorized.ColumnVector.getShort"),
ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.vectorized.ColumnVector.getInt"),
ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.vectorized.ColumnVector.getLong"),
ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.vectorized.ColumnVector.getFloat"),
ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.vectorized.ColumnVector.getDouble"),
ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.vectorized.ColumnVector.getDecimal"),
ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.vectorized.ColumnVector.getUTF8String"),
ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.vectorized.ColumnVector.getBinary"),
ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.vectorized.ColumnVector.getArray"),
ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.vectorized.ColumnVector.getMap"),
ProblemFilters.exclude[DirectAbstractMethodProblem]("org.apache.spark.sql.vectorized.ColumnVector.getChild"),

// [SPARK-35135][CORE] Turn WritablePartitionedIterator from trait into a default implementation class
ProblemFilters.exclude[IncompatibleTemplateDefProblem]("org.apache.spark.util.collection.WritablePartitionedIterator"),

// [SPARK-35757][CORE] Add bitwise AND operation and functionality for intersecting bloom filters
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.util.sketch.BloomFilter.intersectInPlace"),

// [SPARK-35276][CORE] Calculate checksum for shuffle data and write as checksum file
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.shuffle.sort.io.LocalDiskShuffleMapOutputWriter.commitAllPartitions"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.shuffle.sort.io.LocalDiskSingleSpillMapOutputWriter.transferMapSpillFile"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.shuffle.api.ShuffleMapOutputWriter.commitAllPartitions"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.shuffle.api.SingleSpillShuffleMapOutputWriter.transferMapSpillFile"),
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.shuffle.api.SingleSpillShuffleMapOutputWriter.transferMapSpillFile"),
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.shuffle.api.ShuffleMapOutputWriter.commitAllPartitions"),

// [SPARK-39506] In terms of 3 layer namespace effort, add currentCatalog, setCurrentCatalog and listCatalogs API to Catalog interface
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.currentCatalog"),
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.setCurrentCatalog"),
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.listCatalogs")
}
)

def excludes(version: String) = version match {
case v if v.startsWith("3.4") => v34excludes
case v if v.startsWith("3.3") => v33excludes
case v if v.startsWith("3.2") => v32excludes
case _ => Seq()
}
}
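
One detail worth noting in `defaultExcludes` above: besides `ProblemFilters.exclude[...]` entries, MiMa accepts a plain `Problem => Boolean` function, where returning `false` suppresses the reported problem. A standalone sketch of that shape, mirroring the shaded jpmml/dmg.pmml filter in the list above (illustrative only; the val name is hypothetical, not from the codebase):

```scala
import com.typesafe.tools.mima.core.{MissingClassProblem, Problem}

// Returning false filters a problem out; everything else passes through.
val ignoreShadedPmml: Problem => Boolean = {
  case MissingClassProblem(cls) =>
    !cls.fullName.startsWith("org.sparkproject.jpmml") &&
      !cls.fullName.startsWith("org.sparkproject.dmg.pmml")
  case _ => true
}
```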
3 changes: 2 additions & 1 deletion tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala
@@ -65,7 +65,8 @@ object GenerateMIMAIgnore {
val directlyPrivateSpark =
isPackagePrivate(classSymbol) ||
isPackagePrivateModule(moduleSymbol) ||
classSymbol.isPrivate
classSymbol.isPrivate ||
moduleSymbol.isPrivate
/* Inner classes defined within a private[spark] class or object are effectively
invisible, so we account for them as package private. */
lazy val indirectlyPrivateSpark = {
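The one-line change above makes the module symbol's own privacy count. A self-contained sketch of the scala-reflect behavior it relies on (illustrative; `GenerateMIMAIgnore` itself looks symbols up through a runtime mirror rather than `typeOf`):

```scala
import scala.reflect.runtime.{universe => ru}

object PrivacySketch {
  private object Hidden // a private object that MiMa should not flag

  def main(args: Array[String]): Unit = {
    // Previously only the class symbol's privacy was consulted, which misses
    // this case; the module symbol reports isPrivate = true, so `Hidden` now
    // lands on the generated ignore list by default.
    val moduleSymbol = ru.typeOf[Hidden.type].termSymbol
    println(moduleSymbol.isPrivate) // true
  }
}
```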
