Skip to content
This repository has been archived by the owner on Jun 14, 2024. It is now read-only.

Commit

Permalink
Add assert
Browse files Browse the repository at this point in the history
  • Loading branch information
sezruby committed Jul 21, 2021
1 parent 421cbc0 commit deed490
Show file tree
Hide file tree
Showing 11 changed files with 377 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ object FilterColumnFilter extends QueryPlanIndexFilter {
index,
FilterReasons.NoFirstIndexedColCond(
index.derivedDataset.indexedColumns.head,
filterColumnNames.mkString(", "))) {
filterColumnNames.mkString(","))) {
ResolverUtils
.resolve(spark, index.derivedDataset.indexedColumns.head, filterColumnNames)
.isDefined
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -431,17 +431,17 @@ object JoinColumnFilter extends QueryPlanIndexFilter {
idx,
FilterReasons.NotAllJoinColIndexed(
leftOrRight,
requiredIndexCols.mkString(", "),
idx.indexedColumns.mkString(", "))) {
requiredIndexCols.mkString(","),
idx.indexedColumns.mkString(","))) {
requiredIndexCols.toSet.equals(idx.indexedColumns.toSet)
} &&
withFilterReasonTag(
plan,
idx,
FilterReasons.MissingIndexedCol(
leftOrRight,
allRequiredCols.mkString(", "),
idx.indexedColumns.mkString(", "))) {
allRequiredCols.mkString(","),
idx.indexedColumns.mkString(","))) {
allRequiredCols.forall(allCols.contains)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ object CandidateIndexAnalyzer extends Logging {
}
}
}
.sortBy(r => (r._1, r._3))
.sortBy(r => (r._1, r._2, r._3, r._4))
.distinct

if (res.isEmpty) {
Expand Down Expand Up @@ -186,12 +186,14 @@ object CandidateIndexAnalyzer extends Logging {
matchStr.group(1)
}
.toSeq
.distinct
.sorted
printIndexNames(appliedIndexNames)

stringBuilder.append("Applicable indexes, but not applied due to priority:")
stringBuilder.append(newLine)
val applicableButNotAppliedIndexNames =
applicableIndexes.map(_._1.name).distinct.filterNot(appliedIndexNames.contains(_))
applicableIndexes.map(_._1.name).distinct.sorted.filterNot(appliedIndexNames.contains(_))
printIndexNames(applicableButNotAppliedIndexNames)

// Convert reasons to DataFrame rows.
Expand All @@ -214,7 +216,7 @@ object CandidateIndexAnalyzer extends Logging {
}
}
}
.sortBy(r => (r._1, r._3))
.sortBy(r => (r._1, r._2, r._3, r._4, r._5))
.distinct

import spark.implicits._
Expand All @@ -233,6 +235,7 @@ object CandidateIndexAnalyzer extends Logging {
.filter(row => row._4.equals("SOURCE_DATA_CHANGE"))
.map(_._2)
.distinct
.sorted
.filterNot(appliedIndexNames.contains(_))
.filterNot(applicableButNotAppliedIndexNames.contains(_))
printIndexNames(indexNamesForOutdated)
Expand All @@ -245,6 +248,7 @@ object CandidateIndexAnalyzer extends Logging {
row._4.equals("COL_SCHEMA_MISMATCH") || row._4.equals("SOURCE_DATA_CHANGE"))
.map(_._2)
.distinct
.sorted
.filterNot(appliedIndexNames.contains(_))
.filterNot(applicableButNotAppliedIndexNames.contains(_))
printIndexNames(indexNamesForNoApplicablePlan)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ object ColumnSchemaFilter extends SourcePlanIndexFilter {
plan,
index,
FilterReasons.ColSchemaMismatch(
relationColumnNames.mkString(", "),
index.derivedDataset.referencedColumns.mkString(", "))) {
relationColumnNames.mkString(","),
index.derivedDataset.referencedColumns.mkString(","))) {
ResolverUtils
.resolve(spark, index.derivedDataset.referencedColumns, relationColumnNames)
.isDefined
Expand Down
63 changes: 63 additions & 0 deletions src/test/resources/expected/spark-2.4/whyNot_allIndex.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@

=============================================================
Plan with Hyperspace & Summary:
=============================================================
Join Inner, (c3# = c3#)
:- Project [c4#, c3#]
: +- Filter ((isnotnull(c4#) && (c4# = 2)) && isnotnull(c3#))
: +- Relation[c3#,c4#] Hyperspace(Type: CI, Name: leftDfFilterIndex, LogVersion: 1)
+- Project [c5#, c3#]
+- Filter ((isnotnull(c5#) && (c5# = 3000)) && isnotnull(c3#))
+- Relation[c3#,c5#] Hyperspace(Type: CI, Name: rightDfFilterIndex, LogVersion: 1)

Applied indexes:
- leftDfFilterIndex
- rightDfFilterIndex

Applicable indexes, but not applied due to priority:
- leftDfJoinIndex
- rightDfJoinIndex

Non-applicable indexes - index is outdated:
- No such index found.

Non-applicable indexes - no applicable query plan:
- No such index found.

For more information, please visit: https://microsoft.github.io/hyperspace/docs/why-not-result-analysis

=============================================================
Plan without Hyperspace & WhyNot reasons:
=============================================================
00 Join Inner, (c3# = c3#)
01 :- Project [c4#, c3#]
02 : +- Filter ((isnotnull(c4#) && (c4# = 2)) && isnotnull(c3#))
03 : +- Relation[c1#,c2#,c3#,c4#,c5#] parquet
04 +- Project [c5#, c3#]
05 +- Filter ((isnotnull(c5#) && (c5# = 3000)) && isnotnull(c3#))
06 +- Relation[c1#,c2#,c3#,c4#,c5#] parquet

+-----------+------------------+---------+-------------------------+------------------------------------------------------------+
|SubPlan |IndexName |IndexType|Reason |Message |
+-----------+------------------+---------+-------------------------+------------------------------------------------------------+
|Filter @ 2 |leftDfFilterIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c4,c3] |
|Filter @ 2 |leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |
|Filter @ 2 |rightDfFilterIndex|CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c5], filterCols=[c4,c3] |
|Filter @ 2 |rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c5] |
|Filter @ 5 |leftDfFilterIndex |CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c4], filterCols=[c5,c3] |
|Filter @ 5 |leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |
|Filter @ 5 |rightDfFilterIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c5,c3] |
|Filter @ 5 |rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c5] |
|Join @ 0 |leftDfFilterIndex |CI |NOT_ALL_JOIN_COL_INDEXED |child=[left], joinCols=[c3], indexedCols=[c4] |
|Join @ 0 |leftDfFilterIndex |CI |NOT_ALL_JOIN_COL_INDEXED |child=[right], joinCols=[c3], indexedCols=[c4] |
|Join @ 0 |leftDfJoinIndex |CI |MISSING_INDEXED_COL |child=[right], requiredIndexedCols=[c5,c3], IndexedCols=[c3]|
|Join @ 0 |rightDfFilterIndex|CI |NOT_ALL_JOIN_COL_INDEXED |child=[left], joinCols=[c3], indexedCols=[c5] |
|Join @ 0 |rightDfFilterIndex|CI |NOT_ALL_JOIN_COL_INDEXED |child=[right], joinCols=[c3], indexedCols=[c5] |
|Join @ 0 |rightDfJoinIndex |CI |MISSING_INDEXED_COL |child=[left], requiredIndexedCols=[c4,c3], IndexedCols=[c3] |
|Project @ 1|leftDfJoinIndex |CI |ANOTHER_INDEX_APPLIED |appliedIndex=[leftDfFilterIndex] |
|Project @ 1|rightDfFilterIndex|CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c5], filterCols=[c4,c3] |
|Project @ 1|rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c4,c3], indexCols=[c3,c5] |
|Project @ 4|leftDfFilterIndex |CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c4], filterCols=[c5,c3] |
|Project @ 4|leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c5,c3], indexCols=[c3,c4] |
|Project @ 4|rightDfJoinIndex |CI |ANOTHER_INDEX_APPLIED |appliedIndex=[rightDfFilterIndex] |
+-----------+------------------+---------+-------------------------+------------------------------------------------------------+
48 changes: 48 additions & 0 deletions src/test/resources/expected/spark-2.4/whyNot_indexName.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@

=============================================================
Plan with Hyperspace & Summary:
=============================================================
Join Inner, (c3# = c3#)
:- Project [c4#, c3#]
: +- Filter ((isnotnull(c4#) && (c4# = 2)) && isnotnull(c3#))
: +- Relation[c3#,c4#] Hyperspace(Type: CI, Name: leftDfFilterIndex, LogVersion: 1)
+- Project [c5#, c3#]
+- Filter ((isnotnull(c5#) && (c5# = 3000)) && isnotnull(c3#))
+- Relation[c3#,c5#] Hyperspace(Type: CI, Name: rightDfFilterIndex, LogVersion: 1)

Applied indexes:
- leftDfFilterIndex
- rightDfFilterIndex

Applicable indexes, but not applied due to priority:
- leftDfJoinIndex
- rightDfJoinIndex

Non-applicable indexes - index is outdated:
- No such index found.

Non-applicable indexes - no applicable query plan:
- No such index found.

For more information, please visit: https://microsoft.github.io/hyperspace/docs/why-not-result-analysis

=============================================================
Plan without Hyperspace & WhyNot reasons:
=============================================================
00 Join Inner, (c3# = c3#)
01 :- Project [c4#, c3#]
02 : +- Filter ((isnotnull(c4#) && (c4# = 2)) && isnotnull(c3#))
03 : +- Relation[c1#,c2#,c3#,c4#,c5#] parquet
04 +- Project [c5#, c3#]
05 +- Filter ((isnotnull(c5#) && (c5# = 3000)) && isnotnull(c3#))
06 +- Relation[c1#,c2#,c3#,c4#,c5#] parquet

+-----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+
|SubPlan |IndexName |IndexType|Reason |Message |VerboseMessage |
+-----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+
|Filter @ 2 |leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c3,c4,c5,c2,c1], Index columns: [c3,c4] |
|Filter @ 5 |leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c3,c4,c5,c2,c1], Index columns: [c3,c4] |
|Join @ 0 |leftDfJoinIndex|CI |MISSING_INDEXED_COL |child=[right], requiredIndexedCols=[c5,c3], IndexedCols=[c3]|Index does not contain required columns for right subplan. Required indexed columns: [c5,c3], Indexed columns: [c3]|
|Project @ 1|leftDfJoinIndex|CI |ANOTHER_INDEX_APPLIED|appliedIndex=[leftDfFilterIndex] |Another candidate index is applied: leftDfFilterIndex |
|Project @ 4|leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c5,c3], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c5,c3], Index columns: [c3,c4] |
+-----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+
63 changes: 63 additions & 0 deletions src/test/resources/expected/spark-3.0/whyNot_allIndex.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@

=============================================================
Plan with Hyperspace & Summary:
=============================================================
Join Inner, (c3# = c3#)
:- Project [c4#, c3#]
: +- Filter ((isnotnull(c4#) AND (c4# = 2)) AND isnotnull(c3#))
: +- Relation[c3#,c4#] Hyperspace(Type: CI, Name: leftDfFilterIndex, LogVersion: 1)
+- Project [c5#, c3#]
+- Filter ((isnotnull(c5#) AND (c5# = 3000)) AND isnotnull(c3#))
+- Relation[c3#,c5#] Hyperspace(Type: CI, Name: rightDfFilterIndex, LogVersion: 1)

Applied indexes:
- leftDfFilterIndex
- rightDfFilterIndex

Applicable indexes, but not applied due to priority:
- leftDfJoinIndex
- rightDfJoinIndex

Non-applicable indexes - index is outdated:
- No such index found.

Non-applicable indexes - no applicable query plan:
- No such index found.

For more information, please visit: https://microsoft.github.io/hyperspace/docs/why-not-result-analysis

=============================================================
Plan without Hyperspace & WhyNot reasons:
=============================================================
00 Join Inner, (c3# = c3#)
01 :- Project [c4#, c3#]
02 : +- Filter ((isnotnull(c4#) AND (c4# = 2)) AND isnotnull(c3#))
03 : +- Relation[c1#,c2#,c3#,c4#,c5#] parquet
04 +- Project [c5#, c3#]
05 +- Filter ((isnotnull(c5#) AND (c5# = 3000)) AND isnotnull(c3#))
06 +- Relation[c1#,c2#,c3#,c4#,c5#] parquet

+-----------+------------------+---------+-------------------------+------------------------------------------------------------+
|SubPlan |IndexName |IndexType|Reason |Message |
+-----------+------------------+---------+-------------------------+------------------------------------------------------------+
|Filter @ 2 |leftDfFilterIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c4,c3] |
|Filter @ 2 |leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |
|Filter @ 2 |rightDfFilterIndex|CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c5], filterCols=[c4,c3] |
|Filter @ 2 |rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c5] |
|Filter @ 5 |leftDfFilterIndex |CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c4], filterCols=[c5,c3] |
|Filter @ 5 |leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |
|Filter @ 5 |rightDfFilterIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c5,c3] |
|Filter @ 5 |rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c5] |
|Join @ 0 |leftDfFilterIndex |CI |NOT_ALL_JOIN_COL_INDEXED |child=[left], joinCols=[c3], indexedCols=[c4] |
|Join @ 0 |leftDfFilterIndex |CI |NOT_ALL_JOIN_COL_INDEXED |child=[right], joinCols=[c3], indexedCols=[c4] |
|Join @ 0 |leftDfJoinIndex |CI |MISSING_INDEXED_COL |child=[right], requiredIndexedCols=[c5,c3], IndexedCols=[c3]|
|Join @ 0 |rightDfFilterIndex|CI |NOT_ALL_JOIN_COL_INDEXED |child=[left], joinCols=[c3], indexedCols=[c5] |
|Join @ 0 |rightDfFilterIndex|CI |NOT_ALL_JOIN_COL_INDEXED |child=[right], joinCols=[c3], indexedCols=[c5] |
|Join @ 0 |rightDfJoinIndex |CI |MISSING_INDEXED_COL |child=[left], requiredIndexedCols=[c4,c3], IndexedCols=[c3] |
|Project @ 1|leftDfJoinIndex |CI |ANOTHER_INDEX_APPLIED |appliedIndex=[leftDfFilterIndex] |
|Project @ 1|rightDfFilterIndex|CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c5], filterCols=[c4,c3] |
|Project @ 1|rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c4,c3], indexCols=[c3,c5] |
|Project @ 4|leftDfFilterIndex |CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c4], filterCols=[c5,c3] |
|Project @ 4|leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c5,c3], indexCols=[c3,c4] |
|Project @ 4|rightDfJoinIndex |CI |ANOTHER_INDEX_APPLIED |appliedIndex=[rightDfFilterIndex] |
+-----------+------------------+---------+-------------------------+------------------------------------------------------------+
48 changes: 48 additions & 0 deletions src/test/resources/expected/spark-3.0/whyNot_indexName.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@

=============================================================
Plan with Hyperspace & Summary:
=============================================================
Join Inner, (c3# = c3#)
:- Project [c4#, c3#]
: +- Filter ((isnotnull(c4#) AND (c4# = 2)) AND isnotnull(c3#))
: +- Relation[c3#,c4#] Hyperspace(Type: CI, Name: leftDfFilterIndex, LogVersion: 1)
+- Project [c5#, c3#]
+- Filter ((isnotnull(c5#) AND (c5# = 3000)) AND isnotnull(c3#))
+- Relation[c3#,c5#] Hyperspace(Type: CI, Name: rightDfFilterIndex, LogVersion: 1)

Applied indexes:
- leftDfFilterIndex
- rightDfFilterIndex

Applicable indexes, but not applied due to priority:
- leftDfJoinIndex
- rightDfJoinIndex

Non-applicable indexes - index is outdated:
- No such index found.

Non-applicable indexes - no applicable query plan:
- No such index found.

For more information, please visit: https://microsoft.github.io/hyperspace/docs/why-not-result-analysis

=============================================================
Plan without Hyperspace & WhyNot reasons:
=============================================================
00 Join Inner, (c3# = c3#)
01 :- Project [c4#, c3#]
02 : +- Filter ((isnotnull(c4#) AND (c4# = 2)) AND isnotnull(c3#))
03 : +- Relation[c1#,c2#,c3#,c4#,c5#] parquet
04 +- Project [c5#, c3#]
05 +- Filter ((isnotnull(c5#) AND (c5# = 3000)) AND isnotnull(c3#))
06 +- Relation[c1#,c2#,c3#,c4#,c5#] parquet

+-----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+
|SubPlan |IndexName |IndexType|Reason |Message |VerboseMessage |
+-----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+
|Filter @ 2 |leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c3,c4,c5,c2,c1], Index columns: [c3,c4] |
|Filter @ 5 |leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c3,c4,c5,c2,c1], Index columns: [c3,c4] |
|Join @ 0 |leftDfJoinIndex|CI |MISSING_INDEXED_COL |child=[right], requiredIndexedCols=[c5,c3], IndexedCols=[c3]|Index does not contain required columns for right subplan. Required indexed columns: [c5,c3], Indexed columns: [c3]|
|Project @ 1|leftDfJoinIndex|CI |ANOTHER_INDEX_APPLIED|appliedIndex=[leftDfFilterIndex] |Another candidate index is applied: leftDfFilterIndex |
|Project @ 4|leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c5,c3], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c5,c3], Index columns: [c3,c4] |
+-----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+
Loading

0 comments on commit deed490

Please sign in to comment.