diff --git a/src/main/scala/com/microsoft/hyperspace/index/plananalysis/CandidateIndexAnalyzer.scala b/src/main/scala/com/microsoft/hyperspace/index/plananalysis/CandidateIndexAnalyzer.scala index 16dd3e641..13b8f7fb8 100644 --- a/src/main/scala/com/microsoft/hyperspace/index/plananalysis/CandidateIndexAnalyzer.scala +++ b/src/main/scala/com/microsoft/hyperspace/index/plananalysis/CandidateIndexAnalyzer.scala @@ -95,7 +95,7 @@ object CandidateIndexAnalyzer extends Logging { } else { -1 } - s"${plan.nodeName} @ $startLineNumber" + s"${plan.nodeName} @$startLineNumber" } private def generateApplicableIndexInfoString( diff --git a/src/test/resources/expected/spark-2.4/selfJoin.txt b/src/test/resources/expected/spark-2.4/selfJoin.txt index 79a05ecec..ad26b676d 100644 --- a/src/test/resources/expected/spark-2.4/selfJoin.txt +++ b/src/test/resources/expected/spark-2.4/selfJoin.txt @@ -55,11 +55,11 @@ Plan without Hyperspace: 03 +- Filter isnotnull(Col1#) 04 +- Relation[Col1#,Col2#] parquet -+----------+---------+---------+---------------+ -|SubPlan |IndexName|IndexType|RuleName | -+----------+---------+---------+---------------+ -|Filter @ 1|joinIndex|CI |FilterIndexRule| -|Filter @ 3|joinIndex|CI |FilterIndexRule| -|Join @ 0 |joinIndex|CI |JoinIndexRule | -+----------+---------+---------+---------------+ ++---------+---------+---------+---------------+ +|SubPlan |IndexName|IndexType|RuleName | ++---------+---------+---------+---------------+ +|Filter @1|joinIndex|CI |FilterIndexRule| +|Filter @3|joinIndex|CI |FilterIndexRule| +|Join @0 |joinIndex|CI |JoinIndexRule | ++---------+---------+---------+---------------+ diff --git a/src/test/resources/expected/spark-2.4/selfJoin_Iceberg.txt b/src/test/resources/expected/spark-2.4/selfJoin_Iceberg.txt index 4b1ed314b..8939d2e1c 100644 --- a/src/test/resources/expected/spark-2.4/selfJoin_Iceberg.txt +++ b/src/test/resources/expected/spark-2.4/selfJoin_Iceberg.txt @@ -54,11 +54,11 @@ Plan without Hyperspace: 03 +- Filter isnotnull(Col1#) 04 +- RelationV2 iceberg[Col1#, Col2#] (Options: $icebergOptions) -+----------+---------+---------+---------------+ -|SubPlan |IndexName|IndexType|RuleName | -+----------+---------+---------+---------------+ -|Filter @ 1|joinIndex|CI |FilterIndexRule| -|Filter @ 3|joinIndex|CI |FilterIndexRule| -|Join @ 0 |joinIndex|CI |JoinIndexRule | -+----------+---------+---------+---------------+ ++---------+---------+---------+---------------+ +|SubPlan |IndexName|IndexType|RuleName | ++---------+---------+---------+---------------+ +|Filter @1|joinIndex|CI |FilterIndexRule| +|Filter @3|joinIndex|CI |FilterIndexRule| +|Join @0 |joinIndex|CI |JoinIndexRule | ++---------+---------+---------+---------------+ diff --git a/src/test/resources/expected/spark-2.4/whyNot_allIndex.txt b/src/test/resources/expected/spark-2.4/whyNot_allIndex.txt index 7276b63e3..67b2e840e 100644 --- a/src/test/resources/expected/spark-2.4/whyNot_allIndex.txt +++ b/src/test/resources/expected/spark-2.4/whyNot_allIndex.txt @@ -37,27 +37,27 @@ Plan without Hyperspace & WhyNot reasons: 05 +- Filter ((isnotnull(c5#) && (c5# = 3000)) && isnotnull(c3#)) 06 +- Relation[c1#,c2#,c3#,c4#,c5#] parquet -+-----------+------------------+---------+-------------------------+------------------------------------------------------------+ -|SubPlan |IndexName |IndexType|Reason |Message | -+-----------+------------------+---------+-------------------------+------------------------------------------------------------+ -|Filter @ 2 |leftDfFilterIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c4,c3] | -|Filter @ 2 |leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] | -|Filter @ 2 |rightDfFilterIndex|CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c5], filterCols=[c4,c3] | -|Filter @ 2 |rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c5] | -|Filter @ 5 |leftDfFilterIndex |CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c4], filterCols=[c5,c3] | -|Filter @ 5 |leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] | -|Filter @ 5 |rightDfFilterIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c5,c3] | -|Filter @ 5 |rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c5] | -|Join @ 0 |leftDfFilterIndex |CI |NOT_ALL_JOIN_COL_INDEXED |child=[left], joinCols=[c3], indexedCols=[c4] | -|Join @ 0 |leftDfFilterIndex |CI |NOT_ALL_JOIN_COL_INDEXED |child=[right], joinCols=[c3], indexedCols=[c4] | -|Join @ 0 |leftDfJoinIndex |CI |MISSING_INDEXED_COL |child=[right], requiredIndexedCols=[c5,c3], indexedCols=[c3]| -|Join @ 0 |rightDfFilterIndex|CI |NOT_ALL_JOIN_COL_INDEXED |child=[left], joinCols=[c3], indexedCols=[c5] | -|Join @ 0 |rightDfFilterIndex|CI |NOT_ALL_JOIN_COL_INDEXED |child=[right], joinCols=[c3], indexedCols=[c5] | -|Join @ 0 |rightDfJoinIndex |CI |MISSING_INDEXED_COL |child=[left], requiredIndexedCols=[c4,c3], indexedCols=[c3] | -|Project @ 1|leftDfJoinIndex |CI |ANOTHER_INDEX_APPLIED |appliedIndex=[leftDfFilterIndex] | -|Project @ 1|rightDfFilterIndex|CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c5], filterCols=[c4,c3] | -|Project @ 1|rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c4,c3], indexCols=[c3,c5] | -|Project @ 4|leftDfFilterIndex |CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c4], filterCols=[c5,c3] | -|Project @ 4|leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c5,c3], indexCols=[c3,c4] | -|Project @ 4|rightDfJoinIndex |CI |ANOTHER_INDEX_APPLIED |appliedIndex=[rightDfFilterIndex] | -+-----------+------------------+---------+-------------------------+------------------------------------------------------------+ ++----------+------------------+---------+-------------------------+------------------------------------------------------------+ +|SubPlan |IndexName |IndexType|Reason |Message | ++----------+------------------+---------+-------------------------+------------------------------------------------------------+ +|Filter @2 |leftDfFilterIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c4,c3] | +|Filter @2 |leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] | +|Filter @2 |rightDfFilterIndex|CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c5], filterCols=[c4,c3] | +|Filter @2 |rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c5] | +|Filter @5 |leftDfFilterIndex |CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c4], filterCols=[c5,c3] | +|Filter @5 |leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] | +|Filter @5 |rightDfFilterIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c5,c3] | +|Filter @5 |rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c5] | +|Join @0 |leftDfFilterIndex |CI |NOT_ALL_JOIN_COL_INDEXED |child=[left], joinCols=[c3], indexedCols=[c4] | +|Join @0 |leftDfFilterIndex |CI |NOT_ALL_JOIN_COL_INDEXED |child=[right], joinCols=[c3], indexedCols=[c4] | +|Join @0 |leftDfJoinIndex |CI |MISSING_INDEXED_COL |child=[right], requiredIndexedCols=[c5,c3], indexedCols=[c3]| +|Join @0 |rightDfFilterIndex|CI |NOT_ALL_JOIN_COL_INDEXED |child=[left], joinCols=[c3], indexedCols=[c5] | +|Join @0 |rightDfFilterIndex|CI |NOT_ALL_JOIN_COL_INDEXED |child=[right], joinCols=[c3], indexedCols=[c5] | +|Join @0 |rightDfJoinIndex |CI |MISSING_INDEXED_COL |child=[left], requiredIndexedCols=[c4,c3], indexedCols=[c3] | +|Project @1|leftDfJoinIndex |CI |ANOTHER_INDEX_APPLIED |appliedIndex=[leftDfFilterIndex] | +|Project @1|rightDfFilterIndex|CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c5], filterCols=[c4,c3] | +|Project @1|rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c4,c3], indexCols=[c3,c5] | +|Project @4|leftDfFilterIndex |CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c4], filterCols=[c5,c3] | +|Project @4|leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c5,c3], indexCols=[c3,c4] | +|Project @4|rightDfJoinIndex |CI |ANOTHER_INDEX_APPLIED |appliedIndex=[rightDfFilterIndex] | ++----------+------------------+---------+-------------------------+------------------------------------------------------------+ diff --git a/src/test/resources/expected/spark-2.4/whyNot_indexName.txt b/src/test/resources/expected/spark-2.4/whyNot_indexName.txt index cd76c3da2..2c6832076 100644 --- a/src/test/resources/expected/spark-2.4/whyNot_indexName.txt +++ b/src/test/resources/expected/spark-2.4/whyNot_indexName.txt @@ -37,12 +37,12 @@ Plan without Hyperspace & WhyNot reasons: 05 +- Filter ((isnotnull(c5#) && (c5# = 3000)) && isnotnull(c3#)) 06 +- Relation[c1#,c2#,c3#,c4#,c5#] parquet -+-----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ -|SubPlan |IndexName |IndexType|Reason |Message |VerboseMessage | -+-----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ -|Filter @ 2 |leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c3,c4,c5,c2,c1], Index columns: [c3,c4] | -|Filter @ 5 |leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c3,c4,c5,c2,c1], Index columns: [c3,c4] | -|Join @ 0 |leftDfJoinIndex|CI |MISSING_INDEXED_COL |child=[right], requiredIndexedCols=[c5,c3], indexedCols=[c3]|Index does not contain required columns for right subplan. Required indexed columns: [c5,c3], Indexed columns: [c3]| -|Project @ 1|leftDfJoinIndex|CI |ANOTHER_INDEX_APPLIED|appliedIndex=[leftDfFilterIndex] |Another candidate index is applied: leftDfFilterIndex | -|Project @ 4|leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c5,c3], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c5,c3], Index columns: [c3,c4] | -+-----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ ++----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +|SubPlan |IndexName |IndexType|Reason |Message |VerboseMessage | ++----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +|Filter @2 |leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c3,c4,c5,c2,c1], Index columns: [c3,c4] | +|Filter @5 |leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c3,c4,c5,c2,c1], Index columns: [c3,c4] | +|Join @0 |leftDfJoinIndex|CI |MISSING_INDEXED_COL |child=[right], requiredIndexedCols=[c5,c3], indexedCols=[c3]|Index does not contain required columns for right subplan. Required indexed columns: [c5,c3], Indexed columns: [c3]| +|Project @1|leftDfJoinIndex|CI |ANOTHER_INDEX_APPLIED|appliedIndex=[leftDfFilterIndex] |Another candidate index is applied: leftDfFilterIndex | +|Project @4|leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c5,c3], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c5,c3], Index columns: [c3,c4] | ++----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ diff --git a/src/test/resources/expected/spark-3.0/selfJoin.txt b/src/test/resources/expected/spark-3.0/selfJoin.txt index 68b21c0d1..319d014f7 100644 --- a/src/test/resources/expected/spark-3.0/selfJoin.txt +++ b/src/test/resources/expected/spark-3.0/selfJoin.txt @@ -63,11 +63,11 @@ Plan without Hyperspace: 03 +- Filter isnotnull(Col1#) 04 +- Relation[Col1#,Col2#] parquet -+----------+---------+---------+---------------+ -|SubPlan |IndexName|IndexType|RuleName | -+----------+---------+---------+---------------+ -|Filter @ 1|joinIndex|CI |FilterIndexRule| -|Filter @ 3|joinIndex|CI |FilterIndexRule| -|Join @ 0 |joinIndex|CI |JoinIndexRule | -+----------+---------+---------+---------------+ ++---------+---------+---------+---------------+ +|SubPlan |IndexName|IndexType|RuleName | ++---------+---------+---------+---------------+ +|Filter @1|joinIndex|CI |FilterIndexRule| +|Filter @3|joinIndex|CI |FilterIndexRule| +|Join @0 |joinIndex|CI |JoinIndexRule | ++---------+---------+---------+---------------+ diff --git a/src/test/resources/expected/spark-3.0/selfJoin_Iceberg.txt b/src/test/resources/expected/spark-3.0/selfJoin_Iceberg.txt index fee7af990..bab01a0f9 100644 --- a/src/test/resources/expected/spark-3.0/selfJoin_Iceberg.txt +++ b/src/test/resources/expected/spark-3.0/selfJoin_Iceberg.txt @@ -59,11 +59,11 @@ Plan without Hyperspace: 03 +- Filter isnotnull(Col1#) 04 +- RelationV2[Col1#, Col2#] $icebergPath -+----------+---------+---------+---------------+ -|SubPlan |IndexName|IndexType|RuleName | -+----------+---------+---------+---------------+ -|Filter @ 1|joinIndex|CI |FilterIndexRule| -|Filter @ 3|joinIndex|CI |FilterIndexRule| -|Join @ 0 |joinIndex|CI |JoinIndexRule | -+----------+---------+---------+---------------+ ++---------+---------+---------+---------------+ +|SubPlan |IndexName|IndexType|RuleName | ++---------+---------+---------+---------------+ +|Filter @1|joinIndex|CI |FilterIndexRule| +|Filter @3|joinIndex|CI |FilterIndexRule| +|Join @0 |joinIndex|CI |JoinIndexRule | ++---------+---------+---------+---------------+ diff --git a/src/test/resources/expected/spark-3.0/whyNot_allIndex.txt b/src/test/resources/expected/spark-3.0/whyNot_allIndex.txt index 5b7315196..1926c9d7c 100644 --- a/src/test/resources/expected/spark-3.0/whyNot_allIndex.txt +++ b/src/test/resources/expected/spark-3.0/whyNot_allIndex.txt @@ -37,27 +37,27 @@ Plan without Hyperspace & WhyNot reasons: 05 +- Filter ((isnotnull(c5#) AND (c5# = 3000)) AND isnotnull(c3#)) 06 +- Relation[c1#,c2#,c3#,c4#,c5#] parquet -+-----------+------------------+---------+-------------------------+------------------------------------------------------------+ -|SubPlan |IndexName |IndexType|Reason |Message | -+-----------+------------------+---------+-------------------------+------------------------------------------------------------+ -|Filter @ 2 |leftDfFilterIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c4,c3] | -|Filter @ 2 |leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] | -|Filter @ 2 |rightDfFilterIndex|CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c5], filterCols=[c4,c3] | -|Filter @ 2 |rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c5] | -|Filter @ 5 |leftDfFilterIndex |CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c4], filterCols=[c5,c3] | -|Filter @ 5 |leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] | -|Filter @ 5 |rightDfFilterIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c5,c3] | -|Filter @ 5 |rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c5] | -|Join @ 0 |leftDfFilterIndex |CI |NOT_ALL_JOIN_COL_INDEXED |child=[left], joinCols=[c3], indexedCols=[c4] | -|Join @ 0 |leftDfFilterIndex |CI |NOT_ALL_JOIN_COL_INDEXED |child=[right], joinCols=[c3], indexedCols=[c4] | -|Join @ 0 |leftDfJoinIndex |CI |MISSING_INDEXED_COL |child=[right], requiredIndexedCols=[c5,c3], indexedCols=[c3]| -|Join @ 0 |rightDfFilterIndex|CI |NOT_ALL_JOIN_COL_INDEXED |child=[left], joinCols=[c3], indexedCols=[c5] | -|Join @ 0 |rightDfFilterIndex|CI |NOT_ALL_JOIN_COL_INDEXED |child=[right], joinCols=[c3], indexedCols=[c5] | -|Join @ 0 |rightDfJoinIndex |CI |MISSING_INDEXED_COL |child=[left], requiredIndexedCols=[c4,c3], indexedCols=[c3] | -|Project @ 1|leftDfJoinIndex |CI |ANOTHER_INDEX_APPLIED |appliedIndex=[leftDfFilterIndex] | -|Project @ 1|rightDfFilterIndex|CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c5], filterCols=[c4,c3] | -|Project @ 1|rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c4,c3], indexCols=[c3,c5] | -|Project @ 4|leftDfFilterIndex |CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c4], filterCols=[c5,c3] | -|Project @ 4|leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c5,c3], indexCols=[c3,c4] | -|Project @ 4|rightDfJoinIndex |CI |ANOTHER_INDEX_APPLIED |appliedIndex=[rightDfFilterIndex] | -+-----------+------------------+---------+-------------------------+------------------------------------------------------------+ ++----------+------------------+---------+-------------------------+------------------------------------------------------------+ +|SubPlan |IndexName |IndexType|Reason |Message | ++----------+------------------+---------+-------------------------+------------------------------------------------------------+ +|Filter @2 |leftDfFilterIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c4,c3] | +|Filter @2 |leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] | +|Filter @2 |rightDfFilterIndex|CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c5], filterCols=[c4,c3] | +|Filter @2 |rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c5] | +|Filter @5 |leftDfFilterIndex |CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c4], filterCols=[c5,c3] | +|Filter @5 |leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] | +|Filter @5 |rightDfFilterIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c5,c3] | +|Filter @5 |rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c5] | +|Join @0 |leftDfFilterIndex |CI |NOT_ALL_JOIN_COL_INDEXED |child=[left], joinCols=[c3], indexedCols=[c4] | +|Join @0 |leftDfFilterIndex |CI |NOT_ALL_JOIN_COL_INDEXED |child=[right], joinCols=[c3], indexedCols=[c4] | +|Join @0 |leftDfJoinIndex |CI |MISSING_INDEXED_COL |child=[right], requiredIndexedCols=[c5,c3], indexedCols=[c3]| +|Join @0 |rightDfFilterIndex|CI |NOT_ALL_JOIN_COL_INDEXED |child=[left], joinCols=[c3], indexedCols=[c5] | +|Join @0 |rightDfFilterIndex|CI |NOT_ALL_JOIN_COL_INDEXED |child=[right], joinCols=[c3], indexedCols=[c5] | +|Join @0 |rightDfJoinIndex |CI |MISSING_INDEXED_COL |child=[left], requiredIndexedCols=[c4,c3], indexedCols=[c3] | +|Project @1|leftDfJoinIndex |CI |ANOTHER_INDEX_APPLIED |appliedIndex=[leftDfFilterIndex] | +|Project @1|rightDfFilterIndex|CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c5], filterCols=[c4,c3] | +|Project @1|rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c4,c3], indexCols=[c3,c5] | +|Project @4|leftDfFilterIndex |CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c4], filterCols=[c5,c3] | +|Project @4|leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c5,c3], indexCols=[c3,c4] | +|Project @4|rightDfJoinIndex |CI |ANOTHER_INDEX_APPLIED |appliedIndex=[rightDfFilterIndex] | ++----------+------------------+---------+-------------------------+------------------------------------------------------------+ diff --git a/src/test/resources/expected/spark-3.0/whyNot_indexName.txt b/src/test/resources/expected/spark-3.0/whyNot_indexName.txt index bfa04ff7b..325a43fa5 100644 --- a/src/test/resources/expected/spark-3.0/whyNot_indexName.txt +++ b/src/test/resources/expected/spark-3.0/whyNot_indexName.txt @@ -37,12 +37,12 @@ Plan without Hyperspace & WhyNot reasons: 05 +- Filter ((isnotnull(c5#) AND (c5# = 3000)) AND isnotnull(c3#)) 06 +- Relation[c1#,c2#,c3#,c4#,c5#] parquet -+-----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ -|SubPlan |IndexName |IndexType|Reason |Message |VerboseMessage | -+-----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ -|Filter @ 2 |leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c3,c4,c5,c2,c1], Index columns: [c3,c4] | -|Filter @ 5 |leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c3,c4,c5,c2,c1], Index columns: [c3,c4] | -|Join @ 0 |leftDfJoinIndex|CI |MISSING_INDEXED_COL |child=[right], requiredIndexedCols=[c5,c3], indexedCols=[c3]|Index does not contain required columns for right subplan. Required indexed columns: [c5,c3], Indexed columns: [c3]| -|Project @ 1|leftDfJoinIndex|CI |ANOTHER_INDEX_APPLIED|appliedIndex=[leftDfFilterIndex] |Another candidate index is applied: leftDfFilterIndex | -|Project @ 4|leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c5,c3], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c5,c3], Index columns: [c3,c4] | -+-----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ ++----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +|SubPlan |IndexName |IndexType|Reason |Message |VerboseMessage | ++----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +|Filter @2 |leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c3,c4,c5,c2,c1], Index columns: [c3,c4] | +|Filter @5 |leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c3,c4,c5,c2,c1], Index columns: [c3,c4] | +|Join @0 |leftDfJoinIndex|CI |MISSING_INDEXED_COL |child=[right], requiredIndexedCols=[c5,c3], indexedCols=[c3]|Index does not contain required columns for right subplan. Required indexed columns: [c5,c3], Indexed columns: [c3]| +|Project @1|leftDfJoinIndex|CI |ANOTHER_INDEX_APPLIED|appliedIndex=[leftDfFilterIndex] |Another candidate index is applied: leftDfFilterIndex | +|Project @4|leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c5,c3], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c5,c3], Index columns: [c3,c4] | ++----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ diff --git a/src/test/resources/expected/spark-3.1/selfJoin.txt b/src/test/resources/expected/spark-3.1/selfJoin.txt index 34f9ca42e..fe95b14eb 100644 --- a/src/test/resources/expected/spark-3.1/selfJoin.txt +++ b/src/test/resources/expected/spark-3.1/selfJoin.txt @@ -59,11 +59,11 @@ Plan without Hyperspace: 03 +- Filter isnotnull(Col1#) 04 +- Relation[Col1#,Col2#] parquet -+----------+---------+---------+---------------+ -|SubPlan |IndexName|IndexType|RuleName | -+----------+---------+---------+---------------+ -|Filter @ 1|joinIndex|CI |FilterIndexRule| -|Filter @ 3|joinIndex|CI |FilterIndexRule| -|Join @ 0 |joinIndex|CI |JoinIndexRule | -+----------+---------+---------+---------------+ ++---------+---------+---------+---------------+ +|SubPlan |IndexName|IndexType|RuleName | ++---------+---------+---------+---------------+ +|Filter @1|joinIndex|CI |FilterIndexRule| +|Filter @3|joinIndex|CI |FilterIndexRule| +|Join @0 |joinIndex|CI |JoinIndexRule | ++---------+---------+---------+---------------+ diff --git a/src/test/resources/expected/spark-3.1/selfJoin_Iceberg.txt b/src/test/resources/expected/spark-3.1/selfJoin_Iceberg.txt index 89caf4da2..771ec23a0 100644 --- a/src/test/resources/expected/spark-3.1/selfJoin_Iceberg.txt +++ b/src/test/resources/expected/spark-3.1/selfJoin_Iceberg.txt @@ -54,11 +54,11 @@ Plan without Hyperspace: 03 +- Filter isnotnull(Col1#) 04 +- RelationV2[Col1#, Col2#] $icebergPath -+----------+---------+---------+---------------+ -|SubPlan |IndexName|IndexType|RuleName | -+----------+---------+---------+---------------+ -|Filter @ 1|joinIndex|CI |FilterIndexRule| -|Filter @ 3|joinIndex|CI |FilterIndexRule| -|Join @ 0 |joinIndex|CI |JoinIndexRule | -+----------+---------+---------+---------------+ ++---------+---------+---------+---------------+ +|SubPlan |IndexName|IndexType|RuleName | ++---------+---------+---------+---------------+ +|Filter @1|joinIndex|CI |FilterIndexRule| +|Filter @3|joinIndex|CI |FilterIndexRule| +|Join @0 |joinIndex|CI |JoinIndexRule | ++---------+---------+---------+---------------+ diff --git a/src/test/resources/expected/spark-3.1/whyNot_allIndex.txt b/src/test/resources/expected/spark-3.1/whyNot_allIndex.txt index 5b7315196..1926c9d7c 100644 --- a/src/test/resources/expected/spark-3.1/whyNot_allIndex.txt +++ b/src/test/resources/expected/spark-3.1/whyNot_allIndex.txt @@ -37,27 +37,27 @@ Plan without Hyperspace & WhyNot reasons: 05 +- Filter ((isnotnull(c5#) AND (c5# = 3000)) AND isnotnull(c3#)) 06 +- Relation[c1#,c2#,c3#,c4#,c5#] parquet -+-----------+------------------+---------+-------------------------+------------------------------------------------------------+ -|SubPlan |IndexName |IndexType|Reason |Message | -+-----------+------------------+---------+-------------------------+------------------------------------------------------------+ -|Filter @ 2 |leftDfFilterIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c4,c3] | -|Filter @ 2 |leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] | -|Filter @ 2 |rightDfFilterIndex|CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c5], filterCols=[c4,c3] | -|Filter @ 2 |rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c5] | -|Filter @ 5 |leftDfFilterIndex |CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c4], filterCols=[c5,c3] | -|Filter @ 5 |leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] | -|Filter @ 5 |rightDfFilterIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c5,c3] | -|Filter @ 5 |rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c5] | -|Join @ 0 |leftDfFilterIndex |CI |NOT_ALL_JOIN_COL_INDEXED |child=[left], joinCols=[c3], indexedCols=[c4] | -|Join @ 0 |leftDfFilterIndex |CI |NOT_ALL_JOIN_COL_INDEXED |child=[right], joinCols=[c3], indexedCols=[c4] | -|Join @ 0 |leftDfJoinIndex |CI |MISSING_INDEXED_COL |child=[right], requiredIndexedCols=[c5,c3], indexedCols=[c3]| -|Join @ 0 |rightDfFilterIndex|CI |NOT_ALL_JOIN_COL_INDEXED |child=[left], joinCols=[c3], indexedCols=[c5] | -|Join @ 0 |rightDfFilterIndex|CI |NOT_ALL_JOIN_COL_INDEXED |child=[right], joinCols=[c3], indexedCols=[c5] | -|Join @ 0 |rightDfJoinIndex |CI |MISSING_INDEXED_COL |child=[left], requiredIndexedCols=[c4,c3], indexedCols=[c3] | -|Project @ 1|leftDfJoinIndex |CI |ANOTHER_INDEX_APPLIED |appliedIndex=[leftDfFilterIndex] | -|Project @ 1|rightDfFilterIndex|CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c5], filterCols=[c4,c3] | -|Project @ 1|rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c4,c3], indexCols=[c3,c5] | -|Project @ 4|leftDfFilterIndex |CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c4], filterCols=[c5,c3] | -|Project @ 4|leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c5,c3], indexCols=[c3,c4] | -|Project @ 4|rightDfJoinIndex |CI |ANOTHER_INDEX_APPLIED |appliedIndex=[rightDfFilterIndex] | -+-----------+------------------+---------+-------------------------+------------------------------------------------------------+ ++----------+------------------+---------+-------------------------+------------------------------------------------------------+ +|SubPlan |IndexName |IndexType|Reason |Message | ++----------+------------------+---------+-------------------------+------------------------------------------------------------+ +|Filter @2 |leftDfFilterIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c4,c3] | +|Filter @2 |leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] | +|Filter @2 |rightDfFilterIndex|CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c5], filterCols=[c4,c3] | +|Filter @2 |rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c5] | +|Filter @5 |leftDfFilterIndex |CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c4], filterCols=[c5,c3] | +|Filter @5 |leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] | +|Filter @5 |rightDfFilterIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c5,c3] | +|Filter @5 |rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c5] | +|Join @0 |leftDfFilterIndex |CI |NOT_ALL_JOIN_COL_INDEXED |child=[left], joinCols=[c3], indexedCols=[c4] | +|Join @0 |leftDfFilterIndex |CI |NOT_ALL_JOIN_COL_INDEXED |child=[right], joinCols=[c3], indexedCols=[c4] | +|Join @0 |leftDfJoinIndex |CI |MISSING_INDEXED_COL |child=[right], requiredIndexedCols=[c5,c3], indexedCols=[c3]| +|Join @0 |rightDfFilterIndex|CI |NOT_ALL_JOIN_COL_INDEXED |child=[left], joinCols=[c3], indexedCols=[c5] | +|Join @0 |rightDfFilterIndex|CI |NOT_ALL_JOIN_COL_INDEXED |child=[right], joinCols=[c3], indexedCols=[c5] | +|Join @0 |rightDfJoinIndex |CI |MISSING_INDEXED_COL |child=[left], requiredIndexedCols=[c4,c3], indexedCols=[c3] | +|Project @1|leftDfJoinIndex |CI |ANOTHER_INDEX_APPLIED |appliedIndex=[leftDfFilterIndex] | +|Project @1|rightDfFilterIndex|CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c5], filterCols=[c4,c3] | +|Project @1|rightDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c4,c3], indexCols=[c3,c5] | +|Project @4|leftDfFilterIndex |CI |NO_FIRST_INDEXED_COL_COND|firstIndexedCol=[c4], filterCols=[c5,c3] | +|Project @4|leftDfJoinIndex |CI |MISSING_REQUIRED_COL |requiredCols=[c5,c3], indexCols=[c3,c4] | +|Project @4|rightDfJoinIndex |CI |ANOTHER_INDEX_APPLIED |appliedIndex=[rightDfFilterIndex] | ++----------+------------------+---------+-------------------------+------------------------------------------------------------+ diff --git a/src/test/resources/expected/spark-3.1/whyNot_indexName.txt b/src/test/resources/expected/spark-3.1/whyNot_indexName.txt index bfa04ff7b..325a43fa5 100644 --- a/src/test/resources/expected/spark-3.1/whyNot_indexName.txt +++ b/src/test/resources/expected/spark-3.1/whyNot_indexName.txt @@ -37,12 +37,12 @@ Plan without Hyperspace & WhyNot reasons: 05 +- Filter ((isnotnull(c5#) AND (c5# = 3000)) AND isnotnull(c3#)) 06 +- Relation[c1#,c2#,c3#,c4#,c5#] parquet -+-----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ -|SubPlan |IndexName |IndexType|Reason |Message |VerboseMessage | -+-----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ -|Filter @ 2 |leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c3,c4,c5,c2,c1], Index columns: [c3,c4] | -|Filter @ 5 |leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c3,c4,c5,c2,c1], Index columns: [c3,c4] | -|Join @ 0 |leftDfJoinIndex|CI |MISSING_INDEXED_COL |child=[right], requiredIndexedCols=[c5,c3], indexedCols=[c3]|Index does not contain required columns for right subplan. Required indexed columns: [c5,c3], Indexed columns: [c3]| -|Project @ 1|leftDfJoinIndex|CI |ANOTHER_INDEX_APPLIED|appliedIndex=[leftDfFilterIndex] |Another candidate index is applied: leftDfFilterIndex | -|Project @ 4|leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c5,c3], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c5,c3], Index columns: [c3,c4] | -+-----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ ++----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +|SubPlan |IndexName |IndexType|Reason |Message |VerboseMessage | ++----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+ +|Filter @2 |leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c3,c4,c5,c2,c1], Index columns: [c3,c4] | +|Filter @5 |leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c3,c4,c5,c2,c1], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c3,c4,c5,c2,c1], Index columns: [c3,c4] | +|Join @0 |leftDfJoinIndex|CI |MISSING_INDEXED_COL |child=[right], requiredIndexedCols=[c5,c3], indexedCols=[c3]|Index does not contain required columns for right subplan. Required indexed columns: [c5,c3], Indexed columns: [c3]| +|Project @1|leftDfJoinIndex|CI |ANOTHER_INDEX_APPLIED|appliedIndex=[leftDfFilterIndex] |Another candidate index is applied: leftDfFilterIndex | +|Project @4|leftDfJoinIndex|CI |MISSING_REQUIRED_COL |requiredCols=[c5,c3], indexCols=[c3,c4] |Index does not contain required columns. Required columns: [c5,c3], Index columns: [c3,c4] | ++----------+---------------+---------+---------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------+