From 3df44deb847a1e1ed203b96a8179742a5b28d141 Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Sat, 4 Jan 2025 22:02:15 -0600
Subject: [PATCH 01/23] planner: Recognize potential for correlation in subset
 index match

---
 pkg/planner/cardinality/cross_estimation.go | 18 +++----
 pkg/planner/cardinality/row_count_index.go  | 50 +++++++++--------
 pkg/planner/cardinality/selectivity.go      |  8 ++-
 pkg/planner/core/debugtrace.go              | 16 +++---
 pkg/planner/core/exhaust_physical_plans.go  |  9 ++--
 pkg/planner/core/find_best_task.go          | 60 +++++++++++++--------
 pkg/planner/core/stats.go                   |  3 +-
 pkg/planner/util/path.go                    |  5 ++
 pkg/statistics/statistics_test.go           | 18 +++----
 pkg/statistics/table.go                     |  2 +-
 10 files changed, 113 insertions(+), 76 deletions(-)

diff --git a/pkg/planner/cardinality/cross_estimation.go b/pkg/planner/cardinality/cross_estimation.go
index d249a47502855..fb4b754d9cda0 100644
--- a/pkg/planner/cardinality/cross_estimation.go
+++ b/pkg/planner/cardinality/cross_estimation.go
@@ -156,7 +156,7 @@ func crossEstimateRowCount(sctx planctx.PlanContext,
 	if idxExists && len(idxIDs) > 0 {
 		idxID = idxIDs[0]
 	}
-	rangeCounts, ok := getColumnRangeCounts(sctx, colUniqueID, ranges, dsTableStats.HistColl, idxID)
+	rangeCounts, _, ok := getColumnRangeCounts(sctx, colUniqueID, ranges, dsTableStats.HistColl, idxID)
 	if !ok {
 		return 0, false, corr
 	}
@@ -166,7 +166,7 @@ func crossEstimateRowCount(sctx planctx.PlanContext,
 	}
 	var rangeCount float64
 	if idxExists {
-		rangeCount, err = GetRowCountByIndexRanges(sctx, dsTableStats.HistColl, idxID, convertedRanges)
+		rangeCount, _, err = GetRowCountByIndexRanges(sctx, dsTableStats.HistColl, idxID, convertedRanges)
 	} else {
 		rangeCount, err = GetRowCountByColumnRanges(sctx, dsTableStats.HistColl, colUniqueID, convertedRanges)
 	}
@@ -182,30 +182,30 @@ func crossEstimateRowCount(sctx planctx.PlanContext,
 }
 
 // getColumnRangeCounts estimates row count for each range respectively.
-func getColumnRangeCounts(sctx planctx.PlanContext, colID int64, ranges []*ranger.Range, histColl *statistics.HistColl, idxID int64) ([]float64, bool) {
+func getColumnRangeCounts(sctx planctx.PlanContext, colID int64, ranges []*ranger.Range, histColl *statistics.HistColl, idxID int64) ([]float64, float64, bool) {
 	var err error
-	var count float64
+	var count, corrCount float64
 	rangeCounts := make([]float64, len(ranges))
 	for i, ran := range ranges {
 		if idxID >= 0 {
 			idxHist := histColl.GetIdx(idxID)
 			if statistics.IndexStatsIsInvalid(sctx, idxHist, histColl, idxID) {
-				return nil, false
+				return nil, 0, false
 			}
-			count, err = GetRowCountByIndexRanges(sctx, histColl, idxID, []*ranger.Range{ran})
+			count, corrCount, err = GetRowCountByIndexRanges(sctx, histColl, idxID, []*ranger.Range{ran})
 		} else {
 			colHist := histColl.GetCol(colID)
 			if statistics.ColumnStatsIsInvalid(colHist, sctx, histColl, colID) {
-				return nil, false
+				return nil, 0, false
 			}
 			count, err = GetRowCountByColumnRanges(sctx, histColl, colID, []*ranger.Range{ran})
 		}
 		if err != nil {
-			return nil, false
+			return nil, 0, false
 		}
 		rangeCounts[i] = count
 	}
-	return rangeCounts, true
+	return rangeCounts, corrCount, true
 }
 
 // convertRangeFromExpectedCnt builds new ranges used to estimate row count we need to scan in table scan before finding specified
diff --git a/pkg/planner/cardinality/row_count_index.go b/pkg/planner/cardinality/row_count_index.go
index 08641e20b84dd..ab6116cba6e08 100644
--- a/pkg/planner/cardinality/row_count_index.go
+++ b/pkg/planner/cardinality/row_count_index.go
@@ -38,7 +38,7 @@ import (
 )
 
 // GetRowCountByIndexRanges estimates the row count by a slice of Range.
-func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistColl, idxID int64, indexRanges []*ranger.Range) (result float64, err error) {
+func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistColl, idxID int64, indexRanges []*ranger.Range) (result float64, corrResult float64, err error) {
 	var name string
 	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
 		debugtrace.EnterContextCommon(sctx)
@@ -69,7 +69,7 @@ func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistCol
 		if err == nil && sc.EnableOptimizerCETrace && idx != nil {
 			ceTraceRange(sctx, coll.PhysicalID, colNames, indexRanges, "Index Stats-Pseudo", uint64(result))
 		}
-		return result, err
+		return result, 0, err
 	}
 	realtimeCnt, modifyCount := coll.GetScaledRealtimeAndModifyCnt(idx)
 	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
@@ -79,15 +79,16 @@ func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistCol
 			"Increase Factor", idx.GetIncreaseFactor(realtimeCnt),
 		)
 	}
+	corrResult = float64(0)
 	if idx.CMSketch != nil && idx.StatsVer == statistics.Version1 {
 		result, err = getIndexRowCountForStatsV1(sctx, coll, idxID, indexRanges)
 	} else {
-		result, err = getIndexRowCountForStatsV2(sctx, idx, coll, indexRanges, realtimeCnt, modifyCount)
+		result, corrResult, err = getIndexRowCountForStatsV2(sctx, idx, coll, indexRanges, realtimeCnt, modifyCount)
 	}
 	if sc.EnableOptimizerCETrace {
 		ceTraceRange(sctx, coll.PhysicalID, colNames, indexRanges, "Index Stats", uint64(result))
 	}
-	return result, errors.Trace(err)
+	return result, corrResult, errors.Trace(err)
 }
 
 func getIndexRowCountForStatsV1(sctx planctx.PlanContext, coll *statistics.HistColl, idxID int64, indexRanges []*ranger.Range) (float64, error) {
@@ -117,7 +118,7 @@ func getIndexRowCountForStatsV1(sctx planctx.PlanContext, coll *statistics.HistC
 		// values in this case.
 		if rangePosition == 0 || isSingleColIdxNullRange(idx, ran) {
 			realtimeCnt, modifyCount := coll.GetScaledRealtimeAndModifyCnt(idx)
-			count, err := getIndexRowCountForStatsV2(sctx, idx, nil, []*ranger.Range{ran}, realtimeCnt, modifyCount)
+			count, _, err := getIndexRowCountForStatsV2(sctx, idx, nil, []*ranger.Range{ran}, realtimeCnt, modifyCount)
 			if err != nil {
 				return 0, errors.Trace(err)
 			}
@@ -181,7 +182,7 @@ func getIndexRowCountForStatsV1(sctx planctx.PlanContext, coll *statistics.HistC
 			// prefer index stats over column stats
 			if idxIDs, ok := coll.ColUniqueID2IdxIDs[colUniqueID]; ok && len(idxIDs) > 0 {
 				idxID := idxIDs[0]
-				count, err = GetRowCountByIndexRanges(sctx, coll, idxID, []*ranger.Range{&rang})
+				count, _, err = GetRowCountByIndexRanges(sctx, coll, idxID, []*ranger.Range{&rang})
 			} else {
 				count, err = GetRowCountByColumnRanges(sctx, coll, colUniqueID, []*ranger.Range{&rang})
 			}
@@ -215,26 +216,26 @@ func isSingleColIdxNullRange(idx *statistics.Index, ran *ranger.Range) bool {
 }
 
 // It uses the modifyCount to validate, and realtimeRowCount to adjust the influence of modifications on the table.
-func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRanges []*ranger.Range, realtimeRowCount, modifyCount int64) (float64, error) {
+func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRanges []*ranger.Range, realtimeRowCount, modifyCount int64) (float64, float64, error) {
 	sc := sctx.GetSessionVars().StmtCtx
 	debugTrace := sc.EnableOptimizerDebugTrace
 	if debugTrace {
 		debugtrace.EnterContextCommon(sctx)
 		defer debugtrace.LeaveContextCommon(sctx)
 	}
-	totalCount := float64(0)
+	totalCount, corrCount := float64(0), float64(0)
 	isSingleColIdx := len(idx.Info.Columns) == 1
 	for _, indexRange := range indexRanges {
 		var count float64
 		lb, err := codec.EncodeKey(sc.TimeZone(), nil, indexRange.LowVal...)
 		err = sc.HandleError(err)
 		if err != nil {
-			return 0, err
+			return 0, 0, err
 		}
 		rb, err := codec.EncodeKey(sc.TimeZone(), nil, indexRange.HighVal...)
 		err = sc.HandleError(err)
 		if err != nil {
-			return 0, err
+			return 0, 0, err
 		}
 		if debugTrace {
 			debugTraceStartEstimateRange(sctx, indexRange, lb, rb, totalCount)
@@ -293,13 +294,14 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index,
 		// Due to the limitation of calcFraction and convertDatumToScalar, the histogram actually won't estimate anything.
 		// If the first column's range is point.
 		if rangePosition := getOrdinalOfRangeCond(sc, indexRange); rangePosition > 0 && idx.StatsVer >= statistics.Version2 && coll != nil {
-			var expBackoffSel float64
-			expBackoffSel, expBackoffSuccess, err = expBackoffEstimation(sctx, idx, coll, indexRange)
+			var expBackoffSel, corrSel float64
+			expBackoffSel, corrSel, expBackoffSuccess, err = expBackoffEstimation(sctx, idx, coll, indexRange)
 			if err != nil {
-				return 0, err
+				return 0, 0, err
 			}
 			if expBackoffSuccess {
 				expBackoffCnt := expBackoffSel * idx.TotalRowCount()
+				corrCnt := corrSel * idx.TotalRowCount()
 
 				upperLimit := expBackoffCnt
 				// Use the multi-column stats to calculate the max possible row count of [l, r)
@@ -326,6 +328,7 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index,
 					expBackoffCnt = upperLimit
 				}
 				count += expBackoffCnt
+				corrCount += corrCnt
 			}
 		}
 		if !expBackoffSuccess {
@@ -335,6 +338,7 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index,
 		// If the current table row count has changed, we should scale the row count accordingly.
 		increaseFactor := idx.GetIncreaseFactor(realtimeRowCount)
 		count *= increaseFactor
+		corrCount *= increaseFactor
 
 		// handling the out-of-range part
 		if (outOfRangeOnIndex(idx, l) && !(isSingleColIdx && lowIsNull)) || outOfRangeOnIndex(idx, r) {
@@ -369,7 +373,7 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index,
 		// Don't allow the final result to go below 1 row
 		totalCount = mathutil.Clamp(totalCount, 1, float64(realtimeRowCount))
 	}
-	return totalCount, nil
+	return totalCount, corrCount, nil
 }
 
 var nullKeyBytes, _ = codec.EncodeKey(time.UTC, nil, types.NewDatum(nil))
@@ -429,7 +433,7 @@ func equalRowCountOnIndex(sctx planctx.PlanContext, idx *statistics.Index, b []b
 }
 
 // expBackoffEstimation estimate the multi-col cases following the Exponential Backoff. See comment below for details.
-func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRange *ranger.Range) (sel float64, success bool, err error) {
+func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRange *ranger.Range) (sel float64, corrsel float64, success bool, err error) {
 	if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
 		debugtrace.EnterContextCommon(sctx)
 		defer func() {
@@ -485,7 +489,7 @@ func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll
 					continue
 				}
 				foundStats = true
-				count, err = GetRowCountByIndexRanges(sctx, coll, idxID, tmpRan)
+				count, _, err = GetRowCountByIndexRanges(sctx, coll, idxID, tmpRan)
 				if err == nil {
 					break
 				}
@@ -497,7 +501,7 @@ func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll
 			continue
 		}
 		if err != nil {
-			return 0, false, err
+			return 0, 0, false, err
 		}
 		singleColumnEstResults = append(singleColumnEstResults, selectivity)
 	}
@@ -509,9 +513,9 @@ func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll
 		l = 0
 	})
 	if l == 1 {
-		return singleColumnEstResults[0], true, nil
+		return singleColumnEstResults[0], 0, true, nil
 	} else if l == 0 {
-		return 0, false, nil
+		return 0, 0, false, nil
 	}
 	// Do not allow the exponential backoff to go below the available index bound. If the number of predicates
 	// is less than the number of index columns - use 90% of the bound to differentiate a subset from full index match.
@@ -524,19 +528,21 @@ func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll
 	if l < len(idx.Info.Columns) {
 		idxLowBound /= 0.9
 	}
+	// corrsel is the selectivity of the most filtering column
+	corrsel = min(idxLowBound, singleColumnEstResults[0])
 	minTwoCol := min(singleColumnEstResults[0], singleColumnEstResults[1], idxLowBound)
 	multTwoCol := singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1])
 	if l == 2 {
-		return max(minTwoCol, multTwoCol), true, nil
+		return max(minTwoCol, multTwoCol), corrsel, true, nil
 	}
 	minThreeCol := min(minTwoCol, singleColumnEstResults[2])
 	multThreeCol := multTwoCol * math.Sqrt(math.Sqrt(singleColumnEstResults[2]))
 	if l == 3 {
-		return max(minThreeCol, multThreeCol), true, nil
+		return max(minThreeCol, multThreeCol), corrsel, true, nil
 	}
 	minFourCol := min(minThreeCol, singleColumnEstResults[3])
 	multFourCol := multThreeCol * math.Sqrt(math.Sqrt(math.Sqrt(singleColumnEstResults[3])))
-	return max(minFourCol, multFourCol), true, nil
+	return max(minFourCol, multFourCol), corrsel, true, nil
 }
 
 // outOfRangeOnIndex checks if the datum is out of the range.
diff --git a/pkg/planner/cardinality/selectivity.go b/pkg/planner/cardinality/selectivity.go
index 1fe6eb84524b3..4a42857b1029b 100644
--- a/pkg/planner/cardinality/selectivity.go
+++ b/pkg/planner/cardinality/selectivity.go
@@ -202,11 +202,12 @@ func Selectivity(
 			if err != nil {
 				return 0, nil, errors.Trace(err)
 			}
-			cnt, err := GetRowCountByIndexRanges(ctx, coll, id, ranges)
+			cnt, corrCnt, err := GetRowCountByIndexRanges(ctx, coll, id, ranges)
 			if err != nil {
 				return 0, nil, errors.Trace(err)
 			}
 			selectivity := cnt / float64(coll.RealtimeCount)
+			corrSelectivity := corrCnt / float64(coll.RealtimeCount)
 			nodes = append(nodes, &StatsNode{
 				Tp:                       IndexType,
 				ID:                       id,
@@ -214,6 +215,7 @@ func Selectivity(
 				Ranges:                   ranges,
 				numCols:                  len(idxStats.Info.Columns),
 				Selectivity:              selectivity,
+				CorrSelectivity:          corrSelectivity,
 				partCover:                partCover,
 				minAccessCondsForDNFCond: minAccessCondsForDNFCond,
 			})
@@ -543,6 +545,10 @@ type StatsNode struct {
 	mask int64
 	// Selectivity indicates the Selectivity of this column/index.
 	Selectivity float64
+	// CorrSelectivity indicates the selectivity of this column/index assuming correlated columns.
+	// That is, it is the selectivity of the most filtering column alone, as if all other
+	// columns were fully correlated with it and filtered out no additional rows.
+	CorrSelectivity float64
 	// numCols is the number of columns contained in the index or column(which is always 1).
 	numCols int
 	// partCover indicates whether the bit in the mask is for a full cover or partial cover. It is only true
diff --git a/pkg/planner/core/debugtrace.go b/pkg/planner/core/debugtrace.go
index 254c278847ed1..b2ee3166e64d0 100644
--- a/pkg/planner/core/debugtrace.go
+++ b/pkg/planner/core/debugtrace.go
@@ -227,13 +227,14 @@ func stabilizeGetStatsTblInfo(info *getStatsTblInfo) {
 */
 
 type accessPathForDebugTrace struct {
-	IndexName        string `json:",omitempty"`
-	AccessConditions []string
-	IndexFilters     []string
-	TableFilters     []string
-	PartialPaths     []accessPathForDebugTrace `json:",omitempty"`
-	CountAfterAccess float64
-	CountAfterIndex  float64
+	IndexName            string `json:",omitempty"`
+	AccessConditions     []string
+	IndexFilters         []string
+	TableFilters         []string
+	PartialPaths         []accessPathForDebugTrace `json:",omitempty"`
+	CountAfterAccess     float64
+	CorrCountAfterAccess float64
+	CountAfterIndex      float64
 }
 
 func convertAccessPathForDebugTrace(ctx expression.EvalContext, path *util.AccessPath, out *accessPathForDebugTrace) {
@@ -244,6 +245,7 @@ func convertAccessPathForDebugTrace(ctx expression.EvalContext, path *util.Acces
 	out.IndexFilters = expression.ExprsToStringsForDisplay(ctx, path.IndexFilters)
 	out.TableFilters = expression.ExprsToStringsForDisplay(ctx, path.TableFilters)
 	out.CountAfterAccess = path.CountAfterAccess
+	out.CorrCountAfterAccess = path.CorrCountAfterAccess
 	out.CountAfterIndex = path.CountAfterIndex
 	out.PartialPaths = make([]accessPathForDebugTrace, len(path.PartialIndexPaths))
 	for i, partialPath := range path.PartialIndexPaths {
diff --git a/pkg/planner/core/exhaust_physical_plans.go b/pkg/planner/core/exhaust_physical_plans.go
index 3c47bcce17fab..96f315b091c67 100644
--- a/pkg/planner/core/exhaust_physical_plans.go
+++ b/pkg/planner/core/exhaust_physical_plans.go
@@ -1347,10 +1347,11 @@ func constructInnerIndexScanTask(
 		rowCount = math.Min(rowCount, 1.0)
 	}
 	tmpPath := &util.AccessPath{
-		IndexFilters:     indexConds,
-		TableFilters:     tblConds,
-		CountAfterIndex:  rowCount,
-		CountAfterAccess: rowCount,
+		IndexFilters:         indexConds,
+		TableFilters:         tblConds,
+		CountAfterIndex:      rowCount,
+		CountAfterAccess:     rowCount,
+		CorrCountAfterAccess: 0,
 	}
 	// Assume equal conditions used by index join and other conditions are independent.
 	if len(tblConds) > 0 {
diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go
index ac93c82bdb5f4..b21611a8a7909 100644
--- a/pkg/planner/core/find_best_task.go
+++ b/pkg/planner/core/find_best_task.go
@@ -728,32 +728,48 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, prop *
 	if statsTbl != nil && rhs.path.Index != nil {
 		rhsHasStatistics = statsTbl.ColAndIdxExistenceMap.HasAnalyzed(rhs.path.Index.ID, true)
 	}
-	if !lhs.path.IsTablePath() && !rhs.path.IsTablePath() && // Not a table scan
-		(lhsHasStatistics || rhsHasStatistics) && // At least one index has statistics
-		(!lhsHasStatistics || !rhsHasStatistics) && // At least one index doesn't have statistics
-		len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 { // not IndexMerge due to unreliability
-		lhsTotalEqual := lhs.path.EqCondCount + lhs.path.EqOrInCondCount
-		rhsTotalEqual := rhs.path.EqCondCount + rhs.path.EqOrInCondCount
-		if lhsHasStatistics && lhsTotalEqual > 0 && lhsTotalEqual >= rhsTotalEqual {
+	lhsTotalEqual := lhs.path.EqCondCount + lhs.path.EqOrInCondCount
+	rhsTotalEqual := rhs.path.EqCondCount + rhs.path.EqOrInCondCount
+
+	if len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 {
+		if !lhs.path.IsTablePath() && !rhs.path.IsTablePath() && // Not a table scan
+			(lhsHasStatistics || rhsHasStatistics) && // At least one index has statistics
+			(!lhsHasStatistics || !rhsHasStatistics) { // At least one index doesn't have statistics
+			if lhsHasStatistics && lhsTotalEqual > 0 && lhsTotalEqual >= rhsTotalEqual {
+				return 1
+			}
+			if rhsHasStatistics && rhsTotalEqual > 0 && rhsTotalEqual >= lhsTotalEqual {
+				return -1
+			}
+		}
+
+		lhsCorrRatio, rhsCorrRatio := 0.0, 0.0
+		if lhs.path.CorrCountAfterAccess > 0 || rhs.path.CorrCountAfterAccess > 0 {
+			lhsCorrRatio = lhs.path.CorrCountAfterAccess / lhs.path.CountAfterAccess
+			rhsCorrRatio = rhs.path.CorrCountAfterAccess / rhs.path.CountAfterAccess
+		}
+
+		if lhsTotalEqual >= rhsTotalEqual && lhsCorrRatio < rhsCorrRatio {
 			return 1
 		}
-		if rhsHasStatistics && rhsTotalEqual > 0 && rhsTotalEqual >= lhsTotalEqual {
+		if rhsTotalEqual >= lhsTotalEqual && rhsCorrRatio < lhsCorrRatio {
 			return -1
 		}
-	}
-
-	// This rule is empirical but not always correct.
-	// If x's range row count is significantly lower than y's, for example, 1000 times, we think x is better.
-	if lhs.path.CountAfterAccess > 100 && rhs.path.CountAfterAccess > 100 && // to prevent some extreme cases, e.g. 0.01 : 10
-		len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 && // not IndexMerge since its row count estimation is not accurate enough
-		prop.ExpectedCnt == math.MaxFloat64 { // Limit may affect access row count
-		threshold := float64(fixcontrol.GetIntWithDefault(sctx.GetSessionVars().OptimizerFixControl, fixcontrol.Fix45132, 1000))
-		if threshold > 0 { // set it to 0 to disable this rule
-			if lhs.path.CountAfterAccess/rhs.path.CountAfterAccess > threshold {
-				return -1
-			}
-			if rhs.path.CountAfterAccess/lhs.path.CountAfterAccess > threshold {
-				return 1
+		// This rule is empirical but not always correct.
+		// If x's range row count is significantly lower than y's, for example, 1000 times, we think x is better.
+		if lhs.path.CorrCountAfterAccess > 100 && rhs.path.CorrCountAfterAccess > 100 && // to prevent some extreme cases, e.g. 0.01 : 10
+			len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 && // not IndexMerge since its row count estimation is not accurate enough
+			prop.ExpectedCnt == math.MaxFloat64 { // Limit may affect access row count
+			threshold := float64(fixcontrol.GetIntWithDefault(sctx.GetSessionVars().OptimizerFixControl, fixcontrol.Fix45132, 1000))
+			if threshold > 0 { // set it to 0 to disable this rule
+				if lhs.path.CountAfterAccess/rhs.path.CountAfterAccess > threshold &&
+					(rhsCorrRatio < lhsCorrRatio || rhsTotalEqual > lhsTotalEqual) {
+					return -1
+				}
+				if rhs.path.CountAfterAccess/lhs.path.CountAfterAccess > threshold &&
+					(lhsCorrRatio < rhsCorrRatio || lhsTotalEqual > rhsTotalEqual) {
+					return 1
+				}
 			}
 		}
 	}
diff --git a/pkg/planner/core/stats.go b/pkg/planner/core/stats.go
index d48f711f5cc34..50ada5e873e31 100644
--- a/pkg/planner/core/stats.go
+++ b/pkg/planner/core/stats.go
@@ -176,6 +176,7 @@ func fillIndexPath(ds *logicalop.DataSource, path *util.AccessPath, conds []expr
 	}
 	path.Ranges = ranger.FullRange()
 	path.CountAfterAccess = float64(ds.StatisticTable.RealtimeCount)
+	path.CorrCountAfterAccess = 0
 	path.IdxCols, path.IdxColLens = expression.IndexInfo2PrefixCols(ds.Columns, ds.Schema().Columns, path.Index)
 	path.FullIdxCols, path.FullIdxColLens = expression.IndexInfo2Cols(ds.Columns, ds.Schema().Columns, path.Index)
 	if !path.Index.Unique && !path.Index.Primary && len(path.Index.Columns) == len(path.IdxCols) {
@@ -404,7 +405,7 @@ func detachCondAndBuildRangeForPath(
 			path.ConstCols[i] = res.ColumnValues[i] != nil
 		}
 	}
-	path.CountAfterAccess, err = cardinality.GetRowCountByIndexRanges(sctx, histColl, path.Index.ID, path.Ranges)
+	path.CountAfterAccess, path.CorrCountAfterAccess, err = cardinality.GetRowCountByIndexRanges(sctx, histColl, path.Index.ID, path.Ranges)
 	return err
 }
 
diff --git a/pkg/planner/util/path.go b/pkg/planner/util/path.go
index a8ccf7df379f2..99b1ea2990480 100644
--- a/pkg/planner/util/path.go
+++ b/pkg/planner/util/path.go
@@ -41,6 +41,10 @@ type AccessPath struct {
 	// CountAfterAccess is the row count after we apply range seek and before we use other filter to filter data.
 	// For index merge path, CountAfterAccess is the row count after partial paths and before we apply table filters.
 	CountAfterAccess float64
+	// CorrCountAfterAccess is the row count after applying only the most filtering index column
+	// against the index. This is used when we don't have full index statistics
+	// and we need to use the exponential backoff to estimate the row count.
+	CorrCountAfterAccess float64
 	// CountAfterIndex is the row count after we apply filters on index and before we apply the table filters.
 	CountAfterIndex float64
 	AccessConds     []expression.Expression
@@ -132,6 +136,7 @@ func (path *AccessPath) Clone() *AccessPath {
 		ConstCols:                    slices.Clone(path.ConstCols),
 		Ranges:                       CloneRanges(path.Ranges),
 		CountAfterAccess:             path.CountAfterAccess,
+		CorrCountAfterAccess:         path.CorrCountAfterAccess,
 		CountAfterIndex:              path.CountAfterIndex,
 		AccessConds:                  CloneExprs(path.AccessConds),
 		EqCondCount:                  path.EqCondCount,
diff --git a/pkg/statistics/statistics_test.go b/pkg/statistics/statistics_test.go
index cfaf69cc7f68b..3afdbf30e0b86 100644
--- a/pkg/statistics/statistics_test.go
+++ b/pkg/statistics/statistics_test.go
@@ -395,51 +395,51 @@ func SubTestIndexRanges() func(*testing.T) {
 			HighVal:   []types.Datum{types.MaxValueDatum()},
 			Collators: collate.GetBinaryCollatorSlice(1),
 		}}
-		count, err := GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran)
+		count, _, err := GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran)
 		require.NoError(t, err)
 		require.Equal(t, 99900, int(count))
 		ran[0].LowVal[0] = types.NewIntDatum(1000)
 		ran[0].HighVal[0] = types.NewIntDatum(2000)
-		count, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran)
+		count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran)
 		require.NoError(t, err)
 		require.Equal(t, 2500, int(count))
 		ran[0].LowVal[0] = types.NewIntDatum(1001)
 		ran[0].HighVal[0] = types.NewIntDatum(1999)
-		count, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran)
+		count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran)
 		require.NoError(t, err)
 		require.Equal(t, 2500, int(count))
 		ran[0].LowVal[0] = types.NewIntDatum(1000)
 		ran[0].HighVal[0] = types.NewIntDatum(1000)
-		count, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran)
+		count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran)
 		require.NoError(t, err)
 		require.Equal(t, 100, int(count))
 
 		tbl.SetIdx(0, &Index{Info: &model.IndexInfo{Columns: []*model.IndexColumn{{Offset: 0}}, Unique: true}})
 		ran[0].LowVal[0] = types.NewIntDatum(1000)
 		ran[0].HighVal[0] = types.NewIntDatum(1000)
-		count, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran)
+		count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran)
 		require.NoError(t, err)
 		require.Equal(t, 1, int(count))
 
 		tbl.SetIdx(0, idx)
 		ran[0].LowVal[0] = types.MinNotNullDatum()
 		ran[0].HighVal[0] = types.MaxValueDatum()
-		count, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran)
+		count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran)
 		require.NoError(t, err)
 		require.Equal(t, 100000, int(count))
 		ran[0].LowVal[0] = types.NewIntDatum(1000)
 		ran[0].HighVal[0] = types.NewIntDatum(2000)
-		count, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran)
+		count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran)
 		require.NoError(t, err)
 		require.Equal(t, 1000, int(count))
 		ran[0].LowVal[0] = types.NewIntDatum(1001)
 		ran[0].HighVal[0] = types.NewIntDatum(1990)
-		count, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran)
+		count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran)
 		require.NoError(t, err)
 		require.Equal(t, 989, int(count))
 		ran[0].LowVal[0] = types.NewIntDatum(1000)
 		ran[0].HighVal[0] = types.NewIntDatum(1000)
-		count, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran)
+		count, _, err = GetRowCountByIndexRanges(ctx, &tbl.HistColl, 0, ran)
 		require.NoError(t, err)
 		require.Equal(t, 1, int(count))
 	}
diff --git a/pkg/statistics/table.go b/pkg/statistics/table.go
index 75c50e1921ce6..b386a308431c7 100644
--- a/pkg/statistics/table.go
+++ b/pkg/statistics/table.go
@@ -50,7 +50,7 @@ var (
 	// Note: all functions below will be removed after finishing moving all estimation functions into the cardinality package.
 
 	// GetRowCountByIndexRanges is a function type to get row count by index ranges.
-	GetRowCountByIndexRanges func(sctx planctx.PlanContext, coll *HistColl, idxID int64, indexRanges []*ranger.Range) (result float64, err error)
+	GetRowCountByIndexRanges func(sctx planctx.PlanContext, coll *HistColl, idxID int64, indexRanges []*ranger.Range) (result float64, corrResult float64, err error)
 
 	// GetRowCountByIntColumnRanges is a function type to get row count by int column ranges.
 	GetRowCountByIntColumnRanges func(sctx planctx.PlanContext, coll *HistColl, colID int64, intRanges []*ranger.Range) (result float64, err error)

From 8f2fdbe9e2c9e0a11878d38b04b8449810c1fd94 Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Sat, 4 Jan 2025 22:19:37 -0600
Subject: [PATCH 02/23] planner: fix build error in getIndexRowCountForStatsV2

---
 pkg/planner/cardinality/row_count_index.go | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pkg/planner/cardinality/row_count_index.go b/pkg/planner/cardinality/row_count_index.go
index ab6116cba6e08..78dea02c59d52 100644
--- a/pkg/planner/cardinality/row_count_index.go
+++ b/pkg/planner/cardinality/row_count_index.go
@@ -216,23 +216,23 @@ func isSingleColIdxNullRange(idx *statistics.Index, ran *ranger.Range) bool {
 }
 
 // It uses the modifyCount to validate, and realtimeRowCount to adjust the influence of modifications on the table.
-func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRanges []*ranger.Range, realtimeRowCount, modifyCount int64) (float64, float64, error) {
+func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRanges []*ranger.Range, realtimeRowCount, modifyCount int64) (totalCount float64, corrCount float64, err error) {
 	sc := sctx.GetSessionVars().StmtCtx
 	debugTrace := sc.EnableOptimizerDebugTrace
 	if debugTrace {
 		debugtrace.EnterContextCommon(sctx)
 		defer debugtrace.LeaveContextCommon(sctx)
 	}
-	totalCount, corrCount := float64(0), float64(0)
 	isSingleColIdx := len(idx.Info.Columns) == 1
 	for _, indexRange := range indexRanges {
 		var count float64
-		lb, err := codec.EncodeKey(sc.TimeZone(), nil, indexRange.LowVal...)
+		var lb, rb []byte
+		lb, err = codec.EncodeKey(sc.TimeZone(), nil, indexRange.LowVal...)
 		err = sc.HandleError(err)
 		if err != nil {
 			return 0, 0, err
 		}
-		rb, err := codec.EncodeKey(sc.TimeZone(), nil, indexRange.HighVal...)
+		rb, err = codec.EncodeKey(sc.TimeZone(), nil, indexRange.HighVal...)
 		err = sc.HandleError(err)
 		if err != nil {
 			return 0, 0, err

From ce4bf9a412c81b218b455098b40eff2833d3ea4a Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Sat, 4 Jan 2025 22:49:48 -0600
Subject: [PATCH 03/23] planner: update selectivity_test.go for new GetRowCountByIndexRanges signature

---
 pkg/planner/cardinality/selectivity_test.go | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/pkg/planner/cardinality/selectivity_test.go b/pkg/planner/cardinality/selectivity_test.go
index e645f3d863e79..42c51acbed469 100644
--- a/pkg/planner/cardinality/selectivity_test.go
+++ b/pkg/planner/cardinality/selectivity_test.go
@@ -252,11 +252,11 @@ func TestEstimationForUnknownValues(t *testing.T) {
 	require.Equal(t, 12.2, count)
 
 	idxID := table.Meta().Indices[0].ID
-	count, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(30, 30))
+	count, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(30, 30))
 	require.NoError(t, err)
 	require.Equal(t, 0.1, count)
 
-	count, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(9, 30))
+	count, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(9, 30))
 	require.NoError(t, err)
 	require.Equal(t, 10.0, count)
 
@@ -286,7 +286,7 @@ func TestEstimationForUnknownValues(t *testing.T) {
 	require.Equal(t, 1.0, count)
 
 	idxID = table.Meta().Indices[0].ID
-	count, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(2, 2))
+	count, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(2, 2))
 	require.NoError(t, err)
 	require.Equal(t, 0.0, count)
 }
@@ -377,11 +377,11 @@ func TestEstimationUniqueKeyEqualConds(t *testing.T) {
 
 	sctx := mock.NewContext()
 	idxID := table.Meta().Indices[0].ID
-	count, err := cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(7, 7))
+	count, _, err := cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(7, 7))
 	require.NoError(t, err)
 	require.Equal(t, 1.0, count)
 
-	count, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(6, 6))
+	count, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(6, 6))
 	require.NoError(t, err)
 	require.Equal(t, 1.0, count)
 
@@ -1011,12 +1011,12 @@ func TestIssue39593(t *testing.T) {
 	sctx := testKit.Session()
 	idxID := tblInfo.Indices[0].ID
 	vals := []int64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}
-	count, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRanges(vals, vals))
+	count, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRanges(vals, vals))
 	require.NoError(t, err)
 	// estimated row count without any changes
 	require.Equal(t, float64(360), count)
 	statsTbl.RealtimeCount *= 10
-	count, err = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRanges(vals, vals))
+	count, _, err = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRanges(vals, vals))
 	require.NoError(t, err)
 	// estimated row count after mock modify on the table
 	require.Equal(t, float64(3600), count)

From a27a439f307a9fdd040b16ef2eb1ba2512b1653e Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Sat, 4 Jan 2025 23:13:26 -0600
Subject: [PATCH 04/23] revision1

---
 pkg/planner/core/find_best_task.go | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go
index b21611a8a7909..3d9b25cb926a7 100644
--- a/pkg/planner/core/find_best_task.go
+++ b/pkg/planner/core/find_best_task.go
@@ -730,15 +730,19 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, prop *
 	}
 	lhsTotalEqual := lhs.path.EqCondCount + lhs.path.EqOrInCondCount
 	rhsTotalEqual := rhs.path.EqCondCount + rhs.path.EqOrInCondCount
+	lhsTotalIndexFilters := lhsTotalEqual + len(lhs.path.IndexFilters)
+	rhsTotalIndexFilters := rhsTotalEqual + len(rhs.path.IndexFilters)
+	lhsMoreFilters := lhsTotalEqual > 0 && (lhsTotalEqual > rhsTotalEqual || (lhsTotalEqual == rhsTotalEqual && lhsTotalIndexFilters >= rhsTotalIndexFilters))
+	rhsMoreFilters := rhsTotalEqual > 0 && (rhsTotalEqual > lhsTotalEqual || (rhsTotalEqual == lhsTotalEqual && rhsTotalIndexFilters >= lhsTotalIndexFilters))
 
 	if len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 {
 		if !lhs.path.IsTablePath() && !rhs.path.IsTablePath() && // Not a table scan
 			(lhsHasStatistics || rhsHasStatistics) && // At least one index has statistics
 			(!lhsHasStatistics || !rhsHasStatistics) { // At least one index doesn't have statistics
-			if lhsHasStatistics && lhsTotalEqual > 0 && lhsTotalEqual >= rhsTotalEqual {
+			if lhsHasStatistics && lhsMoreFilters {
 				return 1
 			}
-			if rhsHasStatistics && rhsTotalEqual > 0 && rhsTotalEqual >= lhsTotalEqual {
+			if rhsHasStatistics && rhsMoreFilters {
 				return -1
 			}
 		}
@@ -749,10 +753,10 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, prop *
 			rhsCorrRatio = rhs.path.CorrCountAfterAccess / rhs.path.CountAfterAccess
 		}
 
-		if lhsTotalEqual >= rhsTotalEqual && lhsCorrRatio < rhsCorrRatio {
+		if lhsMoreFilters && lhsCorrRatio < rhsCorrRatio {
 			return 1
 		}
-		if rhsTotalEqual >= lhsTotalEqual && rhsCorrRatio < lhsCorrRatio {
+		if rhsMoreFilters && rhsCorrRatio < lhsCorrRatio {
 			return -1
 		}
 		// This rule is empirical but not always correct.
@@ -763,11 +767,11 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, prop *
 			threshold := float64(fixcontrol.GetIntWithDefault(sctx.GetSessionVars().OptimizerFixControl, fixcontrol.Fix45132, 1000))
 			if threshold > 0 { // set it to 0 to disable this rule
 				if lhs.path.CountAfterAccess/rhs.path.CountAfterAccess > threshold &&
-					(rhsCorrRatio < lhsCorrRatio || rhsTotalEqual > lhsTotalEqual) {
+					(rhsCorrRatio < lhsCorrRatio || rhsMoreFilters) {
 					return -1
 				}
 				if rhs.path.CountAfterAccess/lhs.path.CountAfterAccess > threshold &&
-					(lhsCorrRatio < rhsCorrRatio || lhsTotalEqual > rhsTotalEqual) {
+					lhsCorrRatio < rhsCorrRatio || lhsMoreFilters {
 					return 1
 				}
 			}

From 269a78ca3b7307cdf626b37873dbab3f5186cd90 Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Sun, 5 Jan 2025 00:37:02 -0600
Subject: [PATCH 05/23] revision2

---
 pkg/planner/core/find_best_task.go      | 18 ++++++++----------
 tests/integrationtest/r/imdbload.result | 20 +++++++++-----------
 2 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go
index 3d9b25cb926a7..741f865aa325b 100644
--- a/pkg/planner/core/find_best_task.go
+++ b/pkg/planner/core/find_best_task.go
@@ -730,19 +730,17 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, prop *
 	}
 	lhsTotalEqual := lhs.path.EqCondCount + lhs.path.EqOrInCondCount
 	rhsTotalEqual := rhs.path.EqCondCount + rhs.path.EqOrInCondCount
-	lhsTotalIndexFilters := lhsTotalEqual + len(lhs.path.IndexFilters)
-	rhsTotalIndexFilters := rhsTotalEqual + len(rhs.path.IndexFilters)
-	lhsMoreFilters := lhsTotalEqual > 0 && (lhsTotalEqual > rhsTotalEqual || (lhsTotalEqual == rhsTotalEqual && lhsTotalIndexFilters >= rhsTotalIndexFilters))
-	rhsMoreFilters := rhsTotalEqual > 0 && (rhsTotalEqual > lhsTotalEqual || (rhsTotalEqual == lhsTotalEqual && rhsTotalIndexFilters >= lhsTotalIndexFilters))
+	lhsMoreFilters := (lhsTotalEqual > rhsTotalEqual || (lhsTotalEqual == rhsTotalEqual && len(lhs.path.IndexFilters) >= len(rhs.path.IndexFilters)))
+	rhsMoreFilters := (rhsTotalEqual > lhsTotalEqual || (rhsTotalEqual == lhsTotalEqual && len(rhs.path.IndexFilters) >= len(lhs.path.IndexFilters)))
 
 	if len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 {
 		if !lhs.path.IsTablePath() && !rhs.path.IsTablePath() && // Not a table scan
 			(lhsHasStatistics || rhsHasStatistics) && // At least one index has statistics
 			(!lhsHasStatistics || !rhsHasStatistics) { // At least one index doesn't have statistics
-			if lhsHasStatistics && lhsMoreFilters {
+			if lhsHasStatistics && lhsTotalEqual > 0 && lhsMoreFilters {
 				return 1
 			}
-			if rhsHasStatistics && rhsMoreFilters {
+			if rhsHasStatistics && rhsTotalEqual > 0 && rhsMoreFilters {
 				return -1
 			}
 		}
@@ -766,12 +764,12 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, prop *
 			prop.ExpectedCnt == math.MaxFloat64 { // Limit may affect access row count
 			threshold := float64(fixcontrol.GetIntWithDefault(sctx.GetSessionVars().OptimizerFixControl, fixcontrol.Fix45132, 1000))
 			if threshold > 0 { // set it to 0 to disable this rule
-				if lhs.path.CountAfterAccess/rhs.path.CountAfterAccess > threshold &&
-					(rhsCorrRatio < lhsCorrRatio || rhsMoreFilters) {
+				if rhsMoreFilters &&
+					(lhs.path.CountAfterAccess/rhs.path.CountAfterAccess > threshold || rhsCorrRatio < lhsCorrRatio) {
 					return -1
 				}
-				if rhs.path.CountAfterAccess/lhs.path.CountAfterAccess > threshold &&
-					lhsCorrRatio < rhsCorrRatio || lhsMoreFilters {
+				if lhsMoreFilters &&
+					(rhs.path.CountAfterAccess/lhs.path.CountAfterAccess > threshold || lhsCorrRatio < rhsCorrRatio) {
 					return 1
 				}
 			}
diff --git a/tests/integrationtest/r/imdbload.result b/tests/integrationtest/r/imdbload.result
index 787c49b2e81eb..3dc0c532d4ccb 100644
--- a/tests/integrationtest/r/imdbload.result
+++ b/tests/integrationtest/r/imdbload.result
@@ -276,9 +276,9 @@ load stats 's/imdbload_stats/movie_info.json';
 load stats 's/imdbload_stats/cast_info.json';
 explain select * from char_name where ((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436'));
 id	estRows	task	access object	operator info
-IndexLookUp_10	2.00	root		
-├─IndexRangeScan_8(Build)	2.00	cop[tikv]	table:char_name, index:itest2(imdb_index, surname_pcode, name_pcode_nf)	range:["I" -inf,"I" "E436"), ["L" -inf,"L" "E436"), keep order:false
-└─TableRowIDScan_9(Probe)	2.00	cop[tikv]	table:char_name	keep order:false
+TableReader_7	2.00	root		data:Selection_6
+└─Selection_6	2.00	cop[tikv]		or(and(eq(imdbload.char_name.imdb_index, "I"), lt(imdbload.char_name.surname_pcode, "E436")), and(eq(imdbload.char_name.imdb_index, "L"), lt(imdbload.char_name.surname_pcode, "E436")))
+  └─TableFullScan_5	4314864.00	cop[tikv]	table:char_name	keep order:false
 explain select * from char_name use index (itest2) where ((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436'));
 id	estRows	task	access object	operator info
 IndexLookUp_7	2.00	root		
@@ -350,20 +350,18 @@ CE_trace
 
 explain select * from keyword where ((phonetic_code = 'R1652') and (keyword > 'ecg-monitor' and keyword < 'killers'));
 id	estRows	task	access object	operator info
-IndexLookUp_11	901.00	root		
-├─IndexRangeScan_8(Build)	901.00	cop[tikv]	table:keyword, index:itest(phonetic_code, keyword)	range:("R1652" "ecg-monitor","R1652" "killers"), keep order:false
-└─Selection_10(Probe)	901.00	cop[tikv]		gt(imdbload.keyword.keyword, "ecg-monitor"), lt(imdbload.keyword.keyword, "killers")
-  └─TableRowIDScan_9	901.00	cop[tikv]	table:keyword	keep order:false
+TableReader_7	901.00	root		data:Selection_6
+└─Selection_6	901.00	cop[tikv]		eq(imdbload.keyword.phonetic_code, "R1652"), gt(imdbload.keyword.keyword, "ecg-monitor"), lt(imdbload.keyword.keyword, "killers")
+  └─TableFullScan_5	236627.00	cop[tikv]	table:keyword	keep order:false
 trace plan target = 'estimation' select * from keyword where ((phonetic_code = 'R1652') and (keyword > 'ecg-monitor' and keyword < 'killers'));
 CE_trace
 [{"table_name":"keyword","type":"Column Stats-Point","expr":"((phonetic_code = 'R1652'))","row_count":23480},{"table_name":"keyword","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":236627},{"table_name":"keyword","type":"Column Stats-Range","expr":"((keyword > 'ecg-monitor' and keyword < 'killers'))","row_count":44075},{"table_name":"keyword","type":"Index Stats-Point","expr":"((phonetic_code = 'R1652'))","row_count":23480},{"table_name":"keyword","type":"Index Stats-Range","expr":"((keyword >= 'ecg-m' and keyword <= 'kille'))","row_count":44036},{"table_name":"keyword","type":"Index Stats-Range","expr":"((phonetic_code = 'R1652') and (keyword > 'ecg-monitor' and keyword < 'killers'))","row_count":901},{"table_name":"keyword","type":"Table Stats-Expression-CNF","expr":"`and`(`eq`(imdbload.keyword.phonetic_code, 'R1652'), `and`(`gt`(imdbload.keyword.keyword, 'ecg-monitor'), `lt`(imdbload.keyword.keyword, 'killers')))","row_count":901}]
 
 explain select * from cast_info where (nr_order is null) and (person_role_id = 2) and (note >= '(key set pa: Florida');
 id	estRows	task	access object	operator info
-IndexLookUp_11	144633.00	root		
-├─IndexRangeScan_8(Build)	144633.00	cop[tikv]	table:cast_info, index:itest2(nr_order, person_role_id, note)	range:[NULL 2 "(key set pa: Florida",NULL 2 +inf], keep order:false
-└─Selection_10(Probe)	144633.00	cop[tikv]		ge(imdbload.cast_info.note, "(key set pa: Florida")
-  └─TableRowIDScan_9	144633.00	cop[tikv]	table:cast_info	keep order:false
+TableReader_7	144633.00	root		data:Selection_6
+└─Selection_6	144633.00	cop[tikv]		eq(imdbload.cast_info.person_role_id, 2), ge(imdbload.cast_info.note, "(key set pa: Florida"), isnull(imdbload.cast_info.nr_order)
+  └─TableFullScan_5	63475835.00	cop[tikv]	table:cast_info	keep order:false
 trace plan target = 'estimation' select * from cast_info where (nr_order is null) and (person_role_id = 2) and (note >= '(key set pa: Florida');
 CE_trace
 [{"table_name":"cast_info","type":"Column Stats-Point","expr":"((nr_order is null))","row_count":45995275},{"table_name":"cast_info","type":"Column Stats-Point","expr":"((person_role_id = 2))","row_count":2089611},{"table_name":"cast_info","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":63475835},{"table_name":"cast_info","type":"Column Stats-Range","expr":"((note >= '(key set pa: Florida' and true))","row_count":14934328},{"table_name":"cast_info","type":"Index Stats-Point","expr":"((person_role_id = 2))","row_count":2089611},{"table_name":"cast_info","type":"Index Stats-Range","expr":"((nr_order is null) and (person_role_id = 2) and (note >= '(key set pa: Florida' and true))","row_count":144633},{"table_name":"cast_info","type":"Table Stats-Expression-CNF","expr":"`and`(`isnull`(imdbload.cast_info.nr_order), `and`(`eq`(imdbload.cast_info.person_role_id, 2), `ge`(imdbload.cast_info.note, '(key set pa: Florida')))","row_count":144633},{"table_name":"cast_info","type":"Table Stats-Expression-CNF","expr":"`eq`(imdbload.cast_info.person_role_id, 2)","row_count":2089611}]

From 3ee89e06d8aaedc93c24458e0333900d2c6663bc Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Sun, 5 Jan 2025 16:22:18 -0800
Subject: [PATCH 06/23] revision3

---
 .codegpt/head                      |  1 +
 pkg/planner/core/find_best_task.go | 35 +++++++++++++++---------------
 2 files changed, 19 insertions(+), 17 deletions(-)
 create mode 100644 .codegpt/head

diff --git a/.codegpt/head b/.codegpt/head
new file mode 100644
index 0000000000000..712e0cd554e79
--- /dev/null
+++ b/.codegpt/head
@@ -0,0 +1 @@
+979ec686-8133-4fa8-8c6a-027f512c60a2
\ No newline at end of file
diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go
index 741f865aa325b..9bf7a07449ddf 100644
--- a/pkg/planner/core/find_best_task.go
+++ b/pkg/planner/core/find_best_task.go
@@ -734,29 +734,30 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, prop *
 	rhsMoreFilters := (rhsTotalEqual > lhsTotalEqual || (rhsTotalEqual == lhsTotalEqual && len(rhs.path.IndexFilters) >= len(lhs.path.IndexFilters)))
 
 	if len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 {
-		if !lhs.path.IsTablePath() && !rhs.path.IsTablePath() && // Not a table scan
-			(lhsHasStatistics || rhsHasStatistics) && // At least one index has statistics
-			(!lhsHasStatistics || !rhsHasStatistics) { // At least one index doesn't have statistics
-			if lhsHasStatistics && lhsTotalEqual > 0 && lhsMoreFilters {
+		if !lhs.path.IsTablePath() && !rhs.path.IsTablePath() { // Not a table scan
+			if (lhsHasStatistics || rhsHasStatistics) && // At least one index has statistics
+				(!lhsHasStatistics || !rhsHasStatistics) { // At least one index doesn't have statistics
+				if lhsHasStatistics && lhsTotalEqual > 0 && lhsMoreFilters {
+					return 1
+				}
+				if rhsHasStatistics && rhsTotalEqual > 0 && rhsMoreFilters {
+					return -1
+				}
+			}
+
+			lhsCorrRatio, rhsCorrRatio := 0.0, 0.0
+			if lhs.path.CorrCountAfterAccess > 0 || rhs.path.CorrCountAfterAccess > 0 {
+				lhsCorrRatio = lhs.path.CorrCountAfterAccess / lhs.path.CountAfterAccess
+				rhsCorrRatio = rhs.path.CorrCountAfterAccess / rhs.path.CountAfterAccess
+			}
+			if lhsMoreFilters && lhsCorrRatio < rhsCorrRatio {
 				return 1
 			}
-			if rhsHasStatistics && rhsTotalEqual > 0 && rhsMoreFilters {
+			if rhsMoreFilters && rhsCorrRatio < lhsCorrRatio {
 				return -1
 			}
 		}
 
-		lhsCorrRatio, rhsCorrRatio := 0.0, 0.0
-		if lhs.path.CorrCountAfterAccess > 0 || rhs.path.CorrCountAfterAccess > 0 {
-			lhsCorrRatio = lhs.path.CorrCountAfterAccess / lhs.path.CountAfterAccess
-			rhsCorrRatio = rhs.path.CorrCountAfterAccess / rhs.path.CountAfterAccess
-		}
-
-		if lhsMoreFilters && lhsCorrRatio < rhsCorrRatio {
-			return 1
-		}
-		if rhsMoreFilters && rhsCorrRatio < lhsCorrRatio {
-			return -1
-		}
 		// This rule is empirical but not always correct.
 		// If x's range row count is significantly lower than y's, for example, 1000 times, we think x is better.
 		if lhs.path.CorrCountAfterAccess > 100 && rhs.path.CorrCountAfterAccess > 100 && // to prevent some extreme cases, e.g. 0.01 : 10

From 4e006db5b3343aa471a198321479a177e6e21cdc Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Sun, 5 Jan 2025 17:06:01 -0800
Subject: [PATCH 07/23] revision4

---
 pkg/planner/core/find_best_task.go | 43 +++++++++++++++---------------
 1 file changed, 21 insertions(+), 22 deletions(-)

diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go
index 9bf7a07449ddf..d85f3efdabc86 100644
--- a/pkg/planner/core/find_best_task.go
+++ b/pkg/planner/core/find_best_task.go
@@ -730,34 +730,33 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, prop *
 	}
 	lhsTotalEqual := lhs.path.EqCondCount + lhs.path.EqOrInCondCount
 	rhsTotalEqual := rhs.path.EqCondCount + rhs.path.EqOrInCondCount
-	lhsMoreFilters := (lhsTotalEqual > rhsTotalEqual || (lhsTotalEqual == rhsTotalEqual && len(lhs.path.IndexFilters) >= len(rhs.path.IndexFilters)))
-	rhsMoreFilters := (rhsTotalEqual > lhsTotalEqual || (rhsTotalEqual == lhsTotalEqual && len(rhs.path.IndexFilters) >= len(lhs.path.IndexFilters)))
-
-	if len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 {
-		if !lhs.path.IsTablePath() && !rhs.path.IsTablePath() { // Not a table scan
-			if (lhsHasStatistics || rhsHasStatistics) && // At least one index has statistics
-				(!lhsHasStatistics || !rhsHasStatistics) { // At least one index doesn't have statistics
-				if lhsHasStatistics && lhsTotalEqual > 0 && lhsMoreFilters {
-					return 1
-				}
-				if rhsHasStatistics && rhsTotalEqual > 0 && rhsMoreFilters {
-					return -1
-				}
-			}
-
-			lhsCorrRatio, rhsCorrRatio := 0.0, 0.0
-			if lhs.path.CorrCountAfterAccess > 0 || rhs.path.CorrCountAfterAccess > 0 {
-				lhsCorrRatio = lhs.path.CorrCountAfterAccess / lhs.path.CountAfterAccess
-				rhsCorrRatio = rhs.path.CorrCountAfterAccess / rhs.path.CountAfterAccess
-			}
-			if lhsMoreFilters && lhsCorrRatio < rhsCorrRatio {
+	lhsMoreFilters := (lhsTotalEqual > rhsTotalEqual || (lhsTotalEqual > 0 && lhsTotalEqual == rhsTotalEqual && len(lhs.path.IndexFilters) >= len(rhs.path.IndexFilters)))
+	rhsMoreFilters := (rhsTotalEqual > lhsTotalEqual || (rhsTotalEqual > 0 && rhsTotalEqual == lhsTotalEqual && len(rhs.path.IndexFilters) >= len(lhs.path.IndexFilters)))
+
+	if len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 &&
+		!lhs.path.IsTablePath() && !rhs.path.IsTablePath() { // Not a table scan
+		if (lhsHasStatistics || rhsHasStatistics) && // At least one index has statistics
+			(!lhsHasStatistics || !rhsHasStatistics) { // At least one index doesn't have statistics
+			if lhsHasStatistics && lhsTotalEqual > 0 && lhsMoreFilters {
 				return 1
 			}
-			if rhsMoreFilters && rhsCorrRatio < lhsCorrRatio {
+			if rhsHasStatistics && rhsTotalEqual > 0 && rhsMoreFilters {
 				return -1
 			}
 		}
 
+		lhsCorrRatio, rhsCorrRatio := 0.0, 0.0
+		if lhs.path.CorrCountAfterAccess > 0 || rhs.path.CorrCountAfterAccess > 0 {
+			lhsCorrRatio = lhs.path.CorrCountAfterAccess / lhs.path.CountAfterAccess
+			rhsCorrRatio = rhs.path.CorrCountAfterAccess / rhs.path.CountAfterAccess
+		}
+		if lhsMoreFilters && lhsCorrRatio < rhsCorrRatio {
+			return 1
+		}
+		if rhsMoreFilters && rhsCorrRatio < lhsCorrRatio {
+			return -1
+		}
+
 		// This rule is empirical but not always correct.
 		// If x's range row count is significantly lower than y's, for example, 1000 times, we think x is better.
 		if lhs.path.CorrCountAfterAccess > 100 && rhs.path.CorrCountAfterAccess > 100 && // to prevent some extreme cases, e.g. 0.01 : 10

From 4c2a739049f94db7799a2fad2a30612109e04999 Mon Sep 17 00:00:00 2001
From: tpp <146148086+terry1purcell@users.noreply.github.com>
Date: Sun, 5 Jan 2025 17:19:14 -0800
Subject: [PATCH 08/23] Delete .codegpt/head

file is not in repo
---
 .codegpt/head | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 .codegpt/head

diff --git a/.codegpt/head b/.codegpt/head
deleted file mode 100644
index 712e0cd554e79..0000000000000
--- a/.codegpt/head
+++ /dev/null
@@ -1 +0,0 @@
-979ec686-8133-4fa8-8c6a-027f512c60a2
\ No newline at end of file

From 3f440d5d57a4fbd4721dd31e508bcc39b25ff4e9 Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Thu, 23 Jan 2025 16:48:34 -0800
Subject: [PATCH 09/23] revert for conflict

---
 pkg/planner/core/find_best_task.go | 182 +++++++++++++++++------------
 1 file changed, 105 insertions(+), 77 deletions(-)

diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go
index d85f3efdabc86..d5bcc8a703b65 100644
--- a/pkg/planner/core/find_best_task.go
+++ b/pkg/planner/core/find_best_task.go
@@ -45,7 +45,6 @@ import (
 	h "github.com/pingcap/tidb/pkg/util/hint"
 	"github.com/pingcap/tidb/pkg/util/intest"
 	"github.com/pingcap/tidb/pkg/util/logutil"
-	"github.com/pingcap/tidb/pkg/util/ranger"
 	"github.com/pingcap/tidb/pkg/util/tracing"
 	"github.com/pingcap/tipb/go-tipb"
 	"go.uber.org/zap"
@@ -710,99 +709,121 @@ func compareGlobalIndex(lhs, rhs *candidatePath) int {
 }
 
 // compareCandidates is the core of skyline pruning, which is used to decide which candidate path is better.
-// The return value is 1 if lhs is better, -1 if rhs is better, 0 if they are equivalent or not comparable.
-func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, prop *property.PhysicalProperty, lhs, rhs *candidatePath) int {
+// The first return value is 1 if lhs is better, -1 if rhs is better, 0 if they are equivalent or not comparable.
+// The 2nd return value indicates whether the "better path" is missing statistics or not.
+func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, tableInfo *model.TableInfo, prop *property.PhysicalProperty, lhs, rhs *candidatePath, preferRange bool) (int, bool) {
 	// Due to #50125, full scan on MVIndex has been disabled, so MVIndex path might lead to 'can't find a proper plan' error at the end.
 	// Avoid MVIndex path to exclude all other paths and leading to 'can't find a proper plan' error, see #49438 for an example.
 	if isMVIndexPath(lhs.path) || isMVIndexPath(rhs.path) {
-		return 0
-	}
-
-	// If one index has statistics and the other does not, choose the index with statistics if it
-	// has the same or higher number of equal/IN predicates.
-	lhsHasStatistics := statsTbl.Pseudo
-	if statsTbl != nil && lhs.path.Index != nil {
-		lhsHasStatistics = statsTbl.ColAndIdxExistenceMap.HasAnalyzed(lhs.path.Index.ID, true)
-	}
-	rhsHasStatistics := statsTbl.Pseudo
-	if statsTbl != nil && rhs.path.Index != nil {
-		rhsHasStatistics = statsTbl.ColAndIdxExistenceMap.HasAnalyzed(rhs.path.Index.ID, true)
-	}
-	lhsTotalEqual := lhs.path.EqCondCount + lhs.path.EqOrInCondCount
-	rhsTotalEqual := rhs.path.EqCondCount + rhs.path.EqOrInCondCount
-	lhsMoreFilters := (lhsTotalEqual > rhsTotalEqual || (lhsTotalEqual > 0 && lhsTotalEqual == rhsTotalEqual && len(lhs.path.IndexFilters) >= len(rhs.path.IndexFilters)))
-	rhsMoreFilters := (rhsTotalEqual > lhsTotalEqual || (rhsTotalEqual > 0 && rhsTotalEqual == lhsTotalEqual && len(rhs.path.IndexFilters) >= len(lhs.path.IndexFilters)))
-
-	if len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 &&
-		!lhs.path.IsTablePath() && !rhs.path.IsTablePath() { // Not a table scan
-		if (lhsHasStatistics || rhsHasStatistics) && // At least one index has statistics
-			(!lhsHasStatistics || !rhsHasStatistics) { // At least one index doesn't have statistics
-			if lhsHasStatistics && lhsTotalEqual > 0 && lhsMoreFilters {
-				return 1
+		return 0, false
+	}
+	// lhsPseudo is true when the table, or the index used by the lhs path, has pseudo (i.e. no) statistics.
+	// rhsPseudo is true when the table, or the index used by the rhs path, has pseudo (i.e. no) statistics.
+	//
+	// For the return value - if lhs wins (1), we return lhsPseudo. If rhs wins (-1), we return rhsPseudo.
+	// If there is no winner (0), we return false.
+	//
+	// This return value is used later in skylinePruning to determine whether we should prefer an index scan
+	// over a table scan. Allowing indexes without statistics to survive means they can win via heuristics where
+	// they otherwise would have lost on cost.
+	lhsPseudo, rhsPseudo, tablePseudo := false, false, false
+	lhsFullScan := lhs.path.IsFullScanRange(tableInfo)
+	rhsFullScan := rhs.path.IsFullScanRange(tableInfo)
+	if statsTbl != nil {
+		lhsPseudo, rhsPseudo, tablePseudo = statsTbl.HistColl.Pseudo, statsTbl.HistColl.Pseudo, statsTbl.HistColl.Pseudo
+		if len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 {
+			if !lhsFullScan && lhs.path.Index != nil {
+				if statsTbl.ColAndIdxExistenceMap.HasAnalyzed(lhs.path.Index.ID, true) {
+					lhsPseudo = false // We have statistics for the lhs index
+				} else {
+					lhsPseudo = true
+				}
 			}
-			if rhsHasStatistics && rhsTotalEqual > 0 && rhsMoreFilters {
-				return -1
+			if !rhsFullScan && rhs.path.Index != nil {
+				if statsTbl.ColAndIdxExistenceMap.HasAnalyzed(rhs.path.Index.ID, true) {
+					rhsPseudo = false // We have statistics on the rhs index
+				} else {
+					rhsPseudo = true
+				}
 			}
 		}
+	}
 
-		lhsCorrRatio, rhsCorrRatio := 0.0, 0.0
-		if lhs.path.CorrCountAfterAccess > 0 || rhs.path.CorrCountAfterAccess > 0 {
-			lhsCorrRatio = lhs.path.CorrCountAfterAccess / lhs.path.CountAfterAccess
-			rhsCorrRatio = rhs.path.CorrCountAfterAccess / rhs.path.CountAfterAccess
+	matchResult, globalResult := compareBool(lhs.isMatchProp, rhs.isMatchProp), compareGlobalIndex(lhs, rhs)
+	accessResult, comparable1 := util.CompareCol2Len(lhs.accessCondsColMap, rhs.accessCondsColMap)
+	scanResult, comparable2 := compareIndexBack(lhs, rhs)
+	sum := accessResult + scanResult + matchResult + globalResult
+
+	// First rules apply when an index doesn't have statistics and another object (index or table) has statistics
+	if (lhsPseudo || rhsPseudo) && !tablePseudo && !lhsFullScan && !rhsFullScan { // At least one index doesn't have statistics
+		// If one index has statistics and the other does not, choose the index with statistics if it
+		// has the same or higher number of equal/IN predicates.
+		if !lhsPseudo && globalResult >= 0 && sum >= 0 &&
+			lhs.path.EqOrInCondCount > 0 && lhs.path.EqOrInCondCount >= rhs.path.EqOrInCondCount {
+			return 1, false // left wins and has statistics
 		}
-		if lhsMoreFilters && lhsCorrRatio < rhsCorrRatio {
-			return 1
+		if !rhsPseudo && globalResult <= 0 && sum <= 0 &&
+			rhs.path.EqOrInCondCount > 0 && rhs.path.EqOrInCondCount >= lhs.path.EqOrInCondCount {
+			return -1, false // right wins and has statistics
 		}
-		if rhsMoreFilters && rhsCorrRatio < lhsCorrRatio {
-			return -1
+		if preferRange {
+			// keep an index without statistics if that index has more equal/IN predicates, AND:
+			// 1) there are at least 2 equal/INs
+			// 2) OR - it's a full index match for all index predicates
+			if lhsPseudo && lhs.path.EqOrInCondCount > rhs.path.EqOrInCondCount && globalResult >= 0 && sum >= 0 &&
+				(lhs.path.EqOrInCondCount > 1 || (lhs.path.EqOrInCondCount > 0 && len(lhs.indexCondsColMap) >= len(lhs.path.Index.Columns))) {
+				return 1, true // left wins and does NOT have statistics
+			}
+			if rhsPseudo && rhs.path.EqOrInCondCount > lhs.path.EqOrInCondCount && globalResult <= 0 && sum <= 0 &&
+				(rhs.path.EqOrInCondCount > 1 || (rhs.path.EqOrInCondCount > 0 && len(rhs.indexCondsColMap) >= len(rhs.path.Index.Columns))) {
+				return -1, true // right wins and does NOT have statistics
+			}
 		}
+	}
 
-		// This rule is empirical but not always correct.
-		// If x's range row count is significantly lower than y's, for example, 1000 times, we think x is better.
-		if lhs.path.CorrCountAfterAccess > 100 && rhs.path.CorrCountAfterAccess > 100 && // to prevent some extreme cases, e.g. 0.01 : 10
-			len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 && // not IndexMerge since its row count estimation is not accurate enough
-			prop.ExpectedCnt == math.MaxFloat64 { // Limit may affect access row count
-			threshold := float64(fixcontrol.GetIntWithDefault(sctx.GetSessionVars().OptimizerFixControl, fixcontrol.Fix45132, 1000))
-			if threshold > 0 { // set it to 0 to disable this rule
-				if rhsMoreFilters &&
-					(lhs.path.CountAfterAccess/rhs.path.CountAfterAccess > threshold || rhsCorrRatio < lhsCorrRatio) {
-					return -1
-				}
-				if lhsMoreFilters &&
-					(rhs.path.CountAfterAccess/lhs.path.CountAfterAccess > threshold || lhsCorrRatio < rhsCorrRatio) {
-					return 1
-				}
+	// This rule is empirical but not always correct.
+	// If x's range row count is significantly lower than y's, for example, 1000 times, we think x is better.
+	if lhs.path.CountAfterAccess > 100 && rhs.path.CountAfterAccess > 100 && // to prevent some extreme cases, e.g. 0.01 : 10
+		len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 && // not IndexMerge since its row count estimation is not accurate enough
+		prop.ExpectedCnt == math.MaxFloat64 { // Limit may affect access row count
+		threshold := float64(fixcontrol.GetIntWithDefault(sctx.GetSessionVars().OptimizerFixControl, fixcontrol.Fix45132, 1000))
+		if threshold > 0 { // set it to 0 to disable this rule
+			if lhs.path.CountAfterAccess/rhs.path.CountAfterAccess > threshold {
+				return -1, false
+			}
+			if rhs.path.CountAfterAccess/lhs.path.CountAfterAccess > threshold {
+				return 1, false
 			}
 		}
 	}
 
-	// Below compares the two candidate paths on three dimensions:
+	// Below compares the two candidate paths on four dimensions:
 	// (1): the set of columns that occurred in the access condition,
 	// (2): does it require a double scan,
 	// (3): whether or not it matches the physical property,
 	// (4): it's a global index path or not.
 	// If `x` is not worse than `y` at all factors,
 	// and there exists one factor that `x` is better than `y`, then `x` is better than `y`.
-	accessResult, comparable1 := util.CompareCol2Len(lhs.accessCondsColMap, rhs.accessCondsColMap)
 	if !comparable1 {
-		return 0
+		return 0, false
 	}
-	scanResult, comparable2 := compareIndexBack(lhs, rhs)
 	if !comparable2 {
-		return 0
+		return 0, false
 	}
-	matchResult, globalResult := compareBool(lhs.isMatchProp, rhs.isMatchProp), compareGlobalIndex(lhs, rhs)
-	sum := accessResult + scanResult + matchResult + globalResult
 	if accessResult >= 0 && scanResult >= 0 && matchResult >= 0 && globalResult >= 0 && sum > 0 {
-		return 1
+		return 1, false
 	}
 	if accessResult <= 0 && scanResult <= 0 && matchResult <= 0 && globalResult <= 0 && sum < 0 {
-		return -1
+		return -1, false
 	}
-	return 0
+	return 0, false
 }
 
 func isMatchProp(ds *logicalop.DataSource, path *util.AccessPath, prop *property.PhysicalProperty) bool {
+	if ds.Table.Type().IsClusterTable() && !prop.IsSortItemEmpty() {
+		// TableScan with cluster table can't keep order.
+		return false
+	}
 	if prop.VectorProp.VSInfo != nil && path.Index != nil && path.Index.VectorInfo != nil {
 		if path.Index == nil || path.Index.VectorInfo == nil {
 			return false
@@ -1142,6 +1163,9 @@ func getIndexMergeCandidate(ds *logicalop.DataSource, path *util.AccessPath, pro
 // there exists a path that is not worse than it at all factors and there is at least one better factor.
 func skylinePruning(ds *logicalop.DataSource, prop *property.PhysicalProperty) []*candidatePath {
 	candidates := make([]*candidatePath, 0, 4)
+	idxMissingStats := false
+	// tidb_opt_prefer_range_scan is the master switch to control index preferencing
+	preferRange := ds.SCtx().GetSessionVars().GetAllowPreferRangeScan()
 	for _, path := range ds.PossibleAccessPaths {
 		// We should check whether the possible access path is valid first.
 		if path.StoreType != kv.TiFlash && prop.IsFlashProp() {
@@ -1182,7 +1206,12 @@ func skylinePruning(ds *logicalop.DataSource, prop *property.PhysicalProperty) [
 			if candidates[i].path.StoreType == kv.TiFlash {
 				continue
 			}
-			result := compareCandidates(ds.SCtx(), ds.StatisticTable, prop, candidates[i], currentCandidate)
+			var result int
+			currentMissingStats := false
+			result, currentMissingStats = compareCandidates(ds.SCtx(), ds.StatisticTable, ds.TableInfo, prop, candidates[i], currentCandidate, preferRange)
+			if currentMissingStats {
+				idxMissingStats = true // Ensure that we track idxMissingStats across all iterations
+			}
 			if result == 1 {
 				pruned = true
 				// We can break here because the current candidate cannot prune others anymore.
@@ -1202,28 +1231,23 @@ func skylinePruning(ds *logicalop.DataSource, prop *property.PhysicalProperty) [
 		fixcontrol.Fix52869,
 		false,
 	)
-	// tidb_opt_prefer_range_scan is the master switch to control index preferencing
-	preferRange := ds.SCtx().GetSessionVars().GetAllowPreferRangeScan() &&
-		(preferMerge || (ds.TableStats.HistColl.Pseudo || ds.TableStats.RowCount < 1))
+	if preferRange {
+		// Override preferRange with the following limitations to scope
+		preferRange = preferMerge || idxMissingStats || ds.TableStats.HistColl.Pseudo || ds.TableStats.RowCount < 1
+	}
 	if preferRange && len(candidates) > 1 {
-		// If a candidate path is TiFlash-path or forced-path or MV index, we just keep them. For other candidate paths, if there exists
-		// any range scan path, we remove full scan paths and keep range scan paths.
+		// If a candidate path is TiFlash-path or forced-path or MV index or global index, we just keep them. For other
+		// candidate paths, if there exists any range scan path, we remove full scan paths and keep range scan paths.
 		preferredPaths := make([]*candidatePath, 0, len(candidates))
 		var hasRangeScanPath bool
 		for _, c := range candidates {
-			if c.path.Forced || c.path.StoreType == kv.TiFlash || (c.path.Index != nil && c.path.Index.MVIndex) {
+			if c.path.Forced || c.path.StoreType == kv.TiFlash || (c.path.Index != nil && (c.path.Index.Global || c.path.Index.MVIndex)) {
 				preferredPaths = append(preferredPaths, c)
 				continue
 			}
-			var unsignedIntHandle bool
-			if c.path.IsIntHandlePath && ds.TableInfo.PKIsHandle {
-				if pkColInfo := ds.TableInfo.GetPkColInfo(); pkColInfo != nil {
-					unsignedIntHandle = mysql.HasUnsignedFlag(pkColInfo.GetFlag())
-				}
-			}
-			if !ranger.HasFullRange(c.path.Ranges, unsignedIntHandle) {
+			if !c.path.IsFullScanRange(ds.TableInfo) {
 				// Preference plans with equals/IN predicates or where there is more filtering in the index than against the table
-				indexFilters := c.path.EqCondCount > 0 || c.path.EqOrInCondCount > 0 || len(c.path.TableFilters) < len(c.path.IndexFilters)
+				indexFilters := c.path.EqOrInCondCount > 0 || len(c.path.TableFilters) < len(c.path.IndexFilters)
 				if preferMerge || (indexFilters && (prop.IsSortItemEmpty() || c.isMatchProp)) {
 					preferredPaths = append(preferredPaths, c)
 					hasRangeScanPath = true
@@ -1421,6 +1445,10 @@ func findBestTask4LogicalDataSource(lp base.LogicalPlan, prop *property.Physical
 			if ds.PreferStoreType&h.PreferTiFlash != 0 && path.StoreType == kv.TiKV {
 				continue
 			}
+			// prefer tikv, while current table path is tiflash, skip it.
+			if ds.PreferStoreType&h.PreferTiKV != 0 && path.StoreType == kv.TiFlash {
+				continue
+			}
 			idxMergeTask, err := convertToIndexMergeScan(ds, prop, candidate, opt)
 			if err != nil {
 				return nil, 0, err

From 06cbd70390d86a544bd875400a00896d6e58ba33 Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Thu, 23 Jan 2025 17:01:01 -0800
Subject: [PATCH 10/23] re-add code change after conflict resolution

---
 pkg/planner/core/find_best_task.go | 33 +++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go
index d5bcc8a703b65..ebda30b3ba48c 100644
--- a/pkg/planner/core/find_best_task.go
+++ b/pkg/planner/core/find_best_task.go
@@ -783,16 +783,29 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, tableI
 
 	// This rule is empirical but not always correct.
 	// If x's range row count is significantly lower than y's, for example, 1000 times, we think x is better.
-	if lhs.path.CountAfterAccess > 100 && rhs.path.CountAfterAccess > 100 && // to prevent some extreme cases, e.g. 0.01 : 10
-		len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 && // not IndexMerge since its row count estimation is not accurate enough
-		prop.ExpectedCnt == math.MaxFloat64 { // Limit may affect access row count
-		threshold := float64(fixcontrol.GetIntWithDefault(sctx.GetSessionVars().OptimizerFixControl, fixcontrol.Fix45132, 1000))
-		if threshold > 0 { // set it to 0 to disable this rule
-			if lhs.path.CountAfterAccess/rhs.path.CountAfterAccess > threshold {
-				return -1, false
-			}
-			if rhs.path.CountAfterAccess/lhs.path.CountAfterAccess > threshold {
-				return 1, false
+	if len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 { // not IndexMerge since its row count estimation is not accurate enough
+		lhsCorrRatio, rhsCorrRatio := 0.0, 0.0
+		if lhs.path.CorrCountAfterAccess > 0 || rhs.path.CorrCountAfterAccess > 0 {
+			lhsCorrRatio = lhs.path.CorrCountAfterAccess / lhs.path.CountAfterAccess
+			rhsCorrRatio = rhs.path.CorrCountAfterAccess / rhs.path.CountAfterAccess
+		}
+		if globalResult >= 0 && sum >= 0 && lhsCorrRatio < rhsCorrRatio {
+			return 1, false
+		}
+		if globalResult <= 0 && sum <= 0 && rhsCorrRatio < lhsCorrRatio {
+			return -1, false
+		}
+
+		if lhs.path.CountAfterAccess > 100 && rhs.path.CountAfterAccess > 100 && // to prevent some extreme cases, e.g. 0.01 : 10
+			prop.ExpectedCnt == math.MaxFloat64 { // Limit may affect access row count
+			threshold := float64(fixcontrol.GetIntWithDefault(sctx.GetSessionVars().OptimizerFixControl, fixcontrol.Fix45132, 1000))
+			if threshold > 0 { // set it to 0 to disable this rule
+				if lhs.path.CountAfterAccess/rhs.path.CountAfterAccess > threshold {
+					return -1, false
+				}
+				if rhs.path.CountAfterAccess/lhs.path.CountAfterAccess > threshold {
+					return 1, false
+				}
 			}
 		}
 	}

From 1096071f8f517ccd205b64f34cdab6195a3ffdad Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Thu, 23 Jan 2025 17:40:11 -0800
Subject: [PATCH 11/23] revision1

---
 pkg/planner/core/find_best_task.go | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go
index ebda30b3ba48c..5d9329c809bc2 100644
--- a/pkg/planner/core/find_best_task.go
+++ b/pkg/planner/core/find_best_task.go
@@ -781,21 +781,20 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, tableI
 		}
 	}
 
-	// This rule is empirical but not always correct.
-	// If x's range row count is significantly lower than y's, for example, 1000 times, we think x is better.
 	if len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 { // not IndexMerge since its row count estimation is not accurate enough
 		lhsCorrRatio, rhsCorrRatio := 0.0, 0.0
 		if lhs.path.CorrCountAfterAccess > 0 || rhs.path.CorrCountAfterAccess > 0 {
 			lhsCorrRatio = lhs.path.CorrCountAfterAccess / lhs.path.CountAfterAccess
 			rhsCorrRatio = rhs.path.CorrCountAfterAccess / rhs.path.CountAfterAccess
 		}
-		if globalResult >= 0 && sum >= 0 && lhsCorrRatio < rhsCorrRatio {
+		if globalResult >= 0 && sum >= 0 && !lhsFullScan && lhsCorrRatio < rhsCorrRatio {
 			return 1, false
 		}
-		if globalResult <= 0 && sum <= 0 && rhsCorrRatio < lhsCorrRatio {
+		if globalResult <= 0 && sum <= 0 && !rhsFullScan && rhsCorrRatio < lhsCorrRatio {
 			return -1, false
 		}
-
+		// This rule is empirical but not always correct.
+		// If x's range row count is significantly lower than y's, for example, 1000 times, we think x is better.
 		if lhs.path.CountAfterAccess > 100 && rhs.path.CountAfterAccess > 100 && // to prevent some extreme cases, e.g. 0.01 : 10
 			prop.ExpectedCnt == math.MaxFloat64 { // Limit may affect access row count
 			threshold := float64(fixcontrol.GetIntWithDefault(sctx.GetSessionVars().OptimizerFixControl, fixcontrol.Fix45132, 1000))

From a87a38953deaf572430c0dfb0193686f5a113c55 Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Thu, 23 Jan 2025 18:53:13 -0800
Subject: [PATCH 12/23] revision2

---
 pkg/planner/core/find_best_task.go | 55 ++++++++++++++++--------------
 1 file changed, 30 insertions(+), 25 deletions(-)

diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go
index 5d9329c809bc2..af61f39a6560f 100644
--- a/pkg/planner/core/find_best_task.go
+++ b/pkg/planner/core/find_best_task.go
@@ -708,6 +708,21 @@ func compareGlobalIndex(lhs, rhs *candidatePath) int {
 	return compareBool(lhs.path.Index.Global, rhs.path.Index.Global)
 }
 
+func compareCorrRatio(lhs, rhs *candidatePath) (int, float64) {
+	lhsCorrRatio, rhsCorrRatio := 0.0, 0.0
+	if lhs.path.CorrCountAfterAccess > 0 || rhs.path.CorrCountAfterAccess > 0 {
+		lhsCorrRatio = lhs.path.CorrCountAfterAccess / lhs.path.CountAfterAccess
+		rhsCorrRatio = rhs.path.CorrCountAfterAccess / rhs.path.CountAfterAccess
+	}
+	if lhsCorrRatio > 0 {
+		return 1, lhsCorrRatio
+	}
+	if rhsCorrRatio > 0 {
+		return -1, rhsCorrRatio
+	}
+	return 0, 0
+}
+
 // compareCandidates is the core of skyline pruning, which is used to decide which candidate path is better.
 // The first return value is 1 if lhs is better, -1 if rhs is better, 0 if they are equivalent or not comparable.
 // The 2nd return value indicates whether the "better path" is missing statistics or not.
@@ -752,7 +767,9 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, tableI
 	matchResult, globalResult := compareBool(lhs.isMatchProp, rhs.isMatchProp), compareGlobalIndex(lhs, rhs)
 	accessResult, comparable1 := util.CompareCol2Len(lhs.accessCondsColMap, rhs.accessCondsColMap)
 	scanResult, comparable2 := compareIndexBack(lhs, rhs)
-	sum := accessResult + scanResult + matchResult + globalResult
+	// corrResult returns the left vs right comparison as a boolean, but also the actual ratio - which will be used in future
+	corrResult, _ := compareCorrRatio(lhs, rhs)
+	sum := accessResult + scanResult + matchResult + globalResult + corrResult
 
 	// First rules apply when an index doesn't have statistics and another object (index or table) has statistics
 	if (lhsPseudo || rhsPseudo) && !tablePseudo && !lhsFullScan && !rhsFullScan { // At least one index doesn't have statistics
@@ -781,30 +798,18 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, tableI
 		}
 	}
 
-	if len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 { // not IndexMerge since its row count estimation is not accurate enough
-		lhsCorrRatio, rhsCorrRatio := 0.0, 0.0
-		if lhs.path.CorrCountAfterAccess > 0 || rhs.path.CorrCountAfterAccess > 0 {
-			lhsCorrRatio = lhs.path.CorrCountAfterAccess / lhs.path.CountAfterAccess
-			rhsCorrRatio = rhs.path.CorrCountAfterAccess / rhs.path.CountAfterAccess
-		}
-		if globalResult >= 0 && sum >= 0 && !lhsFullScan && lhsCorrRatio < rhsCorrRatio {
-			return 1, false
-		}
-		if globalResult <= 0 && sum <= 0 && !rhsFullScan && rhsCorrRatio < lhsCorrRatio {
-			return -1, false
-		}
-		// This rule is empirical but not always correct.
-		// If x's range row count is significantly lower than y's, for example, 1000 times, we think x is better.
-		if lhs.path.CountAfterAccess > 100 && rhs.path.CountAfterAccess > 100 && // to prevent some extreme cases, e.g. 0.01 : 10
-			prop.ExpectedCnt == math.MaxFloat64 { // Limit may affect access row count
-			threshold := float64(fixcontrol.GetIntWithDefault(sctx.GetSessionVars().OptimizerFixControl, fixcontrol.Fix45132, 1000))
-			if threshold > 0 { // set it to 0 to disable this rule
-				if lhs.path.CountAfterAccess/rhs.path.CountAfterAccess > threshold {
-					return -1, false
-				}
-				if rhs.path.CountAfterAccess/lhs.path.CountAfterAccess > threshold {
-					return 1, false
-				}
+	// This rule is empirical but not always correct.
+	// If x's range row count is significantly lower than y's, for example, 1000 times, we think x is better.
+	if len(lhs.path.PartialIndexPaths) == 0 && len(rhs.path.PartialIndexPaths) == 0 && // not IndexMerge since its row count estimation is not accurate enough
+		lhs.path.CountAfterAccess > 100 && rhs.path.CountAfterAccess > 100 && // to prevent some extreme cases, e.g. 0.01 : 10
+		prop.ExpectedCnt == math.MaxFloat64 { // Limit may affect access row count
+		threshold := float64(fixcontrol.GetIntWithDefault(sctx.GetSessionVars().OptimizerFixControl, fixcontrol.Fix45132, 1000))
+		if threshold > 0 { // set it to 0 to disable this rule
+			if lhs.path.CountAfterAccess/rhs.path.CountAfterAccess > threshold && corrResult <= 0 {
+				return -1, false
+			}
+			if rhs.path.CountAfterAccess/lhs.path.CountAfterAccess > threshold && corrResult >= 0 {
+				return 1, false
 			}
 		}
 	}

From b183768ae7b4abe8f780108a5c4bd55efd91f59f Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Thu, 23 Jan 2025 19:26:01 -0800
Subject: [PATCH 13/23] testcase1

---
 tests/integrationtest/r/imdbload.result | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/tests/integrationtest/r/imdbload.result b/tests/integrationtest/r/imdbload.result
index 3dc0c532d4ccb..787c49b2e81eb 100644
--- a/tests/integrationtest/r/imdbload.result
+++ b/tests/integrationtest/r/imdbload.result
@@ -276,9 +276,9 @@ load stats 's/imdbload_stats/movie_info.json';
 load stats 's/imdbload_stats/cast_info.json';
 explain select * from char_name where ((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436'));
 id	estRows	task	access object	operator info
-TableReader_7	2.00	root		data:Selection_6
-└─Selection_6	2.00	cop[tikv]		or(and(eq(imdbload.char_name.imdb_index, "I"), lt(imdbload.char_name.surname_pcode, "E436")), and(eq(imdbload.char_name.imdb_index, "L"), lt(imdbload.char_name.surname_pcode, "E436")))
-  └─TableFullScan_5	4314864.00	cop[tikv]	table:char_name	keep order:false
+IndexLookUp_10	2.00	root		
+├─IndexRangeScan_8(Build)	2.00	cop[tikv]	table:char_name, index:itest2(imdb_index, surname_pcode, name_pcode_nf)	range:["I" -inf,"I" "E436"), ["L" -inf,"L" "E436"), keep order:false
+└─TableRowIDScan_9(Probe)	2.00	cop[tikv]	table:char_name	keep order:false
 explain select * from char_name use index (itest2) where ((imdb_index = 'I') and (surname_pcode < 'E436')) or ((imdb_index = 'L') and (surname_pcode < 'E436'));
 id	estRows	task	access object	operator info
 IndexLookUp_7	2.00	root		
@@ -350,18 +350,20 @@ CE_trace
 
 explain select * from keyword where ((phonetic_code = 'R1652') and (keyword > 'ecg-monitor' and keyword < 'killers'));
 id	estRows	task	access object	operator info
-TableReader_7	901.00	root		data:Selection_6
-└─Selection_6	901.00	cop[tikv]		eq(imdbload.keyword.phonetic_code, "R1652"), gt(imdbload.keyword.keyword, "ecg-monitor"), lt(imdbload.keyword.keyword, "killers")
-  └─TableFullScan_5	236627.00	cop[tikv]	table:keyword	keep order:false
+IndexLookUp_11	901.00	root		
+├─IndexRangeScan_8(Build)	901.00	cop[tikv]	table:keyword, index:itest(phonetic_code, keyword)	range:("R1652" "ecg-monitor","R1652" "killers"), keep order:false
+└─Selection_10(Probe)	901.00	cop[tikv]		gt(imdbload.keyword.keyword, "ecg-monitor"), lt(imdbload.keyword.keyword, "killers")
+  └─TableRowIDScan_9	901.00	cop[tikv]	table:keyword	keep order:false
 trace plan target = 'estimation' select * from keyword where ((phonetic_code = 'R1652') and (keyword > 'ecg-monitor' and keyword < 'killers'));
 CE_trace
 [{"table_name":"keyword","type":"Column Stats-Point","expr":"((phonetic_code = 'R1652'))","row_count":23480},{"table_name":"keyword","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":236627},{"table_name":"keyword","type":"Column Stats-Range","expr":"((keyword > 'ecg-monitor' and keyword < 'killers'))","row_count":44075},{"table_name":"keyword","type":"Index Stats-Point","expr":"((phonetic_code = 'R1652'))","row_count":23480},{"table_name":"keyword","type":"Index Stats-Range","expr":"((keyword >= 'ecg-m' and keyword <= 'kille'))","row_count":44036},{"table_name":"keyword","type":"Index Stats-Range","expr":"((phonetic_code = 'R1652') and (keyword > 'ecg-monitor' and keyword < 'killers'))","row_count":901},{"table_name":"keyword","type":"Table Stats-Expression-CNF","expr":"`and`(`eq`(imdbload.keyword.phonetic_code, 'R1652'), `and`(`gt`(imdbload.keyword.keyword, 'ecg-monitor'), `lt`(imdbload.keyword.keyword, 'killers')))","row_count":901}]
 
 explain select * from cast_info where (nr_order is null) and (person_role_id = 2) and (note >= '(key set pa: Florida');
 id	estRows	task	access object	operator info
-TableReader_7	144633.00	root		data:Selection_6
-└─Selection_6	144633.00	cop[tikv]		eq(imdbload.cast_info.person_role_id, 2), ge(imdbload.cast_info.note, "(key set pa: Florida"), isnull(imdbload.cast_info.nr_order)
-  └─TableFullScan_5	63475835.00	cop[tikv]	table:cast_info	keep order:false
+IndexLookUp_11	144633.00	root		
+├─IndexRangeScan_8(Build)	144633.00	cop[tikv]	table:cast_info, index:itest2(nr_order, person_role_id, note)	range:[NULL 2 "(key set pa: Florida",NULL 2 +inf], keep order:false
+└─Selection_10(Probe)	144633.00	cop[tikv]		ge(imdbload.cast_info.note, "(key set pa: Florida")
+  └─TableRowIDScan_9	144633.00	cop[tikv]	table:cast_info	keep order:false
 trace plan target = 'estimation' select * from cast_info where (nr_order is null) and (person_role_id = 2) and (note >= '(key set pa: Florida');
 CE_trace
 [{"table_name":"cast_info","type":"Column Stats-Point","expr":"((nr_order is null))","row_count":45995275},{"table_name":"cast_info","type":"Column Stats-Point","expr":"((person_role_id = 2))","row_count":2089611},{"table_name":"cast_info","type":"Column Stats-Range","expr":"((id >= -9223372036854775808 and id <= 9223372036854775807))","row_count":63475835},{"table_name":"cast_info","type":"Column Stats-Range","expr":"((note >= '(key set pa: Florida' and true))","row_count":14934328},{"table_name":"cast_info","type":"Index Stats-Point","expr":"((person_role_id = 2))","row_count":2089611},{"table_name":"cast_info","type":"Index Stats-Range","expr":"((nr_order is null) and (person_role_id = 2) and (note >= '(key set pa: Florida' and true))","row_count":144633},{"table_name":"cast_info","type":"Table Stats-Expression-CNF","expr":"`and`(`isnull`(imdbload.cast_info.nr_order), `and`(`eq`(imdbload.cast_info.person_role_id, 2), `ge`(imdbload.cast_info.note, '(key set pa: Florida')))","row_count":144633},{"table_name":"cast_info","type":"Table Stats-Expression-CNF","expr":"`eq`(imdbload.cast_info.person_role_id, 2)","row_count":2089611}]

From 301d616532ceea01abbd1446e7347391fa504aac Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Fri, 24 Jan 2025 04:30:56 -0800
Subject: [PATCH 14/23] testcase2

---
 pkg/planner/core/find_best_task.go | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go
index af61f39a6560f..bf04046a0eeae 100644
--- a/pkg/planner/core/find_best_task.go
+++ b/pkg/planner/core/find_best_task.go
@@ -767,8 +767,11 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, tableI
 	matchResult, globalResult := compareBool(lhs.isMatchProp, rhs.isMatchProp), compareGlobalIndex(lhs, rhs)
 	accessResult, comparable1 := util.CompareCol2Len(lhs.accessCondsColMap, rhs.accessCondsColMap)
 	scanResult, comparable2 := compareIndexBack(lhs, rhs)
-	// corrResult returns the left vs right comparison as a boolean, but also the actual ratio - which will be used in future
-	corrResult, _ := compareCorrRatio(lhs, rhs)
+	corrResult := 0
+	if lhsPseudo == rhsPseudo {
+		// corrResult returns the left vs right comparison as a boolean, but also the actual ratio - which will be used in future
+		corrResult, _ = compareCorrRatio(lhs, rhs)
+	}
 	sum := accessResult + scanResult + matchResult + globalResult + corrResult
 
 	// First rules apply when an index doesn't have statistics and another object (index or table) has statistics

From 5751476bd7616e10e40d01dae9583a060d5fd5aa Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Fri, 24 Jan 2025 17:37:04 -0800
Subject: [PATCH 15/23] code fix1

---
 pkg/planner/core/find_best_task.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go
index bf04046a0eeae..d6f5793ddd4a8 100644
--- a/pkg/planner/core/find_best_task.go
+++ b/pkg/planner/core/find_best_task.go
@@ -714,10 +714,10 @@ func compareCorrRatio(lhs, rhs *candidatePath) (int, float64) {
 		lhsCorrRatio = lhs.path.CorrCountAfterAccess / lhs.path.CountAfterAccess
 		rhsCorrRatio = rhs.path.CorrCountAfterAccess / rhs.path.CountAfterAccess
 	}
-	if lhsCorrRatio > 0 {
+	if lhsCorrRatio < rhsCorrRatio {
 		return 1, lhsCorrRatio
 	}
-	if rhsCorrRatio > 0 {
+	if rhsCorrRatio < lhsCorrRatio {
 		return -1, rhsCorrRatio
 	}
 	return 0, 0

From 8e94e2fa3bf489c207e8e49593f4481fe69430bc Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Fri, 24 Jan 2025 18:26:05 -0800
Subject: [PATCH 16/23] code fix2

---
 pkg/planner/cardinality/row_count_index.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/planner/cardinality/row_count_index.go b/pkg/planner/cardinality/row_count_index.go
index 78dea02c59d52..a50c50b100d53 100644
--- a/pkg/planner/cardinality/row_count_index.go
+++ b/pkg/planner/cardinality/row_count_index.go
@@ -529,7 +529,7 @@ func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll
 		idxLowBound /= 0.9
 	}
 	// corrsel is the selectivity of the most filtering column
-	corrsel = min(idxLowBound, singleColumnEstResults[0])
+	corrsel = max(idxLowBound, singleColumnEstResults[0])
 	minTwoCol := min(singleColumnEstResults[0], singleColumnEstResults[1], idxLowBound)
 	multTwoCol := singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1])
 	if l == 2 {

From 31f6ede157b96101d315dc9410f2d88be4c3d5b9 Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Fri, 24 Jan 2025 19:04:59 -0800
Subject: [PATCH 17/23] testcase2

---
 pkg/planner/core/find_best_task.go                   |  2 +-
 .../r/planner/cardinality/selectivity.result         | 12 ++++++------
 .../t/planner/cardinality/selectivity.test           |  4 ++--
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go
index d6f5793ddd4a8..1b9c0a88c49fb 100644
--- a/pkg/planner/core/find_best_task.go
+++ b/pkg/planner/core/find_best_task.go
@@ -768,7 +768,7 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, tableI
 	accessResult, comparable1 := util.CompareCol2Len(lhs.accessCondsColMap, rhs.accessCondsColMap)
 	scanResult, comparable2 := compareIndexBack(lhs, rhs)
 	corrResult := 0
-	if lhsPseudo == rhsPseudo {
+	if lhsPseudo == rhsPseudo && !lhsFullScan && !rhsFullScan {
 		// corrResult returns the left vs right comparison as a boolean, but also the actual ratio - which will be used in future
 		corrResult, _ = compareCorrRatio(lhs, rhs)
 	}
diff --git a/tests/integrationtest/r/planner/cardinality/selectivity.result b/tests/integrationtest/r/planner/cardinality/selectivity.result
index 89bcbb5ce9da3..c8ef8f137ee0f 100644
--- a/tests/integrationtest/r/planner/cardinality/selectivity.result
+++ b/tests/integrationtest/r/planner/cardinality/selectivity.result
@@ -1223,15 +1223,15 @@ insert into t values ('tw', 0);
 insert into t values ('tw', 0);
 insert into t values ('tw', 0);
 analyze table t all columns;
-explain select * from t where a = 'tw' and b < 0;
+explain format='brief' select * from t where a = 'tw' and b < 0;
 id	estRows	task	access object	operator info
-IndexReader_6	1.00	root		index:IndexRangeScan_5
-└─IndexRangeScan_5	1.00	cop[tikv]	table:t, index:idx(a, b)	range:["tw" -inf,"tw" 0), keep order:false
+IndexReader	1.00	root		index:IndexRangeScan
+└─IndexRangeScan	1.00	cop[tikv]	table:t, index:idx(a, b)	range:["tw" -inf,"tw" 0), keep order:false
 set @@tidb_opt_fix_control = '47400:on';
-explain select * from t where a = 'tw' and b < 0;
+explain format='brief' select * from t where a = 'tw' and b < 0;
 id	estRows	task	access object	operator info
-IndexReader_6	0.00	root		index:IndexRangeScan_5
-└─IndexRangeScan_5	0.00	cop[tikv]	table:t, index:idx(a, b)	range:["tw" -inf,"tw" 0), keep order:false
+IndexReader	0.00	root		index:IndexRangeScan
+└─IndexRangeScan	0.00	cop[tikv]	table:t, index:idx(a, b)	range:["tw" -inf,"tw" 0), keep order:false
 set @@tidb_opt_fix_control = '47400:off';
 drop table if exists t;
 create table t(id int auto_increment, kid int, pid int, primary key(id), key(kid, pid));
diff --git a/tests/integrationtest/t/planner/cardinality/selectivity.test b/tests/integrationtest/t/planner/cardinality/selectivity.test
index b865738ef56bb..a953f45a821e8 100644
--- a/tests/integrationtest/t/planner/cardinality/selectivity.test
+++ b/tests/integrationtest/t/planner/cardinality/selectivity.test
@@ -646,9 +646,9 @@ insert into t values ('tw', 0);
 insert into t values ('tw', 0);
 insert into t values ('tw', 0);
 analyze table t all columns;
-explain select * from t where a = 'tw' and b < 0;
+explain format='brief' select * from t where a = 'tw' and b < 0;
 set @@tidb_opt_fix_control = '47400:on';
-explain select * from t where a = 'tw' and b < 0;
+explain format='brief' select * from t where a = 'tw' and b < 0;
 set @@tidb_opt_fix_control = '47400:off';
 
 # TestSelectCombinedLowBound

From ed70dac45a69c98dba42af67b3b526f8e8ac0547 Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Fri, 24 Jan 2025 19:46:12 -0800
Subject: [PATCH 18/23] testcase3

---
 pkg/planner/core/find_best_task.go                   |  2 ++
 .../r/planner/cardinality/selectivity.result         | 12 ++++++------
 .../t/planner/cardinality/selectivity.test           |  4 ++--
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go
index 1b9c0a88c49fb..8bf955c1eb356 100644
--- a/pkg/planner/core/find_best_task.go
+++ b/pkg/planner/core/find_best_task.go
@@ -768,6 +768,8 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, tableI
 	accessResult, comparable1 := util.CompareCol2Len(lhs.accessCondsColMap, rhs.accessCondsColMap)
 	scanResult, comparable2 := compareIndexBack(lhs, rhs)
 	corrResult := 0
+	// corrResult focuses on comparing competing indexes with statistics - potentially those index estimates may have an
+	// implied correlation. Thus - skip it if lhs & rhs differ in whether their statistics are valid, or if either is a full scan
 	if lhsPseudo == rhsPseudo && !lhsFullScan && !rhsFullScan {
 		// corrResult returns the left vs right comparison as a boolean, but also the actual ratio - which will be used in future
 		corrResult, _ = compareCorrRatio(lhs, rhs)
diff --git a/tests/integrationtest/r/planner/cardinality/selectivity.result b/tests/integrationtest/r/planner/cardinality/selectivity.result
index c8ef8f137ee0f..89bcbb5ce9da3 100644
--- a/tests/integrationtest/r/planner/cardinality/selectivity.result
+++ b/tests/integrationtest/r/planner/cardinality/selectivity.result
@@ -1223,15 +1223,15 @@ insert into t values ('tw', 0);
 insert into t values ('tw', 0);
 insert into t values ('tw', 0);
 analyze table t all columns;
-explain format='brief' select * from t where a = 'tw' and b < 0;
+explain select * from t where a = 'tw' and b < 0;
 id	estRows	task	access object	operator info
-IndexReader	1.00	root		index:IndexRangeScan
-└─IndexRangeScan	1.00	cop[tikv]	table:t, index:idx(a, b)	range:["tw" -inf,"tw" 0), keep order:false
+IndexReader_6	1.00	root		index:IndexRangeScan_5
+└─IndexRangeScan_5	1.00	cop[tikv]	table:t, index:idx(a, b)	range:["tw" -inf,"tw" 0), keep order:false
 set @@tidb_opt_fix_control = '47400:on';
-explain format='brief' select * from t where a = 'tw' and b < 0;
+explain select * from t where a = 'tw' and b < 0;
 id	estRows	task	access object	operator info
-IndexReader	0.00	root		index:IndexRangeScan
-└─IndexRangeScan	0.00	cop[tikv]	table:t, index:idx(a, b)	range:["tw" -inf,"tw" 0), keep order:false
+IndexReader_6	0.00	root		index:IndexRangeScan_5
+└─IndexRangeScan_5	0.00	cop[tikv]	table:t, index:idx(a, b)	range:["tw" -inf,"tw" 0), keep order:false
 set @@tidb_opt_fix_control = '47400:off';
 drop table if exists t;
 create table t(id int auto_increment, kid int, pid int, primary key(id), key(kid, pid));
diff --git a/tests/integrationtest/t/planner/cardinality/selectivity.test b/tests/integrationtest/t/planner/cardinality/selectivity.test
index a953f45a821e8..b865738ef56bb 100644
--- a/tests/integrationtest/t/planner/cardinality/selectivity.test
+++ b/tests/integrationtest/t/planner/cardinality/selectivity.test
@@ -646,9 +646,9 @@ insert into t values ('tw', 0);
 insert into t values ('tw', 0);
 insert into t values ('tw', 0);
 analyze table t all columns;
-explain format='brief' select * from t where a = 'tw' and b < 0;
+explain select * from t where a = 'tw' and b < 0;
 set @@tidb_opt_fix_control = '47400:on';
-explain format='brief' select * from t where a = 'tw' and b < 0;
+explain select * from t where a = 'tw' and b < 0;
 set @@tidb_opt_fix_control = '47400:off';
 
 # TestSelectCombinedLowBound

From 5e870f3510708fc14c7f5dee880f0cd9a34599ba Mon Sep 17 00:00:00 2001
From: 3pointer <luancheng@pingcap.com>
Date: Thu, 19 Dec 2024 10:34:00 +0800
Subject: [PATCH 19/23] compact restore: use closure to initial snapshot
 restore checkpoint (#58146)

close pingcap/tidb#58237
---
 br/pkg/restore/snap_client/client.go | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/br/pkg/restore/snap_client/client.go b/br/pkg/restore/snap_client/client.go
index 9d03c080356f5..6048f2b8dd191 100644
--- a/br/pkg/restore/snap_client/client.go
+++ b/br/pkg/restore/snap_client/client.go
@@ -78,7 +78,10 @@ const minBatchDdlSize = 1
 
 type SnapClient struct {
 	restorer restore.SstRestorer
+<<<<<<< HEAD
 	importer *SnapFileImporter
+=======
+>>>>>>> 659e3e73dd (compact restore: use closure to initial snapshot restore checkpoint (#58146))
 	// Use a closure to lazy load checkpoint runner
 	getRestorerFn func(*checkpoint.CheckpointRunner[checkpoint.RestoreKeyType, checkpoint.RestoreValueType]) restore.SstRestorer
 	// Tool clients used by SnapClient
@@ -588,7 +591,11 @@ func (rc *SnapClient) initClients(ctx context.Context, backend *backuppb.Storage
 		}
 		// Raw/Txn restore are not support checkpoint for now
 		rc.getRestorerFn = func(checkpointRunner *checkpoint.CheckpointRunner[checkpoint.RestoreKeyType, checkpoint.RestoreValueType]) restore.SstRestorer {
+<<<<<<< HEAD
 			return restore.NewSimpleSstRestorer(ctx, rc.importer, rc.workerPool, nil)
+=======
+			return restore.NewSimpleSstRestorer(ctx, fileImporter, rc.workerPool, nil)
+>>>>>>> 659e3e73dd (compact restore: use closure to initial snapshot restore checkpoint (#58146))
 		}
 	} else {
 		// or create a fileImporter with the cluster API version
@@ -598,7 +605,11 @@ func (rc *SnapClient) initClients(ctx context.Context, backend *backuppb.Storage
 			return errors.Trace(err)
 		}
 		rc.getRestorerFn = func(checkpointRunner *checkpoint.CheckpointRunner[checkpoint.RestoreKeyType, checkpoint.RestoreValueType]) restore.SstRestorer {
+<<<<<<< HEAD
 			return restore.NewMultiTablesRestorer(ctx, rc.importer, rc.workerPool, checkpointRunner)
+=======
+			return restore.NewMultiTablesRestorer(ctx, fileImporter, rc.workerPool, checkpointRunner)
+>>>>>>> 659e3e73dd (compact restore: use closure to initial snapshot restore checkpoint (#58146))
 		}
 	}
 	return nil

From f8b5ddadc2ab482bd57a200188b5eb5b5f556e4a Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Mon, 27 Jan 2025 16:43:35 -0800
Subject: [PATCH 20/23] rebase3

---
 pkg/expression/function_traits.go | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pkg/expression/function_traits.go b/pkg/expression/function_traits.go
index 978b8eba79eaf..3e81380bc4a67 100644
--- a/pkg/expression/function_traits.go
+++ b/pkg/expression/function_traits.go
@@ -99,6 +99,10 @@ var IllegalFunctions4GeneratedColumns = map[string]struct{}{
 	ast.CurrentUser:          {},
 	ast.Curtime:              {},
 	ast.Database:             {},
+<<<<<<< HEAD
+=======
+	ast.Encrypt:              {},
+>>>>>>> 77866d1f46 (expression: Update generated column function restrictions (#58308))
 	ast.FoundRows:            {},
 	ast.GetLock:              {},
 	ast.GetVar:               {},

From c985de6cc6491270d9c1a709ea628b66b9002300 Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Mon, 27 Jan 2025 17:38:38 -0800
Subject: [PATCH 21/23] rebase4

---
 br/pkg/restore/snap_client/client.go | 11 -----------
 pkg/expression/function_traits.go    |  4 ----
 2 files changed, 15 deletions(-)

diff --git a/br/pkg/restore/snap_client/client.go b/br/pkg/restore/snap_client/client.go
index 6048f2b8dd191..9d03c080356f5 100644
--- a/br/pkg/restore/snap_client/client.go
+++ b/br/pkg/restore/snap_client/client.go
@@ -78,10 +78,7 @@ const minBatchDdlSize = 1
 
 type SnapClient struct {
 	restorer restore.SstRestorer
-<<<<<<< HEAD
 	importer *SnapFileImporter
-=======
->>>>>>> 659e3e73dd (compact restore: use closure to initial snapshot restore checkpoint (#58146))
 	// Use a closure to lazy load checkpoint runner
 	getRestorerFn func(*checkpoint.CheckpointRunner[checkpoint.RestoreKeyType, checkpoint.RestoreValueType]) restore.SstRestorer
 	// Tool clients used by SnapClient
@@ -591,11 +588,7 @@ func (rc *SnapClient) initClients(ctx context.Context, backend *backuppb.Storage
 		}
 		// Raw/Txn restore are not support checkpoint for now
 		rc.getRestorerFn = func(checkpointRunner *checkpoint.CheckpointRunner[checkpoint.RestoreKeyType, checkpoint.RestoreValueType]) restore.SstRestorer {
-<<<<<<< HEAD
 			return restore.NewSimpleSstRestorer(ctx, rc.importer, rc.workerPool, nil)
-=======
-			return restore.NewSimpleSstRestorer(ctx, fileImporter, rc.workerPool, nil)
->>>>>>> 659e3e73dd (compact restore: use closure to initial snapshot restore checkpoint (#58146))
 		}
 	} else {
 		// or create a fileImporter with the cluster API version
@@ -605,11 +598,7 @@ func (rc *SnapClient) initClients(ctx context.Context, backend *backuppb.Storage
 			return errors.Trace(err)
 		}
 		rc.getRestorerFn = func(checkpointRunner *checkpoint.CheckpointRunner[checkpoint.RestoreKeyType, checkpoint.RestoreValueType]) restore.SstRestorer {
-<<<<<<< HEAD
 			return restore.NewMultiTablesRestorer(ctx, rc.importer, rc.workerPool, checkpointRunner)
-=======
-			return restore.NewMultiTablesRestorer(ctx, fileImporter, rc.workerPool, checkpointRunner)
->>>>>>> 659e3e73dd (compact restore: use closure to initial snapshot restore checkpoint (#58146))
 		}
 	}
 	return nil
diff --git a/pkg/expression/function_traits.go b/pkg/expression/function_traits.go
index 3e81380bc4a67..978b8eba79eaf 100644
--- a/pkg/expression/function_traits.go
+++ b/pkg/expression/function_traits.go
@@ -99,10 +99,6 @@ var IllegalFunctions4GeneratedColumns = map[string]struct{}{
 	ast.CurrentUser:          {},
 	ast.Curtime:              {},
 	ast.Database:             {},
-<<<<<<< HEAD
-=======
-	ast.Encrypt:              {},
->>>>>>> 77866d1f46 (expression: Update generated column function restrictions (#58308))
 	ast.FoundRows:            {},
 	ast.GetLock:              {},
 	ast.GetVar:               {},

From c01fbbe830c4da125f16a725b1d0d06525f723e4 Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Fri, 31 Jan 2025 13:36:35 -0800
Subject: [PATCH 22/23] updates after unit test

---
 pkg/planner/cardinality/selectivity.go |  4 +--
 pkg/planner/core/find_best_task.go     | 42 +++++++++++++++++++++-----
 pkg/planner/core/stats.go              |  3 ++
 3 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/pkg/planner/cardinality/selectivity.go b/pkg/planner/cardinality/selectivity.go
index 4a42857b1029b..11a6146582b5b 100644
--- a/pkg/planner/cardinality/selectivity.go
+++ b/pkg/planner/cardinality/selectivity.go
@@ -546,8 +546,8 @@ type StatsNode struct {
 	// Selectivity indicates the Selectivity of this column/index.
 	Selectivity float64
 	// CorrSelectivity indicates the Selectivity of this column/index with correlated column.
-	// That is - it is the selectivity assuming the most filtering column only, and all other
-	// columns are uncorrelated.
+	// That is - it is the selectivity assuming the most filtering index column only, and all other
+	// columns are correlated with this column.
 	CorrSelectivity float64
 	// numCols is the number of columns contained in the index or column(which is always 1).
 	numCols int
diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go
index 8fd569e1d1fad..0d5862af84602 100644
--- a/pkg/planner/core/find_best_task.go
+++ b/pkg/planner/core/find_best_task.go
@@ -710,15 +710,42 @@ func compareGlobalIndex(lhs, rhs *candidatePath) int {
 
 func compareCorrRatio(lhs, rhs *candidatePath) (int, float64) {
 	lhsCorrRatio, rhsCorrRatio := 0.0, 0.0
+	// CorrCountAfterAccess is the "CountAfterAccess" estimated using only the most selective index column, thus
+	// lhs/rhsCorrRatio represents the "risk" in the CountAfterAccess value - a lower ratio means less exposure
+	// to correlation between the indexed columns that we do NOT know about.
 	if lhs.path.CorrCountAfterAccess > 0 || rhs.path.CorrCountAfterAccess > 0 {
 		lhsCorrRatio = lhs.path.CorrCountAfterAccess / lhs.path.CountAfterAccess
 		rhsCorrRatio = rhs.path.CorrCountAfterAccess / rhs.path.CountAfterAccess
 	}
+	// rhs has lower index selectivity and lower risk
+	if rhs.path.CountAfterAccess < lhs.path.CountAfterAccess && rhsCorrRatio < lhsCorrRatio {
+		return -1, lhsCorrRatio
+	}
+	// lhs has lower risk
 	if lhsCorrRatio < rhsCorrRatio {
-		return 1, lhsCorrRatio
+		// And lhs has lower index selectivity
+		if lhs.path.CountAfterAccess < rhs.path.CountAfterAccess {
+			return 1, lhsCorrRatio
+		}
+		// Add 10% of the difference between correlated and actual and compare
+		rhsAdjustCount := rhs.path.CountAfterAccess + ((rhs.path.CorrCountAfterAccess - rhs.path.CountAfterAccess) * 0.1)
+		if (lhs.path.CountAfterAccess < 10 || lhs.path.CountAfterAccess < (rhs.path.CountAfterAccess*10)) &&
+			lhs.path.CorrCountAfterAccess < rhsAdjustCount {
+			return 1, lhsCorrRatio
+		}
 	}
+	// rhs has lower risk
 	if rhsCorrRatio < lhsCorrRatio {
-		return -1, rhsCorrRatio
+		// And rhs has lower index selectivity
+		if rhs.path.CountAfterAccess < lhs.path.CountAfterAccess {
+			return -1, rhsCorrRatio
+		}
+		// Add 10% of the difference between correlated and actual and compare
+		lhsAdjustCount := lhs.path.CountAfterAccess + ((lhs.path.CorrCountAfterAccess - lhs.path.CountAfterAccess) * 0.1)
+		if (rhs.path.CountAfterAccess < 10 || rhs.path.CountAfterAccess < (lhs.path.CountAfterAccess*10)) &&
+			rhs.path.CorrCountAfterAccess < lhsAdjustCount {
+			return -1, rhsCorrRatio
+		}
 	}
 	return 0, 0
 }
@@ -819,23 +846,24 @@ func compareCandidates(sctx base.PlanContext, statsTbl *statistics.Table, tableI
 		}
 	}
 
-	// Below compares the two candidate paths on four dimensions:
+	// Below compares the two candidate paths on multiple dimensions:
 	// (1): the set of columns that occurred in the access condition,
 	// (2): does it require a double scan,
 	// (3): whether or not it matches the physical property,
 	// (4): it's a global index path or not.
+	// (5): whether its correlation ratio indicates that it has high risk in its index scan estimate
 	// If `x` is not worse than `y` at all factors,
 	// and there exists one factor that `x` is better than `y`, then `x` is better than `y`.
-	if !comparable1 {
+	if !comparable1 && sum == 0 {
 		return 0, false // No winner (0). Do not return the pseudo result
 	}
-	if !comparable2 {
+	if !comparable2 && sum == 0 {
 		return 0, false // No winner (0). Do not return the pseudo result
 	}
-	if accessResult >= 0 && scanResult >= 0 && matchResult >= 0 && globalResult >= 0 && sum > 0 {
+	if accessResult >= 0 && scanResult >= 0 && matchResult >= 0 && globalResult >= 0 && corrResult >= 0 && sum > 0 {
 		return 1, lhsPseudo // left wins - also return whether it has statistics (pseudo) or not
 	}
-	if accessResult <= 0 && scanResult <= 0 && matchResult <= 0 && globalResult <= 0 && sum < 0 {
+	if accessResult <= 0 && scanResult <= 0 && matchResult <= 0 && globalResult <= 0 && corrResult <= 0 && sum < 0 {
 		return -1, rhsPseudo // right wins - also return whether it has statistics (pseudo) or not
 	}
 	return 0, false // No winner (0). Do not return the pseudo result
diff --git a/pkg/planner/core/stats.go b/pkg/planner/core/stats.go
index 15bf9252d738a..2fad34d84f185 100644
--- a/pkg/planner/core/stats.go
+++ b/pkg/planner/core/stats.go
@@ -405,6 +405,9 @@ func detachCondAndBuildRangeForPath(
 		}
 	}
 	path.CountAfterAccess, path.CorrCountAfterAccess, err = cardinality.GetRowCountByIndexRanges(sctx, histColl, path.Index.ID, path.Ranges)
+	if path.CorrCountAfterAccess == 0 {
+		path.CorrCountAfterAccess = path.CountAfterAccess
+	}
 	return err
 }
 

From ef79ba4a682fedeb28c0e4ebc8d59743ed6d742e Mon Sep 17 00:00:00 2001
From: tpp <terry.purcell@pingcap.com>
Date: Fri, 31 Jan 2025 14:55:42 -0800
Subject: [PATCH 23/23] testcase1

---
 pkg/planner/core/find_best_task.go | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/pkg/planner/core/find_best_task.go b/pkg/planner/core/find_best_task.go
index 0d5862af84602..7d9826b7965cd 100644
--- a/pkg/planner/core/find_best_task.go
+++ b/pkg/planner/core/find_best_task.go
@@ -717,12 +717,8 @@ func compareCorrRatio(lhs, rhs *candidatePath) (int, float64) {
 		lhsCorrRatio = lhs.path.CorrCountAfterAccess / lhs.path.CountAfterAccess
 		rhsCorrRatio = rhs.path.CorrCountAfterAccess / rhs.path.CountAfterAccess
 	}
-	// rhs has lower index selectivity and lower risk
-	if rhs.path.CountAfterAccess < lhs.path.CountAfterAccess && rhsCorrRatio < lhsCorrRatio {
-		return -1, lhsCorrRatio
-	}
 	// lhs has lower risk
-	if lhsCorrRatio < rhsCorrRatio {
+	if lhsCorrRatio < rhsCorrRatio && len(lhs.path.TableFilters) <= len(rhs.path.TableFilters) {
 		// And lhs has lower index selectivity
 		if lhs.path.CountAfterAccess < rhs.path.CountAfterAccess {
 			return 1, lhsCorrRatio
@@ -735,7 +731,7 @@ func compareCorrRatio(lhs, rhs *candidatePath) (int, float64) {
 		}
 	}
 	// rhs has lower risk
-	if rhsCorrRatio < lhsCorrRatio {
+	if rhsCorrRatio < lhsCorrRatio && len(rhs.path.TableFilters) <= len(lhs.path.TableFilters) {
 		// And rhs has lower index selectivity
 		if rhs.path.CountAfterAccess < lhs.path.CountAfterAccess {
 			return -1, rhsCorrRatio