Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: Recognize potential for correlation in subset index match (WIP) #58688

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions pkg/planner/cardinality/cross_estimation.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ func crossEstimateRowCount(sctx planctx.PlanContext,
if idxExists && len(idxIDs) > 0 {
idxID = idxIDs[0]
}
rangeCounts, ok := getColumnRangeCounts(sctx, colUniqueID, ranges, dsTableStats.HistColl, idxID)
rangeCounts, _, ok := getColumnRangeCounts(sctx, colUniqueID, ranges, dsTableStats.HistColl, idxID)
if !ok {
return 0, false, corr
}
Expand All @@ -166,7 +166,7 @@ func crossEstimateRowCount(sctx planctx.PlanContext,
}
var rangeCount float64
if idxExists {
rangeCount, err = GetRowCountByIndexRanges(sctx, dsTableStats.HistColl, idxID, convertedRanges)
rangeCount, _, err = GetRowCountByIndexRanges(sctx, dsTableStats.HistColl, idxID, convertedRanges)
} else {
rangeCount, err = GetRowCountByColumnRanges(sctx, dsTableStats.HistColl, colUniqueID, convertedRanges)
}
Expand All @@ -182,30 +182,30 @@ func crossEstimateRowCount(sctx planctx.PlanContext,
}

// getColumnRangeCounts estimates row count for each range respectively.
func getColumnRangeCounts(sctx planctx.PlanContext, colID int64, ranges []*ranger.Range, histColl *statistics.HistColl, idxID int64) ([]float64, bool) {
func getColumnRangeCounts(sctx planctx.PlanContext, colID int64, ranges []*ranger.Range, histColl *statistics.HistColl, idxID int64) ([]float64, float64, bool) {
var err error
var count float64
var count, corrCount float64
rangeCounts := make([]float64, len(ranges))
for i, ran := range ranges {
if idxID >= 0 {
idxHist := histColl.GetIdx(idxID)
if statistics.IndexStatsIsInvalid(sctx, idxHist, histColl, idxID) {
return nil, false
return nil, 0, false
}
count, err = GetRowCountByIndexRanges(sctx, histColl, idxID, []*ranger.Range{ran})
count, corrCount, err = GetRowCountByIndexRanges(sctx, histColl, idxID, []*ranger.Range{ran})
} else {
colHist := histColl.GetCol(colID)
if statistics.ColumnStatsIsInvalid(colHist, sctx, histColl, colID) {
return nil, false
return nil, 0, false
}
count, err = GetRowCountByColumnRanges(sctx, histColl, colID, []*ranger.Range{ran})
}
if err != nil {
return nil, false
return nil, 0, false
}
rangeCounts[i] = count
}
return rangeCounts, true
return rangeCounts, corrCount, true
}

// convertRangeFromExpectedCnt builds new ranges used to estimate row count we need to scan in table scan before finding specified
Expand Down
54 changes: 30 additions & 24 deletions pkg/planner/cardinality/row_count_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ import (
)

// GetRowCountByIndexRanges estimates the row count by a slice of Range.
func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistColl, idxID int64, indexRanges []*ranger.Range) (result float64, err error) {
func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistColl, idxID int64, indexRanges []*ranger.Range) (result float64, corrResult float64, err error) {
var name string
if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
debugtrace.EnterContextCommon(sctx)
Expand Down Expand Up @@ -69,7 +69,7 @@ func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistCol
if err == nil && sc.EnableOptimizerCETrace && idx != nil {
ceTraceRange(sctx, coll.PhysicalID, colNames, indexRanges, "Index Stats-Pseudo", uint64(result))
}
return result, err
return result, 0, err
}
realtimeCnt, modifyCount := coll.GetScaledRealtimeAndModifyCnt(idx)
if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
Expand All @@ -79,15 +79,16 @@ func GetRowCountByIndexRanges(sctx planctx.PlanContext, coll *statistics.HistCol
"Increase Factor", idx.GetIncreaseFactor(realtimeCnt),
)
}
corrResult = float64(0)
if idx.CMSketch != nil && idx.StatsVer == statistics.Version1 {
result, err = getIndexRowCountForStatsV1(sctx, coll, idxID, indexRanges)
} else {
result, err = getIndexRowCountForStatsV2(sctx, idx, coll, indexRanges, realtimeCnt, modifyCount)
result, corrResult, err = getIndexRowCountForStatsV2(sctx, idx, coll, indexRanges, realtimeCnt, modifyCount)
}
if sc.EnableOptimizerCETrace {
ceTraceRange(sctx, coll.PhysicalID, colNames, indexRanges, "Index Stats", uint64(result))
}
return result, errors.Trace(err)
return result, corrResult, errors.Trace(err)
}

func getIndexRowCountForStatsV1(sctx planctx.PlanContext, coll *statistics.HistColl, idxID int64, indexRanges []*ranger.Range) (float64, error) {
Expand Down Expand Up @@ -117,7 +118,7 @@ func getIndexRowCountForStatsV1(sctx planctx.PlanContext, coll *statistics.HistC
// values in this case.
if rangePosition == 0 || isSingleColIdxNullRange(idx, ran) {
realtimeCnt, modifyCount := coll.GetScaledRealtimeAndModifyCnt(idx)
count, err := getIndexRowCountForStatsV2(sctx, idx, nil, []*ranger.Range{ran}, realtimeCnt, modifyCount)
count, _, err := getIndexRowCountForStatsV2(sctx, idx, nil, []*ranger.Range{ran}, realtimeCnt, modifyCount)
if err != nil {
return 0, errors.Trace(err)
}
Expand Down Expand Up @@ -181,7 +182,7 @@ func getIndexRowCountForStatsV1(sctx planctx.PlanContext, coll *statistics.HistC
// prefer index stats over column stats
if idxIDs, ok := coll.ColUniqueID2IdxIDs[colUniqueID]; ok && len(idxIDs) > 0 {
idxID := idxIDs[0]
count, err = GetRowCountByIndexRanges(sctx, coll, idxID, []*ranger.Range{&rang})
count, _, err = GetRowCountByIndexRanges(sctx, coll, idxID, []*ranger.Range{&rang})
} else {
count, err = GetRowCountByColumnRanges(sctx, coll, colUniqueID, []*ranger.Range{&rang})
}
Expand Down Expand Up @@ -215,26 +216,26 @@ func isSingleColIdxNullRange(idx *statistics.Index, ran *ranger.Range) bool {
}

// It uses the modifyCount to validate, and realtimeRowCount to adjust the influence of modifications on the table.
func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRanges []*ranger.Range, realtimeRowCount, modifyCount int64) (float64, error) {
func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRanges []*ranger.Range, realtimeRowCount, modifyCount int64) (totalCount float64, corrCount float64, err error) {
sc := sctx.GetSessionVars().StmtCtx
debugTrace := sc.EnableOptimizerDebugTrace
if debugTrace {
debugtrace.EnterContextCommon(sctx)
defer debugtrace.LeaveContextCommon(sctx)
}
totalCount := float64(0)
isSingleColIdx := len(idx.Info.Columns) == 1
for _, indexRange := range indexRanges {
var count float64
lb, err := codec.EncodeKey(sc.TimeZone(), nil, indexRange.LowVal...)
var lb, rb []byte
lb, err = codec.EncodeKey(sc.TimeZone(), nil, indexRange.LowVal...)
err = sc.HandleError(err)
if err != nil {
return 0, err
return 0, 0, err
}
rb, err := codec.EncodeKey(sc.TimeZone(), nil, indexRange.HighVal...)
rb, err = codec.EncodeKey(sc.TimeZone(), nil, indexRange.HighVal...)
err = sc.HandleError(err)
if err != nil {
return 0, err
return 0, 0, err
}
if debugTrace {
debugTraceStartEstimateRange(sctx, indexRange, lb, rb, totalCount)
Expand Down Expand Up @@ -293,13 +294,14 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index,
// Due to the limitation of calcFraction and convertDatumToScalar, the histogram actually won't estimate anything.
// If the first column's range is point.
if rangePosition := getOrdinalOfRangeCond(sc, indexRange); rangePosition > 0 && idx.StatsVer >= statistics.Version2 && coll != nil {
var expBackoffSel float64
expBackoffSel, expBackoffSuccess, err = expBackoffEstimation(sctx, idx, coll, indexRange)
var expBackoffSel, corrSel float64
expBackoffSel, corrSel, expBackoffSuccess, err = expBackoffEstimation(sctx, idx, coll, indexRange)
if err != nil {
return 0, err
return 0, 0, err
}
if expBackoffSuccess {
expBackoffCnt := expBackoffSel * idx.TotalRowCount()
corrCnt := corrSel * idx.TotalRowCount()

upperLimit := expBackoffCnt
// Use the multi-column stats to calculate the max possible row count of [l, r)
Expand All @@ -326,6 +328,7 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index,
expBackoffCnt = upperLimit
}
count += expBackoffCnt
corrCount += corrCnt
}
}
if !expBackoffSuccess {
Expand All @@ -335,6 +338,7 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index,
// If the current table row count has changed, we should scale the row count accordingly.
increaseFactor := idx.GetIncreaseFactor(realtimeRowCount)
count *= increaseFactor
corrCount *= increaseFactor

// handling the out-of-range part
if (outOfRangeOnIndex(idx, l) && !(isSingleColIdx && lowIsNull)) || outOfRangeOnIndex(idx, r) {
Expand Down Expand Up @@ -369,7 +373,7 @@ func getIndexRowCountForStatsV2(sctx planctx.PlanContext, idx *statistics.Index,
// Don't allow the final result to go below 1 row
totalCount = mathutil.Clamp(totalCount, 1, float64(realtimeRowCount))
}
return totalCount, nil
return totalCount, corrCount, nil
}

var nullKeyBytes, _ = codec.EncodeKey(time.UTC, nil, types.NewDatum(nil))
Expand Down Expand Up @@ -429,7 +433,7 @@ func equalRowCountOnIndex(sctx planctx.PlanContext, idx *statistics.Index, b []b
}

// expBackoffEstimation estimates the multi-column cases following the Exponential Backoff. See the comment below for details.
func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRange *ranger.Range) (sel float64, success bool, err error) {
func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll *statistics.HistColl, indexRange *ranger.Range) (sel float64, corrsel float64, success bool, err error) {
if sctx.GetSessionVars().StmtCtx.EnableOptimizerDebugTrace {
debugtrace.EnterContextCommon(sctx)
defer func() {
Expand Down Expand Up @@ -485,7 +489,7 @@ func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll
continue
}
foundStats = true
count, err = GetRowCountByIndexRanges(sctx, coll, idxID, tmpRan)
count, _, err = GetRowCountByIndexRanges(sctx, coll, idxID, tmpRan)
if err == nil {
break
}
Expand All @@ -497,7 +501,7 @@ func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll
continue
}
if err != nil {
return 0, false, err
return 0, 0, false, err
}
singleColumnEstResults = append(singleColumnEstResults, selectivity)
}
Expand All @@ -509,9 +513,9 @@ func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll
l = 0
})
if l == 1 {
return singleColumnEstResults[0], true, nil
return singleColumnEstResults[0], 0, true, nil
} else if l == 0 {
return 0, false, nil
return 0, 0, false, nil
}
// Do not allow the exponential backoff to go below the available index bound. If the number of predicates
// is less than the number of index columns - use 90% of the bound to differentiate a subset from full index match.
Expand All @@ -524,19 +528,21 @@ func expBackoffEstimation(sctx planctx.PlanContext, idx *statistics.Index, coll
if l < len(idx.Info.Columns) {
idxLowBound /= 0.9
}
// corrsel is the selectivity of the most filtering column
corrsel = min(idxLowBound, singleColumnEstResults[0])
minTwoCol := min(singleColumnEstResults[0], singleColumnEstResults[1], idxLowBound)
multTwoCol := singleColumnEstResults[0] * math.Sqrt(singleColumnEstResults[1])
if l == 2 {
return max(minTwoCol, multTwoCol), true, nil
return max(minTwoCol, multTwoCol), corrsel, true, nil
}
minThreeCol := min(minTwoCol, singleColumnEstResults[2])
multThreeCol := multTwoCol * math.Sqrt(math.Sqrt(singleColumnEstResults[2]))
if l == 3 {
return max(minThreeCol, multThreeCol), true, nil
return max(minThreeCol, multThreeCol), corrsel, true, nil
}
minFourCol := min(minThreeCol, singleColumnEstResults[3])
multFourCol := multThreeCol * math.Sqrt(math.Sqrt(math.Sqrt(singleColumnEstResults[3])))
return max(minFourCol, multFourCol), true, nil
return max(minFourCol, multFourCol), corrsel, true, nil
}

// outOfRangeOnIndex checks if the datum is out of the range.
Expand Down
8 changes: 7 additions & 1 deletion pkg/planner/cardinality/selectivity.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,18 +202,20 @@ func Selectivity(
if err != nil {
return 0, nil, errors.Trace(err)
}
cnt, err := GetRowCountByIndexRanges(ctx, coll, id, ranges)
cnt, corrCnt, err := GetRowCountByIndexRanges(ctx, coll, id, ranges)
if err != nil {
return 0, nil, errors.Trace(err)
}
selectivity := cnt / float64(coll.RealtimeCount)
corrSelectivity := corrCnt / float64(coll.RealtimeCount)
nodes = append(nodes, &StatsNode{
Tp: IndexType,
ID: id,
mask: maskCovered,
Ranges: ranges,
numCols: len(idxStats.Info.Columns),
Selectivity: selectivity,
CorrSelectivity: corrSelectivity,
partCover: partCover,
minAccessCondsForDNFCond: minAccessCondsForDNFCond,
})
Expand Down Expand Up @@ -543,6 +545,10 @@ type StatsNode struct {
mask int64
// Selectivity indicates the Selectivity of this column/index.
Selectivity float64
// CorrSelectivity indicates the Selectivity of this column/index assuming its columns are correlated.
// That is, it is the selectivity of the most filtering column only, treating all other
// columns as fully correlated with it (so they add no further filtering).
CorrSelectivity float64
// numCols is the number of columns contained in the index or column(which is always 1).
numCols int
// partCover indicates whether the bit in the mask is for a full cover or partial cover. It is only true
Expand Down
14 changes: 7 additions & 7 deletions pkg/planner/cardinality/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,11 +252,11 @@ func TestEstimationForUnknownValues(t *testing.T) {
require.Equal(t, 12.2, count)

idxID := table.Meta().Indices[0].ID
count, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(30, 30))
count, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(30, 30))
require.NoError(t, err)
require.Equal(t, 0.1, count)

count, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(9, 30))
count, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(9, 30))
require.NoError(t, err)
require.Equal(t, 10.0, count)

Expand Down Expand Up @@ -286,7 +286,7 @@ func TestEstimationForUnknownValues(t *testing.T) {
require.Equal(t, 1.0, count)

idxID = table.Meta().Indices[0].ID
count, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(2, 2))
count, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(2, 2))
require.NoError(t, err)
require.Equal(t, 0.0, count)
}
Expand Down Expand Up @@ -377,11 +377,11 @@ func TestEstimationUniqueKeyEqualConds(t *testing.T) {

sctx := mock.NewContext()
idxID := table.Meta().Indices[0].ID
count, err := cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(7, 7))
count, _, err := cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(7, 7))
require.NoError(t, err)
require.Equal(t, 1.0, count)

count, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(6, 6))
count, _, err = cardinality.GetRowCountByIndexRanges(sctx, &statsTbl.HistColl, idxID, getRange(6, 6))
require.NoError(t, err)
require.Equal(t, 1.0, count)

Expand Down Expand Up @@ -1011,12 +1011,12 @@ func TestIssue39593(t *testing.T) {
sctx := testKit.Session()
idxID := tblInfo.Indices[0].ID
vals := []int64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}
count, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRanges(vals, vals))
count, _, err := cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRanges(vals, vals))
require.NoError(t, err)
// estimated row count without any changes
require.Equal(t, float64(360), count)
statsTbl.RealtimeCount *= 10
count, err = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRanges(vals, vals))
count, _, err = cardinality.GetRowCountByIndexRanges(sctx.GetPlanCtx(), &statsTbl.HistColl, idxID, getRanges(vals, vals))
require.NoError(t, err)
// estimated row count after mock modify on the table
require.Equal(t, float64(3600), count)
Expand Down
16 changes: 9 additions & 7 deletions pkg/planner/core/debugtrace.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,13 +227,14 @@ func stabilizeGetStatsTblInfo(info *getStatsTblInfo) {
*/

type accessPathForDebugTrace struct {
IndexName string `json:",omitempty"`
AccessConditions []string
IndexFilters []string
TableFilters []string
PartialPaths []accessPathForDebugTrace `json:",omitempty"`
CountAfterAccess float64
CountAfterIndex float64
IndexName string `json:",omitempty"`
AccessConditions []string
IndexFilters []string
TableFilters []string
PartialPaths []accessPathForDebugTrace `json:",omitempty"`
CountAfterAccess float64
CorrCountAfterAccess float64
CountAfterIndex float64
}

func convertAccessPathForDebugTrace(ctx expression.EvalContext, path *util.AccessPath, out *accessPathForDebugTrace) {
Expand All @@ -244,6 +245,7 @@ func convertAccessPathForDebugTrace(ctx expression.EvalContext, path *util.Acces
out.IndexFilters = expression.ExprsToStringsForDisplay(ctx, path.IndexFilters)
out.TableFilters = expression.ExprsToStringsForDisplay(ctx, path.TableFilters)
out.CountAfterAccess = path.CountAfterAccess
out.CorrCountAfterAccess = path.CorrCountAfterAccess
out.CountAfterIndex = path.CountAfterIndex
out.PartialPaths = make([]accessPathForDebugTrace, len(path.PartialIndexPaths))
for i, partialPath := range path.PartialIndexPaths {
Expand Down
9 changes: 5 additions & 4 deletions pkg/planner/core/exhaust_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -1347,10 +1347,11 @@ func constructInnerIndexScanTask(
rowCount = math.Min(rowCount, 1.0)
}
tmpPath := &util.AccessPath{
IndexFilters: indexConds,
TableFilters: tblConds,
CountAfterIndex: rowCount,
CountAfterAccess: rowCount,
IndexFilters: indexConds,
TableFilters: tblConds,
CountAfterIndex: rowCount,
CountAfterAccess: rowCount,
CorrCountAfterAccess: 0,
}
// Assume equal conditions used by index join and other conditions are independent.
if len(tblConds) > 0 {
Expand Down
Loading