From 4cd42b3cb8e7ae32c4d15d6fab7d7ab0552facf2 Mon Sep 17 00:00:00 2001 From: JmPotato Date: Thu, 23 May 2024 17:40:19 +0800 Subject: [PATCH] core/region: optimize the logic of randomly selecting regions within a single range (#8211) ref tikv/pd#7897 In the case of only one key range, follow the fast path logic to avoid unnecessary random and loop operations. Signed-off-by: JmPotato --- pkg/core/region_test.go | 19 +++++++++-- pkg/core/region_tree.go | 61 ++++++++++++++++++++++-------------- pkg/core/region_tree_test.go | 6 ++++ 3 files changed, 61 insertions(+), 25 deletions(-) diff --git a/pkg/core/region_test.go b/pkg/core/region_test.go index 8956bd8a357..aaf440eeeea 100644 --- a/pkg/core/region_test.go +++ b/pkg/core/region_test.go @@ -668,18 +668,33 @@ func BenchmarkRandomRegion(b *testing.B) { } }) ranges := []KeyRange{ + NewKeyRange(fmt.Sprintf("%20d", size/4), fmt.Sprintf("%20d", size*3/4)), + } + b.Run(fmt.Sprintf("random region single range with size %d", size), func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + regions.randLeaderRegion(1, ranges) + } + }) + b.Run(fmt.Sprintf("random regions single range with size %d", size), func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + regions.RandLeaderRegions(1, ranges) + } + }) + ranges = []KeyRange{ NewKeyRange(fmt.Sprintf("%20d", 0), fmt.Sprintf("%20d", size/4)), NewKeyRange(fmt.Sprintf("%20d", size/4), fmt.Sprintf("%20d", size/2)), NewKeyRange(fmt.Sprintf("%20d", size/2), fmt.Sprintf("%20d", size*3/4)), NewKeyRange(fmt.Sprintf("%20d", size*3/4), fmt.Sprintf("%20d", size)), } - b.Run(fmt.Sprintf("random region given ranges with size %d", size), func(b *testing.B) { + b.Run(fmt.Sprintf("random region multiple ranges with size %d", size), func(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { regions.randLeaderRegion(1, ranges) } }) - b.Run(fmt.Sprintf("random regions given ranges with size %d", size), func(b *testing.B) { + b.Run(fmt.Sprintf("random regions multiple ranges with size %d", size), func(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { regions.RandLeaderRegions(1, ranges) diff --git a/pkg/core/region_tree.go b/pkg/core/region_tree.go index e6d05d443a1..d4ef4a880fc 100644 --- a/pkg/core/region_tree.go +++ b/pkg/core/region_tree.go @@ -339,20 +339,28 @@ func (t *regionTree) RandomRegions(n int, ranges []KeyRange) []*RegionInfo { } // Pre-allocate the variables to reduce the temporary memory allocations. var ( - startKey, endKey []byte - startIndex, endIndex, randIndex int - startItem *regionItem - pivotItem = ®ionItem{&RegionInfo{meta: &metapb.Region{}}} - region *RegionInfo - regions = make([]*RegionInfo, 0, n) - rangeLen, curLen = len(ranges), len(regions) + startKey, endKey []byte + // By default, we set the `startIndex` and `endIndex` to the whole tree range. + startIndex, endIndex = 0, treeLen + randIndex int + startItem *regionItem + pivotItem = ®ionItem{&RegionInfo{meta: &metapb.Region{}}} + region *RegionInfo + regions = make([]*RegionInfo, 0, n) + rangeLen, curLen = len(ranges), len(regions) // setStartEndIndices is a helper function to set `startIndex` and `endIndex` - // according to the `startKey` and `endKey`. + // according to the `startKey` and `endKey` and check if the range is invalid + // to skip the iteration. // TODO: maybe we could cache the `startIndex` and `endIndex` for each range. - setStartEndIndices = func() { + setAndCheckStartEndIndices = func() (skip bool) { + startKeyLen, endKeyLen := len(startKey), len(endKey) + if startKeyLen == 0 && endKeyLen == 0 { + startIndex, endIndex = 0, treeLen + return false + } pivotItem.meta.StartKey = startKey startItem, startIndex = t.tree.GetWithIndex(pivotItem) - if len(endKey) != 0 { + if endKeyLen > 0 { pivotItem.meta.StartKey = endKey _, endIndex = t.tree.GetWithIndex(pivotItem) } else { @@ -366,13 +374,27 @@ func (t *regionTree) RandomRegions(n int, ranges []KeyRange) []*RegionInfo { startIndex-- } } + // Check whether the `startIndex` and `endIndex` are valid. + if endIndex <= startIndex { + if endKeyLen > 0 && bytes.Compare(startKey, endKey) > 0 { + log.Error("wrong range keys", + logutil.ZapRedactString("start-key", string(HexRegionKey(startKey))), + logutil.ZapRedactString("end-key", string(HexRegionKey(endKey))), + errs.ZapError(errs.ErrWrongRangeKeys)) + } + return true + } + return false } ) - // If no ranges specified, select randomly from the whole tree. - // This is a fast path to reduce the unnecessary iterations. - if rangeLen == 0 { - startKey, endKey = []byte(""), []byte("") - setStartEndIndices() + // This is a fast path to reduce the unnecessary iterations when we only have one range. + if rangeLen <= 1 { + if rangeLen == 1 { + startKey, endKey = ranges[0].StartKey, ranges[0].EndKey + if setAndCheckStartEndIndices() { + return regions + } + } for curLen < n { randIndex = rand.Intn(endIndex-startIndex) + startIndex region = t.tree.GetAt(randIndex).RegionInfo @@ -393,14 +415,7 @@ func (t *regionTree) RandomRegions(n int, ranges []KeyRange) []*RegionInfo { // Shuffle the ranges to increase the randomness. for _, i := range rand.Perm(rangeLen) { startKey, endKey = ranges[i].StartKey, ranges[i].EndKey - setStartEndIndices() - if endIndex <= startIndex { - if len(endKey) > 0 && bytes.Compare(startKey, endKey) > 0 { - log.Error("wrong range keys", - logutil.ZapRedactString("start-key", string(HexRegionKey(startKey))), - logutil.ZapRedactString("end-key", string(HexRegionKey(endKey))), - errs.ZapError(errs.ErrWrongRangeKeys)) - } + if setAndCheckStartEndIndices() { continue } diff --git a/pkg/core/region_tree_test.go b/pkg/core/region_tree_test.go index a86f2f52c47..5886103191c 100644 --- a/pkg/core/region_tree_test.go +++ b/pkg/core/region_tree_test.go @@ -281,8 +281,12 @@ func TestRandomRegion(t *testing.T) { updateNewItem(tree, regionA) ra := tree.randomRegion([]KeyRange{NewKeyRange("", "")}) re.Equal(regionA, ra) + ra = tree.randomRegion(nil) + re.Equal(regionA, ra) ra2 := tree.RandomRegions(2, []KeyRange{NewKeyRange("", "")}) re.Equal([]*RegionInfo{regionA, regionA}, ra2) + ra2 = tree.RandomRegions(2, nil) + re.Equal([]*RegionInfo{regionA, regionA}, ra2) regionB := NewTestRegionInfo(2, 2, []byte("g"), []byte("n")) regionC := NewTestRegionInfo(3, 3, []byte("n"), []byte("t")) @@ -307,6 +311,7 @@ func TestRandomRegion(t *testing.T) { rf = tree.randomRegion([]KeyRange{NewKeyRange("z", "")}) re.Nil(rf) + checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB, regionC, regionD}, nil) checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB, regionC, regionD}, []KeyRange{NewKeyRange("", "")}) checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB}, []KeyRange{NewKeyRange("", "n")}) checkRandomRegion(re, tree, []*RegionInfo{regionC, regionD}, []KeyRange{NewKeyRange("n", "")}) @@ -356,6 +361,7 @@ func TestRandomRegionDiscontinuous(t *testing.T) { rd := tree.randomRegion([]KeyRange{NewKeyRange("", "b")}) re.Equal(regionD, rd) + checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB, regionC, regionD}, nil) checkRandomRegion(re, tree, []*RegionInfo{regionA, regionB, regionC, regionD}, []KeyRange{NewKeyRange("", "")}) }