Skip to content

Commit

Permalink
core/region: optimize the efficiency of random regions selecting (#8205)
Browse files Browse the repository at this point in the history
ref #7897

Optimize the efficiency of random region selection.

Signed-off-by: JmPotato <[email protected]>

Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com>
  • Loading branch information
JmPotato and ti-chi-bot[bot] authored May 22, 2024
1 parent b871b57 commit 0056569
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 121 deletions.
39 changes: 8 additions & 31 deletions pkg/core/region.go
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,11 @@ func (r *RegionInfo) isRegionRecreated() bool {
return r.GetRegionEpoch().GetVersion() == 1 && r.GetRegionEpoch().GetConfVer() == 1 && (len(r.GetStartKey()) != 0 || len(r.GetEndKey()) != 0)
}

// Contains reports whether key lies inside the region's key range: key must
// be greater than or equal to the start key and strictly less than the end
// key. An empty end key is treated as having no upper bound.
func (r *RegionInfo) Contains(key []byte) bool {
	lower, upper := r.GetStartKey(), r.GetEndKey()
	if bytes.Compare(key, lower) < 0 {
		// key sorts before the start key, so it is outside the range.
		return false
	}
	return len(upper) == 0 || bytes.Compare(key, upper) < 0
}

// RegionGuideFunc is a function that determines which follow-up operations need to be performed based on the origin
// and new region information.
type RegionGuideFunc func(ctx *MetaProcessContext, region, origin *RegionInfo) (saveKV, saveCache, needSync, retained bool)
Expand Down Expand Up @@ -1673,25 +1678,18 @@ func (r *RegionsInfo) GetStoreWitnessCount(storeID uint64) int {
return r.witnesses[storeID].length()
}

// RandPendingRegion picks a random region that has a pending peer on the
// given store. The lookup goes through the store's entry in pendingPeers and
// forwards ranges unchanged; the read lock on r.st is held for the whole call.
func (r *RegionsInfo) RandPendingRegion(storeID uint64, ranges []KeyRange) *RegionInfo {
	r.st.RLock()
	defer r.st.RUnlock()

	pending := r.pendingPeers[storeID]
	return pending.RandomRegion(ranges)
}

// RandPendingRegions picks random regions that have a pending peer on the
// given store. The count requested from the store's pendingPeers tree is
// randomRegionMaxRetry, and ranges is forwarded unchanged. The read lock on
// r.st is held for the whole call.
func (r *RegionsInfo) RandPendingRegions(storeID uint64, ranges []KeyRange) []*RegionInfo {
	r.st.RLock()
	defer r.st.RUnlock()

	pending := r.pendingPeers[storeID]
	return pending.RandomRegions(randomRegionMaxRetry, ranges)
}

// RandLeaderRegion randomly gets a store's leader region.
func (r *RegionsInfo) RandLeaderRegion(storeID uint64, ranges []KeyRange) *RegionInfo {
// This function is used for testing only.
func (r *RegionsInfo) randLeaderRegion(storeID uint64, ranges []KeyRange) {
r.st.RLock()
defer r.st.RUnlock()
return r.leaders[storeID].RandomRegion(ranges)
_ = r.leaders[storeID].randomRegion(ranges)
}

// RandLeaderRegions randomly gets a store's n leader regions.
Expand All @@ -1701,41 +1699,20 @@ func (r *RegionsInfo) RandLeaderRegions(storeID uint64, ranges []KeyRange) []*Re
return r.leaders[storeID].RandomRegions(randomRegionMaxRetry, ranges)
}

// RandFollowerRegion picks a random follower region of the given store. The
// lookup goes through the store's entry in followers and forwards ranges
// unchanged; the read lock on r.st is held for the whole call.
func (r *RegionsInfo) RandFollowerRegion(storeID uint64, ranges []KeyRange) *RegionInfo {
	r.st.RLock()
	defer r.st.RUnlock()

	storeFollowers := r.followers[storeID]
	return storeFollowers.RandomRegion(ranges)
}

// RandFollowerRegions picks random follower regions of the given store. The
// count requested from the store's followers tree is randomRegionMaxRetry,
// and ranges is forwarded unchanged. The read lock on r.st is held for the
// whole call.
func (r *RegionsInfo) RandFollowerRegions(storeID uint64, ranges []KeyRange) []*RegionInfo {
	r.st.RLock()
	defer r.st.RUnlock()

	storeFollowers := r.followers[storeID]
	return storeFollowers.RandomRegions(randomRegionMaxRetry, ranges)
}

// RandLearnerRegion picks a random learner region of the given store. The
// lookup goes through the store's entry in learners and forwards ranges
// unchanged; the read lock on r.st is held for the whole call.
func (r *RegionsInfo) RandLearnerRegion(storeID uint64, ranges []KeyRange) *RegionInfo {
	r.st.RLock()
	defer r.st.RUnlock()

	storeLearners := r.learners[storeID]
	return storeLearners.RandomRegion(ranges)
}

// RandLearnerRegions picks random learner regions of the given store. The
// count requested from the store's learners tree is randomRegionMaxRetry,
// and ranges is forwarded unchanged. The read lock on r.st is held for the
// whole call.
func (r *RegionsInfo) RandLearnerRegions(storeID uint64, ranges []KeyRange) []*RegionInfo {
	r.st.RLock()
	defer r.st.RUnlock()

	storeLearners := r.learners[storeID]
	return storeLearners.RandomRegions(randomRegionMaxRetry, ranges)
}

// RandWitnessRegion picks a random witness region of the given store. The
// lookup goes through the store's entry in witnesses and forwards ranges
// unchanged; the read lock on r.st is held for the whole call.
func (r *RegionsInfo) RandWitnessRegion(storeID uint64, ranges []KeyRange) *RegionInfo {
	r.st.RLock()
	defer r.st.RUnlock()

	storeWitnesses := r.witnesses[storeID]
	return storeWitnesses.RandomRegion(ranges)
}

// RandWitnessRegions randomly gets a store's n witness regions.
func (r *RegionsInfo) RandWitnessRegions(storeID uint64, ranges []KeyRange) []*RegionInfo {
r.st.RLock()
Expand Down
58 changes: 43 additions & 15 deletions pkg/core/region_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -642,21 +642,49 @@ func BenchmarkUpdateBuckets(b *testing.B) {
}

func BenchmarkRandomRegion(b *testing.B) {
regions := NewRegionsInfo()
for i := 0; i < 5000000; i++ {
peer := &metapb.Peer{StoreId: 1, Id: uint64(i + 1)}
region := NewRegionInfo(&metapb.Region{
Id: uint64(i + 1),
Peers: []*metapb.Peer{peer},
StartKey: []byte(fmt.Sprintf("%20d", i)),
EndKey: []byte(fmt.Sprintf("%20d", i+1)),
}, peer)
origin, overlaps, rangeChanged := regions.SetRegion(region)
regions.UpdateSubTree(region, origin, overlaps, rangeChanged)
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
regions.RandLeaderRegion(1, nil)
for _, size := range []int{10, 100, 1000, 10000, 100000, 1000000, 10000000} {
regions := NewRegionsInfo()
for i := 0; i < size; i++ {
peer := &metapb.Peer{StoreId: 1, Id: uint64(i + 1)}
region := NewRegionInfo(&metapb.Region{
Id: uint64(i + 1),
Peers: []*metapb.Peer{peer},
StartKey: []byte(fmt.Sprintf("%20d", i)),
EndKey: []byte(fmt.Sprintf("%20d", i+1)),
}, peer)
origin, overlaps, rangeChanged := regions.SetRegion(region)
regions.UpdateSubTree(region, origin, overlaps, rangeChanged)
}
b.Run(fmt.Sprintf("random region whole range with size %d", size), func(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
regions.randLeaderRegion(1, nil)
}
})
b.Run(fmt.Sprintf("random regions whole range with size %d", size), func(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
regions.RandLeaderRegions(1, nil)
}
})
ranges := []KeyRange{
NewKeyRange(fmt.Sprintf("%20d", 0), fmt.Sprintf("%20d", size/4)),
NewKeyRange(fmt.Sprintf("%20d", size/4), fmt.Sprintf("%20d", size/2)),
NewKeyRange(fmt.Sprintf("%20d", size/2), fmt.Sprintf("%20d", size*3/4)),
NewKeyRange(fmt.Sprintf("%20d", size*3/4), fmt.Sprintf("%20d", size)),
}
b.Run(fmt.Sprintf("random region given ranges with size %d", size), func(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
regions.randLeaderRegion(1, ranges)
}
})
b.Run(fmt.Sprintf("random regions given ranges with size %d", size), func(b *testing.B) {
b.ResetTimer()
for i := 0; i < b.N; i++ {
regions.RandLeaderRegions(1, ranges)
}
})
}
}

Expand Down
137 changes: 85 additions & 52 deletions pkg/core/region_tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,6 @@ func (r *regionItem) Less(other *regionItem) bool {
return bytes.Compare(left, right) < 0
}

// Contains reports whether key lies inside the item's key range: key must be
// greater than or equal to the start key and strictly less than the end key.
// An empty end key is treated as having no upper bound.
func (r *regionItem) Contains(key []byte) bool {
	lower, upper := r.GetStartKey(), r.GetEndKey()
	if bytes.Compare(key, lower) < 0 {
		// key sorts before the start key, so it is outside the range.
		return false
	}
	return len(upper) == 0 || bytes.Compare(key, upper) < 0
}

const (
// defaultBTreeDegree is the default B-tree degree (64).
// NOTE(review): presumably passed to the B-tree constructor backing
// regionTree — the use site is not visible here; confirm.
defaultBTreeDegree = 64
)
Expand Down Expand Up @@ -328,62 +323,100 @@ func (t *regionTree) getAdjacentItem(item *regionItem) (prev *regionItem, next *
return prev, next
}

// RandomRegion is used to get a random region within ranges.
func (t *regionTree) RandomRegion(ranges []KeyRange) *RegionInfo {
if t.length() == 0 {
func (t *regionTree) randomRegion(ranges []KeyRange) *RegionInfo {
regions := t.RandomRegions(1, ranges)
if len(regions) == 0 {
return nil
}
return regions[0]
}

if len(ranges) == 0 {
ranges = []KeyRange{NewKeyRange("", "")}
// RandomRegions gets n random regions within the given ranges.
func (t *regionTree) RandomRegions(n int, ranges []KeyRange) []*RegionInfo {
treeLen := t.length()
if treeLen == 0 || n < 1 {
return nil
}

for _, i := range rand.Perm(len(ranges)) {
var endIndex int
startKey, endKey := ranges[i].StartKey, ranges[i].EndKey
startRegion, startIndex := t.tree.GetWithIndex(&regionItem{RegionInfo: &RegionInfo{meta: &metapb.Region{StartKey: startKey}}})

if len(endKey) != 0 {
_, endIndex = t.tree.GetWithIndex(&regionItem{RegionInfo: &RegionInfo{meta: &metapb.Region{StartKey: endKey}}})
} else {
endIndex = t.tree.Len()
// Pre-allocate the variables to reduce the temporary memory allocations.
var (
startKey, endKey []byte
startIndex, endIndex, randIndex int
startItem *regionItem
pivotItem = &regionItem{&RegionInfo{meta: &metapb.Region{}}}
region *RegionInfo
regions = make([]*RegionInfo, 0, n)
rangeLen, curLen = len(ranges), len(regions)
// setStartEndIndices is a helper function to set `startIndex` and `endIndex`
// according to the `startKey` and `endKey`.
// TODO: maybe we could cache the `startIndex` and `endIndex` for each range.
setStartEndIndices = func() {
pivotItem.meta.StartKey = startKey
startItem, startIndex = t.tree.GetWithIndex(pivotItem)
if len(endKey) != 0 {
pivotItem.meta.StartKey = endKey
_, endIndex = t.tree.GetWithIndex(pivotItem)
} else {
endIndex = treeLen
}
// Since the items in the tree may not be contiguous,
// we need to check whether the previous item contains the key.
if startIndex != 0 && startItem == nil {
region = t.tree.GetAt(startIndex - 1).RegionInfo
if region.Contains(startKey) {
startIndex--
}
}
}

// Consider that the item in the tree may not be continuous,
// we need to check if the previous item contains the key.
if startIndex != 0 && startRegion == nil && t.tree.GetAt(startIndex-1).Contains(startKey) {
startIndex--
)
// If no ranges specified, select randomly from the whole tree.
// This is a fast path to reduce the unnecessary iterations.
if rangeLen == 0 {
startKey, endKey = []byte(""), []byte("")
setStartEndIndices()
for curLen < n {
randIndex = rand.Intn(endIndex-startIndex) + startIndex
region = t.tree.GetAt(randIndex).RegionInfo
if region.isInvolved(startKey, endKey) {
regions = append(regions, region)
curLen++
}
// No region found, directly break to avoid infinite loop.
if curLen == 0 {
break
}
}
return regions
}
// When there are multiple ranges provided,
// keep retrying until we get enough regions.
for curLen < n {
// Shuffle the ranges to increase the randomness.
for _, i := range rand.Perm(rangeLen) {
startKey, endKey = ranges[i].StartKey, ranges[i].EndKey
setStartEndIndices()
if endIndex <= startIndex {
if len(endKey) > 0 && bytes.Compare(startKey, endKey) > 0 {
log.Error("wrong range keys",
logutil.ZapRedactString("start-key", string(HexRegionKey(startKey))),
logutil.ZapRedactString("end-key", string(HexRegionKey(endKey))),
errs.ZapError(errs.ErrWrongRangeKeys))
}
continue
}

if endIndex <= startIndex {
if len(endKey) > 0 && bytes.Compare(startKey, endKey) > 0 {
log.Error("wrong range keys",
logutil.ZapRedactString("start-key", string(HexRegionKey(startKey))),
logutil.ZapRedactString("end-key", string(HexRegionKey(endKey))),
errs.ZapError(errs.ErrWrongRangeKeys))
randIndex = rand.Intn(endIndex-startIndex) + startIndex
region = t.tree.GetAt(randIndex).RegionInfo
if region.isInvolved(startKey, endKey) {
regions = append(regions, region)
curLen++
if curLen == n {
return regions
}
}
continue
}
index := rand.Intn(endIndex-startIndex) + startIndex
region := t.tree.GetAt(index).RegionInfo
if region.isInvolved(startKey, endKey) {
return region
}
}

return nil
}

func (t *regionTree) RandomRegions(n int, ranges []KeyRange) []*RegionInfo {
if t.length() == 0 {
return nil
}

regions := make([]*RegionInfo, 0, n)

for i := 0; i < n; i++ {
if region := t.RandomRegion(ranges); region != nil {
regions = append(regions, region)
// No region found, directly break to avoid infinite loop.
if curLen == 0 {
break
}
}
return regions
Expand Down
Loading

0 comments on commit 0056569

Please sign in to comment.