From 1cfc300459750bf5c34447e10b07d7f5abc41af8 Mon Sep 17 00:00:00 2001 From: lhy1024 Date: Wed, 30 Oct 2024 11:22:07 +0800 Subject: [PATCH] checker, statistic: avoid leak in label statistic (#8703) #8751 Signed-off-by: lhy1024 --- pkg/statistics/region_collection.go | 24 +++++++++++++++---- server/cluster/cluster.go | 3 ++- server/cluster/cluster_test.go | 36 ++++++++++++++++++++++++++--- 3 files changed, 54 insertions(+), 9 deletions(-) diff --git a/pkg/statistics/region_collection.go b/pkg/statistics/region_collection.go index b6187bf0a68..076044008c4 100644 --- a/pkg/statistics/region_collection.go +++ b/pkg/statistics/region_collection.go @@ -359,6 +359,7 @@ type LabelStatistics struct { sync.RWMutex regionLabelStats map[uint64]string labelCounter map[string]int + defunctRegions map[uint64]struct{} } // NewLabelStatistics creates a new LabelStatistics. @@ -366,6 +367,7 @@ func NewLabelStatistics() *LabelStatistics { return &LabelStatistics{ regionLabelStats: make(map[uint64]string), labelCounter: make(map[string]int), + defunctRegions: make(map[uint64]struct{}), } } @@ -399,14 +401,26 @@ func (l *LabelStatistics) Reset() { regionLabelLevelGauge.Reset() } -// ClearDefunctRegion is used to handle the overlap region. -func (l *LabelStatistics) ClearDefunctRegion(regionID uint64) { +// MarkDefunctRegion is used to handle the overlap region. +// It is used to mark the region as defunct and remove it from the label statistics later. +func (l *LabelStatistics) MarkDefunctRegion(regionID uint64) { l.Lock() defer l.Unlock() - if label, ok := l.regionLabelStats[regionID]; ok { - l.labelCounter[label]-- - delete(l.regionLabelStats, regionID) + l.defunctRegions[regionID] = struct{}{} +} + +// ClearDefunctRegions is used to handle the overlap region. +// It is used to remove the defunct regions from the label statistics. +func (l *LabelStatistics) ClearDefunctRegions() { + l.Lock() + defer l.Unlock() + for regionID := range l.defunctRegions { + if label, ok := l.regionLabelStats[regionID]; ok { + l.labelCounter[label]-- + delete(l.regionLabelStats, regionID) + } } + l.defunctRegions = make(map[uint64]struct{}) } // GetLabelCounter is only used for tests. diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index 3ed7718f69a..0a5db2e91fc 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -1058,7 +1058,7 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { if c.regionStats != nil { c.regionStats.ClearDefunctRegion(item.GetID()) } - c.labelLevelStats.ClearDefunctRegion(item.GetID()) + c.labelLevelStats.MarkDefunctRegion(item.GetID()) c.ruleManager.InvalidCache(item.GetID()) } regionUpdateCacheEventCounter.Inc() @@ -2167,6 +2167,7 @@ func (c *RaftCluster) updateRegionsLabelLevelStats(regions []*core.RegionInfo) { for _, region := range regions { c.labelLevelStats.Observe(region, c.getStoresWithoutLabelLocked(region, core.EngineKey, core.EngineTiFlash), c.opt.GetLocationLabels()) } + c.labelLevelStats.ClearDefunctRegions() } func (c *RaftCluster) getRegionStoresLocked(region *core.RegionInfo) []*core.StoreInfo { diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index f7ff71b888d..0b4ad9a95b8 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -1093,6 +1093,7 @@ func TestRegionLabelIsolationLevel(t *testing.T) { opt.SetReplicationConfig(cfg) re.NoError(err) cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster.coordinator = newCoordinator(ctx, cluster, nil) for i := uint64(1); i <= 4; i++ { var labels []*metapb.StoreLabel @@ -1127,13 +1128,42 @@ func TestRegionLabelIsolationLevel(t *testing.T) { StartKey: []byte{byte(1)}, EndKey: []byte{byte(2)}, } - r := core.NewRegionInfo(region, peers[0]) - re.NoError(cluster.putRegion(r)) + r1 := core.NewRegionInfo(region, peers[0]) + re.NoError(cluster.putRegion(r1)) - cluster.updateRegionsLabelLevelStats([]*core.RegionInfo{r}) + cluster.updateRegionsLabelLevelStats([]*core.RegionInfo{r1}) counter := cluster.labelLevelStats.GetLabelCounter() re.Equal(0, counter["none"]) re.Equal(1, counter["zone"]) + + region = &metapb.Region{ + Id: 10, + Peers: peers, + StartKey: []byte{byte(2)}, + EndKey: []byte{byte(3)}, + } + r2 := core.NewRegionInfo(region, peers[0]) + re.NoError(cluster.putRegion(r2)) + + cluster.updateRegionsLabelLevelStats([]*core.RegionInfo{r2}) + counter = cluster.labelLevelStats.GetLabelCounter() + re.Equal(0, counter["none"]) + re.Equal(2, counter["zone"]) + + // issue: https://github.com/tikv/pd/issues/8700 + // step1: heartbeat a overlap region, which is used to simulate the case that the region is merged. + // step2: update region 9 and region 10, which is used to simulate the case that patrol is triggered. + // We should only count region 9. + overlapRegion := r1.Clone( + core.WithStartKey(r1.GetStartKey()), + core.WithEndKey(r2.GetEndKey()), + core.WithLeader(r2.GetPeer(8)), + ) + re.NoError(cluster.HandleRegionHeartbeat(overlapRegion)) + cluster.updateRegionsLabelLevelStats([]*core.RegionInfo{r1, r2}) + counter = cluster.labelLevelStats.GetLabelCounter() + re.Equal(0, counter["none"]) + re.Equal(1, counter["zone"]) } func heartbeatRegions(re *require.Assertions, cluster *RaftCluster, regions []*core.RegionInfo) {