From 29ee921d2029e184b908346152ed58160a6551f1 Mon Sep 17 00:00:00 2001
From: lhy1024 <admin@liudos.us>
Date: Mon, 24 Jun 2024 11:18:29 +0800
Subject: [PATCH] scheduler: add more comments

Signed-off-by: lhy1024 <admin@liudos.us>
---
 pkg/schedule/schedulers/hot_region.go        |  9 +++-
 pkg/schedule/schedulers/hot_region_config.go |  1 -
 pkg/schedule/schedulers/hot_region_v2.go     |  4 +-
 pkg/statistics/store_hot_peers_infos.go      | 49 +++++++++-----------
 4 files changed, 32 insertions(+), 31 deletions(-)

diff --git a/pkg/schedule/schedulers/hot_region.go b/pkg/schedule/schedulers/hot_region.go
index 5e5e254596a..05297cf4ee8 100644
--- a/pkg/schedule/schedulers/hot_region.go
+++ b/pkg/schedule/schedulers/hot_region.go
@@ -163,6 +163,7 @@ func (h *baseHotScheduler) prepareForBalance(rw utils.RWType, cluster sche.Sched
 	switch rw {
 	case utils.Read:
 		// update read statistics
+		// avoid to update read statistics frequently
 		if time.Since(h.updateReadTime) >= statisticsInterval {
 			regionRead := cluster.RegionReadStats()
 			prepare(regionRead, constant.LeaderKind)
@@ -171,6 +172,7 @@ func (h *baseHotScheduler) prepareForBalance(rw utils.RWType, cluster sche.Sched
 		}
 	case utils.Write:
 		// update write statistics
+		// avoid to update write statistics frequently
 		if time.Since(h.updateWriteTime) >= statisticsInterval {
 			regionWrite := cluster.RegionWriteStats()
 			prepare(regionWrite, constant.LeaderKind)
@@ -510,6 +512,7 @@ type balanceSolver struct {
 	best *solution
 	ops  []*operator.Operator
 
+	// maxSrc and minDst are used to calculate the rank.
 	maxSrc   *statistics.StoreLoad
 	minDst   *statistics.StoreLoad
 	rankStep *statistics.StoreLoad
@@ -1217,7 +1220,7 @@ func (bs *balanceSolver) isUniformSecondPriority(store *statistics.StoreLoadDeta
 // calcProgressiveRank calculates `bs.cur.progressiveRank`.
 // See the comments of `solution.progressiveRank` for more about progressive rank.
 // | ↓ firstPriority \ secondPriority → | isBetter | isNotWorsened | Worsened |
-// |   isBetter                         | -4       | -3            | -1 / 0   |
+// |   isBetter                         | -4       | -3            | -1       |
 // |   isNotWorsened                    | -2       | 1             | 1        |
 // |   Worsened                         | 0        | 1             | 1        |
 func (bs *balanceSolver) calcProgressiveRankV1() {
@@ -1363,17 +1366,21 @@ func (bs *balanceSolver) betterThanV1(old *solution) bool {
 		firstCmp, secondCmp := bs.getRkCmpPrioritiesV1(old)
 		switch bs.cur.progressiveRank {
 		case -4: // isBetter(firstPriority) && isBetter(secondPriority)
+			// Both are better, prefer the one with higher first priority rate.
+			// If the first priority rate is the similiar, prefer the one with higher second priority rate.
 			if firstCmp != 0 {
 				return firstCmp > 0
 			}
 			return secondCmp > 0
 		case -3: // isBetter(firstPriority) && isNotWorsened(secondPriority)
+			// The first priority is better, prefer the one with higher first priority rate.
 			if firstCmp != 0 {
 				return firstCmp > 0
 			}
 			// prefer smaller second priority rate, to reduce oscillation
 			return secondCmp < 0
 		case -2: // isNotWorsened(firstPriority) && isBetter(secondPriority)
+			// The second priority is better, prefer the one with higher second priority rate.
 			if secondCmp != 0 {
 				return secondCmp > 0
 			}
diff --git a/pkg/schedule/schedulers/hot_region_config.go b/pkg/schedule/schedulers/hot_region_config.go
index d71e5e984bd..7d9de73f076 100644
--- a/pkg/schedule/schedulers/hot_region_config.go
+++ b/pkg/schedule/schedulers/hot_region_config.go
@@ -38,7 +38,6 @@ import (
 )
 
 const (
-
 	// Scheduling has a bigger impact on TiFlash, so it needs to be corrected in configuration items
 	// In the default config, the TiKV difference is 1.05*1.05-1 = 0.1025, and the TiFlash difference is 1.15*1.15-1 = 0.3225
 	tiflashToleranceRatioCorrection = 0.1
diff --git a/pkg/schedule/schedulers/hot_region_v2.go b/pkg/schedule/schedulers/hot_region_v2.go
index 50016231cad..00dd5a5da44 100644
--- a/pkg/schedule/schedulers/hot_region_v2.go
+++ b/pkg/schedule/schedulers/hot_region_v2.go
@@ -184,9 +184,9 @@ func (bs *balanceSolver) setSearchRevertRegionsV2() {
 
 // calcProgressiveRankV2 calculates `bs.cur.progressiveRank`.
 // See the comments of `solution.progressiveRank` for more about progressive rank.
-// isBetter: score > 0
+// isBetter: score < 0
 // isNotWorsened: score == 0
-// isWorsened: score < 0
+// isWorsened: score > 0
 // | ↓ firstPriority \ secondPriority → | isBetter | isNotWorsened | isWorsened |
 // |   isBetter                         | -4       | -3            | -2         |
 // |   isNotWorsened                    | -1       | 1             | 1          |
diff --git a/pkg/statistics/store_hot_peers_infos.go b/pkg/statistics/store_hot_peers_infos.go
index 59ee3c20b6f..f7873bdd744 100644
--- a/pkg/statistics/store_hot_peers_infos.go
+++ b/pkg/statistics/store_hot_peers_infos.go
@@ -15,7 +15,6 @@
 package statistics
 
 import (
-	"fmt"
 	"math"
 
 	"github.com/tikv/pd/pkg/core"
@@ -151,7 +150,7 @@ func summaryStoresLoadByEngine(
 ) []*StoreLoadDetail {
 	loadDetail := make([]*StoreLoadDetail, 0, len(storeInfos))
 	allStoreLoadSum := make([]float64, utils.DimLen)
-	allStoreHistoryLoadSum := make([][]float64, utils.DimLen)
+	allStoreHistoryLoadSum := make([][]float64, utils.DimLen) // row: dim, column: time
 	allStoreCount := 0
 	allHotPeersCount := 0
 
@@ -166,49 +165,37 @@ func summaryStoresLoadByEngine(
 		// Find all hot peers first
 		var hotPeers []*HotPeerStat
 		peerLoadSum := make([]float64, utils.DimLen)
-		// TODO: To remove `filterHotPeers`, we need to:
-		// HotLeaders consider `Write{Bytes,Keys}`, so when we schedule `writeLeader`, all peers are leader.
+		// For hot leaders, we need to calculate the sum of the leader's write and read flow rather than the all peers.
 		for _, peer := range filterHotPeers(kind, storeHotPeers[id]) {
 			for i := range peerLoadSum {
 				peerLoadSum[i] += peer.GetLoad(i)
 			}
 			hotPeers = append(hotPeers, peer.Clone())
 		}
-		{
-			// Metric for debug.
-			// TODO: pre-allocate gauge metrics
-			ty := "byte-rate-" + rwTy.String() + "-" + kind.String()
-			hotPeerSummary.WithLabelValues(ty, fmt.Sprintf("%v", id)).Set(peerLoadSum[utils.ByteDim])
-			ty = "key-rate-" + rwTy.String() + "-" + kind.String()
-			hotPeerSummary.WithLabelValues(ty, fmt.Sprintf("%v", id)).Set(peerLoadSum[utils.KeyDim])
-			ty = "query-rate-" + rwTy.String() + "-" + kind.String()
-			hotPeerSummary.WithLabelValues(ty, fmt.Sprintf("%v", id)).Set(peerLoadSum[utils.QueryDim])
-		}
-		loads := collector.GetLoads(storeLoads, peerLoadSum, rwTy, kind)
+		currentLoads := collector.GetLoads(storeLoads, peerLoadSum, rwTy, kind)
 
 		var historyLoads [][]float64
 		if storesHistoryLoads != nil {
-			historyLoads = storesHistoryLoads.Get(id, rwTy, kind)
-			for i, loads := range historyLoads {
-				if allStoreHistoryLoadSum[i] == nil || len(allStoreHistoryLoadSum[i]) < len(loads) {
-					allStoreHistoryLoadSum[i] = make([]float64, len(loads))
+			for i, historyLoads := range storesHistoryLoads.Get(id, rwTy, kind) {
+				if allStoreHistoryLoadSum[i] == nil || len(allStoreHistoryLoadSum[i]) < len(historyLoads) {
+					allStoreHistoryLoadSum[i] = make([]float64, len(historyLoads))
 				}
-				for j, load := range loads {
-					allStoreHistoryLoadSum[i][j] += load
+				for j, historyLoad := range historyLoads {
+					allStoreHistoryLoadSum[i][j] += historyLoad
 				}
 			}
-			storesHistoryLoads.Add(id, rwTy, kind, loads)
+			storesHistoryLoads.Add(id, rwTy, kind, currentLoads)
 		}
 
 		for i := range allStoreLoadSum {
-			allStoreLoadSum[i] += loads[i]
+			allStoreLoadSum[i] += currentLoads[i]
 		}
 		allStoreCount += 1
 		allHotPeersCount += len(hotPeers)
 
 		// Build store load prediction from current load and pending influence.
 		stLoadPred := (&StoreLoad{
-			Loads:        loads,
+			Loads:        currentLoads,
 			Count:        float64(len(hotPeers)),
 			HistoryLoads: historyLoads,
 		}).ToLoadPred(rwTy, info.PendingSum)
@@ -231,8 +218,8 @@ func summaryStoresLoadByEngine(
 		expectLoads[i] = allStoreLoadSum[i] / float64(allStoreCount)
 	}
 
-	// todo: remove some the max value or min value to avoid the effect of extreme value.
-	expectHistoryLoads := make([][]float64, utils.DimLen)
+	// TODO: remove some the max value or min value to avoid the effect of extreme value.
+	expectHistoryLoads := make([][]float64, utils.DimLen) // row: dim, column: time
 	for i := range allStoreHistoryLoadSum {
 		expectHistoryLoads[i] = make([]float64, len(allStoreHistoryLoadSum[i]))
 		for j := range allStoreHistoryLoadSum[i] {
@@ -285,11 +272,19 @@ func summaryStoresLoadByEngine(
 	return loadDetail
 }
 
+// filterHotPeers filters hot peers according to kind.
+// If kind is RegionKind, all hot peers will be returned.
+// If kind is LeaderKind, only leader hot peers will be returned.
 func filterHotPeers(kind constant.ResourceKind, peers []*HotPeerStat) []*HotPeerStat {
 	ret := make([]*HotPeerStat, 0, len(peers))
 	for _, peer := range peers {
-		if kind != constant.LeaderKind || peer.IsLeader() {
+		switch kind {
+		case constant.RegionKind:
 			ret = append(ret, peer)
+		case constant.LeaderKind:
+			if peer.IsLeader() {
+				ret = append(ret, peer)
+			}
 		}
 	}
 	return ret