From 4f42095506e30330e709069b0ec9e344a164f006 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 13 Sep 2023 15:19:17 +0800 Subject: [PATCH 1/4] *: limit region count Signed-off-by: Neil Shen --- go.mod | 1 + go.sum | 30 +++++++++++++++++++++-------- pkg/core/basic_cluster.go | 33 ++++++++++++++++++++++++++++++++ pkg/core/store.go | 18 +++++++++++++++++ pkg/core/store_stats.go | 14 ++++++++++++++ pkg/errs/errno.go | 1 + pkg/schedule/config/config.go | 28 +++++++++++++++++++++++++++ server/cluster/cluster.go | 31 +++++++++++++++++++++++++++--- server/cluster/cluster_worker.go | 21 +++++++++++++++++++- server/cluster/metrics.go | 17 ++++++++++++++++ server/config/persist_options.go | 15 +++++++++++++++ 11 files changed, 197 insertions(+), 12 deletions(-) diff --git a/go.mod b/go.mod index 503ac53f33f..137939787a3 100644 --- a/go.mod +++ b/go.mod @@ -205,3 +205,4 @@ replace google.golang.org/grpc v1.54.0 => google.golang.org/grpc v1.26.0 // kvproto at the same time. You can run `go mod tidy` to make it replaced with go-mod style specification. // After the PR to kvproto is merged, remember to comment this out and run `go mod tidy`. // replace github.com/pingcap/kvproto => github.com/$YourPrivateRepo $YourPrivateBranch +replace github.com/pingcap/kvproto => /home/stn/kvproto diff --git a/go.sum b/go.sum index e826e53af37..e9afaf099da 100644 --- a/go.sum +++ b/go.sum @@ -89,6 +89,11 @@ github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 h1:q763qf9huN11kDQavWs github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= +github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= +github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa h1:OaNxuTZr7kxeODyLWsRMC+OD03aFUH+mW6r2d+MWa5Y= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= @@ -128,6 +133,8 @@ github.com/elliotchance/pie/v2 v2.1.0/go.mod h1:18t0dgGFH006g4eVdDtWfgFZPQEgl10I github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= +github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= @@ -197,7 +204,6 @@ github.com/goccy/go-graphviz v0.0.9/go.mod h1:wXVsXxmyMQU6TN3zGRttjNn3h+iCAS7xQF github.com/goccy/go-json v0.9.7 h1:IcB+Aqpx/iMHu5Yooh7jEzJk1JZ7Pjtmys2ukPr7EeM= github.com/goccy/go-json v0.9.7/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= github.com/gogo/protobuf v0.0.0-20171007142547-342cbe0a0415/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v0.0.0-20180717141946-636bf0302bc9/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= @@ -217,7 +223,6 @@ github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4er github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef h1:veQD95Isof8w9/WXiA+pa3tz3fJXkt5B7QaRBrM62gk= github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= -github.com/golang/protobuf v0.0.0-20180814211427-aa810b61a9c7/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -228,9 +233,11 @@ github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:x github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= @@ -243,6 +250,7 @@ github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5a github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= @@ -438,9 +446,6 @@ github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTm github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg= github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZFh4N3vQ5HEtld3S+Y+StULhWVvumU0= github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= -github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= -github.com/pingcap/kvproto v0.0.0-20230905082026-5336fac26974 h1:Gn8rf2Mb3QDifUQHdtcopqKclc9L11hjhZFYBE65lcw= -github.com/pingcap/kvproto v0.0.0-20230905082026-5336fac26974/go.mod h1:r0q/CFcwvyeRhKtoqzmWMBebrtpIziQQ9vR+JKh1knc= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 h1:HR/ylkkLmGdSSDaD8IDP+SZrdhV1Kibl9KrHxJ9eciw= github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= @@ -610,6 +615,7 @@ go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= go.etcd.io/etcd v0.5.0-alpha.5.0.20220915004622-85b640cee793 h1:fqmtdYQlwZ/vKWSz5amW+a4cnjg23ojz5iL7rjf08Wg= go.etcd.io/etcd v0.5.0-alpha.5.0.20220915004622-85b640cee793/go.mod h1:eBhtbxXP1qpW0F6+WxoJ64DM1Mrfx46PHtVxEdkLe0I= +go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.5.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= @@ -677,7 +683,6 @@ golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181005035420-146acd28ed58/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -699,6 +704,7 @@ golang.org/x/net v0.0.0-20210421230115-4e50805a0758/go.mod h1:72T/g9IO56b78aLF+1 golang.org/x/net v0.0.0-20210805182204-aaa1db679c0d/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -758,11 +764,13 @@ golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.8.0 h1:n5xxQn2i3PC0yLAbjTpNT85q/Kgzcr2gIoX9OrJUols= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -770,6 +778,7 @@ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0 h1:2sjJmO8cDvYveuX97RDLsxlyUxLl+GHoLxBiRdHllBE= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= @@ -815,12 +824,11 @@ google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7 google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= -google.golang.org/genproto v0.0.0-20181004005441-af9cb2a35e7f/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 h1:KpwkzHKEF7B9Zxg18WzOa7djJ+Ha5DzthMyZYQfEn2A= google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1/go.mod h1:nKE/iIaLqn2bQwXBg8f1g2Ylh6r5MN5CmZvuzZCgsCU= -google.golang.org/grpc v0.0.0-20180607172857-7a6a684ca69e/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= @@ -830,14 +838,20 @@ google.golang.org/grpc v1.26.0 h1:2dTRdpdFEEhJYQD8EMLB61nnrzSCTbG38PhqdhvOltg= google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0= +google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU= +google.golang.org/grpc v1.51.0/go.mod h1:wgNDFcnuBGmxLKI/qn4T+m5BtEBYXJPvibbUPsAIPww= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= diff --git a/pkg/core/basic_cluster.go b/pkg/core/basic_cluster.go index 1c8902ca8cb..23ddb0cb59c 100644 --- a/pkg/core/basic_cluster.go +++ b/pkg/core/basic_cluster.go @@ -25,6 +25,7 @@ type BasicCluster struct { Stores struct { mu syncutil.RWMutex *StoresInfo + allowedRegionReplicaCount uint64 } *RegionsInfo @@ -36,6 +37,7 @@ func NewBasicCluster() *BasicCluster { Stores: struct { mu syncutil.RWMutex *StoresInfo + allowedRegionReplicaCount uint64 }{StoresInfo: NewStoresInfo()}, RegionsInfo: NewRegionsInfo(), @@ -246,6 +248,37 @@ func (bc *BasicCluster) GetStoresWriteRate() (storeIDs []uint64, bytesRates, key return bc.getWriteRate(bc.RegionsInfo.GetStoreWriteRate) } +// GetClusterTotalMemory get the total memory the TiKV cluster. +func (bc *BasicCluster) GetClusterTotalMemory() uint64 { + bc.Stores.mu.RLock() + defer bc.Stores.mu.Unlock() + return bc.Stores.GetTotalMemory() +} + +// GetReplicaCount get the current region replica count. +func (bc *BasicCluster) GetReplicaCount() uint64 { + bc.Stores.mu.RLock() + defer bc.Stores.mu.Unlock() + return bc.Stores.GetReplicaCount() +} + +// CheckAllowedRegionReplicaCount get the number of allowed region replica count of the TiKV cluster. +func (bc *BasicCluster) CheckAllowedRegionReplicaCount(count uint64) bool { + bc.Stores.mu.RLock() + defer bc.Stores.mu.Unlock() + if bc.Stores.allowedRegionReplicaCount <= 0 { + return true + } + return bc.Stores.GetReplicaCount()+count < bc.Stores.allowedRegionReplicaCount +} + +// SetAllowedRegionReplicaCount set the number of allowed region replica count of the TiKV cluster. +func (bc *BasicCluster) SetAllowedRegionReplicaCount(count uint64) { + bc.Stores.mu.RLock() + defer bc.Stores.mu.Unlock() + bc.Stores.allowedRegionReplicaCount = count +} + // RegionSetInformer provides access to a shared informer of regions. type RegionSetInformer interface { GetTotalRegionCount() int diff --git a/pkg/core/store.go b/pkg/core/store.go index 1d3362cac0e..dfa51703cc8 100644 --- a/pkg/core/store.go +++ b/pkg/core/store.go @@ -808,6 +808,24 @@ func (s *StoresInfo) UpdateStoreStatus(storeID uint64, leaderCount, regionCount, } } +// GetTotalMemory get total memory of all stores. +func (s *StoresInfo) GetTotalMemory() uint64 { + totalMemory := uint64(0) + for _, store := range s.stores { + totalMemory += store.GetTotalMemory() + } + return totalMemory +} + +// GetReplicaCount get used memory of all stores. +func (s *StoresInfo) GetReplicaCount() uint64 { + replicaCount := 0 + for _, store := range s.stores { + replicaCount += store.regionCount + } + return uint64(replicaCount) +} + // IsStoreContainLabel returns if the store contains the given label. func IsStoreContainLabel(store *metapb.Store, key, value string) bool { for _, l := range store.GetLabels() { diff --git a/pkg/core/store_stats.go b/pkg/core/store_stats.go index bcc90a58a2b..09aa4c007a5 100644 --- a/pkg/core/store_stats.go +++ b/pkg/core/store_stats.go @@ -149,3 +149,17 @@ func climp0(v float64) uint64 { } return uint64(v) } + +// GetTotalMemory returns the total memory of the store. +func (ss *storeStats) GetTotalMemory() uint64 { + ss.mu.RLock() + defer ss.mu.RUnlock() + return ss.rawStats.GetTotalMemory() +} + +// GetUsedMemory returns the current used memory of the store. +func (ss *storeStats) GetUsedMemory() uint64 { + ss.mu.RLock() + defer ss.mu.RUnlock() + return ss.rawStats.GetUsedMemory() +} diff --git a/pkg/errs/errno.go b/pkg/errs/errno.go index 91cd4a78c4f..7053e84de08 100644 --- a/pkg/errs/errno.go +++ b/pkg/errs/errno.go @@ -110,6 +110,7 @@ var ( ErrInternalGrowth = errors.Normalize("unknown interval growth type error", errors.RFCCodeText("PD:scheduler:ErrInternalGrowth")) ErrSchedulerCreateFuncNotRegistered = errors.Normalize("create func of %v is not registered", errors.RFCCodeText("PD:scheduler:ErrSchedulerCreateFuncNotRegistered")) ErrSchedulerTiKVSplitDisabled = errors.Normalize("tikv split region disabled", errors.RFCCodeText("PD:scheduler:ErrSchedulerTiKVSplitDisabled")) + ErrSchedulerTiKVSplitThrottled = errors.Normalize("tikv split region throttled", errors.RFCCodeText("PD:scheduler:ErrSchedulerTiKVSplitThrottled")) ) // checker errors diff --git a/pkg/schedule/config/config.go b/pkg/schedule/config/config.go index c8fa62b8aff..193f86c3748 100644 --- a/pkg/schedule/config/config.go +++ b/pkg/schedule/config/config.go @@ -17,6 +17,7 @@ package config import ( "time" + "github.com/docker/go-units" "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/metapb" "github.com/tikv/pd/pkg/core/storelimit" @@ -58,6 +59,16 @@ const ( defaultEnableWitness = false defaultHaltScheduling = false + // TODO: set default to false + defaultEnableLimitRegionCount = true + defaultStopSplitRegionMemoryRatio = 0.9 + // The default memory usage per-region in raftstore v1. + // 4 MB is concluded through an test. + // See https://gist.github.com/overvenus/4b52386f07ee34cf4dbdc62961b22763 + defaultMemoryUsagePerRegionReplicaV1 = typeutil.ByteSize(4 * units.MB) + // The default memory usage per-region in raftstore v2. + defaultMemoryUsagePerRegionReplicaV2 = typeutil.ByteSize(4 * units.MB) + defaultRegionScoreFormulaVersion = "v2" defaultLeaderSchedulePolicy = "count" defaultStoreLimitVersion = "v1" @@ -235,6 +246,14 @@ type ScheduleConfig struct { // on ebs-based BR we need to disable it with TTL EnableTiKVSplitRegion bool `toml:"enable-tikv-split-region" json:"enable-tikv-split-region,string"` + // EnableLimitRegionCount is the option to enable limit region count in a TiKV node. + EnableLimitRegionCount bool `toml:"enable-limit-region-count" json:"enable-limit-region-count,string,omitempty"` + // MemoryUsagePerRegionReplica is the approximately memory usage that a region replica needs. + MemoryUsagePerRegionReplica typeutil.ByteSize `toml:"memory-usage-per-region-replica" json:"memory-usage-per-region-replica,string,omitempty"` + // StopSplitRegionMemoryRatio is the ratio of used memory to total memory that stops split region. + // The value must be in [0.0, 1.0] + StopSplitRegionMemoryRatio float64 `toml:"stop-split-region-memory-ratio" json:"stop-split-region-memory-ratio,omitempty"` + // Schedulers support for loading customized schedulers Schedulers SchedulerConfigs `toml:"schedulers" json:"schedulers-v2"` // json v2 is for the sake of compatible upgrade @@ -348,6 +367,15 @@ func (c *ScheduleConfig) Adjust(meta *configutil.ConfigMetaData, reloading bool) if !meta.IsDefined("enable-cross-table-merge") { c.EnableCrossTableMerge = defaultEnableCrossTableMerge } + if !meta.IsDefined("enable-limit-region-count") { + c.EnableLimitRegionCount = defaultEnableLimitRegionCount + } + if !meta.IsDefined("memory-usage-per-region-replica") { + configutil.AdjustByteSize(&c.MemoryUsagePerRegionReplica, defaultMemoryUsagePerRegionReplicaV1) + } + if !meta.IsDefined("stop-split-region-memory-ratio") { + configutil.AdjustFloat64(&c.StopSplitRegionMemoryRatio, defaultStopSplitRegionMemoryRatio) + } configutil.AdjustFloat64(&c.LowSpaceRatio, defaultLowSpaceRatio) configutil.AdjustFloat64(&c.HighSpaceRatio, defaultHighSpaceRatio) if !meta.IsDefined("enable-diagnostic") { diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index b16de73f84e..d63c2f5d9a2 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -980,7 +980,8 @@ func (c *RaftCluster) HandleStoreHeartbeat(heartbeat *pdpb.StoreHeartbeatRequest if store := c.core.GetStore(storeID); store != nil { statistics.UpdateStoreHeartbeatMetrics(store) } - c.core.PutStore(newStore) + + c.updateStoreLocked(newStore) var ( regions map[uint64]*core.RegionInfo interval uint64 @@ -1803,7 +1804,7 @@ func (c *RaftCluster) putStoreLocked(store *core.StoreInfo) error { return err } } - c.core.PutStore(store) + c.updateStoreLocked(store) if !c.isAPIServiceMode { c.hotStat.GetOrCreateRollingStoreStats(store.GetID()) c.slowStat.ObserveSlowStoreStatus(store.GetID(), store.IsSlow()) @@ -1811,6 +1812,25 @@ func (c *RaftCluster) putStoreLocked(store *core.StoreInfo) error { return nil } +func (c *RaftCluster) updateStoreLocked(store *core.StoreInfo) { + originTotalMemory := c.core.GetClusterTotalMemory() + c.core.PutStore(store) + newTotalMemory := c.core.GetClusterTotalMemory() + if c.opt.IsLimitRegionCountEnabled() && originTotalMemory != newTotalMemory { + replicaMemory := c.opt.GetMemoryUsagePerRegionReplica() + stopRatio := c.opt.GetStopSplitRegionMemoryRatio() + // Older TiKV does not report memory stats. + if replicaMemory > 0 && stopRatio > 0.0 { + // Reserve some memory for system and auxiliary usage. + allowedMemory := float64(newTotalMemory) * stopRatio + allowedRegionCount := uint64(float64(replicaMemory) / allowedMemory) + c.core.SetAllowedRegionReplicaCount(allowedRegionCount) + tikvClusterAllowedRegionCount.Set(float64(allowedRegionCount)) + tikvClusterTotalMemory.Set(float64(newTotalMemory)) + } + } +} + func (c *RaftCluster) checkStores() { var offlineStores []*metapb.Store var upStoreCount int @@ -2507,7 +2527,7 @@ func (c *RaftCluster) SetMinResolvedTS(storeID, minResolvedTS uint64) error { } newStore := store.Clone(core.SetMinResolvedTS(minResolvedTS)) - c.core.PutStore(newStore) + c.updateStoreLocked(newStore) return nil } @@ -2811,3 +2831,8 @@ func (c *RaftCluster) GetPausedSchedulerDelayAt(name string) (int64, error) { func (c *RaftCluster) GetPausedSchedulerDelayUntil(name string) (int64, error) { return c.coordinator.GetSchedulersController().GetPausedSchedulerDelayUntil(name) } + +// CheckAllowedRegionReplicaCount checks if spilt more replica is allowed. +func (c *RaftCluster) CheckAllowedRegionReplicaCount(count uint64) bool { + return c.core.CheckAllowedRegionReplicaCount(count) +} diff --git a/server/cluster/cluster_worker.go b/server/cluster/cluster_worker.go index c1da97363b5..d486846801d 100644 --- a/server/cluster/cluster_worker.go +++ b/server/cluster/cluster_worker.go @@ -49,6 +49,10 @@ func (c *RaftCluster) HandleAskSplit(request *pdpb.AskSplitRequest) (*pdpb.AskSp if !c.opt.IsTikvRegionSplitEnabled() { return nil, errs.ErrSchedulerTiKVSplitDisabled.FastGenByArgs() } + replicas := len(request.Region.Peers) + if !c.isAskSplitAllowed(uint64(replicas)) { + return nil, errs.ErrSchedulerTiKVSplitThrottled.FastGenByArgs() + } reqRegion := request.GetRegion() err := c.ValidRequestRegion(reqRegion) if err != nil { @@ -90,6 +94,17 @@ func (c *RaftCluster) isSchedulingHalted() bool { return c.opt.IsSchedulingHalted() } +// isAskSplitAllowed checks if split more region is allowed. +// Every region replica needs a certain amount of memory to run. PD needs to +// limit the total number of region replicas so that they do not overload TiKV +// cluster. +func (c *RaftCluster) isAskSplitAllowed(splitCount uint64) bool { + if !c.opt.IsLimitRegionCountEnabled() { + return true + } + return c.CheckAllowedRegionReplicaCount(splitCount) +} + // ValidRequestRegion is used to decide if the region is valid. func (c *RaftCluster) ValidRequestRegion(reqRegion *metapb.Region) error { startKey := reqRegion.GetStartKey() @@ -115,8 +130,12 @@ func (c *RaftCluster) HandleAskBatchSplit(request *pdpb.AskBatchSplitRequest) (* if !c.opt.IsTikvRegionSplitEnabled() { return nil, errs.ErrSchedulerTiKVSplitDisabled.FastGenByArgs() } - reqRegion := request.GetRegion() splitCount := request.GetSplitCount() + replicas := len(request.Region.Peers) + if !c.isAskSplitAllowed(uint64(splitCount) * uint64(replicas)) { + return nil, errs.ErrSchedulerTiKVSplitThrottled.FastGenByArgs() + } + reqRegion := request.GetRegion() err := c.ValidRequestRegion(reqRegion) if err != nil { return nil, err diff --git a/server/cluster/metrics.go b/server/cluster/metrics.go index e43fe595f70..38b88e9dbf3 100644 --- a/server/cluster/metrics.go +++ b/server/cluster/metrics.go @@ -95,6 +95,21 @@ var ( Name: "store_sync", Help: "The state of store sync config", }, []string{"address", "state"}) + + tikvClusterTotalMemory = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: "pd", + Subsystem: "cluster", + Name: "tikv_cluster_total_memory", + Help: "The total memory of TiKV cluster", + }) + tikvClusterAllowedRegionCount = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: "pd", + Subsystem: "cluster", + Name: "tikv_cluster_allowed_region_count", + Help: "The number of maximum allowed region count of TiKV cluster", + }) ) func init() { @@ -108,4 +123,6 @@ func init() { prometheus.MustRegister(storesETAGauge) prometheus.MustRegister(storeSyncConfigEvent) prometheus.MustRegister(updateStoreStatsGauge) + prometheus.MustRegister(tikvClusterTotalMemory) + prometheus.MustRegister(tikvClusterAllowedRegionCount) } diff --git a/server/config/persist_options.go b/server/config/persist_options.go index 1ea0b79424f..133c54695df 100644 --- a/server/config/persist_options.go +++ b/server/config/persist_options.go @@ -623,6 +623,21 @@ func (o *PersistOptions) IsTikvRegionSplitEnabled() bool { return o.getTTLBoolOr(enableTiKVSplitRegion, o.GetScheduleConfig().EnableTiKVSplitRegion) } +// IsLimitRegionCountEnabled returns whether pd needs to limit region count. +func (o *PersistOptions) IsLimitRegionCountEnabled() bool { + return o.GetScheduleConfig().EnableLimitRegionCount +} + +// GetMemoryUsagePerRegionReplica returns whether pd needs to limit region count. +func (o *PersistOptions) GetMemoryUsagePerRegionReplica() uint64 { + return uint64(o.GetScheduleConfig().MemoryUsagePerRegionReplica) +} + +// GetStopSplitRegionMemoryRatio returns whether pd needs to limit region count. +func (o *PersistOptions) GetStopSplitRegionMemoryRatio() float64 { + return o.GetScheduleConfig().StopSplitRegionMemoryRatio +} + // GetMaxMovableHotPeerSize returns the max movable hot peer size. func (o *PersistOptions) GetMaxMovableHotPeerSize() int64 { return o.GetScheduleConfig().MaxMovableHotPeerSize From cdac9ad34b2cb1359d29c5ccce1e4392cfd314de Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 13 Sep 2023 16:00:45 +0800 Subject: [PATCH 2/4] fix zero allowed replica count Signed-off-by: Neil Shen --- pkg/core/basic_cluster.go | 14 ++++++++------ pkg/schedule/config/config.go | 9 +++++++-- server/cluster/cluster.go | 21 +++++++++++++-------- server/cluster/cluster_test.go | 7 +++++++ 4 files changed, 35 insertions(+), 16 deletions(-) diff --git a/pkg/core/basic_cluster.go b/pkg/core/basic_cluster.go index 23ddb0cb59c..c8a1924180b 100644 --- a/pkg/core/basic_cluster.go +++ b/pkg/core/basic_cluster.go @@ -251,32 +251,34 @@ func (bc *BasicCluster) GetStoresWriteRate() (storeIDs []uint64, bytesRates, key // GetClusterTotalMemory get the total memory the TiKV cluster. func (bc *BasicCluster) GetClusterTotalMemory() uint64 { bc.Stores.mu.RLock() - defer bc.Stores.mu.Unlock() + defer bc.Stores.mu.RUnlock() return bc.Stores.GetTotalMemory() } // GetReplicaCount get the current region replica count. func (bc *BasicCluster) GetReplicaCount() uint64 { bc.Stores.mu.RLock() - defer bc.Stores.mu.Unlock() + defer bc.Stores.mu.RUnlock() return bc.Stores.GetReplicaCount() } // CheckAllowedRegionReplicaCount get the number of allowed region replica count of the TiKV cluster. func (bc *BasicCluster) CheckAllowedRegionReplicaCount(count uint64) bool { bc.Stores.mu.RLock() - defer bc.Stores.mu.Unlock() + defer bc.Stores.mu.RUnlock() if bc.Stores.allowedRegionReplicaCount <= 0 { return true } - return bc.Stores.GetReplicaCount()+count < bc.Stores.allowedRegionReplicaCount + return bc.Stores.GetReplicaCount()+count <= bc.Stores.allowedRegionReplicaCount } // SetAllowedRegionReplicaCount set the number of allowed region replica count of the TiKV cluster. -func (bc *BasicCluster) SetAllowedRegionReplicaCount(count uint64) { +func (bc *BasicCluster) SetAllowedRegionReplicaCount(count uint64) bool { bc.Stores.mu.RLock() - defer bc.Stores.mu.Unlock() + defer bc.Stores.mu.RUnlock() + original := bc.Stores.allowedRegionReplicaCount bc.Stores.allowedRegionReplicaCount = count + return original != count } // RegionSetInformer provides access to a shared informer of regions. diff --git a/pkg/schedule/config/config.go b/pkg/schedule/config/config.go index 193f86c3748..300978cf72b 100644 --- a/pkg/schedule/config/config.go +++ b/pkg/schedule/config/config.go @@ -65,9 +65,9 @@ const ( // The default memory usage per-region in raftstore v1. // 4 MB is concluded through an test. // See https://gist.github.com/overvenus/4b52386f07ee34cf4dbdc62961b22763 - defaultMemoryUsagePerRegionReplicaV1 = typeutil.ByteSize(4 * units.MB) + defaultMemoryUsagePerRegionReplicaV1 = typeutil.ByteSize(4 * units.MiB) // The default memory usage per-region in raftstore v2. - defaultMemoryUsagePerRegionReplicaV2 = typeutil.ByteSize(4 * units.MB) + defaultMemoryUsagePerRegionReplicaV2 = typeutil.ByteSize(4 * units.MiB) defaultRegionScoreFormulaVersion = "v2" defaultLeaderSchedulePolicy = "count" @@ -376,6 +376,11 @@ func (c *ScheduleConfig) Adjust(meta *configutil.ConfigMetaData, reloading bool) if !meta.IsDefined("stop-split-region-memory-ratio") { configutil.AdjustFloat64(&c.StopSplitRegionMemoryRatio, defaultStopSplitRegionMemoryRatio) } + if c.StopSplitRegionMemoryRatio < 0 { + c.StopSplitRegionMemoryRatio = 0.0 + } else if c.StopSplitRegionMemoryRatio > 1 { + c.StopSplitRegionMemoryRatio = 1.0 + } configutil.AdjustFloat64(&c.LowSpaceRatio, defaultLowSpaceRatio) configutil.AdjustFloat64(&c.HighSpaceRatio, defaultHighSpaceRatio) if !meta.IsDefined("enable-diagnostic") { diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index d63c2f5d9a2..2eb60f428b3 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -28,6 +28,7 @@ import ( "time" "github.com/coreos/go-semver/semver" + "github.com/docker/go-units" "github.com/pingcap/errors" "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/metapb" @@ -1813,20 +1814,24 @@ func (c *RaftCluster) putStoreLocked(store *core.StoreInfo) error { } func (c *RaftCluster) updateStoreLocked(store *core.StoreInfo) { - originTotalMemory := c.core.GetClusterTotalMemory() c.core.PutStore(store) - newTotalMemory := c.core.GetClusterTotalMemory() - if c.opt.IsLimitRegionCountEnabled() && originTotalMemory != newTotalMemory { + if c.opt.IsLimitRegionCountEnabled() { replicaMemory := c.opt.GetMemoryUsagePerRegionReplica() stopRatio := c.opt.GetStopSplitRegionMemoryRatio() // Older TiKV does not report memory stats. if replicaMemory > 0 && stopRatio > 0.0 { + totalMemory := c.core.GetClusterTotalMemory() // Reserve some memory for system and auxiliary usage. - allowedMemory := float64(newTotalMemory) * stopRatio - allowedRegionCount := uint64(float64(replicaMemory) / allowedMemory) - c.core.SetAllowedRegionReplicaCount(allowedRegionCount) - tikvClusterAllowedRegionCount.Set(float64(allowedRegionCount)) - tikvClusterTotalMemory.Set(float64(newTotalMemory)) + allowedMemory := float64(totalMemory) * stopRatio + allowedReplicaCount := uint64(allowedMemory / float64(replicaMemory)) + changed := c.core.SetAllowedRegionReplicaCount(allowedReplicaCount) + tikvClusterAllowedRegionCount.Set(float64(allowedReplicaCount)) + tikvClusterTotalMemory.Set(float64(totalMemory)) + if changed { + log.Info("cluster allowed replica count changed", + zap.String("cluster-total-memory", units.BytesSize(float64(totalMemory))), + zap.Uint64("allowed-replica-count", allowedReplicaCount)) + } } } } diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index 605fd222502..cd18b37070a 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -67,6 +67,9 @@ func TestStoreHeartbeat(t *testing.T) { _, opt, err := newTestScheduleConfig() opt.GetScheduleConfig().StoreLimitVersion = "v2" + opt.GetScheduleConfig().EnableLimitRegionCount = true + opt.GetScheduleConfig().MemoryUsagePerRegionReplica = typeutil.ByteSize(10) + opt.GetScheduleConfig().StopSplitRegionMemoryRatio = 1.0 re.NoError(err) cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) @@ -88,6 +91,7 @@ func TestStoreHeartbeat(t *testing.T) { Capacity: 100, Available: 50, RegionCount: 1, + TotalMemory: 100, } re.Error(cluster.HandleStoreHeartbeat(req, resp)) @@ -97,6 +101,9 @@ func TestStoreHeartbeat(t *testing.T) { re.Equal(int64(0), store.GetLastHeartbeatTS().UnixNano()) re.NoError(cluster.HandleStoreHeartbeat(req, resp)) + re.EqualValues((i+1)*100, cluster.core.GetClusterTotalMemory()) + re.True(cluster.core.CheckAllowedRegionReplicaCount(uint64((i + 1) * 10))) + re.False(cluster.core.CheckAllowedRegionReplicaCount(uint64((i+1)*10 + 1))) s := cluster.GetStore(store.GetID()) re.NotEqual(int64(0), s.GetLastHeartbeatTS().UnixNano()) From 419c02fc516a11637837703a552c85b4f410f418 Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 13 Sep 2023 18:01:55 +0800 Subject: [PATCH 3/4] debug Signed-off-by: Neil Shen --- pkg/storage/endpoint/config.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/storage/endpoint/config.go b/pkg/storage/endpoint/config.go index 9104e218f98..a0fc2badb9a 100644 --- a/pkg/storage/endpoint/config.go +++ b/pkg/storage/endpoint/config.go @@ -18,8 +18,10 @@ import ( "encoding/json" "strings" + "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" "go.etcd.io/etcd/clientv3" + "go.uber.org/zap" ) // ConfigStorage defines the storage operations on the config. @@ -41,6 +43,7 @@ func (se *StorageEndpoint) LoadConfig(cfg interface{}) (bool, error) { } err = json.Unmarshal([]byte(value), cfg) if err != nil { + log.Warn("dbg invalid config", zap.String("value", string(value))) return false, errs.ErrJSONUnmarshal.Wrap(err).GenWithStackByCause() } return true, nil From 06b390d04b2a1548c19916110a1ea00cc63526ee Mon Sep 17 00:00:00 2001 From: Neil Shen Date: Wed, 13 Sep 2023 18:18:27 +0800 Subject: [PATCH 4/4] fix ByteSize UnmarshalJSON Signed-off-by: Neil Shen --- pkg/utils/typeutil/size.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pkg/utils/typeutil/size.go b/pkg/utils/typeutil/size.go index c088ca73e59..dd967af2503 100644 --- a/pkg/utils/typeutil/size.go +++ b/pkg/utils/typeutil/size.go @@ -41,13 +41,16 @@ func (b ByteSize) MarshalJSON() ([]byte, error) { // UnmarshalJSON parses a JSON string into the byte size. func (b *ByteSize) UnmarshalJSON(text []byte) error { - s, err := strconv.Unquote(string(text)) - if err != nil { - return errors.WithStack(err) - } - v, err := units.RAMInBytes(s) + v, err := units.RAMInBytes(string(text)) if err != nil { - return errors.WithStack(err) + s, err := strconv.Unquote(string(text)) + if err != nil { + return errors.WithStack(err) + } + v, err = units.RAMInBytes(s) + if err != nil { + return errors.WithStack(err) + } } *b = ByteSize(v) return nil