diff --git a/Makefile b/Makefile index f6a1c9a809a..99d6d680a7a 100644 --- a/Makefile +++ b/Makefile @@ -148,7 +148,7 @@ static: install-tools @ echo "gofmt ..." @ gofmt -s -l -d $(PACKAGE_DIRECTORIES) 2>&1 | awk '{ print } END { if (NR > 0) { exit 1 } }' @ echo "golangci-lint ..." - @ golangci-lint run --verbose $(PACKAGE_DIRECTORIES) + @ golangci-lint run --verbose $(PACKAGE_DIRECTORIES) --allow-parallel-runners @ echo "revive ..." @ revive -formatter friendly -config revive.toml $(PACKAGES) diff --git a/client/go.mod b/client/go.mod index 92706dbc4f1..c8055548f23 100644 --- a/client/go.mod +++ b/client/go.mod @@ -7,7 +7,7 @@ require ( github.com/opentracing/opentracing-go v1.2.0 github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 - github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172 + github.com/pingcap/kvproto v0.0.0-20230726063044-73d6d7f3756b github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 github.com/prometheus/client_golang v1.11.1 github.com/stretchr/testify v1.7.0 diff --git a/client/go.sum b/client/go.sum index 849d8a8275d..dd018e4c055 100644 --- a/client/go.sum +++ b/client/go.sum @@ -105,8 +105,8 @@ github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTm github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg= github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZFh4N3vQ5HEtld3S+Y+StULhWVvumU0= github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= -github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172 h1:FYgKV9znRQmzVrrJDZ0gUfMIvKLAMU1tu1UKJib8bEQ= -github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/pingcap/kvproto v0.0.0-20230726063044-73d6d7f3756b h1:dLoYgMFgzUaS6fAAPdjA7oGDM0LdCIm+qhgb3PzrDps= +github.com/pingcap/kvproto v0.0.0-20230726063044-73d6d7f3756b/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 h1:HR/ylkkLmGdSSDaD8IDP+SZrdhV1Kibl9KrHxJ9eciw= github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/cmd/pd-server/main.go b/cmd/pd-server/main.go index 65bb495edf8..7a1dcc4df30 100644 --- a/cmd/pd-server/main.go +++ b/cmd/pd-server/main.go @@ -93,7 +93,10 @@ func main() { // Creates server. ctx, cancel := context.WithCancel(context.Background()) - serviceBuilders := []server.HandlerBuilder{api.NewHandler, apiv2.NewV2Handler, swaggerserver.NewHandler, autoscaling.NewHandler} + serviceBuilders := []server.HandlerBuilder{api.NewHandler, apiv2.NewV2Handler, autoscaling.NewHandler} + if swaggerserver.Enabled() { + serviceBuilders = append(serviceBuilders, swaggerserver.NewHandler) + } serviceBuilders = append(serviceBuilders, dashboard.GetServiceBuilders()...) svr, err := server.CreateServer(ctx, cfg, serviceBuilders...) 
if err != nil { diff --git a/errors.toml b/errors.toml index 988cd0401f3..4630932c00d 100644 --- a/errors.toml +++ b/errors.toml @@ -91,6 +91,11 @@ error = ''' TiKV cluster not bootstrapped, please start TiKV first ''' +["PD:cluster:ErrSchedulingIsHalted"] +error = ''' +scheduling is halted +''' + ["PD:cluster:ErrStoreIsUp"] error = ''' store is still up, please remove store gracefully diff --git a/go.mod b/go.mod index 5e2c9f6e175..766a0cd734e 100644 --- a/go.mod +++ b/go.mod @@ -25,10 +25,10 @@ require ( github.com/pingcap/errcode v0.3.0 github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce - github.com/pingcap/kvproto v0.0.0-20221104101942-09d82b914df1 + github.com/pingcap/kvproto v0.0.0-20230726063044-73d6d7f3756b github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d - github.com/pingcap/tidb-dashboard v0.0.0-20221201151320-ea3ee6971f2e + github.com/pingcap/tidb-dashboard v0.0.0-20230816095313-e6414634f8fc github.com/prometheus/client_golang v1.11.1 github.com/prometheus/common v0.26.0 github.com/sasha-s/go-deadlock v0.2.0 @@ -50,6 +50,12 @@ require ( gotest.tools/gotestsum v1.7.0 ) +require ( + github.com/google/go-cmp v0.5.9 // indirect + github.com/samber/lo v1.37.0 // indirect + gorm.io/datatypes v1.1.0 // indirect +) + require ( github.com/KyleBanks/depth v1.2.1 // indirect github.com/Masterminds/semver v1.5.0 // indirect @@ -86,7 +92,7 @@ require ( github.com/go-playground/universal-translator v0.17.0 // indirect github.com/go-playground/validator/v10 v10.4.1 // indirect github.com/go-resty/resty/v2 v2.6.0 // indirect - github.com/go-sql-driver/mysql v1.6.0 // indirect + github.com/go-sql-driver/mysql v1.7.0 // indirect github.com/goccy/go-graphviz v0.0.9 // indirect github.com/golang-jwt/jwt v3.2.1+incompatible // indirect github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect @@ -102,7 +108,7 @@ require ( github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d // indirect github.com/inconshreveable/mousetrap v1.0.0 // indirect github.com/jinzhu/inflection v1.0.0 // indirect - github.com/jinzhu/now v1.1.2 // indirect + github.com/jinzhu/now v1.1.5 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/jonboulle/clockwork v0.2.2 // indirect github.com/joomcode/errorx v1.0.1 // indirect @@ -110,11 +116,11 @@ require ( github.com/json-iterator/go v1.1.12 // indirect github.com/konsorten/go-windows-terminal-sequences v1.0.3 // indirect github.com/leodido/go-urn v1.2.0 // indirect - github.com/mailru/easyjson v0.7.6 // indirect + github.com/mailru/easyjson v0.7.6 github.com/mattn/go-colorable v0.1.8 // indirect github.com/mattn/go-isatty v0.0.12 // indirect github.com/mattn/go-runewidth v0.0.8 // indirect - github.com/mattn/go-sqlite3 v1.14.9 // indirect + github.com/mattn/go-sqlite3 v1.14.15 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect github.com/mgechev/dots v0.0.0-20190921121421-c36f7dcfbb81 // indirect github.com/minio/sio v0.3.0 // indirect @@ -139,7 +145,6 @@ require ( github.com/soheilhy/cmux v0.1.4 // indirect github.com/stretchr/objx v0.2.0 // indirect github.com/swaggo/files v0.0.0-20190704085106-630677cd5c14 // indirect - github.com/thoas/go-funk v0.8.0 // indirect github.com/tidwall/gjson v1.9.3 // indirect github.com/tklauser/go-sysconf v0.3.4 // indirect github.com/tklauser/numcpus v0.2.1 // indirect @@ -155,7 +160,7 @@ require ( go.uber.org/dig 
v1.9.0 // indirect go.uber.org/fx v1.12.0 // indirect go.uber.org/multierr v1.7.0 // indirect - golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4 // indirect + golang.org/x/crypto v0.0.0-20221005025214-4161e89ecf1b // indirect golang.org/x/exp v0.0.0-20220321173239-a90fa8a75705 // indirect golang.org/x/image v0.0.0-20200119044424-58c23975cae1 // indirect golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect @@ -170,9 +175,9 @@ require ( gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect - gorm.io/driver/mysql v1.0.6 // indirect - gorm.io/driver/sqlite v1.1.4 // indirect - gorm.io/gorm v1.21.9 // indirect + gorm.io/driver/mysql v1.4.5 // indirect + gorm.io/driver/sqlite v1.4.3 // indirect + gorm.io/gorm v1.24.3 // indirect moul.io/zapgorm2 v1.1.0 // indirect sigs.k8s.io/yaml v1.1.0 // indirect ) diff --git a/go.sum b/go.sum index 3f76172a367..84324b8f00b 100644 --- a/go.sum +++ b/go.sum @@ -167,8 +167,8 @@ github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn github.com/go-resty/resty/v2 v2.6.0 h1:joIR5PNLM2EFqqESUjCMGXrWmXNHEU9CEiK813oKYS4= github.com/go-resty/resty/v2 v2.6.0/go.mod h1:PwvJS6hvaPkjtjNg9ph+VrSD92bi5Zq73w/BIH7cC3Q= github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= -github.com/go-sql-driver/mysql v1.6.0 h1:BCTh4TKNUYmOmMUcQ3IipzF5prigylS7XXjEkfCHuOE= -github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= +github.com/go-sql-driver/mysql v1.7.0 h1:ueSltNNllEqE3qcWBTD0iQd3IpL/6U+mJxLkazJ7YPc= +github.com/go-sql-driver/mysql v1.7.0/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/goccy/go-graphviz v0.0.9 h1:s/FMMJ1Joj6La3S5ApO3Jk2cwM4LpXECC2muFx3IPQQ= github.com/goccy/go-graphviz v0.0.9/go.mod h1:wXVsXxmyMQU6TN3zGRttjNn3h+iCAS7xQFC6TlNvLhk= @@ -180,6 +180,8 @@ github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt v3.2.1+incompatible h1:73Z+4BJcrTC+KczS6WvTPvRGOp1WmfEP4Q1lOd9Z/+c= github.com/golang-jwt/jwt v3.2.1+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I= +github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 h1:au07oEsX2xN0ktxqI+Sida1w446QrXBRJ0nee3SNZlA= +github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei6A= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58= @@ -214,8 +216,9 @@ github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= 
+github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/pprof v0.0.0-20211122183932-1daafda22083 h1:c8EUapQFi+kjzedr4c6WqbwMdmB95+oDBWZ5XFHFYxY= github.com/google/pprof v0.0.0-20211122183932-1daafda22083/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg= @@ -248,13 +251,22 @@ github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d h1:uGg2frl github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w= github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/jackc/chunkreader/v2 v2.0.1 h1:i+RDz65UE+mmpjTfyz0MoVTnzeYxroil2G82ki7MGG8= +github.com/jackc/pgconn v1.13.0 h1:3L1XMNV2Zvca/8BYhzcRFS70Lr0WlDg16Di6SFGAbys= +github.com/jackc/pgio v1.0.0 h1:g12B9UwVnzGhueNavwioyEEpAmqMe1E/BN9ES+8ovkE= +github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= +github.com/jackc/pgproto3/v2 v2.3.1 h1:nwj7qwf0S+Q7ISFfBndqeLwSwxs+4DPsbRFjECT1Y4Y= +github.com/jackc/pgservicefile v0.0.0-20200714003250-2b9c44734f2b h1:C8S2+VttkHFdOOCXJe+YGfa4vHYwlt4Zx+IVXQ97jYg= +github.com/jackc/pgtype v1.12.0 h1:Dlq8Qvcch7kiehm8wPGIW0W3KsCCHJnRacKW0UM8n5w= +github.com/jackc/pgx/v4 v4.17.2 h1:0Ut0rpeKwvIVbMQ1KbMBU4h6wxehBI535LK6Flheh8E= github.com/jarcoal/httpmock v1.0.8 h1:8kI16SoO6LQKgPE7PvQuV+YuD/inwHd7fOOe2zMbo4k= github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= -github.com/jinzhu/now v1.1.1/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= -github.com/jinzhu/now v1.1.2 h1:eVKgfIdy9b6zbWBMgFpfDPoAMifwSZagU9HmEU6zgiI= github.com/jinzhu/now v1.1.2/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= +github.com/jinzhu/now v1.1.4/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= +github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= +github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= @@ -317,15 +329,15 @@ github.com/mattn/go-runewidth v0.0.8 h1:3tS41NlGYSmhhe/8fhGRzc+z3AYCw1Fe1WAyLuuj github.com/mattn/go-runewidth v0.0.8/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= github.com/mattn/go-shellwords v1.0.12 h1:M2zGm7EW6UQJvDeQxo4T51eKPurbeFbe8WtebGE2xrk= github.com/mattn/go-shellwords v1.0.12/go.mod h1:EZzvwXDESEeg03EKmM+RmDnNOPKG4lLtQsUlTZDWQ8Y= -github.com/mattn/go-sqlite3 v1.14.5/go.mod h1:WVKg1VTActs4Qso6iwGbiFih2UIHo0ENGwNd0Lj+XmI= -github.com/mattn/go-sqlite3 v1.14.9 h1:10HX2Td0ocZpYEjhilsuo6WWtUqttj2Kb0KtD86/KYA= -github.com/mattn/go-sqlite3 v1.14.9/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= +github.com/mattn/go-sqlite3 v1.14.15 h1:vfoHhTN1af61xCRSWzFIWzx2YskyMTwHLrExkBOjvxI= +github.com/mattn/go-sqlite3 v1.14.15/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= github.com/matttproud/golang_protobuf_extensions v1.0.1 
h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/mgechev/dots v0.0.0-20190921121421-c36f7dcfbb81 h1:QASJXOGm2RZ5Ardbc86qNFvby9AqkLDibfChMtAg5QM= github.com/mgechev/dots v0.0.0-20190921121421-c36f7dcfbb81/go.mod h1:KQ7+USdGKfpPjXk4Ga+5XxQM4Lm4e3gAogrreFAYpOg= github.com/mgechev/revive v1.0.2 h1:v0NxxQ7fSFz/u1NQydPo6EGdq7va0J1BtsZmae6kzUg= github.com/mgechev/revive v1.0.2/go.mod h1:rb0dQy1LVAxW9SWy5R3LPUjevzUbUS316U5MFySA2lo= +github.com/microsoft/go-mssqldb v0.17.0 h1:Fto83dMZPnYv1Zwx5vHHxpNraeEaUlQ/hhHLgZiaenE= github.com/minio/sio v0.3.0 h1:syEFBewzOMOYVzSTFpp1MqpSZk8rUNbz8VIIc+PNzus= github.com/minio/sio v0.3.0/go.mod h1:8b0yPp2avGThviy/+OCJBI6OMpvxoUuiLvE6F1lebhw= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= @@ -375,16 +387,16 @@ github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c/go.mod h1:X2r9ue github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce h1:Y1kCxlCtlPTMtVcOkjUcuQKh+YrluSo7+7YMCQSzy30= github.com/pingcap/failpoint v0.0.0-20200702092429-9f69995143ce/go.mod h1:w4PEZ5y16LeofeeGwdgZB4ddv9bLyDuIX+ljstgKZyk= github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= -github.com/pingcap/kvproto v0.0.0-20221104101942-09d82b914df1 h1:iJXUNA0LoOYuuMJ6U0tJGg2gCo/8xGZVhKLvuUWNjzw= -github.com/pingcap/kvproto v0.0.0-20221104101942-09d82b914df1/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/pingcap/kvproto v0.0.0-20230726063044-73d6d7f3756b h1:dLoYgMFgzUaS6fAAPdjA7oGDM0LdCIm+qhgb3PzrDps= +github.com/pingcap/kvproto v0.0.0-20230726063044-73d6d7f3756b/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 h1:HR/ylkkLmGdSSDaD8IDP+SZrdhV1Kibl9KrHxJ9eciw= github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d h1:k3/APKZjXOyJrFy8VyYwRlZhMelpD3qBLJNsw3bPl/g= github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d/go.mod h1:7j18ezaWTao2LHOyMlsc2Dg1vW+mDY9dEbPzVyOlaeM= -github.com/pingcap/tidb-dashboard v0.0.0-20221201151320-ea3ee6971f2e h1:FUdoQ6zWktVjIWLokNeulEcqIzGn6TnoOjdS9bQcFUo= -github.com/pingcap/tidb-dashboard v0.0.0-20221201151320-ea3ee6971f2e/go.mod h1:NNF1CfnM5TqrLNfzfSal723h2fVQlieyVBBdQBzfPTg= +github.com/pingcap/tidb-dashboard v0.0.0-20230816095313-e6414634f8fc h1:nw4g5lsSFBSPTU6eUOEgR3qTq2Qr0fr8LLatyM1YM6w= +github.com/pingcap/tidb-dashboard v0.0.0-20230816095313-e6414634f8fc/go.mod h1:OUzFMMVjR1GKlf4LWLqza9QNKjCrYJ7stVn/3PN0djM= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e h1:FBaTXU8C3xgt/drM58VHxojHo/QoG1oPsgWTGvaSpO4= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -424,6 +436,8 @@ github.com/rs/cors v1.7.0 h1:+88SsELBHx5r+hZ8TCkggzSstaWNbDvThkVK8H6f9ik= github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q= 
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/samber/lo v1.37.0 h1:XjVcB8g6tgUp8rsPsJ2CvhClfImrpL04YpQHXeHPhRw= +github.com/samber/lo v1.37.0/go.mod h1:9vaz2O4o8oOnK23pd2TrXufcbdbJIa3b6cstBWKpopA= github.com/sasha-s/go-deadlock v0.2.0 h1:lMqc+fUb7RrFS3gQLtoQsJ7/6TV/pAIFvBsqX73DK8Y= github.com/sasha-s/go-deadlock v0.2.0/go.mod h1:StQn567HiB1fF2yJ44N9au7wOhrPS3iZqiDbRupzT10= github.com/sergi/go-diff v1.0.1-0.20180205163309-da645544ed44 h1:tB9NOR21++IjLyVx3/PCPhWMwqGNCMQEH96A6dMZ/gc= @@ -476,8 +490,6 @@ github.com/swaggo/swag v1.8.3 h1:3pZSSCQ//gAH88lfmxM3Cd1+JCsxV8Md6f36b9hrZ5s= github.com/swaggo/swag v1.8.3/go.mod h1:jMLeXOOmYyjk8PvHTsXBdrubsNd9gUJTTCzL5iBnseg= github.com/syndtr/goleveldb v1.0.1-0.20190318030020-c3a204f8e965 h1:1oFLiOyVl+W7bnBzGhf7BbIv9loSFQcieWWYIjLqcAw= github.com/syndtr/goleveldb v1.0.1-0.20190318030020-c3a204f8e965/go.mod h1:9OrXJhf154huy1nPWmuSrkgjPUtUNhA+Zmy+6AESzuA= -github.com/thoas/go-funk v0.8.0 h1:JP9tKSvnpFVclYgDM0Is7FD9M4fhPvqA0s0BsXmzSRQ= -github.com/thoas/go-funk v0.8.0/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q= github.com/tidwall/gjson v1.6.0/go.mod h1:P256ACg0Mn+j1RXIDXoss50DeIABTYK1PULOJHhxOls= github.com/tidwall/gjson v1.9.3 h1:hqzS9wAHMO+KVBBkLxYdkEeeFHuqr95GfClRLKlgK0E= github.com/tidwall/gjson v1.9.3/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= @@ -563,8 +575,9 @@ golang.org/x/crypto v0.0.0-20190513172903-22d7a77e9e5f/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200204104054-c9f3fb736b72/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4 h1:kUhD7nTDoI3fVd9G4ORWrbV5NY0liEs/Jg2pv5f+bBA= golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.0.0-20221005025214-4161e89ecf1b h1:huxqepDufQpLLIRXiVkTvnxrzJlpwmIWAObmcCcUFr0= +golang.org/x/crypto v0.0.0-20221005025214-4161e89ecf1b/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20220321173239-a90fa8a75705 h1:ba9YlqfDGTTQ5aZ2fwOoQ1hf32QySyQkR6ODGDzHlnE= golang.org/x/exp v0.0.0-20220321173239-a90fa8a75705/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE= @@ -703,7 +716,6 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508= @@ -761,13 +773,19 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod 
h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gorm.io/driver/mysql v1.0.6 h1:mA0XRPjIKi4bkE9nv+NKs6qj6QWOchqUSdWOcpd3x1E= -gorm.io/driver/mysql v1.0.6/go.mod h1:KdrTanmfLPPyAOeYGyG+UpDys7/7eeWT1zCq+oekYnU= -gorm.io/driver/sqlite v1.1.4 h1:PDzwYE+sI6De2+mxAneV9Xs11+ZyKV6oxD3wDGkaNvM= -gorm.io/driver/sqlite v1.1.4/go.mod h1:mJCeTFr7+crvS+TRnWc5Z3UvwxUN1BGBLMrf5LA9DYw= -gorm.io/gorm v1.20.7/go.mod h1:0HFTzE/SqkGTzK6TlDPPQbAYCluiVvhzoA1+aVyzenw= -gorm.io/gorm v1.21.9 h1:INieZtn4P2Pw6xPJ8MzT0G4WUOsHq3RhfuDF1M6GW0E= +gorm.io/datatypes v1.1.0 h1:EVp1Z28N4ACpYFK1nHboEIJGIFfjY7vLeieDk8jSHJA= +gorm.io/datatypes v1.1.0/go.mod h1:SH2K9R+2RMjuX1CkCONrPwoe9JzVv2hkQvEu4bXGojE= +gorm.io/driver/mysql v1.4.5 h1:u1lytId4+o9dDaNcPCFzNv7h6wvmc92UjNk3z8enSBU= +gorm.io/driver/mysql v1.4.5/go.mod h1:SxzItlnT1cb6e1e4ZRpgJN2VYtcqJgqnHxWr4wsP8oc= +gorm.io/driver/postgres v1.4.5 h1:mTeXTTtHAgnS9PgmhN2YeUbazYpLhUI1doLnw42XUZc= +gorm.io/driver/sqlite v1.4.3 h1:HBBcZSDnWi5BW3B3rwvVTc510KGkBkexlOg0QrmLUuU= +gorm.io/driver/sqlite v1.4.3/go.mod h1:0Aq3iPO+v9ZKbcdiz8gLWRw5VOPcBOPUQJFLq5e2ecI= +gorm.io/driver/sqlserver v1.4.1 h1:t4r4r6Jam5E6ejqP7N82qAJIJAht27EGT41HyPfXRw0= gorm.io/gorm v1.21.9/go.mod h1:F+OptMscr0P2F2qU97WT1WimdH9GaQPoDW7AYd5i2Y0= +gorm.io/gorm v1.23.8/go.mod h1:l2lP/RyAtc1ynaTjFksBde/O8v9oOGIApu2/xRitmZk= +gorm.io/gorm v1.24.0/go.mod h1:DVrVomtaYTbqs7gB/x2uVvqnXzv0nqjB396B8cG4dBA= +gorm.io/gorm v1.24.3 h1:WL2ifUmzR/SLp85CSURAfybcHnGZ+yLSGSxgYXlFBHg= +gorm.io/gorm v1.24.3/go.mod h1:DVrVomtaYTbqs7gB/x2uVvqnXzv0nqjB396B8cG4dBA= gotest.tools/gotestsum v1.7.0 h1:RwpqwwFKBAa2h+F6pMEGpE707Edld0etUD3GhqqhDNc= gotest.tools/gotestsum v1.7.0/go.mod h1:V1m4Jw3eBerhI/A6qCxUE07RnCg7ACkKj9BYcAm09V8= gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0= diff --git a/metrics/grafana/pd.json b/metrics/grafana/pd.json index 1847b2be097..d221dd4526e 100644 --- a/metrics/grafana/pd.json +++ b/metrics/grafana/pd.json @@ -2332,6 +2332,113 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The allowance status of the scheduling.", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 41 + }, + "hiddenSeries": false, + "id": 1464, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "7.5.10", + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "pd_scheduling_allowance_status{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=\"$instance\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{kind}}", + "metric": "pd_scheduling_allowance_status", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": 
null, + "title": "Scheduling Allowance Status", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:533", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:534", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "cacheTimeout": null, "colorBackground": false, @@ -2959,7 +3066,7 @@ "format": "time_series", "intervalFactor": 2, "legendFormat": "{{event}}", - "metric": "pd_scheduler_status", + "metric": "pd_schedule_operators_count", "refId": "A", "step": 4 } diff --git a/pkg/cache/ttl.go b/pkg/cache/ttl.go index 11dd1248370..9baafc1d4b0 100644 --- a/pkg/cache/ttl.go +++ b/pkg/cache/ttl.go @@ -19,6 +19,7 @@ import ( "time" "github.com/pingcap/log" + "github.com/tikv/pd/pkg/logutil" "github.com/tikv/pd/pkg/syncutil" "go.uber.org/zap" ) @@ -142,6 +143,7 @@ func (c *ttlCache) Clear() { } func (c *ttlCache) doGC() { + defer logutil.LogPanic() ticker := time.NewTicker(c.gcInterval) defer ticker.Stop() diff --git a/pkg/errs/errno.go b/pkg/errs/errno.go index 0de1ad51f21..38c8385004d 100644 --- a/pkg/errs/errno.go +++ b/pkg/errs/errno.go @@ -121,9 +121,10 @@ var ( // cluster errors var ( - ErrNotBootstrapped = errors.Normalize("TiKV cluster not bootstrapped, please start TiKV first", errors.RFCCodeText("PD:cluster:ErrNotBootstrapped")) - ErrStoreIsUp = errors.Normalize("store is still up, please remove store gracefully", errors.RFCCodeText("PD:cluster:ErrStoreIsUp")) - ErrInvalidStoreID = errors.Normalize("invalid store id %d, not found", errors.RFCCodeText("PD:cluster:ErrInvalidStoreID")) + ErrNotBootstrapped = errors.Normalize("TiKV cluster not bootstrapped, please start TiKV first", errors.RFCCodeText("PD:cluster:ErrNotBootstrapped")) + ErrStoreIsUp = errors.Normalize("store is still up, please remove store gracefully", errors.RFCCodeText("PD:cluster:ErrStoreIsUp")) + ErrInvalidStoreID = errors.Normalize("invalid store id %d, not found", errors.RFCCodeText("PD:cluster:ErrInvalidStoreID")) + ErrSchedulingIsHalted = errors.Normalize("scheduling is halted", errors.RFCCodeText("PD:cluster:ErrSchedulingIsHalted")) ) // versioninfo errors diff --git a/pkg/mock/mockcluster/mockcluster.go b/pkg/mock/mockcluster/mockcluster.go index 854996dd96f..8d041682593 100644 --- a/pkg/mock/mockcluster/mockcluster.go +++ b/pkg/mock/mockcluster/mockcluster.go @@ -95,6 +95,9 @@ func (mc *Cluster) GetAllocator() id.Allocator { return mc.IDAllocator } +// CheckSchedulingAllowance checks if the cluster allows scheduling currently. +func (mc *Cluster) CheckSchedulingAllowance() (bool, error) { return true, nil } + // ScanRegions scans region with start key, until number greater than limit. func (mc *Cluster) ScanRegions(startKey, endKey []byte, limit int) []*core.RegionInfo { return mc.ScanRange(startKey, endKey, limit) @@ -337,6 +340,13 @@ func (mc *Cluster) AddLabelsStore(storeID uint64, regionCount int, labels map[st mc.PutStore(store) } +// AddLabersStoreWithLearnerCount adds store with specified count of region, learner and labels. 
+func (mc *Cluster) AddLabersStoreWithLearnerCount(storeID uint64, regionCount int, learnerCount int, labels map[string]string) { + mc.AddLabelsStore(storeID, regionCount, labels) + store := mc.GetStore(storeID).Clone(core.SetLearnerCount(learnerCount)) + mc.PutStore(store) +} + // AddLeaderRegion adds region with specified leader and followers. func (mc *Cluster) AddLeaderRegion(regionID uint64, leaderStoreID uint64, otherPeerStoreIDs ...uint64) *core.RegionInfo { origin := mc.newMockRegionInfo(regionID, leaderStoreID, otherPeerStoreIDs...) diff --git a/pkg/swaggerserver/swagger_handler.go b/pkg/swaggerserver/swagger_handler.go deleted file mode 100644 index 69cff3d2751..00000000000 --- a/pkg/swaggerserver/swagger_handler.go +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2020 TiKV Project Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build swagger_server -// +build swagger_server - -package swaggerserver - -import ( - "net/http" - - httpSwagger "github.com/swaggo/http-swagger" - _ "github.com/tikv/pd/docs/swagger" -) - -func handler() http.Handler { - return httpSwagger.Handler() -} diff --git a/pkg/swaggerserver/swaggerserver.go b/pkg/swaggerserver/swaggerserver.go index e2bac01bf0f..d95659ff570 100644 --- a/pkg/swaggerserver/swaggerserver.go +++ b/pkg/swaggerserver/swaggerserver.go @@ -12,12 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build swagger_server +// +build swagger_server + package swaggerserver import ( "context" "net/http" + httpSwagger "github.com/swaggo/http-swagger" + _ "github.com/tikv/pd/docs/swagger" "github.com/tikv/pd/server" ) @@ -32,9 +37,14 @@ var ( } ) +// Enabled return true if swagger server is disabled. +func Enabled() bool { + return true +} + // NewHandler creates a HTTP handler for Swagger. func NewHandler(context.Context, *server.Server) (http.Handler, server.ServiceGroup, error) { swaggerHandler := http.NewServeMux() - swaggerHandler.Handle(swaggerPrefix, handler()) + swaggerHandler.Handle(swaggerPrefix, httpSwagger.Handler()) return swaggerHandler, swaggerServiceGroup, nil } diff --git a/pkg/swaggerserver/empty_handler.go b/pkg/swaggerserver/swaggerserver_disable.go similarity index 64% rename from pkg/swaggerserver/empty_handler.go rename to pkg/swaggerserver/swaggerserver_disable.go index 888593b215e..c0e390bfdfc 100644 --- a/pkg/swaggerserver/empty_handler.go +++ b/pkg/swaggerserver/swaggerserver_disable.go @@ -1,4 +1,4 @@ -// Copyright 2020 TiKV Project Authors. +// Copyright 2023 TiKV Project Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,12 +18,18 @@ package swaggerserver import ( - "io" + "context" "net/http" + + "github.com/tikv/pd/server" ) -func handler() http.Handler { - return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - _, _ = io.WriteString(w, "Swagger UI is not built. 
Try `make` without `SWAGGER=1`.\n") - }) +// Enabled return false if swagger server is disabled. +func Enabled() bool { + return false +} + +// NewHandler creates a HTTP handler for Swagger. +func NewHandler(context.Context, *server.Server) (http.Handler, server.ServiceGroup, error) { + return nil, server.ServiceGroup{}, nil } diff --git a/pkg/systimemon/systimemon.go b/pkg/systimemon/systimemon.go index a3124312fa2..ad3929caea9 100644 --- a/pkg/systimemon/systimemon.go +++ b/pkg/systimemon/systimemon.go @@ -20,11 +20,13 @@ import ( "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/logutil" "go.uber.org/zap" ) // StartMonitor calls systimeErrHandler if system time jump backward. func StartMonitor(ctx context.Context, now func() time.Time, systimeErrHandler func()) { + defer logutil.LogPanic() log.Info("start system time monitor") tick := time.NewTicker(100 * time.Millisecond) defer tick.Stop() diff --git a/server/api/min_resolved_ts.go b/server/api/min_resolved_ts.go index c367aabdd1f..9501bf39951 100644 --- a/server/api/min_resolved_ts.go +++ b/server/api/min_resolved_ts.go @@ -16,7 +16,9 @@ package api import ( "net/http" + "strconv" + "github.com/gorilla/mux" "github.com/tikv/pd/pkg/typeutil" "github.com/tikv/pd/server" "github.com/unrolled/render" @@ -41,6 +43,30 @@ type minResolvedTS struct { PersistInterval typeutil.Duration `json:"persist_interval,omitempty"` } +// @Tags min_store_resolved_ts +// @Summary Get store-level min resolved ts. +// @Produce json +// @Success 200 {array} minResolvedTS +// @Failure 400 {string} string "The input is invalid." +// @Failure 500 {string} string "PD server failed to proceed the request." +// @Router /min-resolved-ts/{store_id} [get] +func (h *minResolvedTSHandler) GetStoreMinResolvedTS(w http.ResponseWriter, r *http.Request) { + c := h.svr.GetRaftCluster() + idStr := mux.Vars(r)["store_id"] + storeID, err := strconv.ParseUint(idStr, 10, 64) + if err != nil { + h.rd.JSON(w, http.StatusBadRequest, err.Error()) + return + } + value := c.GetStoreMinResolvedTS(storeID) + persistInterval := c.GetOpts().GetPDServerConfig().MinResolvedTSPersistenceInterval + h.rd.JSON(w, http.StatusOK, minResolvedTS{ + MinResolvedTS: value, + PersistInterval: persistInterval, + IsRealTime: persistInterval.Duration != 0, + }) +} + // @Tags min_resolved_ts // @Summary Get cluster-level min resolved ts. 
// @Produce json diff --git a/server/api/operator_test.go b/server/api/operator_test.go index 4b30f9f5e16..973f679f098 100644 --- a/server/api/operator_test.go +++ b/server/api/operator_test.go @@ -217,7 +217,13 @@ func (suite *transferRegionOperatorTestSuite) TestTransferRegionWithPlacementRul regionURL := fmt.Sprintf("%s/operators/%d", suite.urlPrefix, region.GetId()) operator := mustReadURL(re, regionURL) suite.Contains(operator, "operator not found") - + convertStepsToStr := func(steps []string) string { + stepStrs := make([]string, len(steps)) + for i := range steps { + stepStrs[i] = fmt.Sprintf("%d:{%s}", i, steps[i]) + } + return strings.Join(stepStrs, ", ") + } testCases := []struct { name string placementRuleEnable bool @@ -231,25 +237,25 @@ func (suite *transferRegionOperatorTestSuite) TestTransferRegionWithPlacementRul placementRuleEnable: false, input: []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 3]}`), expectedError: nil, - expectSteps: strings.Join([]string{ + expectSteps: convertStepsToStr([]string{ pdoperator.AddLearner{ToStore: 3, PeerID: 1}.String(), pdoperator.PromoteLearner{ToStore: 3, PeerID: 1}.String(), pdoperator.TransferLeader{FromStore: 1, ToStore: 2}.String(), pdoperator.RemovePeer{FromStore: 1, PeerID: 1}.String(), - }, ", "), + }), }, { name: "placement rule disable with peer role", placementRuleEnable: false, input: []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 3], "peer_roles":["follower", "leader"]}`), expectedError: nil, - expectSteps: strings.Join([]string{ + expectSteps: convertStepsToStr([]string{ pdoperator.AddLearner{ToStore: 3, PeerID: 2}.String(), pdoperator.PromoteLearner{ToStore: 3, PeerID: 2}.String(), pdoperator.TransferLeader{FromStore: 1, ToStore: 2}.String(), pdoperator.RemovePeer{FromStore: 1, PeerID: 2}.String(), pdoperator.TransferLeader{FromStore: 2, ToStore: 3}.String(), - }, ", "), + }), }, { name: "default placement rule without peer role", @@ -262,13 +268,13 @@ func (suite *transferRegionOperatorTestSuite) TestTransferRegionWithPlacementRul name: "default placement rule with peer role", placementRuleEnable: true, input: []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 3], "peer_roles":["follower", "leader"]}`), - expectSteps: strings.Join([]string{ + expectSteps: convertStepsToStr([]string{ pdoperator.AddLearner{ToStore: 3, PeerID: 3}.String(), pdoperator.PromoteLearner{ToStore: 3, PeerID: 3}.String(), pdoperator.TransferLeader{FromStore: 1, ToStore: 2}.String(), pdoperator.RemovePeer{FromStore: 1, PeerID: 1}.String(), pdoperator.TransferLeader{FromStore: 2, ToStore: 3}.String(), - }, ", "), + }), }, { name: "default placement rule with invalid input", @@ -323,12 +329,12 @@ func (suite *transferRegionOperatorTestSuite) TestTransferRegionWithPlacementRul }, input: []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 3], "peer_roles":["follower", "leader"]}`), expectedError: nil, - expectSteps: strings.Join([]string{ + expectSteps: convertStepsToStr([]string{ pdoperator.AddLearner{ToStore: 3, PeerID: 5}.String(), pdoperator.PromoteLearner{ToStore: 3, PeerID: 5}.String(), pdoperator.TransferLeader{FromStore: 1, ToStore: 3}.String(), pdoperator.RemovePeer{FromStore: 1, PeerID: 1}.String(), - }, ", "), + }), }, { name: "customized placement rule with valid peer role2", @@ -363,12 +369,12 @@ func (suite *transferRegionOperatorTestSuite) TestTransferRegionWithPlacementRul }, input: []byte(`{"name":"transfer-region", "region_id": 1, "to_store_ids": [2, 
3], "peer_roles":["leader", "follower"]}`), expectedError: nil, - expectSteps: strings.Join([]string{ + expectSteps: convertStepsToStr([]string{ pdoperator.AddLearner{ToStore: 3, PeerID: 6}.String(), pdoperator.PromoteLearner{ToStore: 3, PeerID: 6}.String(), pdoperator.TransferLeader{FromStore: 1, ToStore: 2}.String(), pdoperator.RemovePeer{FromStore: 1, PeerID: 1}.String(), - }, ", "), + }), }, } for _, testCase := range testCases { diff --git a/server/api/region.go b/server/api/region.go index 796c3acafa4..06a002a561d 100644 --- a/server/api/region.go +++ b/server/api/region.go @@ -16,14 +16,17 @@ package api import ( "container/heap" + "context" "encoding/hex" "fmt" "net/http" "net/url" "sort" "strconv" + "strings" "github.com/gorilla/mux" + jwriter "github.com/mailru/easyjson/jwriter" "github.com/pingcap/failpoint" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/kvproto/pkg/pdpb" @@ -52,6 +55,17 @@ type MetaPeer struct { IsLearner bool `json:"is_learner,omitempty"` } +func (m *MetaPeer) setDefaultIfNil() { + if m.Peer == nil { + m.Peer = &metapb.Peer{ + Id: m.GetId(), + StoreId: m.GetStoreId(), + Role: m.GetRole(), + IsWitness: m.GetIsWitness(), + } + } +} + // PDPeerStats is api compatible with *pdpb.PeerStats. // NOTE: This type is exported by HTTP API. Please pay more attention when modifying it. type PDPeerStats struct { @@ -59,6 +73,16 @@ type PDPeerStats struct { Peer MetaPeer `json:"peer"` } +func (s *PDPeerStats) setDefaultIfNil() { + if s.PeerStats == nil { + s.PeerStats = &pdpb.PeerStats{ + Peer: s.GetPeer(), + DownSeconds: s.GetDownSeconds(), + } + } + s.Peer.setDefaultIfNil() +} + func fromPeer(peer *metapb.Peer) MetaPeer { if peer == nil { return MetaPeer{} @@ -101,6 +125,7 @@ func fromPeerStatsSlice(peers []*pdpb.PeerStats) []PDPeerStats { // RegionInfo records detail region info for api usage. // NOTE: This type is exported by HTTP API. Please pay more attention when modifying it. +// easyjson:json type RegionInfo struct { ID uint64 `json:"id"` StartKey string `json:"start_key"` @@ -167,9 +192,9 @@ func InitRegion(r *core.RegionInfo, s *RegionInfo) *RegionInfo { s.ApproximateSize = r.GetApproximateSize() s.ApproximateKeys = r.GetApproximateKeys() s.ReplicationStatus = fromPBReplicationStatus(r.GetReplicationStatus()) + s.Buckets = nil keys := r.GetBuckets().GetKeys() - if len(keys) > 0 { s.Buckets = make([]string, len(keys)) for i, key := range keys { @@ -311,15 +336,48 @@ func newRegionsHandler(svr *server.Server, rd *render.Render) *regionsHandler { } } -func convertToAPIRegions(regions []*core.RegionInfo) *RegionsInfo { - regionInfos := make([]RegionInfo, len(regions)) +// marshalRegionsInfoJSON marshals regions to bytes in `RegionsInfo`'s JSON format. +// It is used to reduce the cost of JSON serialization. +func marshalRegionsInfoJSON(ctx context.Context, regions []*core.RegionInfo) ([]byte, error) { + out := &jwriter.Writer{} + out.RawByte('{') + + out.RawString("\"count\":") + out.Int(len(regions)) + + out.RawString(",\"regions\":") + out.RawByte('[') + region := &RegionInfo{} for i, r := range regions { - InitRegion(r, ®ionInfos[i]) - } - return &RegionsInfo{ - Count: len(regions), - Regions: regionInfos, + select { + case <-ctx.Done(): + // Return early, avoid the unnecessary computation. + // See more details in https://github.com/tikv/pd/issues/6835 + return nil, ctx.Err() + default: + } + if i > 0 { + out.RawByte(',') + } + InitRegion(r, region) + // EasyJSON will not check anonymous struct pointer field and will panic if the field is nil. 
+ // So we need to set the field to default value explicitly when the anonymous struct pointer is nil. + region.Leader.setDefaultIfNil() + for i := range region.Peers { + region.Peers[i].setDefaultIfNil() + } + for i := range region.PendingPeers { + region.PendingPeers[i].setDefaultIfNil() + } + for i := range region.DownPeers { + region.DownPeers[i].setDefaultIfNil() + } + region.MarshalEasyJSON(out) } + out.RawByte(']') + + out.RawByte('}') + return out.Buffer.BuildBytes(), out.Error } // @Tags region @@ -330,8 +388,7 @@ func convertToAPIRegions(regions []*core.RegionInfo) *RegionsInfo { func (h *regionsHandler) GetRegions(w http.ResponseWriter, r *http.Request) { rc := getCluster(r) regions := rc.GetRegions() - regionsInfo := convertToAPIRegions(regions) - h.rd.JSON(w, http.StatusOK, regionsInfo) + h.returnWithRegions(w, r, regions) } // @Tags region @@ -361,8 +418,7 @@ func (h *regionsHandler) ScanRegions(w http.ResponseWriter, r *http.Request) { limit = maxRegionLimit } regions := rc.ScanRegions([]byte(startKey), []byte(endKey), limit) - regionsInfo := convertToAPIRegions(regions) - h.rd.JSON(w, http.StatusOK, regionsInfo) + h.returnWithRegions(w, r, regions) } // @Tags region @@ -393,8 +449,7 @@ func (h *regionsHandler) GetStoreRegions(w http.ResponseWriter, r *http.Request) return } regions := rc.GetStoreRegions(uint64(id)) - regionsInfo := convertToAPIRegions(regions) - h.rd.JSON(w, http.StatusOK, regionsInfo) + h.returnWithRegions(w, r, regions) } // @Tags region @@ -404,14 +459,7 @@ func (h *regionsHandler) GetStoreRegions(w http.ResponseWriter, r *http.Request) // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /regions/check/miss-peer [get] func (h *regionsHandler) GetMissPeerRegions(w http.ResponseWriter, r *http.Request) { - handler := h.svr.GetHandler() - regions, err := handler.GetRegionsByType(statistics.MissPeer) - if err != nil { - h.rd.JSON(w, http.StatusInternalServerError, err.Error()) - return - } - regionsInfo := convertToAPIRegions(regions) - h.rd.JSON(w, http.StatusOK, regionsInfo) + h.getRegionsByType(w, statistics.MissPeer, r) } // @Tags region @@ -421,14 +469,7 @@ func (h *regionsHandler) GetMissPeerRegions(w http.ResponseWriter, r *http.Reque // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /regions/check/extra-peer [get] func (h *regionsHandler) GetExtraPeerRegions(w http.ResponseWriter, r *http.Request) { - handler := h.svr.GetHandler() - regions, err := handler.GetRegionsByType(statistics.ExtraPeer) - if err != nil { - h.rd.JSON(w, http.StatusInternalServerError, err.Error()) - return - } - regionsInfo := convertToAPIRegions(regions) - h.rd.JSON(w, http.StatusOK, regionsInfo) + h.getRegionsByType(w, statistics.ExtraPeer, r) } // @Tags region @@ -438,14 +479,7 @@ func (h *regionsHandler) GetExtraPeerRegions(w http.ResponseWriter, r *http.Requ // @Failure 500 {string} string "PD server failed to proceed the request." 
// @Router /regions/check/pending-peer [get] func (h *regionsHandler) GetPendingPeerRegions(w http.ResponseWriter, r *http.Request) { - handler := h.svr.GetHandler() - regions, err := handler.GetRegionsByType(statistics.PendingPeer) - if err != nil { - h.rd.JSON(w, http.StatusInternalServerError, err.Error()) - return - } - regionsInfo := convertToAPIRegions(regions) - h.rd.JSON(w, http.StatusOK, regionsInfo) + h.getRegionsByType(w, statistics.PendingPeer, r) } // @Tags region @@ -455,14 +489,7 @@ func (h *regionsHandler) GetPendingPeerRegions(w http.ResponseWriter, r *http.Re // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /regions/check/down-peer [get] func (h *regionsHandler) GetDownPeerRegions(w http.ResponseWriter, r *http.Request) { - handler := h.svr.GetHandler() - regions, err := handler.GetRegionsByType(statistics.DownPeer) - if err != nil { - h.rd.JSON(w, http.StatusInternalServerError, err.Error()) - return - } - regionsInfo := convertToAPIRegions(regions) - h.rd.JSON(w, http.StatusOK, regionsInfo) + h.getRegionsByType(w, statistics.DownPeer, r) } // @Tags region @@ -472,14 +499,7 @@ func (h *regionsHandler) GetDownPeerRegions(w http.ResponseWriter, r *http.Reque // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /regions/check/learner-peer [get] func (h *regionsHandler) GetLearnerPeerRegions(w http.ResponseWriter, r *http.Request) { - handler := h.svr.GetHandler() - regions, err := handler.GetRegionsByType(statistics.LearnerPeer) - if err != nil { - h.rd.JSON(w, http.StatusInternalServerError, err.Error()) - return - } - regionsInfo := convertToAPIRegions(regions) - h.rd.JSON(w, http.StatusOK, regionsInfo) + h.getRegionsByType(w, statistics.LearnerPeer, r) } // @Tags region @@ -489,14 +509,7 @@ func (h *regionsHandler) GetLearnerPeerRegions(w http.ResponseWriter, r *http.Re // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /regions/check/offline-peer [get] func (h *regionsHandler) GetOfflinePeerRegions(w http.ResponseWriter, r *http.Request) { - handler := h.svr.GetHandler() - regions, err := handler.GetOfflinePeer(statistics.OfflinePeer) - if err != nil { - h.rd.JSON(w, http.StatusInternalServerError, err.Error()) - return - } - regionsInfo := convertToAPIRegions(regions) - h.rd.JSON(w, http.StatusOK, regionsInfo) + h.getRegionsByType(w, statistics.OfflinePeer, r) } // @Tags region @@ -506,14 +519,7 @@ func (h *regionsHandler) GetOfflinePeerRegions(w http.ResponseWriter, r *http.Re // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /regions/check/oversized-region [get] func (h *regionsHandler) GetOverSizedRegions(w http.ResponseWriter, r *http.Request) { - handler := h.svr.GetHandler() - regions, err := handler.GetRegionsByType(statistics.OversizedRegion) - if err != nil { - h.rd.JSON(w, http.StatusInternalServerError, err.Error()) - return - } - regionsInfo := convertToAPIRegions(regions) - h.rd.JSON(w, http.StatusOK, regionsInfo) + h.getRegionsByType(w, statistics.OversizedRegion, r) } // @Tags region @@ -523,14 +529,7 @@ func (h *regionsHandler) GetOverSizedRegions(w http.ResponseWriter, r *http.Requ // @Failure 500 {string} string "PD server failed to proceed the request." 
// @Router /regions/check/undersized-region [get] func (h *regionsHandler) GetUndersizedRegions(w http.ResponseWriter, r *http.Request) { - handler := h.svr.GetHandler() - regions, err := handler.GetRegionsByType(statistics.UndersizedRegion) - if err != nil { - h.rd.JSON(w, http.StatusInternalServerError, err.Error()) - return - } - regionsInfo := convertToAPIRegions(regions) - h.rd.JSON(w, http.StatusOK, regionsInfo) + h.getRegionsByType(w, statistics.UndersizedRegion, r) } // @Tags region @@ -540,14 +539,24 @@ func (h *regionsHandler) GetUndersizedRegions(w http.ResponseWriter, r *http.Req // @Failure 500 {string} string "PD server failed to proceed the request." // @Router /regions/check/empty-region [get] func (h *regionsHandler) GetEmptyRegions(w http.ResponseWriter, r *http.Request) { - handler := h.svr.GetHandler() - regions, err := handler.GetRegionsByType(statistics.EmptyRegion) + h.getRegionsByType(w, statistics.EmptyRegion, r) +} + +func (h *regionsHandler) getRegionsByType(w http.ResponseWriter, t statistics.RegionStatisticType, r *http.Request) { + regions, err := h.svr.GetHandler().GetRegionsByType(t) if err != nil { h.rd.JSON(w, http.StatusInternalServerError, err.Error()) return } - regionsInfo := convertToAPIRegions(regions) - h.rd.JSON(w, http.StatusOK, regionsInfo) + h.returnWithRegions(w, r, regions) +} + +func (h *regionsHandler) returnWithRegions(w http.ResponseWriter, r *http.Request, regions []*core.RegionInfo) { + b, err := marshalRegionsInfoJSON(r.Context(), regions) + if err != nil { + h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + } + h.rd.Data(w, http.StatusOK, b) } type histItem struct { @@ -687,8 +696,12 @@ func (h *regionsHandler) GetRegionSiblings(w http.ResponseWriter, r *http.Reques } left, right := rc.GetAdjacentRegions(region) - regionsInfo := convertToAPIRegions([]*core.RegionInfo{left, right}) - h.rd.JSON(w, http.StatusOK, regionsInfo) + b, err := marshalRegionsInfoJSON(r.Context(), []*core.RegionInfo{left, right}) + if err != nil { + h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + h.rd.Data(w, http.StatusOK, b) } const ( @@ -836,6 +849,64 @@ func (h *regionsHandler) AccelerateRegionsScheduleInRange(w http.ResponseWriter, h.rd.Text(w, http.StatusOK, fmt.Sprintf("Accelerate regions scheduling in a given range [%s,%s)", rawStartKey, rawEndKey)) } +// @Tags region +// @Summary Accelerate regions scheduling in given ranges, only receive hex format for keys +// @Accept json +// @Param body body object true "json params" +// @Param limit query integer false "Limit count" default(256) +// @Produce json +// @Success 200 {string} string "Accelerate regions scheduling in given ranges [startKey1, endKey1), [startKey2, endKey2), ..." +// @Failure 400 {string} string "The input is invalid." 
+// @Router /regions/accelerate-schedule/batch [post] +func (h *regionsHandler) AccelerateRegionsScheduleInRanges(w http.ResponseWriter, r *http.Request) { + rc := getCluster(r) + var input []map[string]interface{} + if err := apiutil.ReadJSONRespondError(h.rd, w, r.Body, &input); err != nil { + return + } + limit := 256 + if limitStr := r.URL.Query().Get("limit"); limitStr != "" { + var err error + limit, err = strconv.Atoi(limitStr) + if err != nil { + h.rd.JSON(w, http.StatusBadRequest, err.Error()) + return + } + } + if limit > maxRegionLimit { + limit = maxRegionLimit + } + var msgBuilder strings.Builder + msgBuilder.Grow(128) + msgBuilder.WriteString("Accelerate regions scheduling in given ranges: ") + regionsIDSet := make(map[uint64]struct{}) + for _, rg := range input { + startKey, rawStartKey, err := apiutil.ParseKey("start_key", rg) + if err != nil { + h.rd.JSON(w, http.StatusBadRequest, err.Error()) + return + } + endKey, rawEndKey, err := apiutil.ParseKey("end_key", rg) + if err != nil { + h.rd.JSON(w, http.StatusBadRequest, err.Error()) + return + } + regions := rc.ScanRegions(startKey, endKey, limit) + for _, region := range regions { + regionsIDSet[region.GetID()] = struct{}{} + } + msgBuilder.WriteString(fmt.Sprintf("[%s,%s), ", rawStartKey, rawEndKey)) + } + if len(regionsIDSet) > 0 { + regionsIDList := make([]uint64, 0, len(regionsIDSet)) + for id := range regionsIDSet { + regionsIDList = append(regionsIDList, id) + } + rc.AddSuspectRegions(regionsIDList...) + } + h.rd.Text(w, http.StatusOK, msgBuilder.String()) +} + func (h *regionsHandler) GetTopNRegions(w http.ResponseWriter, r *http.Request, less func(a, b *core.RegionInfo) bool) { rc := getCluster(r) limit := defaultRegionLimit @@ -851,8 +922,12 @@ func (h *regionsHandler) GetTopNRegions(w http.ResponseWriter, r *http.Request, limit = maxRegionLimit } regions := TopNRegions(rc.GetRegions(), less, limit) - regionsInfo := convertToAPIRegions(regions) - h.rd.JSON(w, http.StatusOK, regionsInfo) + b, err := marshalRegionsInfoJSON(r.Context(), regions) + if err != nil { + h.rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + h.rd.Data(w, http.StatusOK, b) } // @Tags region diff --git a/server/api/region_easyjson.go b/server/api/region_easyjson.go new file mode 100644 index 00000000000..4bd9fe69e42 --- /dev/null +++ b/server/api/region_easyjson.go @@ -0,0 +1,567 @@ +// Code generated by easyjson for marshaling/unmarshaling. DO NOT EDIT. 
+ +package api + +import ( + json "encoding/json" + easyjson "github.com/mailru/easyjson" + jlexer "github.com/mailru/easyjson/jlexer" + jwriter "github.com/mailru/easyjson/jwriter" + metapb "github.com/pingcap/kvproto/pkg/metapb" + pdpb "github.com/pingcap/kvproto/pkg/pdpb" +) + +// suppress unused package warning +var ( + _ *json.RawMessage + _ *jlexer.Lexer + _ *jwriter.Writer + _ easyjson.Marshaler +) + +func easyjson75d7afa0DecodeGithubComTikvPdServerApi(in *jlexer.Lexer, out *RegionInfo) { + isTopLevel := in.IsStart() + if in.IsNull() { + if isTopLevel { + in.Consumed() + } + in.Skip() + return + } + in.Delim('{') + for !in.IsDelim('}') { + key := in.UnsafeFieldName(false) + in.WantColon() + if in.IsNull() { + in.Skip() + in.WantComma() + continue + } + switch key { + case "id": + out.ID = uint64(in.Uint64()) + case "start_key": + out.StartKey = string(in.String()) + case "end_key": + out.EndKey = string(in.String()) + case "epoch": + if in.IsNull() { + in.Skip() + out.RegionEpoch = nil + } else { + if out.RegionEpoch == nil { + out.RegionEpoch = new(metapb.RegionEpoch) + } + easyjson75d7afa0DecodeGithubComPingcapKvprotoPkgMetapb(in, out.RegionEpoch) + } + case "peers": + if in.IsNull() { + in.Skip() + out.Peers = nil + } else { + in.Delim('[') + if out.Peers == nil { + if !in.IsDelim(']') { + out.Peers = make([]MetaPeer, 0, 2) + } else { + out.Peers = []MetaPeer{} + } + } else { + out.Peers = (out.Peers)[:0] + } + for !in.IsDelim(']') { + var v1 MetaPeer + easyjson75d7afa0DecodeGithubComTikvPdServerApi1(in, &v1) + out.Peers = append(out.Peers, v1) + in.WantComma() + } + in.Delim(']') + } + case "leader": + easyjson75d7afa0DecodeGithubComTikvPdServerApi1(in, &out.Leader) + case "down_peers": + if in.IsNull() { + in.Skip() + out.DownPeers = nil + } else { + in.Delim('[') + if out.DownPeers == nil { + if !in.IsDelim(']') { + out.DownPeers = make([]PDPeerStats, 0, 1) + } else { + out.DownPeers = []PDPeerStats{} + } + } else { + out.DownPeers = (out.DownPeers)[:0] + } + for !in.IsDelim(']') { + var v2 PDPeerStats + easyjson75d7afa0DecodeGithubComTikvPdServerApi2(in, &v2) + out.DownPeers = append(out.DownPeers, v2) + in.WantComma() + } + in.Delim(']') + } + case "pending_peers": + if in.IsNull() { + in.Skip() + out.PendingPeers = nil + } else { + in.Delim('[') + if out.PendingPeers == nil { + if !in.IsDelim(']') { + out.PendingPeers = make([]MetaPeer, 0, 2) + } else { + out.PendingPeers = []MetaPeer{} + } + } else { + out.PendingPeers = (out.PendingPeers)[:0] + } + for !in.IsDelim(']') { + var v3 MetaPeer + easyjson75d7afa0DecodeGithubComTikvPdServerApi1(in, &v3) + out.PendingPeers = append(out.PendingPeers, v3) + in.WantComma() + } + in.Delim(']') + } + case "cpu_usage": + out.CPUUsage = uint64(in.Uint64()) + case "written_bytes": + out.WrittenBytes = uint64(in.Uint64()) + case "read_bytes": + out.ReadBytes = uint64(in.Uint64()) + case "written_keys": + out.WrittenKeys = uint64(in.Uint64()) + case "read_keys": + out.ReadKeys = uint64(in.Uint64()) + case "approximate_size": + out.ApproximateSize = int64(in.Int64()) + case "approximate_keys": + out.ApproximateKeys = int64(in.Int64()) + case "buckets": + if in.IsNull() { + in.Skip() + out.Buckets = nil + } else { + in.Delim('[') + if out.Buckets == nil { + if !in.IsDelim(']') { + out.Buckets = make([]string, 0, 4) + } else { + out.Buckets = []string{} + } + } else { + out.Buckets = (out.Buckets)[:0] + } + for !in.IsDelim(']') { + var v4 string + v4 = string(in.String()) + out.Buckets = append(out.Buckets, v4) + in.WantComma() + } + 
in.Delim(']') + } + case "replication_status": + if in.IsNull() { + in.Skip() + out.ReplicationStatus = nil + } else { + if out.ReplicationStatus == nil { + out.ReplicationStatus = new(ReplicationStatus) + } + easyjson75d7afa0DecodeGithubComTikvPdServerApi3(in, out.ReplicationStatus) + } + default: + in.SkipRecursive() + } + in.WantComma() + } + in.Delim('}') + if isTopLevel { + in.Consumed() + } +} +func easyjson75d7afa0EncodeGithubComTikvPdServerApi(out *jwriter.Writer, in RegionInfo) { + out.RawByte('{') + first := true + _ = first + { + const prefix string = ",\"id\":" + out.RawString(prefix[1:]) + out.Uint64(uint64(in.ID)) + } + { + const prefix string = ",\"start_key\":" + out.RawString(prefix) + out.String(string(in.StartKey)) + } + { + const prefix string = ",\"end_key\":" + out.RawString(prefix) + out.String(string(in.EndKey)) + } + if in.RegionEpoch != nil { + const prefix string = ",\"epoch\":" + out.RawString(prefix) + easyjson75d7afa0EncodeGithubComPingcapKvprotoPkgMetapb(out, *in.RegionEpoch) + } + if len(in.Peers) != 0 { + const prefix string = ",\"peers\":" + out.RawString(prefix) + { + out.RawByte('[') + for v5, v6 := range in.Peers { + if v5 > 0 { + out.RawByte(',') + } + easyjson75d7afa0EncodeGithubComTikvPdServerApi1(out, v6) + } + out.RawByte(']') + } + } + if true { + const prefix string = ",\"leader\":" + out.RawString(prefix) + easyjson75d7afa0EncodeGithubComTikvPdServerApi1(out, in.Leader) + } + if len(in.DownPeers) != 0 { + const prefix string = ",\"down_peers\":" + out.RawString(prefix) + { + out.RawByte('[') + for v7, v8 := range in.DownPeers { + if v7 > 0 { + out.RawByte(',') + } + easyjson75d7afa0EncodeGithubComTikvPdServerApi2(out, v8) + } + out.RawByte(']') + } + } + if len(in.PendingPeers) != 0 { + const prefix string = ",\"pending_peers\":" + out.RawString(prefix) + { + out.RawByte('[') + for v9, v10 := range in.PendingPeers { + if v9 > 0 { + out.RawByte(',') + } + easyjson75d7afa0EncodeGithubComTikvPdServerApi1(out, v10) + } + out.RawByte(']') + } + } + { + const prefix string = ",\"cpu_usage\":" + out.RawString(prefix) + out.Uint64(uint64(in.CPUUsage)) + } + { + const prefix string = ",\"written_bytes\":" + out.RawString(prefix) + out.Uint64(uint64(in.WrittenBytes)) + } + { + const prefix string = ",\"read_bytes\":" + out.RawString(prefix) + out.Uint64(uint64(in.ReadBytes)) + } + { + const prefix string = ",\"written_keys\":" + out.RawString(prefix) + out.Uint64(uint64(in.WrittenKeys)) + } + { + const prefix string = ",\"read_keys\":" + out.RawString(prefix) + out.Uint64(uint64(in.ReadKeys)) + } + { + const prefix string = ",\"approximate_size\":" + out.RawString(prefix) + out.Int64(int64(in.ApproximateSize)) + } + { + const prefix string = ",\"approximate_keys\":" + out.RawString(prefix) + out.Int64(int64(in.ApproximateKeys)) + } + if len(in.Buckets) != 0 { + const prefix string = ",\"buckets\":" + out.RawString(prefix) + { + out.RawByte('[') + for v11, v12 := range in.Buckets { + if v11 > 0 { + out.RawByte(',') + } + out.String(string(v12)) + } + out.RawByte(']') + } + } + if in.ReplicationStatus != nil { + const prefix string = ",\"replication_status\":" + out.RawString(prefix) + easyjson75d7afa0EncodeGithubComTikvPdServerApi3(out, *in.ReplicationStatus) + } + out.RawByte('}') +} + +// MarshalJSON supports json.Marshaler interface +func (v RegionInfo) MarshalJSON() ([]byte, error) { + w := jwriter.Writer{} + easyjson75d7afa0EncodeGithubComTikvPdServerApi(&w, v) + return w.Buffer.BuildBytes(), w.Error +} + +// MarshalEasyJSON supports 
easyjson.Marshaler interface +func (v RegionInfo) MarshalEasyJSON(w *jwriter.Writer) { + easyjson75d7afa0EncodeGithubComTikvPdServerApi(w, v) +} + +// UnmarshalJSON supports json.Unmarshaler interface +func (v *RegionInfo) UnmarshalJSON(data []byte) error { + r := jlexer.Lexer{Data: data} + easyjson75d7afa0DecodeGithubComTikvPdServerApi(&r, v) + return r.Error() +} + +// UnmarshalEasyJSON supports easyjson.Unmarshaler interface +func (v *RegionInfo) UnmarshalEasyJSON(l *jlexer.Lexer) { + easyjson75d7afa0DecodeGithubComTikvPdServerApi(l, v) +} +func easyjson75d7afa0DecodeGithubComTikvPdServerApi3(in *jlexer.Lexer, out *ReplicationStatus) { + isTopLevel := in.IsStart() + if in.IsNull() { + if isTopLevel { + in.Consumed() + } + in.Skip() + return + } + in.Delim('{') + for !in.IsDelim('}') { + key := in.UnsafeFieldName(false) + in.WantColon() + if in.IsNull() { + in.Skip() + in.WantComma() + continue + } + switch key { + case "state": + out.State = string(in.String()) + case "state_id": + out.StateID = uint64(in.Uint64()) + default: + in.SkipRecursive() + } + in.WantComma() + } + in.Delim('}') + if isTopLevel { + in.Consumed() + } +} +func easyjson75d7afa0EncodeGithubComTikvPdServerApi3(out *jwriter.Writer, in ReplicationStatus) { + out.RawByte('{') + first := true + _ = first + { + const prefix string = ",\"state\":" + out.RawString(prefix[1:]) + out.String(string(in.State)) + } + { + const prefix string = ",\"state_id\":" + out.RawString(prefix) + out.Uint64(uint64(in.StateID)) + } + out.RawByte('}') +} +func easyjson75d7afa0DecodeGithubComTikvPdServerApi2(in *jlexer.Lexer, out *PDPeerStats) { + isTopLevel := in.IsStart() + if in.IsNull() { + if isTopLevel { + in.Consumed() + } + in.Skip() + return + } + out.PeerStats = new(pdpb.PeerStats) + in.Delim('{') + for !in.IsDelim('}') { + key := in.UnsafeFieldName(false) + in.WantColon() + if in.IsNull() { + in.Skip() + in.WantComma() + continue + } + switch key { + case "peer": + easyjson75d7afa0DecodeGithubComTikvPdServerApi1(in, &out.Peer) + case "down_seconds": + out.DownSeconds = uint64(in.Uint64()) + default: + in.SkipRecursive() + } + in.WantComma() + } + in.Delim('}') + if isTopLevel { + in.Consumed() + } +} +func easyjson75d7afa0EncodeGithubComTikvPdServerApi2(out *jwriter.Writer, in PDPeerStats) { + out.RawByte('{') + first := true + _ = first + { + const prefix string = ",\"peer\":" + out.RawString(prefix[1:]) + easyjson75d7afa0EncodeGithubComTikvPdServerApi1(out, in.Peer) + } + if in.DownSeconds != 0 { + const prefix string = ",\"down_seconds\":" + out.RawString(prefix) + out.Uint64(uint64(in.DownSeconds)) + } + out.RawByte('}') +} +func easyjson75d7afa0DecodeGithubComTikvPdServerApi1(in *jlexer.Lexer, out *MetaPeer) { + isTopLevel := in.IsStart() + if in.IsNull() { + if isTopLevel { + in.Consumed() + } + in.Skip() + return + } + out.Peer = new(metapb.Peer) + in.Delim('{') + for !in.IsDelim('}') { + key := in.UnsafeFieldName(false) + in.WantColon() + if in.IsNull() { + in.Skip() + in.WantComma() + continue + } + switch key { + case "role_name": + out.RoleName = string(in.String()) + case "is_learner": + out.IsLearner = bool(in.Bool()) + case "id": + out.Id = uint64(in.Uint64()) + case "store_id": + out.StoreId = uint64(in.Uint64()) + case "role": + out.Role = metapb.PeerRole(in.Int32()) + case "is_witness": + out.IsWitness = bool(in.Bool()) + default: + in.SkipRecursive() + } + in.WantComma() + } + in.Delim('}') + if isTopLevel { + in.Consumed() + } +} +func easyjson75d7afa0EncodeGithubComTikvPdServerApi1(out *jwriter.Writer, in 
MetaPeer) { + out.RawByte('{') + first := true + _ = first + { + const prefix string = ",\"role_name\":" + out.RawString(prefix[1:]) + out.String(string(in.RoleName)) + } + if in.IsLearner { + const prefix string = ",\"is_learner\":" + out.RawString(prefix) + out.Bool(bool(in.IsLearner)) + } + if in.Id != 0 { + const prefix string = ",\"id\":" + out.RawString(prefix) + out.Uint64(uint64(in.Id)) + } + if in.StoreId != 0 { + const prefix string = ",\"store_id\":" + out.RawString(prefix) + out.Uint64(uint64(in.StoreId)) + } + if in.Role != 0 { + const prefix string = ",\"role\":" + out.RawString(prefix) + out.Int32(int32(in.Role)) + } + if in.IsWitness { + const prefix string = ",\"is_witness\":" + out.RawString(prefix) + out.Bool(bool(in.IsWitness)) + } + out.RawByte('}') +} +func easyjson75d7afa0DecodeGithubComPingcapKvprotoPkgMetapb(in *jlexer.Lexer, out *metapb.RegionEpoch) { + isTopLevel := in.IsStart() + if in.IsNull() { + if isTopLevel { + in.Consumed() + } + in.Skip() + return + } + in.Delim('{') + for !in.IsDelim('}') { + key := in.UnsafeFieldName(false) + in.WantColon() + if in.IsNull() { + in.Skip() + in.WantComma() + continue + } + switch key { + case "conf_ver": + out.ConfVer = uint64(in.Uint64()) + case "version": + out.Version = uint64(in.Uint64()) + default: + in.SkipRecursive() + } + in.WantComma() + } + in.Delim('}') + if isTopLevel { + in.Consumed() + } +} +func easyjson75d7afa0EncodeGithubComPingcapKvprotoPkgMetapb(out *jwriter.Writer, in metapb.RegionEpoch) { + out.RawByte('{') + first := true + _ = first + if in.ConfVer != 0 { + const prefix string = ",\"conf_ver\":" + first = false + out.RawString(prefix[1:]) + out.Uint64(uint64(in.ConfVer)) + } + if in.Version != 0 { + const prefix string = ",\"version\":" + if first { + first = false + out.RawString(prefix[1:]) + } else { + out.RawString(prefix) + } + out.Uint64(uint64(in.Version)) + } + out.RawByte('}') +} diff --git a/server/api/region_test.go b/server/api/region_test.go index 0796cd49c97..8527acf41ec 100644 --- a/server/api/region_test.go +++ b/server/api/region_test.go @@ -16,6 +16,7 @@ package api import ( "bytes" + "context" "encoding/hex" "encoding/json" "fmt" @@ -24,6 +25,7 @@ import ( "net/url" "sort" "testing" + "time" "github.com/docker/go-units" "github.com/pingcap/failpoint" @@ -31,6 +33,7 @@ import ( "github.com/pingcap/kvproto/pkg/pdpb" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "github.com/tikv/pd/pkg/apiutil" tu "github.com/tikv/pd/pkg/testutil" "github.com/tikv/pd/server" "github.com/tikv/pd/server/core" @@ -351,6 +354,26 @@ func (suite *regionTestSuite) TestAccelerateRegionsScheduleInRange() { suite.Len(idList, 2) } +func (suite *regionTestSuite) TestAccelerateRegionsScheduleInRanges() { + re := suite.Require() + r1 := newTestRegionInfo(557, 13, []byte("a1"), []byte("a2")) + r2 := newTestRegionInfo(558, 14, []byte("a2"), []byte("a3")) + r3 := newTestRegionInfo(559, 15, []byte("a3"), []byte("a4")) + r4 := newTestRegionInfo(560, 16, []byte("a4"), []byte("a5")) + r5 := newTestRegionInfo(561, 17, []byte("a5"), []byte("a6")) + mustRegionHeartbeat(re, suite.svr, r1) + mustRegionHeartbeat(re, suite.svr, r2) + mustRegionHeartbeat(re, suite.svr, r3) + mustRegionHeartbeat(re, suite.svr, r4) + mustRegionHeartbeat(re, suite.svr, r5) + body := fmt.Sprintf(`[{"start_key":"%s", "end_key": "%s"}, {"start_key":"%s", "end_key": "%s"}]`, hex.EncodeToString([]byte("a1")), hex.EncodeToString([]byte("a3")), hex.EncodeToString([]byte("a4")), hex.EncodeToString([]byte("a6"))) + + err := 
tu.CheckPostJSON(testDialClient, fmt.Sprintf("%s/regions/accelerate-schedule/batch", suite.urlPrefix), []byte(body), tu.StatusOK(re)) + suite.NoError(err) + idList := suite.svr.GetRaftCluster().GetSuspectRegions() + suite.Len(idList, 4) +} + func (suite *regionTestSuite) TestScatterRegions() { re := suite.Require() r1 := newTestRegionInfo(601, 13, []byte("b1"), []byte("b2")) @@ -439,6 +462,40 @@ func (suite *regionTestSuite) TestTopN() { } } +func TestRegionsWithKillRequest(t *testing.T) { + re := require.New(t) + svr, cleanup := mustNewServer(re) + defer cleanup() + server.MustWaitLeader(re, []*server.Server{svr}) + + addr := svr.GetAddr() + url := fmt.Sprintf("%s%s/api/v1/regions", addr, apiPrefix) + mustBootstrapCluster(re, svr) + regionCount := 100000 + for i := 0; i < regionCount; i++ { + r := core.NewTestRegionInfoWithID(uint64(i+2), 1, + []byte(fmt.Sprintf("%09d", i)), + []byte(fmt.Sprintf("%09d", i+1)), + core.SetApproximateKeys(10), core.SetApproximateSize(10)) + mustRegionHeartbeat(re, svr, r) + } + + ctx, cancel := context.WithCancel(context.Background()) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, bytes.NewBuffer(nil)) + re.NoError(err) + respCh := make(chan *http.Response) + go func() { + resp, err := testDialClient.Do(req) // nolint:bodyclose + re.Error(err) + re.Contains(err.Error(), "context canceled") + respCh <- resp + }() + time.Sleep(100 * time.Millisecond) // wait for the request to be sent + cancel() // close the request + resp := <-respCh + re.Nil(resp) +} + type getRegionTestSuite struct { suite.Suite svr *server.Server @@ -721,54 +778,60 @@ func (suite *regionsReplicatedTestSuite) TestCheckRegionsReplicated() { suite.Equal("REPLICATED", status) } -// Create n regions (0..n) of n stores (0..n). -// Each region contains np peers, the first peer is the leader. -// (copied from server/cluster_test.go) -func newTestRegions() []*core.RegionInfo { - n := uint64(10000) - np := uint64(3) - - regions := make([]*core.RegionInfo, 0, n) - for i := uint64(0); i < n; i++ { - peers := make([]*metapb.Peer, 0, np) - for j := uint64(0); j < np; j++ { - peer := &metapb.Peer{ - Id: i*np + j, - } - peer.StoreId = (i + j) % n - peers = append(peers, peer) - } - region := &metapb.Region{ - Id: i, - Peers: peers, - StartKey: []byte(fmt.Sprintf("%d", i)), - EndKey: []byte(fmt.Sprintf("%d", i+1)), - RegionEpoch: &metapb.RegionEpoch{ConfVer: 2, Version: 2}, - } - regions = append(regions, core.NewRegionInfo(region, peers[0])) - } - return regions -} - -func BenchmarkRenderJSON(b *testing.B) { - regionInfos := newTestRegions() - rd := createStreamingRender() - regions := convertToAPIRegions(regionInfos) - - b.ResetTimer() - for i := 0; i < b.N; i++ { - var buffer bytes.Buffer - rd.JSON(&buffer, 200, regions) +func TestRegionsInfoMarshal(t *testing.T) { + re := require.New(t) + regionWithNilPeer := core.NewRegionInfo(&metapb.Region{Id: 1}, &metapb.Peer{Id: 1}) + core.SetPeers([]*metapb.Peer{{Id: 2}, nil})(regionWithNilPeer) + cases := [][]*core.RegionInfo{ + {}, + { + // leader is nil + core.NewRegionInfo(&metapb.Region{Id: 1}, nil), + // Peers is empty + core.NewRegionInfo(&metapb.Region{Id: 1}, &metapb.Peer{Id: 1}, + core.SetPeers([]*metapb.Peer{})), + // There is nil peer in peers. + regionWithNilPeer, + }, + { + // PendingPeers is empty + core.NewRegionInfo(&metapb.Region{Id: 1}, &metapb.Peer{Id: 1}, + core.WithPendingPeers([]*metapb.Peer{})), + // There is nil peer in peers. 
+ core.NewRegionInfo(&metapb.Region{Id: 1}, &metapb.Peer{Id: 1}, + core.WithPendingPeers([]*metapb.Peer{nil})), + }, + { + // DownPeers is empty + core.NewRegionInfo(&metapb.Region{Id: 1}, &metapb.Peer{Id: 1}, + core.WithDownPeers([]*pdpb.PeerStats{})), + // There is nil peer in peers. + core.NewRegionInfo(&metapb.Region{Id: 1}, &metapb.Peer{Id: 1}, + core.WithDownPeers([]*pdpb.PeerStats{{Peer: nil}})), + }, + { + // Buckets is nil + core.NewRegionInfo(&metapb.Region{Id: 1}, &metapb.Peer{Id: 1}, + core.SetBuckets(nil)), + // Buckets is empty + core.NewRegionInfo(&metapb.Region{Id: 1}, &metapb.Peer{Id: 1}, + core.SetBuckets(&metapb.Buckets{})), + }, + { + core.NewRegionInfo(&metapb.Region{Id: 1, StartKey: []byte{}, EndKey: []byte{}, + RegionEpoch: &metapb.RegionEpoch{Version: 1, ConfVer: 1}}, + &metapb.Peer{Id: 1}, core.SetCPUUsage(10), + core.SetApproximateKeys(10), core.SetApproximateSize(10), + core.SetWrittenBytes(10), core.SetReadBytes(10), + core.SetReadKeys(10), core.SetWrittenKeys(10)), + }, } -} - -func BenchmarkConvertToAPIRegions(b *testing.B) { - regionInfos := newTestRegions() - - b.ResetTimer() - for i := 0; i < b.N; i++ { - regions := convertToAPIRegions(regionInfos) - _ = regions.Count + regionsInfo := &RegionsInfo{} + for _, regions := range cases { + b, err := marshalRegionsInfoJSON(context.Background(), regions) + re.NoError(err) + err = json.Unmarshal(b, regionsInfo) + re.NoError(err) } } @@ -787,3 +850,34 @@ func BenchmarkHexRegionKeyStr(b *testing.B) { _ = core.HexRegionKeyStr(key) } } + +func BenchmarkGetRegions(b *testing.B) { + re := require.New(b) + svr, cleanup := mustNewServer(re) + defer cleanup() + server.MustWaitLeader(re, []*server.Server{svr}) + + addr := svr.GetAddr() + url := fmt.Sprintf("%s%s/api/v1/regions", addr, apiPrefix) + mustBootstrapCluster(re, svr) + regionCount := 1000000 + for i := 0; i < regionCount; i++ { + r := core.NewTestRegionInfoWithID(uint64(i+2), 1, + []byte(fmt.Sprintf("%09d", i)), + []byte(fmt.Sprintf("%09d", i+1)), + core.SetApproximateKeys(10), core.SetApproximateSize(10)) + mustRegionHeartbeat(re, svr, r) + } + resp, _ := apiutil.GetJSON(testDialClient, url, nil) + regions := &RegionsInfo{} + err := json.NewDecoder(resp.Body).Decode(regions) + re.NoError(err) + re.Equal(regionCount, regions.Count) + resp.Body.Close() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + resp, _ := apiutil.GetJSON(testDialClient, url, nil) + resp.Body.Close() + } +} diff --git a/server/api/router.go b/server/api/router.go index 2c750b12eb7..eb87ef05bc2 100644 --- a/server/api/router.go +++ b/server/api/router.go @@ -266,6 +266,7 @@ func createRouter(prefix string, svr *server.Server) *mux.Router { registerFunc(clusterRouter, "/regions/check/hist-keys", regionsHandler.GetKeysHistogram, setMethods(http.MethodGet), setAuditBackend(prometheus)) registerFunc(clusterRouter, "/regions/sibling/{id}", regionsHandler.GetRegionSiblings, setMethods(http.MethodGet), setAuditBackend(prometheus)) registerFunc(clusterRouter, "/regions/accelerate-schedule", regionsHandler.AccelerateRegionsScheduleInRange, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) + registerFunc(clusterRouter, "/regions/accelerate-schedule/batch", regionsHandler.AccelerateRegionsScheduleInRanges, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) registerFunc(clusterRouter, "/regions/scatter", regionsHandler.ScatterRegions, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) registerFunc(clusterRouter, "/regions/split", 
regionsHandler.SplitRegions, setMethods(http.MethodPost), setAuditBackend(localLog, prometheus)) registerFunc(clusterRouter, "/regions/range-holes", regionsHandler.GetRangeHoles, setMethods(http.MethodGet), setAuditBackend(prometheus)) @@ -350,6 +351,7 @@ func createRouter(prefix string, svr *server.Server) *mux.Router { // min resolved ts API minResolvedTSHandler := newMinResolvedTSHandler(svr, rd) registerFunc(clusterRouter, "/min-resolved-ts", minResolvedTSHandler.GetMinResolvedTS, setMethods(http.MethodGet), setAuditBackend(prometheus)) + registerFunc(clusterRouter, "/min-resolved-ts/{store_id}", minResolvedTSHandler.GetStoreMinResolvedTS, setMethods(http.MethodGet), setAuditBackend(prometheus)) // unsafe admin operation API unsafeOperationHandler := newUnsafeOperationHandler(svr, rd) diff --git a/server/api/store.go b/server/api/store.go index ca86a626a22..8d6c55d5326 100644 --- a/server/api/store.go +++ b/server/api/store.go @@ -55,8 +55,9 @@ type StoreStatus struct { RegionWeight float64 `json:"region_weight"` RegionScore float64 `json:"region_score"` RegionSize int64 `json:"region_size"` - WitnessCount int `json:"witness_count"` - SlowScore uint64 `json:"slow_score"` + LearnerCount int `json:"learner_count,omitempty"` + WitnessCount int `json:"witness_count,omitempty"` + SlowScore uint64 `json:"slow_score,omitempty"` SendingSnapCount uint32 `json:"sending_snap_count,omitempty"` ReceivingSnapCount uint32 `json:"receiving_snap_count,omitempty"` IsBusy bool `json:"is_busy,omitempty"` @@ -94,6 +95,7 @@ func newStoreInfo(opt *config.ScheduleConfig, store *core.StoreInfo) *StoreInfo RegionWeight: store.GetRegionWeight(), RegionScore: store.RegionScore(opt.RegionScoreFormulaVersion, opt.HighSpaceRatio, opt.LowSpaceRatio, 0), RegionSize: store.GetRegionSize(), + LearnerCount: store.GetLearnerCount(), WitnessCount: store.GetWitnessCount(), SlowScore: store.GetSlowScore(), SendingSnapCount: store.GetSendingSnapCount(), diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index 8779e96cd8a..6d557bc5a7c 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -66,6 +66,8 @@ var ( DefaultMinResolvedTSPersistenceInterval = config.DefaultMinResolvedTSPersistenceInterval regionUpdateCacheEventCounter = regionEventCounter.WithLabelValues("update_cache") regionUpdateKVEventCounter = regionEventCounter.WithLabelValues("update_kv") + + denySchedulersByLabelerCounter = schedule.LabelerEventCounter.WithLabelValues("schedulers", "deny") ) // regionLabelGCInterval is the interval to run region-label's GC work. 
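Editor's note on the router changes above: two endpoints are new in this change, POST /regions/accelerate-schedule/batch and GET /min-resolved-ts/{store_id}. Below is a minimal client-side sketch of calling them; the PD address, the /pd/api/v1 prefix, and the hex key encoding are assumptions taken from the existing API tests, not something this diff defines.

    package main

    import (
    	"bytes"
    	"encoding/hex"
    	"fmt"
    	"net/http"
    )

    func main() {
    	base := "http://127.0.0.1:2379/pd/api/v1" // placeholder address, assumed API prefix

    	// Batch form of accelerate-schedule: a JSON array of hex-encoded key ranges,
    	// mirroring the single-range POST /regions/accelerate-schedule endpoint.
    	ranges := fmt.Sprintf(`[{"start_key":"%s","end_key":"%s"},{"start_key":"%s","end_key":"%s"}]`,
    		hex.EncodeToString([]byte("a1")), hex.EncodeToString([]byte("a3")),
    		hex.EncodeToString([]byte("a4")), hex.EncodeToString([]byte("a6")))
    	if resp, err := http.Post(base+"/regions/accelerate-schedule/batch", "application/json", bytes.NewBufferString(ranges)); err == nil {
    		resp.Body.Close()
    	}

    	// Per-store min resolved ts; store ID 1 is a placeholder. The existing
    	// /min-resolved-ts endpoint keeps returning the cluster-wide value.
    	if resp, err := http.Get(base + "/min-resolved-ts/1"); err == nil {
    		resp.Body.Close()
    	}
    }

The batch route feeds the same suspect-region mechanism as the single-range endpoint, which is what TestAccelerateRegionsScheduleInRanges above verifies via GetSuspectRegions.
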
@@ -304,22 +306,28 @@ func (c *RaftCluster) runSyncConfig() { defer ticker.Stop() stores := c.GetStores() - syncConfig(c.storeConfigManager, stores) + syncConfig(c.ctx, c.storeConfigManager, stores) for { select { case <-c.ctx.Done(): log.Info("sync store config job is stopped") return case <-ticker.C: - if !syncConfig(c.storeConfigManager, stores) { + if !syncConfig(c.ctx, c.storeConfigManager, stores) { stores = c.GetStores() } } } } -func syncConfig(manager *config.StoreConfigManager, stores []*core.StoreInfo) bool { +func syncConfig(ctx context.Context, manager *config.StoreConfigManager, stores []*core.StoreInfo) bool { for index := 0; index < len(stores); index++ { + select { + case <-ctx.Done(): + log.Info("stop sync store config job due to raft cluster exit") + return false + default: + } // filter out the stores that are tiflash store := stores[index] if core.IsStoreContainLabel(store.GetMeta(), core.EngineKey, core.EngineTiFlash) { @@ -332,7 +340,9 @@ func syncConfig(manager *config.StoreConfigManager, stores []*core.StoreInfo) bo } // it will try next store if the current store is failed. address := netutil.ResolveLoopBackAddr(stores[index].GetStatusAddress(), stores[index].GetAddress()) - if err := manager.ObserveConfig(address); err != nil { + if err := manager.ObserveConfig(ctx, address); err != nil { + stores = append(stores[:index], stores[index+1:]...) + index-- storeSyncConfigEvent.WithLabelValues(address, "fail").Inc() log.Debug("sync store config failed, it will try next store", zap.Error(err)) continue @@ -845,7 +855,7 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error { c.coordinator.CheckTransferWitnessLeader(region) hasRegionStats := c.regionStats != nil - // Save to storage if meta is updated. + // Save to storage if meta is updated, except for flashback. // Save to cache if meta or leader is updated, or contains any down/pending peer. // Mark isNew if the region in cache does not have leader. isNew, saveKV, saveCache, needSync := regionGuide(region, origin) @@ -1073,21 +1083,15 @@ func (c *RaftCluster) GetRangeHoles() [][]string { } // UpdateStoreLabels updates a store's location labels -// If 'force' is true, then update the store's labels forcibly. +// If 'force' is true, the origin labels will be overwritten with the new one forcibly. func (c *RaftCluster) UpdateStoreLabels(storeID uint64, labels []*metapb.StoreLabel, force bool) error { store := c.GetStore(storeID) if store == nil { return errs.ErrInvalidStoreID.FastGenByArgs(storeID) } newStore := typeutil.DeepClone(store.GetMeta(), core.StoreFactory) - if force { - newStore.Labels = labels - } else { - // If 'force' isn't set, the given labels will merge into those labels which already existed in the store. - newStore.Labels = core.MergeLabels(newStore.GetLabels(), labels) - } - // PutStore will perform label merge. - return c.putStoreImpl(newStore) + newStore.Labels = labels + return c.putStoreImpl(newStore, force) } // DeleteStoreLabel updates a store's location labels @@ -1108,13 +1112,12 @@ func (c *RaftCluster) DeleteStoreLabel(storeID uint64, labelKey string) error { return errors.Errorf("the label key %s does not exist", labelKey) } newStore.Labels = labels - // PutStore will perform label merge. - return c.putStoreImpl(newStore) + return c.putStoreImpl(newStore, true) } // PutStore puts a store. 
func (c *RaftCluster) PutStore(store *metapb.Store) error { - if err := c.putStoreImpl(store); err != nil { + if err := c.putStoreImpl(store, false); err != nil { return err } c.OnStoreVersionChange() @@ -1123,8 +1126,9 @@ func (c *RaftCluster) PutStore(store *metapb.Store) error { } // putStoreImpl puts a store. -// If 'force' is true, then overwrite the store's labels. -func (c *RaftCluster) putStoreImpl(store *metapb.Store) error { +// If 'force' is true, the store's labels will overwrite those labels which already existed in the store. +// If 'force' is false, the store's labels will merge into those labels which already existed in the store. +func (c *RaftCluster) putStoreImpl(store *metapb.Store, force bool) error { c.Lock() defer c.Unlock() @@ -1154,6 +1158,9 @@ func (c *RaftCluster) putStoreImpl(store *metapb.Store) error { } else { // Use the given labels to update the store. labels := store.GetLabels() + if !force { + labels = core.MergeLabels(s.GetLabels(), labels) + } // Update an existed store. s = s.Clone( core.SetStoreAddress(store.Address, store.StatusAddress, store.PeerAddress), @@ -2308,6 +2315,16 @@ func (c *RaftCluster) GetMinResolvedTS() uint64 { return c.minResolvedTS } +// GetStoreMinResolvedTS returns the min resolved ts of the store. +func (c *RaftCluster) GetStoreMinResolvedTS(storeID uint64) uint64 { + c.RLock() + defer c.RUnlock() + if !c.isInitialized() || !core.IsAvailableForMinResolvedTS(c.GetStore(storeID)) { + return math.MaxUint64 + } + return c.GetStore(storeID).GetMinResolvedTS() +} + // GetExternalTS returns the external timestamp. func (c *RaftCluster) GetExternalTS() uint64 { c.RLock() @@ -2521,3 +2538,25 @@ func (c *RaftCluster) GetPausedSchedulerDelayAt(name string) (int64, error) { func (c *RaftCluster) GetPausedSchedulerDelayUntil(name string) (int64, error) { return c.coordinator.getPausedSchedulerDelayUntil(name) } + +var ( + onlineUnsafeRecoveryStatus = schedulingAllowanceStatusGauge.WithLabelValues("online-unsafe-recovery") + haltSchedulingStatus = schedulingAllowanceStatusGauge.WithLabelValues("halt-scheduling") +) + +// CheckSchedulingAllowance checks if the cluster allows scheduling currently. +func (c *RaftCluster) CheckSchedulingAllowance() (bool, error) { + // If the cluster is in the process of online unsafe recovery, it should not allow scheduling. + if c.GetUnsafeRecoveryController().IsRunning() { + onlineUnsafeRecoveryStatus.Set(1) + return false, errs.ErrUnsafeRecoveryIsRunning.FastGenByArgs() + } + onlineUnsafeRecoveryStatus.Set(0) + // If the halt-scheduling is set, it should not allow scheduling. 
+ if c.opt.IsSchedulingHalted() { + haltSchedulingStatus.Set(1) + return false, errs.ErrSchedulingIsHalted.FastGenByArgs() + } + haltSchedulingStatus.Set(0) + return true, nil +} diff --git a/server/cluster/cluster_test.go b/server/cluster/cluster_test.go index 025264c194f..2b7da991368 100644 --- a/server/cluster/cluster_test.go +++ b/server/cluster/cluster_test.go @@ -16,9 +16,12 @@ package cluster import ( "context" + "encoding/json" "fmt" "math" "math/rand" + "net/http" + "net/http/httptest" "sync" "testing" "time" @@ -31,6 +34,7 @@ import ( "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/mock/mockid" "github.com/tikv/pd/pkg/progress" + "github.com/tikv/pd/pkg/typeutil" "github.com/tikv/pd/server/config" "github.com/tikv/pd/server/core" "github.com/tikv/pd/server/id" @@ -833,6 +837,16 @@ func TestRegionHeartbeat(t *testing.T) { regions[i] = region re.NoError(cluster.processRegionHeartbeat(region)) checkRegions(re, cluster.core, regions[:i+1]) + + // Flashback + region = region.Clone(core.WithFlashback(true, 1)) + regions[i] = region + re.NoError(cluster.processRegionHeartbeat(region)) + checkRegions(re, cluster.core, regions[:i+1]) + region = region.Clone(core.WithFlashback(false, 0)) + regions[i] = region + re.NoError(cluster.processRegionHeartbeat(region)) + checkRegions(re, cluster.core, regions[:i+1]) } regionCounts := make(map[uint64]int) @@ -1318,11 +1332,47 @@ func TestSyncConfig(t *testing.T) { for _, v := range testdata { tc.storeConfigManager = config.NewTestStoreConfigManager(v.whiteList) re.Equal(uint64(144), tc.GetStoreConfig().GetRegionMaxSize()) - re.Equal(v.updated, syncConfig(tc.storeConfigManager, tc.GetStores())) + re.Equal(v.updated, syncConfig(tc.ctx, tc.storeConfigManager, tc.GetStores())) re.Equal(v.maxRegionSize, tc.GetStoreConfig().GetRegionMaxSize()) } } +func TestSyncConfigContext(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + _, opt, err := newTestScheduleConfig() + re.NoError(err) + tc := newTestCluster(ctx, opt) + tc.storeConfigManager = config.NewStoreConfigManager(http.DefaultClient) + tc.httpClient = &http.Client{} + + server := httptest.NewServer(http.HandlerFunc(func(res http.ResponseWriter, req *http.Request) { + time.Sleep(time.Second * 100) + cfg := &config.StoreConfig{} + b, err := json.Marshal(cfg) + if err != nil { + res.WriteHeader(http.StatusInternalServerError) + res.Write([]byte(fmt.Sprintf("failed setting up test server: %s", err))) + return + } + + res.WriteHeader(http.StatusOK) + res.Write(b) + })) + stores := newTestStores(1, "2.0.0") + for _, s := range stores { + re.NoError(tc.putStoreLocked(s)) + } + // trip schema header + now := time.Now() + stores[0].GetMeta().StatusAddress = server.URL[7:] + synced := syncConfig(tc.ctx, tc.storeConfigManager, stores) + re.False(synced) + re.Less(time.Since(now), clientTimeout*2) +} + func TestUpdateStorePendingPeerCount(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) @@ -1800,6 +1850,114 @@ func TestAwakenStore(t *testing.T) { re.True(store1.NeedAwakenStore()) } +func TestUpdateAndDeleteLabel(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + _, opt, err := newTestScheduleConfig() + re.NoError(err) + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + stores := newTestStores(1, "6.5.1") + for _, store := range stores { + 
re.NoError(cluster.PutStore(store.GetMeta())) + } + re.Empty(cluster.GetStore(1).GetLabels()) + // Update label. + cluster.UpdateStoreLabels( + 1, + []*metapb.StoreLabel{ + {Key: "zone", Value: "zone1"}, + {Key: "host", Value: "host1"}, + }, + false, + ) + re.Equal( + []*metapb.StoreLabel{ + {Key: "zone", Value: "zone1"}, + {Key: "host", Value: "host1"}, + }, + cluster.GetStore(1).GetLabels(), + ) + // Update label again. + cluster.UpdateStoreLabels( + 1, + []*metapb.StoreLabel{ + {Key: "mode", Value: "readonly"}, + }, + false, + ) + // Update label with empty value. + cluster.UpdateStoreLabels( + 1, + []*metapb.StoreLabel{}, + false, + ) + re.Equal( + []*metapb.StoreLabel{ + {Key: "zone", Value: "zone1"}, + {Key: "host", Value: "host1"}, + {Key: "mode", Value: "readonly"}, + }, + cluster.GetStore(1).GetLabels(), + ) + // Delete label. + err = cluster.DeleteStoreLabel(1, "mode") + re.NoError(err) + re.Equal( + []*metapb.StoreLabel{ + {Key: "zone", Value: "zone1"}, + {Key: "host", Value: "host1"}, + }, + cluster.GetStore(1).GetLabels(), + ) + // Delete a non-exist label. + err = cluster.DeleteStoreLabel(1, "mode") + re.Error(err) + re.Equal( + []*metapb.StoreLabel{ + {Key: "zone", Value: "zone1"}, + {Key: "host", Value: "host1"}, + }, + cluster.GetStore(1).GetLabels(), + ) + // Update label without force. + cluster.UpdateStoreLabels( + 1, + []*metapb.StoreLabel{}, + false, + ) + re.Equal( + []*metapb.StoreLabel{ + {Key: "zone", Value: "zone1"}, + {Key: "host", Value: "host1"}, + }, + cluster.GetStore(1).GetLabels(), + ) + // Update label with force. + cluster.UpdateStoreLabels( + 1, + []*metapb.StoreLabel{}, + true, + ) + re.Empty(cluster.GetStore(1).GetLabels()) + // Update label first and then reboot the store. + cluster.UpdateStoreLabels( + 1, + []*metapb.StoreLabel{{Key: "mode", Value: "readonly"}}, + false, + ) + re.Equal([]*metapb.StoreLabel{{Key: "mode", Value: "readonly"}}, cluster.GetStore(1).GetLabels()) + // Mock the store doesn't have any label configured. + newStore := typeutil.DeepClone(cluster.GetStore(1).GetMeta(), core.StoreFactory) + newStore.Labels = nil + // Store rebooting will call PutStore. + err = cluster.PutStore(newStore) + re.NoError(err) + // Check the label after rebooting. + re.Equal([]*metapb.StoreLabel{{Key: "mode", Value: "readonly"}}, cluster.GetStore(1).GetLabels()) +} + type testCluster struct { *RaftCluster } diff --git a/server/cluster/cluster_worker.go b/server/cluster/cluster_worker.go index 07be76f0782..2bdd1a6d3d0 100644 --- a/server/cluster/cluster_worker.go +++ b/server/cluster/cluster_worker.go @@ -43,8 +43,8 @@ func (c *RaftCluster) HandleRegionHeartbeat(region *core.RegionInfo) error { // HandleAskSplit handles the split request. func (c *RaftCluster) HandleAskSplit(request *pdpb.AskSplitRequest) (*pdpb.AskSplitResponse, error) { - if c.GetUnsafeRecoveryController().IsRunning() { - return nil, errs.ErrUnsafeRecoveryIsRunning.FastGenByArgs() + if allowed, err := c.CheckSchedulingAllowance(); !allowed { + return nil, err } if !c.opt.IsTikvRegionSplitEnabled() { return nil, errs.ErrSchedulerTiKVSplitDisabled.FastGenByArgs() @@ -105,8 +105,8 @@ func (c *RaftCluster) ValidRequestRegion(reqRegion *metapb.Region) error { // HandleAskBatchSplit handles the batch split request. 
func (c *RaftCluster) HandleAskBatchSplit(request *pdpb.AskBatchSplitRequest) (*pdpb.AskBatchSplitResponse, error) { - if c.GetUnsafeRecoveryController().IsRunning() { - return nil, errs.ErrUnsafeRecoveryIsRunning.FastGenByArgs() + if allowed, err := c.CheckSchedulingAllowance(); !allowed { + return nil, err } if !c.opt.IsTikvRegionSplitEnabled() { return nil, errs.ErrSchedulerTiKVSplitDisabled.FastGenByArgs() diff --git a/server/cluster/coordinator.go b/server/cluster/coordinator.go index c1770f861eb..4f4d5df8510 100644 --- a/server/cluster/coordinator.go +++ b/server/cluster/coordinator.go @@ -129,8 +129,7 @@ func (c *coordinator) patrolRegions() { log.Info("patrol regions has been stopped") return } - if c.cluster.GetUnsafeRecoveryController().IsRunning() { - // Skip patrolling regions during unsafe recovery. + if allowed, _ := c.cluster.CheckSchedulingAllowance(); !allowed { continue } @@ -495,11 +494,28 @@ func (c *coordinator) getHotRegionsByType(typ statistics.RWType) *statistics.Sto default: } // update params `IsLearner` and `LastUpdateTime` - for _, stores := range []statistics.StoreHotPeersStat{infos.AsLeader, infos.AsPeer} { - for _, store := range stores { - for _, hotPeer := range store.Stats { - region := c.cluster.GetRegion(hotPeer.RegionID) - hotPeer.UpdateHotPeerStatShow(region) + s := []statistics.StoreHotPeersStat{infos.AsLeader, infos.AsPeer} + for i, stores := range s { + for j, store := range stores { + for k := range store.Stats { + h := &s[i][j].Stats[k] + region := c.cluster.GetRegion(h.RegionID) + if region != nil { + h.IsLearner = core.IsLearner(region.GetPeer(h.StoreID)) + } + switch typ { + case statistics.Write: + if region != nil { + h.LastUpdateTime = time.Unix(int64(region.GetInterval().GetEndTimestamp()), 0) + } + case statistics.Read: + store := c.cluster.GetStore(h.StoreID) + if store != nil { + ts := store.GetMeta().GetLastHeartbeat() + h.LastUpdateTime = time.Unix(ts/1e9, ts%1e9) + } + default: + } } } } @@ -533,7 +549,7 @@ func (c *coordinator) collectSchedulerMetrics() { var allowScheduler float64 // If the scheduler is not allowed to schedule, it will disappear in Grafana panel. // See issue #1341. - if !s.IsPaused() && !s.cluster.GetUnsafeRecoveryController().IsRunning() { + if allowed, _ := s.cluster.CheckSchedulingAllowance(); !s.IsPaused() && allowed { allowScheduler = 1 } schedulerStatusGauge.WithLabelValues(s.GetName(), "allow").Set(allowScheduler) @@ -893,9 +909,27 @@ func (s *scheduleController) Schedule(diagnosable bool) []*operator.Operator { if diagnosable { s.diagnosticRecorder.setResultFromPlans(ops, plans) } + foundDisabled := false + for _, op := range ops { + if labelMgr := s.cluster.GetRegionLabeler(); labelMgr != nil { + region := s.cluster.GetRegion(op.RegionID()) + if region == nil { + continue + } + if labelMgr.ScheduleDisabled(region) { + denySchedulersByLabelerCounter.Inc() + foundDisabled = true + break + } + } + } if len(ops) > 0 { // If we have schedule, reset interval to the minimal interval. 
s.nextInterval = s.Scheduler.GetMinInterval() + // try regenerating operators + if foundDisabled { + continue + } return ops } } @@ -921,7 +955,14 @@ func (s *scheduleController) AllowSchedule(diagnosable bool) bool { } return false } - if s.IsPaused() || s.cluster.GetUnsafeRecoveryController().IsRunning() { + allowed, _ := s.cluster.CheckSchedulingAllowance() + if !allowed { + if diagnosable { + s.diagnosticRecorder.setResultFromStatus(halted) + } + return false + } + if s.IsPaused() { if diagnosable { s.diagnosticRecorder.setResultFromStatus(paused) } diff --git a/server/cluster/coordinator_test.go b/server/cluster/coordinator_test.go index 7df642f333e..b21a7910a98 100644 --- a/server/cluster/coordinator_test.go +++ b/server/cluster/coordinator_test.go @@ -411,11 +411,23 @@ func TestCheckRegionWithScheduleDeny(t *testing.T) { Data: []interface{}{map[string]interface{}{"start_key": "", "end_key": ""}}, }) + // should allow to do rule checker re.True(labelerManager.ScheduleDisabled(region)) - checkRegionAndOperator(re, tc, co, 1, 0) + checkRegionAndOperator(re, tc, co, 1, 1) + + // should not allow to merge + tc.opt.SetSplitMergeInterval(time.Duration(0)) + + re.NoError(tc.addLeaderRegion(2, 2, 3, 4)) + re.NoError(tc.addLeaderRegion(3, 2, 3, 4)) + region = tc.GetRegion(2) + re.True(labelerManager.ScheduleDisabled(region)) + checkRegionAndOperator(re, tc, co, 2, 0) + + // delete label rule, should allow to do merge labelerManager.DeleteLabelRule("schedulelabel") re.False(labelerManager.ScheduleDisabled(region)) - checkRegionAndOperator(re, tc, co, 1, 1) + checkRegionAndOperator(re, tc, co, 2, 2) } func TestCheckerIsBusy(t *testing.T) { @@ -876,6 +888,45 @@ func TestPersistScheduler(t *testing.T) { re.Len(co.schedulers, 3) } +func TestDenyScheduler(t *testing.T) { + re := require.New(t) + + tc, co, cleanup := prepare(nil, nil, func(co *coordinator) { + labelerManager := co.cluster.GetRegionLabeler() + labelerManager.SetLabelRule(&labeler.LabelRule{ + ID: "schedulelabel", + Labels: []labeler.RegionLabel{{Key: "schedule", Value: "deny"}}, + RuleType: labeler.KeyRange, + Data: []interface{}{map[string]interface{}{"start_key": "", "end_key": ""}}, + }) + co.run() + }, re) + defer cleanup() + + re.Len(co.schedulers, len(config.DefaultSchedulers)) + + // Transfer peer from store 4 to store 1 if not set deny. + re.NoError(tc.addRegionStore(4, 40)) + re.NoError(tc.addRegionStore(3, 30)) + re.NoError(tc.addRegionStore(2, 20)) + re.NoError(tc.addRegionStore(1, 10)) + re.NoError(tc.addLeaderRegion(1, 2, 3, 4)) + + // Transfer leader from store 4 to store 2 if not set deny. 
+ re.NoError(tc.updateLeaderCount(4, 1000)) + re.NoError(tc.updateLeaderCount(3, 50)) + re.NoError(tc.updateLeaderCount(2, 20)) + re.NoError(tc.updateLeaderCount(1, 10)) + re.NoError(tc.addLeaderRegion(2, 4, 3, 2)) + + // there should no balance leader/region operator + for i := 0; i < 10; i++ { + re.Nil(co.opController.GetOperator(1)) + re.Nil(co.opController.GetOperator(2)) + time.Sleep(10 * time.Millisecond) + } +} + func TestRemoveScheduler(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) diff --git a/server/cluster/diagnostic_manager.go b/server/cluster/diagnostic_manager.go index 2d6f41aa504..0b634672f34 100644 --- a/server/cluster/diagnostic_manager.go +++ b/server/cluster/diagnostic_manager.go @@ -33,6 +33,8 @@ const ( disabled = "disabled" // paused means the current scheduler is paused paused = "paused" + // halted means the current scheduler is halted + halted = "halted" // scheduling means the current scheduler is generating. scheduling = "scheduling" // pending means the current scheduler cannot generate scheduling operator diff --git a/server/cluster/metrics.go b/server/cluster/metrics.go index 8c0bceb94ca..49f43cc1bef 100644 --- a/server/cluster/metrics.go +++ b/server/cluster/metrics.go @@ -135,6 +135,14 @@ var ( Name: "store_sync", Help: "The state of store sync config", }, []string{"address", "state"}) + + schedulingAllowanceStatusGauge = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "pd", + Subsystem: "scheduling", + Name: "allowance_status", + Help: "Status of the scheduling allowance.", + }, []string{"kind"}) ) func init() { @@ -143,6 +151,7 @@ func init() { prometheus.MustRegister(schedulerStatusGauge) prometheus.MustRegister(hotSpotStatusGauge) prometheus.MustRegister(patrolCheckRegionsGauge) + prometheus.MustRegister(schedulingAllowanceStatusGauge) prometheus.MustRegister(clusterStateCPUGauge) prometheus.MustRegister(clusterStateCurrent) prometheus.MustRegister(regionListGauge) diff --git a/server/cluster/unsafe_recovery_controller.go b/server/cluster/unsafe_recovery_controller.go index 01021069179..e640c9a2eb6 100644 --- a/server/cluster/unsafe_recovery_controller.go +++ b/server/cluster/unsafe_recovery_controller.go @@ -706,9 +706,6 @@ func (u *unsafeRecoveryController) getFailedPeers(region *metapb.Region) []*meta var failedPeers []*metapb.Peer for _, peer := range region.Peers { - if peer.Role == metapb.PeerRole_Learner || peer.Role == metapb.PeerRole_DemotingVoter { - continue - } if u.isFailed(peer) { failedPeers = append(failedPeers, peer) } diff --git a/server/cluster/unsafe_recovery_controller_test.go b/server/cluster/unsafe_recovery_controller_test.go index 1209b5cd0c4..aa9d84384d8 100644 --- a/server/cluster/unsafe_recovery_controller_test.go +++ b/server/cluster/unsafe_recovery_controller_test.go @@ -606,6 +606,48 @@ func TestAutoDetectMode(t *testing.T) { } } +// Failed learner replica/ store should be considered by auto-recover. 
+func TestAutoDetectModeWithOneLearner(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + _, opt, _ := newTestScheduleConfig() + cluster := newTestRaftCluster(ctx, mockid.NewIDAllocator(), opt, storage.NewStorageWithMemoryBackend(), core.NewBasicCluster()) + cluster.coordinator = newCoordinator(ctx, cluster, hbstream.NewTestHeartbeatStreams(ctx, cluster.meta.GetId(), cluster, true)) + cluster.coordinator.run() + for _, store := range newTestStores(1, "6.0.0") { + re.NoError(cluster.PutStore(store.GetMeta())) + } + recoveryController := newUnsafeRecoveryController(cluster) + re.NoError(recoveryController.RemoveFailedStores(nil, 60, true)) + + storeReport := pdpb.StoreReport{ + PeerReports: []*pdpb.PeerReport{ + { + RaftState: &raft_serverpb.RaftLocalState{LastIndex: 10, HardState: &eraftpb.HardState{Term: 1, Commit: 10}}, + RegionState: &raft_serverpb.RegionLocalState{ + Region: &metapb.Region{ + Id: 1001, + RegionEpoch: &metapb.RegionEpoch{ConfVer: 7, Version: 10}, + Peers: []*metapb.Peer{ + {Id: 11, StoreId: 1}, {Id: 12, StoreId: 2}, {Id: 13, StoreId: 3, Role: metapb.PeerRole_Learner}}}}}, + }, + } + req := newStoreHeartbeat(1, &storeReport) + req.StoreReport.Step = 1 + resp := &pdpb.StoreHeartbeatResponse{} + recoveryController.HandleStoreHeartbeat(req, resp) + hasStore3AsFailedStore := false + for _, failedStore := range resp.RecoveryPlan.ForceLeader.FailedStores { + if failedStore == 3 { + hasStore3AsFailedStore = true + break + } + } + re.True(hasStore3AsFailedStore) +} + func TestOneLearner(t *testing.T) { re := require.New(t) ctx, cancel := context.WithCancel(context.Background()) diff --git a/server/config/config.go b/server/config/config.go index 81cd4c8e76b..c0066e9597c 100644 --- a/server/config/config.go +++ b/server/config/config.go @@ -247,6 +247,7 @@ const ( defaultEnableGRPCGateway = true defaultDisableErrorVerbose = true defaultEnableWitness = false + defaultHaltScheduling = false defaultDashboardAddress = "auto" @@ -772,6 +773,10 @@ type ScheduleConfig struct { // EnableWitness is the option to enable using witness EnableWitness bool `toml:"enable-witness" json:"enable-witness,string"` + + // HaltScheduling is the option to halt the scheduling. Once it's on, PD will halt the scheduling, + // and any other scheduling configs will be ignored. + HaltScheduling bool `toml:"halt-scheduling" json:"halt-scheduling,string,omitempty"` } // Clone returns a cloned scheduling configuration. @@ -895,6 +900,10 @@ func (c *ScheduleConfig) adjust(meta *configMetaData, reloading bool) error { adjustString(&c.RegionScoreFormulaVersion, defaultRegionScoreFormulaVersion) } + if !meta.IsDefined("halt-scheduling") { + c.HaltScheduling = defaultHaltScheduling + } + adjustSchedulers(&c.Schedulers, DefaultSchedulers) for k, b := range c.migrateConfigurationMap() { diff --git a/server/config/persist_options.go b/server/config/persist_options.go index 6264cb447b2..3e20990c6b7 100644 --- a/server/config/persist_options.go +++ b/server/config/persist_options.go @@ -856,3 +856,15 @@ func (o *PersistOptions) SetAllStoresLimitTTL(ctx context.Context, client *clien } return err } + +// SetHaltScheduling set HaltScheduling. +func (o *PersistOptions) SetHaltScheduling(halt bool) { + v := o.GetScheduleConfig().Clone() + v.HaltScheduling = halt + o.SetScheduleConfig(v) +} + +// IsSchedulingHalted returns if PD scheduling is halted. 
+func (o *PersistOptions) IsSchedulingHalted() bool { + return o.GetScheduleConfig().HaltScheduling +} diff --git a/server/config/store_config.go b/server/config/store_config.go index 960ea6688e7..e322f3e122e 100644 --- a/server/config/store_config.go +++ b/server/config/store_config.go @@ -15,12 +15,14 @@ package config import ( + "context" "encoding/json" "fmt" "io" "net/http" "reflect" "sync/atomic" + "time" "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" @@ -41,6 +43,7 @@ var ( defaultRegionMaxKey = uint64(1440000) // default region split key is 960000 defaultRegionSplitKey = uint64(960000) + clientTimeout = 3 * time.Second ) // StoreConfig is the config of store like TiKV. @@ -191,8 +194,8 @@ func NewTestStoreConfigManager(whiteList []string) *StoreConfigManager { } // ObserveConfig is used to observe the config change. -func (m *StoreConfigManager) ObserveConfig(address string) error { - cfg, err := m.source.GetConfig(address) +func (m *StoreConfigManager) ObserveConfig(ctx context.Context, address string) error { + cfg, err := m.source.GetConfig(ctx, address) if err != nil { return err } @@ -222,7 +225,7 @@ func (m *StoreConfigManager) GetStoreConfig() *StoreConfig { // Source is used to get the store config. type Source interface { - GetConfig(statusAddress string) (*StoreConfig, error) + GetConfig(ctx context.Context, statusAddress string) (*StoreConfig, error) } // TiKVConfigSource is used to get the store config from TiKV. @@ -239,9 +242,15 @@ func newTiKVConfigSource(schema string, client *http.Client) *TiKVConfigSource { } // GetConfig returns the store config from TiKV. -func (s TiKVConfigSource) GetConfig(statusAddress string) (*StoreConfig, error) { +func (s TiKVConfigSource) GetConfig(ctx context.Context, statusAddress string) (*StoreConfig, error) { url := fmt.Sprintf("%s://%s/config", s.schema, statusAddress) - resp, err := s.client.Get(url) + ctx, cancel := context.WithTimeout(ctx, clientTimeout) + defer cancel() + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, fmt.Errorf("failed to create store config http request: %w", err) + } + resp, err := s.client.Do(req) if err != nil { return nil, err } @@ -269,7 +278,7 @@ func newFakeSource(whiteList []string) *FakeSource { } // GetConfig returns the config. -func (f *FakeSource) GetConfig(url string) (*StoreConfig, error) { +func (f *FakeSource) GetConfig(_ context.Context, url string) (*StoreConfig, error) { if !slice.Contains(f.whiteList, url) { return nil, fmt.Errorf("[url:%s] is not in white list", url) } diff --git a/server/config/store_config_test.go b/server/config/store_config_test.go index 6916fedc929..3ab3e4900e3 100644 --- a/server/config/store_config_test.go +++ b/server/config/store_config_test.go @@ -15,6 +15,7 @@ package config import ( + "context" "crypto/tls" "encoding/json" "net/http" @@ -63,13 +64,13 @@ func TestTiKVConfig(t *testing.T) { func TestUpdateConfig(t *testing.T) { re := require.New(t) manager := NewTestStoreConfigManager([]string{"tidb.com"}) - manager.ObserveConfig("tikv.com") + manager.ObserveConfig(context.Background(), "tikv.com") re.Equal(uint64(144), manager.GetStoreConfig().GetRegionMaxSize()) - manager.ObserveConfig("tidb.com") + manager.ObserveConfig(context.Background(), "tidb.com") re.Equal(uint64(10), manager.GetStoreConfig().GetRegionMaxSize()) // case2: the config should not update if config is same expect some ignore field. 
- c, err := manager.source.GetConfig("tidb.com") + c, err := manager.source.GetConfig(context.Background(), "tidb.com") re.NoError(err) re.True(manager.GetStoreConfig().Equal(c)) diff --git a/server/core/basic_cluster.go b/server/core/basic_cluster.go index e395a56b9c7..88efc7898ae 100644 --- a/server/core/basic_cluster.go +++ b/server/core/basic_cluster.go @@ -150,10 +150,10 @@ func (bc *BasicCluster) ResetStoreLimit(storeID uint64, limitType storelimit.Typ // UpdateStoreStatus updates the information of the store. func (bc *BasicCluster) UpdateStoreStatus(storeID uint64) { - leaderCount, regionCount, witnessCount, pendingPeerCount, leaderRegionSize, regionSize := bc.RegionsInfo.GetStoreStats(storeID) + leaderCount, regionCount, witnessCount, learnerCount, pendingPeerCount, leaderRegionSize, regionSize := bc.RegionsInfo.GetStoreStats(storeID) bc.Stores.mu.Lock() defer bc.Stores.mu.Unlock() - bc.Stores.UpdateStoreStatus(storeID, leaderCount, regionCount, pendingPeerCount, leaderRegionSize, regionSize, witnessCount) + bc.Stores.UpdateStoreStatus(storeID, leaderCount, regionCount, witnessCount, learnerCount, pendingPeerCount, leaderRegionSize, regionSize) } // PutStore put a store. diff --git a/server/core/region.go b/server/core/region.go index 48114bdb951..8870d30b57a 100644 --- a/server/core/region.go +++ b/server/core/region.go @@ -583,6 +583,11 @@ func (r *RegionInfo) GetReplicationStatus() *replication_modepb.RegionReplicatio return r.replicationStatus } +// IsFlashbackChanged returns true if flashback changes. +func (r *RegionInfo) IsFlashbackChanged(l *RegionInfo) bool { + return r.meta.IsInFlashback != l.meta.IsInFlashback +} + // IsFromHeartbeat returns whether the region info is from the region heartbeat. func (r *RegionInfo) IsFromHeartbeat() bool { return r.fromHeartbeat @@ -689,6 +694,14 @@ func GenerateRegionGuideFunc(enableLog bool) RegionGuideFunc { (region.GetReplicationStatus().GetState() != origin.GetReplicationStatus().GetState() || region.GetReplicationStatus().GetStateId() != origin.GetReplicationStatus().GetStateId()) { saveCache = true + return + } + // Do not save to kv, because 1) flashback will be eventually set to + // false, 2) flashback changes almost all regions in a cluster. + // Saving kv may downgrade PD performance when there are many regions. + if region.IsFlashbackChanged(origin) { + saveCache = true + return } if !origin.IsFromHeartbeat() { isNew = true @@ -1171,11 +1184,11 @@ func (r *RegionsInfo) GetMetaRegions() []*metapb.Region { } // GetStoreStats returns the store stats. 
-func (r *RegionsInfo) GetStoreStats(storeID uint64) (leader, region, witness, pending int, leaderSize, regionSize int64) { +func (r *RegionsInfo) GetStoreStats(storeID uint64) (leader, region, witness, learner, pending int, leaderSize, regionSize int64) { r.st.RLock() defer r.st.RUnlock() return r.leaders[storeID].length(), r.getStoreRegionCountLocked(storeID), r.witnesses[storeID].length(), - r.pendingPeers[storeID].length(), r.leaders[storeID].TotalSize(), r.getStoreRegionSizeLocked(storeID) + r.learners[storeID].length(), r.pendingPeers[storeID].length(), r.leaders[storeID].TotalSize(), r.getStoreRegionSizeLocked(storeID) } // GetRegionCount gets the total count of RegionInfo of regionMap diff --git a/server/core/region_option.go b/server/core/region_option.go index 4fa3de02862..92fd1aeebec 100644 --- a/server/core/region_option.go +++ b/server/core/region_option.go @@ -170,6 +170,13 @@ func WithDecConfVer() RegionCreateOption { } } +// WithFlashback set region flashback states. +func WithFlashback(isInFlashback bool, flashbackTS uint64) RegionCreateOption { + return func(region *RegionInfo) { + region.meta.IsInFlashback = isInFlashback + } +} + // SetCPUUsage sets the CPU usage of the region. func SetCPUUsage(v uint64) RegionCreateOption { return func(region *RegionInfo) { diff --git a/server/core/store.go b/server/core/store.go index dcfb2c55dd5..1556d6d0955 100644 --- a/server/core/store.go +++ b/server/core/store.go @@ -52,6 +52,7 @@ type StoreInfo struct { slowStoreEvicted bool // this store has been evicted as a slow store, should not transfer leader to it leaderCount int regionCount int + learnerCount int witnessCount int leaderSize int64 regionSize int64 @@ -221,6 +222,11 @@ func (s *StoreInfo) GetRegionCount() int { return s.regionCount } +// GetLearnerCount returns the learner count of the store. +func (s *StoreInfo) GetLearnerCount() int { + return s.learnerCount +} + // GetWitnessCount returns the witness count of the store. func (s *StoreInfo) GetWitnessCount() int { return s.witnessCount @@ -709,11 +715,12 @@ func (s *StoresInfo) SetRegionSize(storeID uint64, regionSize int64) { } // UpdateStoreStatus updates the information of the store. -func (s *StoresInfo) UpdateStoreStatus(storeID uint64, leaderCount int, regionCount int, pendingPeerCount int, leaderSize int64, regionSize int64, witnessCount int) { +func (s *StoresInfo) UpdateStoreStatus(storeID uint64, leaderCount, regionCount, witnessCount, learnerCount, pendingPeerCount int, leaderSize int64, regionSize int64) { if store, ok := s.stores[storeID]; ok { newStore := store.ShallowClone(SetLeaderCount(leaderCount), SetRegionCount(regionCount), SetWitnessCount(witnessCount), + SetLearnerCount(learnerCount), SetPendingPeerCount(pendingPeerCount), SetLeaderSize(leaderSize), SetRegionSize(regionSize)) diff --git a/server/core/store_option.go b/server/core/store_option.go index 84d190634ae..871de47f6b5 100644 --- a/server/core/store_option.go +++ b/server/core/store_option.go @@ -150,6 +150,13 @@ func SetRegionCount(regionCount int) StoreCreateOption { } } +// SetLearnerCount sets the learner count for the store. +func SetLearnerCount(learnerCount int) StoreCreateOption { + return func(store *StoreInfo) { + store.learnerCount = learnerCount + } +} + // SetWitnessCount sets the witness count for the store. 
func SetWitnessCount(witnessCount int) StoreCreateOption { return func(store *StoreInfo) { diff --git a/server/core/test_util.go b/server/core/test_util.go index 055623edca4..9a72702c294 100644 --- a/server/core/test_util.go +++ b/server/core/test_util.go @@ -85,6 +85,22 @@ func NewTestRegionInfo(start, end []byte) *RegionInfo { }} } +// NewTestRegionInfoWithID creates a new RegionInfo for test purpose. +func NewTestRegionInfoWithID(regionID, storeID uint64, start, end []byte, opts ...RegionCreateOption) *RegionInfo { + leader := &metapb.Peer{ + Id: regionID, + StoreId: storeID, + } + metaRegion := &metapb.Region{ + Id: regionID, + StartKey: start, + EndKey: end, + Peers: []*metapb.Peer{leader}, + RegionEpoch: &metapb.RegionEpoch{ConfVer: 1, Version: 1}, + } + return NewRegionInfo(metaRegion, leader, opts...) +} + // NewStoreInfoWithDisk is created with all disk infos. func NewStoreInfoWithDisk(id, used, available, capacity, regionSize uint64) *StoreInfo { stats := &pdpb.StoreStats{} diff --git a/server/election/leadership.go b/server/election/leadership.go index c9215318c64..0489a6c7e4e 100644 --- a/server/election/leadership.go +++ b/server/election/leadership.go @@ -16,6 +16,7 @@ package election import ( "context" + "sync" "sync/atomic" "github.com/pingcap/failpoint" @@ -54,8 +55,9 @@ type Leadership struct { leaderKey string leaderValue string - keepAliveCtx context.Context - keepAliveCancelFunc context.CancelFunc + keepAliveCtx context.Context + keepAliveCancelFunc context.CancelFunc + keepAliveCancelFuncLock sync.Mutex } // NewLeadership creates a new Leadership. @@ -137,7 +139,9 @@ func (ls *Leadership) Keep(ctx context.Context) { if ls == nil { return } + ls.keepAliveCancelFuncLock.Lock() ls.keepAliveCtx, ls.keepAliveCancelFunc = context.WithCancel(ctx) + ls.keepAliveCancelFuncLock.Unlock() go ls.getLease().KeepAlive(ls.keepAliveCtx) } @@ -230,8 +234,10 @@ func (ls *Leadership) Reset() { if ls == nil || ls.getLease() == nil { return } + ls.keepAliveCancelFuncLock.Lock() if ls.keepAliveCancelFunc != nil { ls.keepAliveCancelFunc() } + ls.keepAliveCancelFuncLock.Unlock() ls.getLease().Close() } diff --git a/server/election/lease.go b/server/election/lease.go index cb5d13ddb33..f3c00f47089 100644 --- a/server/election/lease.go +++ b/server/election/lease.go @@ -22,6 +22,7 @@ import ( "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/etcdutil" + "github.com/tikv/pd/pkg/logutil" "github.com/tikv/pd/pkg/typeutil" "go.etcd.io/etcd/clientv3" "go.uber.org/zap" @@ -129,6 +130,7 @@ func (l *lease) keepAliveWorker(ctx context.Context, interval time.Duration) <-c ch := make(chan time.Time) go func() { + defer logutil.LogPanic() ticker := time.NewTicker(interval) defer ticker.Stop() @@ -137,6 +139,7 @@ func (l *lease) keepAliveWorker(ctx context.Context, interval time.Duration) <-c for { go func() { + defer logutil.LogPanic() start := time.Now() ctx1, cancel := context.WithTimeout(ctx, l.leaseTimeout) defer cancel() @@ -149,8 +152,11 @@ func (l *lease) keepAliveWorker(ctx context.Context, interval time.Duration) <-c expire := start.Add(time.Duration(res.TTL) * time.Second) select { case ch <- expire: - case <-ctx1.Done(): + // Here we don't use `ctx1.Done()` because we want to make sure if the keep alive success, we can update the expire time. 
+ case <-ctx.Done(): } + } else { + log.Error("keep alive response ttl is zero", zap.String("purpose", l.Purpose)) } }() diff --git a/server/election/lease_test.go b/server/election/lease_test.go index 6298c22f0f2..f848010d7ca 100644 --- a/server/election/lease_test.go +++ b/server/election/lease_test.go @@ -101,3 +101,34 @@ func TestLease(t *testing.T) { time.Sleep((defaultLeaseTimeout + 1) * time.Second) re.True(lease1.IsExpired()) } + +func TestLeaseKeepAlive(t *testing.T) { + re := require.New(t) + cfg := etcdutil.NewTestSingleConfig(t) + etcd, err := embed.StartEtcd(cfg) + defer func() { + etcd.Close() + }() + re.NoError(err) + + ep := cfg.LCUrls[0].String() + client, err := clientv3.New(clientv3.Config{ + Endpoints: []string{ep}, + }) + re.NoError(err) + + <-etcd.Server.ReadyNotify() + + // Create the lease. + lease := &lease{ + Purpose: "test_lease", + client: client, + lease: clientv3.NewLease(client), + } + + re.NoError(lease.Grant(defaultLeaseTimeout)) + ch := lease.keepAliveWorker(context.Background(), 2*time.Second) + time.Sleep(2 * time.Second) + <-ch + re.NoError(lease.Close()) +} diff --git a/server/grpc_service.go b/server/grpc_service.go index 7229b085d0b..7f038b55101 100644 --- a/server/grpc_service.go +++ b/server/grpc_service.go @@ -63,6 +63,7 @@ var ( ErrNotLeader = status.Errorf(codes.Unavailable, "not leader") ErrNotStarted = status.Errorf(codes.Unavailable, "server not started") ErrSendHeartbeatTimeout = status.Errorf(codes.DeadlineExceeded, "send heartbeat timeout") + ErrEtcdNotStarted = status.Errorf(codes.Unavailable, "server is started, but etcd not started") ) // GrpcServer wraps Server to provide grpc service. @@ -1896,6 +1897,9 @@ func checkStream(streamCtx context.Context, cancel context.CancelFunc, done chan // StoreGlobalConfig store global config into etcd by transaction func (s *GrpcServer) StoreGlobalConfig(_ context.Context, request *pdpb.StoreGlobalConfigRequest) (*pdpb.StoreGlobalConfigResponse, error) { + if s.client == nil { + return nil, ErrEtcdNotStarted + } ops := make([]clientv3.Op, len(request.Changes)) for i, item := range request.Changes { name := globalConfigPath + item.GetName() @@ -1915,6 +1919,9 @@ func (s *GrpcServer) StoreGlobalConfig(_ context.Context, request *pdpb.StoreGlo // LoadGlobalConfig load global config from etcd func (s *GrpcServer) LoadGlobalConfig(ctx context.Context, request *pdpb.LoadGlobalConfigRequest) (*pdpb.LoadGlobalConfigResponse, error) { + if s.client == nil { + return nil, ErrEtcdNotStarted + } names := request.Names res := make([]*pdpb.GlobalConfigItem, len(names)) for i, name := range names { @@ -1935,6 +1942,9 @@ func (s *GrpcServer) LoadGlobalConfig(ctx context.Context, request *pdpb.LoadGlo // or stoped by whatever reason // just reconnect to it. func (s *GrpcServer) WatchGlobalConfig(_ *pdpb.WatchGlobalConfigRequest, server pdpb.PD_WatchGlobalConfigServer) error { + if s.client == nil { + return ErrEtcdNotStarted + } ctx, cancel := context.WithCancel(s.Context()) defer cancel() err := s.sendAllGlobalConfig(ctx, server) diff --git a/server/member/member.go b/server/member/member.go index 470ee6330b1..1297fb641ee 100644 --- a/server/member/member.go +++ b/server/member/member.go @@ -59,6 +59,8 @@ type Member struct { // etcd leader key when the PD node is successfully elected as the PD leader // of the cluster. Every write will use it to check PD leadership. memberValue string + // lastLeaderUpdatedTime is the last time when the leader is updated. 
+ lastLeaderUpdatedTime atomic.Value } // NewMember create a new Member. @@ -121,11 +123,13 @@ func (m *Member) GetLeader() *pdpb.Member { // setLeader sets the member's PD leader. func (m *Member) setLeader(member *pdpb.Member) { m.leader.Store(member) + m.lastLeaderUpdatedTime.Store(time.Now()) } // unsetLeader unsets the member's PD leader. func (m *Member) unsetLeader() { m.leader.Store(&pdpb.Member{}) + m.lastLeaderUpdatedTime.Store(time.Now()) } // EnableLeader sets the member itself to a PD leader. @@ -143,6 +147,15 @@ func (m *Member) GetLeadership() *election.Leadership { return m.leadership } +// GetLastLeaderUpdatedTime returns the last time when the leader is updated. +func (m *Member) GetLastLeaderUpdatedTime() time.Time { + lastLeaderUpdatedTime := m.lastLeaderUpdatedTime.Load() + if lastLeaderUpdatedTime == nil { + return time.Time{} + } + return lastLeaderUpdatedTime.(time.Time) +} + // CampaignLeader is used to campaign a PD member's leadership // and make it become a PD leader. func (m *Member) CampaignLeader(leaseTimeout int64) error { diff --git a/server/region_syncer/client.go b/server/region_syncer/client.go index debf39f556f..294407eb664 100644 --- a/server/region_syncer/client.go +++ b/server/region_syncer/client.go @@ -24,6 +24,7 @@ import ( "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/grpcutil" + "github.com/tikv/pd/pkg/logutil" "github.com/tikv/pd/server/core" "github.com/tikv/pd/server/storage" "go.uber.org/zap" @@ -117,6 +118,7 @@ func (s *RegionSyncer) StartSyncWithLeader(addr string) { ctx := s.mu.clientCtx go func() { + defer logutil.LogPanic() defer s.wg.Done() // used to load region from kv storage to cache storage. bc := s.server.GetBasicCluster() diff --git a/server/replication/replication_mode.go b/server/replication/replication_mode.go index be9415309d4..d276bd8ec18 100644 --- a/server/replication/replication_mode.go +++ b/server/replication/replication_mode.go @@ -29,7 +29,6 @@ import ( "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/logutil" - "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/syncutil" "github.com/tikv/pd/server/config" "github.com/tikv/pd/server/core" @@ -62,7 +61,7 @@ type FileReplicater interface { } const drStatusFile = "DR_STATE" -const persistFileTimeout = time.Second * 10 +const persistFileTimeout = time.Second * 3 // ModeManager is used to control how raft logs are synchronized between // different tikv nodes. 
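Editor's note: the member.go hunk above records the last leader-update time in an atomic.Value with a nil-safe getter. A small self-contained sketch of that pattern, using only the standard library (tracker and its methods are illustrative names):

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

// tracker records the last time a leader change was observed. atomic.Value
// lets concurrent readers load the timestamp without taking a lock.
type tracker struct {
	lastUpdated atomic.Value // stores time.Time
}

func (t *tracker) markUpdated() {
	t.lastUpdated.Store(time.Now())
}

// lastUpdatedTime returns the zero time if no update has been recorded yet,
// mirroring the nil check in GetLastLeaderUpdatedTime above.
func (t *tracker) lastUpdatedTime() time.Time {
	v := t.lastUpdated.Load()
	if v == nil {
		return time.Time{}
	}
	return v.(time.Time)
}

func main() {
	t := &tracker{}
	fmt.Println(t.lastUpdatedTime().IsZero()) // true before any update
	t.markUpdated()
	fmt.Println(t.lastUpdatedTime().IsZero()) // false afterwards
}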
@@ -70,11 +69,11 @@ type ModeManager struct { initTime time.Time syncutil.RWMutex - config config.ReplicationModeConfig - storage endpoint.ReplicationStatusStorage - cluster schedule.Cluster - fileReplicater FileReplicater - replicatedMembers []uint64 + config config.ReplicationModeConfig + storage endpoint.ReplicationStatusStorage + cluster schedule.Cluster + fileReplicater FileReplicater + replicateState sync.Map drAutoSync drAutoSyncStatus // intermediate states of the recovery process @@ -240,7 +239,6 @@ func (m *ModeManager) drSwitchToAsyncWait(availableStores []uint64) error { return err } dr := drAutoSyncStatus{State: drStateAsyncWait, StateID: id, AvailableStores: availableStores} - m.drPersistStatusWithLock(dr) if err := m.storage.SaveReplicationStatus(modeDRAutoSync, dr); err != nil { log.Warn("failed to switch to async state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err)) return err @@ -263,7 +261,6 @@ func (m *ModeManager) drSwitchToAsyncWithLock(availableStores []uint64) error { return err } dr := drAutoSyncStatus{State: drStateAsync, StateID: id, AvailableStores: availableStores} - m.drPersistStatusWithLock(dr) if err := m.storage.SaveReplicationStatus(modeDRAutoSync, dr); err != nil { log.Warn("failed to switch to async state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err)) return err @@ -287,7 +284,6 @@ func (m *ModeManager) drSwitchToSyncRecoverWithLock() error { } now := time.Now() dr := drAutoSyncStatus{State: drStateSyncRecover, StateID: id, RecoverStartTime: &now} - m.drPersistStatusWithLock(dr) if err = m.storage.SaveReplicationStatus(modeDRAutoSync, dr); err != nil { log.Warn("failed to switch to sync_recover state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err)) return err @@ -307,7 +303,6 @@ func (m *ModeManager) drSwitchToSync() error { return err } dr := drAutoSyncStatus{State: drStateSync, StateID: id} - m.drPersistStatusWithLock(dr) if err := m.storage.SaveReplicationStatus(modeDRAutoSync, dr); err != nil { log.Warn("failed to switch to sync state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err)) return err @@ -317,50 +312,6 @@ func (m *ModeManager) drSwitchToSync() error { return nil } -func (m *ModeManager) drPersistStatusWithLock(status drAutoSyncStatus) { - ctx, cancel := context.WithTimeout(context.Background(), persistFileTimeout) - defer cancel() - - members, err := m.fileReplicater.GetMembers() - if err != nil { - log.Warn("failed to get members", zap.String("replicate-mode", modeDRAutoSync)) - return - } - - data, _ := json.Marshal(status) - - m.replicatedMembers = m.replicatedMembers[:0] - for _, member := range members { - if err := m.fileReplicater.ReplicateFileToMember(ctx, member, drStatusFile, data); err != nil { - log.Warn("failed to switch state", zap.String("replicate-mode", modeDRAutoSync), zap.String("new-state", status.State), errs.ZapError(err)) - // Throw away the error to make it possible to switch to async when - // primary and dr DC are disconnected. This will result in the - // inability to accurately determine whether data is fully - // synchronized when using dr DC to disaster recovery. - // Since the member will not be in `replicatedMembers` list, PD will - // try to replicate state file later. 
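Editor's note: the replication-mode changes above drop the synchronous per-switch file replication and instead track, per member, the last replicated state ID in a sync.Map, so a periodic tick only resends when the ID changes. A rough standalone sketch of that bookkeeping, with the transport abstracted into a send callback (replicator and its fields are illustrative, not the PD types):

package main

import (
	"fmt"
	"sync"
)

// replicator resends the serialized state to each member only when that
// member has not yet acknowledged the current state ID.
type replicator struct {
	replicated sync.Map // member ID (uint64) -> last replicated state ID (uint64)
}

func (r *replicator) replicate(members []uint64, stateID uint64, send func(member uint64) error) {
	for _, member := range members {
		if prev, ok := r.replicated.Load(member); ok && prev.(uint64) == stateID {
			continue // this member already has the current state
		}
		if err := send(member); err != nil {
			// Leave the entry unchanged so the next tick retries this member.
			continue
		}
		r.replicated.Store(member, stateID)
	}
}

func main() {
	r := &replicator{}
	send := func(member uint64) error {
		fmt.Println("replicate to member", member)
		return nil
	}
	r.replicate([]uint64{1, 2, 3}, 7, send)
	r.replicate([]uint64{1, 2, 3}, 7, send) // prints nothing: state ID unchanged
}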
- } else { - m.replicatedMembers = append(m.replicatedMembers, member.GetMemberId()) - } - } -} - -func (m *ModeManager) drCheckNeedPersistStatus(members []*pdpb.Member) bool { - m.RLock() - defer m.RUnlock() - return slice.AnyOf(members, func(i int) bool { // if there is any member in the new list - return slice.NoneOf(m.replicatedMembers, func(j int) bool { // not replicated - return m.replicatedMembers[j] == members[i].GetMemberId() - }) - }) -} - -func (m *ModeManager) drPersistStatus() { - m.Lock() - defer m.Unlock() - m.drPersistStatusWithLock(drAutoSyncStatus{State: m.drAutoSync.State, StateID: m.drAutoSync.StateID}) -} - func (m *ModeManager) drGetState() string { m.RLock() defer m.RUnlock() @@ -368,8 +319,9 @@ func (m *ModeManager) drGetState() string { } const ( - idleTimeout = time.Minute - tickInterval = 500 * time.Millisecond + idleTimeout = time.Minute + tickInterval = 500 * time.Millisecond + replicateStateInterval = time.Second * 5 ) // Run starts the background job. @@ -380,17 +332,38 @@ func (m *ModeManager) Run(ctx context.Context) { case <-ctx.Done(): return } - for { - select { - case <-time.After(tickInterval): - case <-ctx.Done(): - return + + var wg sync.WaitGroup + wg.Add(2) + + go func() { + defer wg.Done() + for { + select { + case <-time.After(tickInterval): + case <-ctx.Done(): + return + } + m.tickUpdateState() } - m.tickDR() - } + }() + + go func() { + defer wg.Done() + for { + select { + case <-time.After(replicateStateInterval): + case <-ctx.Done(): + return + } + m.tickReplicateStatus() + } + }() + + wg.Wait() } -func (m *ModeManager) tickDR() { +func (m *ModeManager) tickUpdateState() { if m.getModeName() != modeDRAutoSync { return } @@ -483,8 +456,42 @@ func (m *ModeManager) tickDR() { } } } +} + +func (m *ModeManager) tickReplicateStatus() { + if m.getModeName() != modeDRAutoSync { + return + } + + m.RLock() + state := drAutoSyncStatus{ + State: m.drAutoSync.State, + StateID: m.drAutoSync.StateID, + AvailableStores: m.drAutoSync.AvailableStores, + RecoverStartTime: m.drAutoSync.RecoverStartTime, + } + m.RUnlock() - m.checkReplicateFile() + data, _ := json.Marshal(state) + + members, err := m.fileReplicater.GetMembers() + if err != nil { + log.Warn("failed to get members", zap.String("replicate-mode", modeDRAutoSync)) + return + } + for _, member := range members { + stateID, ok := m.replicateState.Load(member.GetMemberId()) + if !ok || stateID.(uint64) != state.StateID { + ctx, cancel := context.WithTimeout(context.Background(), persistFileTimeout) + err := m.fileReplicater.ReplicateFileToMember(ctx, member, drStatusFile, data) + if err != nil { + log.Warn("failed to switch state", zap.String("replicate-mode", modeDRAutoSync), zap.String("new-state", state.State), errs.ZapError(err)) + } else { + m.replicateState.Store(member.GetMemberId(), state.StateID) + } + cancel() + } + } } const ( @@ -503,6 +510,10 @@ func (m *ModeManager) checkStoreStatus() [][]uint64 { if s.IsRemoved() { continue } + // learner peers do not participate in major commit or vote, so it should not count in primary/dr as a normal store. 
+ if s.GetRegionCount() == s.GetLearnerCount() { + continue + } down := s.DownTime() >= m.config.DRAutoSync.WaitStoreTimeout.Duration labelValue := s.GetLabelValue(m.config.DRAutoSync.LabelKey) if labelValue == m.config.DRAutoSync.Primary { @@ -552,17 +563,6 @@ func (m *ModeManager) drCheckStoreStateUpdated(stores []uint64) bool { return true } -func (m *ModeManager) checkReplicateFile() { - members, err := m.fileReplicater.GetMembers() - if err != nil { - log.Warn("failed to get members", zap.String("replicate-mode", modeDRAutoSync)) - return - } - if m.drCheckNeedPersistStatus(members) { - m.drPersistStatus() - } -} - var ( regionScanBatchSize = 1024 regionMinSampleSize = 512 diff --git a/server/replication/replication_mode_test.go b/server/replication/replication_mode_test.go index 605465d3416..7ddd4880011 100644 --- a/server/replication/replication_mode_test.go +++ b/server/replication/replication_mode_test.go @@ -167,7 +167,7 @@ func TestStateSwitch(t *testing.T) { Primary: "zone1", DR: "zone2", PrimaryReplicas: 4, - DRReplicas: 1, + DRReplicas: 2, WaitStoreTimeout: typeutil.Duration{Duration: time.Minute}, }} cluster := mockcluster.NewCluster(ctx, config.NewTestOptions()) @@ -184,6 +184,7 @@ func TestStateSwitch(t *testing.T) { re.Equal(drStateSync, rep.drGetState()) stateID := rep.drAutoSync.StateID re.NotEqual(uint64(0), stateID) + rep.tickReplicateStatus() re.Equal(fmt.Sprintf(`{"state":"sync","state_id":%d}`, stateID), replicator.lastData[1]) assertStateIDUpdate := func() { re.NotEqual(stateID, rep.drAutoSync.StateID) @@ -197,9 +198,10 @@ func TestStateSwitch(t *testing.T) { } // only one zone, sync -> async_wait -> async - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateAsyncWait, rep.drGetState()) assertStateIDUpdate() + rep.tickReplicateStatus() re.Equal(fmt.Sprintf(`{"state":"async_wait","state_id":%d,"available_stores":[1,2,3,4]}`, stateID), replicator.lastData[1]) re.False(rep.GetReplicationStatus().GetDrAutoSync().GetPauseRegionSplit()) @@ -208,108 +210,119 @@ func TestStateSwitch(t *testing.T) { re.True(rep.GetReplicationStatus().GetDrAutoSync().GetPauseRegionSplit()) syncStoreStatus(1, 2, 3, 4) - rep.tickDR() + rep.tickUpdateState() assertStateIDUpdate() + rep.tickReplicateStatus() re.Equal(fmt.Sprintf(`{"state":"async","state_id":%d,"available_stores":[1,2,3,4]}`, stateID), replicator.lastData[1]) // add new store in dr zone. cluster.AddLabelsStore(5, 1, map[string]string{"zone": "zone2"}) - cluster.AddLabelsStore(6, 1, map[string]string{"zone": "zone2"}) + cluster.AddLabersStoreWithLearnerCount(6, 1, 1, map[string]string{"zone": "zone2"}) // async -> sync - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateSyncRecover, rep.drGetState()) rep.drSwitchToSync() re.Equal(drStateSync, rep.drGetState()) assertStateIDUpdate() // sync -> async_wait - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateSync, rep.drGetState()) setStoreState(cluster, "down", "up", "up", "up", "up", "up") - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateSync, rep.drGetState()) setStoreState(cluster, "down", "down", "up", "up", "up", "up") setStoreState(cluster, "down", "down", "down", "up", "up", "up") - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateSync, rep.drGetState()) // cannot guarantee majority, keep sync. + setStoreState(cluster, "up", "up", "up", "up", "up", "down") + rep.tickUpdateState() + re.Equal(drStateSync, rep.drGetState()) + + // once the voter node down, even learner node up, swith to async state. 
setStoreState(cluster, "up", "up", "up", "up", "down", "up") - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateAsyncWait, rep.drGetState()) - assertStateIDUpdate() rep.drSwitchToSync() replicator.errors[2] = errors.New("fail to replicate") - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateAsyncWait, rep.drGetState()) assertStateIDUpdate() delete(replicator.errors, 1) // async_wait -> sync setStoreState(cluster, "up", "up", "up", "up", "up", "up") - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateSync, rep.drGetState()) re.False(rep.GetReplicationStatus().GetDrAutoSync().GetPauseRegionSplit()) // async_wait -> async_wait setStoreState(cluster, "up", "up", "up", "up", "down", "up") - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateAsyncWait, rep.drGetState()) assertStateIDUpdate() + rep.tickReplicateStatus() re.Equal(fmt.Sprintf(`{"state":"async_wait","state_id":%d,"available_stores":[1,2,3,4]}`, stateID), replicator.lastData[1]) setStoreState(cluster, "down", "up", "up", "up", "down", "up") - rep.tickDR() + rep.tickUpdateState() assertStateIDUpdate() + rep.tickReplicateStatus() re.Equal(fmt.Sprintf(`{"state":"async_wait","state_id":%d,"available_stores":[2,3,4]}`, stateID), replicator.lastData[1]) setStoreState(cluster, "up", "down", "up", "up", "down", "up") - rep.tickDR() + rep.tickUpdateState() assertStateIDUpdate() + rep.tickReplicateStatus() re.Equal(fmt.Sprintf(`{"state":"async_wait","state_id":%d,"available_stores":[1,3,4]}`, stateID), replicator.lastData[1]) // async_wait -> async - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateAsyncWait, rep.drGetState()) syncStoreStatus(1, 3) - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateAsyncWait, rep.drGetState()) syncStoreStatus(4) - rep.tickDR() + rep.tickUpdateState() assertStateIDUpdate() + rep.tickReplicateStatus() re.Equal(fmt.Sprintf(`{"state":"async","state_id":%d,"available_stores":[1,3,4]}`, stateID), replicator.lastData[1]) // async -> async setStoreState(cluster, "up", "up", "up", "up", "down", "up") - rep.tickDR() + rep.tickUpdateState() // store 2 won't be available before it syncs status. + rep.tickReplicateStatus() re.Equal(fmt.Sprintf(`{"state":"async","state_id":%d,"available_stores":[1,3,4]}`, stateID), replicator.lastData[1]) syncStoreStatus(1, 2, 3, 4) - rep.tickDR() + rep.tickUpdateState() assertStateIDUpdate() + rep.tickReplicateStatus() re.Equal(fmt.Sprintf(`{"state":"async","state_id":%d,"available_stores":[1,2,3,4]}`, stateID), replicator.lastData[1]) // async -> sync_recover setStoreState(cluster, "up", "up", "up", "up", "up", "up") - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateSyncRecover, rep.drGetState()) assertStateIDUpdate() rep.drSwitchToAsync([]uint64{1, 2, 3, 4, 5}) setStoreState(cluster, "down", "up", "up", "up", "up", "up") - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateSyncRecover, rep.drGetState()) assertStateIDUpdate() // sync_recover -> async - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateSyncRecover, rep.drGetState()) setStoreState(cluster, "up", "up", "up", "up", "down", "up") - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateAsync, rep.drGetState()) assertStateIDUpdate() // lost majority, does not switch to async. 
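Editor's note: the Run changes exercised by these tests split the old single tickDR loop into two independent tickers, one for state updates and one for replicating the status file. A compact sketch of that two-ticker loop, assuming placeholder callbacks in place of the real tick methods:

package main

import (
	"context"
	"sync"
	"time"
)

// run drives two periodic jobs until ctx is cancelled: a fast state-update
// tick and a slower replicate-status tick, mirroring the split of tickDR
// into tickUpdateState and tickReplicateStatus.
func run(ctx context.Context, updateState, replicateStatus func()) {
	const (
		tickInterval           = 500 * time.Millisecond
		replicateStateInterval = 5 * time.Second
	)
	var wg sync.WaitGroup
	wg.Add(2)
	go func() {
		defer wg.Done()
		for {
			select {
			case <-time.After(tickInterval):
				updateState()
			case <-ctx.Done():
				return
			}
		}
	}()
	go func() {
		defer wg.Done()
		for {
			select {
			case <-time.After(replicateStateInterval):
				replicateStatus()
			case <-ctx.Done():
				return
			}
		}
	}()
	wg.Wait()
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	run(ctx, func() {}, func() {})
}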
rep.drSwitchToSyncRecover() assertStateIDUpdate() setStoreState(cluster, "down", "down", "up", "up", "down", "up") - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateSyncRecover, rep.drGetState()) // sync_recover -> sync @@ -323,7 +336,7 @@ func TestStateSwitch(t *testing.T) { State: pb.RegionReplicationState_SIMPLE_MAJORITY, })) cluster.PutRegion(region) - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateSyncRecover, rep.drGetState()) region = region.Clone(core.SetReplicationStatus(&pb.RegionReplicationStatus{ @@ -331,14 +344,14 @@ func TestStateSwitch(t *testing.T) { StateId: rep.drAutoSync.StateID - 1, // mismatch state id })) cluster.PutRegion(region) - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateSyncRecover, rep.drGetState()) region = region.Clone(core.SetReplicationStatus(&pb.RegionReplicationStatus{ State: pb.RegionReplicationState_INTEGRITY_OVER_LABEL, StateId: rep.drAutoSync.StateID, })) cluster.PutRegion(region) - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateSync, rep.drGetState()) assertStateIDUpdate() } @@ -363,25 +376,27 @@ func TestReplicateState(t *testing.T) { stateID := rep.drAutoSync.StateID // replicate after initialized + rep.tickReplicateStatus() re.Equal(fmt.Sprintf(`{"state":"sync","state_id":%d}`, stateID), replicator.lastData[1]) // repliate state to new member replicator.memberIDs = append(replicator.memberIDs, 2, 3) - rep.checkReplicateFile() + rep.tickReplicateStatus() re.Equal(fmt.Sprintf(`{"state":"sync","state_id":%d}`, stateID), replicator.lastData[2]) re.Equal(fmt.Sprintf(`{"state":"sync","state_id":%d}`, stateID), replicator.lastData[3]) // inject error replicator.errors[2] = errors.New("failed to persist") - rep.tickDR() // switch async_wait since there is only one zone + rep.tickUpdateState() // switch async_wait since there is only one zone newStateID := rep.drAutoSync.StateID + rep.tickReplicateStatus() re.Equal(fmt.Sprintf(`{"state":"async_wait","state_id":%d}`, newStateID), replicator.lastData[1]) re.Equal(fmt.Sprintf(`{"state":"sync","state_id":%d}`, stateID), replicator.lastData[2]) re.Equal(fmt.Sprintf(`{"state":"async_wait","state_id":%d}`, newStateID), replicator.lastData[3]) // clear error, replicate to node 2 next time delete(replicator.errors, 2) - rep.checkReplicateFile() + rep.tickReplicateStatus() re.Equal(fmt.Sprintf(`{"state":"async_wait","state_id":%d}`, newStateID), replicator.lastData[2]) } @@ -408,7 +423,7 @@ func TestAsynctimeout(t *testing.T) { cluster.AddLabelsStore(3, 1, map[string]string{"zone": "zone2"}) setStoreState(cluster, "up", "up", "down") - rep.tickDR() + rep.tickUpdateState() re.Equal(drStateAsyncWait, rep.drGetState()) } diff --git a/server/schedule/checker/checker_controller.go b/server/schedule/checker/checker_controller.go index 4106ed1c780..d3e082ff60a 100644 --- a/server/schedule/checker/checker_controller.go +++ b/server/schedule/checker/checker_controller.go @@ -32,6 +32,8 @@ import ( // DefaultCacheSize is the default length of waiting list. const DefaultCacheSize = 1000 +var denyCheckersByLabelerCounter = schedule.LabelerEventCounter.WithLabelValues("checkers", "deny") + // Controller is used to manage all checkers. 
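Editor's note: the checker_controller.go hunk below pre-binds a Prometheus counter for checkers skipped by a schedule=deny region label. A standalone sketch of defining and bumping such a counter with the prometheus client; the metric names copy the hunk, while registration and usage are simplified:

package main

import "github.com/prometheus/client_golang/prometheus"

// labelerEventCounter counts scheduling events rejected by region labels.
// Pre-binding the label values once avoids the label lookup on every Inc.
var (
	labelerEventCounter = prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "pd",
			Subsystem: "schedule",
			Name:      "labeler_event_counter",
			Help:      "Counter of the scheduler label.",
		}, []string{"type", "event"})

	denyCheckersByLabeler = labelerEventCounter.WithLabelValues("checkers", "deny")
)

func main() {
	prometheus.MustRegister(labelerEventCounter)
	denyCheckersByLabeler.Inc() // bump when a region is skipped due to schedule=deny
}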
type Controller struct { cluster schedule.Cluster @@ -80,13 +82,6 @@ func (c *Controller) CheckRegion(region *core.RegionInfo) []*operator.Operator { return []*operator.Operator{op} } - if cl, ok := c.cluster.(interface{ GetRegionLabeler() *labeler.RegionLabeler }); ok { - l := cl.GetRegionLabeler() - if l.ScheduleDisabled(region) { - return nil - } - } - if op := c.splitChecker.Check(region); op != nil { return []*operator.Operator{op} } @@ -112,6 +107,15 @@ func (c *Controller) CheckRegion(region *core.RegionInfo) []*operator.Operator { c.regionWaitingList.Put(region.GetID(), nil) } } + // skip the joint checker, split checker and rule checker when region label is set to "schedule=deny". + // those checkers is help to make region health, it's necessary to skip them when region is set to deny. + if cl, ok := c.cluster.(interface{ GetRegionLabeler() *labeler.RegionLabeler }); ok { + l := cl.GetRegionLabeler() + if l.ScheduleDisabled(region) { + denyCheckersByLabelerCounter.Inc() + return nil + } + } if c.mergeChecker != nil { allowed := opController.OperatorCount(operator.OpMerge) < c.opts.GetMergeScheduleLimit() diff --git a/server/schedule/checker/rule_checker.go b/server/schedule/checker/rule_checker.go index 16cc3cf46b4..af3f28839ba 100644 --- a/server/schedule/checker/rule_checker.go +++ b/server/schedule/checker/rule_checker.go @@ -390,14 +390,15 @@ func (c *RuleChecker) fixOrphanPeers(region *core.RegionInfo, fit *placement.Reg if len(fit.OrphanPeers) == 0 { return nil, nil } + var pinDownPeer *metapb.Peer isUnhealthyPeer := func(id uint64) bool { - for _, pendingPeer := range region.GetPendingPeers() { - if pendingPeer.GetId() == id { + for _, downPeer := range region.GetDownPeers() { + if downPeer.Peer.GetId() == id { return true } } - for _, downPeer := range region.GetDownPeers() { - if downPeer.Peer.GetId() == id { + for _, pendingPeer := range region.GetPendingPeers() { + if pendingPeer.GetId() == id { return true } } @@ -414,24 +415,71 @@ loopFits: } for _, p := range rf.Peers { if isUnhealthyPeer(p.GetId()) { + // make sure is down peer. + if region.GetDownPeer(p.GetId()) != nil { + pinDownPeer = p + } hasUnhealthyFit = true break loopFits } } } + // If hasUnhealthyFit is false, it is safe to delete the OrphanPeer. if !hasUnhealthyFit { checkerCounter.WithLabelValues("rule_checker", "remove-orphan-peer").Inc() return operator.CreateRemovePeerOperator("remove-orphan-peer", c.cluster, 0, region, fit.OrphanPeers[0].StoreId) } + + // try to use orphan peers to replace unhealthy down peers. + for _, orphanPeer := range fit.OrphanPeers { + if pinDownPeer != nil { + // make sure the orphan peer is healthy. + if isUnhealthyPeer(orphanPeer.GetId()) { + continue + } + // no consider witness in this path. + if pinDownPeer.GetIsWitness() || orphanPeer.GetIsWitness() { + continue + } + // down peer's store should be down. + if !c.isStoreDownTimeHitMaxDownTime(pinDownPeer.GetStoreId()) { + continue + } + // check if down peer can replace with orphan peer. 
+ dstStore := c.cluster.GetStore(orphanPeer.GetStoreId()) + if fit.Replace(pinDownPeer.GetStoreId(), dstStore) { + destRole := pinDownPeer.GetRole() + orphanPeerRole := orphanPeer.GetRole() + checkerCounter.WithLabelValues("rule_checker", "replace-orphan-peer").Inc() + switch { + case orphanPeerRole == metapb.PeerRole_Learner && destRole == metapb.PeerRole_Voter: + return operator.CreatePromoteLearnerOperatorAndRemovePeer("replace-down-peer-with-orphan-peer", c.cluster, region, orphanPeer, pinDownPeer) + case orphanPeerRole == metapb.PeerRole_Voter && destRole == metapb.PeerRole_Learner: + return operator.CreateDemoteLearnerOperatorAndRemovePeer("replace-down-peer-with-orphan-peer", c.cluster, region, orphanPeer, pinDownPeer) + default: + // destRole should not same with orphanPeerRole. if role is same, it fit with orphanPeer should be better than now. + // destRole never be leader, so we not consider it. + } + } + } + } + // If hasUnhealthyFit is true, try to remove unhealthy orphan peers only if number of OrphanPeers is >= 2. // Ref https://github.com/tikv/pd/issues/4045 if len(fit.OrphanPeers) >= 2 { + hasHealthPeer := false for _, orphanPeer := range fit.OrphanPeers { if isUnhealthyPeer(orphanPeer.GetId()) { checkerCounter.WithLabelValues("rule_checker", "remove-orphan-peer").Inc() return operator.CreateRemovePeerOperator("remove-orphan-peer", c.cluster, 0, region, orphanPeer.StoreId) } + if hasHealthPeer { + // there already exists a healthy orphan peer, so we can remove other orphan Peers. + checkerCounter.WithLabelValues("rule_checker", "remove-orphan-peer").Inc() + return operator.CreateRemovePeerOperator("remove-orphan-peer", c.cluster, 0, region, orphanPeer.StoreId) + } + hasHealthPeer = true } } checkerCounter.WithLabelValues("rule_checker", "skip-remove-orphan-peer").Inc() @@ -455,6 +503,10 @@ func (c *RuleChecker) isDownPeer(region *core.RegionInfo, peer *metapb.Peer) boo func (c *RuleChecker) isStoreDownTimeHitMaxDownTime(storeID uint64) bool { store := c.cluster.GetStore(storeID) + if store == nil { + log.Warn("lost the store, maybe you are recovering the PD cluster", zap.Uint64("store-id", storeID)) + return false + } return store.DownTime() >= c.cluster.GetOpts().GetMaxStoreDownTime() } diff --git a/server/schedule/checker/rule_checker_test.go b/server/schedule/checker/rule_checker_test.go index 8032eee8b01..4230cef537a 100644 --- a/server/schedule/checker/rule_checker_test.go +++ b/server/schedule/checker/rule_checker_test.go @@ -167,6 +167,39 @@ func (suite *ruleCheckerTestSuite) TestFixOrphanPeers() { suite.Equal(uint64(4), op.Step(0).(operator.RemovePeer).FromStore) } +func (suite *ruleCheckerTestSuite) TestFixToManyOrphanPeers() { + suite.cluster.AddLeaderStore(1, 1) + suite.cluster.AddLeaderStore(2, 1) + suite.cluster.AddLeaderStore(3, 1) + suite.cluster.AddLeaderStore(4, 1) + suite.cluster.AddLeaderStore(5, 1) + suite.cluster.AddLeaderStore(6, 1) + suite.cluster.AddRegionWithLearner(1, 1, []uint64{2, 3}, []uint64{4, 5, 6}) + // Case1: + // store 4, 5, 6 are orphan peers, and peer on store 3 is pending and down peer. 
+ region := suite.cluster.GetRegion(1) + region = region.Clone( + core.WithDownPeers([]*pdpb.PeerStats{{Peer: region.GetStorePeer(3), DownSeconds: 60000}}), + core.WithPendingPeers([]*metapb.Peer{region.GetStorePeer(3)})) + suite.cluster.PutRegion(region) + op := suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) + suite.Equal("remove-orphan-peer", op.Desc()) + suite.Equal(uint64(5), op.Step(0).(operator.RemovePeer).FromStore) + + // Case2: + // store 4, 5, 6 are orphan peers, and peer on store 3 is down peer. and peer on store 4, 5 are pending. + region = suite.cluster.GetRegion(1) + region = region.Clone( + core.WithDownPeers([]*pdpb.PeerStats{{Peer: region.GetStorePeer(3), DownSeconds: 60000}}), + core.WithPendingPeers([]*metapb.Peer{region.GetStorePeer(4), region.GetStorePeer(5)})) + suite.cluster.PutRegion(region) + op = suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) + suite.Equal("remove-orphan-peer", op.Desc()) + suite.Equal(uint64(4), op.Step(0).(operator.RemovePeer).FromStore) +} + func (suite *ruleCheckerTestSuite) TestFixOrphanPeers2() { // check orphan peers can only be handled when all rules are satisfied. suite.cluster.AddLabelsStore(1, 1, map[string]string{"foo": "bar"}) @@ -311,7 +344,7 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness() { suite.cluster.AddLabelsStore(1, 1, map[string]string{"A": "leader"}) suite.cluster.AddLabelsStore(2, 1, map[string]string{"B": "follower"}) suite.cluster.AddLabelsStore(3, 1, map[string]string{"C": "voter"}) - suite.cluster.AddLeaderRegion(1, 1, 2) + suite.cluster.AddLeaderRegion(1, 1) suite.ruleManager.SetRule(&placement.Rule{ GroupID: "pd", @@ -336,24 +369,25 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness2() { suite.cluster.AddLabelsStore(1, 1, map[string]string{"A": "leader"}) suite.cluster.AddLabelsStore(2, 1, map[string]string{"B": "voter"}) suite.cluster.AddLabelsStore(3, 1, map[string]string{"C": "voter"}) - suite.cluster.AddLeaderRegion(1, 1, 2, 3) + suite.cluster.AddLabelsStore(4, 1, map[string]string{"D": "voter"}) + suite.cluster.AddLeaderRegion(1, 1, 2, 3, 4) suite.ruleManager.SetRule(&placement.Rule{ GroupID: "pd", ID: "r1", Index: 100, - Override: true, + Override: false, Role: placement.Voter, Count: 1, IsWitness: true, LabelConstraints: []placement.LabelConstraint{ - {Key: "C", Op: "in", Values: []string{"voter"}}, + {Key: "D", Op: "in", Values: []string{"voter"}}, }, }) op := suite.rc.Check(suite.cluster.GetRegion(1)) suite.NotNil(op) suite.Equal("fix-witness-peer", op.Desc()) - suite.Equal(uint64(3), op.Step(0).(operator.BecomeWitness).StoreID) + suite.Equal(uint64(4), op.Step(0).(operator.BecomeWitness).StoreID) } func (suite *ruleCheckerTestSuite) TestFixRuleWitness3() { @@ -365,7 +399,7 @@ func (suite *ruleCheckerTestSuite) TestFixRuleWitness3() { r := suite.cluster.GetRegion(1) // set peer3 to witness r = r.Clone(core.WithWitnesses([]*metapb.Peer{r.GetPeer(3)})) - + suite.cluster.PutRegion(r) op := suite.rc.Check(r) suite.NotNil(op) suite.Equal("fix-non-witness-peer", op.Desc()) @@ -650,6 +684,132 @@ func (suite *ruleCheckerTestSuite) TestPriorityFixOrphanPeer() { suite.Equal("remove-orphan-peer", op.Desc()) } +func (suite *ruleCheckerTestSuite) TestPriorityFitHealthWithDifferentRole1() { + suite.cluster.SetEnableUseJointConsensus(true) + suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host2"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host3"}) + 
suite.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4"}) + suite.cluster.AddRegionWithLearner(1, 1, []uint64{2, 3}, []uint64{4}) + r1 := suite.cluster.GetRegion(1) + suite.cluster.GetStore(3).GetMeta().LastHeartbeat = time.Now().Add(-31 * time.Minute).UnixNano() + + // set peer3 to pending and down + r1 = r1.Clone(core.WithPendingPeers([]*metapb.Peer{r1.GetPeer(3)})) + r1 = r1.Clone(core.WithDownPeers([]*pdpb.PeerStats{ + { + Peer: r1.GetStorePeer(3), + DownSeconds: 30000, + }, + })) + suite.cluster.PutRegion(r1) + + op := suite.rc.Check(suite.cluster.GetRegion(1)) + suite.Equal(uint64(3), op.Step(0).(operator.ChangePeerV2Enter).DemoteVoters[0].ToStore) + suite.Equal(uint64(4), op.Step(0).(operator.ChangePeerV2Enter).PromoteLearners[0].ToStore) + suite.Equal(uint64(3), op.Step(1).(operator.ChangePeerV2Leave).DemoteVoters[0].ToStore) + suite.Equal(uint64(4), op.Step(1).(operator.ChangePeerV2Leave).PromoteLearners[0].ToStore) + suite.Equal("replace-down-peer-with-orphan-peer", op.Desc()) + + // set peer3 only pending + r1 = r1.Clone(core.WithDownPeers(nil)) + suite.cluster.PutRegion(r1) + op = suite.rc.Check(suite.cluster.GetRegion(1)) + suite.Nil(op) +} + +func (suite *ruleCheckerTestSuite) TestPriorityFitHealthWithDifferentRole2() { + suite.cluster.SetEnableUseJointConsensus(true) + suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host2"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host3"}) + suite.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4"}) + suite.cluster.AddLabelsStore(5, 1, map[string]string{"host": "host5"}) + suite.cluster.AddLeaderRegion(1, 1, 2, 3, 4, 5) + r1 := suite.cluster.GetRegion(1) + + // set peer3 to pending and down, and peer 3 to learner, and store 3 is down + suite.cluster.GetStore(3).GetMeta().LastHeartbeat = time.Now().Add(-31 * time.Minute).UnixNano() + r1 = r1.Clone(core.WithLearners([]*metapb.Peer{r1.GetPeer(3)})) + r1 = r1.Clone( + core.WithPendingPeers([]*metapb.Peer{r1.GetPeer(3)}), + core.WithDownPeers([]*pdpb.PeerStats{ + { + Peer: r1.GetStorePeer(3), + DownSeconds: 30000, + }, + }), + ) + suite.cluster.PutRegion(r1) + + // default and test group => 3 voter + 1 learner + err := suite.ruleManager.SetRule(&placement.Rule{ + GroupID: "test", + ID: "10", + Role: placement.Learner, + Count: 1, + }) + suite.NoError(err) + + op := suite.rc.Check(suite.cluster.GetRegion(1)) + suite.Equal(uint64(5), op.Step(0).(operator.ChangePeerV2Enter).DemoteVoters[0].ToStore) + suite.Equal(uint64(3), op.Step(1).(operator.RemovePeer).FromStore) + suite.Equal("replace-down-peer-with-orphan-peer", op.Desc()) +} + +func (suite *ruleCheckerTestSuite) TestPriorityFitHealthPeersAndTiFlash() { + suite.cluster.SetEnableUseJointConsensus(true) + suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) + suite.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host2"}) + suite.cluster.AddLabelsStore(3, 1, map[string]string{"host": "host3"}) + suite.cluster.AddLabelsStore(4, 1, map[string]string{"host": "host4", "engine": "tiflash"}) + suite.cluster.AddRegionWithLearner(1, 1, []uint64{2, 3}, []uint64{4}) + rule := &placement.Rule{ + GroupID: "pd", + ID: "test", + Role: placement.Voter, + Count: 3, + } + rule2 := &placement.Rule{ + GroupID: "pd", + ID: "test2", + Role: placement.Learner, + Count: 1, + LabelConstraints: []placement.LabelConstraint{ + { + Key: "engine", + Op: placement.In, + Values: []string{"tiflash"}, + }, + }, + } + 
suite.ruleManager.SetRule(rule) + suite.ruleManager.SetRule(rule2) + suite.ruleManager.DeleteRule("pd", "default") + + r1 := suite.cluster.GetRegion(1) + // set peer3 to pending and down + r1 = r1.Clone(core.WithPendingPeers([]*metapb.Peer{r1.GetPeer(3)})) + r1 = r1.Clone(core.WithDownPeers([]*pdpb.PeerStats{ + { + Peer: r1.GetStorePeer(3), + DownSeconds: 30000, + }, + })) + suite.cluster.PutRegion(r1) + suite.cluster.GetStore(3).GetMeta().LastHeartbeat = time.Now().Add(-31 * time.Minute).UnixNano() + + op := suite.rc.Check(suite.cluster.GetRegion(1)) + // should not promote tiflash peer + suite.Nil(op) + + // scale a node, can replace the down peer + suite.cluster.AddLabelsStore(5, 1, map[string]string{"host": "host5"}) + op = suite.rc.Check(suite.cluster.GetRegion(1)) + suite.NotNil(op) + suite.Equal("replace-rule-down-peer", op.Desc()) +} + func (suite *ruleCheckerTestSuite) TestIssue3293() { suite.cluster.AddLabelsStore(1, 1, map[string]string{"host": "host1"}) suite.cluster.AddLabelsStore(2, 1, map[string]string{"host": "host1"}) diff --git a/server/schedule/metrics.go b/server/schedule/metrics.go index 89dabf8e74e..79e789075fe 100644 --- a/server/schedule/metrics.go +++ b/server/schedule/metrics.go @@ -83,6 +83,15 @@ var ( Name: "scatter_distribution", Help: "Counter of the distribution in scatter.", }, []string{"store", "is_leader", "engine"}) + + // LabelerEventCounter is a counter of the scheduler labeler system. + LabelerEventCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "pd", + Subsystem: "schedule", + Name: "labeler_event_counter", + Help: "Counter of the scheduler label.", + }, []string{"type", "event"}) ) func init() { @@ -94,4 +103,5 @@ func init() { prometheus.MustRegister(scatterCounter) prometheus.MustRegister(scatterDistributionCounter) prometheus.MustRegister(operatorSizeHist) + prometheus.MustRegister(LabelerEventCounter) } diff --git a/server/schedule/operator/builder.go b/server/schedule/operator/builder.go index 4dfb98324e5..93c4048e79d 100644 --- a/server/schedule/operator/builder.go +++ b/server/schedule/operator/builder.go @@ -403,7 +403,6 @@ func (b *Builder) Build(kind OpKind) (*Operator, error) { if brief, b.err = b.prepareBuild(); b.err != nil { return nil, b.err } - if b.useJointConsensus { kind, b.err = b.buildStepsWithJointConsensus(kind) } else { @@ -549,6 +548,10 @@ func (b *Builder) brief() string { return fmt.Sprintf("%s: store %s to %s", op, b.toRemove, b.toAdd) case len(b.toAdd) > 0: return fmt.Sprintf("add peer: store %s", b.toAdd) + case len(b.toRemove) > 0 && len(b.toPromote) > 0: + return fmt.Sprintf("promote peer: store %s, rm peer: store %s", b.toRemove, b.toPromote) + case len(b.toRemove) > 0 && len(b.toDemote) > 0: + return fmt.Sprintf("demote peer: store %s, rm peer: store %s", b.toDemote, b.toRemove) case len(b.toRemove) > 0: return fmt.Sprintf("rm peer: store %s", b.toRemove) case len(b.toPromote) > 0: diff --git a/server/schedule/operator/create_operator.go b/server/schedule/operator/create_operator.go index 206d839ab28..71c37f99c24 100644 --- a/server/schedule/operator/create_operator.go +++ b/server/schedule/operator/create_operator.go @@ -50,6 +50,25 @@ func CreatePromoteLearnerOperator(desc string, ci ClusterInformer, region *core. Build(0) } +// CreatePromoteLearnerOperatorAndRemovePeer creates an operator that promotes a learner and removes a peer. 
+func CreatePromoteLearnerOperatorAndRemovePeer(desc string, ci ClusterInformer, region *core.RegionInfo, toPromote *metapb.Peer, toRemove *metapb.Peer) (*Operator, error) { + return NewBuilder(desc, ci, region). + PromoteLearner(toPromote.GetStoreId()). + RemovePeer(toRemove.GetStoreId()). + Build(0) +} + +// CreateDemoteLearnerOperatorAndRemovePeer creates an operator that demotes a learner and removes a peer. +func CreateDemoteLearnerOperatorAndRemovePeer(desc string, ci ClusterInformer, region *core.RegionInfo, toDemote *metapb.Peer, toRemove *metapb.Peer) (*Operator, error) { + if !ci.GetOpts().IsUseJointConsensus() { + return nil, errors.Errorf("cannot build demote learner operator due to disabling using joint state") + } + return NewBuilder(desc, ci, region). + DemoteVoter(toDemote.GetStoreId()). + RemovePeer(toRemove.GetStoreId()). + Build(0) +} + // CreateRemovePeerOperator creates an operator that removes a peer from region. func CreateRemovePeerOperator(desc string, ci ClusterInformer, kind OpKind, region *core.RegionInfo, storeID uint64) (*Operator, error) { return NewBuilder(desc, ci, region). @@ -238,7 +257,7 @@ func CreateLeaveJointStateOperator(desc string, ci ClusterInformer, origin *core b := NewBuilder(desc, ci, origin, SkipOriginJointStateCheck, SkipPlacementRulesCheck) if b.err == nil && !core.IsInJointState(origin.GetPeers()...) { - b.err = errors.Errorf("cannot build leave joint state operator for region which is not in joint state") + b.err = errors.Errorf("cannot build leave joint state operator due to disabling using joint state") } if b.err != nil { diff --git a/server/schedule/operator/operator.go b/server/schedule/operator/operator.go index 3fae9d86eea..b38f4555d6c 100644 --- a/server/schedule/operator/operator.go +++ b/server/schedule/operator/operator.go @@ -87,7 +87,7 @@ func (o *Operator) Sync(other *Operator) { func (o *Operator) String() string { stepStrs := make([]string, len(o.steps)) for i := range o.steps { - stepStrs[i] = o.steps[i].String() + stepStrs[i] = fmt.Sprintf("%d:{%s}", i, o.steps[i].String()) } s := fmt.Sprintf("%s {%s} (kind:%s, region:%v(%v, %v), createAt:%s, startAt:%s, currentStep:%v, size:%d, steps:[%s],timeout:[%s])", o.desc, o.brief, o.kind, o.regionID, o.regionEpoch.GetVersion(), o.regionEpoch.GetConfVer(), o.GetCreateTime(), diff --git a/server/schedule/operator_controller.go b/server/schedule/operator_controller.go index c7b2cc99d34..4beb10b665c 100644 --- a/server/schedule/operator_controller.go +++ b/server/schedule/operator_controller.go @@ -30,7 +30,6 @@ import ( "github.com/tikv/pd/server/core" "github.com/tikv/pd/server/core/storelimit" "github.com/tikv/pd/server/schedule/hbstream" - "github.com/tikv/pd/server/schedule/labeler" "github.com/tikv/pd/server/schedule/operator" "github.com/tikv/pd/server/versioninfo" "go.uber.org/zap" @@ -423,14 +422,6 @@ func (oc *OperatorController) checkAddOperator(isPromoting bool, ops ...*operato if op.SchedulerKind() == operator.OpAdmin || op.IsLeaveJointStateOperator() { continue } - if cl, ok := oc.cluster.(interface{ GetRegionLabeler() *labeler.RegionLabeler }); ok { - l := cl.GetRegionLabeler() - if l.ScheduleDisabled(region) { - log.Debug("schedule disabled", zap.Uint64("region-id", op.RegionID())) - operatorWaitCounter.WithLabelValues(op.Desc(), "schedule-disabled").Inc() - return false - } - } } expired := false for _, op := range ops { diff --git a/server/schedule/operator_controller_test.go b/server/schedule/operator_controller_test.go index afbfdc22e1b..3e1121289d0 100644 --- 
a/server/schedule/operator_controller_test.go +++ b/server/schedule/operator_controller_test.go @@ -762,23 +762,8 @@ func (suite *operatorControllerTestSuite) TestAddWaitingOperator() { }) suite.True(labelerManager.ScheduleDisabled(source)) - // add operator should be failed since it is labeled with `schedule=deny`. - suite.Equal(0, controller.AddWaitingOperator(ops...)) - - // add operator should be success without `schedule=deny` - labelerManager.DeleteLabelRule("schedulelabel") - labelerManager.ScheduleDisabled(source) - suite.False(labelerManager.ScheduleDisabled(source)) - // now there is one operator being allowed to add, if it is a merge operator - // both of the pair are allowed - ops, err = operator.CreateMergeRegionOperator("merge-region", cluster, source, target, operator.OpMerge) - suite.NoError(err) - suite.Len(ops, 2) + // add operator should be success since it is not check in addWaitingOperator suite.Equal(2, controller.AddWaitingOperator(ops...)) - suite.Equal(0, controller.AddWaitingOperator(ops...)) - - // no space left, new operator can not be added. - suite.Equal(0, controller.AddWaitingOperator(addPeerOp(0))) } // issue #5279 diff --git a/server/schedule/placement/fit.go b/server/schedule/placement/fit.go index 82af3c17d11..454715cdc8e 100644 --- a/server/schedule/placement/fit.go +++ b/server/schedule/placement/fit.go @@ -56,6 +56,9 @@ func (f *RegionFit) IsCached() bool { // Replace return true if the replacement store is fit all constraints and isolation score is not less than the origin. func (f *RegionFit) Replace(srcStoreID uint64, dstStore *core.StoreInfo) bool { + if dstStore == nil { + return false + } fit := f.getRuleFitByStoreID(srcStoreID) // check the target store is fit all constraints. if fit == nil { diff --git a/server/schedule/waiting_operator.go b/server/schedule/waiting_operator.go index 513f8edea1a..9097e4fa45e 100644 --- a/server/schedule/waiting_operator.go +++ b/server/schedule/waiting_operator.go @@ -69,9 +69,7 @@ func (b *RandBuckets) ListOperator() []*operator.Operator { var ops []*operator.Operator for i := range b.buckets { bucket := b.buckets[i] - for j := range bucket.ops { - ops = append(ops, bucket.ops[j]) - } + ops = append(ops, bucket.ops...) } return ops } diff --git a/server/schedulers/hot_region.go b/server/schedulers/hot_region.go index 8e3881bece7..16cdce0eb81 100644 --- a/server/schedulers/hot_region.go +++ b/server/schedulers/hot_region.go @@ -38,6 +38,10 @@ import ( "go.uber.org/zap" ) +var ( + topnPosition = 10 +) + type baseHotScheduler struct { *BaseScheduler // store information, including pending Influence by resource type @@ -107,6 +111,9 @@ func (h *baseHotScheduler) prepareForBalance(rw statistics.RWType, cluster sched // It makes each dim rate or count become `weight` times to the origin value. 
func (h *baseHotScheduler) summaryPendingInfluence(cluster schedule.Cluster) { for id, p := range h.regionPendings { + if p.op == nil { + continue + } from := h.stInfos[p.from] to := h.stInfos[p.to] maxZombieDur := p.maxZombieDuration @@ -417,11 +424,13 @@ func isAvailableV1(s *solution) bool { type balanceSolver struct { schedule.Cluster - sche *hotScheduler - stLoadDetail map[uint64]*statistics.StoreLoadDetail - rwTy statistics.RWType - opTy opType - resourceTy resourceType + sche *hotScheduler + stLoadDetail map[uint64]*statistics.StoreLoadDetail + filteredHotPeers map[uint64][]*statistics.HotPeerStat // storeID -> hotPeers(filtered) + nthHotPeer map[uint64][]*statistics.HotPeerStat // storeID -> [dimLen]hotPeers + rwTy statistics.RWType + opTy opType + resourceTy resourceType cur *solution @@ -457,8 +466,20 @@ type balanceSolver struct { } func (bs *balanceSolver) init() { - // Init store load detail according to the type. + // Load the configuration items of the scheduler. bs.resourceTy = toResourceType(bs.rwTy, bs.opTy) + bs.maxPeerNum = bs.sche.conf.GetMaxPeerNumber() + bs.minHotDegree = bs.GetOpts().GetHotRegionCacheHitsThreshold() + bs.firstPriority, bs.secondPriority = prioritiesToDim(bs.getPriorities()) + bs.greatDecRatio, bs.minorDecRatio = bs.sche.conf.GetGreatDecRatio(), bs.sche.conf.GetMinorDecRatio() + switch bs.sche.conf.GetRankFormulaVersion() { + case "v1": + bs.initRankV1() + default: + bs.initRankV2() + } + + // Init store load detail according to the type. bs.stLoadDetail = bs.sche.stLoadInfos[bs.resourceTy] bs.maxSrc = &statistics.StoreLoad{Loads: make([]float64, statistics.DimLen)} @@ -471,10 +492,14 @@ func (bs *balanceSolver) init() { } maxCur := &statistics.StoreLoad{Loads: make([]float64, statistics.DimLen)} + bs.filteredHotPeers = make(map[uint64][]*statistics.HotPeerStat) + bs.nthHotPeer = make(map[uint64][]*statistics.HotPeerStat) for _, detail := range bs.stLoadDetail { bs.maxSrc = statistics.MaxLoad(bs.maxSrc, detail.LoadPred.Min()) bs.minDst = statistics.MinLoad(bs.minDst, detail.LoadPred.Max()) maxCur = statistics.MaxLoad(maxCur, &detail.LoadPred.Current) + bs.nthHotPeer[detail.GetID()] = make([]*statistics.HotPeerStat, statistics.DimLen) + bs.filteredHotPeers[detail.GetID()] = bs.filterHotPeers(detail) } rankStepRatios := []float64{ @@ -489,18 +514,6 @@ func (bs *balanceSolver) init() { Loads: stepLoads, Count: maxCur.Count * bs.sche.conf.GetCountRankStepRatio(), } - - bs.firstPriority, bs.secondPriority = prioritiesToDim(bs.getPriorities()) - bs.greatDecRatio, bs.minorDecRatio = bs.sche.conf.GetGreatDecRatio(), bs.sche.conf.GetMinorDecRatio() - bs.maxPeerNum = bs.sche.conf.GetMaxPeerNumber() - bs.minHotDegree = bs.GetOpts().GetHotRegionCacheHitsThreshold() - - switch bs.sche.conf.GetRankFormulaVersion() { - case "v1": - bs.initRankV1() - default: - bs.initRankV2() - } } func (bs *balanceSolver) initRankV1() { @@ -621,7 +634,7 @@ func (bs *balanceSolver) solve() []*operator.Operator { for _, srcStore := range bs.filterSrcStores() { bs.cur.srcStore = srcStore srcStoreID := srcStore.GetID() - for _, mainPeerStat := range bs.filterHotPeers(srcStore) { + for _, mainPeerStat := range bs.filteredHotPeers[srcStoreID] { if bs.cur.region = bs.getRegion(mainPeerStat, srcStoreID); bs.cur.region == nil { continue } else if bs.opTy == movePeer && bs.cur.region.GetApproximateSize() > bs.GetOpts().GetMaxMovableHotPeerSize() { @@ -637,7 +650,7 @@ func (bs *balanceSolver) solve() []*operator.Operator { if bs.needSearchRevertRegions() { 
schedulerCounter.WithLabelValues(bs.sche.GetName(), "search-revert-regions").Inc() dstStoreID := dstStore.GetID() - for _, revertPeerStat := range bs.filterHotPeers(bs.cur.dstStore) { + for _, revertPeerStat := range bs.filteredHotPeers[dstStoreID] { revertRegion := bs.getRegion(revertPeerStat, dstStoreID) if revertRegion == nil || revertRegion.GetID() == bs.cur.region.GetID() || !allowRevertRegion(revertRegion, srcStoreID) { @@ -760,7 +773,9 @@ func (bs *balanceSolver) checkSrcByPriorityAndTolerance(minLoad, expectLoad *sta // filterHotPeers filtered hot peers from statistics.HotPeerStat and deleted the peer if its region is in pending status. // The returned hotPeer count in controlled by `max-peer-number`. -func (bs *balanceSolver) filterHotPeers(storeLoad *statistics.StoreLoadDetail) (ret []*statistics.HotPeerStat) { +func (bs *balanceSolver) filterHotPeers(storeLoad *statistics.StoreLoadDetail) []*statistics.HotPeerStat { + hotPeers := storeLoad.HotPeers + ret := make([]*statistics.HotPeerStat, 0, len(hotPeers)) appendItem := func(item *statistics.HotPeerStat) { if _, ok := bs.sche.regionPendings[item.ID()]; !ok && !item.IsNeedCoolDownTransferLeader(bs.minHotDegree, bs.rwTy) { // no in pending operator and no need cool down after transfer leader @@ -768,36 +783,42 @@ func (bs *balanceSolver) filterHotPeers(storeLoad *statistics.StoreLoadDetail) ( } } - src := storeLoad.HotPeers - // At most MaxPeerNum peers, to prevent balanceSolver.solve() too slow. - if len(src) <= bs.maxPeerNum { - ret = make([]*statistics.HotPeerStat, 0, len(src)) - for _, peer := range src { - appendItem(peer) - } - } else { - union := bs.sortHotPeers(src) + var firstSort, secondSort []*statistics.HotPeerStat + if len(hotPeers) >= topnPosition || len(hotPeers) > bs.maxPeerNum { + firstSort = make([]*statistics.HotPeerStat, len(hotPeers)) + copy(firstSort, hotPeers) + sort.Slice(firstSort, func(i, j int) bool { + return firstSort[i].GetLoad(bs.firstPriority) > firstSort[j].GetLoad(bs.firstPriority) + }) + secondSort = make([]*statistics.HotPeerStat, len(hotPeers)) + copy(secondSort, hotPeers) + sort.Slice(secondSort, func(i, j int) bool { + return secondSort[i].GetLoad(bs.secondPriority) > secondSort[j].GetLoad(bs.secondPriority) + }) + } + if len(hotPeers) >= topnPosition { + storeID := storeLoad.GetID() + bs.nthHotPeer[storeID][bs.firstPriority] = firstSort[topnPosition-1] + bs.nthHotPeer[storeID][bs.secondPriority] = secondSort[topnPosition-1] + } + if len(hotPeers) > bs.maxPeerNum { + union := bs.sortHotPeers(firstSort, secondSort) ret = make([]*statistics.HotPeerStat, 0, len(union)) for peer := range union { appendItem(peer) } + return ret } - return + for _, peer := range hotPeers { + appendItem(peer) + } + return ret } -func (bs *balanceSolver) sortHotPeers(ret []*statistics.HotPeerStat) map[*statistics.HotPeerStat]struct{} { - firstSort := make([]*statistics.HotPeerStat, len(ret)) - copy(firstSort, ret) - sort.Slice(firstSort, func(i, j int) bool { - return firstSort[i].GetLoad(bs.firstPriority) > firstSort[j].GetLoad(bs.firstPriority) - }) - secondSort := make([]*statistics.HotPeerStat, len(ret)) - copy(secondSort, ret) - sort.Slice(secondSort, func(i, j int) bool { - return secondSort[i].GetLoad(bs.secondPriority) > secondSort[j].GetLoad(bs.secondPriority) - }) +func (bs *balanceSolver) sortHotPeers(firstSort, secondSort []*statistics.HotPeerStat) map[*statistics.HotPeerStat]struct{} { union := make(map[*statistics.HotPeerStat]struct{}, bs.maxPeerNum) + // At most MaxPeerNum peers, to prevent 
balanceSolver.solve() too slow. for len(union) < bs.maxPeerNum { for len(firstSort) > 0 { peer := firstSort[0] diff --git a/server/schedulers/hot_region_test.go b/server/schedulers/hot_region_test.go index cdff13c550c..32cab361ec2 100644 --- a/server/schedulers/hot_region_test.go +++ b/server/schedulers/hot_region_test.go @@ -1788,20 +1788,23 @@ func TestHotCacheSortHotPeer(t *testing.T) { }, }} + st := &statistics.StoreLoadDetail{ + HotPeers: hotPeers, + } leaderSolver.maxPeerNum = 1 - u := leaderSolver.sortHotPeers(hotPeers) + u := leaderSolver.filterHotPeers(st) checkSortResult(re, []uint64{1}, u) leaderSolver.maxPeerNum = 2 - u = leaderSolver.sortHotPeers(hotPeers) + u = leaderSolver.filterHotPeers(st) checkSortResult(re, []uint64{1, 2}, u) } -func checkSortResult(re *require.Assertions, regions []uint64, hotPeers map[*statistics.HotPeerStat]struct{}) { +func checkSortResult(re *require.Assertions, regions []uint64, hotPeers []*statistics.HotPeerStat) { re.Equal(len(hotPeers), len(regions)) for _, region := range regions { in := false - for hotPeer := range hotPeers { + for _, hotPeer := range hotPeers { if hotPeer.RegionID == region { in = true break diff --git a/server/schedulers/hot_region_v2.go b/server/schedulers/hot_region_v2.go index 49b7f7042b0..49de50c47cd 100644 --- a/server/schedulers/hot_region_v2.go +++ b/server/schedulers/hot_region_v2.go @@ -204,11 +204,17 @@ func (bs *balanceSolver) getScoreByPriorities(dim int, rs *rankV2Ratios) int { srcPendingRate, dstPendingRate := bs.cur.getPendingLoad(dim) peersRate := bs.cur.getPeersRateFromCache(dim) highRate, lowRate := srcRate, dstRate + topnHotPeer := bs.nthHotPeer[bs.cur.srcStore.GetID()][dim] reverse := false if srcRate < dstRate { highRate, lowRate = dstRate, srcRate peersRate = -peersRate reverse = true + topnHotPeer = bs.nthHotPeer[bs.cur.dstStore.GetID()][dim] + } + topnRate := math.MaxFloat64 + if topnHotPeer != nil { + topnRate = topnHotPeer.GetLoad(dim) } if highRate*rs.balancedCheckRatio <= lowRate { @@ -260,6 +266,7 @@ func (bs *balanceSolver) getScoreByPriorities(dim int, rs *rankV2Ratios) int { // maxBetterRate may be less than minBetterRate, in which case a positive fraction cannot be produced. minNotWorsenedRate = -bs.getMinRate(dim) minBetterRate = math.Min(minBalancedRate*rs.perceivedRatio, lowRate*rs.minHotRatio) + minBetterRate = math.Min(minBetterRate, topnRate) maxBetterRate = maxBalancedRate + (highRate-lowRate-minBetterRate-maxBalancedRate)*rs.perceivedRatio maxNotWorsenedRate = maxBalancedRate + (highRate-lowRate-minNotWorsenedRate-maxBalancedRate)*rs.perceivedRatio } diff --git a/server/schedulers/hot_region_v2_test.go b/server/schedulers/hot_region_v2_test.go index 811e179bf10..698264f4ab3 100644 --- a/server/schedulers/hot_region_v2_test.go +++ b/server/schedulers/hot_region_v2_test.go @@ -354,3 +354,124 @@ func TestSkipUniformStore(t *testing.T) { testutil.CheckTransferLeader(re, ops[0], operator.OpHotRegion, 3, 2) clearPendingInfluence(hb.(*hotScheduler)) } + +func TestHotReadRegionScheduleWithSmallHotRegion(t *testing.T) { + // This is a test that we can schedule small hot region, + // which is smaller than 20% of diff or 2% of low node. (#6645) + // 20% is from `firstPriorityPerceivedRatio`, 2% is from `firstPriorityMinHotRatio`. + // The byte of high node is 2000MB/s, the low node is 200MB/s. + // The query of high node is 2000qps, the low node is 200qps. + // There are all small hot regions in the cluster, which are smaller than 20% of diff or 2% of low node. 
+ re := require.New(t) + emptyFunc := func(*mockcluster.Cluster, *hotScheduler) {} + highLoad, lowLoad := uint64(2000), uint64(200) + bigHotRegionByte := uint64(float64(lowLoad) * firstPriorityMinHotRatio * 10 * units.MiB * statistics.ReadReportInterval) + bigHotRegionQuery := uint64(float64(lowLoad) * firstPriorityMinHotRatio * 10 * statistics.ReadReportInterval) + + // Case1: Before #6827, we only use minHotRatio, so cannot schedule small hot region in this case. + // Because 10000 is larger than the length of hotRegions, so `filterHotPeers` will skip the topn calculation. + origin := topnPosition + topnPosition = 10000 + ops := checkHotReadRegionScheduleWithSmallHotRegion(re, highLoad, lowLoad, emptyFunc) + re.Empty(ops) + topnPosition = origin + + // Case2: After #6827, we use top10 as the threshold of minHotPeer. + ops = checkHotReadRegionScheduleWithSmallHotRegion(re, highLoad, lowLoad, emptyFunc) + re.Len(ops, 1) + ops = checkHotReadRegionScheduleWithSmallHotRegion(re, lowLoad, highLoad, emptyFunc) + re.Len(ops, 0) + + // Case3: If there is larger hot region, we will schedule it. + hotRegionID := uint64(100) + ops = checkHotReadRegionScheduleWithSmallHotRegion(re, highLoad, lowLoad, func(tc *mockcluster.Cluster, _ *hotScheduler) { + tc.AddRegionWithReadInfo(hotRegionID, 1, bigHotRegionByte, 0, bigHotRegionQuery, statistics.ReadReportInterval, []uint64{2, 3}) + }) + re.Len(ops, 1) + re.Equal(hotRegionID, ops[0].RegionID()) + + // Case4: If there is larger hot region, but it need to cool down, we will schedule small hot region. + ops = checkHotReadRegionScheduleWithSmallHotRegion(re, highLoad, lowLoad, func(tc *mockcluster.Cluster, _ *hotScheduler) { + // just transfer leader + tc.AddRegionWithReadInfo(hotRegionID, 2, bigHotRegionByte, 0, bigHotRegionQuery, statistics.ReadReportInterval, []uint64{1, 3}) + tc.AddRegionWithReadInfo(hotRegionID, 1, bigHotRegionByte, 0, bigHotRegionQuery, statistics.ReadReportInterval, []uint64{2, 3}) + }) + re.Len(ops, 1) + re.NotEqual(hotRegionID, ops[0].RegionID()) + + // Case5: If there is larger hot region, but it is pending, we will schedule small hot region. + ops = checkHotReadRegionScheduleWithSmallHotRegion(re, highLoad, lowLoad, func(tc *mockcluster.Cluster, hb *hotScheduler) { + tc.AddRegionWithReadInfo(hotRegionID, 1, bigHotRegionByte, 0, bigHotRegionQuery, statistics.ReadReportInterval, []uint64{2, 3}) + hb.regionPendings[hotRegionID] = &pendingInfluence{} + }) + re.Len(ops, 1) + re.NotEqual(hotRegionID, ops[0].RegionID()) + + // Case5: If there are more than topnPosition hot regions, but them need to cool down, + // we will schedule large hot region rather than small hot region, so there is no operator. 
+ topnPosition = 2 + ops = checkHotReadRegionScheduleWithSmallHotRegion(re, highLoad, lowLoad, func(tc *mockcluster.Cluster, _ *hotScheduler) { + // just transfer leader + tc.AddRegionWithReadInfo(hotRegionID, 2, bigHotRegionByte, 0, bigHotRegionQuery, statistics.ReadReportInterval, []uint64{1, 3}) + tc.AddRegionWithReadInfo(hotRegionID, 1, bigHotRegionByte, 0, bigHotRegionQuery, statistics.ReadReportInterval, []uint64{2, 3}) + // just transfer leader + tc.AddRegionWithReadInfo(hotRegionID+1, 2, bigHotRegionByte, 0, bigHotRegionQuery, statistics.ReadReportInterval, []uint64{1, 3}) + tc.AddRegionWithReadInfo(hotRegionID+1, 1, bigHotRegionByte, 0, bigHotRegionQuery, statistics.ReadReportInterval, []uint64{2, 3}) + }) + re.Len(ops, 0) + topnPosition = origin + + // Case6: If there are more than topnPosition hot regions, but them are pending, + // we will schedule large hot region rather than small hot region, so there is no operator. + topnPosition = 2 + ops = checkHotReadRegionScheduleWithSmallHotRegion(re, highLoad, lowLoad, func(tc *mockcluster.Cluster, hb *hotScheduler) { + tc.AddRegionWithReadInfo(hotRegionID, 1, bigHotRegionByte, 0, bigHotRegionQuery, statistics.ReadReportInterval, []uint64{2, 3}) + hb.regionPendings[hotRegionID] = &pendingInfluence{} + tc.AddRegionWithReadInfo(hotRegionID+1, 1, bigHotRegionByte, 0, bigHotRegionQuery, statistics.ReadReportInterval, []uint64{2, 3}) + hb.regionPendings[hotRegionID+1] = &pendingInfluence{} + }) + re.Len(ops, 0) + topnPosition = origin +} + +func checkHotReadRegionScheduleWithSmallHotRegion(re *require.Assertions, highLoad, lowLoad uint64, + addOtherRegions func(*mockcluster.Cluster, *hotScheduler)) []*operator.Operator { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + statistics.Denoising = false + hb, err := schedule.CreateScheduler(statistics.Read.String(), schedule.NewOperatorController(ctx, nil, nil), storage.NewStorageWithMemoryBackend(), nil) + re.NoError(err) + hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) + hb.(*hotScheduler).conf.SetDstToleranceRatio(1) + hb.(*hotScheduler).conf.SetRankFormulaVersion("v2") + hb.(*hotScheduler).conf.ReadPriorities = []string{statistics.QueryPriority, statistics.BytePriority} + opt := config.NewTestOptions() + tc := mockcluster.NewCluster(ctx, opt) + tc.SetHotRegionCacheHitsThreshold(0) + tc.AddRegionStore(1, 40) + tc.AddRegionStore(2, 10) + tc.AddRegionStore(3, 10) + + tc.UpdateStorageReadQuery(1, highLoad*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageReadQuery(2, lowLoad*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageReadQuery(3, (highLoad+lowLoad)/2*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageReadStats(1, highLoad*units.MiB*statistics.StoreHeartBeatReportInterval, 0) + tc.UpdateStorageReadStats(2, lowLoad*units.MiB*statistics.StoreHeartBeatReportInterval, 0) + tc.UpdateStorageReadStats(3, (highLoad+lowLoad)/2*units.MiB*statistics.StoreHeartBeatReportInterval, 0) + + smallHotPeerQuery := float64(lowLoad) * firstPriorityMinHotRatio * 0.9 // it's a small hot region than the firstPriorityMinHotRatio + smallHotPeerByte := float64(lowLoad) * secondPriorityMinHotRatio * 0.9 * units.MiB // it's a small hot region than the secondPriorityMinHotRatio + regions := make([]testRegionInfo, 0) + for i := 10; i < 50; i++ { + regions = append(regions, testRegionInfo{uint64(i), []uint64{1, 2, 3}, smallHotPeerByte, 0, smallHotPeerQuery}) + if i < 20 { + regions = append(regions, testRegionInfo{uint64(i), []uint64{2, 1, 3}, smallHotPeerByte, 0, 
smallHotPeerQuery}) + regions = append(regions, testRegionInfo{uint64(i), []uint64{3, 1, 2}, smallHotPeerByte, 0, smallHotPeerQuery}) + } + } + addRegionInfo(tc, statistics.Read, regions) + tc.SetHotRegionCacheHitsThreshold(1) + addOtherRegions(tc, hb.(*hotScheduler)) + ops, _ := hb.Schedule(tc, false) + return ops +} diff --git a/server/server.go b/server/server.go index 736e36fefec..2ab40799b65 100644 --- a/server/server.go +++ b/server/server.go @@ -88,6 +88,9 @@ const ( idAllocLabel = "idalloc" recoveringMarkPath = "cluster/markers/snapshot-recovering" + + lostPDLeaderMaxTimeoutSecs = 10 + lostPDLeaderReElectionFactor = 10 ) // EtcdStartTimeout the timeout of the startup etcd. @@ -121,6 +124,8 @@ type Server struct { member *member.Member // etcd client client *clientv3.Client + // electionClient is used for leader election. + electionClient *clientv3.Client // http client httpClient *http.Client clusterID uint64 // pd cluster id. @@ -330,12 +335,18 @@ func (s *Server) startEtcd(ctx context.Context) error { lgc := zap.NewProductionConfig() lgc.Encoding = log.ZapEncodingName - client, err := clientv3.New(clientv3.Config{ + clientConfig := clientv3.Config{ Endpoints: endpoints, DialTimeout: etcdTimeout, TLS: tlsConfig, LogConfig: &lgc, - }) + } + client, err := clientv3.New(clientConfig) + if err != nil { + return errs.ErrNewEtcdClient.Wrap(err).GenWithStackByCause() + } + + s.electionClient, err = clientv3.New(clientConfig) if err != nil { return errs.ErrNewEtcdClient.Wrap(err).GenWithStackByCause() } @@ -357,17 +368,21 @@ func (s *Server) startEtcd(ctx context.Context) error { } } s.client = client - s.httpClient = &http.Client{ - Transport: &http.Transport{ - DisableKeepAlives: true, - TLSClientConfig: tlsConfig, - }, + // FIXME: Currently, there is no timeout set for certain requests, such as GetRegions, + // which may take a significant amount of time. However, it might be necessary to + // define an appropriate timeout in the future. + httpCli := &http.Client{} + if tlsConfig != nil { + transport := http.DefaultTransport.(*http.Transport).Clone() + transport.TLSClientConfig = tlsConfig + httpCli.Transport = transport } + s.httpClient = httpCli failpoint.Inject("memberNil", func() { time.Sleep(1500 * time.Millisecond) }) - s.member = member.NewMember(etcd, client, etcdServerID) + s.member = member.NewMember(etcd, s.electionClient, etcdServerID) return nil } @@ -494,6 +509,11 @@ func (s *Server) Close() { log.Error("close etcd client meet error", errs.ZapError(errs.ErrCloseEtcdClient, err)) } } + if s.electionClient != nil { + if err := s.electionClient.Close(); err != nil { + log.Error("close election client meet error", errs.ZapError(errs.ErrCloseEtcdClient, err)) + } + } if s.httpClient != nil { s.httpClient.CloseIdleConnections() @@ -1388,6 +1408,14 @@ func (s *Server) leaderLoop() { } leader, rev, checkAgain := s.member.CheckLeader() + // add failpoint to test leader check go to stuck. + failpoint.Inject("leaderLoopCheckAgain", func(val failpoint.Value) { + memberString := val.(string) + memberID, _ := strconv.ParseUint(memberString, 10, 64) + if s.member.ID() == memberID { + checkAgain = true + } + }) if checkAgain { continue } @@ -1408,6 +1436,25 @@ func (s *Server) leaderLoop() { // To make sure the etcd leader and PD leader are on the same server. etcdLeader := s.member.GetEtcdLeader() if etcdLeader != s.member.ID() { + if s.member.GetLeader() == nil { + lastUpdated := s.member.GetLastLeaderUpdatedTime() + // use random timeout to avoid leader campaigning storm. 
+ randomTimeout := time.Duration(rand.Intn(int(lostPDLeaderMaxTimeoutSecs)))*time.Second + lostPDLeaderMaxTimeoutSecs*time.Second + lostPDLeaderReElectionFactor*s.cfg.ElectionInterval.Duration + // add a failpoint to test the campaign leader logic. + failpoint.Inject("timeoutWaitPDLeader", func() { + log.Info("timeoutWaitPDLeader is injected, skip waiting for the etcd leader to become the pd leader") + randomTimeout = time.Duration(rand.Intn(10))*time.Millisecond + 100*time.Millisecond + }) + if lastUpdated.Add(randomTimeout).Before(time.Now()) && !lastUpdated.IsZero() && etcdLeader != 0 { + log.Info("the pd leader has been lost for a long time, try to re-campaign a pd leader by resigning the etcd leader", + zap.Duration("timeout", randomTimeout), + zap.Time("last-updated", lastUpdated), + zap.String("current-leader-member-id", types.ID(etcdLeader).String()), + zap.String("transferee-member-id", types.ID(s.member.ID()).String()), + ) + s.member.MoveEtcdLeader(s.ctx, etcdLeader, s.member.ID()) + } + } log.Info("skip campaigning of pd leader and check later", zap.String("server-name", s.Name()), zap.Uint64("etcd-leader-id", etcdLeader), @@ -1516,6 +1563,16 @@ func (s *Server) campaignLeader() { log.Info("no longer a leader because lease has expired, pd leader will step down") return } + // add a failpoint to test exiting leadership: if the member ID equals the given value, return from campaignLeader. + failpoint.Inject("exitCampaignLeader", func(val failpoint.Value) { + memberString := val.(string) + memberID, _ := strconv.ParseUint(memberString, 10, 64) + if s.member.ID() == memberID { + log.Info("exit PD leader") + failpoint.Return() + } + }) + etcdLeader := s.member.GetEtcdLeader() if etcdLeader != s.member.ID() { log.Info("etcd leader changed, resigns pd leadership", zap.String("old-pd-leader-name", s.Name())) @@ -1708,3 +1765,9 @@ func (s *Server) SetExternalTS(externalTS uint64) error { s.GetRaftCluster().SetExternalTS(externalTS) return nil } + +// SetClient sets the etcd client. +// Note: it is only used for tests.
+func (s *Server) SetClient(client *clientv3.Client) { + s.client = client +} diff --git a/server/statistics/hot_peer_cache.go b/server/statistics/hot_peer_cache.go index 6cf46e8dcb1..61147be7363 100644 --- a/server/statistics/hot_peer_cache.go +++ b/server/statistics/hot_peer_cache.go @@ -206,16 +206,17 @@ func (f *hotPeerCache) checkPeerFlow(peer *core.PeerInfo, region *core.RegionInf } } + peers := region.GetPeers() newItem := &HotPeerStat{ StoreID: storeID, RegionID: regionID, Loads: f.kind.GetLoadRatesFromPeer(peer), isLeader: region.GetLeader().GetStoreId() == storeID, actionType: Update, - stores: make([]uint64, len(region.GetPeers())), + stores: make([]uint64, len(peers)), } - for _, peer := range region.GetPeers() { - newItem.stores = append(newItem.stores, peer.GetStoreId()) + for i, peer := range peers { + newItem.stores[i] = peer.GetStoreId() } if oldItem == nil { diff --git a/server/statistics/hot_regions_stat.go b/server/statistics/hot_regions_stat.go index 92a5181ab8a..d30a153492b 100644 --- a/server/statistics/hot_regions_stat.go +++ b/server/statistics/hot_regions_stat.go @@ -14,11 +14,7 @@ package statistics -import ( - "time" - - "github.com/tikv/pd/server/core" -) +import "time" // HotPeersStat records all hot regions statistics type HotPeersStat struct { @@ -44,14 +40,5 @@ type HotPeerStatShow struct { KeyRate float64 `json:"flow_keys"` QueryRate float64 `json:"flow_query"` AntiCount int `json:"anti_count"` - LastUpdateTime time.Time `json:"last_update_time"` -} - -// UpdateHotPeerStatShow updates the region information, such as `IsLearner` and `LastUpdateTime`. -func (h *HotPeerStatShow) UpdateHotPeerStatShow(region *core.RegionInfo) { - if region == nil { - return - } - h.IsLearner = core.IsLearner(region.GetPeer(h.StoreID)) - h.LastUpdateTime = time.Unix(int64(region.GetInterval().GetEndTimestamp()), 0) + LastUpdateTime time.Time `json:"last_update_time,omitempty"` } diff --git a/server/statistics/store_collection.go b/server/statistics/store_collection.go index 04138f61c58..a2348a88563 100644 --- a/server/statistics/store_collection.go +++ b/server/statistics/store_collection.go @@ -43,6 +43,7 @@ type storeStatistics struct { StorageCapacity uint64 RegionCount int LeaderCount int + LearnerCount int WitnessCount int LabelCounter map[string]int Preparing int @@ -119,6 +120,7 @@ func (s *storeStatistics) Observe(store *core.StoreInfo, stats *StoresStats) { storeStatusGauge.WithLabelValues(storeAddress, id, "leader_size").Set(float64(store.GetLeaderSize())) storeStatusGauge.WithLabelValues(storeAddress, id, "leader_count").Set(float64(store.GetLeaderCount())) storeStatusGauge.WithLabelValues(storeAddress, id, "witness_count").Set(float64(store.GetWitnessCount())) + storeStatusGauge.WithLabelValues(storeAddress, id, "learner_count").Set(float64(store.GetLearnerCount())) storeStatusGauge.WithLabelValues(storeAddress, id, "store_available").Set(float64(store.GetAvailable())) storeStatusGauge.WithLabelValues(storeAddress, id, "store_used").Set(float64(store.GetUsedSize())) storeStatusGauge.WithLabelValues(storeAddress, id, "store_capacity").Set(float64(store.GetCapacity())) @@ -170,6 +172,7 @@ func (s *storeStatistics) Collect() { metrics["region_count"] = float64(s.RegionCount) metrics["leader_count"] = float64(s.LeaderCount) metrics["witness_count"] = float64(s.WitnessCount) + metrics["learner_count"] = float64(s.LearnerCount) metrics["storage_size"] = float64(s.StorageSize) metrics["storage_capacity"] = float64(s.StorageCapacity) @@ -241,6 +244,7 @@ func (s 
*storeStatistics) resetStoreStatistics(storeAddress string, id string) { "leader_size", "leader_count", "witness_count", + "learner_count", "store_available", "store_used", "store_capacity", diff --git a/server/tso/allocator_manager.go b/server/tso/allocator_manager.go index 8ce1b898287..94116c16577 100644 --- a/server/tso/allocator_manager.go +++ b/server/tso/allocator_manager.go @@ -31,6 +31,7 @@ import ( "github.com/tikv/pd/pkg/errs" "github.com/tikv/pd/pkg/etcdutil" "github.com/tikv/pd/pkg/grpcutil" + "github.com/tikv/pd/pkg/logutil" "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/syncutil" "github.com/tikv/pd/server/config" @@ -361,6 +362,7 @@ func (am *AllocatorManager) getLocalTSOAllocatorPath() string { // similar logic with leaderLoop in server/server.go func (am *AllocatorManager) allocatorLeaderLoop(ctx context.Context, allocator *LocalTSOAllocator) { + defer logutil.LogPanic() defer log.Info("server is closed, return local tso allocator leader loop", zap.String("dc-location", allocator.GetDCLocation()), zap.String("local-tso-allocator-name", am.member.Member().Name)) @@ -612,6 +614,7 @@ func (am *AllocatorManager) allocatorUpdater() { // updateAllocator is used to update the allocator in the group. func (am *AllocatorManager) updateAllocator(ag *allocatorGroup) { + defer logutil.LogPanic() defer am.wg.Done() select { case <-ag.ctx.Done(): @@ -662,6 +665,7 @@ func (am *AllocatorManager) allocatorPatroller(serverCtx context.Context) { // ClusterDCLocationChecker collects all dc-locations of a cluster, computes some related info // and stores them into the DCLocationInfo, then finally writes them into am.mu.clusterDCLocations. func (am *AllocatorManager) ClusterDCLocationChecker() { + defer logutil.LogPanic() // Wait for the PD leader to be elected out. if am.member.GetLeader() == nil { return diff --git a/server/tso/global_allocator.go b/server/tso/global_allocator.go index 8e035808317..3515ad9b611 100644 --- a/server/tso/global_allocator.go +++ b/server/tso/global_allocator.go @@ -26,6 +26,7 @@ import ( "github.com/pingcap/kvproto/pkg/pdpb" "github.com/pingcap/log" "github.com/tikv/pd/pkg/errs" + "github.com/tikv/pd/pkg/logutil" "github.com/tikv/pd/pkg/slice" "github.com/tikv/pd/pkg/tsoutil" "github.com/tikv/pd/pkg/typeutil" @@ -338,6 +339,7 @@ func (gta *GlobalTSOAllocator) SyncMaxTS( // Send SyncMaxTSRequest to all allocator leaders concurrently. 
wg.Add(1) go func(ctx context.Context, conn *grpc.ClientConn, respCh chan<- *syncResp) { + defer logutil.LogPanic() defer wg.Done() syncMaxTSResp := &syncResp{} syncCtx, cancel := context.WithTimeout(ctx, rpcTimeout) diff --git a/tests/client/go.mod b/tests/client/go.mod index 70dd76c61bf..c1e1b3c5788 100644 --- a/tests/client/go.mod +++ b/tests/client/go.mod @@ -5,7 +5,7 @@ go 1.19 require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 - github.com/pingcap/kvproto v0.0.0-20221104101942-09d82b914df1 + github.com/pingcap/kvproto v0.0.0-20230726063044-73d6d7f3756b github.com/stretchr/testify v1.7.1 github.com/tikv/pd v0.0.0-00010101000000-000000000000 github.com/tikv/pd/client v0.0.0-00010101000000-000000000000 @@ -52,7 +52,7 @@ require ( github.com/go-playground/universal-translator v0.17.0 // indirect github.com/go-playground/validator/v10 v10.4.1 // indirect github.com/go-resty/resty/v2 v2.6.0 // indirect - github.com/go-sql-driver/mysql v1.6.0 // indirect + github.com/go-sql-driver/mysql v1.7.0 // indirect github.com/goccy/go-graphviz v0.0.9 // indirect github.com/golang-jwt/jwt v3.2.1+incompatible // indirect github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect @@ -69,7 +69,7 @@ require ( github.com/gtank/cryptopasta v0.0.0-20170601214702-1f550f6f2f69 // indirect github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d // indirect github.com/jinzhu/inflection v1.0.0 // indirect - github.com/jinzhu/now v1.1.2 // indirect + github.com/jinzhu/now v1.1.5 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/joho/godotenv v1.4.0 // indirect github.com/jonboulle/clockwork v0.2.2 // indirect @@ -80,7 +80,7 @@ require ( github.com/leodido/go-urn v1.2.0 // indirect github.com/mailru/easyjson v0.7.6 // indirect github.com/mattn/go-isatty v0.0.12 // indirect - github.com/mattn/go-sqlite3 v1.14.9 // indirect + github.com/mattn/go-sqlite3 v1.14.15 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect github.com/minio/sio v0.3.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect @@ -94,7 +94,7 @@ require ( github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c // indirect github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 // indirect github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d // indirect - github.com/pingcap/tidb-dashboard v0.0.0-20221201151320-ea3ee6971f2e // indirect + github.com/pingcap/tidb-dashboard v0.0.0-20230816095313-e6414634f8fc // indirect github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect @@ -103,6 +103,7 @@ require ( github.com/prometheus/common v0.26.0 // indirect github.com/prometheus/procfs v0.6.0 // indirect github.com/rs/cors v1.7.0 // indirect + github.com/samber/lo v1.37.0 // indirect github.com/sasha-s/go-deadlock v0.2.0 // indirect github.com/shirou/gopsutil v3.21.3+incompatible // indirect github.com/shurcooL/httpgzip v0.0.0-20190720172056-320755c1c1b0 // indirect @@ -114,7 +115,6 @@ require ( github.com/swaggo/http-swagger v0.0.0-20200308142732-58ac5e232fba // indirect github.com/swaggo/swag v1.8.3 // indirect github.com/syndtr/goleveldb v1.0.1-0.20190318030020-c3a204f8e965 // indirect - github.com/thoas/go-funk v0.8.0 // indirect github.com/tklauser/go-sysconf v0.3.4 // indirect github.com/tklauser/numcpus v0.2.1 // indirect 
github.com/tmc/grpc-websocket-proxy v0.0.0-20200427203606-3cfed13b9966 // indirect @@ -130,7 +130,7 @@ require ( go.uber.org/fx v1.12.0 // indirect go.uber.org/multierr v1.7.0 // indirect go.uber.org/zap v1.20.0 // indirect - golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4 // indirect + golang.org/x/crypto v0.0.0-20221005025214-4161e89ecf1b // indirect golang.org/x/exp v0.0.0-20220321173239-a90fa8a75705 // indirect golang.org/x/image v0.0.0-20200119044424-58c23975cae1 // indirect golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect @@ -146,9 +146,10 @@ require ( gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect - gorm.io/driver/mysql v1.0.6 // indirect - gorm.io/driver/sqlite v1.1.4 // indirect - gorm.io/gorm v1.21.9 // indirect + gorm.io/datatypes v1.1.0 // indirect + gorm.io/driver/mysql v1.4.5 // indirect + gorm.io/driver/sqlite v1.4.3 // indirect + gorm.io/gorm v1.24.3 // indirect moul.io/zapgorm2 v1.1.0 // indirect sigs.k8s.io/yaml v1.1.0 // indirect ) diff --git a/tests/client/go.sum b/tests/client/go.sum index e79e0691045..f7867b39e3c 100644 --- a/tests/client/go.sum +++ b/tests/client/go.sum @@ -136,8 +136,8 @@ github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn github.com/go-resty/resty/v2 v2.6.0 h1:joIR5PNLM2EFqqESUjCMGXrWmXNHEU9CEiK813oKYS4= github.com/go-resty/resty/v2 v2.6.0/go.mod h1:PwvJS6hvaPkjtjNg9ph+VrSD92bi5Zq73w/BIH7cC3Q= github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= -github.com/go-sql-driver/mysql v1.6.0 h1:BCTh4TKNUYmOmMUcQ3IipzF5prigylS7XXjEkfCHuOE= -github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= +github.com/go-sql-driver/mysql v1.7.0 h1:ueSltNNllEqE3qcWBTD0iQd3IpL/6U+mJxLkazJ7YPc= +github.com/go-sql-driver/mysql v1.7.0/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/goccy/go-graphviz v0.0.9 h1:s/FMMJ1Joj6La3S5ApO3Jk2cwM4LpXECC2muFx3IPQQ= github.com/goccy/go-graphviz v0.0.9/go.mod h1:wXVsXxmyMQU6TN3zGRttjNn3h+iCAS7xQFC6TlNvLhk= @@ -149,6 +149,8 @@ github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt v3.2.1+incompatible h1:73Z+4BJcrTC+KczS6WvTPvRGOp1WmfEP4Q1lOd9Z/+c= github.com/golang-jwt/jwt v3.2.1+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I= +github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 h1:au07oEsX2xN0ktxqI+Sida1w446QrXBRJ0nee3SNZlA= +github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei6A= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g= github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58= @@ -182,8 +184,8 @@ github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5 
h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/pprof v0.0.0-20211122183932-1daafda22083 h1:c8EUapQFi+kjzedr4c6WqbwMdmB95+oDBWZ5XFHFYxY= github.com/google/pprof v0.0.0-20211122183932-1daafda22083/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg= @@ -209,13 +211,22 @@ github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpO github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d h1:uGg2frlt3IcT7kbV6LEp5ONv4vmoO2FW4qSO+my/aoM= github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w= github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/jackc/chunkreader/v2 v2.0.1 h1:i+RDz65UE+mmpjTfyz0MoVTnzeYxroil2G82ki7MGG8= +github.com/jackc/pgconn v1.13.0 h1:3L1XMNV2Zvca/8BYhzcRFS70Lr0WlDg16Di6SFGAbys= +github.com/jackc/pgio v1.0.0 h1:g12B9UwVnzGhueNavwioyEEpAmqMe1E/BN9ES+8ovkE= +github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= +github.com/jackc/pgproto3/v2 v2.3.1 h1:nwj7qwf0S+Q7ISFfBndqeLwSwxs+4DPsbRFjECT1Y4Y= +github.com/jackc/pgservicefile v0.0.0-20200714003250-2b9c44734f2b h1:C8S2+VttkHFdOOCXJe+YGfa4vHYwlt4Zx+IVXQ97jYg= +github.com/jackc/pgtype v1.12.0 h1:Dlq8Qvcch7kiehm8wPGIW0W3KsCCHJnRacKW0UM8n5w= +github.com/jackc/pgx/v4 v4.17.2 h1:0Ut0rpeKwvIVbMQ1KbMBU4h6wxehBI535LK6Flheh8E= github.com/jarcoal/httpmock v1.0.8 h1:8kI16SoO6LQKgPE7PvQuV+YuD/inwHd7fOOe2zMbo4k= github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= -github.com/jinzhu/now v1.1.1/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= -github.com/jinzhu/now v1.1.2 h1:eVKgfIdy9b6zbWBMgFpfDPoAMifwSZagU9HmEU6zgiI= github.com/jinzhu/now v1.1.2/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= +github.com/jinzhu/now v1.1.4/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= +github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= +github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= @@ -268,11 +279,11 @@ github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hd github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= -github.com/mattn/go-sqlite3 v1.14.5/go.mod h1:WVKg1VTActs4Qso6iwGbiFih2UIHo0ENGwNd0Lj+XmI= -github.com/mattn/go-sqlite3 v1.14.9 h1:10HX2Td0ocZpYEjhilsuo6WWtUqttj2Kb0KtD86/KYA= -github.com/mattn/go-sqlite3 v1.14.9/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= +github.com/mattn/go-sqlite3 v1.14.15 h1:vfoHhTN1af61xCRSWzFIWzx2YskyMTwHLrExkBOjvxI= +github.com/mattn/go-sqlite3 
v1.14.15/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= +github.com/microsoft/go-mssqldb v0.17.0 h1:Fto83dMZPnYv1Zwx5vHHxpNraeEaUlQ/hhHLgZiaenE= github.com/minio/sio v0.3.0 h1:syEFBewzOMOYVzSTFpp1MqpSZk8rUNbz8VIIc+PNzus= github.com/minio/sio v0.3.0/go.mod h1:8b0yPp2avGThviy/+OCJBI6OMpvxoUuiLvE6F1lebhw= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -318,17 +329,16 @@ github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c/go.mod h1:X2r9ue github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZFh4N3vQ5HEtld3S+Y+StULhWVvumU0= github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= -github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= -github.com/pingcap/kvproto v0.0.0-20221104101942-09d82b914df1 h1:iJXUNA0LoOYuuMJ6U0tJGg2gCo/8xGZVhKLvuUWNjzw= -github.com/pingcap/kvproto v0.0.0-20221104101942-09d82b914df1/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/pingcap/kvproto v0.0.0-20230726063044-73d6d7f3756b h1:dLoYgMFgzUaS6fAAPdjA7oGDM0LdCIm+qhgb3PzrDps= +github.com/pingcap/kvproto v0.0.0-20230726063044-73d6d7f3756b/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9/go.mod h1:4rbK1p9ILyIfb6hU7OG2CiWSqMXnp3JMbiaVJ6mvoY8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 h1:HR/ylkkLmGdSSDaD8IDP+SZrdhV1Kibl9KrHxJ9eciw= github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d h1:k3/APKZjXOyJrFy8VyYwRlZhMelpD3qBLJNsw3bPl/g= github.com/pingcap/sysutil v0.0.0-20211208032423-041a72e5860d/go.mod h1:7j18ezaWTao2LHOyMlsc2Dg1vW+mDY9dEbPzVyOlaeM= -github.com/pingcap/tidb-dashboard v0.0.0-20221201151320-ea3ee6971f2e h1:FUdoQ6zWktVjIWLokNeulEcqIzGn6TnoOjdS9bQcFUo= -github.com/pingcap/tidb-dashboard v0.0.0-20221201151320-ea3ee6971f2e/go.mod h1:NNF1CfnM5TqrLNfzfSal723h2fVQlieyVBBdQBzfPTg= +github.com/pingcap/tidb-dashboard v0.0.0-20230816095313-e6414634f8fc h1:nw4g5lsSFBSPTU6eUOEgR3qTq2Qr0fr8LLatyM1YM6w= +github.com/pingcap/tidb-dashboard v0.0.0-20230816095313-e6414634f8fc/go.mod h1:OUzFMMVjR1GKlf4LWLqza9QNKjCrYJ7stVn/3PN0djM= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e h1:FBaTXU8C3xgt/drM58VHxojHo/QoG1oPsgWTGvaSpO4= github.com/pingcap/tipb v0.0.0-20220718022156-3e2483c20a9e/go.mod h1:A7mrd7WHBl1o63LE2bIBGEJMTNWXqhgmYiOvMLxozfs= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -361,6 +371,8 @@ github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6L github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rs/cors v1.7.0 h1:+88SsELBHx5r+hZ8TCkggzSstaWNbDvThkVK8H6f9ik= github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU= +github.com/samber/lo v1.37.0 
h1:XjVcB8g6tgUp8rsPsJ2CvhClfImrpL04YpQHXeHPhRw= +github.com/samber/lo v1.37.0/go.mod h1:9vaz2O4o8oOnK23pd2TrXufcbdbJIa3b6cstBWKpopA= github.com/sasha-s/go-deadlock v0.2.0 h1:lMqc+fUb7RrFS3gQLtoQsJ7/6TV/pAIFvBsqX73DK8Y= github.com/sasha-s/go-deadlock v0.2.0/go.mod h1:StQn567HiB1fF2yJ44N9au7wOhrPS3iZqiDbRupzT10= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= @@ -401,8 +413,6 @@ github.com/swaggo/swag v1.8.3 h1:3pZSSCQ//gAH88lfmxM3Cd1+JCsxV8Md6f36b9hrZ5s= github.com/swaggo/swag v1.8.3/go.mod h1:jMLeXOOmYyjk8PvHTsXBdrubsNd9gUJTTCzL5iBnseg= github.com/syndtr/goleveldb v1.0.1-0.20190318030020-c3a204f8e965 h1:1oFLiOyVl+W7bnBzGhf7BbIv9loSFQcieWWYIjLqcAw= github.com/syndtr/goleveldb v1.0.1-0.20190318030020-c3a204f8e965/go.mod h1:9OrXJhf154huy1nPWmuSrkgjPUtUNhA+Zmy+6AESzuA= -github.com/thoas/go-funk v0.8.0 h1:JP9tKSvnpFVclYgDM0Is7FD9M4fhPvqA0s0BsXmzSRQ= -github.com/thoas/go-funk v0.8.0/go.mod h1:+IWnUfUmFO1+WVYQWQtIJHeRRdaIyyYglZN7xzUPe4Q= github.com/tidwall/gjson v1.6.0/go.mod h1:P256ACg0Mn+j1RXIDXoss50DeIABTYK1PULOJHhxOls= github.com/tidwall/gjson v1.9.3 h1:hqzS9wAHMO+KVBBkLxYdkEeeFHuqr95GfClRLKlgK0E= github.com/tidwall/match v1.0.1 h1:PnKP62LPNxHKTwvHHZZzdOAOCtsJTjo6dZLCwpKm5xc= @@ -481,8 +491,9 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200204104054-c9f3fb736b72/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4 h1:kUhD7nTDoI3fVd9G4ORWrbV5NY0liEs/Jg2pv5f+bBA= golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.0.0-20221005025214-4161e89ecf1b h1:huxqepDufQpLLIRXiVkTvnxrzJlpwmIWAObmcCcUFr0= +golang.org/x/crypto v0.0.0-20221005025214-4161e89ecf1b/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20220321173239-a90fa8a75705 h1:ba9YlqfDGTTQ5aZ2fwOoQ1hf32QySyQkR6ODGDzHlnE= golang.org/x/exp v0.0.0-20220321173239-a90fa8a75705/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE= @@ -612,7 +623,6 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508= @@ -669,13 +679,19 @@ gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= gopkg.in/yaml.v3 
v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gorm.io/driver/mysql v1.0.6 h1:mA0XRPjIKi4bkE9nv+NKs6qj6QWOchqUSdWOcpd3x1E= -gorm.io/driver/mysql v1.0.6/go.mod h1:KdrTanmfLPPyAOeYGyG+UpDys7/7eeWT1zCq+oekYnU= -gorm.io/driver/sqlite v1.1.4 h1:PDzwYE+sI6De2+mxAneV9Xs11+ZyKV6oxD3wDGkaNvM= -gorm.io/driver/sqlite v1.1.4/go.mod h1:mJCeTFr7+crvS+TRnWc5Z3UvwxUN1BGBLMrf5LA9DYw= -gorm.io/gorm v1.20.7/go.mod h1:0HFTzE/SqkGTzK6TlDPPQbAYCluiVvhzoA1+aVyzenw= -gorm.io/gorm v1.21.9 h1:INieZtn4P2Pw6xPJ8MzT0G4WUOsHq3RhfuDF1M6GW0E= +gorm.io/datatypes v1.1.0 h1:EVp1Z28N4ACpYFK1nHboEIJGIFfjY7vLeieDk8jSHJA= +gorm.io/datatypes v1.1.0/go.mod h1:SH2K9R+2RMjuX1CkCONrPwoe9JzVv2hkQvEu4bXGojE= +gorm.io/driver/mysql v1.4.5 h1:u1lytId4+o9dDaNcPCFzNv7h6wvmc92UjNk3z8enSBU= +gorm.io/driver/mysql v1.4.5/go.mod h1:SxzItlnT1cb6e1e4ZRpgJN2VYtcqJgqnHxWr4wsP8oc= +gorm.io/driver/postgres v1.4.5 h1:mTeXTTtHAgnS9PgmhN2YeUbazYpLhUI1doLnw42XUZc= +gorm.io/driver/sqlite v1.4.3 h1:HBBcZSDnWi5BW3B3rwvVTc510KGkBkexlOg0QrmLUuU= +gorm.io/driver/sqlite v1.4.3/go.mod h1:0Aq3iPO+v9ZKbcdiz8gLWRw5VOPcBOPUQJFLq5e2ecI= +gorm.io/driver/sqlserver v1.4.1 h1:t4r4r6Jam5E6ejqP7N82qAJIJAht27EGT41HyPfXRw0= gorm.io/gorm v1.21.9/go.mod h1:F+OptMscr0P2F2qU97WT1WimdH9GaQPoDW7AYd5i2Y0= +gorm.io/gorm v1.23.8/go.mod h1:l2lP/RyAtc1ynaTjFksBde/O8v9oOGIApu2/xRitmZk= +gorm.io/gorm v1.24.0/go.mod h1:DVrVomtaYTbqs7gB/x2uVvqnXzv0nqjB396B8cG4dBA= +gorm.io/gorm v1.24.3 h1:WL2ifUmzR/SLp85CSURAfybcHnGZ+yLSGSxgYXlFBHg= +gorm.io/gorm v1.24.3/go.mod h1:DVrVomtaYTbqs7gB/x2uVvqnXzv0nqjB396B8cG4dBA= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= diff --git a/tests/cluster.go b/tests/cluster.go index 2bafe3e08f4..d0478a333e9 100644 --- a/tests/cluster.go +++ b/tests/cluster.go @@ -82,7 +82,10 @@ func NewTestServer(ctx context.Context, cfg *config.Config) (*TestServer, error) if err != nil { return nil, err } - serviceBuilders := []server.HandlerBuilder{api.NewHandler, apiv2.NewV2Handler, swaggerserver.NewHandler, autoscaling.NewHandler} + serviceBuilders := []server.HandlerBuilder{api.NewHandler, apiv2.NewV2Handler, autoscaling.NewHandler} + if swaggerserver.Enabled() { + serviceBuilders = append(serviceBuilders, swaggerserver.NewHandler) + } serviceBuilders = append(serviceBuilders, dashboard.GetServiceBuilders()...) svr, err := server.CreateServer(ctx, cfg, serviceBuilders...) 
if err != nil { diff --git a/tests/server/api/api_test.go b/tests/server/api/api_test.go index 7b36618e62c..7df52240298 100644 --- a/tests/server/api/api_test.go +++ b/tests/server/api/api_test.go @@ -377,6 +377,15 @@ func (suite *middlewareTestSuite) TestRateLimitMiddleware() { } } +func (suite *middlewareTestSuite) TestSwaggerUrl() { + leader := suite.cluster.GetServer(suite.cluster.GetLeader()) + req, _ := http.NewRequest(http.MethodGet, leader.GetAddr()+"/swagger/ui/index", nil) + resp, err := dialClient.Do(req) + suite.NoError(err) + suite.True(resp.StatusCode == http.StatusNotFound) + resp.Body.Close() +} + func (suite *middlewareTestSuite) TestAuditPrometheusBackend() { leader := suite.cluster.GetServer(suite.cluster.GetLeader()) input := map[string]interface{}{ diff --git a/tests/server/global_config/global_config_test.go b/tests/server/global_config/global_config_test.go index f821d664b7a..83f49165311 100644 --- a/tests/server/global_config/global_config_test.go +++ b/tests/server/global_config/global_config_test.go @@ -18,6 +18,7 @@ import ( "context" "strconv" "strings" + "sync" "testing" "time" @@ -57,6 +58,7 @@ type globalConfigTestSuite struct { server *server.GrpcServer client *grpc.ClientConn cleanup server.CleanupFunc + mu sync.Mutex } func TestGlobalConfigTestSuite(t *testing.T) { @@ -237,3 +239,27 @@ func (suite *globalConfigTestSuite) TestClientWatch() { } } } + +func (suite *globalConfigTestSuite) TestEtcdNotStart() { + cli := suite.server.GetClient() + defer func() { + suite.mu.Lock() + suite.server.SetClient(cli) + suite.mu.Unlock() + }() + suite.mu.Lock() + suite.server.SetClient(nil) + suite.mu.Unlock() + err := suite.server.WatchGlobalConfig(&pdpb.WatchGlobalConfigRequest{}, testReceiver{re: suite.Require()}) + suite.Error(err) + + _, err = suite.server.StoreGlobalConfig(suite.server.Context(), &pdpb.StoreGlobalConfigRequest{ + Changes: []*pdpb.GlobalConfigItem{{Name: "0", Value: "0"}}, + }) + suite.Error(err) + + _, err = suite.server.LoadGlobalConfig(suite.server.Context(), &pdpb.LoadGlobalConfigRequest{ + Names: []string{"test_etcd"}, + }) + suite.Error(err) +} diff --git a/tests/server/member/member_test.go b/tests/server/member/member_test.go index 5d2b0bfdd5a..dd62c90c75a 100644 --- a/tests/server/member/member_test.go +++ b/tests/server/member/member_test.go @@ -248,6 +248,30 @@ func TestLeaderResignWithBlock(t *testing.T) { re.NoError(failpoint.Disable("github.com/tikv/pd/server/raftclusterIsBusy")) } +func TestPDLeaderLostWhileEtcdLeaderIntact(t *testing.T) { + re := require.New(t) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + cluster, err := tests.NewTestCluster(ctx, 2) + defer cluster.Destroy() + re.NoError(err) + + err = cluster.RunInitialServers() + re.NoError(err) + + leader1 := cluster.WaitLeader() + memberID := cluster.GetServer(leader1).GetLeader().GetMemberId() + + re.NoError(failpoint.Enable("github.com/tikv/pd/server/leaderLoopCheckAgain", fmt.Sprintf("return(\"%d\")", memberID))) + re.NoError(failpoint.Enable("github.com/tikv/pd/server/exitCampaignLeader", fmt.Sprintf("return(\"%d\")", memberID))) + re.NoError(failpoint.Enable("github.com/tikv/pd/server/timeoutWaitPDLeader", `return(true)`)) + leader2 := waitLeaderChange(re, cluster, leader1) + re.NotEqual(leader1, leader2) + re.NoError(failpoint.Disable("github.com/tikv/pd/server/leaderLoopCheckAgain")) + re.NoError(failpoint.Disable("github.com/tikv/pd/server/exitCampaignLeader")) + re.NoError(failpoint.Disable("github.com/tikv/pd/server/timeoutWaitPDLeader")) 
+} + func waitLeaderChange(re *require.Assertions, cluster *tests.TestCluster, old string) string { var leader string testutil.Eventually(re, func() bool { diff --git a/tools/pd-tso-bench/go.mod b/tools/pd-tso-bench/go.mod index bcc8a620468..906a0e6ab1d 100644 --- a/tools/pd-tso-bench/go.mod +++ b/tools/pd-tso-bench/go.mod @@ -3,7 +3,6 @@ module github.com/tools/pd-tso-bench go 1.16 require ( - github.com/cespare/xxhash/v2 v2.1.2 // indirect github.com/influxdata/tdigest v0.0.1 github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 diff --git a/tools/pd-tso-bench/go.sum b/tools/pd-tso-bench/go.sum index be96df5ff7f..114f602bb45 100644 --- a/tools/pd-tso-bench/go.sum +++ b/tools/pd-tso-bench/go.sum @@ -16,7 +16,6 @@ github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+Ce github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= -github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cbYE= github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= @@ -106,8 +105,8 @@ github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c h1:xpW9bvK+HuuTm github.com/pingcap/errors v0.11.5-0.20211224045212-9687c2b0f87c/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg= github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00 h1:C3N3itkduZXDZFh4N3vQ5HEtld3S+Y+StULhWVvumU0= github.com/pingcap/failpoint v0.0.0-20210918120811-547c13e3eb00/go.mod h1:4qGtCB0QK0wBzKtFEGDhxXnSnbQApw1gc9siScUl8ew= -github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172 h1:FYgKV9znRQmzVrrJDZ0gUfMIvKLAMU1tu1UKJib8bEQ= -github.com/pingcap/kvproto v0.0.0-20221026112947-f8d61344b172/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= +github.com/pingcap/kvproto v0.0.0-20230726063044-73d6d7f3756b h1:dLoYgMFgzUaS6fAAPdjA7oGDM0LdCIm+qhgb3PzrDps= +github.com/pingcap/kvproto v0.0.0-20230726063044-73d6d7f3756b/go.mod h1:OYtxs0786qojVTmkVeufx93xe+jUgm56GUYRIKnmaGI= github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3 h1:HR/ylkkLmGdSSDaD8IDP+SZrdhV1Kibl9KrHxJ9eciw= github.com/pingcap/log v1.1.1-0.20221110025148-ca232912c9f3/go.mod h1:DWQW5jICDR7UJh4HtxXSM20Churx4CQL0fwL/SoOSA4= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
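The hot-scheduler test added above (checkHotReadRegionScheduleWithSmallHotRegion) exercises the behaviour referenced as #6827: the ratio-based minimum for a hot read peer is capped by the rate of the topnPosition-th hottest peer, so a store made up of uniformly small hot peers can still yield scheduling candidates. The Go sketch below only illustrates that idea under the stated assumptions; filterHotPeersSketch, its signature, and the numbers in main are made up for this note and are not the scheduler's actual filterHotPeers implementation.

package main

import (
	"fmt"
	"sort"
)

// filterHotPeersSketch returns the peer rates that count as scheduling
// candidates. The threshold starts at minHotRatio*storeRate (the old,
// ratio-only rule) and is capped by the rate of the topN-th hottest peer,
// which is the behaviour the Case1/Case2 assertions above depend on.
func filterHotPeersSketch(rates []float64, storeRate, minHotRatio float64, topN int) []float64 {
	threshold := minHotRatio * storeRate

	sorted := append([]float64(nil), rates...)
	sort.Sort(sort.Reverse(sort.Float64Slice(sorted)))
	if len(sorted) >= topN && sorted[topN-1] < threshold {
		threshold = sorted[topN-1]
	}

	var hot []float64
	for _, r := range rates {
		if r >= threshold {
			hot = append(hot, r)
		}
	}
	return hot
}

func main() {
	// 40 uniformly small peers on a lightly loaded store: none reaches the
	// ratio-based threshold, but with topN=10 the ten hottest still qualify.
	rates := make([]float64, 40)
	for i := range rates {
		rates[i] = 5 + float64(i)*0.1
	}
	fmt.Println(len(filterHotPeersSketch(rates, 200, 0.05, 10)))    // 10: the topN cap applies
	fmt.Println(len(filterHotPeersSketch(rates, 200, 0.05, 10000))) // 0: effectively ratio-only, as in Case1
}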
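The new branch in leaderLoop (server/server.go) only moves the etcd leader after a randomized wait once the PD leader has been lost. A minimal sketch of that timeout computation, assuming the two constants added by this patch and using a 3-second election interval purely as an illustrative input:

package main

import (
	"fmt"
	"math/rand"
	"time"
)

// Constants mirrored from the patch (server/server.go).
const (
	lostPDLeaderMaxTimeoutSecs   = 10
	lostPDLeaderReElectionFactor = 10
)

// reElectionTimeout mirrors the randomTimeout computed in leaderLoop: a base
// of lostPDLeaderMaxTimeoutSecs seconds, up to the same amount of random
// jitter, plus lostPDLeaderReElectionFactor election intervals. The jitter
// spreads the members' attempts out so they do not all try to move the etcd
// leader at the same moment.
func reElectionTimeout(electionInterval time.Duration) time.Duration {
	jitter := time.Duration(rand.Intn(lostPDLeaderMaxTimeoutSecs)) * time.Second
	return jitter + lostPDLeaderMaxTimeoutSecs*time.Second + lostPDLeaderReElectionFactor*electionInterval
}

func main() {
	// With a 3s election interval (illustrative value, not taken from the
	// patch), the wait falls in the [40s, 50s) range.
	for i := 0; i < 3; i++ {
		fmt.Println(reElectionTimeout(3 * time.Second))
	}
}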
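The server/statistics/hot_peer_cache.go hunk fixes a common Go slip: make([]uint64, n) already sets the slice length to n, so the old append left n zero values at the front of newItem.stores. A standalone illustration of why the patch switches to index assignment (the store IDs here are made up):

package main

import "fmt"

func main() {
	storeIDs := []uint64{2, 3, 5} // hypothetical peer store IDs

	// Before the fix: make sets the length to 3, so append grows the slice
	// to length 6 with three leading zeros.
	buggy := make([]uint64, len(storeIDs))
	for _, id := range storeIDs {
		buggy = append(buggy, id)
	}
	fmt.Println(buggy) // [0 0 0 2 3 5]

	// After the fix: assign by index so the slice holds exactly the IDs.
	fixed := make([]uint64, len(storeIDs))
	for i, id := range storeIDs {
		fixed[i] = id
	}
	fmt.Println(fixed) // [2 3 5]
}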