Skip to content

Commit

Permalink
Merge branch 'master' into support_transfer_primary2
Browse files Browse the repository at this point in the history
  • Loading branch information
ti-chi-bot[bot] authored Jul 8, 2024
2 parents ec8e737 + b35f18d commit 9e3b798
Show file tree
Hide file tree
Showing 27 changed files with 444 additions and 213 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ basic-test: install-tools

ci-test-job: install-tools dashboard-ui pd-ut
@$(FAILPOINT_ENABLE)
./scripts/ci-subtask.sh $(JOB_COUNT) $(JOB_INDEX) || { $(FAILPOINT_DISABLE); exit 1; }
./scripts/ci-subtask.sh $(JOB_INDEX) || { $(FAILPOINT_DISABLE); exit 1; }
@$(FAILPOINT_DISABLE)

TSO_INTEGRATION_TEST_PKGS := $(PD_PKG)/tests/server/tso
Expand Down
1 change: 1 addition & 0 deletions client/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ require (
github.com/prometheus/client_model v0.5.0 // indirect
github.com/prometheus/common v0.46.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
github.com/stretchr/objx v0.5.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/net v0.23.0 // indirect
golang.org/x/sys v0.18.0 // indirect
Expand Down
1 change: 1 addition & 0 deletions client/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncj
github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
Expand Down
2 changes: 1 addition & 1 deletion client/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func initMetrics(constLabels prometheus.Labels) {
Subsystem: "request",
Name: "tso_batch_send_latency",
ConstLabels: constLabels,
Buckets: prometheus.ExponentialBuckets(1, 2, 34), // 1ns ~ 8s
Buckets: prometheus.ExponentialBuckets(0.0005, 2, 13),
Help: "tso batch send latency",
})

Expand Down
72 changes: 57 additions & 15 deletions client/resource_group/controller/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,10 @@ const (
defaultTargetPeriod = 5 * time.Second
// defaultMaxWaitDuration is the max duration to wait for the token before throwing error.
defaultMaxWaitDuration = 30 * time.Second
// defaultLTBTokenRPCMaxDelay is the upper bound of backoff delay for local token bucket RPC.
defaultLTBTokenRPCMaxDelay = 1 * time.Second
// defaultWaitRetryTimes is the times to retry when waiting for the token.
defaultWaitRetryTimes = 10
defaultWaitRetryTimes = 20
// defaultWaitRetryInterval is the interval to retry when waiting for the token.
defaultWaitRetryInterval = 50 * time.Millisecond
)
Expand All @@ -77,23 +79,35 @@ const (

// Because the resource manager has not been deployed in microservice mode,
// do not enable this function.
defaultDegradedModeWaitDuration = 0
defaultDegradedModeWaitDuration = time.Duration(0)
defaultAvgBatchProportion = 0.7
)

// Config is the configuration of the resource manager controller which includes some option for client needed.
type Config struct {
// TokenRPCParams is the parameters for local bucket RPC.
type TokenRPCParams struct {
// WaitRetryInterval is the interval to retry when waiting for the token.
WaitRetryInterval Duration `toml:"wait-retry-interval" json:"wait-retry-interval"`

// WaitRetryTimes is the times to retry when waiting for the token.
WaitRetryTimes int `toml:"wait-retry-times" json:"wait-retry-times"`
}

// LocalBucketConfig is the configuration for local bucket. not export to server side.
type LocalBucketConfig struct {
TokenRPCParams `toml:"token-rpc-params" json:"token-rpc-params"`
}

// BaseConfig is the configuration of the resource manager controller which includes some option for client needed.
// TODO: unified the configuration for client and server, server side in pkg/mcs/resourcemanger/config.go.
type BaseConfig struct {
// EnableDegradedMode is to control whether resource control client enable degraded mode when server is disconnect.
DegradedModeWaitDuration Duration `toml:"degraded-mode-wait-duration" json:"degraded-mode-wait-duration"`

// LTBMaxWaitDuration is the max wait time duration for local token bucket.
LTBMaxWaitDuration Duration `toml:"ltb-max-wait-duration" json:"ltb-max-wait-duration"`

// WaitRetryInterval is the interval to retry when waiting for the token.
WaitRetryInterval Duration `toml:"wait-retry-interval" json:"wait-retry-interval"`

// WaitRetryTimes is the times to retry when waiting for the token.
WaitRetryTimes int `toml:"wait-retry-times" json:"wait-retry-times"`
// LTBTokenRPCMaxDelay is the upper bound of backoff delay for local token bucket RPC.
LTBTokenRPCMaxDelay Duration `toml:"ltb-token-rpc-max-delay" json:"ltb-token-rpc-max-delay"`

// RequestUnit is the configuration determines the coefficients of the RRU and WRU cost.
// This configuration should be modified carefully.
Expand All @@ -103,15 +117,43 @@ type Config struct {
EnableControllerTraceLog bool `toml:"enable-controller-trace-log" json:"enable-controller-trace-log,string"`
}

// Config is the configuration of the resource manager controller.
type Config struct {
BaseConfig
LocalBucketConfig
}

// Adjust adjusts the configuration.
func (c *Config) Adjust() {
// valid the configuration, TODO: separately add the valid function.
if c.BaseConfig.LTBMaxWaitDuration.Duration == 0 {
c.BaseConfig.LTBMaxWaitDuration = NewDuration(defaultMaxWaitDuration)
}
if c.LocalBucketConfig.WaitRetryInterval.Duration == 0 {
c.LocalBucketConfig.WaitRetryInterval = NewDuration(defaultWaitRetryInterval)
}
// adjust the client settings. calculate the retry times.
if int(c.BaseConfig.LTBTokenRPCMaxDelay.Duration) != int(c.LocalBucketConfig.WaitRetryInterval.Duration)*c.LocalBucketConfig.WaitRetryTimes {
c.LocalBucketConfig.WaitRetryTimes = int(c.BaseConfig.LTBTokenRPCMaxDelay.Duration / c.LocalBucketConfig.WaitRetryInterval.Duration)
}
}

// DefaultConfig returns the default resource manager controller configuration.
func DefaultConfig() *Config {
return &Config{
DegradedModeWaitDuration: NewDuration(defaultDegradedModeWaitDuration),
LTBMaxWaitDuration: NewDuration(defaultMaxWaitDuration),
WaitRetryInterval: NewDuration(defaultWaitRetryInterval),
WaitRetryTimes: defaultWaitRetryTimes,
RequestUnit: DefaultRequestUnitConfig(),
EnableControllerTraceLog: false,
BaseConfig: BaseConfig{
DegradedModeWaitDuration: NewDuration(defaultDegradedModeWaitDuration),
RequestUnit: DefaultRequestUnitConfig(),
EnableControllerTraceLog: false,
LTBMaxWaitDuration: NewDuration(defaultMaxWaitDuration),
LTBTokenRPCMaxDelay: NewDuration(defaultLTBTokenRPCMaxDelay),
},
LocalBucketConfig: LocalBucketConfig{
TokenRPCParams: TokenRPCParams{
WaitRetryInterval: NewDuration(defaultWaitRetryInterval),
WaitRetryTimes: defaultWaitRetryTimes,
},
},
}
}

Expand Down
21 changes: 14 additions & 7 deletions client/resource_group/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ func NewResourceGroupController(
log.Info("load resource controller config", zap.Reflect("config", config), zap.Reflect("ru-config", controller.ruConfig))
controller.calculators = []ResourceCalculator{newKVCalculator(controller.ruConfig), newSQLCalculator(controller.ruConfig)}
controller.safeRuConfig.Store(controller.ruConfig)
enableControllerTraceLog.Store(config.EnableControllerTraceLog)
return controller, nil
}

Expand All @@ -201,12 +202,13 @@ func loadServerConfig(ctx context.Context, provider ResourceGroupProvider) (*Con
if err != nil {
return nil, err
}
config := DefaultConfig()
defer config.Adjust()
kvs := resp.GetKvs()
if len(kvs) == 0 {
log.Warn("[resource group controller] server does not save config, load config failed")
return DefaultConfig(), nil
return config, nil
}
config := DefaultConfig()
err = json.Unmarshal(kvs[0].GetValue(), config)
if err != nil {
return nil, err
Expand Down Expand Up @@ -309,7 +311,6 @@ func (c *ResourceGroupsController) Start(ctx context.Context) {
watchRetryTimer.Reset(watchRetryInterval)
}
}

case <-emergencyTokenAcquisitionTicker.C:
c.executeOnAllGroups((*groupCostController).resetEmergencyTokenAcquisition)
/* channels */
Expand All @@ -329,9 +330,7 @@ func (c *ResourceGroupsController) Start(ctx context.Context) {
case notifyMsg := <-c.lowTokenNotifyChan:
c.executeOnAllGroups((*groupCostController).updateRunState)
c.executeOnAllGroups((*groupCostController).updateAvgRequestResourcePerSec)
if len(c.run.currentRequests) == 0 {
c.collectTokenBucketRequests(c.loopCtx, FromLowRU, lowToken /* select low tokens resource group */, notifyMsg)
}
c.collectTokenBucketRequests(c.loopCtx, FromLowRU, lowToken /* select low tokens resource group */, notifyMsg)
if c.run.inDegradedMode {
c.executeOnAllGroups((*groupCostController).applyDegradedMode)
}
Expand Down Expand Up @@ -391,6 +390,7 @@ func (c *ResourceGroupsController) Start(ctx context.Context) {
if err := json.Unmarshal(item.Kv.Value, config); err != nil {
continue
}
config.Adjust()
c.ruConfig = GenerateRUConfig(config)

// Stay compatible with serverless
Expand All @@ -404,7 +404,6 @@ func (c *ResourceGroupsController) Start(ctx context.Context) {
}
log.Info("load resource controller config after config changed", zap.Reflect("config", config), zap.Reflect("ruConfig", c.ruConfig))
}

case gc := <-c.tokenBucketUpdateChan:
go gc.handleTokenBucketUpdateEvent(c.loopCtx)
}
Expand Down Expand Up @@ -1178,11 +1177,19 @@ func (gc *groupCostController) collectRequestAndConsumption(selectTyp selectType
switch selectTyp {
case periodicReport:
selected = selected || gc.shouldReportConsumption()
failpoint.Inject("triggerPeriodicReport", func(val failpoint.Value) {
selected = gc.name == val.(string)
})
fallthrough
case lowToken:
if counter.limiter.IsLowTokens() {
selected = true
}
failpoint.Inject("triggerLowRUReport", func(val failpoint.Value) {
if selectTyp == lowToken {
selected = gc.name == val.(string)
}
})
}
request := &rmpb.RequestUnitItem{
Type: typ,
Expand Down
139 changes: 139 additions & 0 deletions client/resource_group/controller/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,12 @@ import (
"testing"
"time"

"github.com/pingcap/failpoint"
"github.com/pingcap/kvproto/pkg/meta_storagepb"
rmpb "github.com/pingcap/kvproto/pkg/resource_manager"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
pd "github.com/tikv/pd/client"
"github.com/tikv/pd/client/errs"
)

Expand Down Expand Up @@ -132,3 +136,138 @@ func TestResourceGroupThrottledError(t *testing.T) {
re.Error(err)
re.True(errs.ErrClientResourceGroupThrottled.Equal(err))
}

// MockResourceGroupProvider is a mock implementation of the ResourceGroupProvider interface.
type MockResourceGroupProvider struct {
mock.Mock
}

func (m *MockResourceGroupProvider) GetResourceGroup(ctx context.Context, resourceGroupName string, opts ...pd.GetResourceGroupOption) (*rmpb.ResourceGroup, error) {
args := m.Called(ctx, resourceGroupName, opts)
return args.Get(0).(*rmpb.ResourceGroup), args.Error(1)
}

func (m *MockResourceGroupProvider) ListResourceGroups(ctx context.Context, opts ...pd.GetResourceGroupOption) ([]*rmpb.ResourceGroup, error) {
args := m.Called(ctx, opts)
return args.Get(0).([]*rmpb.ResourceGroup), args.Error(1)
}

func (m *MockResourceGroupProvider) AddResourceGroup(ctx context.Context, metaGroup *rmpb.ResourceGroup) (string, error) {
args := m.Called(ctx, metaGroup)
return args.String(0), args.Error(1)
}

func (m *MockResourceGroupProvider) ModifyResourceGroup(ctx context.Context, metaGroup *rmpb.ResourceGroup) (string, error) {
args := m.Called(ctx, metaGroup)
return args.String(0), args.Error(1)
}

func (m *MockResourceGroupProvider) DeleteResourceGroup(ctx context.Context, resourceGroupName string) (string, error) {
args := m.Called(ctx, resourceGroupName)
return args.String(0), args.Error(1)
}

func (m *MockResourceGroupProvider) AcquireTokenBuckets(ctx context.Context, request *rmpb.TokenBucketsRequest) ([]*rmpb.TokenBucketResponse, error) {
args := m.Called(ctx, request)
return args.Get(0).([]*rmpb.TokenBucketResponse), args.Error(1)
}

func (m *MockResourceGroupProvider) LoadResourceGroups(ctx context.Context) ([]*rmpb.ResourceGroup, int64, error) {
args := m.Called(ctx)
return args.Get(0).([]*rmpb.ResourceGroup), args.Get(1).(int64), args.Error(2)
}

func (m *MockResourceGroupProvider) Watch(ctx context.Context, key []byte, opts ...pd.OpOption) (chan []*meta_storagepb.Event, error) {
args := m.Called(ctx, key, opts)
return args.Get(0).(chan []*meta_storagepb.Event), args.Error(1)
}

func (m *MockResourceGroupProvider) Get(ctx context.Context, key []byte, opts ...pd.OpOption) (*meta_storagepb.GetResponse, error) {
args := m.Called(ctx, key, opts)
return args.Get(0).(*meta_storagepb.GetResponse), args.Error(1)
}

func TestControllerWithTwoGroupRequestConcurrency(t *testing.T) {
re := require.New(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
mockProvider := new(MockResourceGroupProvider)

mockProvider.On("Get", mock.Anything, mock.Anything, mock.Anything).Return(&meta_storagepb.GetResponse{}, nil)
// LoadResourceGroups
mockProvider.On("LoadResourceGroups", mock.Anything).Return([]*rmpb.ResourceGroup{}, int64(0), nil)
// Watch
mockProvider.On("Watch", mock.Anything, mock.Anything, mock.Anything).Return(make(chan []*meta_storagepb.Event), nil)

re.NoError(failpoint.Enable("github.com/tikv/pd/client/resource_group/controller/triggerPeriodicReport", fmt.Sprintf("return(\"%s\")", "default")))
defer failpoint.Disable("github.com/tikv/pd/client/resource_group/controller/triggerPeriodicReport")
re.NoError(failpoint.Enable("github.com/tikv/pd/client/resource_group/controller/triggerLowRUReport", fmt.Sprintf("return(\"%s\")", "test-group")))
defer failpoint.Disable("github.com/tikv/pd/client/resource_group/controller/triggerLowRUReport")

controller, _ := NewResourceGroupController(ctx, 1, mockProvider, nil)
controller.Start(ctx)

defaultResourceGroup := &rmpb.ResourceGroup{Name: "default", Mode: rmpb.GroupMode_RUMode, RUSettings: &rmpb.GroupRequestUnitSettings{RU: &rmpb.TokenBucket{Settings: &rmpb.TokenLimitSettings{FillRate: 1000000}}}}
testResourceGroup := &rmpb.ResourceGroup{Name: "test-group", Mode: rmpb.GroupMode_RUMode, RUSettings: &rmpb.GroupRequestUnitSettings{RU: &rmpb.TokenBucket{Settings: &rmpb.TokenLimitSettings{FillRate: 1000000}}}}
mockProvider.On("GetResourceGroup", mock.Anything, "default", mock.Anything).Return(defaultResourceGroup, nil)
mockProvider.On("GetResourceGroup", mock.Anything, "test-group", mock.Anything).Return(testResourceGroup, nil)

c1, err := controller.tryGetResourceGroup(ctx, "default")
re.NoError(err)
re.Equal(defaultResourceGroup, c1.meta)

c2, err := controller.tryGetResourceGroup(ctx, "test-group")
re.NoError(err)
re.Equal(testResourceGroup, c2.meta)

var expectResp []*rmpb.TokenBucketResponse
recTestGroupAcquireTokenRequest := make(chan bool)
mockProvider.On("AcquireTokenBuckets", mock.Anything, mock.Anything).Run(func(args mock.Arguments) {
request := args.Get(1).(*rmpb.TokenBucketsRequest)
var responses []*rmpb.TokenBucketResponse
for _, req := range request.Requests {
if req.ResourceGroupName == "default" {
// no response the default group request, that's mean `len(c.run.currentRequests) != 0` always.
time.Sleep(100 * time.Second)
responses = append(responses, &rmpb.TokenBucketResponse{
ResourceGroupName: "default",
GrantedRUTokens: []*rmpb.GrantedRUTokenBucket{
{
GrantedTokens: &rmpb.TokenBucket{
Tokens: 100000,
},
},
},
})
} else {
responses = append(responses, &rmpb.TokenBucketResponse{
ResourceGroupName: req.ResourceGroupName,
GrantedRUTokens: []*rmpb.GrantedRUTokenBucket{
{
GrantedTokens: &rmpb.TokenBucket{
Tokens: 100000,
},
},
},
})
}
}
// receive test-group request
if len(request.Requests) == 1 && request.Requests[0].ResourceGroupName == "test-group" {
recTestGroupAcquireTokenRequest <- true
}
expectResp = responses
}).Return(expectResp, nil)
// wait default group request token by PeriodicReport.
time.Sleep(2 * time.Second)
counter := c2.run.requestUnitTokens[0]
counter.limiter.mu.Lock()
counter.limiter.notify()
counter.limiter.mu.Unlock()
select {
case res := <-recTestGroupAcquireTokenRequest:
re.True(res)
case <-time.After(5 * time.Second):
re.Fail("timeout")
}
}
Loading

0 comments on commit 9e3b798

Please sign in to comment.