Skip to content

Commit

Permalink
mcs: avoid stuck goroutine when raftcluster failed to create (#7025)
Browse files Browse the repository at this point in the history
close #7001

Signed-off-by: lhy1024 <[email protected]>

Co-authored-by: ti-chi-bot[bot] <108142056+ti-chi-bot[bot]@users.noreply.github.com>
  • Loading branch information
lhy1024 and ti-chi-bot[bot] authored Sep 4, 2023
1 parent ee8654e commit 0e278e3
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 8 deletions.
10 changes: 9 additions & 1 deletion pkg/keyspace/tso_keyspace_group.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,16 @@ func (m *GroupManager) allocNodesToAllKeyspaceGroups(ctx context.Context) {
log.Info("start to alloc nodes to all keyspace groups")
for {
select {
case <-m.ctx.Done():
// When the group manager is closed, we should stop to alloc nodes to all keyspace groups.
// Note: If raftcluster is created failed but the group manager has been bootstrapped,
// we need to close this goroutine by m.cancel() rather than ctx.Done() from the raftcluster.
// because the ctx.Done() from the raftcluster will be triggered after raftcluster is created successfully.
log.Info("server is closed, stop to alloc nodes to all keyspace groups")
return
case <-ctx.Done():
log.Info("stop to alloc nodes to all keyspace groups")
// When the API leader is changed, we should stop to alloc nodes to all keyspace groups.
log.Info("the raftcluster is closed, stop to alloc nodes to all keyspace groups")
return
case <-ticker.C:
}
Expand Down
16 changes: 9 additions & 7 deletions server/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -296,13 +296,6 @@ func (c *RaftCluster) Start(s Server) error {
return nil
}

if s.IsAPIServiceMode() {
err = c.keyspaceGroupManager.Bootstrap(c.ctx)
if err != nil {
return err
}
}

c.ruleManager = placement.NewRuleManager(c.storage, c, c.GetOpts())
if c.opt.IsPlacementRulesEnabled() {
err = c.ruleManager.Initialize(c.opt.GetMaxReplicas(), c.opt.GetLocationLabels())
Expand All @@ -327,6 +320,15 @@ func (c *RaftCluster) Start(s Server) error {
log.Error("load external timestamp meets error", zap.Error(err))
}

// bootstrap keyspace group manager after starting other parts successfully.
// This order avoids a stuck goroutine in keyspaceGroupManager when it fails to create raftcluster.
if s.IsAPIServiceMode() {
err = c.keyspaceGroupManager.Bootstrap(c.ctx)
if err != nil {
return err
}
}

c.wg.Add(10)
go c.runCoordinator()
go c.runMetricsCollectionJob()
Expand Down

0 comments on commit 0e278e3

Please sign in to comment.