Skip to content

Commit

Permalink
fix(overlord): allow ensure when state is available
Browse files Browse the repository at this point in the history
  • Loading branch information
flotter committed Aug 12, 2024
1 parent d56c1b8 commit 7103bb8
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 20 deletions.
10 changes: 1 addition & 9 deletions internals/overlord/checkstate/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"reflect"
"sort"
"sync"
"sync/atomic"

"gopkg.in/tomb.v2"

Expand All @@ -38,8 +37,7 @@ const (

// CheckManager starts and manages the health checks.
type CheckManager struct {
state *state.State
ensureDone atomic.Bool
state *state.State

failureHandlers []FailureFunc

Expand Down Expand Up @@ -87,7 +85,6 @@ func NewManager(s *state.State, runner *state.TaskRunner) *CheckManager {
}

func (m *CheckManager) Ensure() error {
m.ensureDone.Store(true)
return nil
}

Expand Down Expand Up @@ -164,11 +161,6 @@ func (m *CheckManager) PlanChanged(newPlan *plan.Plan) {
shouldEnsure = true
}
}
if !m.ensureDone.Load() {
// Can't call EnsureBefore before Overlord.Loop is running (which will
// call m.Ensure for the first time).
return
}
if shouldEnsure {
m.state.EnsureBefore(0) // start new tasks right away
}
Expand Down
43 changes: 32 additions & 11 deletions internals/overlord/overlord.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,13 @@ func New(opts *Options) (*Overlord, error) {
extension: opts.Extension,
}

// Enable the ensure timer before overlord managers are added. This allows
// managers to call state.EnsureBefore() safely before the overlord loop
// is started, as soon as they have a valid state instance. The overlord
// loop will always ensure on entry, so any pending ensure requests are
// serviced as soon as the overlord loop is started.
o.ensureTimerSetup()

if !filepath.IsAbs(o.pebbleDir) {
return nil, fmt.Errorf("directory %q must be absolute", o.pebbleDir)
}
Expand Down Expand Up @@ -321,6 +328,25 @@ func (o *Overlord) ensureTimerSetup() {
o.pruneTicker = time.NewTicker(pruneInterval)
}

func (o *Overlord) ensureTimerStopClearChannel() {
o.ensureLock.Lock()
defer o.ensureLock.Unlock()
// The Go v1.x timer implementation does not guarantee that a Reset
// clears the channel. The documented proposal is to call stop and
// manually clear the channel if it already expired. This is required
// on loop entry because the timer is started much earlier, and
// managers are now allowed to call state.EnsureBefore() as soon as state
// is available. If the ensure timer expired before loop entry, this code
// ensures the channel is cleared and that only the ensure on entry is
// performed (and not a second ensure as a result of the channel).
// See: https://pkg.go.dev/time#Timer.Reset
o.ensureTimer.Stop()
select {
case <-o.ensureTimer.C:
default:
}
}

func (o *Overlord) ensureTimerReset() time.Time {
o.ensureLock.Lock()
defer o.ensureLock.Unlock()
Expand All @@ -334,7 +360,7 @@ func (o *Overlord) ensureBefore(d time.Duration) {
o.ensureLock.Lock()
defer o.ensureLock.Unlock()
if o.ensureTimer == nil {
panic("cannot use EnsureBefore before Overlord.Loop")
panic("cannot use EnsureBefore before overlord timer exists")
}
now := time.Now()
next := now.Add(d)
Expand All @@ -357,8 +383,11 @@ func (o *Overlord) ensureBefore(d time.Duration) {

// Loop runs a loop in a goroutine to ensure the current state regularly through StateEngine Ensure.
func (o *Overlord) Loop() {
o.ensureTimerSetup()
o.loopTomb.Go(func() error {
// We are about to perform the first ensure on loop entry. Stop and
// clear the timer channel, so we do not process a spurious second
// ensure due to a pending timer channel expiry.
o.ensureTimerStopClearChannel()
for {
// TODO: pass a proper context into Ensure
o.ensureTimerReset()
Expand Down Expand Up @@ -403,15 +432,6 @@ func (o *Overlord) settle(timeout time.Duration, beforeCleanups func()) error {
return err
}

func() {
o.ensureLock.Lock()
defer o.ensureLock.Unlock()
if o.ensureTimer != nil {
panic("cannot use Settle concurrently with other Settle or Loop calls")
}
o.ensureTimer = time.NewTimer(0)
}()

defer func() {
o.ensureLock.Lock()
defer o.ensureLock.Unlock()
Expand Down Expand Up @@ -548,6 +568,7 @@ func FakeWithState(handleRestart func(restart.RestartType)) *Overlord {
loopTomb: new(tomb.Tomb),
inited: false,
}
o.ensureTimerSetup()
s := state.New(fakeBackend{o: o})
o.stateEng = NewStateEngine(s)
o.runner = state.NewTaskRunner(s)
Expand Down

0 comments on commit 7103bb8

Please sign in to comment.