-
Notifications
You must be signed in to change notification settings - Fork 54
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(planstate): make plan propagation safer #474
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,7 +20,6 @@ import ( | |
"reflect" | ||
"sort" | ||
"sync" | ||
"sync/atomic" | ||
|
||
"gopkg.in/tomb.v2" | ||
|
||
|
@@ -38,8 +37,7 @@ const ( | |
|
||
// CheckManager starts and manages the health checks. | ||
type CheckManager struct { | ||
state *state.State | ||
ensureDone atomic.Bool | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This logic, which would also have been needed in derived project managers, can now safely be removed. The race is prevented in the overlord. |
||
state *state.State | ||
|
||
failureHandlers []FailureFunc | ||
|
||
|
@@ -87,7 +85,6 @@ func NewManager(s *state.State, runner *state.TaskRunner) *CheckManager { | |
} | ||
|
||
func (m *CheckManager) Ensure() error { | ||
m.ensureDone.Store(true) | ||
return nil | ||
} | ||
|
||
|
@@ -164,11 +161,6 @@ func (m *CheckManager) PlanChanged(newPlan *plan.Plan) { | |
shouldEnsure = true | ||
} | ||
} | ||
if !m.ensureDone.Load() { | ||
// Can't call EnsureBefore before Overlord.Loop is running (which will | ||
// call m.Ensure for the first time). | ||
return | ||
} | ||
if shouldEnsure { | ||
m.state.EnsureBefore(0) // start new tasks right away | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,7 +22,6 @@ import ( | |
"os" | ||
"path/filepath" | ||
"sync" | ||
"sync/atomic" | ||
"time" | ||
|
||
"github.com/canonical/x-go/randutil" | ||
|
@@ -89,7 +88,7 @@ type Overlord struct { | |
ensureLock sync.Mutex | ||
ensureTimer *time.Timer | ||
ensureNext time.Time | ||
ensureRun int32 | ||
ensureRun chan struct{} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am extending an existing feature here to also allow for waiting, not only checking if ensure ran already. |
||
pruneTicker *time.Ticker | ||
|
||
startOfOperationTime time.Time | ||
|
@@ -114,6 +113,7 @@ func New(opts *Options) (*Overlord, error) { | |
o := &Overlord{ | ||
pebbleDir: opts.PebbleDir, | ||
loopTomb: new(tomb.Tomb), | ||
ensureRun: make(chan struct{}), | ||
inited: true, | ||
extension: opts.Extension, | ||
} | ||
|
@@ -208,14 +208,6 @@ func New(opts *Options) (*Overlord, error) { | |
// before it. | ||
o.stateEng.AddManager(o.runner) | ||
|
||
// Load the plan from the Pebble layers directory (which may be missing | ||
// or have no layers, resulting in an empty plan), and propagate PlanChanged | ||
// notifications to all notification subscribers. | ||
err = o.planMgr.Load() | ||
if err != nil { | ||
return nil, fmt.Errorf("cannot load plan: %w", err) | ||
} | ||
|
||
return o, nil | ||
} | ||
|
||
|
@@ -379,15 +371,34 @@ func (o *Overlord) Loop() { | |
} | ||
} | ||
}) | ||
o.waitEnsureRun() | ||
} | ||
|
||
func (o *Overlord) ensureDidRun() { | ||
atomic.StoreInt32(&o.ensureRun, 1) | ||
select { | ||
case <-o.ensureRun: | ||
// Already closed. Ensure already ran at least once. | ||
default: | ||
close(o.ensureRun) | ||
} | ||
} | ||
|
||
// waitEnsureRun waits until StateEngine.Ensure() was called at least once. | ||
func (o *Overlord) waitEnsureRun() { | ||
select { | ||
case <-o.ensureRun: | ||
case <-o.loopTomb.Dying(): | ||
} | ||
} | ||
|
||
func (o *Overlord) CanStandby() bool { | ||
run := atomic.LoadInt32(&o.ensureRun) | ||
return run != 0 | ||
select { | ||
case <-o.ensureRun: | ||
// Already closed. Ensure already ran at least once. | ||
return true | ||
default: | ||
return false | ||
} | ||
} | ||
|
||
// Stop stops the ensure loop and the managers under the StateEngine. | ||
|
@@ -545,8 +556,9 @@ func Fake() *Overlord { | |
// testing. | ||
func FakeWithState(handleRestart func(restart.RestartType)) *Overlord { | ||
o := &Overlord{ | ||
loopTomb: new(tomb.Tomb), | ||
inited: false, | ||
loopTomb: new(tomb.Tomb), | ||
inited: false, | ||
ensureRun: make(chan struct{}), | ||
} | ||
s := state.New(fakeBackend{o: o}) | ||
o.stateEng = NewStateEngine(s) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SetServiceArgs
requires the service manager plan so we have to move it down slightly. Care must be taken to not make direct Plan Manager calls before the state engine is running, as the lock-less load happens during this time. We could add the lock for the initial load / propagate if really needed.