Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add 'recovering' status #1797

Merged
merged 12 commits into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ linters:
- errcheck
- errname
# - errorlint
# - exhaustive
- exhaustive
raulb marked this conversation as resolved.
Show resolved Hide resolved
# - exhaustivestruct
- exportloopref
# - forbidigo
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ run:

.PHONY: proto-generate
proto-generate:
rm -rf proto/gen && cd proto && buf generate
raulb marked this conversation as resolved.
Show resolved Hide resolved
cd proto && buf generate

.PHONY: proto-update
proto-update:
Expand Down
1 change: 1 addition & 0 deletions pkg/foundation/log/fields.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ const (
NodeIDField = "node_id"
ParallelWorkerIDField = "parallel_worker_id"
PipelineIDField = "pipeline_id"
PipelineStatusField = "pipeline_status"
ProcessorIDField = "processor_id"
RecordPositionField = "record_position"
RequestIDField = "request_id"
Expand Down
1 change: 1 addition & 0 deletions pkg/pipeline/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import "github.com/conduitio/conduit/pkg/foundation/cerrors"
var (
ErrTimeout = cerrors.New("operation timed out")
ErrGracefulShutdown = cerrors.New("graceful shutdown")
ErrForceStop = cerrors.New("force stop")
ErrPipelineRunning = cerrors.New("pipeline is running")
ErrPipelineNotRunning = cerrors.New("pipeline not running")
ErrInstanceNotFound = cerrors.New("pipeline instance not found")
Expand Down
1 change: 1 addition & 0 deletions pkg/pipeline/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ const (
StatusSystemStopped
StatusUserStopped
StatusDegraded
StatusRecovering
)

const (
Expand Down
16 changes: 11 additions & 5 deletions pkg/pipeline/lifecycle.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ func (s *Service) Stop(ctx context.Context, pipelineID string, force bool) error
return err
}

if pl.GetStatus() != StatusRunning {
if pl.GetStatus() != StatusRunning && pl.GetStatus() != StatusRecovering {
return cerrors.Errorf("can't stop pipeline with status %q: %w", pl.GetStatus(), ErrPipelineNotRunning)
}

Expand All @@ -138,7 +138,10 @@ func (s *Service) Stop(ctx context.Context, pipelineID string, force bool) error
}

func (s *Service) stopGraceful(ctx context.Context, pl *Instance, reason error) error {
s.logger.Info(ctx).Str(log.PipelineIDField, pl.ID).Msg("gracefully stopping pipeline")
s.logger.Info(ctx).
raulb marked this conversation as resolved.
Show resolved Hide resolved
Str(log.PipelineIDField, pl.ID).
Any(log.PipelineStatusField, pl.GetStatus()).
Msg("gracefully stopping pipeline")
var errs []error
for _, n := range pl.n {
if node, ok := n.(stream.StoppableNode); ok {
Expand All @@ -155,8 +158,11 @@ func (s *Service) stopGraceful(ctx context.Context, pl *Instance, reason error)
}

func (s *Service) stopForceful(ctx context.Context, pl *Instance) error {
s.logger.Info(ctx).Str(log.PipelineIDField, pl.ID).Msg("force stopping pipeline")
pl.t.Kill(cerrors.New("force stop"))
s.logger.Info(ctx).
Str(log.PipelineIDField, pl.ID).
Any(log.PipelineStatusField, pl.GetStatus()).
Msg("force stopping pipeline")
pl.t.Kill(ErrForceStop)
for _, n := range pl.n {
if node, ok := n.(stream.ForceStoppableNode); ok {
// stop all pub nodes
Expand All @@ -171,7 +177,7 @@ func (s *Service) stopForceful(ctx context.Context, pl *Instance) error {
// (i.e. that existing messages get processed but no new messages get produced).
func (s *Service) StopAll(ctx context.Context, reason error) {
for _, pl := range s.instances {
if pl.GetStatus() != StatusRunning {
if pl.GetStatus() != StatusRunning && pl.GetStatus() != StatusRecovering {
continue
}
err := s.stopGraceful(ctx, pl, reason)
Expand Down
112 changes: 112 additions & 0 deletions pkg/pipeline/lifecycle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,118 @@ func TestServiceLifecycle_PipelineError(t *testing.T) {
is.True(cerrors.Is(event.Error, wantErr))
}

// TestServiceLifecycle_StopAll_Recovering checks that a pipeline whose status
// has been set to StatusRecovering can still be stopped, either through
// Service.StopAll or through Service.Stop, and that it ends up in the status
// expected for the given stop reason.
func TestServiceLifecycle_StopAll_Recovering(t *testing.T) {
type testCase struct {
name string
// stopFn performs the stop operation under test on the given service.
stopFn func(ctx context.Context, is *is.I, pipelineService *Service, pipelineID string)
// whether we expect the source plugin's Stop() function to be called
// (doesn't happen when force-stopping)
wantSourceStop bool
// want is the status the pipeline is expected to report after pl.Wait()
// has returned.
want Status
// wantErr, when non-nil, means pl.Wait() is expected to return an error.
// NOTE(review): only non-nil-ness of the returned error is asserted; the
// error is not compared against this value.
wantErr error
}

// runTest creates a pipeline with a mocked source, destination and DLQ,
// starts it, overrides its status to StatusRecovering, invokes tc.stopFn
// and finally asserts the result of pl.Wait() and the pipeline status.
runTest := func(t *testing.T, tc testCase) {
is := is.New(t)
ctx, killAll := context.WithCancel(context.Background())
defer killAll()
logger := log.New(zerolog.Nop())
db := &inmemory.DB{}
persister := connector.NewPersister(logger, db, time.Second, 3)

ps := NewService(logger, db)

// create a host pipeline
pl, err := ps.Create(ctx, uuid.NewString(), Config{Name: "test pipeline"}, ProvisionTypeAPI)
is.NoErr(err)

// create mocked connectors
// source will stop and return ErrGracefulShutdown which should signal to the
// service that everything went well and the pipeline was gracefully shutdown
ctrl := gomock.NewController(t)
// presumably yields an empty record set — confirm against generateRecords
wantRecords := generateRecords(0)
source, sourceDispenser := generatorSource(ctrl, persister, wantRecords, nil, tc.wantSourceStop)
destination, destDispenser := asserterDestination(ctrl, persister, wantRecords)
// the DLQ gets its own asserter destination, created with nil records
dlq, dlqDispenser := asserterDestination(ctrl, persister, nil)
pl.DLQ.Plugin = dlq.Plugin

pl, err = ps.AddConnector(ctx, pl.ID, source.ID)
is.NoErr(err)
pl, err = ps.AddConnector(ctx, pl.ID, destination.ID)
is.NoErr(err)

// start the pipeline now that everything is set up
err = ps.Start(
ctx,
testConnectorFetcher{
source.ID: source,
destination.ID: destination,
testDLQID: dlq,
},
testProcessorFetcher{},
testPluginFetcher{
source.Plugin: sourceDispenser,
destination.Plugin: destDispenser,
dlq.Plugin: dlqDispenser,
},
pl.ID,
)
is.NoErr(err)

// wait for pipeline to finish consuming records from the source
time.Sleep(100 * time.Millisecond)

// override the status directly so that the stop operation is exercised
// against a pipeline that reports StatusRecovering
pl.SetStatus(StatusRecovering)
tc.stopFn(ctx, is, ps, pl.ID)

// wait for pipeline to finish
err = pl.Wait()
if tc.wantErr != nil {
// only the presence of an error is checked here
is.True(err != nil)
} else {
is.NoErr(err)
is.Equal("", pl.Error)
}

is.Equal(tc.want, pl.GetStatus())
}

// Each case stops the recovering pipeline in a different way and states the
// status it should end up in.
testCases := []testCase{
{
name: "system stop (graceful shutdown err)",
stopFn: func(ctx context.Context, is *is.I, ps *Service, pipelineID string) {
ps.StopAll(ctx, ErrGracefulShutdown)
},
wantSourceStop: true,
want: StatusSystemStopped,
},
{
name: "system stop (terrible err)",
stopFn: func(ctx context.Context, is *is.I, ps *Service, pipelineID string) {
ps.StopAll(ctx, cerrors.New("terrible err"))
},
wantSourceStop: true,
want: StatusDegraded,
wantErr: cerrors.New("terrible err"),
},
{
name: "user stop (graceful)",
stopFn: func(ctx context.Context, is *is.I, ps *Service, pipelineID string) {
err := ps.Stop(ctx, pipelineID, false)
is.NoErr(err)
},
wantSourceStop: true,
want: StatusUserStopped,
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
runTest(t, tc)
})
}
}

func TestServiceLifecycle_PipelineStop(t *testing.T) {
is := is.New(t)
ctx, killAll := context.WithCancel(context.Background())
Expand Down
5 changes: 3 additions & 2 deletions pkg/pipeline/status_string.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions pkg/pipeline/stream/message.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,10 @@ func (m *Message) StatusError() error {
return m.Ack()
case MessageStatusNacked:
return m.Nack(nil, "")
case MessageStatusOpen:
return nil
}

return nil
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/plugin/connector/standalone/acceptance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (
"testing"

"github.com/conduitio/conduit-connector-protocol/pconnector/mock"
v1 "github.com/conduitio/conduit-connector-protocol/pconnector/v1" //nolint:staticcheck // backwards compatibility
v1 "github.com/conduitio/conduit-connector-protocol/pconnector/v1" //nolint:staticcheck // Disabling Staticcheck linter due to backwards compatibility requirements
v2 "github.com/conduitio/conduit-connector-protocol/pconnector/v2"
"github.com/conduitio/conduit/pkg/plugin/connector"
"github.com/rs/zerolog"
Expand Down
2 changes: 2 additions & 0 deletions pkg/web/api/toproto/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ func PipelineStatus(in pipeline.Status) apiv1.Pipeline_Status {
return apiv1.Pipeline_STATUS_STOPPED
case pipeline.StatusDegraded:
return apiv1.Pipeline_STATUS_DEGRADED
case pipeline.StatusRecovering:
return apiv1.Pipeline_STATUS_RECOVERING
}
return apiv1.Pipeline_STATUS_UNSPECIFIED
}
Expand Down
5 changes: 3 additions & 2 deletions pkg/web/openapi/swagger-ui/api/v1/api.swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -2201,10 +2201,11 @@
"STATUS_UNSPECIFIED",
"STATUS_RUNNING",
"STATUS_STOPPED",
"STATUS_DEGRADED"
"STATUS_DEGRADED",
"STATUS_RECOVERING"
],
"default": "STATUS_UNSPECIFIED",
"description": "Status describes the pipeline status.\n\n - STATUS_RUNNING: Pipeline is running.\n - STATUS_STOPPED: Pipeline gracefully stopped.\n - STATUS_DEGRADED: Pipeline stopped with an error (see State.error)."
"description": "Status describes the pipeline status.\n\n - STATUS_RUNNING: Pipeline is running.\n - STATUS_STOPPED: Pipeline gracefully stopped.\n - STATUS_DEGRADED: Pipeline stopped with an error (see State.error).\n - STATUS_RECOVERING: Pipeline is recovering. This is the case in one of the following situations:\n(1) pipeline is being restarted\n(2) Conduit is backing off and pipeline will be restarted later\n(3) pipeline was restarted, but Conduit is checking if the pipeline is healthy."
},
"v1PluginSpecifications": {
"type": "object",
Expand Down
Loading