From 8981b960fedb7c8490913e5ba4fae84157b9a7c9 Mon Sep 17 00:00:00 2001 From: Jared O'Connell <46976761+jaredoconnell@users.noreply.github.com> Date: Mon, 6 May 2024 09:11:55 -0400 Subject: [PATCH] Disabled steps (#180) * Transition to stage cancelled This fixes a potential deadlock in a specific case * Improved test documentation * Improved documentation * Fix linter error * Ignore nolintlint for specific problematic case * Explain why nolint unparam is required * Address review comments * Address review comments * Setup basic version of disabling steps * Added disabled stage, with an output * Implement code to replace TODO * Address review comments --- internal/step/plugin/provider.go | 279 +++++++++++++++++++++----- internal/step/plugin/provider_test.go | 12 ++ workflow/workflow_test.go | 119 ++++++++++- 3 files changed, 358 insertions(+), 52 deletions(-) diff --git a/internal/step/plugin/provider.go b/internal/step/plugin/provider.go index 10447053..23ad9798 100644 --- a/internal/step/plugin/provider.go +++ b/internal/step/plugin/provider.go @@ -147,6 +147,11 @@ const ( StageIDDeployFailed StageID = "deploy_failed" // StageIDRunning is a stage that indicates that a plugin is now working. StageIDRunning StageID = "running" + // StageIDEnabling is a stage that indicates that the plugin is waiting to be enabled. + // This is required to be separate to ensure that it exits immediately if disabled. + StageIDEnabling StageID = "enabling" + // StageIDDisabled is a stage that indicates that the plugin's step was disabled. + StageIDDisabled StageID = "disabled" // StageIDCancelled is a stage that indicates that the plugin's step was cancelled. StageIDCancelled StageID = "cancelled" // StageIDOutput is a stage that indicates that the plugin has completed working successfully. @@ -176,6 +181,19 @@ var deployFailedLifecycleStage = step.LifecycleStage{ Fatal: true, } +var enablingLifecycleStage = step.LifecycleStage{ + ID: string(StageIDEnabling), + WaitingName: "waiting to be enabled", + RunningName: "enabling", + FinishedName: "enablement determined", + InputFields: map[string]struct{}{ + "enabled": {}, + }, + NextStages: []string{ + string(StageIDStarting), string(StageIDDisabled), string(StageIDCrashed), + }, +} + var startingLifecycleStage = step.LifecycleStage{ ID: string(StageIDStarting), WaitingName: "waiting to start", @@ -213,6 +231,15 @@ var cancelledLifecycleStage = step.LifecycleStage{ string(StageIDOutput), string(StageIDCrashed), string(StageIDDeployFailed), }, } + +var disabledLifecycleStage = step.LifecycleStage{ + ID: string(StageIDDisabled), + WaitingName: "waiting for the step to be disabled", + RunningName: "disabling", + FinishedName: "disabled", + InputFields: map[string]struct{}{}, +} + var finishedLifecycleStage = step.LifecycleStage{ ID: string(StageIDOutput), WaitingName: "finished", @@ -233,9 +260,11 @@ func (p *pluginProvider) Lifecycle() step.Lifecycle[step.LifecycleStage] { Stages: []step.LifecycleStage{ deployingLifecycleStage, deployFailedLifecycleStage, + enablingLifecycleStage, startingLifecycleStage, runningLifecycleStage, cancelledLifecycleStage, + disabledLifecycleStage, finishedLifecycleStage, crashedLifecycleStage, }, @@ -338,6 +367,60 @@ func (r *runnableStep) StartedSchema() *schema.StepOutputSchema { ) } +func (r *runnableStep) EnabledOutputSchema() *schema.StepOutputSchema { + return schema.NewStepOutputSchema( + schema.NewScopeSchema( + schema.NewObjectSchema( + "EnabledOutput", + map[string]*schema.PropertySchema{ + "enabled": schema.NewPropertySchema( + schema.NewBoolSchema(), + schema.NewDisplayValue( + schema.PointerTo("enabled"), + schema.PointerTo("Whether the step was enabled"), + nil), + true, + nil, + nil, + nil, + nil, + nil, + ), + }, + ), + ), + nil, + false, + ) +} + +func (r *runnableStep) DisabledOutputSchema() *schema.StepOutputSchema { + return schema.NewStepOutputSchema( + schema.NewScopeSchema( + schema.NewObjectSchema( + "DisabledMessageOutput", + map[string]*schema.PropertySchema{ + "message": schema.NewPropertySchema( + schema.NewStringSchema(nil, nil, nil), + schema.NewDisplayValue( + schema.PointerTo("message"), + schema.PointerTo("A human readable message stating that the step was disabled."), + nil), + true, + nil, + nil, + nil, + nil, + nil, + ), + }, + ), + ), + nil, + false, + ) +} + func (r *runnableStep) Lifecycle(input map[string]any) (result step.Lifecycle[step.LifecycleStageWithSchema], err error) { rawStepID, ok := input["step"] if !ok || rawStepID == nil { @@ -436,6 +519,28 @@ func (r *runnableStep) Lifecycle(input map[string]any) (result step.Lifecycle[st }, }, }, + { + LifecycleStage: enablingLifecycleStage, + InputSchema: map[string]*schema.PropertySchema{ + "enabled": schema.NewPropertySchema( + schema.NewBoolSchema(), + schema.NewDisplayValue( + schema.PointerTo("Enabled"), + schema.PointerTo("Used to set whether the step is enabled."), + nil, + ), + false, + nil, + nil, + nil, + nil, + nil, + ), + }, + Outputs: map[string]*schema.StepOutputSchema{ + "resolved": r.EnabledOutputSchema(), + }, + }, { LifecycleStage: startingLifecycleStage, InputSchema: map[string]*schema.PropertySchema{ @@ -480,6 +585,13 @@ func (r *runnableStep) Lifecycle(input map[string]any) (result step.Lifecycle[st }, Outputs: nil, }, + { + LifecycleStage: disabledLifecycleStage, + InputSchema: nil, + Outputs: map[string]*schema.StepOutputSchema{ + "output": r.DisabledOutputSchema(), + }, + }, { LifecycleStage: finishedLifecycleStage, InputSchema: nil, @@ -563,6 +675,7 @@ func (r *runnableStep) Start(input map[string]any, runID string, stageChangeHand cancel: cancel, deployInput: make(chan any, 1), runInput: make(chan any, 1), + enabledInput: make(chan bool, 1), logger: r.logger, deploymentType: r.deploymentType, source: r.source, @@ -581,32 +694,34 @@ func (r *runnableStep) Start(input map[string]any, runID string, stageChangeHand } type runningStep struct { - deployerRegistry registry.Registry - stepSchema schema.Step - stageChangeHandler step.StageChangeHandler - lock *sync.Mutex - wg sync.WaitGroup - ctx context.Context - cancel context.CancelFunc - atpClient atp.Client - deployInput chan any - deployInputAvailable bool - runInput chan any - runInputAvailable bool - logger log.Logger - currentStage StageID - runID string // The ID associated with this execution (the workflow step ID) - deploymentType deployer.DeploymentType - source string - pluginStepID string // The ID of the step in the plugin - state step.RunningStepState - useLocalDeployer bool - localDeployer deployer.Connector - container deployer.Plugin - executionChannel chan atp.ExecutionResult - signalToStep chan schema.Input // Communicates with the ATP client, not other steps. - signalFromStep chan schema.Input // Communicates with the ATP client, not other steps. - closed bool + deployerRegistry registry.Registry + stepSchema schema.Step + stageChangeHandler step.StageChangeHandler + lock *sync.Mutex + wg sync.WaitGroup + ctx context.Context + cancel context.CancelFunc + atpClient atp.Client + deployInput chan any + deployInputAvailable bool + enabledInput chan bool + enabledInputAvailable bool + runInput chan any + runInputAvailable bool + logger log.Logger + currentStage StageID + runID string // The ID associated with this execution (the workflow step ID) + deploymentType deployer.DeploymentType + source string + pluginStepID string // The ID of the step in the plugin + state step.RunningStepState + useLocalDeployer bool + localDeployer deployer.Connector + container deployer.Plugin + executionChannel chan atp.ExecutionResult + signalToStep chan schema.Input // Communicates with the ATP client, not other steps. + signalFromStep chan schema.Input // Communicates with the ATP client, not other steps. + closed bool // Store channels for sending pre-calculated signal outputs to other steps? // Store channels for receiving pre-calculated signal inputs from other steps? } @@ -640,6 +755,8 @@ func (r *runningStep) ProvideStageInput(stage string, input map[string]any) erro return r.provideDeployInput(input) case string(StageIDStarting): return r.provideStartingInput(input) + case string(StageIDEnabling): + return r.provideEnablingInput(input) case string(StageIDRunning): return nil case string(StageIDCancelled): @@ -688,10 +805,23 @@ func (r *runningStep) provideDeployInput(input map[string]any) error { return nil } +func (r *runningStep) provideEnablingInput(input map[string]any) error { + // Note: The calling function must have the step mutex locked + if r.enabledInputAvailable { + return fmt.Errorf("enabled input provided more than once") + } + // Check to make sure it's enabled. + // This is an optional field, so no input means enabled. + enabled := input["enabled"] == nil || input["enabled"] == true + r.enabledInputAvailable = true + r.enabledInput <- enabled + return nil +} + func (r *runningStep) provideStartingInput(input map[string]any) error { // Note: The calling function must have the step mutex locked if r.runInputAvailable { - return fmt.Errorf("input provided more than once") + return fmt.Errorf("starting input provided more than once") } // Ensure input is given if input["input"] == nil { @@ -817,6 +947,17 @@ func (r *runningStep) run() { } r.lock.Unlock() r.logger.Debugf("Successfully deployed container with ID '%s' for step %s/%s", container.ID(), r.runID, r.pluginStepID) + + r.logger.Debugf("Checking to see if step %s/%s is enabled", r.runID, r.pluginStepID) + enabled, err := r.enableStage() + if err != nil { + r.startFailed(err) + } + r.logger.Debugf("Step %s/%s enablement state: %t", r.runID, r.pluginStepID, enabled) + if !enabled { + r.transitionToDisabled() + return + } if err := r.startStage(container); err != nil { r.startFailed(err) return @@ -885,26 +1026,13 @@ func (r *runningStep) deployStage() (deployer.Plugin, error) { return container, nil } -func (r *runningStep) startStage(container deployer.Plugin) error { - r.logger.Debugf("Starting stage for step %s/%s", r.runID, r.pluginStepID) - atpClient := atp.NewClientWithLogger(container, r.logger) +// enableStage returns the result of whether the stage was enabled or not. +func (r *runningStep) enableStage() (bool, error) { r.lock.Lock() previousStage := string(r.currentStage) - r.currentStage = StageIDStarting - inputRecievedEarly := false - r.atpClient = atpClient - - var runInput any - select { - case runInput = <-r.runInput: - // Good. It received it immediately. - r.state = step.RunningStepStateRunning - inputRecievedEarly = true - default: // The default makes it not wait. - r.state = step.RunningStepStateWaitingForInput - } - - runInputAvailable := r.runInputAvailable + r.currentStage = StageIDEnabling + enabledInputAvailable := r.enabledInputAvailable + r.state = step.RunningStepStateWaitingForInput r.lock.Unlock() r.stageChangeHandler.OnStageChange( @@ -912,20 +1040,52 @@ func (r *runningStep) startStage(container deployer.Plugin) error { &previousStage, nil, nil, - string(StageIDStarting), - runInputAvailable, + string(StageIDEnabling), + enabledInputAvailable, &r.wg, ) + select { + case enabled := <-r.enabledInput: + return enabled, nil + case <-r.ctx.Done(): + return false, fmt.Errorf("step closed while determining enablement status") + } +} + +func (r *runningStep) startStage(container deployer.Plugin) error { + r.logger.Debugf("Starting stage for step %s/%s", r.runID, r.pluginStepID) + atpClient := atp.NewClientWithLogger(container, r.logger) + var inputReceivedEarly bool r.lock.Lock() - r.currentStage = StageIDStarting - r.logger.Debugf("Waiting for input state while starting 2.") + r.atpClient = atpClient r.lock.Unlock() + var runInput any + var newState step.RunningStepState + select { + case runInput = <-r.runInput: + // Good. It received it immediately. + newState = step.RunningStepStateRunning + inputReceivedEarly = true + default: // The default makes it not wait. + newState = step.RunningStepStateWaitingForInput + inputReceivedEarly = false + } + + enabledOutput := any(map[any]any{"enabled": true}) + // End Enabling with resolved output, and start starting + r.transitionStageWithOutput( + StageIDStarting, + newState, + schema.PointerTo("resolved"), + &enabledOutput, + ) + // First, try to non-blocking retrieve the runInput. // If not yet available, set to state waiting for input and do a blocking receive. // If it is available, continue. - if !inputRecievedEarly { + if !inputReceivedEarly { // Input is not yet available. Now waiting. r.lock.Lock() if r.state != step.RunningStepStateWaitingForInput { @@ -1030,6 +1190,25 @@ func (r *runningStep) transitionToCancelled() { r.transitionStage(StageIDCancelled, step.RunningStepStateFinished) } +func (r *runningStep) transitionToDisabled() { + r.logger.Infof("Step %s/%s disabled", r.runID, r.pluginStepID) + enabledOutput := any(map[any]any{"enabled": false}) + // End prior stage "enabling" with "resolved" output, and start "disabled" stage. + r.transitionStageWithOutput( + StageIDDisabled, + step.RunningStepStateRunning, + schema.PointerTo("resolved"), + &enabledOutput, + ) + disabledOutput := any(map[any]any{"message": fmt.Sprintf("Step %s/%s disabled", r.runID, r.pluginStepID)}) + r.completeStep( + StageIDDisabled, + step.RunningStepStateFinished, // Must set the stage to finished for the engine realize the step is done. + schema.PointerTo("output"), + &disabledOutput, + ) +} + func (r *runningStep) startFailed(err error) { r.logger.Debugf("Start failed stage for step %s/%s", r.runID, r.pluginStepID) r.transitionStage(StageIDCrashed, step.RunningStepStateRunning) diff --git a/internal/step/plugin/provider_test.go b/internal/step/plugin/provider_test.go index e695355f..84a523e0 100644 --- a/internal/step/plugin/provider_test.go +++ b/internal/step/plugin/provider_test.go @@ -250,6 +250,13 @@ func TestProvider_HappyError(t *testing.T) { "deploy_time": "abc"}}, )) + // Enable the step + assert.NoError(t, running.ProvideStageInput( + string(plugin.StageIDEnabling), + map[string]any{}, + )) + + // Start with valid input assert.NoError(t, running.ProvideStageInput( string(plugin.StageIDDeploy), map[string]any{"deploy": map[string]any{ @@ -485,6 +492,11 @@ func TestProvider_StartFail(t *testing.T) { "wait_time_ms": 50}}, )) + assert.NoError(t, running.ProvideStageInput( + string(plugin.StageIDEnabling), + map[string]any{}, + )) + // wait for message, but we don't care about its value <-handler.message diff --git a/workflow/workflow_test.go b/workflow/workflow_test.go index c12d45a4..657b91e2 100644 --- a/workflow/workflow_test.go +++ b/workflow/workflow_test.go @@ -729,13 +729,13 @@ steps: wait_time_ms: 0 deploy: deployer_name: "test-impl" - #deploy_time: 20000 # 10 ms - deploy_succeed: false + deploy_succeed: false # This step will fail due to this. wait_2: plugin: src: "n/a" deployment_type: "builtin" step: wait + # This step waits for the failing step here. wait_for: !expr $.steps.wait_1.outputs.success input: wait_time_ms: 0 @@ -1213,6 +1213,121 @@ func TestInputCancelledStepWorkflow(t *testing.T) { assert.Contains(t, err.Error(), "cannot construct any output") } +var inputDisabledStepWorkflow = ` +version: v0.2.0 +input: + root: WorkflowInput + objects: + WorkflowInput: + id: WorkflowInput + properties: + step_enabled: + type: + type_id: bool +steps: + simple_wait: + plugin: + src: "n/a" + deployment_type: "builtin" + step: wait + input: + wait_time_ms: 20 + enabled: !expr $.input.step_enabled +outputs: + success: + simple_wait_output: !expr $.steps.simple_wait.outputs.success +` + +func TestInputDisabledStepWorkflow(t *testing.T) { + // Run a workflow with one step that has its enablement state + // set by the input. The output depends on successful output + // of the step that can be disabled, so this test case also + // tests that the failure caused when it's disabled doesn't + // lead to a deadlock. + preparedWorkflow := assert.NoErrorR[workflow.ExecutableWorkflow](t)( + getTestImplPreparedWorkflow(t, inputDisabledStepWorkflow), + ) + // The workflow should pass with it enabled + outputID, _, err := preparedWorkflow.Execute(context.Background(), map[string]any{ + "step_enabled": true, + }) + assert.NoError(t, err) + assert.Equals(t, outputID, "success") + // The workflow should fail with it disabled because the output cannot be resolved. + _, _, err = preparedWorkflow.Execute(context.Background(), map[string]any{ + "step_enabled": false, + }) + assert.Error(t, err) + assert.Contains(t, err.Error(), "cannot construct any output") +} + +var dynamicDisabledStepWorkflow = ` +version: v0.2.0 +input: + root: WorkflowInput + objects: + WorkflowInput: + id: WorkflowInput + properties: + sleep_time: + type: + type_id: integer +steps: + initial_wait: + plugin: + src: "n/a" + deployment_type: "builtin" + step: wait + input: + wait_time_ms: !expr $.input.sleep_time # ms + toggled_wait: + plugin: + src: "n/a" + deployment_type: "builtin" + step: wait + input: + wait_time_ms: 0 + enabled: !expr $.steps.initial_wait.outputs.success.message == "Plugin slept for 20 ms." +outputs: + success: + initial_wait_output: !expr $.steps.initial_wait.outputs.success + toggled_wait_output: !expr $.steps.toggled_wait.outputs.success + disabled: + initial_wait_output: !expr $.steps.initial_wait.outputs.success + toggled_wait_output: !expr $.steps.toggled_wait.disabled.output +` + +func TestDelayedDisabledStepWorkflow(t *testing.T) { + // Run a workflow where the step is disabled by a value that isn't available + // at the start of the workflow; in this case the step is disabled from + // another step's output. + // This workflow has an output for success and an output for disabled. + preparedWorkflow := assert.NoErrorR[workflow.ExecutableWorkflow](t)( + getTestImplPreparedWorkflow(t, dynamicDisabledStepWorkflow), + ) + // The second step expects a 20ms sleep/wait. + // Pass with a 20ms input. + outputID, _, err := preparedWorkflow.Execute(context.Background(), map[string]any{ + "sleep_time": 20, + }) + assert.NoError(t, err) + assert.Equals(t, outputID, "success") + // Fail with a non-20ms input. + outputID, outputData, err := preparedWorkflow.Execute(context.Background(), map[string]any{ + "sleep_time": 19, + }) + assert.NoError(t, err) + assert.Equals(t, outputID, "disabled") + assert.InstanceOf[map[any]any](t, outputData) + outputMap := outputData.(map[any]any) + assert.MapContainsKey(t, "toggled_wait_output", outputMap) + toggledOutput := outputMap["toggled_wait_output"] + assert.InstanceOf[map[any]any](t, toggledOutput) + toggledOutputMap := toggledOutput.(map[any]any) + assert.MapContainsKey(t, "message", toggledOutputMap) + assert.Equals(t, toggledOutputMap["message"], "Step toggled_wait/wait disabled") +} + func createTestExecutableWorkflow(t *testing.T, workflowStr string, workflowCtx map[string][]byte) (workflow.ExecutableWorkflow, error) { logConfig := log.Config{ Level: log.LevelDebug,