Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use dependency System: Part 2; Marking as unresolvable #197

Merged
merged 24 commits into from
Aug 16, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ go 1.21

require (
go.arcalot.io/assert v1.8.0
go.arcalot.io/dgraph v1.4.1
go.arcalot.io/dgraph v1.5.0
go.arcalot.io/lang v1.1.0
go.arcalot.io/log/v2 v2.1.0
go.flow.arcalot.io/deployer v0.6.1
go.flow.arcalot.io/dockerdeployer v0.7.2
go.flow.arcalot.io/expressions v0.4.3
go.flow.arcalot.io/kubernetesdeployer v0.9.3
go.flow.arcalot.io/pluginsdk v0.12.1
go.flow.arcalot.io/pluginsdk v0.12.2
go.flow.arcalot.io/podmandeployer v0.11.2
go.flow.arcalot.io/pythondeployer v0.6.1
go.flow.arcalot.io/testdeployer v0.6.1
Expand All @@ -29,7 +29,7 @@ require (
github.com/docker/go-units v0.5.0 // indirect
github.com/emicklei/go-restful/v3 v3.12.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fxamacker/cbor/v2 v2.6.0 // indirect
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-openapi/jsonpointer v0.21.0 // indirect
Expand Down
12 changes: 6 additions & 6 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ github.com/evanphx/json-patch v4.12.0+incompatible h1:4onqiflcdA9EOZ4RxV643DvftH
github.com/evanphx/json-patch v4.12.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/fxamacker/cbor/v2 v2.6.0 h1:sU6J2usfADwWlYDAFhZBQ6TnLFBHxgesMrQfQgk1tWA=
github.com/fxamacker/cbor/v2 v2.6.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
Expand Down Expand Up @@ -123,8 +123,8 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.arcalot.io/assert v1.8.0 h1:hGcHMPncQXwQvjj7MbyOu2gg8VIBB00crUJZpeQOjxs=
go.arcalot.io/assert v1.8.0/go.mod h1:nNmWPoNUHFyrPkNrD2aASm5yPuAfiWdB/4X7Lw3ykHk=
go.arcalot.io/dgraph v1.4.1 h1:y/lhJ68WzNUDR2BYSk7tZAZhVokts92svcrJLbK4Ebo=
go.arcalot.io/dgraph v1.4.1/go.mod h1:+Kxc81utiihMSmC1/ttSPGLDlWPpvgOpNxSFmIDPxFM=
go.arcalot.io/dgraph v1.5.0 h1:6cGlxLzmmehJoD/nj0Hkql7uh90EU0A0GtZhGYkr28M=
go.arcalot.io/dgraph v1.5.0/go.mod h1:+Kxc81utiihMSmC1/ttSPGLDlWPpvgOpNxSFmIDPxFM=
go.arcalot.io/exex v0.2.0 h1:u44pjwPwcH57TF8knhaqVZP/1V/KbnRe//pKzMwDpLw=
go.arcalot.io/exex v0.2.0/go.mod h1:5zlFr+7vOQNZKYCNOEDdsad+z/dlvXKs2v4kG+v+bQo=
go.arcalot.io/lang v1.1.0 h1:ugglRKpd3qIMkdghAjKJxsziIgHm8QpxrzZPSXoa08I=
Expand All @@ -139,8 +139,8 @@ go.flow.arcalot.io/expressions v0.4.3 h1:0BRRghutHp0sctsITHe/A1le0yYiJtKNTxm27T+
go.flow.arcalot.io/expressions v0.4.3/go.mod h1:UORX78N4ep71wOzNXdIo/UY+6SdDD0id0mvuRNEQMeM=
go.flow.arcalot.io/kubernetesdeployer v0.9.3 h1:XKiqmCqXb6ZLwP5IQTAKS/gJHpq0Ub/yEjCfgAwQF2A=
go.flow.arcalot.io/kubernetesdeployer v0.9.3/go.mod h1:DtB6HR7HBt/HA1vME0faIpOQ/lhfBJjL6OAGgT3Bu/Q=
go.flow.arcalot.io/pluginsdk v0.12.1 h1:HlWo1+Fn13u0EoeH9KpcWzdn1miqucNEvj0cirFhcA8=
go.flow.arcalot.io/pluginsdk v0.12.1/go.mod h1:J0RYsfD6g1WKMVSbLGZR/ZJBVdcjt+bOuKoOvnNPpy4=
go.flow.arcalot.io/pluginsdk v0.12.2 h1:ue+Ax1c3Bw0TmnuAEZ4r19D6ZA4dns9rP3NO4cvmTec=
go.flow.arcalot.io/pluginsdk v0.12.2/go.mod h1:U+7mtXo4aT3YNn9fNWZ3b5RBvXttm/iocc2/P4MGa88=
go.flow.arcalot.io/podmandeployer v0.11.2 h1:aqrHaNaCXYDREqDJpKhVeVIIZPiPld5SDvnUcc0Tjiw=
go.flow.arcalot.io/podmandeployer v0.11.2/go.mod h1:70+9M6eVQa0EEynDZ720P3AEzXt1gZto2lvoMlyjuzo=
go.flow.arcalot.io/pythondeployer v0.6.1 h1:IyaA9BVfHJ2fhC+fNfT6VicrtRGFlZOlSaAVGGPwo1E=
Expand Down
3 changes: 3 additions & 0 deletions internal/step/dummy/provider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ type stageChangeHandler struct {
message chan string
}

func (s *stageChangeHandler) OnStepStageFailure(_ step.RunningStep, _ string, _ *sync.WaitGroup, _ error) {
}

func (s *stageChangeHandler) OnStageChange(_ step.RunningStep, _ *string, _ *string, _ *any, _ string, _ bool, _ *sync.WaitGroup) {

}
Expand Down
53 changes: 50 additions & 3 deletions internal/step/plugin/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ func (p *pluginProvider) LoadSchema(inputs map[string]any, _ map[string][]byte)
requestedDeploymentType, pluginSource, err)
}
// Set up the ATP connection
transport := atp.NewClientWithLogger(pluginConnector, p.logger)
transport := atp.NewClientWithLogger(pluginConnector, p.logger.WithLabel("source", "atp-client"))
// Read the schema information
s, err := transport.ReadSchema()
if err != nil {
Expand Down Expand Up @@ -688,6 +688,7 @@ func (r *runnableStep) Start(input map[string]any, runID string, stageChangeHand
runID: runID,
}

s.wg.Add(1) // Wait for the run to finish before closing.
dbutenhof marked this conversation as resolved.
Show resolved Hide resolved
go s.run()

return s, nil
Expand Down Expand Up @@ -922,8 +923,8 @@ func (r *runningStep) closeComponents(closeATP bool) error {
return nil
}

// Note: Caller must add 1 to the waitgroup before calling.
func (r *runningStep) run() {
r.wg.Add(1) // Wait for the run to finish before closing.
defer func() {
r.cancel() // Close before WaitGroup done
r.wg.Done() // Done. Close may now exit.
Expand Down Expand Up @@ -958,6 +959,10 @@ func (r *runningStep) run() {
r.transitionToDisabled()
return
}

// It's enabled, so the disabled stage will not occur.
r.stageChangeHandler.OnStepStageFailure(r, string(StageIDDisabled), &r.wg, err)
dbutenhof marked this conversation as resolved.
Show resolved Hide resolved

if err := r.startStage(container); err != nil {
r.startFailed(err)
return
Expand Down Expand Up @@ -1055,7 +1060,7 @@ func (r *runningStep) enableStage() (bool, error) {

func (r *runningStep) startStage(container deployer.Plugin) error {
r.logger.Debugf("Starting stage for step %s/%s", r.runID, r.pluginStepID)
atpClient := atp.NewClientWithLogger(container, r.logger)
atpClient := atp.NewClientWithLogger(container, r.logger.WithLabel("source", "atp-client"))
var inputReceivedEarly bool
r.lock.Lock()
r.atpClient = atpClient
Expand Down Expand Up @@ -1118,9 +1123,12 @@ func (r *runningStep) startStage(container deployer.Plugin) error {
return fmt.Errorf("schema mismatch between local and remote deployed plugin in step %s/%s, unserializing input failed (%w)", r.runID, r.pluginStepID, err)
}

r.wg.Add(1)

// Runs the ATP client in a goroutine in order to wait for it.
// On context done, the deployer has 30 seconds before it will error out.
go func() {
defer r.wg.Done()
result := r.atpClient.Execute(
schema.Input{RunID: r.runID, ID: r.pluginStepID, InputData: runInput},
r.signalToStep,
Expand Down Expand Up @@ -1169,6 +1177,27 @@ func (r *runningStep) runStage() error {
return nil
}

func (r *runningStep) markStageFailures(firstStage StageID, err error) {
switch firstStage {
case StageIDEnabling:
r.stageChangeHandler.OnStepStageFailure(r, string(StageIDEnabling), &r.wg, err)
fallthrough
case StageIDDisabled:
r.stageChangeHandler.OnStepStageFailure(r, string(StageIDDisabled), &r.wg, err)
fallthrough
case StageIDStarting:
r.stageChangeHandler.OnStepStageFailure(r, string(StageIDStarting), &r.wg, err)
fallthrough
case StageIDRunning:
r.stageChangeHandler.OnStepStageFailure(r, string(StageIDRunning), &r.wg, err)
fallthrough
case StageIDOutput:
r.stageChangeHandler.OnStepStageFailure(r, string(StageIDOutput), &r.wg, err)
default:
panic("unknown StageID")
}
}

func (r *runningStep) deployFailed(err error) {
r.logger.Debugf("Deploy failed stage for step %s/%s", r.runID, r.pluginStepID)
r.transitionStage(StageIDDeployFailed, step.RunningStepStateRunning)
Expand All @@ -1180,6 +1209,9 @@ func (r *runningStep) deployFailed(err error) {
Error: err.Error(),
})
r.completeStep(StageIDDeployFailed, step.RunningStepStateFinished, &outputID, &output)
// If deployment fails, enabling, disabled, starting, running, and output cannot occur.
err = fmt.Errorf("deployment failed for step %s/%s", r.runID, r.pluginStepID)
r.markStageFailures(StageIDEnabling, err)
}

func (r *runningStep) transitionToCancelled() {
Expand All @@ -1188,6 +1220,12 @@ func (r *runningStep) transitionToCancelled() {
r.transitionStage(StageIDCancelled, step.RunningStepStateRunning)
// Cancelled currently has no output.
r.transitionStage(StageIDCancelled, step.RunningStepStateFinished)

// This is called after deployment. So everything after deployment cannot occur.
err := fmt.Errorf("step %s/%s cancelled", r.runID, r.pluginStepID)
// Note: This function is only called if it's cancelled during the deployment phase.
// If that changes, the stage IDs marked as failed need to be changed.
r.markStageFailures(StageIDEnabling, err)
}

func (r *runningStep) transitionToDisabled() {
Expand All @@ -1207,6 +1245,10 @@ func (r *runningStep) transitionToDisabled() {
schema.PointerTo("output"),
&disabledOutput,
)

err := fmt.Errorf("step %s/%s disabled", r.runID, r.pluginStepID)
r.markStageFailures(StageIDStarting, err)

}

func (r *runningStep) startFailed(err error) {
Expand All @@ -1221,6 +1263,9 @@ func (r *runningStep) startFailed(err error) {
})

r.completeStep(StageIDCrashed, step.RunningStepStateFinished, &outputID, &output)

r.markStageFailures(StageIDRunning, err)

}

func (r *runningStep) runFailed(err error) {
Expand All @@ -1234,6 +1279,8 @@ func (r *runningStep) runFailed(err error) {
Output: err.Error(),
})
r.completeStep(StageIDCrashed, step.RunningStepStateFinished, &outputID, &output)

r.markStageFailures(StageIDOutput, err)
}

// TransitionStage transitions the stage to the specified stage, and the state to the specified state.
Expand Down
12 changes: 12 additions & 0 deletions internal/step/plugin/provider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ func (s *deployFailStageChangeHandler) OnStepComplete(
s.message <- message
}

func (s *deployFailStageChangeHandler) OnStepStageFailure(_ step.RunningStep, _ string, _ *sync.WaitGroup, _ error) {

}

type startFailStageChangeHandler struct {
message chan string
}
Expand Down Expand Up @@ -79,6 +83,10 @@ func (s *startFailStageChangeHandler) OnStepComplete(
s.message <- message
}

func (s *startFailStageChangeHandler) OnStepStageFailure(_ step.RunningStep, _ string, _ *sync.WaitGroup, _ error) {

}

type stageChangeHandler struct {
message chan string
}
Expand Down Expand Up @@ -111,6 +119,10 @@ func (s *stageChangeHandler) OnStepComplete(
s.message <- message
}

func (s *stageChangeHandler) OnStepStageFailure(_ step.RunningStep, _ string, _ *sync.WaitGroup, _ error) {

}

func TestProvider_MissingDeployer(t *testing.T) {
logger := log.New(
log.Config{
Expand Down
9 changes: 9 additions & 0 deletions internal/step/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,15 @@ type StageChangeHandler interface {
previousStageOutput *any,
wg *sync.WaitGroup,
)

// OnStepStageFailure is called when it becomes known that the step's stage will not produce an output.
// The error is optional.
OnStepStageFailure(
step RunningStep,
stage string,
wg *sync.WaitGroup,
err error,
)
}

// RunnableStep is a step that already has a schema and can be run.
Expand Down
37 changes: 14 additions & 23 deletions workflow/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,10 @@ package workflow

import (
"fmt"
"regexp"
"strings"

"go.arcalot.io/dgraph"
"go.flow.arcalot.io/engine/internal/step"
"go.flow.arcalot.io/pluginsdk/schema"
"regexp"
)

// Workflow is the primary data structure describing workflows.
Expand Down Expand Up @@ -229,30 +227,23 @@ type ErrNoMorePossibleSteps struct {

// Error returns an explanation on why the error happened.
func (e ErrNoMorePossibleSteps) Error() string {
var outputsUnmetDependencies []string //nolint:prealloc
for _, node := range e.dag.ListNodes() {
if node.Item().Kind != DAGItemKindOutput {
continue
}
var unmetDependencies []string
inbound, err := node.ListInboundConnections()
if err != nil {
panic(fmt.Errorf("failed to fetch output node inbound dependencies (%w)", err))
}
for i := range inbound {
unmetDependencies = append(unmetDependencies, i)
}
outputsUnmetDependencies = append(
outputsUnmetDependencies,
fmt.Sprintf("%s: %s", node.Item().OutputID, strings.Join(unmetDependencies, ", ")),
)
}
return fmt.Sprintf(
"no steps running, no more executable steps, cannot construct any output (outputs have the following dependencies: %s)",
strings.Join(outputsUnmetDependencies, "; "),
"no steps running, no more executable steps; cannot construct any output." +
" this is the fallback system, indicating a failure of the output resolution system",
)
}

// ErrNoMorePossibleOutputs indicates that the workflow has terminated due to it being impossible to resolve an output.
// This means that steps that the output(s) depended on did not have the required results.
type ErrNoMorePossibleOutputs struct {
dag dgraph.DirectedGraph[*DAGItem]
}

// Error returns an explanation on why the error happened.
func (e ErrNoMorePossibleOutputs) Error() string {
return "all outputs marked as unresolvable"
}

// ErrInvalidState indicates that the workflow failed due to an invalid state.
type ErrInvalidState struct {
processingSteps int
Expand Down
10 changes: 5 additions & 5 deletions workflow/model_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"testing"
)

var versionExp = "v0.2.0"
var testVersionExp = "v0.2.0"
var inputExp = map[string]any{
"root": "RootObject",
"objects": map[string]any{
Expand All @@ -24,9 +24,9 @@ var stepsExp = map[string]any{
"input": map[string]any{
"wait_time_ms": 1}},
}
var outputID = "success"
var testExpectedOutputID = "success"
var outputsExp = map[string]any{
outputID: "!expr $.steps.long_wait.outputs",
testExpectedOutputID: "!expr $.steps.long_wait.outputs",
}
var outputSchemaRootID = "RootObjectOut"
var stepOutputSchemaInput = map[string]any{
Expand All @@ -42,10 +42,10 @@ var stepOutputSchemaInput = map[string]any{
}}}}}},
}
var outputSchemaInput = map[string]any{
outputID: stepOutputSchemaInput,
testExpectedOutputID: stepOutputSchemaInput,
}
var workflowSchemaInput = map[string]any{
"version": versionExp,
"version": testVersionExp,
"input": inputExp,
"steps": stepsExp,
"outputs": outputsExp,
Expand Down
Loading
Loading