Skip to content

Commit

Permalink
simpler and accurate dependency cycle detection (#14)
Browse files Browse the repository at this point in the history
* simpler and accurate dependency cycle detection

* clarify comment

* pr feedback and fix potential infnite loop
  • Loading branch information
mmoghaddam385 authored Oct 20, 2021
1 parent c43b7ba commit 8f15175
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 63 deletions.
82 changes: 31 additions & 51 deletions schemas/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,65 +170,45 @@ func (g dependencyGraph) findUnblockedErrands() []*Errand {
}

// checkForDependencyCycles returns an error if it finds a cyclic dependency in the dependencyGraph.
// It determines cycles by doing the following:
// For each node N, perform a depth first traversal of G starting at N.
// If we ever encounter N again during the traversal, we've detected a cycle.
func (g dependencyGraph) checkForDependencyCycles() error {
independentErrands := g.findUnblockedErrands()
if len(independentErrands) == 0 {
return fmt.Errorf("dependency cycle found; all errands have dependencies")
}

// Prime the visit stack with the first independent errand and remove it from the list.
toVisitStack := []*Errand{independentErrands[0]}
independentErrands = independentErrands[1:]

// visitedSet keeps track of all the errands we've already seen so we can ensure we've seen all of them
visitedSet := make(map[string]struct{}, len(g.dependencyToDependents))

// currentTreeVisitedSet keeps track of the errands we've seen in the current tree traversal.
// This set gets cleared if we run out of errands in a tree and have to start at a new root node.
// It's possible that two trees share some nodes without forming a cycle.
// For example, consider this graph:
//
// A --> B --|
// |--> C --> E
// D --|
//
// Both A and D are root nodes whose trees both contain C and E, however they are not in a cycle.
currentTreeVisitedSet := make(map[string]struct{}, len(g.dependencyToDependents))

for len(toVisitStack) > 0 {
topOfStackIndex := len(toVisitStack) - 1
errand := toVisitStack[topOfStackIndex]
toVisitStack = toVisitStack[:topOfStackIndex] // Pop off the last value from the stack

// If we've seen this errand already in this tree, we found a cycle!
if _, exists := currentTreeVisitedSet[errand.Name]; exists {
return fmt.Errorf("dependency cycle found involving '%s'", errand.Name)
}

// Add this errand to the visitedSet
visitedSet[errand.Name] = struct{}{}
currentTreeVisitedSet[errand.Name] = struct{}{}

// Add all of this errand's dependencies to the visit stack
toVisitStack = append(toVisitStack, g.dependencyToDependents[errand.Name]...)

// If our visit stack is empty, we've exhausted the nodes in this tree without finding a cycle.
// If we have more independent (root) errands, add the next one to the visit stack and reset our current tree visited set.
if len(toVisitStack) == 0 && len(independentErrands) > 0 {
toVisitStack = independentErrands[0:1]
independentErrands = independentErrands[1:]
currentTreeVisitedSet = make(map[string]struct{})
for _, currentHomeErrand := range g.errands {
toVisitStack := []*Errand{currentHomeErrand}
currentTreeVisitedSet := make(map[string]struct{}, len(g.dependencyToDependents))

// While we have nodes to visit, keep traversing
for len(toVisitStack) > 0 {
topOfStackIndex := len(toVisitStack) - 1
errand := toVisitStack[topOfStackIndex]
toVisitStack = toVisitStack[:topOfStackIndex] // Pop off the last value from the stack

// If we've seen this node before, we may have detected a cycle.
if _, exists := currentTreeVisitedSet[errand.Name]; exists {
// If the errand we've seen before is the currentHomeErrand then we know for certain
// that we found a cycle.
if errand.Name == currentHomeErrand.Name {
return fmt.Errorf("dependency cycle found involving '%s'", errand.Name)
}

// If we've already visited this node, but it's not the start node, we _might_ have
// found a cycle, but we can't be sure until we traverse starting from that node.
// Until then, don't continue to add dependencies from this duplicate node to the visit stack.
} else {
// If we haven't seen this node before, Add add it to the visited set
currentTreeVisitedSet[errand.Name] = struct{}{}

// Add all of this errand's dependencies to the visit stack
toVisitStack = append(toVisitStack, g.dependencyToDependents[errand.Name]...)
}
}
}

// If we visited fewer nodes than there were in the graph, it means our independent nodes didn't
// lead us everywhere. That's only possible if there's a strongly connected component in the graph somewhere.
// Consider the example: A <--> B C
// In this graph, C is the only independent errand and has no connection to A or C,
// Neither A nor B are independent so we'll never visit them.
if len(visitedSet) < len(g.dependencyToDependents) {
return fmt.Errorf("dependency cycle found; there is a strongly connected subgraph which was not visited")
}

return nil
}
25 changes: 13 additions & 12 deletions schemas/pipeline_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
func TestPipelineValidate(t *testing.T) {
t.Run("no errands", func(t *testing.T) {
p := Pipeline{Name: "no errands!"}
assert.EqualError(t, p.Validate(), "no errands specified in pipeline")
assert.Error(t, p.Validate())
})

t.Run("errands with duplicate name", func(t *testing.T) {
Expand All @@ -22,7 +22,7 @@ func TestPipelineValidate(t *testing.T) {
}},
}

assert.EqualError(t, p.Validate(), "duplicate name in errands list: errand")
assert.Error(t, p.Validate())
})

t.Run("errand with self dependency", func(t *testing.T) {
Expand All @@ -35,7 +35,7 @@ func TestPipelineValidate(t *testing.T) {
}},
}

assert.EqualError(t, p.Validate(), "errand cannot depend on itself: single errand")
assert.Error(t, p.Validate())
})

t.Run("dependency with invalid target", func(t *testing.T) {
Expand All @@ -48,7 +48,7 @@ func TestPipelineValidate(t *testing.T) {
}},
}

assert.EqualError(t, p.Validate(), "dependency references unknown errand name: not a real errand")
assert.Error(t, p.Validate())
})

t.Run("dependency with invalid dependsOn", func(t *testing.T) {
Expand All @@ -61,7 +61,7 @@ func TestPipelineValidate(t *testing.T) {
}},
}

assert.EqualError(t, p.Validate(), "dependency references unknown errand name: not a real dependency")
assert.Error(t, p.Validate())
})

t.Run("simple dependency cycle | 2 errands", func(t *testing.T) {
Expand All @@ -80,7 +80,7 @@ func TestPipelineValidate(t *testing.T) {
},
}

assert.EqualError(t, p.Validate(), "dependency cycle found; all errands have dependencies")
assert.Error(t, p.Validate())
})

t.Run("strongly connected subgraph cycle", func(t *testing.T) {
Expand All @@ -100,7 +100,7 @@ func TestPipelineValidate(t *testing.T) {
},
}

assert.EqualError(t, p.Validate(), "dependency cycle found; there is a strongly connected subgraph which was not visited")
assert.Error(t, p.Validate())
})

t.Run("single graph with cycle", func(t *testing.T) {
Expand All @@ -121,7 +121,7 @@ func TestPipelineValidate(t *testing.T) {
},
}

assert.EqualError(t, p.Validate(), "dependency cycle found involving 'B'")
assert.Error(t, p.Validate())
})

t.Run("multiple sub-graphs one with cycle", func(t *testing.T) {
Expand All @@ -148,7 +148,7 @@ func TestPipelineValidate(t *testing.T) {
},
}

assert.EqualError(t, p.Validate(), "dependency cycle found involving 'B'")
assert.Error(t, p.Validate())
})

t.Run("multiple sub-graphs happy path", func(t *testing.T) {
Expand Down Expand Up @@ -229,9 +229,9 @@ func TestPipelineValidate(t *testing.T) {

t.Run("single graph happy path | converging and diverging", func(t *testing.T) {
/*
A --> B --| |--> E
|--> C --|
D --| |--> F --> G
|--> B --| |--> E
A -->| |--> C --|
|--> D --| |--> F --> G
*/
p := Pipeline{
Name: "single graph with cycle",
Expand All @@ -245,6 +245,7 @@ func TestPipelineValidate(t *testing.T) {
{Name: "G"},
},
Dependencies: []*PipelineDependency{
{Target: "D", DependsOn: "A"},
{Target: "B", DependsOn: "A"},
{Target: "C", DependsOn: "B"},
{Target: "C", DependsOn: "D"},
Expand Down

0 comments on commit 8f15175

Please sign in to comment.