Skip to content

Commit

Permalink
fix(GenerateDAG): new algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
Antoine Gelloz committed Jun 3, 2024
1 parent 5a76192 commit 0b9d64d
Showing 1 changed file with 193 additions and 155 deletions.
348 changes: 193 additions & 155 deletions pkg/dib/generate_dag.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,205 +28,243 @@ const (
// GenerateDAG discovers and parses all Dockerfiles at a given path,
// and generates the DAG representing the relationships between images.
func GenerateDAG(buildPath, registryPrefix, customHashListPath string, buildArgs map[string]string) (*dag.DAG, error) {
var allFiles []string
cache := make(map[string]*dag.Node)
allParents := make(map[string][]dockerfile.ImageRef)
err := filepath.Walk(buildPath, func(filePath string, info os.FileInfo, err error) error {
if err != nil {
nodes := make(map[string]*dag.Node)
if err := filepath.WalkDir(buildPath, func(filePath string, dirEntry os.DirEntry, err error) error {
switch {
case err != nil:

Check warning on line 34 in pkg/dib/generate_dag.go

View check run for this annotation

Codecov / codecov/patch

pkg/dib/generate_dag.go#L34

Added line #L34 was not covered by tests
return err
}
if !info.IsDir() {
allFiles = append(allFiles, filePath)
}

if dockerfile.IsDockerfile(filePath) {
dckfile, err := dockerfile.ParseDockerfile(filePath)
if err != nil {
case filePath == buildPath:
return nil
case dirEntry.IsDir():
if err := filepath.WalkDir(filePath, func(otherFile string, dirEntry os.DirEntry, err error) error {
switch {
case err != nil:
return err

Check warning on line 42 in pkg/dib/generate_dag.go

View check run for this annotation

Codecov / codecov/patch

pkg/dib/generate_dag.go#L41-L42

Added lines #L41 - L42 were not covered by tests
case dirEntry.IsDir():
return nil
default:
if path.Base(otherFile) == dockerignore {
// We ignore dockerignore files for simplicity
// In the real world, this file should not be ignored, but it
// helps us in managing refactoring
return nil
}
if _, ok := nodes[filePath]; !ok {
nodes[filePath] = dag.NewNode(nil)
}
nodes[filePath].AddFile(otherFile)
return nil
}
}); err != nil {

Check warning on line 58 in pkg/dib/generate_dag.go

View check run for this annotation

Codecov / codecov/patch

pkg/dib/generate_dag.go#L58

Added line #L58 was not covered by tests
return err
}

skipBuild, hasSkipLabel := dckfile.Labels["skipbuild"]
if hasSkipLabel && skipBuild == "true" {
return nil
}
imageShortName, hasNameLabel := dckfile.Labels["name"]
if !hasNameLabel {
return fmt.Errorf("missing label \"name\" in Dockerfile at path \"%s\"", filePath)
}
img := &dag.Image{
Name: fmt.Sprintf("%s/%s", registryPrefix, imageShortName),
ShortName: imageShortName,
Dockerfile: dckfile,
case dockerfile.IsDockerfile(filePath):
nodes, err = processDockerfile(filePath, registryPrefix, nodes)
if err != nil {
return fmt.Errorf("could not process Dockerfile %q: %w", filePath, err)
}
default:
nodes = processFile(filePath, nodes)
}
return nil
}); err != nil {
return nil, err
}

extraTagsLabel, hasLabel := img.Dockerfile.Labels["dib.extra-tags"]
if hasLabel {
img.ExtraTags = append(img.ExtraTags, strings.Split(extraTagsLabel, ",")...)
}
for key, node := range nodes {
skipBuild, hasSkipLabel := node.Image.Dockerfile.Labels["skipbuild"]
if hasSkipLabel && skipBuild == "true" {
delete(nodes, key)
}
}

useCustomHashList, hasLabel := img.Dockerfile.Labels["dib.use-custom-hash-list"]
if hasLabel && useCustomHashList == "true" {
img.UseCustomHashList = true
}
graph := buildGraph(nodes)

ignorePatterns, err := build.ReadDockerignore(path.Dir(filePath))
if err != nil {
return fmt.Errorf("could not read ignore patterns: %w", err)
}
img.IgnorePatterns = ignorePatterns
if err := computeGraphHashes(graph, customHashListPath, buildArgs); err != nil {
return nil, fmt.Errorf("could not compute graph hashes: %w", err)

Check warning on line 84 in pkg/dib/generate_dag.go

View check run for this annotation

Codecov / codecov/patch

pkg/dib/generate_dag.go#L84

Added line #L84 was not covered by tests
}

if n, ok := cache[img.Name]; ok {
return fmt.Errorf("duplicate image name %q: previous file was %q",
img.Name, path.Join(n.Image.Dockerfile.ContextPath, n.Image.Dockerfile.Filename))
}
return graph, nil
}

allParents[img.Name] = dckfile.From
cache[img.Name] = dag.NewNode(img)
}
return nil
})
func processDockerfile(filePath, registryPrefix string, nodes map[string]*dag.Node) (map[string]*dag.Node, error) {
dckfile, err := dockerfile.ParseDockerfile(filePath)
if err != nil {
return nil, err
}

// Fill parents for each image, for simplicity of use in other functions
for name, parents := range allParents {
for _, parent := range parents {
node, ok := cache[parent.Name]
if !ok {
continue
}
shortName, hasNameLabel := dckfile.Labels["name"]
if !hasNameLabel {
return nil, fmt.Errorf("missing label \"name\" in Dockerfile at path %q", filePath)

Check warning on line 98 in pkg/dib/generate_dag.go

View check run for this annotation

Codecov / codecov/patch

pkg/dib/generate_dag.go#L98

Added line #L98 was not covered by tests
}

// Check that the child does not already exist to avoid duplicates.
childAlreadyExists := false
for _, child := range node.Children() {
if child.Image.Name == name {
childAlreadyExists = true
}
}
imageName := fmt.Sprintf("%s/%s", registryPrefix, shortName)

if childAlreadyExists {
continue
}
var extraTags []string
value, hasLabel := dckfile.Labels["dib.extra-tags"]
if hasLabel {
extraTags = strings.Split(value, ",")
}

node.AddChild(cache[name])
}
useCustomHashList := false
value, hasLabel = dckfile.Labels["dib.use-custom-hash-list"]
if hasLabel && value == "true" {
useCustomHashList = true
}

graph := &dag.DAG{}
// If an image has no parents in the DAG, we consider it a root image
for name, img := range cache {
if len(img.Parents()) == 0 {
graph.AddNode(cache[name])
ignorePatterns, err := build.ReadDockerignore(dckfile.ContextPath)
if err != nil {
return nil, fmt.Errorf("could not read ignore patterns: %w", err)

Check warning on line 117 in pkg/dib/generate_dag.go

View check run for this annotation

Codecov / codecov/patch

pkg/dib/generate_dag.go#L117

Added line #L117 was not covered by tests
}

for _, node := range nodes {
if node.Image != nil && node.Image.Name == imageName {
return nil, fmt.Errorf("duplicate image name %q found: previous file was %q",
imageName, path.Join(node.Image.Dockerfile.ContextPath, node.Image.Dockerfile.Filename))
}
}

if err := generateHashes(graph, allFiles, customHashListPath, buildArgs); err != nil {
return nil, err
if _, ok := nodes[dckfile.ContextPath]; !ok {
nodes[dckfile.ContextPath] = dag.NewNode(nil)

Check warning on line 128 in pkg/dib/generate_dag.go

View check run for this annotation

Codecov / codecov/patch

pkg/dib/generate_dag.go#L128

Added line #L128 was not covered by tests
}
nodes[dckfile.ContextPath].Image = &dag.Image{
Name: imageName,
ShortName: shortName,
ExtraTags: extraTags,
Dockerfile: dckfile,
IgnorePatterns: ignorePatterns,
UseCustomHashList: useCustomHashList,
}

return graph, nil
return nodes, nil
}

func generateHashes(graph *dag.DAG, allFiles []string, customHashListPath string, buildArgs map[string]string) error {
customHumanizedHashList, err := LoadCustomHashList(customHashListPath)
if err != nil {
return fmt.Errorf("could not load custom humanized hash list: %w", err)
func processFile(filePath string, nodes map[string]*dag.Node) map[string]*dag.Node {
if path.Base(filePath) == dockerignore {
// We ignore dockerignore files for simplicity
// In the real world, this file should not be ignored, but it
// helps us in managing refactoring
return nodes
}

fileBelongsTo := map[string]*dag.Node{}
for _, file := range allFiles {
fileBelongsTo[file] = nil
dirPath := path.Dir(filePath)
if _, ok := nodes[dirPath]; !ok {
nodes[dirPath] = dag.NewNode(nil)

Check warning on line 152 in pkg/dib/generate_dag.go

View check run for this annotation

Codecov / codecov/patch

pkg/dib/generate_dag.go#L152

Added line #L152 was not covered by tests
}

// First, we do a depth-first search in the image graph to map every file to the image it belongs to.
// We start from the most specific image paths (children of children of children...), and we get back up
// to parent images, to avoid false-positive and false-negative matches.
// Files matching any pattern in the .dockerignore file are ignored.
graph.WalkInDepth(func(node *dag.Node) {
for _, file := range allFiles {
if !strings.HasPrefix(file, node.Image.Dockerfile.ContextPath+"/") {
// The current file is not lying in the current image build context, nor in a subdirectory.
continue
}
alreadyAdded := false
for _, file := range nodes[dirPath].Files {
if file == filePath {
alreadyAdded = true
}
}
if !alreadyAdded {
nodes[dirPath].AddFile(filePath)

Check warning on line 162 in pkg/dib/generate_dag.go

View check run for this annotation

Codecov / codecov/patch

pkg/dib/generate_dag.go#L162

Added line #L162 was not covered by tests
}

if fileBelongsTo[file] != nil {
// The current file has already been assigned to an image, most likely to a child image.
continue
}
return nodes
}

if path.Base(file) == dockerignore {
// We ignore dockerignore file itself for simplicity
// In the real world, this file should not be ignored but it
// helps us in managing refactoring
continue
func buildGraph(nodes map[string]*dag.Node) *dag.DAG {
for _, node := range nodes {
if node.Image == nil {
continue

Check warning on line 171 in pkg/dib/generate_dag.go

View check run for this annotation

Codecov / codecov/patch

pkg/dib/generate_dag.go#L171

Added line #L171 was not covered by tests
}
for _, parent := range node.Image.Dockerfile.From {
for _, parentNode := range nodes {
if parentNode.Image == nil {
continue

Check warning on line 176 in pkg/dib/generate_dag.go

View check run for this annotation

Codecov / codecov/patch

pkg/dib/generate_dag.go#L176

Added line #L176 was not covered by tests
}
if parentNode.Image.Name == parent.Name {
parentNode.AddChild(node)
}
}
}
}

graph := &dag.DAG{}
// If an image has no parents in the DAG, we can consider it root
for name, img := range nodes {
if len(img.Parents()) == 0 {
graph.AddNode(nodes[name])
}
}

return graph
}

func computeGraphHashes(graph *dag.DAG, customHashListPath string, buildArgs map[string]string) error {
customHumanizedHashList, err := LoadCustomHashList(customHashListPath)
if err != nil {
return fmt.Errorf("could not load custom humanized hash list: %w", err)

Check warning on line 199 in pkg/dib/generate_dag.go

View check run for this annotation

Codecov / codecov/patch

pkg/dib/generate_dag.go#L199

Added line #L199 was not covered by tests
}

if isFileIgnored(node, file) {
// The current file matches a pattern in the dockerignore file
continue
currNodes := graph.Nodes()
for len(currNodes) > 0 {
for _, node := range currNodes {
if err := computeNodeHash(node, customHumanizedHashList, buildArgs); err != nil {
return fmt.Errorf("could not compute hash for image %q: %w", node.Image.Name, err)

Check warning on line 206 in pkg/dib/generate_dag.go

View check run for this annotation

Codecov / codecov/patch

pkg/dib/generate_dag.go#L206

Added line #L206 was not covered by tests
}
}

// If we reach here, the file is part of the current image's context, we mark it as so.
fileBelongsTo[file] = node
node.AddFile(file)
nextNodes := []*dag.Node{}
for _, currNode := range currNodes {
nextNodes = append(nextNodes, currNode.Children()...)
}
})

for {
needRepass := false
err := graph.WalkErr(func(node *dag.Node) error {
var parentHashes []string
for _, parent := range node.Parents() {
if parent.Image.Hash == "" {
// At least one of the parent images has not been processed yet, so we'll need to do another pass
needRepass = true
}
parentHashes = append(parentHashes, parent.Image.Hash)
}
currNodes = nextNodes
}

var humanizedKeywords []string
if node.Image.UseCustomHashList {
humanizedKeywords = customHumanizedHashList
}
return nil
}

filename := path.Join(node.Image.Dockerfile.ContextPath, node.Image.Dockerfile.Filename)
func computeNodeHash(node *dag.Node, customHumanizedHashList []string, buildArgs map[string]string) error {
var parentHashes []string
for _, parent := range node.Parents() {
parentHashes = append(parentHashes, parent.Image.Hash)
}

argInstructionsToReplace := make(map[string]string)
for key, newArg := range buildArgs {
prevArgInstruction, ok := node.Image.Dockerfile.Args[key]
if ok {
argInstructionsToReplace[prevArgInstruction] = fmt.Sprintf("ARG %s=%s", key, newArg)
logger.Debugf("Overriding ARG instruction %q in %q [%q -> %q]",
key, filename, prevArgInstruction, fmt.Sprintf("ARG %s=%s", key, newArg))
}
}
var humanizedKeywords []string
if node.Image.UseCustomHashList {
humanizedKeywords = customHumanizedHashList
}

if err := dockerfile.ReplaceInFile(
filename, argInstructionsToReplace); err != nil {
return fmt.Errorf("failed to replace ARG instructions in file %s: %w", filename, err)
}
defer func() {
if err := dockerfile.ResetFile(
filename, argInstructionsToReplace); err != nil {
logger.Warnf("failed to reset ARG instructions in file %q: %v", filename, err)
}
}()
filename := path.Join(node.Image.Dockerfile.ContextPath, node.Image.Dockerfile.Filename)

hash, err := HashFiles(node.Image.Dockerfile.ContextPath, node.Files, parentHashes, humanizedKeywords)
if err != nil {
return fmt.Errorf("could not hash files for node %s: %w", node.Image.Name, err)
}
node.Image.Hash = hash
return nil
})
if err != nil {
return err
argInstructionsToReplace := make(map[string]string)
for key, newArg := range buildArgs {
prevArgInstruction, ok := node.Image.Dockerfile.Args[key]
if ok {
argInstructionsToReplace[prevArgInstruction] = fmt.Sprintf("ARG %s=%s", key, newArg)
logger.Debugf("Overriding ARG instruction %q in %q [%q -> %q]",
key, filename, prevArgInstruction, fmt.Sprintf("ARG %s=%s", key, newArg))
}
if !needRepass {
return nil
}

if err := dockerfile.ReplaceInFile(
filename, argInstructionsToReplace); err != nil {
return fmt.Errorf("failed to replace ARG instructions in file %s: %w", filename, err)

Check warning on line 245 in pkg/dib/generate_dag.go

View check run for this annotation

Codecov / codecov/patch

pkg/dib/generate_dag.go#L245

Added line #L245 was not covered by tests
}
defer func() {
if err := dockerfile.ResetFile(
filename, argInstructionsToReplace); err != nil {
logger.Warnf("failed to reset ARG instructions in file %q: %v", filename, err)

Check warning on line 250 in pkg/dib/generate_dag.go

View check run for this annotation

Codecov / codecov/patch

pkg/dib/generate_dag.go#L250

Added line #L250 was not covered by tests
}
}()

files := []string{}
for _, file := range node.Files {
if !isFileIgnored(node, file) {
files = append(files, file)
}
}

var err error
node.Image.Hash, err = HashFiles(node.Image.Dockerfile.ContextPath, files, parentHashes, humanizedKeywords)
if err != nil {
return fmt.Errorf("could not hash files: %w", err)

Check warning on line 264 in pkg/dib/generate_dag.go

View check run for this annotation

Codecov / codecov/patch

pkg/dib/generate_dag.go#L264

Added line #L264 was not covered by tests
}

return nil
}

// isFileIgnored checks whether a file matches the image's ignore patterns.
Expand Down

0 comments on commit 0b9d64d

Please sign in to comment.