From dcf0fbe528a456fd8e6f04da6417a90ab46ed732 Mon Sep 17 00:00:00 2001 From: Antoine Gelloz Date: Mon, 3 Jun 2024 17:11:26 +0200 Subject: [PATCH] fix(GenerateDAG): new algorithm --- pkg/dib/generate_dag.go | 350 ++++++++++++++++++++++------------------ 1 file changed, 194 insertions(+), 156 deletions(-) diff --git a/pkg/dib/generate_dag.go b/pkg/dib/generate_dag.go index 064e30fe9..b38060781 100644 --- a/pkg/dib/generate_dag.go +++ b/pkg/dib/generate_dag.go @@ -28,205 +28,243 @@ const ( // GenerateDAG discovers and parses all Dockerfiles at a given path, // and generates the DAG representing the relationships between images. func GenerateDAG(buildPath, registryPrefix, customHashListPath string, buildArgs map[string]string) (*dag.DAG, error) { - var allFiles []string - cache := make(map[string]*dag.Node) - allParents := make(map[string][]dockerfile.ImageRef) - err := filepath.Walk(buildPath, func(filePath string, info os.FileInfo, err error) error { - if err != nil { + nodes := make(map[string]*dag.Node) + if err := filepath.WalkDir(buildPath, func(filePath string, dirEntry os.DirEntry, err error) error { + switch { + case err != nil: return err - } - if !info.IsDir() { - allFiles = append(allFiles, filePath) - } - - if dockerfile.IsDockerfile(filePath) { - dckfile, err := dockerfile.ParseDockerfile(filePath) - if err != nil { + case filePath == buildPath: + return nil + case dirEntry.IsDir(): + if err := filepath.WalkDir(filePath, func(otherFile string, dirEntry os.DirEntry, err error) error { + switch { + case err != nil: + return err + case dirEntry.IsDir(): + return nil + default: + if path.Base(otherFile) == dockerignore { + // We ignore dockerignore files for simplicity + // In the real world, this file should not be ignored, but it + // helps us in managing refactoring + return nil + } + if _, ok := nodes[filePath]; !ok { + nodes[filePath] = dag.NewNode(nil) + } + nodes[filePath].AddFile(otherFile) + return nil + } + }); err != nil { return err } - - skipBuild, hasSkipLabel := dckfile.Labels["skipbuild"] - if hasSkipLabel && skipBuild == "true" { - return nil - } - imageShortName, hasNameLabel := dckfile.Labels["name"] - if !hasNameLabel { - return fmt.Errorf("missing label \"name\" in Dockerfile at path \"%s\"", filePath) - } - img := &dag.Image{ - Name: fmt.Sprintf("%s/%s", registryPrefix, imageShortName), - ShortName: imageShortName, - Dockerfile: dckfile, + case dockerfile.IsDockerfile(filePath): + nodes, err = processDockerfile(filePath, registryPrefix, nodes) + if err != nil { + return fmt.Errorf("could not process Dockerfile %q: %w", filePath, err) } + default: + nodes = processFile(filePath, nodes) + } + return nil + }); err != nil { + return nil, err + } - extraTagsLabel, hasLabel := img.Dockerfile.Labels["dib.extra-tags"] - if hasLabel { - img.ExtraTags = append(img.ExtraTags, strings.Split(extraTagsLabel, ",")...) - } + for key, node := range nodes { + skipBuild, hasSkipLabel := node.Image.Dockerfile.Labels["skipbuild"] + if hasSkipLabel && skipBuild == "true" { + delete(nodes, key) + } + } - useCustomHashList, hasLabel := img.Dockerfile.Labels["dib.use-custom-hash-list"] - if hasLabel && useCustomHashList == "true" { - img.UseCustomHashList = true - } + graph := buildGraph(nodes) - ignorePatterns, err := build.ReadDockerignore(path.Dir(filePath)) - if err != nil { - return fmt.Errorf("could not read ignore patterns: %w", err) - } - img.IgnorePatterns = ignorePatterns + if err := computeGraphHashes(graph, customHashListPath, buildArgs); err != nil { + return nil, fmt.Errorf("could not compute graph hashes: %w", err) + } - if n, ok := cache[img.Name]; ok { - return fmt.Errorf("duplicate image name %q: previous file was %q", - img.Name, path.Join(n.Image.Dockerfile.ContextPath, n.Image.Dockerfile.Filename)) - } + return graph, nil +} - allParents[img.Name] = dckfile.From - cache[img.Name] = dag.NewNode(img) - } - return nil - }) +func processDockerfile(filePath, registryPrefix string, nodes map[string]*dag.Node) (map[string]*dag.Node, error) { + dckfile, err := dockerfile.ParseDockerfile(filePath) if err != nil { return nil, err } - // Fill parents for each image, for simplicity of use in other functions - for name, parents := range allParents { - for _, parent := range parents { - node, ok := cache[parent.Name] - if !ok { - continue - } + shortName, hasNameLabel := dckfile.Labels["name"] + if !hasNameLabel { + return nil, fmt.Errorf("missing label \"name\" in Dockerfile at path %q", filePath) + } - // Check that children does not already exist to avoid duplicates. - childAlreadyExists := false - for _, child := range node.Children() { - if child.Image.Name == name { - childAlreadyExists = true - } - } + imageName := fmt.Sprintf("%s/%s", registryPrefix, shortName) - if childAlreadyExists { - continue - } + var extraTags []string + value, hasLabel := dckfile.Labels["dib.extra-tags"] + if hasLabel { + extraTags = strings.Split(value, ",") + } - node.AddChild(cache[name]) - } + useCustomHashList := false + value, hasLabel = dckfile.Labels["dib.use-custom-hash-list"] + if hasLabel && value == "true" { + useCustomHashList = true } - graph := &dag.DAG{} - // If an image has no parents in the DAG, we consider it a root image - for name, img := range cache { - if len(img.Parents()) == 0 { - graph.AddNode(cache[name]) + ignorePatterns, err := build.ReadDockerignore(dckfile.ContextPath) + if err != nil { + return nil, fmt.Errorf("could not read ignore patterns: %w", err) + } + + for _, node := range nodes { + if node.Image != nil && node.Image.Name == imageName { + return nil, fmt.Errorf("duplicate image name %q found: previous file was %q", + imageName, path.Join(node.Image.Dockerfile.ContextPath, node.Image.Dockerfile.Filename)) } } - if err := generateHashes(graph, allFiles, customHashListPath, buildArgs); err != nil { - return nil, err + if _, ok := nodes[dckfile.ContextPath]; !ok { + nodes[dckfile.ContextPath] = dag.NewNode(nil) + } + nodes[dckfile.ContextPath].Image = &dag.Image{ + Name: imageName, + ShortName: shortName, + ExtraTags: extraTags, + Dockerfile: dckfile, + IgnorePatterns: ignorePatterns, + UseCustomHashList: useCustomHashList, } - return graph, nil + return nodes, nil } -func generateHashes(graph *dag.DAG, allFiles []string, customHashListPath string, buildArgs map[string]string) error { - customHumanizedHashList, err := LoadCustomHashList(customHashListPath) - if err != nil { - return fmt.Errorf("could not load custom humanized hash list: %w", err) +func processFile(filePath string, nodes map[string]*dag.Node) map[string]*dag.Node { + if path.Base(filePath) == dockerignore { + // We ignore dockerignore files for simplicity + // In the real world, this file should not be ignored, but it + // helps us in managing refactoring + return nodes } - fileBelongsTo := map[string]*dag.Node{} - for _, file := range allFiles { - fileBelongsTo[file] = nil + dirPath := path.Dir(filePath) + if _, ok := nodes[dirPath]; !ok { + nodes[dirPath] = dag.NewNode(nil) } - // First, we do a depth-first search in the image graph to map every file the image they belong to. - // We start from the most specific image paths (children of children of children...), and we get back up - // to parent images, to avoid false-positive and false-negative matches. - // Files matching any pattern in the .dockerignore file are ignored. - graph.WalkInDepth(func(node *dag.Node) { - for _, file := range allFiles { - if !strings.HasPrefix(file, node.Image.Dockerfile.ContextPath+"/") { - // The current file is not lying in the current image build context, nor in a subdirectory. - continue - } + alreadyAdded := false + for _, file := range nodes[dirPath].Files { + if file == filePath { + alreadyAdded = true + } + } + if !alreadyAdded { + nodes[dirPath].AddFile(filePath) + } - if fileBelongsTo[file] != nil { - // The current file has already been assigned to an image, most likely to a child image. - continue - } + return nodes +} - if path.Base(file) == dockerignore { - // We ignore dockerignore file itself for simplicity - // In the real world, this file should not be ignored but it - // helps us in managing refactoring - continue +func buildGraph(nodes map[string]*dag.Node) *dag.DAG { + for _, node := range nodes { + if node.Image == nil { + continue + } + for _, parent := range node.Image.Dockerfile.From { + for _, parentNode := range nodes { + if parentNode.Image == nil { + continue + } + if parentNode.Image.Name == parent.Name { + parentNode.AddChild(node) + } } + } + } + + graph := &dag.DAG{} + // If an image has no parents in the DAG, we can consider it root + for name, img := range nodes { + if len(img.Parents()) == 0 { + graph.AddNode(nodes[name]) + } + } + + return graph +} + +func computeGraphHashes(graph *dag.DAG, customHashListPath string, buildArgs map[string]string) error { + customHumanizedHashList, err := LoadCustomHashList(customHashListPath) + if err != nil { + return fmt.Errorf("could not load custom humanized hash list: %w", err) + } - if isFileIgnored(node, file) { - // The current file matches a pattern in the dockerignore file - continue + currNodes := graph.Nodes() + for len(currNodes) > 0 { + for _, node := range currNodes { + if err := computeNodeHash(node, customHumanizedHashList, buildArgs); err != nil { + return fmt.Errorf("could not compute hash for image %q: %w", node.Image.Name, err) } + } - // If we reach here, the file is part of the current image's context, we mark it as so. - fileBelongsTo[file] = node - node.AddFile(file) + nextNodes := []*dag.Node{} + for _, currNode := range currNodes { + nextNodes = append(nextNodes, currNode.Children()...) } - }) - - for { - needRepass := false - err := graph.WalkErr(func(node *dag.Node) error { - var parentHashes []string - for _, parent := range node.Parents() { - if parent.Image.Hash == "" { - // At least one of the parent image has not been processed yet, we'll need to do an other pass - needRepass = true - } - parentHashes = append(parentHashes, parent.Image.Hash) - } + currNodes = nextNodes + } - var humanizedKeywords []string - if node.Image.UseCustomHashList { - humanizedKeywords = customHumanizedHashList - } + return nil +} - filename := path.Join(node.Image.Dockerfile.ContextPath, node.Image.Dockerfile.Filename) +func computeNodeHash(node *dag.Node, customHumanizedHashList []string, buildArgs map[string]string) error { + var parentHashes []string + for _, parent := range node.Parents() { + parentHashes = append(parentHashes, parent.Image.Hash) + } - argInstructionsToReplace := make(map[string]string) - for key, newArg := range buildArgs { - prevArgInstruction, ok := node.Image.Dockerfile.Args[key] - if ok { - argInstructionsToReplace[prevArgInstruction] = fmt.Sprintf("ARG %s=%s", key, newArg) - logger.Debugf("Overriding ARG instruction %q in %q [%q -> %q]", - key, filename, prevArgInstruction, fmt.Sprintf("ARG %s=%s", key, newArg)) - } - } + var humanizedKeywords []string + if node.Image.UseCustomHashList { + humanizedKeywords = customHumanizedHashList + } - if err := dockerfile.ReplaceInFile( - filename, argInstructionsToReplace); err != nil { - return fmt.Errorf("failed to replace ARG instructions in file %s: %w", filename, err) - } - defer func() { - if err := dockerfile.ResetFile( - filename, argInstructionsToReplace); err != nil { - logger.Warnf("failed to reset ARG instructions in file %q: %v", filename, err) - } - }() + filename := path.Join(node.Image.Dockerfile.ContextPath, node.Image.Dockerfile.Filename) - hash, err := HashFiles(node.Image.Dockerfile.ContextPath, node.Files, parentHashes, humanizedKeywords) - if err != nil { - return fmt.Errorf("could not hash files for node %s: %w", node.Image.Name, err) - } - node.Image.Hash = hash - return nil - }) - if err != nil { - return err + argInstructionsToReplace := make(map[string]string) + for key, newArg := range buildArgs { + prevArgInstruction, ok := node.Image.Dockerfile.Args[key] + if ok { + argInstructionsToReplace[prevArgInstruction] = fmt.Sprintf("ARG %s=%s", key, newArg) + logger.Debugf("Overriding ARG instruction %q in %q [%q -> %q]", + key, filename, prevArgInstruction, fmt.Sprintf("ARG %s=%s", key, newArg)) } - if !needRepass { - return nil + } + + if err := dockerfile.ReplaceInFile( + filename, argInstructionsToReplace); err != nil { + return fmt.Errorf("failed to replace ARG instructions in file %s: %w", filename, err) + } + defer func() { + if err := dockerfile.ResetFile( + filename, argInstructionsToReplace); err != nil { + logger.Warnf("failed to reset ARG instructions in file %q: %v", filename, err) + } + }() + + files := []string{} + for _, file := range node.Files { + if !isFileIgnored(node, file) { + files = append(files, file) } } + + var err error + node.Image.Hash, err = HashFiles(node.Image.Dockerfile.ContextPath, files, parentHashes, humanizedKeywords) + if err != nil { + return fmt.Errorf("could not hash files: %w", err) + } + + return nil } // isFileIgnored checks whether a file matches the images ignore patterns. @@ -261,7 +299,7 @@ func HashFiles(baseDir string, files, parentHashes, customHumanizedHashWordList slices.Sort(files) for _, filename := range files { if strings.Contains(filename, "\n") { - return "", errors.New("file names with newlines are not supported") + return "", errors.New("filenames with newlines are not supported") } file, err := os.Open(filename)