Skip to content

Commit

Permalink
fix(GenerateDAG): new algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
Antoine Gelloz authored and Thibaut-gauvin committed Oct 28, 2024
1 parent 3ca3fd9 commit 7f25170
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 159 deletions.
1 change: 1 addition & 0 deletions pkg/dag/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ type Image struct {
IgnorePatterns []string `yaml:"ignore_patterns,flow,omitempty"`
ContextFiles []string `yaml:"-"`
NeedsRebuild bool `yaml:"-"`
SkipBuild bool `yaml:"-"`
NeedsTests bool `yaml:"-"`
RetagDone bool `yaml:"-"`
RebuildDone bool `yaml:"-"`
Expand Down
320 changes: 161 additions & 159 deletions pkg/dib/generate_dag.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,219 +45,221 @@ func GenerateDAG(buildPath, registryPrefix, customHashListPath string, buildArgs
}

func buildGraph(buildPath, registryPrefix string) (*dag.DAG, error) {
var allFiles []string
cache := make(map[string]*dag.Node)
allParents := make(map[string][]dockerfile.ImageRef)
err := filepath.Walk(buildPath, func(filePath string, info os.FileInfo, err error) error {
if err != nil {
nodes := make(map[string]*dag.Node)
if err := filepath.WalkDir(buildPath, func(name string, dir os.DirEntry, err error) error {
switch {
case err != nil:
return err
}
if !info.IsDir() {
allFiles = append(allFiles, filePath)
}

if dockerfile.IsDockerfile(filePath) {
dckfile, err := dockerfile.ParseDockerfile(filePath)
case dir.IsDir():
case dockerfile.IsDockerfile(name):
img, err := newImageFromDockerfile(name, registryPrefix)
if err != nil {
return err
}

skipBuild, hasSkipLabel := dckfile.Labels["skipbuild"]
if hasSkipLabel && skipBuild == "true" {
return nil
}
imageShortName, hasNameLabel := dckfile.Labels["name"]
if !hasNameLabel {
return fmt.Errorf("missing label \"name\" in Dockerfile at path \"%s\"", filePath)
}
img := &dag.Image{
Name: fmt.Sprintf("%s/%s", registryPrefix, imageShortName),
ShortName: imageShortName,
Dockerfile: dckfile,
}

extraTagsLabel, hasLabel := img.Dockerfile.Labels["dib.extra-tags"]
if hasLabel {
img.ExtraTags = append(img.ExtraTags, strings.Split(extraTagsLabel, ",")...)
}

useCustomHashList, hasLabel := img.Dockerfile.Labels["dib.use-custom-hash-list"]
if hasLabel && useCustomHashList == "true" {
img.UseCustomHashList = true
}

ignorePatterns, err := build.ReadDockerignore(path.Dir(filePath))
if err != nil {
return fmt.Errorf("could not read ignore patterns: %w", err)
for _, node := range nodes {
if node.Image != nil && node.Image.Name == img.Name {
return fmt.Errorf("duplicate image name %q found while reading file %q: previous file was %q",
img.Name, name, path.Join(node.Image.Dockerfile.ContextPath, node.Image.Dockerfile.Filename))
}
}
img.IgnorePatterns = ignorePatterns

if n, ok := cache[img.Name]; ok {
return fmt.Errorf("duplicate image name %q found while reading file %q: previous file was %q",
img.Name, filePath, path.Join(n.Image.Dockerfile.ContextPath, n.Image.Dockerfile.Filename))
// Don't create the node if the image has the skipbuild label.
if img.SkipBuild {
return nil
}

allParents[img.Name] = dckfile.From
cache[img.Name] = dag.NewNode(img)
nodes[path.Dir(name)] = dag.NewNode(img)
}
return nil
})
}); err != nil {
return nil, err
}

return newGraphFromNodes(nodes), nil
}

func newImageFromDockerfile(filePath, registryPrefix string) (*dag.Image, error) {
dckfile, err := dockerfile.ParseDockerfile(filePath)
if err != nil {
return nil, err
}

// Fill parents for each image, for simplicity of use in other functions
for name, parents := range allParents {
for _, parent := range parents {
node, ok := cache[parent.Name]
if !ok {
continue
}
skipBuild := false
skipBuildString, hasSkipLabel := dckfile.Labels["skipbuild"]
if hasSkipLabel && skipBuildString == "true" {
skipBuild = true
}

// Check that the child does not already exist, to avoid duplicates.
childAlreadyExists := false
for _, child := range node.Children() {
if child.Image.Name == name {
childAlreadyExists = true
}
}
shortName, hasNameLabel := dckfile.Labels["name"]
if !skipBuild && !hasNameLabel {
return nil, fmt.Errorf("missing label \"name\" in Dockerfile at path %q", filePath)
}

if childAlreadyExists {
continue
}
imageName := fmt.Sprintf("%s/%s", registryPrefix, shortName)

node.AddChild(cache[name])
}
var extraTags []string
value, hasLabel := dckfile.Labels["dib.extra-tags"]
if hasLabel {
extraTags = strings.Split(value, ",")
}

graph := &dag.DAG{}
// If an image has no parents in the DAG, we consider it a root image
for name, img := range cache {
if len(img.Parents()) == 0 {
graph.AddNode(cache[name])
}
useCustomHashList := false
value, hasLabel = dckfile.Labels["dib.use-custom-hash-list"]
if hasLabel && value == "true" {
useCustomHashList = true
}

fileBelongsTo := map[string]*dag.Node{}
for _, file := range allFiles {
fileBelongsTo[file] = nil
ignorePatterns, err := build.ReadDockerignore(dckfile.ContextPath)
if err != nil {
return nil, fmt.Errorf("could not read dockerignore: %w", err)
}

// First, we do a depth-first search in the image graph to map every file to the image it belongs to.
// We start from the most specific image paths (children of children of children...), and we work our way back up
// to the parent images, to avoid false-positive and false-negative matches.
// Files matching any pattern in the .dockerignore file are ignored.
graph.WalkInDepth(func(node *dag.Node) {
for _, file := range allFiles {
if !strings.HasPrefix(file, node.Image.Dockerfile.ContextPath+"/") {
// The current file lies neither in the current image's build context nor in one of its subdirectories.
continue
contextFiles, err := getDockerContextFiles(dckfile.ContextPath, ignorePatterns)
if err != nil {
return nil, fmt.Errorf("could not get docker context files: %w", err)
}

return &dag.Image{
Name: imageName,
ShortName: shortName,
ExtraTags: extraTags,
Dockerfile: dckfile,
IgnorePatterns: ignorePatterns,
ContextFiles: contextFiles,
SkipBuild: skipBuild,
UseCustomHashList: useCustomHashList,
}, nil
}

func getDockerContextFiles(contextPath string, ignorePatterns []string) ([]string, error) {
contextFiles := []string{}
if err := filepath.WalkDir(contextPath, func(name string, dir os.DirEntry, err error) error {
switch {
case err != nil:
return err
case dir.IsDir():
default:
// Don't add ignored files/folders, nor the .dockerignore file located at the root of the context path.
// We ignore the .dockerignore file itself for simplicity.
// In the real world, this file should not be ignored, but doing so
// helps us in managing refactoring.
prefix := strings.TrimPrefix(strings.TrimPrefix(name, contextPath), "/")
if prefix == dockerignore {
return nil
}

if fileBelongsTo[file] != nil {
// The current file has already been assigned to an image, most likely to a child image.
continue
if len(ignorePatterns) == 0 {
contextFiles = append(contextFiles, name)
return nil
}

if path.Base(file) == dockerignore {
// We ignore the dockerignore file itself for simplicity.
// In the real world, this file should not be ignored, but doing so
// helps us in managing refactoring.
continue
ignorePatternMatcher, err := patternmatcher.New(ignorePatterns)
if err != nil {
return fmt.Errorf("could not create pattern matcher: %w", err)
}

if isFileIgnored(node, file) {
// The current file matches a pattern in the dockerignore file
continue
ignored, err := ignorePatternMatcher.MatchesOrParentMatches(prefix)
if err != nil {
return fmt.Errorf("could not match pattern: %w", err)
}

// If we reach here, the file is part of the current image's context, we mark it as so.
fileBelongsTo[file] = node
node.Image.ContextFiles = append(node.Image.ContextFiles, file)
if !ignored {
contextFiles = append(contextFiles, name)
}
}
})
return nil
}); err != nil {
return nil, err
}

return graph, nil
return contextFiles, nil
}

func computeHashes(graph *dag.DAG, customHashList []string, buildArgs map[string]string) (*dag.DAG, error) {
for {
needRepass := false
err := graph.WalkErr(func(node *dag.Node) error {
var parentHashes []string
for _, parent := range node.Parents() {
if parent.Image.Hash == "" {
// At least one of the parent images has not been processed yet; we'll need to do another pass.
needRepass = true
func newGraphFromNodes(nodes map[string]*dag.Node) *dag.DAG {
for _, node := range nodes {
if node.Image == nil {
continue
}
for _, parent := range node.Image.Dockerfile.From {
for _, parentNode := range nodes {
if parentNode.Image == nil {
continue
}
parentHashes = append(parentHashes, parent.Image.Hash)
}

var hashList []string
if node.Image.UseCustomHashList {
hashList = customHashList
}

filename := path.Join(node.Image.Dockerfile.ContextPath, node.Image.Dockerfile.Filename)

argInstructionsToReplace := make(map[string]string)
for key, newArg := range buildArgs {
prevArgInstruction, ok := node.Image.Dockerfile.Args[key]
if ok {
argInstructionsToReplace[prevArgInstruction] = fmt.Sprintf("ARG %s=%s", key, newArg)
logger.Debugf("Overriding ARG instruction %q in %q [%q -> %q]",
key, filename, prevArgInstruction, fmt.Sprintf("ARG %s=%s", key, newArg))
if parentNode.Image.Name == parent.Name {
parentNode.AddChild(node)
}
}
}
}

if err := dockerfile.ReplaceInFile(
filename, argInstructionsToReplace); err != nil {
return fmt.Errorf("failed to replace ARG instructions in file %s: %w", filename, err)
}
defer func() {
if err := dockerfile.ResetFile(
filename, argInstructionsToReplace); err != nil {
logger.Warnf("failed to reset ARG instructions in file %q: %v", filename, err)
}
}()
graph := &dag.DAG{}
// If an image has no parents in the DAG, we can consider it a root image.
for name, img := range nodes {
if len(img.Parents()) == 0 {
graph.AddNode(nodes[name])
}
}

hash, err := hashFiles(node.Image.Dockerfile.ContextPath, node.Image.ContextFiles, parentHashes, hashList)
return graph
}

func computeHashes(graph *dag.DAG, customHashList []string, buildArgs map[string]string) (*dag.DAG, error) {
currNodes := graph.Nodes()
for len(currNodes) > 0 {
for _, node := range currNodes {
var err error
node.Image.Hash, err = computeNodeHash(node, customHashList, buildArgs)
if err != nil {
return fmt.Errorf("could not hash files for node %s: %w", node.Image.Name, err)
return nil, fmt.Errorf("could not compute hash for image %q: %w", node.Image.Name, err)
}
node.Image.Hash = hash
return nil
})
if err != nil {
return nil, err
}
if !needRepass {
return graph, nil

nextNodes := []*dag.Node{}
for _, currNode := range currNodes {
nextNodes = append(nextNodes, currNode.Children()...)
}
currNodes = nextNodes
}

return graph, nil
}

// isFileIgnored checks whether a file matches the image's ignore patterns.
// It returns true if the file matches at least one pattern (meaning it should be ignored).
func isFileIgnored(node *dag.Node, file string) bool {
if len(node.Image.IgnorePatterns) == 0 {
return false
func computeNodeHash(node *dag.Node, customHashList []string, buildArgs map[string]string) (string, error) {
var parentHashes []string
for _, parent := range node.Parents() {
parentHashes = append(parentHashes, parent.Image.Hash)
}

ignorePatternMatcher, err := patternmatcher.New(node.Image.IgnorePatterns)
if err != nil {
logger.Errorf("Could not create pattern matcher for %s, ignoring", node.Image.ShortName)
return false
var hashList []string
if node.Image.UseCustomHashList {
hashList = customHashList
}

prefix := strings.TrimPrefix(strings.TrimPrefix(file, node.Image.Dockerfile.ContextPath), "/")
match, err := ignorePatternMatcher.MatchesOrParentMatches(prefix)
if err != nil {
logger.Errorf("Could not match pattern for %s, ignoring", node.Image.ShortName)
return false
filename := path.Join(node.Image.Dockerfile.ContextPath, node.Image.Dockerfile.Filename)

argInstructionsToReplace := make(map[string]string)
for key, newArg := range buildArgs {
prevArgInstruction, ok := node.Image.Dockerfile.Args[key]
if ok {
argInstructionsToReplace[prevArgInstruction] = fmt.Sprintf("ARG %s=%s", key, newArg)
logger.Debugf("Overriding ARG instruction %q in %q [%q -> %q]",
key, filename, prevArgInstruction, fmt.Sprintf("ARG %s=%s", key, newArg))
}
}

if err := dockerfile.ReplaceInFile(
filename, argInstructionsToReplace); err != nil {
return "", fmt.Errorf("failed to replace ARG instructions in file %s: %w", filename, err)
}
defer func() {
if err := dockerfile.ResetFile(
filename, argInstructionsToReplace); err != nil {
logger.Warnf("failed to reset ARG instructions in file %q: %v", filename, err)
}
}()

return match
return hashFiles(node.Image.Dockerfile.ContextPath, node.Image.ContextFiles, parentHashes, hashList)
}

// hashFiles computes the sha256 from the contents of the files passed as argument.
Expand Down

0 comments on commit 7f25170

Please sign in to comment.