From 2aa04e263639d1261ab1e63cf16f65e927936ecb Mon Sep 17 00:00:00 2001 From: Antoine Gelloz Date: Mon, 3 Jun 2024 10:57:06 +0200 Subject: [PATCH] refact(GenerateDAG): prepare for new algorithm --- pkg/dag/node.go | 5 ++ pkg/dib/generate_dag.go | 84 ++++++++++++--------------- pkg/dib/generate_dag_internal_test.go | 60 ------------------- pkg/dib/generate_dag_test.go | 50 ++++++++++++++++ 4 files changed, 93 insertions(+), 106 deletions(-) delete mode 100644 pkg/dib/generate_dag_internal_test.go diff --git a/pkg/dag/node.go b/pkg/dag/node.go index 99e10458..278b703b 100644 --- a/pkg/dag/node.go +++ b/pkg/dag/node.go @@ -15,6 +15,7 @@ type NodeVisitorFuncErr func(*Node) error // Node represents a node of a graph. type Node struct { Image *Image + Files []string waitCond *sync.Cond done bool @@ -95,3 +96,7 @@ func (n *Node) walkInDepth(visitor NodeVisitorFunc) { } visitor(n) } + +func (n *Node) AddFile(file string) { + n.Files = append(n.Files, file) +} diff --git a/pkg/dib/generate_dag.go b/pkg/dib/generate_dag.go index 7f64bb40..669a9225 100644 --- a/pkg/dib/generate_dag.go +++ b/pkg/dib/generate_dag.go @@ -9,7 +9,7 @@ import ( "os" "path" "path/filepath" - "sort" + "slices" "strings" "github.com/docker/cli/cli/command/image/build" @@ -129,13 +129,11 @@ func GenerateDAG(buildPath, registryPrefix, customHashListPath string, buildArgs } func generateHashes(graph *dag.DAG, allFiles []string, customHashListPath string, buildArgs map[string]string) error { - customHumanizedHashList, err := loadCustomHumanizedHashList(customHashListPath) + customHumanizedHashList, err := LoadCustomHashList(customHashListPath) if err != nil { - return err + return fmt.Errorf("could not load custom humanized hash list: %w", err) } - nodeFiles := map[*dag.Node][]string{} - fileBelongsTo := map[string]*dag.Node{} for _, file := range allFiles { fileBelongsTo[file] = nil @@ -146,7 +144,6 @@ func generateHashes(graph *dag.DAG, allFiles []string, customHashListPath string // to parent images, to avoid false-positive and false-negative matches. // Files matching any pattern in the .dockerignore file are ignored. graph.WalkInDepth(func(node *dag.Node) { - nodeFiles[node] = []string{} for _, file := range allFiles { if !strings.HasPrefix(file, node.Image.Dockerfile.ContextPath+"/") { // The current file is not lying in the current image build context, nor in a subdirectory. @@ -165,16 +162,14 @@ func generateHashes(graph *dag.DAG, allFiles []string, customHashListPath string continue } - if node.Image.IgnorePatterns != nil { - if matchPattern(node, file) { - // The current file matches a pattern in the dockerignore file - continue - } + if isFileIgnored(node, file) { + // The current file matches a pattern in the dockerignore file + continue } // If we reach here, the file is part of the current image's context, we mark it as so. fileBelongsTo[file] = node - nodeFiles[node] = append(nodeFiles[node], file) + node.AddFile(file) } }) @@ -218,7 +213,7 @@ func generateHashes(graph *dag.DAG, allFiles []string, customHashListPath string } }() - hash, err := hashFiles(node.Image.Dockerfile.ContextPath, nodeFiles[node], parentHashes, humanizedKeywords) + hash, err := HashFiles(node.Image.Dockerfile.ContextPath, node.Files, parentHashes, humanizedKeywords) if err != nil { return fmt.Errorf("could not hash files for node %s: %w", node.Image.Name, err) } @@ -234,9 +229,14 @@ func generateHashes(graph *dag.DAG, allFiles []string, customHashListPath string } } -// matchPattern checks whether a file matches the images ignore patterns. +// isFileIgnored checks whether a file matches the images ignore patterns. // It returns true if the file matches at least one pattern (meaning it should be ignored). -func matchPattern(node *dag.Node, file string) bool { +func isFileIgnored(node *dag.Node, file string) bool { + if node.Image.IgnorePatterns == nil || + len(node.Image.IgnorePatterns) == 0 { + return false + } + ignorePatternMatcher, err := patternmatcher.New(node.Image.IgnorePatterns) if err != nil { logger.Errorf("Could not create pattern matcher for %s, ignoring", node.Image.ShortName) @@ -249,75 +249,68 @@ func matchPattern(node *dag.Node, file string) bool { logger.Errorf("Could not match pattern for %s, ignoring", node.Image.ShortName) return false } + return match } -// hashFiles computes the sha256 from the contents of the files passed as argument. +// HashFiles computes the sha256 from the contents of the files passed as argument. // The files are alphabetically sorted so the returned hash is always the same. // This also means the hash will change if the file names change but the contents don't. -func hashFiles( - baseDir string, - files []string, - parentHashes []string, - customHumanizedHashWordList []string, -) (string, error) { +func HashFiles(baseDir string, files, parentHashes, customHumanizedHashWordList []string) (string, error) { hash := sha256.New() - files = append([]string(nil), files...) - sort.Strings(files) - for _, file := range files { - if strings.Contains(file, "\n") { - return "", errors.New("filenames with newlines are not supported") + slices.Sort(files) + for _, filename := range files { + if strings.Contains(filename, "\n") { + return "", errors.New("file names with newlines are not supported") } - readCloser, err := os.Open(file) + + file, err := os.Open(filename) if err != nil { return "", err } + defer file.Close() + hashFile := sha256.New() - _, err = io.Copy(hashFile, readCloser) - if err != nil { + if _, err := io.Copy(hashFile, file); err != nil { return "", err } - err = readCloser.Close() - if err != nil { - return "", err - } - filename := strings.TrimPrefix(file, baseDir) - _, err = fmt.Fprintf(hash, "%x %s\n", hashFile.Sum(nil), filename) - if err != nil { + + filename := strings.TrimPrefix(filename, baseDir) + if _, err := fmt.Fprintf(hash, "%x %s\n", hashFile.Sum(nil), filename); err != nil { return "", err } } parentHashes = append([]string(nil), parentHashes...) - sort.Strings(parentHashes) + slices.Sort(parentHashes) for _, parentHash := range parentHashes { hash.Write([]byte(parentHash)) } - var humanReadableHash string - var err error - worldListToUse := humanhash.DefaultWordList if customHumanizedHashWordList != nil { worldListToUse = customHumanizedHashWordList } - humanReadableHash, err = humanhash.HumanizeUsing(hash.Sum(nil), humanizedHashWordLength, worldListToUse, "-") + humanReadableHash, err := humanhash.HumanizeUsing(hash.Sum(nil), humanizedHashWordLength, worldListToUse, "-") if err != nil { return "", fmt.Errorf("could not humanize hash: %w", err) } + return humanReadableHash, nil } -// loadCustomHumanizedHashList try to load & parse a list of custom humanized hash to use. -func loadCustomHumanizedHashList(filepath string) ([]string, error) { +// LoadCustomHashList try to load & parse a list of custom humanized hash to use. +func LoadCustomHashList(filepath string) ([]string, error) { if filepath == "" { return nil, nil } + file, err := os.Open(filepath) if err != nil { - return nil, fmt.Errorf("cannot load custom humanized word list file, err: %w", err) + return nil, err } + defer file.Close() fileScanner := bufio.NewScanner(file) fileScanner.Split(bufio.ScanLines) @@ -326,7 +319,6 @@ func loadCustomHumanizedHashList(filepath string) ([]string, error) { for fileScanner.Scan() { lines = append(lines, fileScanner.Text()) } - _ = file.Close() return lines, nil } diff --git a/pkg/dib/generate_dag_internal_test.go b/pkg/dib/generate_dag_internal_test.go deleted file mode 100644 index 39ce77c2..00000000 --- a/pkg/dib/generate_dag_internal_test.go +++ /dev/null @@ -1,60 +0,0 @@ -package dib - -import ( - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func Test_loadCustomHumanizedHashList(t *testing.T) { - t.Parallel() - - testCases := []struct { - name string - input string - expected []string - expectedErr string - }{ - { - name: "standard wordlist", - input: "", - expected: nil, - expectedErr: "", - }, - { - name: "custom wordlist txt", - input: "../../test/fixtures/dib/wordlist.txt", - expected: []string{"a", "b", "c"}, - expectedErr: "", - }, - { - name: "custom wordlist yml", - input: "../../test/fixtures/dib/wordlist.yml", - expected: []string{"e", "f", "g"}, - expectedErr: "", - }, - { - name: "wordlist file not exist", - input: "../../test/fixtures/dib/lorem.txt", - expected: nil, - expectedErr: "cannot load custom humanized word list file," + - " err: open ../../test/fixtures/dib/lorem.txt: no such file or directory", - }, - } - - for _, test := range testCases { - t.Run(test.name, func(t *testing.T) { - t.Parallel() - - actual, err := loadCustomHumanizedHashList(test.input) - - if test.expectedErr == "" { - require.NoError(t, err) - } else { - require.EqualError(t, err, test.expectedErr) - } - assert.Equal(t, test.expected, actual) - }) - } -} diff --git a/pkg/dib/generate_dag_test.go b/pkg/dib/generate_dag_test.go index 6e6cf6f6..c59da5e1 100644 --- a/pkg/dib/generate_dag_test.go +++ b/pkg/dib/generate_dag_test.go @@ -172,3 +172,53 @@ func flattenNodes(graph *dag.DAG) map[string]*dag.Node { return flatNodes } + +func TestLoadCustomHashList(t *testing.T) { + t.Parallel() + + testCases := []struct { + name string + input string + expected []string + expectedErr string + }{ + { + name: "standard wordlist", + input: "", + expected: nil, + expectedErr: "", + }, + { + name: "custom wordlist txt", + input: "../../test/fixtures/dib/wordlist.txt", + expected: []string{"a", "b", "c"}, + expectedErr: "", + }, + { + name: "custom wordlist yml", + input: "../../test/fixtures/dib/wordlist.yml", + expected: []string{"e", "f", "g"}, + expectedErr: "", + }, + { + name: "wordlist file not exist", + input: "../../test/fixtures/dib/lorem.txt", + expected: nil, + expectedErr: "open ../../test/fixtures/dib/lorem.txt: no such file or directory", + }, + } + + for _, test := range testCases { + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + actual, err := dib.LoadCustomHashList(test.input) + if test.expectedErr == "" { + require.NoError(t, err) + } else { + require.EqualError(t, err, test.expectedErr) + } + assert.Equal(t, test.expected, actual) + }) + } +}