diff --git a/internal/policy/container/has_modified_files.go b/internal/policy/container/has_modified_files.go index 497157d9..4cef185b 100644 --- a/internal/policy/container/has_modified_files.go +++ b/internal/policy/container/has_modified_files.go @@ -60,7 +60,12 @@ type packageFilesRef struct { // Validate runs the check of whether any Red Hat files were modified func (p *HasModifiedFilesCheck) Validate(ctx context.Context, imgRef image.ImageReference) (bool, error) { fs := afero.NewOsFs() - layerIDs, packageFiles, packageDist, err := p.gatherDataToValidate(ctx, imgRef, fs) + layerIDs, packageFiles, err := p.gatherDataToValidate(ctx, imgRef, fs) + if err != nil { + return false, fmt.Errorf("could not generate modified files list: %v", err) + } + + packageDist, err := p.parsePackageDist(ctx, imgRef.ImageFSPath, fs) if err != nil { return false, fmt.Errorf("could not generate modified files list: %v", err) } @@ -68,28 +73,56 @@ func (p *HasModifiedFilesCheck) Validate(ctx context.Context, imgRef image.Image return p.validate(ctx, layerIDs, packageFiles, packageDist) } +// parsePackageDist returns the platform's distribution value from the +// os-release contents in the extracted image. +func (p *HasModifiedFilesCheck) parsePackageDist(_ context.Context, extractedImageFSPath string, fs afero.Fs) (string, error) { + osRelease, err := fs.Open(filepath.Join(extractedImageFSPath, "etc", "os-release")) + if err != nil { + return "", fmt.Errorf("could not open os-release: %v", err) + } + defer osRelease.Close() + scanner := bufio.NewScanner(osRelease) + packageDist := "unknown" + for scanner.Scan() { + line := scanner.Text() + r, _ := regexp.Compile(`PLATFORM_ID="platform:([[:alnum:]]+)"`) + m := r.FindStringSubmatch(line) + if m == nil { + continue + } + packageDist = m[1] + break + } + + if err := scanner.Err(); err != nil { + return "", fmt.Errorf("error while scanning for package dist: %v", err) + } + + return packageDist, nil +} + // gatherDataToValidate returns a map from layer digest to a struct containing the list of files // (packageFilesRef.LayerPackageFiles) installed via packages (packageFilesRef.LayerPackages) // from the container image, and the list of files (packageFilesRef.LayerFiles) modified/added // via layers in the image. -func (p *HasModifiedFilesCheck) gatherDataToValidate(ctx context.Context, imgRef image.ImageReference, fs afero.Fs) ([]string, map[string]packageFilesRef, string, error) { +func (p *HasModifiedFilesCheck) gatherDataToValidate(ctx context.Context, imgRef image.ImageReference, fs afero.Fs) ([]string, map[string]packageFilesRef, error) { logger := logr.FromContextOrDiscard(ctx) layerDir, err := afero.TempDir(fs, "", "rpm-layers-") if err != nil { - return nil, nil, "", err + return nil, nil, err } defer func() { _ = fs.RemoveAll(layerDir) }() if imgRef.ImageInfo == nil { - return nil, nil, "", fmt.Errorf("image reference invalid") + return nil, nil, fmt.Errorf("image reference invalid") } layers, err := imgRef.ImageInfo.Layers() if err != nil { - return nil, nil, "", err + return nil, nil, err } layerIDs := make([]string, 0, len(layers)) @@ -102,21 +135,36 @@ func (p *HasModifiedFilesCheck) gatherDataToValidate(ctx context.Context, imgRef for idx, layer := range layers { layerIDHash, err := layer.Digest() if err != nil { - return nil, nil, "", fmt.Errorf("unable to retrieve diff id for layer: %w", err) + return nil, nil, fmt.Errorf("unable to retrieve diff id for layer: %w", err) + } + + // Capture the diff ID to aid in debugging. We don't technically care if + // there's an error returned here because we don't use the layerDiffID + // value for anything meaningful. + layerDiffID := "unknown" + layerDiffHash, err := layer.DiffID() + if err == nil && layerDiffHash.String() != "" { + layerDiffID = layerDiffHash.String() } - layerID := layerIDHash.String() - layerDir := filepath.Join(layerDir, fmt.Sprintf("%02d-%s", idx, layerID)) + rawLayerID := layerIDHash.String() + // Map everything using a combination of the layer index and the layer + // ID to avoid problems when images have multiple scattered layers with + // the same ID. + layerID := fmt.Sprintf("%02d-%s", idx, rawLayerID) + logger.V(log.TRC).Info("generating unique layer ID", "uniqueLayerID", layerID, "layerID", rawLayerID, "layerDiffID", layerDiffID) + + layerDir := filepath.Join(layerDir, layerID) err = fs.Mkdir(layerDir, 0o755) if err != nil { - return nil, nil, "", fmt.Errorf("could not create layer directory: %w", err) + return nil, nil, fmt.Errorf("could not create layer directory: %w", err) } layerIDs = append(layerIDs, layerID) files, err := generateChangesFor(ctx, layer) if err != nil { - return nil, nil, "", err + return nil, nil, err } found, pkgList := findRPMDB(ctx, layer) @@ -142,7 +190,7 @@ func (p *HasModifiedFilesCheck) gatherDataToValidate(ctx context.Context, imgRef packageFiles, err := installedFileMapWithExclusions(ctx, pkgList) if err != nil { - return nil, nil, "", err + return nil, nil, err } layerRefs[layerID] = packageFilesRef{ @@ -153,29 +201,7 @@ func (p *HasModifiedFilesCheck) gatherDataToValidate(ctx context.Context, imgRef } } - osRelease, err := fs.Open(filepath.Join(imgRef.ImageFSPath, "etc", "os-release")) - if err != nil { - return nil, nil, "", fmt.Errorf("could not open os-release: %v", err) - } - defer osRelease.Close() - scanner := bufio.NewScanner(osRelease) - packageDist := "unknown" - for scanner.Scan() { - line := scanner.Text() - r, _ := regexp.Compile(`PLATFORM_ID="platform:([[:alnum:]]+)"`) - m := r.FindStringSubmatch(line) - if m == nil { - continue - } - packageDist = m[1] - break - } - - if err := scanner.Err(); err != nil { - return nil, nil, "", fmt.Errorf("error while scanning for package dist: %v", err) - } - - return layerIDs, layerRefs, packageDist, nil + return layerIDs, layerRefs, nil } // validate compares the list of LayerFiles and PackageFiles to see what PackageFiles diff --git a/internal/policy/container/has_modified_files_test.go b/internal/policy/container/has_modified_files_test.go index 03190d86..1962f989 100644 --- a/internal/policy/container/has_modified_files_test.go +++ b/internal/policy/container/has_modified_files_test.go @@ -5,6 +5,9 @@ import ( "context" "path" + "github.com/google/go-containerregistry/pkg/crane" + "github.com/spf13/afero" + "github.com/redhat-openshift-ecosystem/openshift-preflight/internal/image" "github.com/bombsimon/logrusr/v4" @@ -402,6 +405,37 @@ var _ = Describe("HasModifiedFiles", func() { }) }) + When("multiple no-op layers with the same IDs split layers containing RPMDB modifications", func() { + // Test case ensures that we properly deduplicate layer hashes in our file mapping to avoid cases where + // a later layer with the same ID as an earlier layer doesn't overwrite the earlier layer's file mapping. + var img image.ImageReference + var actualLayerCount int + BeforeEach(func() { + // TODO: The containerfile that generates this test fixture is stored in-repo tests/containerfiles. + // The external call here avoids having to store the image locally. A crane-built image runs into + // issues because we cannot run `microdnf` commands using Crane, and need to have multiple layers + // containing RPMDBs to test this issue correctly. + const dupeLayerTestFixture = "quay.io/opdev/preflight-test-fixture:duplicate-layers" + cImg, pullError := crane.Pull(dupeLayerTestFixture) + Expect(pullError).ToNot(HaveOccurred()) + img = image.ImageReference{ + ImageInfo: cImg, + } + + layers, err := img.ImageInfo.Layers() + Expect(err).ToNot(HaveOccurred()) + actualLayerCount = len(layers) + }) + + It("should validate and have matching layer counts", func() { + fs := afero.NewOsFs() + layerIDs, layerRefs, err := hasModifiedFiles.gatherDataToValidate(context.TODO(), img, fs) + Expect(err).ToNot(HaveOccurred()) + Expect(len(layerIDs)).To(Equal(actualLayerCount)) + Expect(len(layerRefs)).To(Equal(actualLayerCount)) + }) + }) + When("calling the top level Validate", func() { It("should fail with an invalid ImageReference", func() { passed, err := hasModifiedFiles.Validate(context.TODO(), image.ImageReference{}) diff --git a/test/containerfiles/container-duplicate-noop-layers.Dockerfile b/test/containerfiles/container-duplicate-noop-layers.Dockerfile new file mode 100644 index 00000000..b3b3c140 --- /dev/null +++ b/test/containerfiles/container-duplicate-noop-layers.Dockerfile @@ -0,0 +1,15 @@ +FROM registry.access.redhat.com/ubi8-minimal@sha256:9e458f41ff8868ceae00608a6fff35b45fd8bbe967bf8655e5ab08da5964f4d0 + +# This container file backs +# quay.io/opdev/preflight-test-fixture:duplicate-layers, and is intended to test +# an edge case with HasModifiedFiles where multiple duplicate layers were geting +# squashed into a single entry in our layer-to-file map, causing invalid +# modification flags. +# +# The produced artifact is about 100mb, so this fixture exists just to avoid +# having that blob stored in-repo. + +COPY example-license.txt /LICENSE +RUN microdnf install gzip -y +COPY example-license.txt /LICENSE +RUN microdnf install gzip -y