From e2324ddbb2a729945bbf4e7127066be008fe5056 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 15 Oct 2024 22:14:40 +0200 Subject: [PATCH 1/5] Add a FIXME about unstable name lookups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miloslav Trmač --- libimage/pull.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libimage/pull.go b/libimage/pull.go index c4ad5df0c..67838e845 100644 --- a/libimage/pull.go +++ b/libimage/pull.go @@ -533,6 +533,14 @@ func (r *Runtime) copySingleImageFromRegistry(ctx context.Context, imageName str if options.OS != runtime.GOOS { lookupImageOptions.OS = options.OS } + // FIXME: We sometimes return resolvedImageName from this function. + // The function documentation says this returns an image ID, resolvedImageName is frequently not an image ID. + // + // Ultimately Runtime.Pull looks up the returned name... again, possibly finding some other match + // than we did. + // + // This should be restructured so that the image we found here is returned to the caller of Pull + // directly, without another image -> name -> image round-trip and possible inconsistency. localImage, resolvedImageName, err = r.LookupImage(imageName, lookupImageOptions) if err != nil && !errors.Is(err, storage.ErrImageUnknown) { logrus.Errorf("Looking up %s in local storage: %v", imageName, err) From 3339ddc405347f6ef661a9b56f7a40d840e6282f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 15 Oct 2024 22:14:58 +0200 Subject: [PATCH 2/5] Don't fall back if imagesIDsForManifest fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we can't find the image we have just pulled by digest, the image was probably already removed, and returning candidate.Value could only possibly point at a _different_ image with the same tag. Instead, fail immediately. 
Signed-off-by: Miloslav Trmač --- libimage/pull.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/libimage/pull.go b/libimage/pull.go index 67838e845..095e6c289 100644 --- a/libimage/pull.go +++ b/libimage/pull.go @@ -699,10 +699,11 @@ func (r *Runtime) copySingleImageFromRegistry(ctx context.Context, imageName str } logrus.Debugf("Pulled candidate %s successfully", candidateString) - if ids, err := r.imagesIDsForManifest(manifestBytes, sys); err == nil { - return ids, nil + ids, err := r.imagesIDsForManifest(manifestBytes, sys) + if err != nil { + return nil, err } - return []string{candidate.Value.String()}, nil + return ids, nil } if localImage != nil && pullPolicy == config.PullPolicyNewer { From a376d89ecba8ba903e0db797cff232053cc34aee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 15 Oct 2024 22:18:06 +0200 Subject: [PATCH 3/5] Only return one image ID from imageIDForManifest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's no benefit in returning multiple matches; we ideally want to return exactly the image we pulled, but even if that were hard, returning multiple guesses is not what the user asked for. Signed-off-by: Miloslav Trmač --- libimage/pull.go | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/libimage/pull.go b/libimage/pull.go index 095e6c289..5486f24e0 100644 --- a/libimage/pull.go +++ b/libimage/pull.go @@ -453,53 +453,52 @@ func (r *Runtime) copyFromRegistry(ctx context.Context, ref types.ImageReference return pulledIDs, nil } -// imageIDsForManifest() parses the manifest of the copied image and then looks -// up the IDs of the matching image. There's a small slice of time, between +// imageIDForManifest() parses the manifest of the copied image and then looks +// up the ID of the matching image. 
There's a small slice of time, between // when we copy the image into local storage and when we go to look for it // using the name that we gave it when we copied it, when the name we wanted to // assign to the image could have been moved, but the image's ID will remain // the same until it is deleted. -func (r *Runtime) imagesIDsForManifest(manifestBytes []byte, sys *types.SystemContext) ([]string, error) { +func (r *Runtime) imageIDForManifest(manifestBytes []byte, sys *types.SystemContext) (string, error) { var imageDigest digest.Digest manifestType := manifest.GuessMIMEType(manifestBytes) if manifest.MIMETypeIsMultiImage(manifestType) { list, err := manifest.ListFromBlob(manifestBytes, manifestType) if err != nil { - return nil, fmt.Errorf("parsing manifest list: %w", err) + return "", fmt.Errorf("parsing manifest list: %w", err) } d, err := list.ChooseInstance(sys) if err != nil { - return nil, fmt.Errorf("choosing instance from manifest list: %w", err) + return "", fmt.Errorf("choosing instance from manifest list: %w", err) } imageDigest = d } else { d, err := manifest.Digest(manifestBytes) if err != nil { - return nil, errors.New("digesting manifest") + return "", errors.New("digesting manifest") } imageDigest = d } images, err := r.store.ImagesByDigest(imageDigest) if err != nil { - return nil, fmt.Errorf("listing images by manifest digest: %w", err) + return "", fmt.Errorf("listing images by manifest digest: %w", err) } // If you have additionStores defined and the same image stored in // both storage and additional store, it can be output twice. 
- // Fixes github.com/containers/podman/issues/18647 - results := []string{} - imageMap := map[string]bool{} - for _, image := range images { - if imageMap[image.ID] { - continue - } - imageMap[image.ID] = true - results = append(results, image.ID) - } - if len(results) == 0 { - return nil, fmt.Errorf("identifying new image by manifest digest: %w", storage.ErrImageUnknown) - } - return results, nil + // + // Worse, with zstd:chunked partial pulls, the same image can have several + // different IDs, depending on which layers of the image were pulled using the + // partial pull (are identified by TOC, not by uncompressed digest). + // + // At this point, from just the manifest digest, we can’t tell which image + // is the one that was actually pulled. (They should all have the same contents + // unless the image author is malicious.) + // So just return the first matching image ID. + if len(images) == 0 { + return "", fmt.Errorf("identifying new image by manifest digest: %w", storage.ErrImageUnknown) + } + return images[0].ID, nil } // copySingleImageFromRegistry pulls the specified, possibly unqualified, name @@ -699,11 +698,11 @@ func (r *Runtime) copySingleImageFromRegistry(ctx context.Context, imageName str } logrus.Debugf("Pulled candidate %s successfully", candidateString) - ids, err := r.imagesIDsForManifest(manifestBytes, sys) + ids, err := r.imageIDForManifest(manifestBytes, sys) if err != nil { return nil, err } - return ids, nil + return []string{ids}, nil } if localImage != nil && pullPolicy == config.PullPolicyNewer { From be7818aeae17f3e47a966307cfb05e83dbdb0939 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Tue, 15 Oct 2024 22:21:43 +0200 Subject: [PATCH 4/5] Only return one image ID (/name?) from copySingleImageFromRegistry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... because we now never return more than one. Should not change behavior. 
Signed-off-by: Miloslav Trmač --- libimage/pull.go | 48 ++++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/libimage/pull.go b/libimage/pull.go index 5486f24e0..9d2e97672 100644 --- a/libimage/pull.go +++ b/libimage/pull.go @@ -421,7 +421,11 @@ func (r *Runtime) copyFromRegistry(ctx context.Context, ref types.ImageReference } if !options.AllTags { - return r.copySingleImageFromRegistry(ctx, inputName, pullPolicy, options) + pulled, err := r.copySingleImageFromRegistry(ctx, inputName, pullPolicy, options) + if err != nil { + return nil, err + } + return []string{pulled}, nil } // Copy all tags @@ -447,7 +451,7 @@ func (r *Runtime) copyFromRegistry(ctx context.Context, ref types.ImageReference if err != nil { return nil, err } - pulledIDs = append(pulledIDs, pulled...) + pulledIDs = append(pulledIDs, pulled) } return pulledIDs, nil @@ -503,11 +507,11 @@ func (r *Runtime) imageIDForManifest(manifestBytes []byte, sys *types.SystemCont // copySingleImageFromRegistry pulls the specified, possibly unqualified, name // from a registry. On successful pull it returns the ID of the image in local -// storage. -func (r *Runtime) copySingleImageFromRegistry(ctx context.Context, imageName string, pullPolicy config.PullPolicy, options *PullOptions) ([]string, error) { //nolint:gocyclo +// storage (or, FIXME, a name/ID? that could be resolved in local storage) +func (r *Runtime) copySingleImageFromRegistry(ctx context.Context, imageName string, pullPolicy config.PullPolicy, options *PullOptions) (string, error) { //nolint:gocyclo // Sanity check. 
if err := pullPolicy.Validate(); err != nil { - return nil, err + return "", err } var ( @@ -570,23 +574,23 @@ func (r *Runtime) copySingleImageFromRegistry(ctx context.Context, imageName str if pullPolicy == config.PullPolicyNever { if localImage != nil { logrus.Debugf("Pull policy %q and %s resolved to local image %s", pullPolicy, imageName, resolvedImageName) - return []string{resolvedImageName}, nil + return resolvedImageName, nil } logrus.Debugf("Pull policy %q but no local image has been found for %s", pullPolicy, imageName) - return nil, fmt.Errorf("%s: %w", imageName, storage.ErrImageUnknown) + return "", fmt.Errorf("%s: %w", imageName, storage.ErrImageUnknown) } if pullPolicy == config.PullPolicyMissing && localImage != nil { - return []string{resolvedImageName}, nil + return resolvedImageName, nil } // If we looked up the image by ID, we cannot really pull from anywhere. if localImage != nil && strings.HasPrefix(localImage.ID(), imageName) { switch pullPolicy { case config.PullPolicyAlways: - return nil, fmt.Errorf("pull policy is always but image has been referred to by ID (%s)", imageName) + return "", fmt.Errorf("pull policy is always but image has been referred to by ID (%s)", imageName) default: - return []string{resolvedImageName}, nil + return resolvedImageName, nil } } @@ -611,9 +615,9 @@ func (r *Runtime) copySingleImageFromRegistry(ctx context.Context, imageName str resolved, err := shortnames.Resolve(sys, imageName) if err != nil { if localImage != nil && pullPolicy == config.PullPolicyNewer { - return []string{resolvedImageName}, nil + return resolvedImageName, nil } - return nil, err + return "", err } // NOTE: Below we print the description from the short-name resolution. 
@@ -645,7 +649,7 @@ func (r *Runtime) copySingleImageFromRegistry(ctx context.Context, imageName str } c, err := r.newCopier(&options.CopyOptions) if err != nil { - return nil, err + return "", err } defer c.Close() @@ -655,7 +659,7 @@ func (r *Runtime) copySingleImageFromRegistry(ctx context.Context, imageName str logrus.Debugf("Attempting to pull candidate %s for %s", candidateString, imageName) srcRef, err := registryTransport.NewReference(candidate.Value) if err != nil { - return nil, err + return "", err } if pullPolicy == config.PullPolicyNewer && localImage != nil { @@ -673,15 +677,15 @@ func (r *Runtime) copySingleImageFromRegistry(ctx context.Context, imageName str destRef, err := storageTransport.Transport.ParseStoreReference(r.store, candidate.Value.String()) if err != nil { - return nil, err + return "", err } if err := writeDesc(); err != nil { - return nil, err + return "", err } if options.Writer != nil { if _, err := io.WriteString(options.Writer, fmt.Sprintf("Trying to pull %s...\n", candidateString)); err != nil { - return nil, err + return "", err } } var manifestBytes []byte @@ -700,18 +704,18 @@ func (r *Runtime) copySingleImageFromRegistry(ctx context.Context, imageName str logrus.Debugf("Pulled candidate %s successfully", candidateString) ids, err := r.imageIDForManifest(manifestBytes, sys) if err != nil { - return nil, err + return "", err } - return []string{ids}, nil + return ids, nil } if localImage != nil && pullPolicy == config.PullPolicyNewer { - return []string{resolvedImageName}, nil + return resolvedImageName, nil } if len(pullErrors) == 0 { - return nil, fmt.Errorf("internal error: no image pulled (pull policy %s)", pullPolicy) + return "", fmt.Errorf("internal error: no image pulled (pull policy %s)", pullPolicy) } - return nil, resolved.FormatPullErrors(pullErrors) + return "", resolved.FormatPullErrors(pullErrors) } From aa722efd5cabed0e9f3e2ebae7b1cf8932f50e81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= 
Date: Tue, 15 Oct 2024 22:36:14 +0200 Subject: [PATCH 5/5] Improve image ID lookup for pulled images MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use the image's repo, not just the digest, to be more precise when zstd:chunked ambiguities are involved - Remove the multi-platform lookup code, it is never used Signed-off-by: Miloslav Trmač --- libimage/pull.go | 61 ++++++++++++++++++------------------------------ 1 file changed, 23 insertions(+), 38 deletions(-) diff --git a/libimage/pull.go b/libimage/pull.go index 9d2e97672..ad4699c60 100644 --- a/libimage/pull.go +++ b/libimage/pull.go @@ -25,7 +25,6 @@ import ( "github.com/containers/image/v5/transports/alltransports" "github.com/containers/image/v5/types" "github.com/containers/storage" - digest "github.com/opencontainers/go-digest" ociSpec "github.com/opencontainers/image-spec/specs-go/v1" "github.com/sirupsen/logrus" ) @@ -457,52 +456,38 @@ func (r *Runtime) copyFromRegistry(ctx context.Context, ref types.ImageReference return pulledIDs, nil } -// imageIDForManifest() parses the manifest of the copied image and then looks -// up the ID of the matching image. There's a small slice of time, between -// when we copy the image into local storage and when we go to look for it -// using the name that we gave it when we copied it, when the name we wanted to -// assign to the image could have been moved, but the image's ID will remain -// the same until it is deleted. 
-func (r *Runtime) imageIDForManifest(manifestBytes []byte, sys *types.SystemContext) (string, error) { - var imageDigest digest.Digest - manifestType := manifest.GuessMIMEType(manifestBytes) - if manifest.MIMETypeIsMultiImage(manifestType) { - list, err := manifest.ListFromBlob(manifestBytes, manifestType) - if err != nil { - return "", fmt.Errorf("parsing manifest list: %w", err) - } - d, err := list.ChooseInstance(sys) - if err != nil { - return "", fmt.Errorf("choosing instance from manifest list: %w", err) - } - imageDigest = d - } else { - d, err := manifest.Digest(manifestBytes) - if err != nil { - return "", errors.New("digesting manifest") - } - imageDigest = d +// imageIDForPulledImage makes a best-effort guess at an image ID for +// a just-pulled image written to destName, where the pull returned manifestBytes +func (r *Runtime) imageIDForPulledImage(destName reference.Named, manifestBytes []byte) (string, error) { + // The caller, copySingleImageFromRegistry, never triggers a multi-platform copy, so manifestBytes + // is always a single-platform manifest instance. + manifestDigest, err := manifest.Digest(manifestBytes) + if err != nil { + return "", err } - images, err := r.store.ImagesByDigest(imageDigest) + destDigestedName, err := reference.WithDigest(reference.TrimNamed(destName), manifestDigest) if err != nil { - return "", fmt.Errorf("listing images by manifest digest: %w", err) + return "", err } - - // If you have additionStores defined and the same image stored in - // both storage and additional store, it can be output twice. 
- // - // Worse, with zstd:chunked partial pulls, the same image can have several + storeRef, err := storageTransport.Transport.NewStoreReference(r.store, destDigestedName, "") + if err != nil { + return "", err + } + // With zstd:chunked partial pulls, the same image can have several // different IDs, depending on which layers of the image were pulled using the // partial pull (are identified by TOC, not by uncompressed digest). // // At this point, from just the manifest digest, we can’t tell which image // is the one that was actually pulled. (They should all have the same contents // unless the image author is malicious.) - // So just return the first matching image ID. - if len(images) == 0 { - return "", fmt.Errorf("identifying new image by manifest digest: %w", storage.ErrImageUnknown) + // + // FIXME: To return an accurate value, c/image would need to return the image ID, + // not just manifestBytes. + _, image, err := storageTransport.ResolveReference(storeRef) + if err != nil { + return "", fmt.Errorf("looking up a just-pulled image: %w", err) } - return images[0].ID, nil + return image.ID, nil } // copySingleImageFromRegistry pulls the specified, possibly unqualified, name @@ -702,7 +687,7 @@ func (r *Runtime) copySingleImageFromRegistry(ctx context.Context, imageName str } logrus.Debugf("Pulled candidate %s successfully", candidateString) - ids, err := r.imageIDForManifest(manifestBytes, sys) + ids, err := r.imageIDForPulledImage(candidate.Value, manifestBytes) if err != nil { return "", err }