Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support pulling Ollama [non-]OCI image #2539

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions docker/docker_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -986,7 +986,8 @@ func (c *dockerClient) fetchManifest(ctx context.Context, ref dockerReference, t
if err != nil {
return nil, "", err
}
logrus.Debugf("Content-Type from manifest GET is %q", res.Header.Get("Content-Type"))
contentType := res.Header.Get("Content-Type")
logrus.Debugf("Content-Type from manifest GET is %q", contentType)
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
return nil, "", fmt.Errorf("reading manifest %s in %s: %w", tagOrDigest, ref.ref.Name(), registryHTTPResponseToError(res))
Expand All @@ -996,7 +997,18 @@ func (c *dockerClient) fetchManifest(ctx context.Context, ref dockerReference, t
if err != nil {
return nil, "", err
}
return manblob, simplifyContentType(res.Header.Get("Content-Type")), nil
// Extra check for the content type in the manifest
if contentType == "text/plain" {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • It is fundamentally unsound to override an externally-supplied MIME type of some data by parsing the data in another way and trusting its contents. The whole point of having the MIME type separate is that it directs how the data is to be interpreted. I’m unhappy about such special cases, and I’m especially unhappy about adding them in entirely new use cases where we could just choose not to do that. Why is this necessary here?
  • The docker transport has no business interpreting image contents this way; that’s the responsibility of the generic code.
  • … and the generic code already has a heuristic fallback for text/plain; this would override/break that.

man := make(map[string]any)
if err := json.Unmarshal(manblob, &man); err != nil {
return nil, "", fmt.Errorf("decoding manifest %s in %s: %w", tagOrDigest, ref.ref.Name(), err)
}
if mediaType, ok := man["mediaType"]; ok {
contentType = mediaType.(string)
logrus.Debugf("Content-Type from manifest JSON is %q", contentType)
}
}
return manblob, simplifyContentType(contentType), nil
}

// getExternalBlob returns the reader of the first available blob URL from urls, which must not be empty.
Expand Down
21 changes: 21 additions & 0 deletions internal/manifest/manifest.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,27 @@ const (
DockerV2Schema2ForeignLayerMediaType = "application/vnd.docker.image.rootfs.foreign.diff.tar"
// DockerV2Schema2ForeignLayerMediaType is the MIME type used for gzipped schema 2 foreign layers.
DockerV2Schema2ForeignLayerMediaTypeGzip = "application/vnd.docker.image.rootfs.foreign.diff.tar.gzip"

// OllamaImageLayerMediaTypePrefix is the prefix for Ollama image layer media types.
OllamaImageLayerMediaTypePrefix = "application/vnd.ollama.image."
// OllamaImageLayerMediaType is the media type used for Ollama image model layers.
OllamaImageModelLayerMediaType = OllamaImageLayerMediaTypePrefix + "model"
// OllamaImageAdapterLayerMediaType is the media type used for Ollama image adapter layers.
OllamaImageAdapterLayerMediaType = OllamaImageLayerMediaTypePrefix + "adapter"
// OllamaImageProjectorLayerMediaType is the media type used for Ollama image projector layers.
OllamaImageProjectorLayerMediaType = OllamaImageLayerMediaTypePrefix + "projector"
// OllamaImagePromptLayerMediaType is the media type used for Ollama image prompt layers.
OllamaImagePromptLayerMediaType = OllamaImageLayerMediaTypePrefix + "prompt"
// OllamaImageTemplateLayerMediaType is the media type used for Ollama image template layers.
OllamaImageTemplateLayerMediaType = OllamaImageLayerMediaTypePrefix + "template"
// OllamaImageSystemLayerMediaType is the media type used for Ollama image system layers.
OllamaImageSystemLayerMediaType = OllamaImageLayerMediaTypePrefix + "system"
// OllamaImageParamsLayerMediaType is the media type used for Ollama image params layers.
OllamaImageParamsLayerMediaType = OllamaImageLayerMediaTypePrefix + "params"
// OllamaImageMessagesLayerMediaType is the media type used for Ollama image messages layers.
OllamaImageMessagesLayerMediaType = OllamaImageLayerMediaTypePrefix + "messages"
// OllamaImageLicenseLayerMediaType is the media type used for Ollama image license layers.
OllamaImageLicenseLayerMediaType = OllamaImageLayerMediaTypePrefix + "license"
)

// GuessMIMEType guesses MIME type of a manifest and returns it _if it is recognized_, or "" if unknown or unrecognized.
Expand Down
23 changes: 23 additions & 0 deletions manifest/manifest.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,27 @@ const (
DockerV2Schema2ForeignLayerMediaType = manifest.DockerV2Schema2ForeignLayerMediaType
// DockerV2Schema2ForeignLayerMediaType is the MIME type used for gzipped schema 2 foreign layers.
DockerV2Schema2ForeignLayerMediaTypeGzip = manifest.DockerV2Schema2ForeignLayerMediaTypeGzip

// OllamaImageLayerMediaTypePrefix is the prefix for all Ollama image layer media types.
OllamaImageLayerMediaTypePrefix = manifest.OllamaImageLayerMediaTypePrefix
// OllamaImageModelLayerMediaType is the media type used for model layers.
OllamaImageModelLayerMediaType = manifest.OllamaImageModelLayerMediaType
// OllamaImageAdapterLayerMediaType is the media type used for adapter layers.
OllamaImageAdapterLayerMediaType = manifest.OllamaImageAdapterLayerMediaType
// OllamaImageProjectorLayerMediaType is the media type used for projector layers.
OllamaImageProjectorLayerMediaType = manifest.OllamaImageProjectorLayerMediaType
// OllamaImagePromptLayerMediaType is the media type used for prompt layers.
OllamaImagePromptLayerMediaType = manifest.OllamaImagePromptLayerMediaType
// OllamaImageTemplateLayerMediaType is the media type used for template layers.
OllamaImageTemplateLayerMediaType = manifest.OllamaImageTemplateLayerMediaType
// OllamaImageSystemLayerMediaType is the media type used for system layers.
OllamaImageSystemLayerMediaType = manifest.OllamaImageSystemLayerMediaType
// OllamaImageParamsLayerMediaType is the media type used for params layers.
OllamaImageParamsLayerMediaType = manifest.OllamaImageParamsLayerMediaType
// OllamaImageMessagesLayerMediaType is the media type used for messages layers.
OllamaImageMessagesLayerMediaType = manifest.OllamaImageMessagesLayerMediaType
// OllamaImageLicenseLayerMediaType is the media type used for license layers.
OllamaImageLicenseLayerMediaType = manifest.OllamaImageLicenseLayerMediaType
)

// NonImageArtifactError (detected via errors.As) is used when asking for an image-specific operation
Expand All @@ -43,6 +64,8 @@ func SupportedSchema2MediaType(m string) error {
switch m {
case DockerV2ListMediaType, DockerV2Schema1MediaType, DockerV2Schema1SignedMediaType, DockerV2Schema2ConfigMediaType, DockerV2Schema2ForeignLayerMediaType, DockerV2Schema2ForeignLayerMediaTypeGzip, DockerV2Schema2LayerMediaType, DockerV2Schema2MediaType, DockerV2SchemaLayerMediaTypeUncompressed:
return nil
case OllamaImageModelLayerMediaType, OllamaImageAdapterLayerMediaType, OllamaImageProjectorLayerMediaType, OllamaImagePromptLayerMediaType, OllamaImageTemplateLayerMediaType, OllamaImageSystemLayerMediaType, OllamaImageParamsLayerMediaType, OllamaImageMessagesLayerMediaType, OllamaImageLicenseLayerMediaType:
return nil
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don’t know why we would add all of these to v2s2; that’s a ~frozen format.

And if we did add them, we would need to adjust all of the rest of the v2s2 code to correctly interpret such data, at the very least to not parse it as an ordinary image.

Either way, it seems to me that this should either be completely opaque (a proper OCI artifact, maybe), or intentionally a completely new manifest / image format implementation.

default:
return fmt.Errorf("unsupported docker v2s2 media type: %q", m)
}
Expand Down
26 changes: 19 additions & 7 deletions storage/storage_dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"os"
"path/filepath"
"slices"
"strings"
"sync"
"sync/atomic"

Expand Down Expand Up @@ -125,8 +126,9 @@ type storageImageDestinationLockProtected struct {

// addedLayerInfo records data about a layer to use in this image.
type addedLayerInfo struct {
digest digest.Digest // Mandatory, the digest of the layer.
emptyLayer bool // The layer is an “empty”/“throwaway” one, and may or may not be physically represented in various transport / storage systems. false if the manifest type does not have the concept.
digest digest.Digest // Mandatory, the digest of the layer.
emptyLayer bool // The layer is an “empty”/“throwaway” one, and may or may not be physically represented in various transport / storage systems. false if the manifest type does not have the concept.
layerFilename *string
}

// newImageDestination sets us up to write a new image, caching blobs in a temporary directory until
Expand Down Expand Up @@ -218,9 +220,18 @@ func (s *storageImageDestination) PutBlobWithOptions(ctx context.Context, stream
return info, nil
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is an early return above; in that case .filename would not be set.

OK, that’s somewhat theoretical. But also, nothing sets .filename on the TryReusingBlobWithOptions code path. That one is 100% a blocker.


layerFilename := func() *string {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don’t see why a nested function is necessary here.

if strings.HasPrefix(blobinfo.MediaType, manifest.OllamaImageLayerMediaTypePrefix) {
filename := strings.TrimPrefix(blobinfo.MediaType, manifest.OllamaImageLayerMediaTypePrefix)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Choosing a “filename”, whatever the value is, this way makes no sense to me. What happens if there are two layers with the same MIME type in the image?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What would the storageImageSource counterpart look like?!

return &filename
}
return nil
}()

return info, s.queueOrCommit(*options.LayerIndex, addedLayerInfo{
digest: info.Digest,
emptyLayer: options.EmptyLayer,
digest: info.Digest,
emptyLayer: options.EmptyLayer,
layerFilename: layerFilename,
})
}

Expand Down Expand Up @@ -817,7 +828,7 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si
if index != 0 {
prev, ok := s.indexToStorageID[index-1]
if !ok {
return false, fmt.Errorf("Internal error: commitLayer called with previous layer %d not committed yet", index-1)
return false, fmt.Errorf("internal error: commitLayer called with previous layer %d not committed yet", index-1)
}
parentLayer = prev
}
Expand Down Expand Up @@ -873,7 +884,7 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si
return false, nil
}

layer, err := s.createNewLayer(index, info.digest, parentLayer, id)
layer, err := s.createNewLayer(index, info.digest, parentLayer, id, info.layerFilename)
if err != nil {
return false, err
}
Expand All @@ -886,7 +897,7 @@ func (s *storageImageDestination) commitLayer(index int, info addedLayerInfo, si

// createNewLayer creates a new layer newLayerID for (index, layerDigest) on top of parentLayer (which may be "").
// If the layer cannot be committed yet, the function returns (nil, nil).
func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.Digest, parentLayer, newLayerID string) (*storage.Layer, error) {
func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.Digest, parentLayer, newLayerID string, layerFilename *string) (*storage.Layer, error) {
s.lock.Lock()
diffOutput, ok := s.lockProtected.diffOutputs[index]
s.lock.Unlock()
Expand Down Expand Up @@ -1061,6 +1072,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D
OriginalDigest: trustedOriginalDigest,
// This might be "" if trusted.layerIdentifiedByTOC; in that case PutLayer will compute the value from the stream.
UncompressedDigest: trusted.diffID,
LayerFilename: layerFilename,
}, file)
if err != nil && !errors.Is(err, storage.ErrDuplicateID) {
return nil, fmt.Errorf("adding layer with blob %q/%q/%q: %w", trusted.blobDigest, trusted.tocDigest, trusted.diffID, err)
Expand Down