Skip to content

Commit

Permalink
Merge pull request #2092 from mtrmac/sqlite-bic
Browse files Browse the repository at this point in the history
Implement, and default to, a SQLite BlobInfoCache instead of BoltDB
  • Loading branch information
mtrmac authored Sep 5, 2023
2 parents 864f4b0 + 874860c commit 88e6310
Show file tree
Hide file tree
Showing 15 changed files with 675 additions and 21 deletions.
3 changes: 2 additions & 1 deletion copy/compression.go
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,8 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf
if d.uploadedCompressorName != "" && d.uploadedCompressorName != internalblobinfocache.UnknownCompression {
c.blobInfoCache.RecordDigestCompressorName(uploadedInfo.Digest, d.uploadedCompressorName)
}
if srcInfo.Digest != "" && d.srcCompressorName != "" && d.srcCompressorName != internalblobinfocache.UnknownCompression {
if srcInfo.Digest != "" && srcInfo.Digest != uploadedInfo.Digest &&
d.srcCompressorName != "" && d.srcCompressorName != internalblobinfocache.UnknownCompression {
c.blobInfoCache.RecordDigestCompressorName(srcInfo.Digest, d.srcCompressorName)
}
return nil
Expand Down
6 changes: 4 additions & 2 deletions copy/copy.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,11 +242,13 @@ func Image(ctx context.Context, policyContext *signature.PolicyContext, destRef,

unparsedToplevel: image.UnparsedInstance(rawSource, nil),
// FIXME? The cache is used for sources and destinations equally, but we only have a SourceCtx and DestinationCtx.
// For now, use DestinationCtx (because blob reuse changes the behavior of the destination side more); eventually
// we might want to add a separate CommonCtx — or would that be too confusing?
// For now, use DestinationCtx (because blob reuse changes the behavior of the destination side more).
// Conceptually the cache settings should be in copy.Options instead.
blobInfoCache: internalblobinfocache.FromBlobInfoCache(blobinfocache.DefaultCache(options.DestinationCtx)),
}
defer c.close()
c.blobInfoCache.Open()
defer c.blobInfoCache.Close()

// Set the concurrentBlobCopiesSemaphore if we can copy layers in parallel.
if dest.HasThreadSafePutBlob() && rawSource.HasThreadSafeGetBlob() {
Expand Down
8 changes: 6 additions & 2 deletions copy/single.go
Original file line number Diff line number Diff line change
Expand Up @@ -662,8 +662,12 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to

ic.c.printCopyInfo("blob", srcInfo)

cachedDiffID := ic.c.blobInfoCache.UncompressedDigest(srcInfo.Digest) // May be ""
diffIDIsNeeded := ic.diffIDsAreNeeded && cachedDiffID == ""
diffIDIsNeeded := false
var cachedDiffID digest.Digest = ""
if ic.diffIDsAreNeeded {
cachedDiffID = ic.c.blobInfoCache.UncompressedDigest(srcInfo.Digest) // May be ""
diffIDIsNeeded = cachedDiffID == ""
}
// When encrypting to decrypting, only use the simple code path. We might be able to optimize more
// (e.g. if we know the DiffID of an encrypted compressed layer, it might not be necessary to pull, decrypt and decompress again),
// but it’s not trivially safe to do such things, so until someone takes the effort to make a comprehensive argument, let’s not.
Expand Down
2 changes: 1 addition & 1 deletion docker/docker_transport_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ func TestReferenceNewImageSource(t *testing.T) {
require.NoError(t, err)
src, err := ref.NewImageSource(context.Background(),
&types.SystemContext{RegistriesDirPath: "/this/does/not/exist", DockerPerHostCertDirPath: "/this/does/not/exist"})
assert.NoError(t, err)
require.NoError(t, err)
defer src.Close()
}

Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ require (
github.com/klauspost/compress v1.16.7
github.com/klauspost/pgzip v1.2.6
github.com/manifoldco/promptui v0.9.0
github.com/mattn/go-sqlite3 v1.14.17
github.com/opencontainers/go-digest v1.0.0
github.com/opencontainers/image-spec v1.1.0-rc4
github.com/opencontainers/selinux v1.11.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,8 @@ github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZ
github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-shellwords v1.0.12 h1:M2zGm7EW6UQJvDeQxo4T51eKPurbeFbe8WtebGE2xrk=
github.com/mattn/go-shellwords v1.0.12/go.mod h1:EZzvwXDESEeg03EKmM+RmDnNOPKG4lLtQsUlTZDWQ8Y=
github.com/mattn/go-sqlite3 v1.14.17 h1:mCRHCLDUBXgpKAqIKsaAaAsrAlbkeomtRFKXh2L6YIM=
github.com/mattn/go-sqlite3 v1.14.17/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo=
github.com/miekg/pkcs11 v1.1.1 h1:Ugu9pdy6vAYku5DEpVWVFPYnzV+bxB+iRdbuFSu7TvU=
github.com/miekg/pkcs11 v1.1.1/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs=
Expand Down
6 changes: 6 additions & 0 deletions internal/blobinfocache/blobinfocache.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ type v1OnlyBlobInfoCache struct {
types.BlobInfoCache
}

// Open is a no-op: the v1-only wrapper carries no state of its own that
// needs to be set up (it only embeds a types.BlobInfoCache).
func (bic *v1OnlyBlobInfoCache) Open() {
}

// Close is a no-op; see Open.
func (bic *v1OnlyBlobInfoCache) Close() {
}

// RecordDigestCompressorName is a no-op: a v1 cache has nowhere to store
// compressor information, so the data is silently dropped.
func (bic *v1OnlyBlobInfoCache) RecordDigestCompressorName(anyDigest digest.Digest, compressorName string) {
}

Expand Down
7 changes: 7 additions & 0 deletions internal/blobinfocache/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@ const (
// of compression was applied to the blobs it keeps information about.
type BlobInfoCache2 interface {
types.BlobInfoCache

// Open() sets up the cache for future accesses, potentially acquiring costly state. Each Open() must be paired with a Close().
// Note that public callers may call the types.BlobInfoCache operations without Open()/Close().
Open()
// Close destroys state created by Open().
Close()

// RecordDigestCompressorName records a compressor for the blob with the specified digest,
// or Uncompressed or UnknownCompression.
// WARNING: Only call this with LOCALLY VERIFIED data; don’t record a compressor for a
Expand Down
16 changes: 15 additions & 1 deletion pkg/blobinfocache/boltdb/boltdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ var (

// uncompressedDigestBucket stores a mapping from any digest to an uncompressed digest.
uncompressedDigestBucket = []byte("uncompressedDigest")
// digestCompressorBucket stores a mapping from any digest to a compressor, or blobinfocache.Uncompressed
// digestCompressorBucket stores a mapping from any digest to a compressor, or blobinfocache.Uncompressed (not blobinfocache.UnknownCompression).
// It may not exist in caches created by older versions, even if uncompressedDigestBucket is present.
digestCompressorBucket = []byte("digestCompressor")
// digestByUncompressedBucket stores a bucket per uncompressed digest, with the bucket containing a set of digests for that uncompressed digest
Expand Down Expand Up @@ -98,13 +98,27 @@ type cache struct {
// New returns a BlobInfoCache implementation which uses a BoltDB file at path.
//
// Most users should call blobinfocache.DefaultCache instead.
//
// Deprecated: The BoltDB implementation triggers a panic() on some database format errors; that does not allow
// practical error recovery / fallback.
//
// Use blobinfocache.DefaultCache if at all possible; if not, the pkg/blobinfocache/sqlite implementation.
func New(path string) types.BlobInfoCache {
	return new2(path)
}

// new2 returns the concrete *cache implementation for path; New exposes it
// only as the public types.BlobInfoCache interface.
func new2(path string) *cache {
	return &cache{path: path}
}

// Open() sets up the cache for future accesses, potentially acquiring costly state. Each Open() must be paired with a Close().
// Note that public callers may call the types.BlobInfoCache operations without Open()/Close().
//
// For the BoltDB implementation this is a no-op: the database file is opened
// and closed around each individual transaction instead, so there is no
// long-lived state to acquire here.
func (bdc *cache) Open() {
}

// Close destroys state created by Open(). A no-op for BoltDB; see Open.
func (bdc *cache) Close() {
}

// view runs the specified fn within a read-only transaction on the database.
func (bdc *cache) view(fn func(tx *bolt.Tx) error) (retErr error) {
// bolt.Open(bdc.path, 0600, &bolt.Options{ReadOnly: true}) will, if the file does not exist,
Expand Down
20 changes: 15 additions & 5 deletions pkg/blobinfocache/default.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@ import (
"path/filepath"

"github.com/containers/image/v5/internal/rootless"
"github.com/containers/image/v5/pkg/blobinfocache/boltdb"
"github.com/containers/image/v5/pkg/blobinfocache/memory"
"github.com/containers/image/v5/pkg/blobinfocache/sqlite"
"github.com/containers/image/v5/types"
"github.com/sirupsen/logrus"
)

const (
// blobInfoCacheFilename is the file name used for blob info caches.
// If the format changes in an incompatible way, increase the version number.
blobInfoCacheFilename = "blob-info-cache-v1.boltdb"
blobInfoCacheFilename = "blob-info-cache-v1.sqlite"
// systemBlobInfoCacheDir is the directory containing the blob info cache (in blobInfoCacheFilename) for root-running processes.
systemBlobInfoCacheDir = "/var/lib/containers/cache"
)
Expand Down Expand Up @@ -57,10 +57,20 @@ func DefaultCache(sys *types.SystemContext) types.BlobInfoCache {
}
path := filepath.Join(dir, blobInfoCacheFilename)
if err := os.MkdirAll(dir, 0700); err != nil {
logrus.Debugf("Error creating parent directories for %s, using a memory-only cache: %v", blobInfoCacheFilename, err)
logrus.Debugf("Error creating parent directories for %s, using a memory-only cache: %v", path, err)
return memory.New()
}

logrus.Debugf("Using blob info cache at %s", path)
return boltdb.New(path)
// It might make sense to keep a single sqlite cache object, and a single initialized sqlite connection, open
// as a global singleton, for the vast majority of callers who don’t override the cache location.
// OTOH that would keep a file descriptor open forever, even for long-term callers who copy images rarely,
// and the performance benefit to this over using an Open()/Close() pair for a single image copy is < 10%.

cache, err := sqlite.New(path)
if err != nil {
logrus.Debugf("Error creating a SQLite blob info cache at %s, using a memory-only cache: %v", path, err)
return memory.New()
}
logrus.Debugf("Using SQLite blob info cache at %s", path)
return cache
}
10 changes: 6 additions & 4 deletions pkg/blobinfocache/default_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ import (
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"

"github.com/containers/image/v5/pkg/blobinfocache/boltdb"
"github.com/containers/image/v5/pkg/blobinfocache/memory"
"github.com/containers/image/v5/pkg/blobinfocache/sqlite"
"github.com/containers/image/v5/types"
"github.com/stretchr/testify/assert"
)
Expand Down Expand Up @@ -103,8 +103,10 @@ func TestDefaultCache(t *testing.T) {
// Success
normalDir := filepath.Join(tmpDir, "normal")
c := DefaultCache(&types.SystemContext{BlobInfoCacheDir: normalDir})
// This is ugly hard-coding internals of boltDBCache:
assert.Equal(t, boltdb.New(filepath.Join(normalDir, blobInfoCacheFilename)), c)
// This is ugly hard-coding internals of sqlite.cache
sqliteCache, err := sqlite.New(filepath.Join(normalDir, blobInfoCacheFilename))
require.NoError(t, err)
assert.Equal(t, sqliteCache, c)

// Error running blobInfoCacheDir:
// Use t.Setenv() just as a way to set up cleanup to original values; then os.Unsetenv() to test a situation where the values are not set.
Expand All @@ -117,7 +119,7 @@ func TestDefaultCache(t *testing.T) {

// Error creating the parent directory:
unwritableDir := filepath.Join(tmpDir, "unwritable")
err := os.Mkdir(unwritableDir, 0700)
err = os.Mkdir(unwritableDir, 0700)
require.NoError(t, err)
defer func() {
err = os.Chmod(unwritableDir, 0700) // To make it possible to remove it again
Expand Down
23 changes: 19 additions & 4 deletions pkg/blobinfocache/internal/test/test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ const (
// GenericCache runs an implementation-independent set of tests, given a
// newTestCache, which can be called repeatedly and always returns a fresh cache instance
func GenericCache(t *testing.T, newTestCache func(t *testing.T) blobinfocache.BlobInfoCache2) {
for _, s := range []struct {
subs := []struct {
name string
fn func(t *testing.T, cache blobinfocache.BlobInfoCache2)
}{
Expand All @@ -35,9 +35,22 @@ func GenericCache(t *testing.T, newTestCache func(t *testing.T) blobinfocache.Bl
{"RecordKnownLocations", testGenericRecordKnownLocations},
{"CandidateLocations", testGenericCandidateLocations},
{"CandidateLocations2", testGenericCandidateLocations2},
} {
t.Run(s.name, func(t *testing.T) {
}

// Without Open()/Close()
for _, s := range subs {
t.Run("no Open: "+s.name, func(t *testing.T) {
cache := newTestCache(t)
s.fn(t, cache)
})
}

// With Open()/Close()
for _, s := range subs {
t.Run("with Open: "+s.name, func(t *testing.T) {
cache := newTestCache(t)
cache.Open()
defer cache.Close()
s.fn(t, cache)
})
}
Expand Down Expand Up @@ -212,7 +225,9 @@ func testGenericCandidateLocations2(t *testing.T, cache blobinfocache.BlobInfoCa
}
}

// Clear any "known" compression values, except on the first loop where they've never been set
// Clear any "known" compression values, except on the first loop where they've never been set.
// This probably triggers “Compressor for blob with digest … previously recorded as …, now unknown” warnings here, for test purposes;
// that shouldn’t happen in real-world usage.
if scopeIndex != 0 {
for _, e := range digestNameSet {
cache.RecordDigestCompressorName(e.d, blobinfocache.UnknownCompression)
Expand Down
14 changes: 13 additions & 1 deletion pkg/blobinfocache/memory/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ type cache struct {
uncompressedDigests map[digest.Digest]digest.Digest
digestsByUncompressed map[digest.Digest]*set.Set[digest.Digest] // stores a set of digests for each uncompressed digest
knownLocations map[locationKey]map[types.BICLocationReference]time.Time // stores last known existence time for each location reference
compressors map[digest.Digest]string // stores a compressor name, or blobinfocache.Unknown, for each digest
compressors map[digest.Digest]string // stores a compressor name, or blobinfocache.Unknown (not blobinfocache.UnknownCompression), for each digest
}

// New returns a BlobInfoCache implementation which is in-memory only.
Expand All @@ -51,6 +51,15 @@ func new2() *cache {
}
}

// Open() sets up the cache for future accesses, potentially acquiring costly state. Each Open() must be paired with a Close().
// Note that public callers may call the types.BlobInfoCache operations without Open()/Close().
//
// The in-memory implementation holds no external resources, so this is a no-op.
func (mem *cache) Open() {
}

// Close destroys state created by Open(). A no-op for the in-memory implementation.
func (mem *cache) Close() {
}

// UncompressedDigest returns an uncompressed digest corresponding to anyDigest.
// May return anyDigest if it is known to be uncompressed.
// Returns "" if nothing is known about the digest (it may be compressed or uncompressed).
Expand Down Expand Up @@ -114,6 +123,9 @@ func (mem *cache) RecordKnownLocation(transport types.ImageTransport, scope type
func (mem *cache) RecordDigestCompressorName(blobDigest digest.Digest, compressorName string) {
mem.mutex.Lock()
defer mem.mutex.Unlock()
if previous, ok := mem.compressors[blobDigest]; ok && previous != compressorName {
logrus.Warnf("Compressor for blob with digest %s previously recorded as %s, now %s", blobDigest, previous, compressorName)
}
if compressorName == blobinfocache.UnknownCompression {
delete(mem.compressors, blobDigest)
return
Expand Down
Loading

0 comments on commit 88e6310

Please sign in to comment.