From 3ea708914ef9ea9535d994dab1fb061241738b0e Mon Sep 17 00:00:00 2001 From: Ashlie Martinez Date: Thu, 5 Sep 2024 14:28:28 -0700 Subject: [PATCH 1/9] Wire up env var for splitting contents Allow setting the max number of manifests per content piece during compaction using an env var. --- repo/manifest/committed_manifest_manager.go | 25 +++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/repo/manifest/committed_manifest_manager.go b/repo/manifest/committed_manifest_manager.go index 9c0b4ca1be6..225d5af8d94 100644 --- a/repo/manifest/committed_manifest_manager.go +++ b/repo/manifest/committed_manifest_manager.go @@ -9,6 +9,7 @@ import ( "math/rand" "os" "sort" + "strconv" "sync" "github.com/pkg/errors" @@ -18,7 +19,10 @@ import ( "github.com/kopia/kopia/repo/content/index" ) -const maxManifestsPerContent = 1000000 +const ( + defaultMaxManifestsPerContent = 1000000 + maxManifestsPerContentEnvKey = "KOPIA_MAX_MANIFESTS_PER_CONTENT_COUNT" +) // committedManifestManager manages committed manifest entries stored in 'm' contents. type committedManifestManager struct { @@ -110,6 +114,18 @@ func (m *committedManifestManager) writeContentChunk( return contentID, nil } +func getMaxManifestsPerContent() int { + retVal := defaultMaxManifestsPerContent + + if v := os.Getenv(maxManifestsPerContentEnvKey); v != "" { + if vint, err := strconv.Atoi(v); err == nil { + retVal = vint + } + } + + return retVal +} + // writeEntriesLocked writes entries in the provided map as manifest contents // and removes all entries from the map when complete and returns the set of content IDs written // (typically one). @@ -140,7 +156,12 @@ func (m *committedManifestManager) writeEntriesLocked( // Still write all entries out in a single content piece if we're not // compacting things. - if !isCompaction || len(man.Entries) < maxManifestsPerContent { + // + // Even if the result of getMaxManifestsPer content is 0 or negative, we'll + // still make progress because this is a less-than check. At worst, if + // there's a configuration that asks for a negative or zero manifests per + // content piece then we'll write out each manifest individually. + if !isCompaction || len(man.Entries) < getMaxManifestsPerContent() { continue } From e81de6dce466fa74c8cf4c6517526b78db95cf63 Mon Sep 17 00:00:00 2001 From: Ashlie Martinez Date: Thu, 5 Sep 2024 14:29:00 -0700 Subject: [PATCH 2/9] Update unit test for env var --- repo/manifest/manifest_manager_test.go | 105 +++++++++++++++++-------- 1 file changed, 74 insertions(+), 31 deletions(-) diff --git a/repo/manifest/manifest_manager_test.go b/repo/manifest/manifest_manager_test.go index 73f84c60a00..a7d4af1869c 100644 --- a/repo/manifest/manifest_manager_test.go +++ b/repo/manifest/manifest_manager_test.go @@ -501,7 +501,7 @@ func TestWriteManyManifests(t *testing.T) { data := blobtesting.DataMap{} item1 := map[string]int{"foo": 1, "bar": 2} labels1 := map[string]string{"type": "item", "color": "red"} - numManifests := maxManifestsPerContent + 5 + numManifests := defaultMaxManifestsPerContent + 5 mgr := newManagerForTesting(ctx, t, data, ManagerOptions{}) @@ -522,44 +522,87 @@ func TestWriteManyManifests(t *testing.T) { } func TestCompactManyManifests(t *testing.T) { - ctx := testlogging.Context(t) - data := blobtesting.DataMap{} - item1 := map[string]int{"foo": 1, "bar": 2} - labels1 := map[string]string{"type": "item", "color": "red"} + table := []struct { + name string + envFlag string + initialManifests int + otherManifests int + expectContents int + }{ + { + name: "DefaultValue", + envFlag: "", + initialManifests: defaultMaxManifestsPerContent - 1, + otherManifests: 6, + expectContents: 2, + }, + { + name: "SmallerEnvValue", + envFlag: "100", + initialManifests: 99, + otherManifests: 6, + expectContents: 2, + }, + { + name: "ZeroEnvValue", + envFlag: "0", + initialManifests: 3, + otherManifests: 3, + expectContents: 6, + }, + { + name: "NegativeEnvValue", + envFlag: "-42", + initialManifests: 3, + otherManifests: 3, + expectContents: 6, + }, + } - mgr := newManagerForTesting(ctx, t, data, ManagerOptions{}) + for _, test := range table { + t.Run(test.name, func(t *testing.T) { + ctx := testlogging.Context(t) + data := blobtesting.DataMap{} + item1 := map[string]int{"foo": 1, "bar": 2} + labels1 := map[string]string{"type": "item", "color": "red"} - for i := 0; i < maxManifestsPerContent-1; i++ { - addAndVerify(ctx, t, mgr, labels1, item1) - } + t.Setenv(maxManifestsPerContentEnvKey, test.envFlag) - require.NoError(t, mgr.Flush(ctx)) - require.NoError(t, mgr.b.Flush(ctx)) + mgr := newManagerForTesting(ctx, t, data, ManagerOptions{}) - // Should only have a single content piece since this wasn't compaction. - foundContents := getManifestContentCount(ctx, t, mgr) - assert.Equal(t, 1, foundContents) + for i := 0; i < test.initialManifests; i++ { + addAndVerify(ctx, t, mgr, labels1, item1) + } - // Add individually so we can tell that compaction deleted the old content - // pieces. - for i := 0; i < 6; i++ { - addAndVerify(ctx, t, mgr, labels1, item1) + require.NoError(t, mgr.Flush(ctx)) + require.NoError(t, mgr.b.Flush(ctx)) - require.NoError(t, mgr.Flush(ctx)) - require.NoError(t, mgr.b.Flush(ctx)) - } + // Should only have a single content piece since this wasn't compaction. + foundContents := getManifestContentCount(ctx, t, mgr) + assert.Equal(t, 1, foundContents) - foundContents = getManifestContentCount(ctx, t, mgr) - assert.Equal(t, 7, foundContents) + // Add individually so we can tell that compaction deleted the old content + // pieces. + for i := 0; i < test.otherManifests; i++ { + addAndVerify(ctx, t, mgr, labels1, item1) - // Run compaction which should result in multiple content pieces. - err := mgr.Compact(ctx) - require.NoError(t, err, "compacting manifests") + require.NoError(t, mgr.Flush(ctx)) + require.NoError(t, mgr.b.Flush(ctx)) + } - foundContents = getManifestContentCount(ctx, t, mgr) - assert.Equal(t, 2, foundContents) + foundContents = getManifestContentCount(ctx, t, mgr) + assert.Equal(t, test.otherManifests+1, foundContents) - mans, err := mgr.Find(ctx, map[string]string{"color": "red"}) - assert.NoError(t, err) - assert.Len(t, mans, maxManifestsPerContent+5) + // Run compaction which should result in multiple content pieces. + err := mgr.Compact(ctx) + require.NoError(t, err, "compacting manifests") + + foundContents = getManifestContentCount(ctx, t, mgr) + assert.Equal(t, test.expectContents, foundContents) + + mans, err := mgr.Find(ctx, map[string]string{"color": "red"}) + assert.NoError(t, err) + assert.Len(t, mans, test.initialManifests+test.otherManifests) + }) + } } From cf82e7df6c74efcf73b57b5699568256e109d3d5 Mon Sep 17 00:00:00 2001 From: Ashlie Martinez Date: Thu, 5 Sep 2024 20:18:13 -0700 Subject: [PATCH 3/9] Add another test case --- repo/manifest/manifest_manager_test.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/repo/manifest/manifest_manager_test.go b/repo/manifest/manifest_manager_test.go index a7d4af1869c..81149bfbd79 100644 --- a/repo/manifest/manifest_manager_test.go +++ b/repo/manifest/manifest_manager_test.go @@ -557,6 +557,13 @@ func TestCompactManyManifests(t *testing.T) { otherManifests: 3, expectContents: 6, }, + { + name: "EnvLargerThanNumManifests", + envFlag: "500", + initialManifests: 3, + otherManifests: 3, + expectContents: 1, + }, } for _, test := range table { From 818eb2259c4eee701d414fbefb14b7d7df5a12c2 Mon Sep 17 00:00:00 2001 From: Ashlie Martinez Date: Thu, 5 Sep 2024 20:18:27 -0700 Subject: [PATCH 4/9] Test race condition tests --- repo/manifest/committed_manifest_manager.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repo/manifest/committed_manifest_manager.go b/repo/manifest/committed_manifest_manager.go index 225d5af8d94..42b3fc3ffa2 100644 --- a/repo/manifest/committed_manifest_manager.go +++ b/repo/manifest/committed_manifest_manager.go @@ -20,7 +20,7 @@ import ( ) const ( - defaultMaxManifestsPerContent = 1000000 + defaultMaxManifestsPerContent = 100000 maxManifestsPerContentEnvKey = "KOPIA_MAX_MANIFESTS_PER_CONTENT_COUNT" ) From 2e4d04153ee686557f07e9fb004a7e071303f9e6 Mon Sep 17 00:00:00 2001 From: Ashlie Martinez Date: Fri, 6 Sep 2024 10:24:11 -0700 Subject: [PATCH 5/9] Further harden multi-content write path Default to having the feature off if the env var isn't set or is set to a value that is <= 0. This makes things a bit safer since it requires explicit configuration to enable. Also be more careful about the types being used to contain values since we're parsing data from a string which may make different assumptions about integer widths. This also adds a layer of protection in some sense since values larger than maxInt64 will overflow to a negative value and result in the feature being disabled instead of writing out 1 manifest per content piece. --- repo/manifest/committed_manifest_manager.go | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/repo/manifest/committed_manifest_manager.go b/repo/manifest/committed_manifest_manager.go index 42b3fc3ffa2..228852a6117 100644 --- a/repo/manifest/committed_manifest_manager.go +++ b/repo/manifest/committed_manifest_manager.go @@ -19,10 +19,7 @@ import ( "github.com/kopia/kopia/repo/content/index" ) -const ( - defaultMaxManifestsPerContent = 100000 - maxManifestsPerContentEnvKey = "KOPIA_MAX_MANIFESTS_PER_CONTENT_COUNT" -) +const maxManifestsPerContentEnvKey = "KOPIA_MAX_MANIFESTS_PER_CONTENT_COUNT" // committedManifestManager manages committed manifest entries stored in 'm' contents. type committedManifestManager struct { @@ -114,11 +111,15 @@ func (m *committedManifestManager) writeContentChunk( return contentID, nil } -func getMaxManifestsPerContent() int { - retVal := defaultMaxManifestsPerContent +// getMaxManifestsPerContent returns the max number of manifests that are +// allowed in a single content piece. It checks the corresponding environment +// variable and returns a default value if there's no env var populated. Returns +// an int64 to avoid at least basic overflow issues. +func getMaxManifestsPerContent() int64 { + var retVal int64 if v := os.Getenv(maxManifestsPerContentEnvKey); v != "" { - if vint, err := strconv.Atoi(v); err == nil { + if vint, err := strconv.ParseInt(v, 10, 64); err == nil { retVal = vint } } @@ -147,6 +148,7 @@ func (m *committedManifestManager) writeEntriesLocked( buf gather.WriteBuffer man manifest newlyCommitted = map[content.ID]bool{} + maxPerContent = getMaxManifestsPerContent() ) defer buf.Close() @@ -161,7 +163,9 @@ func (m *committedManifestManager) writeEntriesLocked( // still make progress because this is a less-than check. At worst, if // there's a configuration that asks for a negative or zero manifests per // content piece then we'll write out each manifest individually. - if !isCompaction || len(man.Entries) < getMaxManifestsPerContent() { + if !isCompaction || + maxPerContent <= 0 || + int64(len(man.Entries)) < maxPerContent { continue } From 4a5eb5a2a68c062653a1c293f69390343865daa7 Mon Sep 17 00:00:00 2001 From: Ashlie Martinez Date: Fri, 6 Sep 2024 10:26:07 -0700 Subject: [PATCH 6/9] Update tests for hardened code This should also fix race detector failures in CI since we no longer need to write out as many manifests. --- repo/manifest/manifest_manager_test.go | 51 ++++++++++++++++---------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/repo/manifest/manifest_manager_test.go b/repo/manifest/manifest_manager_test.go index 81149bfbd79..2a1bbef703c 100644 --- a/repo/manifest/manifest_manager_test.go +++ b/repo/manifest/manifest_manager_test.go @@ -3,6 +3,7 @@ package manifest import ( "context" "encoding/json" + "fmt" "reflect" "sort" "strconv" @@ -501,7 +502,9 @@ func TestWriteManyManifests(t *testing.T) { data := blobtesting.DataMap{} item1 := map[string]int{"foo": 1, "bar": 2} labels1 := map[string]string{"type": "item", "color": "red"} - numManifests := defaultMaxManifestsPerContent + 5 + numManifests := 105 + + t.Setenv(maxManifestsPerContentEnvKey, fmt.Sprintf("%d", numManifests-5)) mgr := newManagerForTesting(ctx, t, data, ManagerOptions{}) @@ -525,43 +528,51 @@ func TestCompactManyManifests(t *testing.T) { table := []struct { name string envFlag string + unsetEnvFlag bool initialManifests int otherManifests int expectContents int }{ { - name: "DefaultValue", - envFlag: "", - initialManifests: defaultMaxManifestsPerContent - 1, - otherManifests: 6, - expectContents: 2, + name: "UnsetFlag", + unsetEnvFlag: true, + initialManifests: 5, + otherManifests: 5, + expectContents: 1, }, { - name: "SmallerEnvValue", - envFlag: "100", - initialManifests: 99, - otherManifests: 6, - expectContents: 2, + name: "EmptyValue", + envFlag: "", + initialManifests: 5, + otherManifests: 5, + expectContents: 1, }, { name: "ZeroEnvValue", envFlag: "0", - initialManifests: 3, - otherManifests: 3, - expectContents: 6, + initialManifests: 5, + otherManifests: 5, + expectContents: 1, }, { name: "NegativeEnvValue", envFlag: "-42", - initialManifests: 3, - otherManifests: 3, - expectContents: 6, + initialManifests: 5, + otherManifests: 5, + expectContents: 1, + }, + { + name: "MoreManifestsThanEnvValue", + envFlag: "100", + initialManifests: 99, + otherManifests: 6, + expectContents: 2, }, { - name: "EnvLargerThanNumManifests", + name: "LessManifestsThanEnvValue", envFlag: "500", - initialManifests: 3, - otherManifests: 3, + initialManifests: 5, + otherManifests: 5, expectContents: 1, }, } From 6b4327dc293751b8e4949ee869452df6aab465af Mon Sep 17 00:00:00 2001 From: Ashlie Martinez Date: Fri, 6 Sep 2024 10:41:00 -0700 Subject: [PATCH 7/9] Add more tests --- repo/manifest/manifest_manager_test.go | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/repo/manifest/manifest_manager_test.go b/repo/manifest/manifest_manager_test.go index 2a1bbef703c..4723d6c9c47 100644 --- a/repo/manifest/manifest_manager_test.go +++ b/repo/manifest/manifest_manager_test.go @@ -568,6 +568,20 @@ func TestCompactManyManifests(t *testing.T) { otherManifests: 6, expectContents: 2, }, + { + name: "ExactlyAtEnvValue", + envFlag: "100", + initialManifests: 94, + otherManifests: 6, + expectContents: 1, + }, + { + name: "ExactlyDoubleEnvValue", + envFlag: "100", + initialManifests: 194, + otherManifests: 6, + expectContents: 2, + }, { name: "LessManifestsThanEnvValue", envFlag: "500", From ab12454b539b0c657b98376e496f5692d3f891fc Mon Sep 17 00:00:00 2001 From: Ashlie Martinez Date: Fri, 6 Sep 2024 10:41:10 -0700 Subject: [PATCH 8/9] Fix comment --- repo/manifest/committed_manifest_manager.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/repo/manifest/committed_manifest_manager.go b/repo/manifest/committed_manifest_manager.go index 228852a6117..4bad68c22bf 100644 --- a/repo/manifest/committed_manifest_manager.go +++ b/repo/manifest/committed_manifest_manager.go @@ -158,11 +158,6 @@ func (m *committedManifestManager) writeEntriesLocked( // Still write all entries out in a single content piece if we're not // compacting things. - // - // Even if the result of getMaxManifestsPer content is 0 or negative, we'll - // still make progress because this is a less-than check. At worst, if - // there's a configuration that asks for a negative or zero manifests per - // content piece then we'll write out each manifest individually. if !isCompaction || maxPerContent <= 0 || int64(len(man.Entries)) < maxPerContent { From 870e7896152931bca1a5206bd4927e899caefc30 Mon Sep 17 00:00:00 2001 From: Ashlie Martinez Date: Fri, 6 Sep 2024 10:44:42 -0700 Subject: [PATCH 9/9] Actually unset env if test asks --- repo/manifest/manifest_manager_test.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/repo/manifest/manifest_manager_test.go b/repo/manifest/manifest_manager_test.go index 4723d6c9c47..7f6896b6a4c 100644 --- a/repo/manifest/manifest_manager_test.go +++ b/repo/manifest/manifest_manager_test.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "os" "reflect" "sort" "strconv" @@ -591,6 +592,8 @@ func TestCompactManyManifests(t *testing.T) { }, } + t.Setenv(maxManifestsPerContentEnvKey, "") + for _, test := range table { t.Run(test.name, func(t *testing.T) { ctx := testlogging.Context(t) @@ -600,6 +603,10 @@ func TestCompactManyManifests(t *testing.T) { t.Setenv(maxManifestsPerContentEnvKey, test.envFlag) + if test.unsetEnvFlag { + os.Unsetenv(maxManifestsPerContentEnvKey) + } + mgr := newManagerForTesting(ctx, t, data, ManagerOptions{}) for i := 0; i < test.initialManifests; i++ {