Skip to content

Commit

Permalink
Merge pull request #388 from alcionai/compaction-flag
Browse files Browse the repository at this point in the history
Allow controlling max manifests per content with env var
  • Loading branch information
ashmrtn authored Sep 6, 2024
2 parents c745927 + 870e789 commit 9e18afb
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 33 deletions.
24 changes: 22 additions & 2 deletions repo/manifest/committed_manifest_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"math/rand"
"os"
"sort"
"strconv"
"sync"

"github.com/pkg/errors"
Expand All @@ -18,7 +19,7 @@ import (
"github.com/kopia/kopia/repo/content/index"
)

const maxManifestsPerContent = 1000000
const maxManifestsPerContentEnvKey = "KOPIA_MAX_MANIFESTS_PER_CONTENT_COUNT"

// committedManifestManager manages committed manifest entries stored in 'm' contents.
type committedManifestManager struct {
Expand Down Expand Up @@ -110,6 +111,22 @@ func (m *committedManifestManager) writeContentChunk(
return contentID, nil
}

// getMaxManifestsPerContent returns the max number of manifests that are
// allowed in a single content piece. It checks the corresponding environment
// variable and returns a default value if there's no env var populated. Returns
// an int64 to avoid at least basic overflow issues.
func getMaxManifestsPerContent() int64 {
var retVal int64

if v := os.Getenv(maxManifestsPerContentEnvKey); v != "" {
if vint, err := strconv.ParseInt(v, 10, 64); err == nil {
retVal = vint
}
}

return retVal
}

// writeEntriesLocked writes entries in the provided map as manifest contents
// and removes all entries from the map when complete and returns the set of content IDs written
// (typically one).
Expand All @@ -131,6 +148,7 @@ func (m *committedManifestManager) writeEntriesLocked(
buf gather.WriteBuffer
man manifest
newlyCommitted = map[content.ID]bool{}
maxPerContent = getMaxManifestsPerContent()
)

defer buf.Close()
Expand All @@ -140,7 +158,9 @@ func (m *committedManifestManager) writeEntriesLocked(

// Still write all entries out in a single content piece if we're not
// compacting things.
if !isCompaction || len(man.Entries) < maxManifestsPerContent {
if !isCompaction ||
maxPerContent <= 0 ||
int64(len(man.Entries)) < maxPerContent {
continue
}

Expand Down
144 changes: 113 additions & 31 deletions repo/manifest/manifest_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package manifest
import (
"context"
"encoding/json"
"fmt"
"os"
"reflect"
"sort"
"strconv"
Expand Down Expand Up @@ -501,7 +503,9 @@ func TestWriteManyManifests(t *testing.T) {
data := blobtesting.DataMap{}
item1 := map[string]int{"foo": 1, "bar": 2}
labels1 := map[string]string{"type": "item", "color": "red"}
numManifests := maxManifestsPerContent + 5
numManifests := 105

t.Setenv(maxManifestsPerContentEnvKey, fmt.Sprintf("%d", numManifests-5))

mgr := newManagerForTesting(ctx, t, data, ManagerOptions{})

Expand All @@ -522,44 +526,122 @@ func TestWriteManyManifests(t *testing.T) {
}

func TestCompactManyManifests(t *testing.T) {
ctx := testlogging.Context(t)
data := blobtesting.DataMap{}
item1 := map[string]int{"foo": 1, "bar": 2}
labels1 := map[string]string{"type": "item", "color": "red"}
table := []struct {
name string
envFlag string
unsetEnvFlag bool
initialManifests int
otherManifests int
expectContents int
}{
{
name: "UnsetFlag",
unsetEnvFlag: true,
initialManifests: 5,
otherManifests: 5,
expectContents: 1,
},
{
name: "EmptyValue",
envFlag: "",
initialManifests: 5,
otherManifests: 5,
expectContents: 1,
},
{
name: "ZeroEnvValue",
envFlag: "0",
initialManifests: 5,
otherManifests: 5,
expectContents: 1,
},
{
name: "NegativeEnvValue",
envFlag: "-42",
initialManifests: 5,
otherManifests: 5,
expectContents: 1,
},
{
name: "MoreManifestsThanEnvValue",
envFlag: "100",
initialManifests: 99,
otherManifests: 6,
expectContents: 2,
},
{
name: "ExactlyAtEnvValue",
envFlag: "100",
initialManifests: 94,
otherManifests: 6,
expectContents: 1,
},
{
name: "ExactlyDoubleEnvValue",
envFlag: "100",
initialManifests: 194,
otherManifests: 6,
expectContents: 2,
},
{
name: "LessManifestsThanEnvValue",
envFlag: "500",
initialManifests: 5,
otherManifests: 5,
expectContents: 1,
},
}

mgr := newManagerForTesting(ctx, t, data, ManagerOptions{})
t.Setenv(maxManifestsPerContentEnvKey, "")

for i := 0; i < maxManifestsPerContent-1; i++ {
addAndVerify(ctx, t, mgr, labels1, item1)
}
for _, test := range table {
t.Run(test.name, func(t *testing.T) {
ctx := testlogging.Context(t)
data := blobtesting.DataMap{}
item1 := map[string]int{"foo": 1, "bar": 2}
labels1 := map[string]string{"type": "item", "color": "red"}

require.NoError(t, mgr.Flush(ctx))
require.NoError(t, mgr.b.Flush(ctx))
t.Setenv(maxManifestsPerContentEnvKey, test.envFlag)

// Should only have a single content piece since this wasn't compaction.
foundContents := getManifestContentCount(ctx, t, mgr)
assert.Equal(t, 1, foundContents)
if test.unsetEnvFlag {
os.Unsetenv(maxManifestsPerContentEnvKey)
}

// Add individually so we can tell that compaction deleted the old content
// pieces.
for i := 0; i < 6; i++ {
addAndVerify(ctx, t, mgr, labels1, item1)
mgr := newManagerForTesting(ctx, t, data, ManagerOptions{})

require.NoError(t, mgr.Flush(ctx))
require.NoError(t, mgr.b.Flush(ctx))
}
for i := 0; i < test.initialManifests; i++ {
addAndVerify(ctx, t, mgr, labels1, item1)
}

foundContents = getManifestContentCount(ctx, t, mgr)
assert.Equal(t, 7, foundContents)
require.NoError(t, mgr.Flush(ctx))
require.NoError(t, mgr.b.Flush(ctx))

// Run compaction which should result in multiple content pieces.
err := mgr.Compact(ctx)
require.NoError(t, err, "compacting manifests")
// Should only have a single content piece since this wasn't compaction.
foundContents := getManifestContentCount(ctx, t, mgr)
assert.Equal(t, 1, foundContents)

foundContents = getManifestContentCount(ctx, t, mgr)
assert.Equal(t, 2, foundContents)
// Add individually so we can tell that compaction deleted the old content
// pieces.
for i := 0; i < test.otherManifests; i++ {
addAndVerify(ctx, t, mgr, labels1, item1)

mans, err := mgr.Find(ctx, map[string]string{"color": "red"})
assert.NoError(t, err)
assert.Len(t, mans, maxManifestsPerContent+5)
require.NoError(t, mgr.Flush(ctx))
require.NoError(t, mgr.b.Flush(ctx))
}

foundContents = getManifestContentCount(ctx, t, mgr)
assert.Equal(t, test.otherManifests+1, foundContents)

// Run compaction which should result in multiple content pieces.
err := mgr.Compact(ctx)
require.NoError(t, err, "compacting manifests")

foundContents = getManifestContentCount(ctx, t, mgr)
assert.Equal(t, test.expectContents, foundContents)

mans, err := mgr.Find(ctx, map[string]string{"color": "red"})
assert.NoError(t, err)
assert.Len(t, mans, test.initialManifests+test.otherManifests)
})
}
}

0 comments on commit 9e18afb

Please sign in to comment.