From beec0c579bca23e51394408b34aaaeb7fc025843 Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Wed, 17 Aug 2022 09:50:32 +0200 Subject: [PATCH 1/6] storage: skip BenchmarkMVCCExportToSST under short Release note: None --- pkg/storage/bench_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/storage/bench_test.go b/pkg/storage/bench_test.go index 6f67d9fcf316..5b6dacfeaa05 100644 --- a/pkg/storage/bench_test.go +++ b/pkg/storage/bench_test.go @@ -108,6 +108,7 @@ func BenchmarkMVCCGarbageCollect(b *testing.B) { } func BenchmarkMVCCExportToSST(b *testing.B) { + skip.UnderShort(b) defer log.Scope(b).Close(b) numKeys := []int{64, 512, 1024, 8192, 65536} From 68237edb5167498e18ef27ddb7ccf4e178226a92 Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Wed, 17 Aug 2022 08:26:38 +0200 Subject: [PATCH 2/6] storage: fix accidental no-op in runMVCCExportToSST The keys were accidentally prefixed by 16 null bytes, and these sort before `keys.LocalMax`. As a result, the SST export in this benchmark never returned any kv pairs, rendering the benchmark useless. Release note: None --- pkg/storage/bench_test.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pkg/storage/bench_test.go b/pkg/storage/bench_test.go index 5b6dacfeaa05..ff4b821bddd1 100644 --- a/pkg/storage/bench_test.go +++ b/pkg/storage/bench_test.go @@ -1616,10 +1616,11 @@ func runMVCCExportToSST(b *testing.B, numKeys int, numRevisions int, exportAllRe ctx := context.Background() st := cluster.MakeTestingClusterSettings() - batch := engine.NewUnindexedBatch(true /* writeOnly */) + batch := engine.NewBatch() for i := 0; i < numKeys; i++ { - key := make([]byte, 16) - key = append(key, 'a', 'a', 'a') + var key []byte + key = append(key, keys.LocalMax...) + key = append(key, bytes.Repeat([]byte{'a'}, 19)...) key = encoding.EncodeUint32Ascending(key, uint32(i)) for j := 0; j < numRevisions; j++ { From d20aae427919e7957932153778f8e5a581762e72 Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Wed, 17 Aug 2022 08:51:24 +0200 Subject: [PATCH 3/6] storage: don't throw away output in runMVCCExportToSST That'll allow us to check it (more on that later). Release note: None --- pkg/storage/bench_test.go | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/pkg/storage/bench_test.go b/pkg/storage/bench_test.go index ff4b821bddd1..db7f16f1e3b5 100644 --- a/pkg/storage/bench_test.go +++ b/pkg/storage/bench_test.go @@ -1640,8 +1640,14 @@ func runMVCCExportToSST(b *testing.B, numKeys int, numRevisions int, exportAllRe b.Fatal(err) } + var buf bytes.Buffer + buf.Grow(1 << 20) b.ResetTimer() + b.StopTimer() + var assertLen int // buf.Len shouldn't change between runs for i := 0; i < b.N; i++ { + buf.Reset() + b.StartTimer() startTS := hlc.Timestamp{WallTime: int64(numRevisions / 2)} endTS := hlc.Timestamp{WallTime: int64(numRevisions + 2)} _, _, err := MVCCExportToSST(ctx, st, engine, MVCCExportOptions{ @@ -1653,18 +1659,20 @@ func runMVCCExportToSST(b *testing.B, numKeys int, numRevisions int, exportAllRe TargetSize: 0, MaxSize: 0, StopMidKey: false, - }, noopWriter{}) + }, &buf) if err != nil { b.Fatal(err) } - } - b.StopTimer() -} + b.StopTimer() -type noopWriter struct{} + if i == 0 { + require.NotZero(b, buf.Len()) + assertLen = buf.Len() + } -func (noopWriter) Close() error { return nil } -func (noopWriter) Write(p []byte) (int, error) { return len(p), nil } + require.Equal(b, assertLen, buf.Len()) + } +} func runCheckSSTConflicts( b *testing.B, numEngineKeys, numVersions, numSstKeys int, overlap, usePrefixSeek bool, From e6882fba9cae8f067b51f6a63ad1cb522ff0497e Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Wed, 17 Aug 2022 08:30:21 +0200 Subject: [PATCH 4/6] storage: minor refactor in runMVCCExportToSST We want to slide MVCC range deletions under the kvs in a follow-up commit, and this helps make that easier. Release note: None --- pkg/storage/bench_test.go | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/pkg/storage/bench_test.go b/pkg/storage/bench_test.go index db7f16f1e3b5..cb47939ce5f5 100644 --- a/pkg/storage/bench_test.go +++ b/pkg/storage/bench_test.go @@ -1616,15 +1616,24 @@ func runMVCCExportToSST(b *testing.B, numKeys int, numRevisions int, exportAllRe ctx := context.Background() st := cluster.MakeTestingClusterSettings() - batch := engine.NewBatch() - for i := 0; i < numKeys; i++ { + mkKey := func(i int) roachpb.Key { var key []byte key = append(key, keys.LocalMax...) key = append(key, bytes.Repeat([]byte{'a'}, 19)...) key = encoding.EncodeUint32Ascending(key, uint32(i)) + return key + } + + mkWall := func(j int) int64 { + return int64(j + 1) + } + + batch := engine.NewBatch() + for i := 0; i < numKeys; i++ { + key := mkKey(i) for j := 0; j < numRevisions; j++ { - mvccKey := MVCCKey{Key: key, Timestamp: hlc.Timestamp{WallTime: int64(j + 1), Logical: 0}} + mvccKey := MVCCKey{Key: key, Timestamp: hlc.Timestamp{WallTime: mkWall(j), Logical: 0}} mvccValue := MVCCValue{Value: roachpb.MakeValueFromString("foobar")} err := batch.PutMVCC(mvccKey, mvccValue) if err != nil { @@ -1648,8 +1657,8 @@ func runMVCCExportToSST(b *testing.B, numKeys int, numRevisions int, exportAllRe for i := 0; i < b.N; i++ { buf.Reset() b.StartTimer() - startTS := hlc.Timestamp{WallTime: int64(numRevisions / 2)} - endTS := hlc.Timestamp{WallTime: int64(numRevisions + 2)} + startTS := hlc.Timestamp{WallTime: mkWall(numRevisions/2 - 1)} + endTS := hlc.Timestamp{WallTime: mkWall(numRevisions + 1)} _, _, err := MVCCExportToSST(ctx, st, engine, MVCCExportOptions{ StartKey: MVCCKey{Key: keys.LocalMax}, EndKey: roachpb.KeyMax, From 752faa116cb23ae058067eda8e95753ba3646375 Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Wed, 17 Aug 2022 08:36:14 +0200 Subject: [PATCH 5/6] storage: add mvcc rangedels to BenchmarkMVCCExportToSST Also adds a sanity check that we're not emitting the range dels. This wasn't actually true, and required a small tweak to the start timestamp so that the rangedels would actually be excluded. Closes https://github.com/cockroachdb/cockroach/issues/86140. --- pkg/storage/bench_test.go | 114 +++++++++++++++++++++++++++++++++++--- 1 file changed, 106 insertions(+), 8 deletions(-) diff --git a/pkg/storage/bench_test.go b/pkg/storage/bench_test.go index cb47939ce5f5..77f53b29abde 100644 --- a/pkg/storage/bench_test.go +++ b/pkg/storage/bench_test.go @@ -111,6 +111,13 @@ func BenchmarkMVCCExportToSST(b *testing.B) { skip.UnderShort(b) defer log.Scope(b).Close(b) + // To run and compare on range keys: + // + // go test ./pkg/storage -run - -count 5 -bench BenchmarkMVCCExportToSST -timeout 500m 2>&1 | tee bench.txt + // for flavor in numRangeKeys=0 numRangeKeys=1 numRangeKeys=100; do grep -E "${flavor}[^0-9]+" bench.txt | sed -E "s/${flavor}+/X/" > $flavor.txt; done + // benchstat numRangeKeys\={0,1}.txt + // benchstat numRangeKeys\={0,100}.txt + numKeys := []int{64, 512, 1024, 8192, 65536} numRevisions := []int{1, 10, 100} exportAllRevisions := []bool{false, true} @@ -121,7 +128,17 @@ func BenchmarkMVCCExportToSST(b *testing.B) { b.Run(fmt.Sprintf("numRevisions=%d", numRevision), func(b *testing.B) { for _, exportAllRevisionsVal := range exportAllRevisions { b.Run(fmt.Sprintf("exportAllRevisions=%t", exportAllRevisionsVal), func(b *testing.B) { - runMVCCExportToSST(b, numKey, numRevision, exportAllRevisionsVal) + for _, numRangeKeys := range []int{0, 1, 100} { + b.Run(fmt.Sprintf("numRangeKeys=%d", numRangeKeys), func(b *testing.B) { + opts := mvccExportToSSTOpts{ + numKeys: numKey, + numRevisions: numRevision, + exportAllRevisions: exportAllRevisionsVal, + numRangeKeys: numRangeKeys, + } + runMVCCExportToSST(b, opts) + }) + } }) } }) @@ -1607,7 +1624,12 @@ func runBatchApplyBatchRepr( b.StopTimer() } -func runMVCCExportToSST(b *testing.B, numKeys int, numRevisions int, exportAllRevisions bool) { +type mvccExportToSSTOpts struct { + numKeys, numRevisions, numRangeKeys int + exportAllRevisions bool +} + +func runMVCCExportToSST(b *testing.B, opts mvccExportToSSTOpts) { dir, cleanup := testutils.TempDir(b) defer cleanup() engine := setupMVCCPebble(b, dir) @@ -1625,14 +1647,40 @@ func runMVCCExportToSST(b *testing.B, numKeys int, numRevisions int, exportAllRe } mkWall := func(j int) int64 { - return int64(j + 1) + wt := int64(j + 2) + return wt } + // Write range keys. + func() { + rng := rand.New(rand.NewSource(12345)) + batch := engine.NewBatch() + defer batch.Close() + for i := 0; i < opts.numRangeKeys; i++ { + // NB: regular keys are written at ts 2+, so this is below any of the + // regular writes and thus won't delete anything. + ts := hlc.Timestamp{WallTime: 1, Logical: int32(i + 1)} + start := rng.Intn(opts.numKeys) + end := start + rng.Intn(opts.numKeys-start) + 1 + // As a special case, if we're only writing one range key, write it across + // the entire span. + if opts.numRangeKeys == 1 { + start = 0 + end = opts.numKeys + 1 + } + startKey := mkKey(start) + endKey := mkKey(end) + require.NoError(b, MVCCDeleteRangeUsingTombstone( + ctx, batch, nil, startKey, endKey, ts, hlc.ClockTimestamp{}, nil, nil, false, 0, nil)) + } + require.NoError(b, batch.Commit(false /* sync */)) + }() + batch := engine.NewBatch() - for i := 0; i < numKeys; i++ { + for i := 0; i < opts.numKeys; i++ { key := mkKey(i) - for j := 0; j < numRevisions; j++ { + for j := 0; j < opts.numRevisions; j++ { mvccKey := MVCCKey{Key: key, Timestamp: hlc.Timestamp{WallTime: mkWall(j), Logical: 0}} mvccValue := MVCCValue{Value: roachpb.MakeValueFromString("foobar")} err := batch.PutMVCC(mvccKey, mvccValue) @@ -1649,6 +1697,25 @@ func runMVCCExportToSST(b *testing.B, numKeys int, numRevisions int, exportAllRe b.Fatal(err) } + startWall := mkWall(opts.numRevisions/2 - 1) // exclusive, at least 1, so never see rangedels + endWall := mkWall(opts.numRevisions + 1) // see latest revision for every key + var expKVsInSST int + if opts.exportAllRevisions { + // First, compute how many revisions are visible for each key. + // Could probably use a closed formula for this but this works. + for i := 0; i < opts.numRevisions; i++ { + wall := mkWall(i) + if wall > startWall && wall <= endWall { + expKVsInSST++ + } + } + // Then compute the total. + expKVsInSST *= opts.numKeys + } else { + // See one revision per key. + expKVsInSST = opts.numKeys + } + var buf bytes.Buffer buf.Grow(1 << 20) b.ResetTimer() @@ -1657,14 +1724,14 @@ func runMVCCExportToSST(b *testing.B, numKeys int, numRevisions int, exportAllRe for i := 0; i < b.N; i++ { buf.Reset() b.StartTimer() - startTS := hlc.Timestamp{WallTime: mkWall(numRevisions/2 - 1)} - endTS := hlc.Timestamp{WallTime: mkWall(numRevisions + 1)} + startTS := hlc.Timestamp{WallTime: startWall, Logical: math.MaxInt32} // use 1.infinity to avoid rangedels at 1. + endTS := hlc.Timestamp{WallTime: endWall} _, _, err := MVCCExportToSST(ctx, st, engine, MVCCExportOptions{ StartKey: MVCCKey{Key: keys.LocalMax}, EndKey: roachpb.KeyMax, StartTS: startTS, EndTS: endTS, - ExportAllRevisions: exportAllRevisions, + ExportAllRevisions: opts.exportAllRevisions, TargetSize: 0, MaxSize: 0, StopMidKey: false, @@ -1681,6 +1748,37 @@ func runMVCCExportToSST(b *testing.B, numKeys int, numRevisions int, exportAllRe require.Equal(b, assertLen, buf.Len()) } + + // Run sanity checks on last produced SST. + it, err := NewPebbleMemSSTIterator( + buf.Bytes(), true /* verify */, IterOptions{ + LowerBound: keys.LocalMax, + UpperBound: roachpb.KeyMax, + KeyTypes: IterKeyTypePointsAndRanges, + }, + ) + it.SeekGE(MakeMVCCMetadataKey(roachpb.LocalMax)) + require.NoError(b, err) + var n int // points + var r int // range key stacks + for { + ok, err := it.Valid() + require.NoError(b, err) + if !ok { + break + } + hasPoint, hasRange := it.HasPointAndRange() + if hasPoint { + n++ + } + if hasRange && it.RangeKeyChanged() { + r++ + } + it.Next() + } + require.Equal(b, expKVsInSST, n) + // Should not see any rangedel stacks. + require.Zero(b, r) } func runCheckSSTConflicts( From ff6d72d1aee70d4809a3b62eccf69b296336aa84 Mon Sep 17 00:00:00 2001 From: Tobias Grieger Date: Wed, 17 Aug 2022 11:26:43 +0200 Subject: [PATCH 6/6] storage: touch up comment on MVCCExportToSST Release note: None --- pkg/storage/mvcc.go | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go index cf67cf9578f4..3ef46d122415 100644 --- a/pkg/storage/mvcc.go +++ b/pkg/storage/mvcc.go @@ -5712,12 +5712,24 @@ func MVCCIsSpanEmpty( // MVCCExportToSST exports changes to the keyrange [StartKey, EndKey) over the // interval (StartTS, EndTS] as a Pebble SST. See MVCCExportOptions for options. +// StartTS may be zero. // -// Tombstones are included if all revisions are requested (all tombstones) or if -// the StartTS is non-zero (latest tombstone), including both MVCC point -// tombstones and MVCC range tombstones. Intents within the time interval will -// return a WriteIntentError, while intents outside the time interval are -// ignored. +// This comes in two principal flavors: all revisions or latest revision only. +// In all-revisions mode, exports everything matching the span and time bounds, +// i.e. extracts contiguous blocks of MVCC history. In latest-revision mode, +// extracts just the changes necessary to transform an MVCC snapshot at StartTS +// into one equivalent to the data at EndTS, but without including all +// intermediate revisions not visible at EndTS. The latter mode is used for +// incremental backups that can only be restored to EndTS, the former allows +// restoring to any intermediate timestamp. +// +// Tombstones (both point and MVCC range tombstones) are treated like revisions. +// That is, if all revisions are requested, all tombstones in (StartTS, EndTS] +// and overlapping [StartKey, EndKey) are returned. If only the latest revision +// is requested, only the most recent matching tombstone is returned. +// +// Intents within the time and span bounds will return a WriteIntentError, while +// intents outside are ignored. // // Returns an export summary and a resume key that allows resuming the export if // it reached a limit. Data is written to dest as it is collected. If an error