From a8d34649f6fd646d58528aa2b3ffaa7d6566073f Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Sat, 13 Aug 2022 20:51:51 +0000 Subject: [PATCH] storage: use `RangeKeyChanged()` in `ComputeStats()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch makes use of `RangeKeyChanged()` in `ComputeStats()`, for more efficient processing of MVCC range tombstones. ``` name old time/op new time/op delta MVCCComputeStats_Pebble/valueSize=8/numRangeKeys=0-24 230ms ± 0% 220ms ± 1% -4.57% (p=0.008 n=5+5) MVCCComputeStats_Pebble/valueSize=8/numRangeKeys=1-24 344ms ± 1% 271ms ± 1% -21.18% (p=0.008 n=5+5) MVCCComputeStats_Pebble/valueSize=8/numRangeKeys=100-24 365ms ± 1% 293ms ± 0% -19.80% (p=0.008 n=5+5) MVCCComputeStats_Pebble/valueSize=32/numRangeKeys=0-24 162ms ± 1% 154ms ± 0% -4.92% (p=0.008 n=5+5) MVCCComputeStats_Pebble/valueSize=32/numRangeKeys=1-24 239ms ± 0% 190ms ± 0% -20.75% (p=0.008 n=5+5) MVCCComputeStats_Pebble/valueSize=32/numRangeKeys=100-24 260ms ± 1% 209ms ± 0% -19.54% (p=0.008 n=5+5) MVCCComputeStats_Pebble/valueSize=256/numRangeKeys=0-24 55.7ms ± 8% 51.0ms ± 7% ~ (p=0.095 n=5+5) MVCCComputeStats_Pebble/valueSize=256/numRangeKeys=1-24 72.8ms ± 2% 58.6ms ± 1% -19.49% (p=0.016 n=5+4) MVCCComputeStats_Pebble/valueSize=256/numRangeKeys=100-24 82.1ms ± 5% 67.9ms ± 3% -17.26% (p=0.008 n=5+5) ``` Release note: None --- pkg/kv/bulk/sst_batcher.go | 2 +- pkg/kv/kvserver/stateloader/stateloader.go | 2 +- pkg/storage/mvcc.go | 47 ++++++++++++---------- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/pkg/kv/bulk/sst_batcher.go b/pkg/kv/bulk/sst_batcher.go index 2db090484dd7..d1c88e7c6a44 100644 --- a/pkg/kv/bulk/sst_batcher.go +++ b/pkg/kv/bulk/sst_batcher.go @@ -648,7 +648,7 @@ func (b *SSTBatcher) addSSTable( sendStart := timeutil.Now() // Currently, the SSTBatcher cannot ingest range keys, so it is safe to - // ComputeStatsForRange with an iterator that only surfaces point keys. + // ComputeStats with an iterator that only surfaces point keys. iterOpts := storage.IterOptions{ KeyTypes: storage.IterKeyTypePointsOnly, LowerBound: start, diff --git a/pkg/kv/kvserver/stateloader/stateloader.go b/pkg/kv/kvserver/stateloader/stateloader.go index e19a2b3d62ae..ece27e90c3e6 100644 --- a/pkg/kv/kvserver/stateloader/stateloader.go +++ b/pkg/kv/kvserver/stateloader/stateloader.go @@ -213,7 +213,7 @@ func (rsl StateLoader) SetRangeAppliedState( RaftAppliedIndexTerm: appliedIndexTerm, } // The RangeAppliedStateKey is not included in stats. This is also reflected - // in ComputeStatsForRange. + // in ComputeStats. ms := (*enginepb.MVCCStats)(nil) return storage.MVCCPutProto(ctx, readWriter, ms, rsl.RangeAppliedStateKey(), hlc.Timestamp{}, hlc.ClockTimestamp{}, nil, as) diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go index 2ba1bcdf14ca..b9de3d4f8e84 100644 --- a/pkg/storage/mvcc.go +++ b/pkg/storage/mvcc.go @@ -4844,10 +4844,10 @@ func MVCCGarbageCollect( } // For GCBytesAge, this requires keeping track of the previous key's - // timestamp (prevNanos). See ComputeStatsForRange for a more easily digested - // and better commented version of this logic. The below block will set - // prevNanos to the appropriate value and position the iterator at the - // first garbage version. + // timestamp (prevNanos). See ComputeStats for a more easily digested and + // better commented version of this logic. The below block will set + // prevNanos to the appropriate value and position the iterator at the first + // garbage version. prevNanos := timestamp.WallTime { // If true - forward iteration positioned iterator on first garbage @@ -5446,8 +5446,8 @@ func computeStatsForIterWithVisitors( ) (enginepb.MVCCStats, error) { var ms enginepb.MVCCStats var meta enginepb.MVCCMetadata - var prevKey, prevRangeStart []byte - first := false + var prevKey roachpb.Key + var first bool // Values start accruing GCBytesAge at the timestamp at which they // are shadowed (i.e. overwritten) whereas deletion tombstones @@ -5455,7 +5455,7 @@ func computeStatsForIterWithVisitors( // reverse chronological order and use this variable to keep track // of the point in time at which the current key begins to age. var accrueGCAgeNanos int64 - var rangeKeys MVCCRangeKeyStack + var rangeTombstones MVCCRangeKeyVersions for ; ; iter.Next() { if ok, err := iter.Valid(); err != nil { @@ -5464,14 +5464,14 @@ func computeStatsForIterWithVisitors( break } - hasPoint, hasRange := iter.HasPointAndRange() - - if hasRange { - if rangeStart := iter.RangeBounds().Key; !rangeStart.Equal(prevRangeStart) { - prevRangeStart = append(prevRangeStart[:0], rangeStart...) - rangeKeys = iter.RangeKeys().Clone() + // Process MVCC range tombstones, and buffer them in rangeTombstones + // for all overlapping point keys. + if iter.RangeKeyChanged() { + if hasPoint, hasRange := iter.HasPointAndRange(); hasRange { + rangeKeys := iter.RangeKeys() + rangeKeys.Versions.CloneInto(&rangeTombstones) - for i, v := range rangeKeys.Versions { + for i, v := range rangeTombstones { // Only the top-most fragment contributes the key and its bounds, but // all versions contribute timestamps and values. // @@ -5497,13 +5497,13 @@ func computeStatsForIterWithVisitors( } } } - } - } else if !rangeKeys.IsEmpty() { - rangeKeys = MVCCRangeKeyStack{} - } - if !hasPoint { - continue + if !hasPoint { + continue + } + } else { + rangeTombstones.Clear() + } } unsafeKey := iter.UnsafeKey() @@ -5540,9 +5540,14 @@ func computeStatsForIterWithVisitors( // Find the closest range tombstone above the point key. Range tombstones // cannot exist above intents, and are undefined across inline values, so we // only take them into account for versioned values. + // + // TODO(erikgrinaker): Rather than doing a full binary search for each + // point, we can keep track of the current index and move downwards in the + // stack as we descend through older versions, resetting once we hit a new + // key. var nextRangeTombstone hlc.Timestamp if isValue { - if v, ok := rangeKeys.FirstAbove(unsafeKey.Timestamp); ok { + if v, ok := rangeTombstones.FirstAbove(unsafeKey.Timestamp); ok { nextRangeTombstone = v.Timestamp } }