diff --git a/pkg/kv/kvserver/batcheval/cmd_gc.go b/pkg/kv/kvserver/batcheval/cmd_gc.go
index 95491d3ee48a..b148481d6d3b 100644
--- a/pkg/kv/kvserver/batcheval/cmd_gc.go
+++ b/pkg/kv/kvserver/batcheval/cmd_gc.go
@@ -53,6 +53,10 @@ func declareKeysGC(
 	)) {
 		latchSpans.AddMVCC(spanset.SpanReadWrite, span, hlc.MaxTimestamp)
 	}
+	if rk := gcr.ClearRangeKey; rk != nil {
+		latchSpans.AddMVCC(spanset.SpanReadWrite, roachpb.Span{Key: rk.StartKey, EndKey: rk.EndKey},
+			hlc.MaxTimestamp)
+	}
 	// The RangeGCThresholdKey is only written to if the
 	// req.(*GCRequest).Threshold is set. However, we always declare an exclusive
 	// access over this key in order to serialize with other GC requests.
@@ -134,12 +138,19 @@ func GC(
 	// GC request's effect from the raft log. Latches held on the leaseholder
 	// would have no impact on a follower read.
 	if !args.Threshold.IsEmpty() &&
-		(len(args.Keys) != 0 || len(args.RangeKeys) != 0) &&
+		(len(args.Keys) != 0 || len(args.RangeKeys) != 0 || args.ClearRangeKey != nil) &&
 		!cArgs.EvalCtx.EvalKnobs().AllowGCWithNewThresholdAndKeys {
 		return result.Result{}, errors.AssertionFailedf(
 			"GC request can set threshold or it can GC keys, but it is unsafe for it to do both")
 	}
 
+	// We do not allow removal of point or range keys to be combined with the
+	// clear range operation, as they could cover the same set of keys.
+	if (len(args.Keys) != 0 || len(args.RangeKeys) != 0) && args.ClearRangeKey != nil {
+		return result.Result{}, errors.AssertionFailedf(
+			"GC request can remove point and range keys or clear the entire range, but it is unsafe for it to do both")
+	}
+
 	// All keys must be inside the current replica range. Keys outside
 	// of this range in the GC request are dropped silently, which is
 	// safe because they can simply be re-collected later on the correct
@@ -178,6 +189,18 @@ func GC(
 		return result.Result{}, err
 	}
 
+	// Fast path operation to try to remove all user key data from the range.
+	if rk := args.ClearRangeKey; rk != nil {
+		if !rk.StartKey.Equal(desc.StartKey.AsRawKey()) || !rk.EndKey.Equal(desc.EndKey.AsRawKey()) {
+			return result.Result{}, errors.Errorf("gc with clear range operation can only be used on the full range")
+		}
+
+		if err := storage.MVCCGarbageCollectWholeRange(ctx, readWriter, cArgs.Stats,
+			rk.StartKey, rk.EndKey, cArgs.EvalCtx.GetGCThreshold(), cArgs.EvalCtx.GetMVCCStats()); err != nil {
+			return result.Result{}, err
+		}
+	}
+
 	// Optionally bump the GC threshold timestamp.
 	var res result.Result
 	if !args.Threshold.IsEmpty() {
diff --git a/pkg/kv/kvserver/gc/gc.go b/pkg/kv/kvserver/gc/gc.go
index 8a575f20da10..d016930fa506 100644
--- a/pkg/kv/kvserver/gc/gc.go
+++ b/pkg/kv/kvserver/gc/gc.go
@@ -151,7 +151,9 @@ type Thresholder interface {
 
 // PureGCer is part of the GCer interface.
 type PureGCer interface {
-	GC(context.Context, []roachpb.GCRequest_GCKey, []roachpb.GCRequest_GCRangeKey) error
+	GC(context.Context, []roachpb.GCRequest_GCKey, []roachpb.GCRequest_GCRangeKey,
+		*roachpb.GCRequest_GCClearRangeKey,
+	) error
 }
 
 // A GCer is an abstraction used by the MVCC GC queue to carry out chunked deletions.
@@ -170,7 +172,10 @@ func (NoopGCer) SetGCThreshold(context.Context, Threshold) error { return nil }
 
 // GC implements storage.GCer.
 func (NoopGCer) GC(
-	context.Context, []roachpb.GCRequest_GCKey, []roachpb.GCRequest_GCRangeKey,
+	context.Context,
+	[]roachpb.GCRequest_GCKey,
+	[]roachpb.GCRequest_GCRangeKey,
+	*roachpb.GCRequest_GCClearRangeKey,
 ) error {
 	return nil
 }
@@ -231,6 +236,12 @@ type Info struct {
 	// AffectedVersionsRangeValBytes is the number of (fully encoded) bytes deleted from values that
 	// belong to removed range keys.
 	AffectedVersionsRangeValBytes int64
+	// ClearRangeKeyOperations reports 1 if GC succeeded in performing
+	// collection with a ClearRange operation.
+	ClearRangeKeyOperations int
+	// ClearRangeKeyFailures reports 1 if GC identified an opportunity to
+	// collect with a ClearRange operation, but the request failed.
+	ClearRangeKeyFailures int
 }
 
 // RunOptions contains collection of limits that GC run applies when performing operations
@@ -299,7 +310,7 @@ func Run(
 		Threshold: newThreshold,
 	}
 
-	err := processReplicatedKeyRange(ctx, desc, snap, now, newThreshold, options.IntentAgeThreshold, gcer,
+	fastPath, err := processReplicatedKeyRange(ctx, desc, snap, now, newThreshold, options.IntentAgeThreshold, gcer,
 		intentBatcherOptions{
 			maxIntentsPerIntentCleanupBatch:        options.MaxIntentsPerIntentCleanupBatch,
 			maxIntentKeyBytesPerIntentCleanupBatch: options.MaxIntentKeyBytesPerIntentCleanupBatch,
@@ -309,7 +320,7 @@ func Run(
 	if err != nil {
 		return Info{}, err
 	}
-	err = processReplicatedRangeTombstones(ctx, desc, snap, now, newThreshold, gcer, &info)
+	err = processReplicatedRangeTombstones(ctx, desc, snap, fastPath, now, newThreshold, gcer, &info)
 	if err != nil {
 		return Info{}, err
 	}
@@ -343,6 +354,7 @@ func Run(
 //
 // The logic iterates all versions of all keys in the range from oldest to
 // newest. Expired intents are written into the txnMap and intentKeyMap.
+// Returns true if clear range was used to remove all user data.
 func processReplicatedKeyRange(
 	ctx context.Context,
 	desc *roachpb.RangeDescriptor,
@@ -354,7 +366,30 @@ func processReplicatedKeyRange(
 	options intentBatcherOptions,
 	cleanupIntentsFn CleanupIntentsFunc,
 	info *Info,
-) error {
+) (bool, error) {
+	// Perform the fast path check before doing any per-key GC. The fast path
+	// only collects the user key span, so if it succeeds we don't need to
+	// clean that span up again below.
+	excludeUserKeySpan := false
+	{
+		start := desc.StartKey.AsRawKey()
+		end := desc.EndKey.AsRawKey()
+		if coveredByRangeTombstone, err := storage.CanGCEntireRange(ctx, snap, start, end,
+			threshold); err == nil && coveredByRangeTombstone {
+			if err = gcer.GC(ctx, nil, nil, &roachpb.GCRequest_GCClearRangeKey{
+				StartKey: start,
+				EndKey:   end,
+			}); err == nil {
+				excludeUserKeySpan = true
+				info.ClearRangeKeyOperations++
+			} else {
+				log.Warningf(ctx, "failed to perform GC clear range operation on range %s: %s",
+					desc.String(), err)
+				info.ClearRangeKeyFailures++
+			}
+		}
+	}
+
 	var alloc bufalloc.ByteAllocator
 	// Compute intent expiration (intent age at which we attempt to resolve).
intentExp := now.Add(-intentAgeThreshold.Nanoseconds(), 0) @@ -401,13 +436,13 @@ func processReplicatedKeyRange( gcTimestampForThisKey hlc.Timestamp sentBatchForThisKey bool ) - it := makeGCIterator(desc, snap, threshold) + it := makeGCIterator(desc, snap, threshold, excludeUserKeySpan) defer it.close() for ; ; it.step() { s, ok := it.state() if !ok { if it.err != nil { - return it.err + return false, it.err } break } @@ -416,7 +451,7 @@ func processReplicatedKeyRange( } if s.curIsIntent() { if err := handleIntent(s.next); err != nil { - return err + return false, err } continue } @@ -458,9 +493,9 @@ func processReplicatedKeyRange( } // If limit was reached, delegate to GC'r to remove collected batch. if shouldSendBatch { - if err := gcer.GC(ctx, batchGCKeys, nil); err != nil { + if err := gcer.GC(ctx, batchGCKeys, nil, nil); err != nil { if errors.Is(err, ctx.Err()) { - return err + return false, err } // Even though we are batching the GC process, it's // safe to continue because we bumped the GC @@ -476,16 +511,16 @@ func processReplicatedKeyRange( // We need to send out last intent cleanup batch. if err := intentBatcher.maybeFlushPendingIntents(ctx); err != nil { if errors.Is(err, ctx.Err()) { - return err + return false, err } log.Warningf(ctx, "failed to cleanup intents batch: %v", err) } if len(batchGCKeys) > 0 { - if err := gcer.GC(ctx, batchGCKeys, nil); err != nil { - return err + if err := gcer.GC(ctx, batchGCKeys, nil, nil); err != nil { + return false, err } } - return nil + return excludeUserKeySpan, nil } type intentBatcher struct { @@ -853,7 +888,7 @@ func (b *rangeKeyBatcher) flushPendingFragments(ctx context.Context) error { } b.pending = b.pending[:0] b.pendingSize = 0 - return b.gcer.GC(ctx, nil, toSend) + return b.gcer.GC(ctx, nil, toSend, nil) } return nil } @@ -862,15 +897,17 @@ func processReplicatedRangeTombstones( ctx context.Context, desc *roachpb.RangeDescriptor, snap storage.Reader, + excludeUserKeySpan bool, now hlc.Timestamp, gcThreshold hlc.Timestamp, gcer GCer, info *Info, ) error { iter := rditer.NewReplicaMVCCDataIterator(desc, snap, rditer.ReplicaDataIteratorOptions{ - Reverse: false, - IterKind: storage.MVCCKeyIterKind, - KeyTypes: storage.IterKeyTypeRangesOnly, + Reverse: false, + IterKind: storage.MVCCKeyIterKind, + KeyTypes: storage.IterKeyTypeRangesOnly, + ExcludeUserKeySpan: excludeUserKeySpan, }) defer iter.Close() @@ -934,7 +971,7 @@ func (b *batchingInlineGCer) FlushingAdd(ctx context.Context, key roachpb.Key) { } func (b *batchingInlineGCer) Flush(ctx context.Context) { - err := b.gcer.GC(ctx, b.gcKeys, nil) + err := b.gcer.GC(ctx, b.gcKeys, nil, nil) b.gcKeys = nil b.size = 0 if err != nil { diff --git a/pkg/kv/kvserver/gc/gc_iterator.go b/pkg/kv/kvserver/gc/gc_iterator.go index 33c2bb5ec768..f4d97b8fba4c 100644 --- a/pkg/kv/kvserver/gc/gc_iterator.go +++ b/pkg/kv/kvserver/gc/gc_iterator.go @@ -36,13 +36,17 @@ type gcIterator struct { } func makeGCIterator( - desc *roachpb.RangeDescriptor, snap storage.Reader, threshold hlc.Timestamp, + desc *roachpb.RangeDescriptor, + snap storage.Reader, + threshold hlc.Timestamp, + excludeUserKeySpan bool, ) gcIterator { return gcIterator{ it: rditer.NewReplicaMVCCDataIterator(desc, snap, rditer.ReplicaDataIteratorOptions{ - Reverse: true, - IterKind: storage.MVCCKeyAndIntentsIterKind, - KeyTypes: storage.IterKeyTypePointsAndRanges, + Reverse: true, + IterKind: storage.MVCCKeyAndIntentsIterKind, + KeyTypes: storage.IterKeyTypePointsAndRanges, + ExcludeUserKeySpan: excludeUserKeySpan, }), threshold: 
threshold, } diff --git a/pkg/kv/kvserver/gc/gc_iterator_test.go b/pkg/kv/kvserver/gc/gc_iterator_test.go index 77f049ba8da0..78bbba173884 100644 --- a/pkg/kv/kvserver/gc/gc_iterator_test.go +++ b/pkg/kv/kvserver/gc/gc_iterator_test.go @@ -152,7 +152,7 @@ func TestGCIterator(t *testing.T) { ds.setupTest(t, eng, desc) snap := eng.NewSnapshot() defer snap.Close() - it := makeGCIterator(&desc, snap, tc.gcThreshold) + it := makeGCIterator(&desc, snap, tc.gcThreshold, false) defer it.close() expectations := tc.expectations for i, ex := range expectations { diff --git a/pkg/kv/kvserver/gc/gc_old_test.go b/pkg/kv/kvserver/gc/gc_old_test.go index a4bbed1455ab..4163d7701a80 100644 --- a/pkg/kv/kvserver/gc/gc_old_test.go +++ b/pkg/kv/kvserver/gc/gc_old_test.go @@ -150,7 +150,7 @@ func runGCOld( if batchGCKeysBytes >= KeyVersionChunkBytes { batchGCKeys = append(batchGCKeys, roachpb.GCRequest_GCKey{Key: expBaseKey, Timestamp: keys[i].Timestamp}) - err := gcer.GC(ctx, batchGCKeys, nil) + err := gcer.GC(ctx, batchGCKeys, nil, nil) batchGCKeys = nil batchGCKeysBytes = 0 @@ -209,7 +209,7 @@ func runGCOld( // Handle last collected set of keys/vals. processKeysAndValues() if len(batchGCKeys) > 0 { - if err := gcer.GC(ctx, batchGCKeys, nil); err != nil { + if err := gcer.GC(ctx, batchGCKeys, nil, nil); err != nil { return Info{}, err } } diff --git a/pkg/kv/kvserver/gc/gc_random_test.go b/pkg/kv/kvserver/gc/gc_random_test.go index 409711687f8e..5a783c256d94 100644 --- a/pkg/kv/kvserver/gc/gc_random_test.go +++ b/pkg/kv/kvserver/gc/gc_random_test.go @@ -733,6 +733,7 @@ type fakeGCer struct { // feed them into MVCCGarbageCollectRangeKeys and ranges argument should be // non-overlapping. gcRangeKeyBatches [][]roachpb.GCRequest_GCRangeKey + gcClearRangeKeys []roachpb.GCRequest_GCClearRangeKey threshold Threshold intents []roachpb.Intent batches [][]roachpb.Intent @@ -753,12 +754,18 @@ func (f *fakeGCer) SetGCThreshold(ctx context.Context, t Threshold) error { } func (f *fakeGCer) GC( - ctx context.Context, keys []roachpb.GCRequest_GCKey, rangeKeys []roachpb.GCRequest_GCRangeKey, + ctx context.Context, + keys []roachpb.GCRequest_GCKey, + rangeKeys []roachpb.GCRequest_GCRangeKey, + clearRangeKey *roachpb.GCRequest_GCClearRangeKey, ) error { for _, k := range keys { f.gcKeys[k.Key.String()] = k } f.gcRangeKeyBatches = append(f.gcRangeKeyBatches, rangeKeys) + if clearRangeKey != nil { + f.gcClearRangeKeys = append(f.gcClearRangeKeys, *clearRangeKey) + } return nil } diff --git a/pkg/kv/kvserver/gc/gc_test.go b/pkg/kv/kvserver/gc/gc_test.go index cec753e92a14..5b9f91fcdbd2 100644 --- a/pkg/kv/kvserver/gc/gc_test.go +++ b/pkg/kv/kvserver/gc/gc_test.go @@ -56,7 +56,10 @@ type collectingGCer struct { } func (c *collectingGCer) GC( - _ context.Context, keys []roachpb.GCRequest_GCKey, rangeKeys []roachpb.GCRequest_GCRangeKey, + _ context.Context, + keys []roachpb.GCRequest_GCKey, + _ []roachpb.GCRequest_GCRangeKey, + _ *roachpb.GCRequest_GCClearRangeKey, ) error { c.keys = append(c.keys, keys) return nil diff --git a/pkg/kv/kvserver/metrics.go b/pkg/kv/kvserver/metrics.go index 12e60c230231..b74403f81fcd 100644 --- a/pkg/kv/kvserver/metrics.go +++ b/pkg/kv/kvserver/metrics.go @@ -1349,6 +1349,18 @@ The count is emitted by the leaseholder of each range. 
Measurement: "Intent Resolutions", Unit: metric.Unit_COUNT, } + metaGCUsedClearRange = metric.Metadata{ + Name: "queue.gc.info.clearrangesuccess", + Help: "Number of successful ClearRange operation during GC", + Measurement: "Requests", + Unit: metric.Unit_COUNT, + } + metaGCFailedClearRange = metric.Metadata{ + Name: "queue.gc.info.clearrangefailed", + Help: "Number of failed ClearRange operation during GC", + Measurement: "Requests", + Unit: metric.Unit_COUNT, + } // Slow request metrics. metaLatchRequests = metric.Metadata{ @@ -1807,6 +1819,8 @@ type StoreMetrics struct { GCResolveFailed *metric.Counter // Failures resolving intents that belong to local transactions. GCTxnIntentsResolveFailed *metric.Counter + GCUsedClearRange *metric.Counter + GCFailedClearRange *metric.Counter // Slow request counts. SlowLatchRequests *metric.Gauge @@ -2317,6 +2331,8 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics { GCResolveSuccess: metric.NewCounter(metaGCResolveSuccess), GCResolveFailed: metric.NewCounter(metaGCResolveFailed), GCTxnIntentsResolveFailed: metric.NewCounter(metaGCTxnIntentsResolveFailed), + GCUsedClearRange: metric.NewCounter(metaGCUsedClearRange), + GCFailedClearRange: metric.NewCounter(metaGCFailedClearRange), // Wedge request counters. SlowLatchRequests: metric.NewGauge(metaLatchRequests), diff --git a/pkg/kv/kvserver/mvcc_gc_queue.go b/pkg/kv/kvserver/mvcc_gc_queue.go index a9c3b2aedebc..61d3b4810f78 100644 --- a/pkg/kv/kvserver/mvcc_gc_queue.go +++ b/pkg/kv/kvserver/mvcc_gc_queue.go @@ -530,14 +530,18 @@ func (r *replicaGCer) SetGCThreshold(ctx context.Context, thresh gc.Threshold) e } func (r *replicaGCer) GC( - ctx context.Context, keys []roachpb.GCRequest_GCKey, rangeKeys []roachpb.GCRequest_GCRangeKey, + ctx context.Context, + keys []roachpb.GCRequest_GCKey, + rangeKeys []roachpb.GCRequest_GCRangeKey, + clearRangeKey *roachpb.GCRequest_GCClearRangeKey, ) error { - if len(keys) == 0 && len(rangeKeys) == 0 { + if len(keys) == 0 && len(rangeKeys) == 0 && clearRangeKey == nil { return nil } req := r.template() req.Keys = keys req.RangeKeys = rangeKeys + req.ClearRangeKey = clearRangeKey return r.send(ctx, req) } @@ -713,6 +717,8 @@ func updateStoreMetricsWithGCInfo(metrics *StoreMetrics, info gc.Info) { metrics.GCAbortSpanGCNum.Inc(int64(info.AbortSpanGCNum)) metrics.GCPushTxn.Inc(int64(info.PushTxn)) metrics.GCResolveTotal.Inc(int64(info.ResolveTotal)) + metrics.GCUsedClearRange.Inc(int64(info.ClearRangeKeyOperations)) + metrics.GCFailedClearRange.Inc(int64(info.ClearRangeKeyFailures)) } // timer returns a constant duration to space out GC processing diff --git a/pkg/kv/kvserver/rditer/replica_data_iter.go b/pkg/kv/kvserver/rditer/replica_data_iter.go index 9435fedf55c2..a8bc761b4e81 100644 --- a/pkg/kv/kvserver/rditer/replica_data_iter.go +++ b/pkg/kv/kvserver/rditer/replica_data_iter.go @@ -25,6 +25,8 @@ type ReplicaDataIteratorOptions struct { IterKind storage.MVCCIterKind // KeyTypes is passed to underlying iterator to select desired key types. KeyTypes storage.IterKeyType + // ExcludeUserKeySpan removes UserKeySpace span portion. + ExcludeUserKeySpan bool } // ReplicaMVCCDataIterator provides a complete iteration over MVCC or unversioned @@ -103,6 +105,18 @@ func MakeReplicatedKeySpansExceptLockTable(d *roachpb.RangeDescriptor) []roachpb } } +// MakeReplicatedKeySpansExcludingUserAndLockTable returns all key spans that are fully Raft +// replicated for the given Range, except for the lock table spans and user key span. 
+// These are returned in the following sorted order:
+// 1. Replicated range-id local key span.
+// 2. Range-local key span.
+func MakeReplicatedKeySpansExcludingUserAndLockTable(d *roachpb.RangeDescriptor) []roachpb.Span {
+	return []roachpb.Span{
+		MakeRangeIDLocalKeySpan(d.RangeID, true /* replicatedOnly */),
+		makeRangeLocalKeySpan(d),
+	}
+}
+
 // MakeReplicatedKeySpansExceptRangeID returns all key spans that are fully Raft
 // replicated for the given Range, except for the replicated range-id local key span.
 // These are returned in the following sorted order:
@@ -210,10 +224,14 @@ func NewReplicaMVCCDataIterator(
 	if !reader.ConsistentIterators() {
 		panic("ReplicaMVCCDataIterator needs a Reader that provides ConsistentIterators")
 	}
+	spans := MakeReplicatedKeySpansExceptLockTable(d)
+	if opts.ExcludeUserKeySpan {
+		spans = MakeReplicatedKeySpansExcludingUserAndLockTable(d)
+	}
 	ri := &ReplicaMVCCDataIterator{
 		ReplicaDataIteratorOptions: opts,
 		reader:                     reader,
-		spans:                      MakeReplicatedKeySpansExceptLockTable(d),
+		spans:                      spans,
 	}
 	if ri.Reverse {
 		ri.curIndex = len(ri.spans) - 1
diff --git a/pkg/roachpb/api.go b/pkg/roachpb/api.go
index fce6515dfcde..602e1fdf877c 100644
--- a/pkg/roachpb/api.go
+++ b/pkg/roachpb/api.go
@@ -1389,11 +1389,17 @@ func (*AdminTransferLeaseRequest) flags() flag { return isAdmin | isAlone }
 func (*AdminChangeReplicasRequest) flags() flag { return isAdmin | isAlone }
 func (*AdminRelocateRangeRequest) flags() flag  { return isAdmin | isAlone }
 
-func (*GCRequest) flags() flag {
+func (gcr *GCRequest) flags() flag {
 	// We defensively let GCRequest bypass the circuit breaker because otherwise,
 	// the GC queue might busy loop on an unavailable range, doing lots of work
 	// but never making progress.
-	return isWrite | isRange | bypassesReplicaCircuitBreaker
+	flags := isWrite | isRange | bypassesReplicaCircuitBreaker
+	// Clear range requests that GC the entire range should not be batched with
+	// anything else.
+	if gcr.ClearRangeKey != nil {
+		flags |= isAlone
+	}
+	return flags
 }
 
 // HeartbeatTxn updates the timestamp cache with transaction records,
diff --git a/pkg/roachpb/api.proto b/pkg/roachpb/api.proto
index 967580884255..be7d760b89ae 100644
--- a/pkg/roachpb/api.proto
+++ b/pkg/roachpb/api.proto
@@ -1007,6 +1007,18 @@ message GCRequest {
   // Threshold is the expiration timestamp.
   util.hlc.Timestamp threshold = 4 [(gogoproto.nullable) = false];
 
+  // GCClearRangeKey contains the range for a clear range operation.
+  message GCClearRangeKey {
+    bytes start_key = 1 [(gogoproto.casttype) = "Key"];
+    bytes end_key = 2 [(gogoproto.casttype) = "Key"];
+  }
+  // clear_range_key contains zero or one range that will be deleted using a
+  // storage range clear operation. Note that this key must cover the cleared
+  // range in its entirety, the range must not contain any data newer than the
+  // GC threshold, and all data must be covered by a range tombstone; otherwise
+  // the request will fail.
+  GCClearRangeKey clear_range_key = 7;
+
   reserved 5;
 }
diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go
index 5cf08f5f00e2..42cd51b82377 100644
--- a/pkg/storage/mvcc.go
+++ b/pkg/storage/mvcc.go
@@ -5189,6 +5189,93 @@ func MVCCGarbageCollectRangeKeys(
 	return nil
 }
 
+// MVCCGarbageCollectWholeRange removes all the range data and resets counters.
+// It only does so if the data is completely covered by range tombstones.
+func MVCCGarbageCollectWholeRange(
+	ctx context.Context,
+	rw ReadWriter,
+	ms *enginepb.MVCCStats,
+	start, end roachpb.Key,
+	gcThreshold hlc.Timestamp,
+	rangeStats enginepb.MVCCStats,
+) error {
+	if rangeStats.ContainsEstimates == 0 && rangeStats.LiveCount > 0 {
+		return errors.Errorf("range contains live data, can't use GC clear range")
+	}
+	if _, err := CanGCEntireRange(ctx, rw, start, end, gcThreshold); err != nil {
+		return err
+	}
+	if err := rw.ClearRawRange(start, end, true, true); err != nil {
+		return err
+	}
+	if ms != nil {
+		// Reset point and range counters as we deleted the whole range.
+		rangeStats.AgeTo(ms.LastUpdateNanos)
+		ms.LiveCount -= rangeStats.LiveCount
+		ms.LiveBytes -= rangeStats.LiveBytes
+		ms.KeyCount -= rangeStats.KeyCount
+		ms.KeyBytes -= rangeStats.KeyBytes
+		ms.ValCount -= rangeStats.ValCount
+		ms.ValBytes -= rangeStats.ValBytes
+		ms.RangeKeyCount -= rangeStats.RangeKeyCount
+		ms.RangeKeyBytes -= rangeStats.RangeKeyBytes
+		ms.RangeValCount -= rangeStats.RangeValCount
+		ms.RangeValBytes -= rangeStats.RangeValBytes
+		ms.GCBytesAge -= rangeStats.GCBytesAge
+		// We also zero out intents, as the range can't be cleared if intents
+		// are present.
+		// This should only be the case if stats are estimates and intent
+		// information was not accurate.
+		ms.IntentCount -= rangeStats.IntentCount
+		ms.IntentBytes -= rangeStats.IntentBytes
+		ms.IntentAge -= rangeStats.IntentAge
+		ms.SeparatedIntentCount -= rangeStats.SeparatedIntentCount
+	}
+	return nil
+}
+
+// CanGCEntireRange checks that a span of keys doesn't contain any live data
+// and that all data is covered by range tombstones at or below the provided
+// threshold. This function is meant for fast path deletion by GC, where the
+// range can be removed by a range tombstone.
+func CanGCEntireRange(
+	ctx context.Context, rw Reader, start, end roachpb.Key, gcThreshold hlc.Timestamp,
+) (coveredByRangeTombstones bool, err error) {
+	// It makes no sense to check local key ranges for the fast path.
+	if isLocal(start) || isLocal(end) {
+		return coveredByRangeTombstones, errors.Errorf("range emptiness check can only be done on global ranges")
+	}
+	iter := rw.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{
+		KeyTypes:             IterKeyTypePointsAndRanges,
+		LowerBound:           start,
+		UpperBound:           end,
+		RangeKeyMaskingBelow: gcThreshold,
+	})
+	defer iter.Close()
+	iter.SeekGE(MVCCKey{Key: start})
+	for ; ; iter.Next() {
+		if ok, err := iter.Valid(); err != nil {
+			return coveredByRangeTombstones, err
+		} else if !ok {
+			break
+		}
+		hasPoint, hasRange := iter.HasPointAndRange()
+		if hasPoint {
+			return coveredByRangeTombstones, errors.Errorf("found key not covered by range tombstone %s",
+				iter.UnsafeKey())
+		}
+		if hasRange {
+			coveredByRangeTombstones = true
+			newest := iter.RangeKeys().Newest()
+			if gcThreshold.Less(newest) {
+				return coveredByRangeTombstones, errors.Errorf("range tombstones above gc threshold. GC=%s, range=%s",
+					gcThreshold.String(), newest.String())
+			}
+		}
+	}
+	return coveredByRangeTombstones, nil
+}
+
 // MVCCFindSplitKey finds a key from the given span such that the left side of
 // the split is roughly targetSize bytes. The returned key will never be chosen
 // from the key ranges listed in keys.NoSplitSpans.
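To make the contract between these two new helpers concrete, the sketch below shows how a caller combines them: check coverage first, then perform the destructive clear and apply the stats delta. This is a minimal illustration, not part of the patch; `snap`, `rw`, `desc`, `threshold`, `rangeStats`, and `ms` are assumed to be in scope with the types used above.

```go
// Illustrative sketch (not part of this patch). Assumes:
//   snap storage.Reader, rw storage.ReadWriter, desc *roachpb.RangeDescriptor,
//   threshold hlc.Timestamp, rangeStats enginepb.MVCCStats, ms *enginepb.MVCCStats.
start, end := desc.StartKey.AsRawKey(), desc.EndKey.AsRawKey()

// Step 1: cheap check against a snapshot. Any point key (including intents)
// or any range tombstone above the threshold disqualifies the fast path.
covered, err := storage.CanGCEntireRange(ctx, snap, start, end, threshold)
if err != nil || !covered {
	return err // fall back to regular per-key GC
}

// Step 2: drop the whole user key span with one storage-level clear and
// subtract the range's stats wholesale instead of recomputing them per key.
if err := storage.MVCCGarbageCollectWholeRange(
	ctx, rw, ms, start, end, threshold, rangeStats,
); err != nil {
	return err
}
```

Note the deliberate double check: the GC queue evaluates `CanGCEntireRange` against its snapshot in `processReplicatedKeyRange`, and `MVCCGarbageCollectWholeRange` re-runs it below Raft, since the range state may have changed between the two.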
diff --git a/pkg/storage/mvcc_history_test.go b/pkg/storage/mvcc_history_test.go
index 8de94151a0e3..0f9f5e90ac1f 100644
--- a/pkg/storage/mvcc_history_test.go
+++ b/pkg/storage/mvcc_history_test.go
@@ -111,6 +111,8 @@ var (
 // clear_rangekey   k=<key> end=<key> ts=<int>[,<int>]
 // clear_time_range k=<key> end=<key> ts=<int>[,<int>] targetTs=<int>[,<int>] [clearRangeThreshold=<int>] [maxBatchSize=<int>] [maxBatchByteSize=<int>]
 //
+// gc_clear_range   k=<key> end=<key> ts=<int>[,<int>]
+//
 // sst_put            [ts=<int>[,<int>]] [localTs=<int>[,<int>]] k=<key> [v=<string>]
 // sst_put_rangekey   ts=<int>[,<int>] [localTS=<int>[,<int>]] k=<key> end=<key>
 // sst_clear_range    k=<key> end=<key>
@@ -668,6 +670,7 @@ var commands = map[string]cmd{
 	"del_range_pred":   {typDataUpdate, cmdDeleteRangePredicate},
 	"export":           {typReadOnly, cmdExport},
 	"get":              {typReadOnly, cmdGet},
+	"gc_clear_range":   {typDataUpdate, cmdGCClearRange},
 	"increment":        {typDataUpdate, cmdIncrement},
 	"initput":          {typDataUpdate, cmdInitPut},
 	"merge":            {typDataUpdate, cmdMerge},
@@ -948,6 +951,16 @@ func cmdClearTimeRange(e *evalCtx) error {
 	return nil
 }
 
+func cmdGCClearRange(e *evalCtx) error {
+	key, endKey := e.getKeyRange()
+	gcTs := e.getTs(nil)
+	return e.withWriter("gc_clear_range", func(rw ReadWriter) error {
+		cms, err := ComputeStats(rw, key, endKey, 100e9)
+		require.NoError(e.t, err, "failed to compute range stats")
+		return MVCCGarbageCollectWholeRange(e.ctx, rw, e.ms, key, endKey, gcTs, cms)
+	})
+}
+
 func cmdCPut(e *evalCtx) error {
 	txn := e.getTxn(optional)
 	ts := e.getTs(txn)
diff --git a/pkg/storage/mvcc_test.go b/pkg/storage/mvcc_test.go
index 978135a18c85..d4ec79c832dc 100644
--- a/pkg/storage/mvcc_test.go
+++ b/pkg/storage/mvcc_test.go
@@ -5244,6 +5244,10 @@ func pt(key roachpb.Key, ts hlc.Timestamp) rangeTestDataItem {
 	return rangeTestDataItem{point: MVCCKeyValue{Key: mvccVersionKey(key, ts), Value: val}}
 }
 
+// inlineValue is a zero timestamp used with the pt function for readability
+// when creating inline values.
+var inlineValue hlc.Timestamp
+
 // txn wraps point update and adds transaction to it for intent creation.
 func txn(d rangeTestDataItem) rangeTestDataItem {
 	ts := d.point.Key.Timestamp
@@ -5884,6 +5888,134 @@ func TestMVCCGarbageCollectRangesFailures(t *testing.T) {
 	}
 }
 
+// TestMVCCGarbageCollectClearRange checks that basic GCClearRange functionality
+// works. Fine-grained test cases are covered by the mvcc_histories test
+// 'gc_clear_range'. This test can be used when debugging any issues found there.
+func TestMVCCGarbageCollectClearRange(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+	defer log.Scope(t).Close(t)
+	ctx := context.Background()
+
+	mkKey := func(k string) roachpb.Key {
+		return append(keys.SystemSQLCodec.TablePrefix(42), k...)
+	}
+	rangeStart := mkKey("")
+	rangeEnd := rangeStart.PrefixEnd()
+
+	// Note we use keys of different lengths so that stats accounting errors
+	// would not obviously cancel out if right and left bounds are used
+	// incorrectly.
+	keyA := mkKey("a")
+	keyB := mkKey("bb")
+	keyD := mkKey("dddd")
+
+	mkTs := func(wallTimeSec int64) hlc.Timestamp {
+		return hlc.Timestamp{WallTime: time.Second.Nanoseconds() * wallTimeSec}
+	}
+
+	ts2 := mkTs(2)
+	ts4 := mkTs(4)
+	tsGC := mkTs(5)
+	tsMax := mkTs(9)
+
+	mkGCReq := func(start roachpb.Key, end roachpb.Key) roachpb.GCRequest_GCClearRangeKey {
+		return roachpb.GCRequest_GCClearRangeKey{
+			StartKey: start,
+			EndKey:   end,
+		}
+	}
+
+	before := rangeTestData{
+		pt(keyB, ts2),
+		rng(keyA, keyD, ts4),
+	}
+	request := mkGCReq(keyA, keyD)
+
+	for _, engineImpl := range mvccEngineImpls {
+		t.Run(engineImpl.name, func(t *testing.T) {
+			engine := engineImpl.create()
+			defer engine.Close()
+
+			var ms, diff enginepb.MVCCStats
+			before.populateEngine(t, engine, &ms)
+
+			require.NoError(t,
+				MVCCGarbageCollectWholeRange(ctx, engine, &diff, request.StartKey, request.EndKey, tsGC, ms),
+				"failed to run mvcc range tombstone garbage collect")
+			ms.Add(diff)
+
+			rks := scanRangeKeys(t, engine)
+			require.Empty(t, rks)
+			ks := scanPointKeys(t, engine)
+			require.Empty(t, ks)
+
+			ms.AgeTo(tsMax.WallTime)
+			it := engine.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{
+				KeyTypes:   IterKeyTypePointsAndRanges,
+				LowerBound: rangeStart,
+				UpperBound: rangeEnd,
+			})
+			expMs, err := ComputeStatsForIter(it, tsMax.WallTime)
+			require.NoError(t, err, "failed to compute stats for range")
+			require.EqualValues(t, expMs, ms, "computed range stats vs gc'd")
+		})
+	}
+}
+
+func TestMVCCGarbageCollectClearRangeInlinedValue(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+	defer log.Scope(t).Close(t)
+	ctx := context.Background()
+
+	mkKey := func(k string) roachpb.Key {
+		return append(keys.SystemSQLCodec.TablePrefix(42), k...)
+	}
+
+	// Note we use keys of different lengths so that stats accounting errors
+	// would not obviously cancel out if right and left bounds are used
+	// incorrectly.
+	keyA := mkKey("a")
+	keyB := mkKey("b")
+	keyD := mkKey("dddd")
+
+	mkTs := func(wallTimeSec int64) hlc.Timestamp {
+		return hlc.Timestamp{WallTime: time.Second.Nanoseconds() * wallTimeSec}
+	}
+
+	tsGC := mkTs(5)
+
+	mkGCReq := func(start roachpb.Key, end roachpb.Key) roachpb.GCRequest_GCClearRangeKey {
+		return roachpb.GCRequest_GCClearRangeKey{
+			StartKey: start,
+			EndKey:   end,
+		}
+	}
+
+	before := rangeTestData{
+		pt(keyB, inlineValue),
+	}
+	request := mkGCReq(keyA, keyD)
+	expectedError := `found key not covered by range tombstone /Table/42/"b"/0,0`
+	for _, engineImpl := range mvccEngineImpls {
+		t.Run(engineImpl.name, func(t *testing.T) {
+			engine := engineImpl.create()
+			defer engine.Close()
+
+			var ms, diff enginepb.MVCCStats
+			before.populateEngine(t, engine, &ms)
+			// We force stats to be estimates to bypass the quick liveness check,
+			// which would otherwise prevent the actual data checks when there is
+			// live data.
+			ms.ContainsEstimates = 1
+			err := MVCCGarbageCollectWholeRange(ctx, engine, &diff, request.StartKey, request.EndKey,
+				tsGC, ms)
+			ms.Add(diff)
+			require.Errorf(t, err, "expected error '%s' but found none", expectedError)
+			require.True(t, testutils.IsError(err, expectedError),
+				"expected error '%s' found '%s'", expectedError, err)
+		})
+	}
+}
+
 // TestResolveIntentWithLowerEpoch verifies that trying to resolve
 // an intent at an epoch that is lower than the epoch of the intent
 // leaves the intent untouched.
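Since the `PureGCer` interface above gained a fourth parameter, every implementation has to account for the clear-range case; the fake and collecting GCers in the tests show the mechanical update, and the sketch below shows the minimal shape a new implementation takes. `countingGCer` is hypothetical and illustrative only, not part of this patch.

```go
// countingGCer is a hypothetical PureGCer implementation, shown only to
// illustrate the widened interface.
type countingGCer struct {
	pointBatches, rangeKeyBatches, clearRanges int
}

func (g *countingGCer) GC(
	ctx context.Context,
	keys []roachpb.GCRequest_GCKey,
	rangeKeys []roachpb.GCRequest_GCRangeKey,
	clearRangeKey *roachpb.GCRequest_GCClearRangeKey,
) error {
	// The three kinds are mutually exclusive in practice: cmd_gc.go rejects
	// requests that mix ClearRangeKey with point or range keys.
	switch {
	case clearRangeKey != nil:
		g.clearRanges++
	case len(rangeKeys) > 0:
		g.rangeKeyBatches++
	case len(keys) > 0:
		g.pointBatches++
	}
	return nil
}
```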
diff --git a/pkg/storage/testdata/mvcc_histories/gc_clear_range b/pkg/storage/testdata/mvcc_histories/gc_clear_range
new file mode 100644
index 000000000000..d3f8dc651f6c
--- /dev/null
+++ b/pkg/storage/testdata/mvcc_histories/gc_clear_range
@@ -0,0 +1,98 @@
+run ok
+put k=A v=B ts=10
+----
+>> at end:
+data: "A"/10.000000000,0 -> /BYTES/B
+
+# Check that we can't invoke gc_clear_range if we have live data.
+run error
+gc_clear_range k=A end=Z ts=30
+----
+>> at end:
+data: "A"/10.000000000,0 -> /BYTES/B
+error: (*withstack.withStack:) range contains live data, can't use GC clear range
+
+run ok
+del k=A ts=30
+----
+del: "A": found key true
+>> at end:
+data: "A"/30.000000000,0 -> /
+data: "A"/10.000000000,0 -> /BYTES/B
+
+# Check that we can't invoke gc_clear_range if data is not covered by range tombstones.
+run error
+gc_clear_range k=A end=Z ts=30
+----
+>> at end:
+data: "A"/30.000000000,0 -> /
+data: "A"/10.000000000,0 -> /BYTES/B
+error: (*withstack.withStack:) found key not covered by range tombstone "A"/30.000000000,0
+
+run ok stats
+del_range_ts k=A endKey=Z ts=50
+----
+>> del_range_ts k=A endKey=Z ts=50
+stats: range_key_count=+1 range_key_bytes=+14 range_val_count=+1 gc_bytes_age=+700
+>> at end:
+rangekey: A{-\x00}/[50.000000000,0=/]
+data: "A"/30.000000000,0 -> /
+data: "A"/10.000000000,0 -> /BYTES/B
+stats: key_count=1 key_bytes=26 val_count=2 val_bytes=6 range_key_count=1 range_key_bytes=14 range_val_count=1 gc_bytes_age=2940
+
+# Check that we can't delete if the range tombstone covering the whole range is above the GC threshold.
+run error
+gc_clear_range k=A end=Z ts=40
+----
+>> at end:
+rangekey: A{-\x00}/[50.000000000,0=/]
+data: "A"/30.000000000,0 -> /
+data: "A"/10.000000000,0 -> /BYTES/B
+error: (*withstack.withStack:) range tombstones above gc threshold. GC=40.000000000,0, range=50.000000000,0
+
+# Check that we can delete if a range tombstone covers the whole range.
+run stats ok
+gc_clear_range k=A end=Z ts=60
+----
+>> gc_clear_range k=A end=Z ts=60
+stats: key_count=-1 key_bytes=-26 val_count=-2 val_bytes=-6 range_key_count=-1 range_key_bytes=-14 range_val_count=-1 gc_bytes_age=-2940
+>> at end:
+
+stats: 
+
+# Check that if range tombstone coverage only covers a subset of the range, but there's no other data, we can still clear.
+run ok
+put k=A v=B ts=10
+del_range_ts k=A endKey=D ts=20
+----
+>> at end:
+rangekey: A{-\x00}/[20.000000000,0=/]
+data: "A"/10.000000000,0 -> /BYTES/B
+
+run stats ok
+gc_clear_range k=A end=Z ts=60
+----
+>> gc_clear_range k=A end=Z ts=60
+stats: key_count=-1 key_bytes=-14 val_count=-1 val_bytes=-6 range_key_count=-1 range_key_bytes=-14 range_val_count=-1 gc_bytes_age=-2720
+>> at end:
+
+stats: 
+
+# Check that gc_clear_range can't be performed over intents.
+run ok
+with t=A
+  txn_begin ts=10
+  put k=B v=O
+----
+>> at end:
+txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=10.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=10.000000000,0 wto=false gul=0,0
+meta: "B"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=10.000000000,0 min=0,0 seq=0} ts=10.000000000,0 del=false klen=12 vlen=6 mergeTs=<nil> txnDidNotUpdateMeta=true
+data: "B"/10.000000000,0 -> /BYTES/O
+
+run error
+gc_clear_range k=A end=Z ts=40
+----
+>> at end:
+meta: "B"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=10.000000000,0 min=0,0 seq=0} ts=10.000000000,0 del=false klen=12 vlen=6 mergeTs=<nil> txnDidNotUpdateMeta=true
+data: "B"/10.000000000,0 -> /BYTES/O
+error: (*withstack.withStack:) range contains live data, can't use GC clear range
diff --git a/pkg/ts/catalog/chart_catalog.go b/pkg/ts/catalog/chart_catalog.go
index f37b4e6d758b..b25ec26e2660 100644
--- a/pkg/ts/catalog/chart_catalog.go
+++ b/pkg/ts/catalog/chart_catalog.go
@@ -893,6 +893,13 @@ var charts = []sectionDescription{
 				"queue.gc.info.transactionspangcstaging",
 			},
 		},
+		{
+			Title: "GC Clear Range",
+			Metrics: []string{
+				"queue.gc.info.clearrangesuccess",
+				"queue.gc.info.clearrangefailed",
+			},
+		},
 	},
 },
 {
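One boundary in the testdata above is worth spelling out: `CanGCEntireRange` rejects the fast path only when a tombstone is strictly above the threshold (`gcThreshold.Less(newest)`), so a clear at ts=50 against the 50.000000000,0 tombstone would also succeed. A small sketch of that comparison, assuming only the `hlc` package and that `Next()` returns the next-higher timestamp:

```go
// Sketch of the threshold comparison used by CanGCEntireRange; not part of
// this patch. hlc.Timestamp.Less is a strict ordering.
threshold := hlc.Timestamp{WallTime: 50 * time.Second.Nanoseconds()}
tombAt := hlc.Timestamp{WallTime: 50 * time.Second.Nanoseconds()}
tombAbove := tombAt.Next() // one logical tick above the threshold

fmt.Println(threshold.Less(tombAt))    // false: a tombstone at the threshold is collectable
fmt.Println(threshold.Less(tombAbove)) // true: fast path rejected, as in the ts=40 case above
```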