Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
86000: gc: add clear range for full range span when it is covered by range tombstone r=aliher1911 a=aliher1911

This commit adds a range_deletion_keys parameter to the GC request.
range_deletion_keys is used by the GC queue to remove chunks of
consecutive keys where no new data was written above the GC
threshold, so that storage can optimize the deletions with range
tombstones.
To support the new types of keys, the GCer interface is also updated
to pass the provided keys down to the request.

---

storage: add gc operation that uses clear range
GC relies on point and range key deletion operations
when removing data. This is not good in cases where a full
range is deleted, e.g. by a schema change.
This commit adds a clear operation that removes all
range data if it is completely covered by range tombstones and
the range tombstones themselves are below the GC threshold.

---

rditer: skip option for range user key space
Add an option to skip the user key space so that fast path GC
can avoid unnecessary scans.

---

gc: add fast path check to identify empty ranges
This commit adds a fast path check for ranges where
all data is covered by a range tombstone below the GC TTL
threshold. If such a range is identified, GC will perform
a GC Clear Range of the full range to avoid rescanning
the range and recomputing stats.

---

batcheval: add clear range option to cmd_gc
The GC request will perform a fast path clear operation on the
whole range if instructed by GC. Its range bounds must
match the descriptor bounds exactly for the operation to succeed.
For safety, the underlying MVCC operation will validate that no
data is present.


Co-authored-by: Oleg Afanasyev <[email protected]>
  • Loading branch information
craig[bot] and aliher1911 committed Aug 14, 2022
2 parents d25cb57 + 459b8bf commit 4b148d8
Show file tree
Hide file tree
Showing 17 changed files with 503 additions and 34 deletions.
25 changes: 24 additions & 1 deletion pkg/kv/kvserver/batcheval/cmd_gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ func declareKeysGC(
)) {
latchSpans.AddMVCC(spanset.SpanReadWrite, span, hlc.MaxTimestamp)
}
if rk := gcr.ClearRangeKey; rk != nil {
latchSpans.AddMVCC(spanset.SpanReadWrite, roachpb.Span{Key: rk.StartKey, EndKey: rk.EndKey},
hlc.MaxTimestamp)
}
// The RangeGCThresholdKey is only written to if the
// req.(*GCRequest).Threshold is set. However, we always declare an exclusive
// access over this key in order to serialize with other GC requests.
Expand Down Expand Up @@ -134,12 +138,19 @@ func GC(
// GC request's effect from the raft log. Latches held on the leaseholder
// would have no impact on a follower read.
if !args.Threshold.IsEmpty() &&
(len(args.Keys) != 0 || len(args.RangeKeys) != 0) &&
(len(args.Keys) != 0 || len(args.RangeKeys) != 0 || args.ClearRangeKey != nil) &&
!cArgs.EvalCtx.EvalKnobs().AllowGCWithNewThresholdAndKeys {
return result.Result{}, errors.AssertionFailedf(
"GC request can set threshold or it can GC keys, but it is unsafe for it to do both")
}

// We do not allow removal of point or range keys combined with clear range
// operation as they could cover the same set of keys.
if (len(args.Keys) != 0 || len(args.RangeKeys) != 0) && args.ClearRangeKey != nil {
return result.Result{}, errors.AssertionFailedf(
"GC request can remove point and range keys or clear entire range, but it is unsafe for it to do both")
}

// All keys must be inside the current replica range. Keys outside
// of this range in the GC request are dropped silently, which is
// safe because they can simply be re-collected later on the correct
Expand Down Expand Up @@ -178,6 +189,18 @@ func GC(
return result.Result{}, err
}

// Fast path operation to try to remove all user key data from the range.
if rk := args.ClearRangeKey; rk != nil {
if !rk.StartKey.Equal(desc.StartKey.AsRawKey()) || !rk.EndKey.Equal(desc.EndKey.AsRawKey()) {
return result.Result{}, errors.Errorf("gc with clear range operation could only be used on the full range")
}

if err := storage.MVCCGarbageCollectWholeRange(ctx, readWriter, cArgs.Stats,
rk.StartKey, rk.EndKey, cArgs.EvalCtx.GetGCThreshold(), cArgs.EvalCtx.GetMVCCStats()); err != nil {
return result.Result{}, err
}
}

// Optionally bump the GC threshold timestamp.
var res result.Result
if !args.Threshold.IsEmpty() {
Expand Down
75 changes: 56 additions & 19 deletions pkg/kv/kvserver/gc/gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,9 @@ type Thresholder interface {

// PureGCer is part of the GCer interface.
type PureGCer interface {
GC(context.Context, []roachpb.GCRequest_GCKey, []roachpb.GCRequest_GCRangeKey) error
GC(context.Context, []roachpb.GCRequest_GCKey, []roachpb.GCRequest_GCRangeKey,
*roachpb.GCRequest_GCClearRangeKey,
) error
}

// A GCer is an abstraction used by the MVCC GC queue to carry out chunked deletions.
Expand All @@ -170,7 +172,10 @@ func (NoopGCer) SetGCThreshold(context.Context, Threshold) error { return nil }

// GC implements storage.GCer.
func (NoopGCer) GC(
context.Context, []roachpb.GCRequest_GCKey, []roachpb.GCRequest_GCRangeKey,
context.Context,
[]roachpb.GCRequest_GCKey,
[]roachpb.GCRequest_GCRangeKey,
*roachpb.GCRequest_GCClearRangeKey,
) error {
return nil
}
Expand Down Expand Up @@ -231,6 +236,12 @@ type Info struct {
// AffectedVersionsRangeValBytes is the number of (fully encoded) bytes deleted from values that
// belong to removed range keys.
AffectedVersionsRangeValBytes int64
// ClearRangeKeyOperations reports 1 if GC succeeded performing collection with
// ClearRange operation.
ClearRangeKeyOperations int
// ClearRangeKeyFailures reports 1 if GC identified a possibility to collect
// with ClearRange operation, but request failed.
ClearRangeKeyFailures int
}

// RunOptions contains collection of limits that GC run applies when performing operations
Expand Down Expand Up @@ -299,7 +310,7 @@ func Run(
Threshold: newThreshold,
}

err := processReplicatedKeyRange(ctx, desc, snap, now, newThreshold, options.IntentAgeThreshold, gcer,
fastPath, err := processReplicatedKeyRange(ctx, desc, snap, now, newThreshold, options.IntentAgeThreshold, gcer,
intentBatcherOptions{
maxIntentsPerIntentCleanupBatch: options.MaxIntentsPerIntentCleanupBatch,
maxIntentKeyBytesPerIntentCleanupBatch: options.MaxIntentKeyBytesPerIntentCleanupBatch,
Expand All @@ -309,7 +320,7 @@ func Run(
if err != nil {
return Info{}, err
}
err = processReplicatedRangeTombstones(ctx, desc, snap, now, newThreshold, gcer, &info)
err = processReplicatedRangeTombstones(ctx, desc, snap, fastPath, now, newThreshold, gcer, &info)
if err != nil {
return Info{}, err
}
Expand Down Expand Up @@ -343,6 +354,7 @@ func Run(
//
// The logic iterates all versions of all keys in the range from oldest to
// newest. Expired intents are written into the txnMap and intentKeyMap.
// Returns true if clear range was used to remove all user data.
func processReplicatedKeyRange(
ctx context.Context,
desc *roachpb.RangeDescriptor,
Expand All @@ -354,7 +366,30 @@ func processReplicatedKeyRange(
options intentBatcherOptions,
cleanupIntentsFn CleanupIntentsFunc,
info *Info,
) error {
) (bool, error) {
// Perform fast path check prior to performing GC. Fast path only collects
// user key span portion, so we don't need to clean it up once again if
// we succeeded.
excludeUserKeySpan := false
{
start := desc.StartKey.AsRawKey()
end := desc.EndKey.AsRawKey()
if coveredByRangeTombstone, err := storage.CanGCEntireRange(ctx, snap, start, end,
threshold); err == nil && coveredByRangeTombstone {
if err = gcer.GC(ctx, nil, nil, &roachpb.GCRequest_GCClearRangeKey{
StartKey: start,
EndKey: end,
}); err == nil {
excludeUserKeySpan = true
info.ClearRangeKeyOperations++
} else {
log.Warningf(ctx, "failed to perform GC clear range operation on range %s: %s",
desc.String(), err)
info.ClearRangeKeyFailures++
}
}
}

var alloc bufalloc.ByteAllocator
// Compute intent expiration (intent age at which we attempt to resolve).
intentExp := now.Add(-intentAgeThreshold.Nanoseconds(), 0)
Expand Down Expand Up @@ -401,13 +436,13 @@ func processReplicatedKeyRange(
gcTimestampForThisKey hlc.Timestamp
sentBatchForThisKey bool
)
it := makeGCIterator(desc, snap, threshold)
it := makeGCIterator(desc, snap, threshold, excludeUserKeySpan)
defer it.close()
for ; ; it.step() {
s, ok := it.state()
if !ok {
if it.err != nil {
return it.err
return false, it.err
}
break
}
Expand All @@ -416,7 +451,7 @@ func processReplicatedKeyRange(
}
if s.curIsIntent() {
if err := handleIntent(s.next); err != nil {
return err
return false, err
}
continue
}
Expand Down Expand Up @@ -458,9 +493,9 @@ func processReplicatedKeyRange(
}
// If limit was reached, delegate to GC'r to remove collected batch.
if shouldSendBatch {
if err := gcer.GC(ctx, batchGCKeys, nil); err != nil {
if err := gcer.GC(ctx, batchGCKeys, nil, nil); err != nil {
if errors.Is(err, ctx.Err()) {
return err
return false, err
}
// Even though we are batching the GC process, it's
// safe to continue because we bumped the GC
Expand All @@ -476,16 +511,16 @@ func processReplicatedKeyRange(
// We need to send out last intent cleanup batch.
if err := intentBatcher.maybeFlushPendingIntents(ctx); err != nil {
if errors.Is(err, ctx.Err()) {
return err
return false, err
}
log.Warningf(ctx, "failed to cleanup intents batch: %v", err)
}
if len(batchGCKeys) > 0 {
if err := gcer.GC(ctx, batchGCKeys, nil); err != nil {
return err
if err := gcer.GC(ctx, batchGCKeys, nil, nil); err != nil {
return false, err
}
}
return nil
return excludeUserKeySpan, nil
}

type intentBatcher struct {
Expand Down Expand Up @@ -853,7 +888,7 @@ func (b *rangeKeyBatcher) flushPendingFragments(ctx context.Context) error {
}
b.pending = b.pending[:0]
b.pendingSize = 0
return b.gcer.GC(ctx, nil, toSend)
return b.gcer.GC(ctx, nil, toSend, nil)
}
return nil
}
Expand All @@ -862,15 +897,17 @@ func processReplicatedRangeTombstones(
ctx context.Context,
desc *roachpb.RangeDescriptor,
snap storage.Reader,
excludeUserKeySpan bool,
now hlc.Timestamp,
gcThreshold hlc.Timestamp,
gcer GCer,
info *Info,
) error {
iter := rditer.NewReplicaMVCCDataIterator(desc, snap, rditer.ReplicaDataIteratorOptions{
Reverse: false,
IterKind: storage.MVCCKeyIterKind,
KeyTypes: storage.IterKeyTypeRangesOnly,
Reverse: false,
IterKind: storage.MVCCKeyIterKind,
KeyTypes: storage.IterKeyTypeRangesOnly,
ExcludeUserKeySpan: excludeUserKeySpan,
})
defer iter.Close()

Expand Down Expand Up @@ -934,7 +971,7 @@ func (b *batchingInlineGCer) FlushingAdd(ctx context.Context, key roachpb.Key) {
}

func (b *batchingInlineGCer) Flush(ctx context.Context) {
err := b.gcer.GC(ctx, b.gcKeys, nil)
err := b.gcer.GC(ctx, b.gcKeys, nil, nil)
b.gcKeys = nil
b.size = 0
if err != nil {
Expand Down
12 changes: 8 additions & 4 deletions pkg/kv/kvserver/gc/gc_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,17 @@ type gcIterator struct {
}

func makeGCIterator(
desc *roachpb.RangeDescriptor, snap storage.Reader, threshold hlc.Timestamp,
desc *roachpb.RangeDescriptor,
snap storage.Reader,
threshold hlc.Timestamp,
excludeUserKeySpan bool,
) gcIterator {
return gcIterator{
it: rditer.NewReplicaMVCCDataIterator(desc, snap, rditer.ReplicaDataIteratorOptions{
Reverse: true,
IterKind: storage.MVCCKeyAndIntentsIterKind,
KeyTypes: storage.IterKeyTypePointsAndRanges,
Reverse: true,
IterKind: storage.MVCCKeyAndIntentsIterKind,
KeyTypes: storage.IterKeyTypePointsAndRanges,
ExcludeUserKeySpan: excludeUserKeySpan,
}),
threshold: threshold,
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/gc/gc_iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ func TestGCIterator(t *testing.T) {
ds.setupTest(t, eng, desc)
snap := eng.NewSnapshot()
defer snap.Close()
it := makeGCIterator(&desc, snap, tc.gcThreshold)
it := makeGCIterator(&desc, snap, tc.gcThreshold, false)
defer it.close()
expectations := tc.expectations
for i, ex := range expectations {
Expand Down
4 changes: 2 additions & 2 deletions pkg/kv/kvserver/gc/gc_old_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ func runGCOld(
if batchGCKeysBytes >= KeyVersionChunkBytes {
batchGCKeys = append(batchGCKeys, roachpb.GCRequest_GCKey{Key: expBaseKey, Timestamp: keys[i].Timestamp})

err := gcer.GC(ctx, batchGCKeys, nil)
err := gcer.GC(ctx, batchGCKeys, nil, nil)

batchGCKeys = nil
batchGCKeysBytes = 0
Expand Down Expand Up @@ -209,7 +209,7 @@ func runGCOld(
// Handle last collected set of keys/vals.
processKeysAndValues()
if len(batchGCKeys) > 0 {
if err := gcer.GC(ctx, batchGCKeys, nil); err != nil {
if err := gcer.GC(ctx, batchGCKeys, nil, nil); err != nil {
return Info{}, err
}
}
Expand Down
9 changes: 8 additions & 1 deletion pkg/kv/kvserver/gc/gc_random_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -733,6 +733,7 @@ type fakeGCer struct {
// feed them into MVCCGarbageCollectRangeKeys and ranges argument should be
// non-overlapping.
gcRangeKeyBatches [][]roachpb.GCRequest_GCRangeKey
gcClearRangeKeys []roachpb.GCRequest_GCClearRangeKey
threshold Threshold
intents []roachpb.Intent
batches [][]roachpb.Intent
Expand All @@ -753,12 +754,18 @@ func (f *fakeGCer) SetGCThreshold(ctx context.Context, t Threshold) error {
}

func (f *fakeGCer) GC(
ctx context.Context, keys []roachpb.GCRequest_GCKey, rangeKeys []roachpb.GCRequest_GCRangeKey,
ctx context.Context,
keys []roachpb.GCRequest_GCKey,
rangeKeys []roachpb.GCRequest_GCRangeKey,
clearRangeKey *roachpb.GCRequest_GCClearRangeKey,
) error {
for _, k := range keys {
f.gcKeys[k.Key.String()] = k
}
f.gcRangeKeyBatches = append(f.gcRangeKeyBatches, rangeKeys)
if clearRangeKey != nil {
f.gcClearRangeKeys = append(f.gcClearRangeKeys, *clearRangeKey)
}
return nil
}

Expand Down
5 changes: 4 additions & 1 deletion pkg/kv/kvserver/gc/gc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,10 @@ type collectingGCer struct {
}

func (c *collectingGCer) GC(
_ context.Context, keys []roachpb.GCRequest_GCKey, rangeKeys []roachpb.GCRequest_GCRangeKey,
_ context.Context,
keys []roachpb.GCRequest_GCKey,
_ []roachpb.GCRequest_GCRangeKey,
_ *roachpb.GCRequest_GCClearRangeKey,
) error {
c.keys = append(c.keys, keys)
return nil
Expand Down
16 changes: 16 additions & 0 deletions pkg/kv/kvserver/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -1349,6 +1349,18 @@ The count is emitted by the leaseholder of each range.
Measurement: "Intent Resolutions",
Unit: metric.Unit_COUNT,
}
metaGCUsedClearRange = metric.Metadata{
Name: "queue.gc.info.clearrangesuccess",
Help: "Number of successful ClearRange operation during GC",
Measurement: "Requests",
Unit: metric.Unit_COUNT,
}
metaGCFailedClearRange = metric.Metadata{
Name: "queue.gc.info.clearrangefailed",
Help: "Number of failed ClearRange operation during GC",
Measurement: "Requests",
Unit: metric.Unit_COUNT,
}

// Slow request metrics.
metaLatchRequests = metric.Metadata{
Expand Down Expand Up @@ -1807,6 +1819,8 @@ type StoreMetrics struct {
GCResolveFailed *metric.Counter
// Failures resolving intents that belong to local transactions.
GCTxnIntentsResolveFailed *metric.Counter
GCUsedClearRange *metric.Counter
GCFailedClearRange *metric.Counter

// Slow request counts.
SlowLatchRequests *metric.Gauge
Expand Down Expand Up @@ -2317,6 +2331,8 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
GCResolveSuccess: metric.NewCounter(metaGCResolveSuccess),
GCResolveFailed: metric.NewCounter(metaGCResolveFailed),
GCTxnIntentsResolveFailed: metric.NewCounter(metaGCTxnIntentsResolveFailed),
GCUsedClearRange: metric.NewCounter(metaGCUsedClearRange),
GCFailedClearRange: metric.NewCounter(metaGCFailedClearRange),

// Wedge request counters.
SlowLatchRequests: metric.NewGauge(metaLatchRequests),
Expand Down
10 changes: 8 additions & 2 deletions pkg/kv/kvserver/mvcc_gc_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -530,14 +530,18 @@ func (r *replicaGCer) SetGCThreshold(ctx context.Context, thresh gc.Threshold) e
}

func (r *replicaGCer) GC(
ctx context.Context, keys []roachpb.GCRequest_GCKey, rangeKeys []roachpb.GCRequest_GCRangeKey,
ctx context.Context,
keys []roachpb.GCRequest_GCKey,
rangeKeys []roachpb.GCRequest_GCRangeKey,
clearRangeKey *roachpb.GCRequest_GCClearRangeKey,
) error {
if len(keys) == 0 && len(rangeKeys) == 0 {
if len(keys) == 0 && len(rangeKeys) == 0 && clearRangeKey == nil {
return nil
}
req := r.template()
req.Keys = keys
req.RangeKeys = rangeKeys
req.ClearRangeKey = clearRangeKey
return r.send(ctx, req)
}

Expand Down Expand Up @@ -713,6 +717,8 @@ func updateStoreMetricsWithGCInfo(metrics *StoreMetrics, info gc.Info) {
metrics.GCAbortSpanGCNum.Inc(int64(info.AbortSpanGCNum))
metrics.GCPushTxn.Inc(int64(info.PushTxn))
metrics.GCResolveTotal.Inc(int64(info.ResolveTotal))
metrics.GCUsedClearRange.Inc(int64(info.ClearRangeKeyOperations))
metrics.GCFailedClearRange.Inc(int64(info.ClearRangeKeyFailures))
}

// timer returns a constant duration to space out GC processing
Expand Down
Loading

0 comments on commit 4b148d8

Please sign in to comment.