From 070a6f8188c364c54ffb9a2419d881c2136c48a3 Mon Sep 17 00:00:00 2001 From: Michael Butler Date: Wed, 25 Sep 2024 11:53:36 -0400 Subject: [PATCH] backupccl: fix scheduled backup pts pushing race Informs #128013 Epic: none --- pkg/ccl/backupccl/backup_job.go | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/pkg/ccl/backupccl/backup_job.go b/pkg/ccl/backupccl/backup_job.go index f6ba5d1aa8ed..09fc9eb577a6 100644 --- a/pkg/ccl/backupccl/backup_job.go +++ b/pkg/ccl/backupccl/backup_job.go @@ -617,6 +617,21 @@ func (b *backupResumer) Resume(ctx context.Context, execCtx interface{}) error { defaultURI := details.URI var backupDest backupdest.ResolvedDestination if details.URI == "" { + // Choose which scheduled backup pts we will update at the the end of the + // backup _before_ we resolve the destination of the backup. This avoids a + // race with inc backups where backup destination resolution leads this backup + // to extend a chain that is about to be superseded by a new full backup + // chain, which could cause this inc to accidentally push the pts for the + // _new_ chain instead of the old chain it is apart of. By choosing the pts to + // move before we resolve the destination, we guarantee that we push the old + // chain. + insqlDB := p.ExecCfg().InternalDB + if err := insqlDB.Txn(ctx, func(ctx context.Context, txn isql.Txn) error { + return planSchedulePTSChaining(ctx, p.ExecCfg().JobsKnobs(), txn, &details, b.job.CreatedBy()) + }); err != nil { + return err + } + var err error backupDest, err = backupdest.ResolveDest(ctx, p.User(), details.Destination, details.EndTime, details.IncrementalFrom, p.ExecCfg()) @@ -727,12 +742,6 @@ func (b *backupResumer) Resume(ctx context.Context, execCtx interface{}) error { return err } - if err := insqlDB.Txn(ctx, func(ctx context.Context, txn isql.Txn) error { - return planSchedulePTSChaining(ctx, p.ExecCfg().JobsKnobs(), txn, &details, b.job.CreatedBy()) - }); err != nil { - return err - } - // The description picked during original planning might still say "LATEST", // if resolving that to the actual directory only just happened above here. // Ideally we'd re-render the description now that we know the subdir, but