Skip to content

Commit

Permalink
ebs br: fail the ebs volume backup when some backup member tc is invalid (#6105)
Browse files Browse the repository at this point in the history

Signed-off-by: BornChanger <[email protected]>
Co-authored-by: BornChanger <[email protected]>
  • Loading branch information
ti-chi-bot and BornChanger authored Mar 4, 2025
1 parent c763208 commit 55a5d62
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 13 deletions.
6 changes: 3 additions & 3 deletions pkg/backup/backup/backup_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ func (bm *backupManager) syncBackupJob(backup *v1alpha1.Backup) error {
}
}

if v1alpha1.IsBackupComplete(backup) || v1alpha1.IsBackupFailed(backup) {
if v1alpha1.IsBackupComplete(backup) || v1alpha1.IsBackupFailed(backup) || v1alpha1.IsBackupInvalid(backup) {
return nil
}

Expand Down Expand Up @@ -1161,8 +1161,8 @@ func (bm *backupManager) teardownVolumeBackup(backup *v1alpha1.Backup) (err erro
return
}

// if backup is failed or complete, just delete job, not modify status
if v1alpha1.IsBackupFailed(backup) || v1alpha1.IsBackupComplete(backup) {
// if backup is failed, complete or invalid, just delete job, not modify status
if v1alpha1.IsBackupFailed(backup) || v1alpha1.IsBackupComplete(backup) || v1alpha1.IsBackupInvalid(backup) {
return
}
backupCondition := v1alpha1.BackupComplete
Expand Down
6 changes: 3 additions & 3 deletions pkg/backup/backupschedule/backup_schedule_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ func (bm *backupScheduleManager) canPerformNextBackup(bs *v1alpha1.BackupSchedul
return fmt.Errorf("backup schedule %s/%s, get backup %s failed, err: %v", ns, bsName, bs.Status.LastBackup, err)
}

if v1alpha1.IsBackupComplete(backup) || (v1alpha1.IsBackupScheduled(backup) && v1alpha1.IsBackupFailed(backup)) {
if v1alpha1.IsBackupComplete(backup) || (v1alpha1.IsBackupScheduled(backup) && v1alpha1.IsBackupFailed(backup)) || v1alpha1.IsBackupInvalid(backup) {
return nil
}
// skip this sync round of the backup schedule and wait for the last backup to finish.
Expand Down Expand Up @@ -451,8 +451,8 @@ func separateSnapshotBackupsAndLogBackup(backupsList []*v1alpha1.Backup) ([]*v1a
logBackup = backup
continue
}
// Completed or failed backups will be GC'ed
if !(v1alpha1.IsBackupFailed(backup) || v1alpha1.IsBackupComplete(backup)) {
// Completed, failed or invalid backups will be GC'ed
if !(v1alpha1.IsBackupFailed(backup) || v1alpha1.IsBackupComplete(backup) || v1alpha1.IsBackupInvalid(backup)) {
continue
}
ascBackupList = append(ascBackupList, backup)
Expand Down
11 changes: 11 additions & 0 deletions pkg/backup/backupschedule/backup_schedule_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,17 @@ func TestManager(t *testing.T) {
g.Expect(err).Should(BeNil())
helper.deleteBackup(bk)

// test last backup invalid state
bk.Status.Conditions = nil
bk.Status.Conditions = append(bk.Status.Conditions, v1alpha1.BackupCondition{
Type: v1alpha1.BackupInvalid,
Status: v1.ConditionTrue,
})
helper.createBackup(bk)
err = m.canPerformNextBackup(bs)
g.Expect(err).Should(BeNil())
helper.deleteBackup(bk)

// test last backup failed state and not scheduled yet
bk.Status.Conditions = nil
bk.Status.Conditions = append(bk.Status.Conditions, v1alpha1.BackupCondition{
Expand Down
4 changes: 2 additions & 2 deletions pkg/controller/backup/backup_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,8 @@ func (c *Controller) updateBackup(cur interface{}) {
return
}

if v1alpha1.IsBackupScheduled(newBackup) || v1alpha1.IsBackupRunning(newBackup) || v1alpha1.IsBackupPrepared(newBackup) || v1alpha1.IsBackupFailed(newBackup) {
klog.V(4).Infof("backup %s/%s is already Scheduled, Running, Preparing or Failed, skipping.", ns, name)
if v1alpha1.IsBackupScheduled(newBackup) || v1alpha1.IsBackupRunning(newBackup) || v1alpha1.IsBackupPrepared(newBackup) || v1alpha1.IsBackupFailed(newBackup) || v1alpha1.IsBackupInvalid(newBackup) {
klog.V(4).Infof("backup %s/%s is already Scheduled, Running, Preparing, Failed or InValid, skipping.", ns, name)
return
}

Expand Down
15 changes: 11 additions & 4 deletions pkg/fedvolumebackup/backup/backup_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ func (bm *backupManager) waitBackupMemberInitialized(ctx context.Context, volume
Message: errMsg,
}
}
if pingcapv1alpha1.IsBackupFailed(backupMember.backup) {
if pingcapv1alpha1.IsBackupFailed(backupMember.backup) || pingcapv1alpha1.IsBackupInvalid(backupMember.backup) {
errMsg := fmt.Sprintf("backup member %s of cluster %s failed", backupMember.backup.Name, backupMember.k8sClusterName)
return &fedvolumebackup.BRDataPlaneFailedError{
Reason: reasonVolumeBackupMemberFailed,
Expand Down Expand Up @@ -343,7 +343,8 @@ func (bm *backupManager) waitVolumeSnapshotsCreated(backupMembers []*volumeBacku
for _, backupMember := range backupMembers {
if pingcapv1alpha1.IsVolumeBackupInitializeFailed(backupMember.backup) ||
pingcapv1alpha1.IsVolumeBackupFailed(backupMember.backup) ||
pingcapv1alpha1.IsBackupFailed(backupMember.backup) {
pingcapv1alpha1.IsBackupFailed(backupMember.backup) ||
pingcapv1alpha1.IsBackupInvalid(backupMember.backup) {
errMsg := fmt.Sprintf("backup member %s of cluster %s failed", backupMember.backup.Name, backupMember.k8sClusterName)
return &fedvolumebackup.BRDataPlaneFailedError{
Reason: reasonVolumeBackupMemberFailed,
Expand All @@ -363,7 +364,8 @@ func (bm *backupManager) waitBackupMemberInitializeComplete(volumeBackup *v1alph
for _, backupMember := range backupMembers {
if pingcapv1alpha1.IsVolumeBackupInitializeFailed(backupMember.backup) ||
pingcapv1alpha1.IsVolumeBackupFailed(backupMember.backup) ||
pingcapv1alpha1.IsBackupFailed(backupMember.backup) {
pingcapv1alpha1.IsBackupFailed(backupMember.backup) ||
pingcapv1alpha1.IsBackupInvalid(backupMember.backup) {
errMsg := fmt.Sprintf("backup member %s of cluster %s failed", backupMember.backup.Name, backupMember.k8sClusterName)
return &fedvolumebackup.BRDataPlaneFailedError{
Reason: reasonVolumeBackupMemberFailed,
Expand All @@ -385,7 +387,8 @@ func (bm *backupManager) waitVolumeSnapshotsComplete(backupMembers []*volumeBack
for _, backupMember := range backupMembers {
if pingcapv1alpha1.IsVolumeBackupInitializeFailed(backupMember.backup) ||
pingcapv1alpha1.IsVolumeBackupFailed(backupMember.backup) ||
pingcapv1alpha1.IsBackupFailed(backupMember.backup) {
pingcapv1alpha1.IsBackupFailed(backupMember.backup) ||
pingcapv1alpha1.IsBackupInvalid(backupMember.backup) {
errMsg := fmt.Sprintf("backup member %s of cluster %s failed", backupMember.backup.Name, backupMember.k8sClusterName)
return &fedvolumebackup.BRDataPlaneFailedError{
Reason: reasonVolumeBackupMemberFailed,
Expand Down Expand Up @@ -437,6 +440,10 @@ func (bm *backupManager) waitVolumeBackupComplete(ctx context.Context, volumeBac
failedBackups = append(failedBackups, backupMember)
klog.Errorf("VolumeBackup %s/%s backup member %s of cluster %s is failed",
volumeBackup.Namespace, volumeBackup.Name, backupMember.backup.Name, backupMember.k8sClusterName)
} else if pingcapv1alpha1.IsBackupInvalid(backupMember.backup) {
failedBackups = append(failedBackups, backupMember)
klog.Errorf("VolumeBackup %s/%s backup member %s of cluster %s is invalid",
volumeBackup.Namespace, volumeBackup.Name, backupMember.backup.Name, backupMember.k8sClusterName)
} else if !pingcapv1alpha1.IsBackupComplete(backupMember.backup) {
isBackupRunning = true
klog.Infof(
Expand Down
37 changes: 37 additions & 0 deletions pkg/fedvolumebackup/backup/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,17 @@ func (h *helper) setDataPlaneFailed(ctx context.Context) {
h.g.Expect(err).To(gomega.BeNil())
}

// setDataPlaneFailed4Invalid flags the backup member of the first data plane
// as invalid. It reads the Backup named h.backupMemberName1 from
// fakeTcNamespace1 through dataPlaneClient1, sets a BackupInvalid condition
// with status True on it, and writes the status back. Any error from the read
// or the status update fails the test via the helper's gomega instance.
func (h *helper) setDataPlaneFailed4Invalid(ctx context.Context) {
	backups := h.dataPlaneClient1.PingcapV1alpha1().Backups(fakeTcNamespace1)

	member, err := backups.Get(ctx, h.backupMemberName1, metav1.GetOptions{})
	h.g.Expect(err).To(gomega.BeNil())

	invalidCond := &pingcapv1alpha1.BackupCondition{
		Type:   pingcapv1alpha1.BackupInvalid,
		Status: corev1.ConditionTrue,
	}
	pingcapv1alpha1.UpdateBackupCondition(&member.Status, invalidCond)

	_, err = backups.UpdateStatus(ctx, member, metav1.UpdateOptions{})
	h.g.Expect(err).To(gomega.BeNil())
}

func (h *helper) setAllDataPlanesFailed(ctx context.Context) {
backupMember1, err := h.dataPlaneClient1.PingcapV1alpha1().Backups(fakeTcNamespace1).Get(ctx, h.backupMemberName1, metav1.GetOptions{})
h.g.Expect(err).To(gomega.BeNil())
Expand Down Expand Up @@ -450,6 +461,32 @@ func TestVolumeBackupVolumeFailed(t *testing.T) {
h.assertFailed(volumeBackup)
}

// TestVolumeBackupFailed4Invalid drives a VolumeBackup through three Sync
// rounds: initialization, teardown after one backup member has been marked
// invalid, and the final round in which the VolumeBackup is expected to be
// reported as failed.
func TestVolumeBackupFailed4Invalid(t *testing.T) {
	ctx := context.Background()
	h := newHelper(t, "backup-2", "ns-2")

	// Create the volume backup that every Sync round below operates on.
	vb := h.createVolumeBackup(ctx)

	// syncOnce runs a single reconcile round and requires it to succeed.
	syncOnce := func() {
		syncErr := h.bm.Sync(vb)
		h.g.Expect(syncErr).To(gomega.BeNil())
	}

	// Round 1: initialize phase.
	syncOnce()
	h.assertRunInitialize(ctx, vb)

	// Round 2: a member backup turns invalid, so teardown must run.
	h.setDataPlaneFailed4Invalid(ctx)
	syncOnce()
	h.assertRunTeardown(ctx, vb, true)

	// Round 3: the volume backup ends up failed.
	syncOnce()
	h.assertFailed(vb)
}

func generateVolumeBackup(backupName, backupNamespace string) *v1alpha1.VolumeBackup {
return &v1alpha1.VolumeBackup{
ObjectMeta: metav1.ObjectMeta{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,7 @@ func TestCalculateExpiredBackups(t *testing.T) {
// 3 backups should be deleted
{
backups: []*v1alpha1.VolumeBackup{
fakeBackup(&last3Day),
fakeFailedBackup(&last3Day),
fakeBackup(&last2Day),
fakeBackup(&last1Day),
fakeBackup(&last10Min),
Expand Down Expand Up @@ -573,6 +573,19 @@ func fakeBackup(ts *time.Time) *v1alpha1.VolumeBackup {
return backup
}

// fakeFailedBackup builds a VolumeBackup fixture for tests. With a nil ts it
// returns an empty VolumeBackup (no creation timestamp and no conditions).
// Otherwise the creation timestamp is set to *ts and a VolumeBackupFailed
// condition with status True is appended to the status conditions.
func fakeFailedBackup(ts *time.Time) *v1alpha1.VolumeBackup {
	vb := new(v1alpha1.VolumeBackup)
	if ts != nil {
		failedCond := v1alpha1.VolumeBackupCondition{
			Type:   v1alpha1.VolumeBackupFailed,
			Status: v1.ConditionTrue,
		}
		vb.CreationTimestamp = metav1.Time{Time: *ts}
		vb.Status.Conditions = append(vb.Status.Conditions, failedCond)
	}
	return vb
}

func getTSOStr(ts int64) string {
tso := getTSO(ts)
return strconv.FormatUint(tso, 10)
Expand Down

0 comments on commit 55a5d62

Please sign in to comment.