From 6626cd038dd6f6664182264aa1d8e58b6db9dc16 Mon Sep 17 00:00:00 2001 From: Scott Seago Date: Wed, 13 Sep 2023 13:24:09 -0400 Subject: [PATCH] issue #6807: Retry failed create when using generateName When creating resources with generateName, apimachinery does not guarantee uniqueness when it appends the random suffix to the generateName stub, so if it fails with already exists error, we need to retry. Signed-off-by: Scott Seago --- changelogs/unreleased/6943-sseago | 1 + pkg/client/retry.go | 44 +++++++++++++++++++++++ pkg/cmd/cli/backup/delete.go | 2 +- pkg/cmd/cli/serverstatus/server_status.go | 3 +- pkg/controller/gc_controller.go | 3 +- pkg/podvolume/backupper.go | 7 +++- pkg/podvolume/restorer.go | 6 +++- pkg/repository/ensurer.go | 3 +- pkg/restore/dataupload_retrieve_action.go | 3 +- 9 files changed, 65 insertions(+), 7 deletions(-) create mode 100644 changelogs/unreleased/6943-sseago create mode 100644 pkg/client/retry.go diff --git a/changelogs/unreleased/6943-sseago b/changelogs/unreleased/6943-sseago new file mode 100644 index 00000000000..2055df15a29 --- /dev/null +++ b/changelogs/unreleased/6943-sseago @@ -0,0 +1 @@ +Retry failed create when using generateName diff --git a/pkg/client/retry.go b/pkg/client/retry.go new file mode 100644 index 00000000000..f9674e1edd5 --- /dev/null +++ b/pkg/client/retry.go @@ -0,0 +1,44 @@ +/* +Copyright the Velero contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package client + +import ( + "context" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/client-go/util/retry" + kbclient "sigs.k8s.io/controller-runtime/pkg/client" +) + +func CreateRetryGenerateName(client kbclient.Client, ctx context.Context, obj kbclient.Object) error { + return CreateRetryGenerateNameWithFunc(obj, func() error { + return client.Create(ctx, obj, &kbclient.CreateOptions{}) + }) +} + +func CreateRetryGenerateNameWithFunc(obj kbclient.Object, createFn func() error) error { + retryCreateFn := func() error { + // needed to ensure that the name from the failed create isn't left on the object between retries + obj.SetName("") + return createFn() + } + if obj.GetGenerateName() != "" && obj.GetName() == "" { + return retry.OnError(retry.DefaultRetry, apierrors.IsAlreadyExists, retryCreateFn) + } else { + return createFn() + } +} diff --git a/pkg/cmd/cli/backup/delete.go b/pkg/cmd/cli/backup/delete.go index 5d235bd72f1..4c81477585a 100644 --- a/pkg/cmd/cli/backup/delete.go +++ b/pkg/cmd/cli/backup/delete.go @@ -124,7 +124,7 @@ func Run(o *cli.DeleteOptions) error { ObjectMeta(builder.WithLabels(velerov1api.BackupNameLabel, label.GetValidName(b.Name), velerov1api.BackupUIDLabel, string(b.UID)), builder.WithGenerateName(b.Name+"-")).Result() - if err := o.Client.Create(context.TODO(), deleteRequest, &controllerclient.CreateOptions{}); err != nil { + if err := client.CreateRetryGenerateName(o.Client, context.TODO(), deleteRequest); err != nil { errs = append(errs, err) continue } diff --git a/pkg/cmd/cli/serverstatus/server_status.go b/pkg/cmd/cli/serverstatus/server_status.go index 5bf99aff336..628c4a54534 100644 --- a/pkg/cmd/cli/serverstatus/server_status.go +++ b/pkg/cmd/cli/serverstatus/server_status.go @@ -26,6 +26,7 @@ import ( velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" "github.com/vmware-tanzu/velero/pkg/builder" + veleroclient "github.com/vmware-tanzu/velero/pkg/client" ) type Getter interface { @@ -40,7 +41,7 @@ type DefaultServerStatusGetter struct { func (g *DefaultServerStatusGetter) GetServerStatus(kbClient kbclient.Client) (*velerov1api.ServerStatusRequest, error) { created := builder.ForServerStatusRequest(g.Namespace, "", "0").ObjectMeta(builder.WithGenerateName("velero-cli-")).Result() - if err := kbClient.Create(context.Background(), created, &kbclient.CreateOptions{}); err != nil { + if err := veleroclient.CreateRetryGenerateName(kbClient, context.Background(), created); err != nil { return nil, errors.WithStack(err) } diff --git a/pkg/controller/gc_controller.go b/pkg/controller/gc_controller.go index 3d1fe48c743..9d38e5d1b75 100644 --- a/pkg/controller/gc_controller.go +++ b/pkg/controller/gc_controller.go @@ -32,6 +32,7 @@ import ( velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" pkgbackup "github.com/vmware-tanzu/velero/pkg/backup" + veleroclient "github.com/vmware-tanzu/velero/pkg/client" "github.com/vmware-tanzu/velero/pkg/label" "github.com/vmware-tanzu/velero/pkg/util/kube" ) @@ -187,7 +188,7 @@ func (c *gcReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Re log.Info("Creating a new deletion request") ndbr := pkgbackup.NewDeleteBackupRequest(backup.Name, string(backup.UID)) ndbr.SetNamespace(backup.Namespace) - if err := c.Create(ctx, ndbr); err != nil { + if err := veleroclient.CreateRetryGenerateName(c, ctx, ndbr); err != nil { log.WithError(err).Error("error creating DeleteBackupRequests") return ctrl.Result{}, errors.Wrap(err, "error creating DeleteBackupRequest") } diff --git a/pkg/podvolume/backupper.go b/pkg/podvolume/backupper.go index 2955b06b507..e6fe25e2458 100644 --- a/pkg/podvolume/backupper.go +++ b/pkg/podvolume/backupper.go @@ -31,6 +31,7 @@ import ( "github.com/vmware-tanzu/velero/internal/resourcepolicies" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + veleroclient "github.com/vmware-tanzu/velero/pkg/client" clientset "github.com/vmware-tanzu/velero/pkg/generated/clientset/versioned" "github.com/vmware-tanzu/velero/pkg/label" "github.com/vmware-tanzu/velero/pkg/nodeagent" @@ -297,7 +298,11 @@ func (b *backupper) BackupPodVolumes(backup *velerov1api.Backup, pod *corev1api. } volumeBackup := newPodVolumeBackup(backup, pod, volume, repo.Spec.ResticIdentifier, b.uploaderType, pvc) - if _, err = b.veleroClient.VeleroV1().PodVolumeBackups(volumeBackup.Namespace).Create(context.TODO(), volumeBackup, metav1.CreateOptions{}); err != nil { + // TODO: once backupper is refactored to use controller-runtime, just pass client instead of anonymous func + if err := veleroclient.CreateRetryGenerateNameWithFunc(volumeBackup, func() error { + _, err := b.veleroClient.VeleroV1().PodVolumeBackups(volumeBackup.Namespace).Create(context.TODO(), volumeBackup, metav1.CreateOptions{}) + return err + }); err != nil { errs = append(errs, err) continue } diff --git a/pkg/podvolume/restorer.go b/pkg/podvolume/restorer.go index 0011f3d473d..7befbf1f6d8 100644 --- a/pkg/podvolume/restorer.go +++ b/pkg/podvolume/restorer.go @@ -31,6 +31,7 @@ import ( "k8s.io/client-go/tools/cache" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + veleroclient "github.com/vmware-tanzu/velero/pkg/client" clientset "github.com/vmware-tanzu/velero/pkg/generated/clientset/versioned" "github.com/vmware-tanzu/velero/pkg/label" "github.com/vmware-tanzu/velero/pkg/nodeagent" @@ -172,7 +173,10 @@ func (r *restorer) RestorePodVolumes(data RestoreData) []error { volumeRestore := newPodVolumeRestore(data.Restore, data.Pod, data.BackupLocation, volume, backupInfo.snapshotID, repo.Spec.ResticIdentifier, backupInfo.uploaderType, data.SourceNamespace, pvc) - if err := errorOnly(r.veleroClient.VeleroV1().PodVolumeRestores(volumeRestore.Namespace).Create(context.TODO(), volumeRestore, metav1.CreateOptions{})); err != nil { + // TODO: once restorer is refactored to use controller-runtime, just pass client instead of anonymous func + if err := veleroclient.CreateRetryGenerateNameWithFunc(volumeRestore, func() error { + return errorOnly(r.veleroClient.VeleroV1().PodVolumeRestores(volumeRestore.Namespace).Create(context.TODO(), volumeRestore, metav1.CreateOptions{})) + }); err != nil { errs = append(errs, errors.WithStack(err)) continue } diff --git a/pkg/repository/ensurer.go b/pkg/repository/ensurer.go index 885363328ce..255f49d038c 100644 --- a/pkg/repository/ensurer.go +++ b/pkg/repository/ensurer.go @@ -28,6 +28,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + veleroclient "github.com/vmware-tanzu/velero/pkg/client" ) // Ensurer ensures that backup repositories are created and ready. @@ -107,7 +108,7 @@ func (r *Ensurer) repoLock(key BackupRepositoryKey) *sync.Mutex { func (r *Ensurer) createBackupRepositoryAndWait(ctx context.Context, namespace string, backupRepoKey BackupRepositoryKey) (*velerov1api.BackupRepository, error) { toCreate := NewBackupRepository(namespace, backupRepoKey) - if err := r.repoClient.Create(ctx, toCreate, &client.CreateOptions{}); err != nil { + if err := veleroclient.CreateRetryGenerateName(r.repoClient, ctx, toCreate); err != nil { return nil, errors.Wrap(err, "unable to create backup repository resource") } diff --git a/pkg/restore/dataupload_retrieve_action.go b/pkg/restore/dataupload_retrieve_action.go index c345e908ff0..79193411e7e 100644 --- a/pkg/restore/dataupload_retrieve_action.go +++ b/pkg/restore/dataupload_retrieve_action.go @@ -30,6 +30,7 @@ import ( velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" velerov2alpha1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1" + veleroclient "github.com/vmware-tanzu/velero/pkg/client" "github.com/vmware-tanzu/velero/pkg/label" "github.com/vmware-tanzu/velero/pkg/plugin/velero" ) @@ -104,7 +105,7 @@ func (d *DataUploadRetrieveAction) Execute(input *velero.RestoreItemActionExecut }, } - err = d.client.Create(context.Background(), &cm, &client.CreateOptions{}) + err = veleroclient.CreateRetryGenerateName(d.client, context.Background(), &cm) if err != nil { d.logger.Errorf("fail to create DataUploadResult ConfigMap %s/%s: %s", cm.Namespace, cm.Name, err.Error()) return nil, errors.Wrap(err, "fail to create DataUploadResult ConfigMap")