diff --git a/api/v1alpha1/scheduledvolumesnapshot_types.go b/api/v1alpha1/scheduledvolumesnapshot_types.go index 6bf46f18..2d7e0669 100644 --- a/api/v1alpha1/scheduledvolumesnapshot_types.go +++ b/api/v1alpha1/scheduledvolumesnapshot_types.go @@ -166,6 +166,9 @@ const ( // SnapshotPhaseSuspended means the controller is not creating snapshots. Suspended by the user. SnapshotPhaseSuspended SnapshotPhase = "Suspended" + + // SnapshotPhaseMissingCRDs means the controller is not creating snapshots. The required VolumeSnapshot CRDs are missing. + SnapshotPhaseMissingCRDs SnapshotPhase = "MissingCRDs" ) type VolumeSnapshotStatus struct { diff --git a/controllers/scheduledvolumesnapshot_controller.go b/controllers/scheduledvolumesnapshot_controller.go index a7e7d95d..cfd8db72 100644 --- a/controllers/scheduledvolumesnapshot_controller.go +++ b/controllers/scheduledvolumesnapshot_controller.go @@ -36,10 +36,11 @@ import ( // ScheduledVolumeSnapshotReconciler reconciles a ScheduledVolumeSnapshot object type ScheduledVolumeSnapshotReconciler struct { client.Client - fullNodeControl *volsnapshot.FullNodeControl - recorder record.EventRecorder - scheduler *volsnapshot.Scheduler - volSnapshotControl *volsnapshot.VolumeSnapshotControl + fullNodeControl *volsnapshot.FullNodeControl + missingVolSnapshotCRD bool + recorder record.EventRecorder + scheduler *volsnapshot.Scheduler + volSnapshotControl *volsnapshot.VolumeSnapshotControl } func NewScheduledVolumeSnapshotReconciler( @@ -47,13 +48,15 @@ func NewScheduledVolumeSnapshotReconciler( recorder record.EventRecorder, statusClient *fullnode.StatusClient, cache *cosmos.CacheController, + missingVolSnapCRD bool, ) *ScheduledVolumeSnapshotReconciler { return &ScheduledVolumeSnapshotReconciler{ - Client: client, - fullNodeControl: volsnapshot.NewFullNodeControl(statusClient, client), - recorder: recorder, - scheduler: volsnapshot.NewScheduler(client), - volSnapshotControl: volsnapshot.NewVolumeSnapshotControl(client, cache), + Client: 
client, + fullNodeControl: volsnapshot.NewFullNodeControl(statusClient, client), + missingVolSnapshotCRD: missingVolSnapCRD, + recorder: recorder, + scheduler: volsnapshot.NewScheduler(client), + volSnapshotControl: volsnapshot.NewVolumeSnapshotControl(client, cache), } } @@ -83,6 +86,13 @@ func (r *ScheduledVolumeSnapshotReconciler) Reconcile(ctx context.Context, req c volsnapshot.ResetStatus(crd) defer r.updateStatus(ctx, crd) + if r.missingVolSnapshotCRD { + logger.Error(errMissingVolSnapCRD, "Controller is disabled") + r.reportError(crd, "MissingCRDs", errMissingVolSnapCRD) + crd.Status.Phase = cosmosv1alpha1.SnapshotPhaseMissingCRDs + return ctrl.Result{}, nil + } + retryResult := ctrl.Result{RequeueAfter: 10 * time.Second} phase := crd.Status.Phase diff --git a/controllers/statefuljob_controller.go b/controllers/statefuljob_controller.go index ac4c8b49..968e8bb5 100644 --- a/controllers/statefuljob_controller.go +++ b/controllers/statefuljob_controller.go @@ -18,6 +18,7 @@ package controllers import ( "context" + "errors" "fmt" "time" @@ -37,17 +38,39 @@ import ( "sigs.k8s.io/controller-runtime/pkg/source" ) +var errMissingVolSnapCRD = errors.New("cluster does not have VolumeSnapshot CRDs installed") + +// IndexVolumeSnapshots indexes all VolumeSnapshots by name. Exposed as a separate method so caller can +// test for presence of VolumeSnapshot CRDs in the cluster. +func IndexVolumeSnapshots(ctx context.Context, mgr ctrl.Manager) error { + // Index all VolumeSnapshots. Controller does not own any because it does not create them. + if err := mgr.GetFieldIndexer().IndexField( + ctx, + &snapshotv1.VolumeSnapshot{}, + ".metadata.name", + func(object client.Object) []string { + return []string{object.GetName()} + }, + ); err != nil { + return fmt.Errorf("volume snapshot index: %w", err) + } + return nil +} + // StatefulJobReconciler reconciles a StatefulJob object. 
type StatefulJobReconciler struct { client.Client - recorder record.EventRecorder + recorder record.EventRecorder + missingVolSnapshotCRD bool } -// NewStatefulJob returns a valid controller. -func NewStatefulJob(client client.Client, recorder record.EventRecorder) *StatefulJobReconciler { +// NewStatefulJob returns a valid controller. If missingVolSnapCRD is true, the controller errors on every reconcile loop +// and will not function. +func NewStatefulJob(client client.Client, recorder record.EventRecorder, missingVolSnapCRD bool) *StatefulJobReconciler { return &StatefulJobReconciler{ - Client: client, - recorder: recorder, + Client: client, + recorder: recorder, + missingVolSnapshotCRD: missingVolSnapCRD, } } @@ -78,6 +101,11 @@ func (r *StatefulJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) return requeueStatefulJob, kube.IgnoreNotFound(err) } + if r.missingVolSnapshotCRD { + r.reportErr(logger, crd, errMissingVolSnapCRD) + return ctrl.Result{}, nil + } + crd.Status.ObservedGeneration = crd.Generation crd.Status.StatusMessage = nil defer r.updateStatus(ctx, crd) @@ -170,20 +198,8 @@ func (r *StatefulJobReconciler) updateStatus(ctx context.Context, crd *cosmosalp } } -// SetupWithManager sets up the controller with the Manager. +// SetupWithManager sets up the controller with the Manager. IndexVolumeSnapshots should be called first. func (r *StatefulJobReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error { - // Index all VolumeSnapshots. Controller does not own any because it does not create them. - if err := mgr.GetFieldIndexer().IndexField( - ctx, - &snapshotv1.VolumeSnapshot{}, - ".metadata.name", - func(object client.Object) []string { - return []string{object.GetName()} - }, - ); err != nil { - return fmt.Errorf("VolumeSnapshot index: %w", err) - } - cbuilder := ctrl.NewControllerManagedBy(mgr).For(&cosmosalpha.StatefulJob{}) // Watch for delete events for jobs. 
diff --git a/main.go b/main.go
index 9cc64cc4..ae8d31d0 100644
--- a/main.go
+++ b/main.go
@@ -195,24 +195,35 @@ func startManager(cmd *cobra.Command, args []string) error {
 		return fmt.Errorf("unable to create SelfHealing controller: %w", err)
 	}
 
+	// Test for presence of VolumeSnapshot CRD.
+	snapshotErr := controllers.IndexVolumeSnapshots(ctx, mgr)
+	if snapshotErr != nil {
+		// Include the underlying error so a missing CRD can be told apart from any other indexing failure.
+		setupLog.Info("Warning: VolumeSnapshot CRD not found, StatefulJob and ScheduledVolumeSnapshot controllers will be disabled", "error", snapshotErr)
+	}
+
+	// StatefulJobs
+	jobCtl := controllers.NewStatefulJob(
+		mgr.GetClient(),
+		mgr.GetEventRecorderFor(cosmosv1alpha1.StatefulJobController),
+		snapshotErr != nil,
+	)
+
+	if err = jobCtl.SetupWithManager(ctx, mgr); err != nil {
+		return fmt.Errorf("unable to create StatefulJob controller: %w", err)
+	}
+
 	// ScheduledVolumeSnapshots
 	if err = controllers.NewScheduledVolumeSnapshotReconciler(
 		mgr.GetClient(),
 		mgr.GetEventRecorderFor(cosmosv1alpha1.ScheduledVolumeSnapshotController),
 		statusClient,
 		cacheController,
+		snapshotErr != nil,
 	).SetupWithManager(ctx, mgr); err != nil {
 		return fmt.Errorf("unable to create ScheduledVolumeSnapshot controller: %w", err)
 	}
 
-	// StatefulJobs
-	if err = controllers.NewStatefulJob(
-		mgr.GetClient(),
-		mgr.GetEventRecorderFor(cosmosv1alpha1.StatefulJobController),
-	).SetupWithManager(ctx, mgr); err != nil {
-		return fmt.Errorf("unable to create StatefulJob controller: %w", err)
-	}
-
 	//+kubebuilder:scaffold:builder
 
 	if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {