From ca07a17479aaa051c5ddef5e2a5f793925482365 Mon Sep 17 00:00:00 2001 From: haijianyang Date: Wed, 10 Apr 2024 06:38:14 -0400 Subject: [PATCH] Support contextual logging --- controllers/elfcluster_controller.go | 187 ++- controllers/elfcluster_controller_test.go | 124 +- controllers/elfmachine_controller.go | 760 +++++------ controllers/elfmachine_controller_gpu.go | 121 +- controllers/elfmachine_controller_gpu_test.go | 176 ++- .../elfmachine_controller_placement_group.go | 220 ++-- controllers/elfmachine_controller_test.go | 1163 ++++++++--------- controllers/suite_test.go | 39 +- controllers/tower_cache.go | 40 +- controllers/tower_cache_test.go | 76 +- main.go | 15 +- pkg/context/cluster_context.go | 6 +- pkg/context/controller_context.go | 39 - pkg/context/controller_manager_context.go | 10 - pkg/context/machine_context.go | 8 +- pkg/manager/manager.go | 25 +- pkg/manager/options.go | 4 +- pkg/util/machine/kcp_test.go | 8 +- pkg/util/machine/machine_test.go | 11 +- pkg/util/machine/md_test.go | 6 +- test/fake/controller_manager_context.go | 8 +- test/fake/types.go | 20 +- test/helpers/envtest.go | 6 +- 23 files changed, 1502 insertions(+), 1570 deletions(-) delete mode 100644 pkg/context/controller_context.go diff --git a/controllers/elfcluster_controller.go b/controllers/elfcluster_controller.go index 9e50fa10..5147a0f4 100644 --- a/controllers/elfcluster_controller.go +++ b/controllers/elfcluster_controller.go @@ -20,11 +20,11 @@ import ( goctx "context" "fmt" "reflect" - "strings" "github.com/pkg/errors" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" capiutil "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" @@ -56,24 +56,16 @@ import ( // AddClusterControllerToManager adds the cluster controller to the provided // manager. -func AddClusterControllerToManager(ctx *context.ControllerManagerContext, mgr ctrlmgr.Manager, options controller.Options) error { +func AddClusterControllerToManager(ctx goctx.Context, ctrlMgrCtx *context.ControllerManagerContext, mgr ctrlmgr.Manager, options controller.Options) error { var ( clusterControlledType = &infrav1.ElfCluster{} clusterControlledTypeName = reflect.TypeOf(clusterControlledType).Elem().Name() clusterControlledTypeGVK = infrav1.GroupVersion.WithKind(clusterControlledTypeName) - controllerNameShort = fmt.Sprintf("%s-controller", strings.ToLower(clusterControlledTypeName)) ) - // Build the controller context. - controllerContext := &context.ControllerContext{ - ControllerManagerContext: ctx, - Name: controllerNameShort, - Logger: ctx.Logger.WithName(controllerNameShort), - } - reconciler := &ElfClusterReconciler{ - ControllerContext: controllerContext, - NewVMService: service.NewVMService, + ControllerManagerContext: ctrlMgrCtx, + NewVMService: service.NewVMService, } return ctrl.NewControllerManagedBy(mgr). @@ -84,24 +76,26 @@ func AddClusterControllerToManager(ctx *context.ControllerManagerContext, mgr ct &clusterv1.Cluster{}, handler.EnqueueRequestsFromMapFunc(capiutil.ClusterToInfrastructureMapFunc(ctx, clusterControlledTypeGVK, mgr.GetClient(), &infrav1.ElfCluster{})), ). - WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), ctx.WatchFilterValue)). + WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), ctrlMgrCtx.WatchFilterValue)). WithOptions(options). Complete(reconciler) } // ElfClusterReconciler reconciles a ElfCluster object. 
type ElfClusterReconciler struct { - *context.ControllerContext + *context.ControllerManagerContext NewVMService service.NewVMServiceFunc } // Reconcile ensures the back-end state reflects the Kubernetes resource state intent. func (r *ElfClusterReconciler) Reconcile(ctx goctx.Context, req ctrl.Request) (_ ctrl.Result, reterr error) { + log := ctrl.LoggerFrom(ctx) + // Get the ElfCluster resource for this request. var elfCluster infrav1.ElfCluster - if err := r.Client.Get(r, req.NamespacedName, &elfCluster); err != nil { + if err := r.Client.Get(ctx, req.NamespacedName, &elfCluster); err != nil { if apierrors.IsNotFound(err) { - r.Logger.Info("ElfCluster not found, won't reconcile", "key", req.NamespacedName) + log.Info("ElfCluster not found, won't reconcile", "key", req.NamespacedName) return reconcile.Result{}, nil } @@ -110,22 +104,20 @@ func (r *ElfClusterReconciler) Reconcile(ctx goctx.Context, req ctrl.Request) (_ } // Fetch the CAPI Cluster. - cluster, err := capiutil.GetOwnerCluster(r, r.Client, elfCluster.ObjectMeta) + cluster, err := capiutil.GetOwnerCluster(ctx, r.Client, elfCluster.ObjectMeta) if err != nil { return reconcile.Result{}, err } if cluster == nil { - r.Logger.Info("Waiting for Cluster Controller to set OwnerRef on ElfCluster", - "namespace", elfCluster.Namespace, - "elfCluster", elfCluster.Name) + log.Info("Waiting for Cluster Controller to set OwnerRef on ElfCluster") return reconcile.Result{}, nil } + log = log.WithValues("Cluster", klog.KObj(cluster)) + ctx = ctrl.LoggerInto(ctx, log) if annotations.IsPaused(cluster, &elfCluster) { - r.Logger.V(4).Info("ElfCluster linked to a cluster that is paused", - "namespace", elfCluster.Namespace, - "elfCluster", elfCluster.Name) + log.V(4).Info("ElfCluster linked to a cluster that is paused") return reconcile.Result{}, nil } @@ -133,29 +125,20 @@ func (r *ElfClusterReconciler) Reconcile(ctx goctx.Context, req ctrl.Request) (_ // Create the patch helper. patchHelper, err := patch.NewHelper(&elfCluster, r.Client) if err != nil { - return reconcile.Result{}, errors.Wrapf( - err, - "failed to init patch helper for %s %s/%s", - elfCluster.GroupVersionKind(), - elfCluster.Namespace, - elfCluster.Name) + return reconcile.Result{}, errors.Wrapf(err, "failed to init patch helper") } // Create the cluster context for this request. - logger := r.Logger.WithValues("namespace", cluster.Namespace, "elfCluster", elfCluster.Name) - - clusterContext := &context.ClusterContext{ - ControllerContext: r.ControllerContext, - Cluster: cluster, - ElfCluster: &elfCluster, - Logger: logger, - PatchHelper: patchHelper, + clusterCtx := &context.ClusterContext{ + Cluster: cluster, + ElfCluster: &elfCluster, + PatchHelper: patchHelper, } // If ElfCluster is being deleting and ForceDeleteCluster flag is set, skip creating the VMService object, // because Tower server may be out of service. So we can force delete ElfCluster. 
if elfCluster.ObjectMeta.DeletionTimestamp.IsZero() || !elfCluster.HasForceDeleteCluster() { - vmService, err := r.NewVMService(r.Context, elfCluster.GetTower(), logger) + vmService, err := r.NewVMService(ctx, elfCluster.GetTower(), log) if err != nil { conditions.MarkFalse(&elfCluster, infrav1.TowerAvailableCondition, infrav1.TowerUnreachableReason, clusterv1.ConditionSeverityError, err.Error()) @@ -163,117 +146,122 @@ func (r *ElfClusterReconciler) Reconcile(ctx goctx.Context, req ctrl.Request) (_ } conditions.MarkTrue(&elfCluster, infrav1.TowerAvailableCondition) - clusterContext.VMService = vmService + clusterCtx.VMService = vmService } // Always issue a patch when exiting this function so changes to the // resource are patched back to the API server. defer func() { // always update the readyCondition. - conditions.SetSummary(clusterContext.ElfCluster, + conditions.SetSummary(clusterCtx.ElfCluster, conditions.WithConditions( infrav1.ControlPlaneEndpointReadyCondition, infrav1.TowerAvailableCondition, ), ) - if err := clusterContext.Patch(); err != nil { + if err := clusterCtx.Patch(ctx); err != nil { if reterr == nil { reterr = err } - clusterContext.Logger.Error(err, "patch failed", "elfCluster", clusterContext.String()) + clusterCtx.Logger.Error(err, "patch failed", "elfCluster", clusterCtx.String()) } }() // Handle deleted clusters if !elfCluster.DeletionTimestamp.IsZero() { - return r.reconcileDelete(clusterContext) + return r.reconcileDelete(ctx, clusterCtx) } // Handle non-deleted clusters - return r.reconcileNormal(clusterContext) + return r.reconcileNormal(ctx, clusterCtx) } -func (r *ElfClusterReconciler) reconcileDelete(ctx *context.ClusterContext) (reconcile.Result, error) { - ctx.Logger.Info("Reconciling ElfCluster delete") +func (r *ElfClusterReconciler) reconcileDelete(ctx goctx.Context, clusterCtx *context.ClusterContext) (reconcile.Result, error) { + log := ctrl.LoggerFrom(ctx) + log.Info("Reconciling ElfCluster delete") - elfMachines, err := machineutil.GetElfMachinesInCluster(ctx, ctx.Client, ctx.ElfCluster.Namespace, ctx.ElfCluster.Name) + elfMachines, err := machineutil.GetElfMachinesInCluster(ctx, r.Client, clusterCtx.ElfCluster.Namespace, clusterCtx.ElfCluster.Name) if err != nil { return reconcile.Result{}, errors.Wrapf(err, - "Unable to list ElfMachines part of ElfCluster %s/%s", ctx.ElfCluster.Namespace, ctx.ElfCluster.Name) + "Unable to list ElfMachines part of ElfCluster %s", klog.KObj(clusterCtx.ElfCluster)) } if len(elfMachines) > 0 { - ctx.Logger.Info("Waiting for ElfMachines to be deleted", "count", len(elfMachines)) + log.Info("Waiting for ElfMachines to be deleted", "count", len(elfMachines)) return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, nil } // if cluster need to force delete, skipping infra resource deletion and remove the finalizer. - if !ctx.ElfCluster.HasForceDeleteCluster() { - if ok, err := r.reconcileDeleteVMPlacementGroups(ctx); err != nil { + if !clusterCtx.ElfCluster.HasForceDeleteCluster() { + if ok, err := r.reconcileDeleteVMPlacementGroups(ctx, clusterCtx); err != nil { return reconcile.Result{}, errors.Wrapf(err, "failed to delete vm placement groups") } else if !ok { return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, nil } - if err := r.reconcileDeleteLabels(ctx); err != nil { + if err := r.reconcileDeleteLabels(ctx, clusterCtx); err != nil { return reconcile.Result{}, errors.Wrapf(err, "failed to delete labels") } } // Cluster is deleted so remove the finalizer. 
- ctrlutil.RemoveFinalizer(ctx.ElfCluster, infrav1.ClusterFinalizer) + ctrlutil.RemoveFinalizer(clusterCtx.ElfCluster, infrav1.ClusterFinalizer) return reconcile.Result{}, nil } -func (r *ElfClusterReconciler) reconcileDeleteVMPlacementGroups(ctx *context.ClusterContext) (bool, error) { - placementGroupPrefix := towerresources.GetVMPlacementGroupNamePrefix(ctx.Cluster) - if pgNames, err := ctx.VMService.DeleteVMPlacementGroupsByNamePrefix(ctx, placementGroupPrefix); err != nil { +func (r *ElfClusterReconciler) reconcileDeleteVMPlacementGroups(ctx goctx.Context, clusterCtx *context.ClusterContext) (bool, error) { + log := ctrl.LoggerFrom(ctx) + + placementGroupPrefix := towerresources.GetVMPlacementGroupNamePrefix(clusterCtx.Cluster) + if pgNames, err := clusterCtx.VMService.DeleteVMPlacementGroupsByNamePrefix(ctx, placementGroupPrefix); err != nil { return false, err } else if len(pgNames) > 0 { - ctx.Logger.Info(fmt.Sprintf("Waiting for the placement groups with name prefix %s to be deleted", placementGroupPrefix), "count", len(pgNames)) + log.Info(fmt.Sprintf("Waiting for the placement groups with name prefix %s to be deleted", placementGroupPrefix), "count", len(pgNames)) // Delete placement group caches. delPGCaches(pgNames) return false, nil } else { - ctx.Logger.Info(fmt.Sprintf("The placement groups with name prefix %s are deleted successfully", placementGroupPrefix)) + log.Info(fmt.Sprintf("The placement groups with name prefix %s are deleted successfully", placementGroupPrefix)) } return true, nil } -func (r *ElfClusterReconciler) reconcileDeleteLabels(ctx *context.ClusterContext) error { - if err := r.reconcileDeleteLabel(ctx, towerresources.GetVMLabelClusterName(), ctx.ElfCluster.Name, true); err != nil { +func (r *ElfClusterReconciler) reconcileDeleteLabels(ctx goctx.Context, clusterCtx *context.ClusterContext) error { + if err := r.reconcileDeleteLabel(ctx, clusterCtx, towerresources.GetVMLabelClusterName(), clusterCtx.ElfCluster.Name, true); err != nil { return err } - if err := r.reconcileDeleteLabel(ctx, towerresources.GetVMLabelVIP(), ctx.ElfCluster.Spec.ControlPlaneEndpoint.Host, false); err != nil { + if err := r.reconcileDeleteLabel(ctx, clusterCtx, towerresources.GetVMLabelVIP(), clusterCtx.ElfCluster.Spec.ControlPlaneEndpoint.Host, false); err != nil { return err } - if err := r.reconcileDeleteLabel(ctx, towerresources.GetVMLabelNamespace(), ctx.ElfCluster.Namespace, true); err != nil { + if err := r.reconcileDeleteLabel(ctx, clusterCtx, towerresources.GetVMLabelNamespace(), clusterCtx.ElfCluster.Namespace, true); err != nil { return err } - if err := r.reconcileDeleteLabel(ctx, towerresources.GetVMLabelManaged(), "true", true); err != nil { + if err := r.reconcileDeleteLabel(ctx, clusterCtx, towerresources.GetVMLabelManaged(), "true", true); err != nil { return err } return nil } -func (r *ElfClusterReconciler) reconcileDeleteLabel(ctx *context.ClusterContext, key, value string, strict bool) error { - labelID, err := ctx.VMService.DeleteLabel(key, value, strict) +func (r *ElfClusterReconciler) reconcileDeleteLabel(ctx goctx.Context, clusterCtx *context.ClusterContext, key, value string, strict bool) error { + log := ctrl.LoggerFrom(ctx) + + labelID, err := clusterCtx.VMService.DeleteLabel(key, value, strict) if err != nil { return err } if labelID != "" { - ctx.Logger.Info(fmt.Sprintf("Label %s:%s deleted", key, value), "labelId", labelID) + log.Info(fmt.Sprintf("Label %s:%s deleted", key, value), "labelId", labelID) } return nil @@ -282,73 +270,79 @@ func (r 
*ElfClusterReconciler) reconcileDeleteLabel(ctx *context.ClusterContext, // cleanOrphanLabels cleans unused labels for Tower every day. // If an error is encountered during the cleanup process, // it will not be retried and will be started again in the next reconcile. -func (r *ElfClusterReconciler) cleanOrphanLabels(ctx *context.ClusterContext) { +func (r *ElfClusterReconciler) cleanOrphanLabels(ctx goctx.Context, clusterCtx *context.ClusterContext) { + log := ctrl.LoggerFrom(ctx) + // Locking ensures that only one coroutine cleans at the same time - if ok := acquireLockForGCTowerLabels(ctx.ElfCluster.Spec.Tower.Server); ok { - defer releaseLockForForGCTowerLabels(ctx.ElfCluster.Spec.Tower.Server) + if ok := acquireLockForGCTowerLabels(clusterCtx.ElfCluster.Spec.Tower.Server); ok { + defer releaseLockForForGCTowerLabels(clusterCtx.ElfCluster.Spec.Tower.Server) } else { return } - ctx.Logger.V(1).Info(fmt.Sprintf("Cleaning orphan labels in Tower %s created by CAPE", ctx.ElfCluster.Spec.Tower.Server)) + log.V(1).Info(fmt.Sprintf("Cleaning orphan labels in Tower %s created by CAPE", clusterCtx.ElfCluster.Spec.Tower.Server)) keys := []string{towerresources.GetVMLabelClusterName(), towerresources.GetVMLabelVIP(), towerresources.GetVMLabelNamespace()} - labelIDs, err := ctx.VMService.CleanUnusedLabels(keys) + labelIDs, err := clusterCtx.VMService.CleanUnusedLabels(keys) if err != nil { - ctx.Logger.Error(err, fmt.Sprintf("Warning: failed to clean orphan labels in Tower %s", ctx.ElfCluster.Spec.Tower.Server)) + log.Error(err, fmt.Sprintf("Warning: failed to clean orphan labels in Tower %s", clusterCtx.ElfCluster.Spec.Tower.Server)) return } - recordGCTimeForTowerLabels(ctx.ElfCluster.Spec.Tower.Server) + recordGCTimeForTowerLabels(clusterCtx.ElfCluster.Spec.Tower.Server) - ctx.Logger.V(1).Info(fmt.Sprintf("Labels of Tower %s are cleaned successfully", ctx.ElfCluster.Spec.Tower.Server), "labelCount", len(labelIDs)) + log.V(1).Info(fmt.Sprintf("Labels of Tower %s are cleaned successfully", clusterCtx.ElfCluster.Spec.Tower.Server), "labelCount", len(labelIDs)) } -func (r *ElfClusterReconciler) reconcileNormal(ctx *context.ClusterContext) (reconcile.Result, error) { //nolint:unparam - ctx.Logger.Info("Reconciling ElfCluster") +func (r *ElfClusterReconciler) reconcileNormal(ctx goctx.Context, clusterCtx *context.ClusterContext) (reconcile.Result, error) { //nolint:unparam + log := ctrl.LoggerFrom(ctx) + log.Info("Reconciling ElfCluster") // If the ElfCluster doesn't have our finalizer, add it. - ctrlutil.AddFinalizer(ctx.ElfCluster, infrav1.ClusterFinalizer) + ctrlutil.AddFinalizer(clusterCtx.ElfCluster, infrav1.ClusterFinalizer) // If the cluster already has ControlPlaneEndpoint set then there is nothing to do. - if ok := r.reconcileControlPlaneEndpoint(ctx); !ok { + if ok := r.reconcileControlPlaneEndpoint(ctx, clusterCtx); !ok { return reconcile.Result{}, nil } // Reconcile the ElfCluster resource's ready state. - ctx.ElfCluster.Status.Ready = true + clusterCtx.ElfCluster.Status.Ready = true // If the cluster is deleted, that's mean that the workload cluster is being deleted - if !ctx.Cluster.DeletionTimestamp.IsZero() { + if !clusterCtx.Cluster.DeletionTimestamp.IsZero() { return reconcile.Result{}, nil } - r.cleanOrphanLabels(ctx) + r.cleanOrphanLabels(ctx, clusterCtx) // Wait until the API server is online and accessible. 
- if !r.isAPIServerOnline(ctx) { + if !r.isAPIServerOnline(ctx, clusterCtx) { return reconcile.Result{}, nil } return reconcile.Result{}, nil } -func (r *ElfClusterReconciler) reconcileControlPlaneEndpoint(ctx *context.ClusterContext) bool { - if !ctx.ElfCluster.Spec.ControlPlaneEndpoint.IsZero() { - conditions.MarkTrue(ctx.ElfCluster, infrav1.ControlPlaneEndpointReadyCondition) +func (r *ElfClusterReconciler) reconcileControlPlaneEndpoint(ctx goctx.Context, clusterCtx *context.ClusterContext) bool { + if !clusterCtx.ElfCluster.Spec.ControlPlaneEndpoint.IsZero() { + conditions.MarkTrue(clusterCtx.ElfCluster, infrav1.ControlPlaneEndpointReadyCondition) return true } - conditions.MarkFalse(ctx.ElfCluster, infrav1.ControlPlaneEndpointReadyCondition, infrav1.WaitingForVIPReason, clusterv1.ConditionSeverityInfo, "") - ctx.Logger.Info("The ControlPlaneEndpoint of ElfCluster is not set") + log := ctrl.LoggerFrom(ctx) + log.Info("The ControlPlaneEndpoint of ElfCluster is not set") + conditions.MarkFalse(clusterCtx.ElfCluster, infrav1.ControlPlaneEndpointReadyCondition, infrav1.WaitingForVIPReason, clusterv1.ConditionSeverityInfo, "") return false } -func (r *ElfClusterReconciler) isAPIServerOnline(ctx *context.ClusterContext) bool { - if kubeClient, err := util.NewKubeClient(ctx, ctx.Client, ctx.Cluster); err == nil { +func (r *ElfClusterReconciler) isAPIServerOnline(ctx goctx.Context, clusterCtx *context.ClusterContext) bool { + log := ctrl.LoggerFrom(ctx) + + if kubeClient, err := util.NewKubeClient(ctx, r.Client, clusterCtx.Cluster); err == nil { if _, err := kubeClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{}); err == nil { // The target cluster is online. To make sure the correct control // plane endpoint information is logged, it is necessary to fetch @@ -357,18 +351,17 @@ func (r *ElfClusterReconciler) isAPIServerOnline(ctx *context.ClusterContext) bo // ElfCluster resource, as it must have the correct information // if the API server is online. 
cluster := &clusterv1.Cluster{} - clusterKey := client.ObjectKey{Namespace: ctx.Cluster.Namespace, Name: ctx.Cluster.Name} - if err := ctx.Client.Get(ctx, clusterKey, cluster); err != nil { - cluster = ctx.Cluster.DeepCopy() - cluster.Spec.ControlPlaneEndpoint.Host = ctx.ElfCluster.Spec.ControlPlaneEndpoint.Host - cluster.Spec.ControlPlaneEndpoint.Port = ctx.ElfCluster.Spec.ControlPlaneEndpoint.Port + clusterKey := client.ObjectKey{Namespace: clusterCtx.Cluster.Namespace, Name: clusterCtx.Cluster.Name} + if err := r.Client.Get(ctx, clusterKey, cluster); err != nil { + cluster = clusterCtx.Cluster.DeepCopy() + cluster.Spec.ControlPlaneEndpoint.Host = clusterCtx.ElfCluster.Spec.ControlPlaneEndpoint.Host + cluster.Spec.ControlPlaneEndpoint.Port = clusterCtx.ElfCluster.Spec.ControlPlaneEndpoint.Port - ctx.Logger.Error(err, "failed to get updated cluster object while checking if API server is online") + log.Error(err, "failed to get updated cluster object while checking if API server is online") } - ctx.Logger.Info( + log.Info( "API server is online", - "namespace", cluster.Namespace, "cluster", cluster.Name, "controlPlaneEndpoint", cluster.Spec.ControlPlaneEndpoint.String()) return true diff --git a/controllers/elfcluster_controller_test.go b/controllers/elfcluster_controller_test.go index df7fbe2a..05276f9d 100644 --- a/controllers/elfcluster_controller_test.go +++ b/controllers/elfcluster_controller_test.go @@ -81,13 +81,8 @@ var _ = Describe("ElfClusterReconciler", func() { Context("Reconcile an ElfCluster", func() { It("should not reconcile when ElfCluster not found", func() { - ctrlMgrContext := fake.NewControllerManagerContext() - ctrlContext := &context.ControllerContext{ - ControllerManagerContext: ctrlMgrContext, - Logger: ctrllog.Log, - } - - reconciler := &ElfClusterReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + ctrlMgrCtx := fake.NewControllerManagerContext() + reconciler := &ElfClusterReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: capiutil.ObjectKey(elfCluster)}) Expect(err).ToNot(HaveOccurred()) Expect(result.RequeueAfter).To(BeZero()) @@ -95,13 +90,8 @@ var _ = Describe("ElfClusterReconciler", func() { }) It("should not error and not requeue the request without cluster", func() { - ctrlMgrContext := fake.NewControllerManagerContext(elfCluster) - ctrlContext := &context.ControllerContext{ - ControllerManagerContext: ctrlMgrContext, - Logger: ctrllog.Log, - } - - reconciler := &ElfClusterReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster) + reconciler := &ElfClusterReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: capiutil.ObjectKey(elfCluster)}) Expect(err).ToNot(HaveOccurred()) Expect(result.RequeueAfter).To(BeZero()) @@ -111,14 +101,10 @@ var _ = Describe("ElfClusterReconciler", func() { It("should not error and not requeue the request when Cluster is paused", func() { cluster.Spec.Paused = true - ctrlMgrContext := fake.NewControllerManagerContext(cluster, elfCluster) - ctrlContext := &context.ControllerContext{ - ControllerManagerContext: ctrlMgrContext, - Logger: ctrlMgrContext.Logger, - } - fake.InitClusterOwnerReferences(ctrlContext, elfCluster, cluster) + ctrlMgrCtx := fake.NewControllerManagerContext(cluster, elfCluster) + fake.InitClusterOwnerReferences(ctx, 
ctrlMgrCtx, elfCluster, cluster) - reconciler := &ElfClusterReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfClusterReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: capiutil.ObjectKey(elfCluster)}) Expect(err).ToNot(HaveOccurred()) Expect(result.RequeueAfter).To(BeZero()) @@ -128,20 +114,16 @@ var _ = Describe("ElfClusterReconciler", func() { It("should add finalizer to the elfcluster", func() { elfCluster.Spec.ControlPlaneEndpoint.Host = "127.0.0.1" elfCluster.Spec.ControlPlaneEndpoint.Port = 6443 - ctrlMgrContext := fake.NewControllerManagerContext(cluster, elfCluster) - ctrlContext := &context.ControllerContext{ - ControllerManagerContext: ctrlMgrContext, - Logger: ctrllog.Log, - } - fake.InitClusterOwnerReferences(ctrlContext, elfCluster, cluster) + ctrlMgrCtx := fake.NewControllerManagerContext(cluster, elfCluster) + fake.InitClusterOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster) keys := []string{towerresources.GetVMLabelClusterName(), towerresources.GetVMLabelVIP(), towerresources.GetVMLabelNamespace()} mockVMService.EXPECT().CleanUnusedLabels(keys).Return(nil, nil) elfClusterKey := capiutil.ObjectKey(elfCluster) - reconciler := &ElfClusterReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfClusterReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} _, _ = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfClusterKey}) - Expect(reconciler.Client.Get(reconciler, elfClusterKey, elfCluster)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfClusterKey, elfCluster)).To(Succeed()) Expect(elfCluster.Status.Ready).To(BeTrue()) Expect(elfCluster.Finalizers).To(ContainElement(infrav1.ClusterFinalizer)) expectConditions(elfCluster, []conditionAssertion{ @@ -152,19 +134,15 @@ var _ = Describe("ElfClusterReconciler", func() { }) It("should not reconcile if without ControlPlaneEndpoint", func() { - ctrlMgrContext := fake.NewControllerManagerContext(cluster, elfCluster) - ctrlContext := &context.ControllerContext{ - ControllerManagerContext: ctrlMgrContext, - Logger: ctrllog.Log, - } - fake.InitClusterOwnerReferences(ctrlContext, elfCluster, cluster) + ctrlMgrCtx := fake.NewControllerManagerContext(cluster, elfCluster) + fake.InitClusterOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster) - reconciler := &ElfClusterReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfClusterReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: capiutil.ObjectKey(elfCluster)}) Expect(err).ToNot(HaveOccurred()) Expect(result).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("The ControlPlaneEndpoint of ElfCluster is not set")) - Expect(reconciler.Client.Get(reconciler, capiutil.ObjectKey(elfCluster), elfCluster)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, capiutil.ObjectKey(elfCluster), elfCluster)).To(Succeed()) expectConditions(elfCluster, []conditionAssertion{ {clusterv1.ReadyCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForVIPReason}, {infrav1.ControlPlaneEndpointReadyCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForVIPReason}, @@ -181,33 +159,25 @@ var _ = Describe("ElfClusterReconciler", func() { It("should not remove elfcluster finalizer when has 
elfmachines", func() { elfMachine, machine := fake.NewMachineObjects(elfCluster, cluster) - ctrlMgrContext := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine) - ctrlContext := &context.ControllerContext{ - ControllerManagerContext: ctrlMgrContext, - Logger: ctrllog.Log, - } - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - reconciler := &ElfClusterReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfClusterReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfClusterKey := capiutil.ObjectKey(elfCluster) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfClusterKey}) Expect(logBuffer.String()).To(ContainSubstring("Waiting for ElfMachines to be deleted")) Expect(result.RequeueAfter).NotTo(BeZero()) Expect(err).ShouldNot(HaveOccurred()) - Expect(reconciler.Client.Get(reconciler, elfClusterKey, elfCluster)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfClusterKey, elfCluster)).To(Succeed()) Expect(elfCluster.Finalizers).To(ContainElement(infrav1.ClusterFinalizer)) }) It("should delete labels and remove elfcluster finalizer", func() { task := fake.NewTowerTask() - ctrlMgrContext := fake.NewControllerManagerContext(cluster, elfCluster) - ctrlContext := &context.ControllerContext{ - ControllerManagerContext: ctrlMgrContext, - Logger: ctrllog.Log, - } - fake.InitClusterOwnerReferences(ctrlContext, elfCluster, cluster) + ctrlMgrCtx := fake.NewControllerManagerContext(cluster, elfCluster) + fake.InitClusterOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster) - reconciler := &ElfClusterReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfClusterReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfClusterKey := capiutil.ObjectKey(elfCluster) mockVMService.EXPECT().DeleteVMPlacementGroupsByNamePrefix(gomock.Any(), towerresources.GetVMPlacementGroupNamePrefix(cluster)).Return(nil, errors.New("some error")) @@ -239,26 +209,22 @@ var _ = Describe("ElfClusterReconciler", func() { Expect(err).NotTo(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring(fmt.Sprintf("The placement groups with name prefix %s are deleted successfully", placementGroupPrefix))) Expect(logBuffer.String()).To(ContainSubstring(fmt.Sprintf("Label %s:%s deleted", towerresources.GetVMLabelClusterName(), elfCluster.Name))) - Expect(apierrors.IsNotFound(reconciler.Client.Get(reconciler, elfClusterKey, elfCluster))).To(BeTrue()) + Expect(apierrors.IsNotFound(reconciler.Client.Get(ctx, elfClusterKey, elfCluster))).To(BeTrue()) }) It("should delete failed when tower is out of service", func() { mockNewVMService = func(_ goctx.Context, _ infrav1.Tower, _ logr.Logger) (service.VMService, error) { return mockVMService, errors.New("get vm service failed") } - ctrlMgrContext := fake.NewControllerManagerContext(elfCluster, cluster) - ctrlContext := &context.ControllerContext{ - ControllerManagerContext: ctrlMgrContext, - Logger: ctrllog.Log, - } - fake.InitClusterOwnerReferences(ctrlContext, elfCluster, cluster) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster) + fake.InitClusterOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster) - reconciler := &ElfClusterReconciler{ControllerContext: ctrlContext, NewVMService: 
mockNewVMService} + reconciler := &ElfClusterReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfClusterKey := capiutil.ObjectKey(elfCluster) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfClusterKey}) Expect(result).To(BeZero()) Expect(err).To(HaveOccurred()) - Expect(reconciler.Client.Get(reconciler, elfClusterKey, elfCluster)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfClusterKey, elfCluster)).To(Succeed()) Expect(elfCluster.Finalizers).To(ContainElement(infrav1.ClusterFinalizer)) }) @@ -269,20 +235,16 @@ var _ = Describe("ElfClusterReconciler", func() { elfCluster.Annotations = map[string]string{ infrav1.ElfClusterForceDeleteAnnotation: "", } - ctrlMgrContext := fake.NewControllerManagerContext(elfCluster, cluster) - ctrlContext := &context.ControllerContext{ - ControllerManagerContext: ctrlMgrContext, - Logger: ctrllog.Log, - } - fake.InitClusterOwnerReferences(ctrlContext, elfCluster, cluster) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster) + fake.InitClusterOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster) - reconciler := &ElfClusterReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfClusterReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfClusterKey := capiutil.ObjectKey(elfCluster) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfClusterKey}) Expect(result).To(BeZero()) Expect(err).ToNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("")) - Expect(apierrors.IsNotFound(reconciler.Client.Get(reconciler, elfClusterKey, elfCluster))).To(BeTrue()) + Expect(apierrors.IsNotFound(reconciler.Client.Get(ctx, elfClusterKey, elfCluster))).To(BeTrue()) }) }) @@ -294,32 +256,30 @@ var _ = Describe("ElfClusterReconciler", func() { It("should clean labels for Tower", func() { elfCluster.Spec.ControlPlaneEndpoint.Host = "127.0.0.1" elfCluster.Spec.ControlPlaneEndpoint.Port = 6443 - // ctrlMgrContext := fake.NewControllerManagerContext(cluster, elfCluster) - ctrlContext := newCtrlContexts(elfCluster, cluster) - fake.InitClusterOwnerReferences(ctrlContext, elfCluster, cluster) - clusterContext := &context.ClusterContext{ - ControllerContext: ctrlContext, - Cluster: cluster, - ElfCluster: elfCluster, - Logger: ctrllog.Log, - VMService: mockVMService, + ctrlMgrCtx := fake.NewControllerManagerContext(cluster, elfCluster) + fake.InitClusterOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster) + clusterCtx := &context.ClusterContext{ + Cluster: cluster, + ElfCluster: elfCluster, + Logger: ctrllog.Log, + VMService: mockVMService, } logBuffer.Reset() unexpectedError := errors.New("unexpected error") keys := []string{towerresources.GetVMLabelClusterName(), towerresources.GetVMLabelVIP(), towerresources.GetVMLabelNamespace()} mockVMService.EXPECT().CleanUnusedLabels(keys).Return(nil, unexpectedError) - reconciler := &ElfClusterReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - reconciler.cleanOrphanLabels(clusterContext) + reconciler := &ElfClusterReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + reconciler.cleanOrphanLabels(ctx, clusterCtx) Expect(logBuffer.String()).To(ContainSubstring(fmt.Sprintf("Warning: failed to clean orphan labels in Tower %s", elfCluster.Spec.Tower.Server))) logBuffer.Reset() mockVMService.EXPECT().CleanUnusedLabels(keys).Return(nil, nil) - reconciler.cleanOrphanLabels(clusterContext) + 
reconciler.cleanOrphanLabels(ctx, clusterCtx) Expect(logBuffer.String()).To(ContainSubstring(fmt.Sprintf("Labels of Tower %s are cleaned successfully", elfCluster.Spec.Tower.Server))) logBuffer.Reset() - reconciler.cleanOrphanLabels(clusterContext) + reconciler.cleanOrphanLabels(ctx, clusterCtx) Expect(logBuffer.String()).NotTo(ContainSubstring(fmt.Sprintf("Cleaning orphan labels in Tower %s created by CAPE", elfCluster.Spec.Tower.Server))) }) }) diff --git a/controllers/elfmachine_controller.go b/controllers/elfmachine_controller.go index b537a894..8b185887 100644 --- a/controllers/elfmachine_controller.go +++ b/controllers/elfmachine_controller.go @@ -22,7 +22,6 @@ import ( stderrors "errors" "fmt" "reflect" - "strings" "time" "github.com/pkg/errors" @@ -31,6 +30,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" apitypes "k8s.io/apimachinery/pkg/types" + "k8s.io/klog/v2" "k8s.io/utils/pointer" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" capierrors "sigs.k8s.io/cluster-api/errors" @@ -71,31 +71,22 @@ import ( // ElfMachineReconciler reconciles an ElfMachine object. type ElfMachineReconciler struct { - *context.ControllerContext + *context.ControllerManagerContext NewVMService service.NewVMServiceFunc } // AddMachineControllerToManager adds the machine controller to the provided // manager. -func AddMachineControllerToManager(ctx *context.ControllerManagerContext, mgr ctrlmgr.Manager, options controller.Options) error { +func AddMachineControllerToManager(ctx goctx.Context, ctrlMgrCtx *context.ControllerManagerContext, mgr ctrlmgr.Manager, options controller.Options) error { var ( controlledType = &infrav1.ElfMachine{} controlledTypeName = reflect.TypeOf(controlledType).Elem().Name() controlledTypeGVK = infrav1.GroupVersion.WithKind(controlledTypeName) - - controllerNameShort = fmt.Sprintf("%s-controller", strings.ToLower(controlledTypeName)) ) - // Build the controller context. - controllerContext := &context.ControllerContext{ - ControllerManagerContext: ctx, - Name: controllerNameShort, - Logger: ctx.Logger.WithName(controllerNameShort), - } - reconciler := &ElfMachineReconciler{ - ControllerContext: controllerContext, - NewVMService: service.NewVMService, + ControllerManagerContext: ctrlMgrCtx, + NewVMService: service.NewVMService, } return ctrl.NewControllerManagedBy(mgr). @@ -107,17 +98,19 @@ func AddMachineControllerToManager(ctx *context.ControllerManagerContext, mgr ct handler.EnqueueRequestsFromMapFunc(capiutil.MachineToInfrastructureMapFunc(controlledTypeGVK)), ). WithOptions(options). - WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), ctx.WatchFilterValue)). + WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), ctrlMgrCtx.WatchFilterValue)). Complete(reconciler) } // Reconcile ensures the back-end state reflects the Kubernetes resource state intent. func (r *ElfMachineReconciler) Reconcile(ctx goctx.Context, req ctrl.Request) (result ctrl.Result, reterr error) { + log := ctrl.LoggerFrom(ctx) + // Get the ElfMachine resource for this request. 
var elfMachine infrav1.ElfMachine - if err := r.Client.Get(r, req.NamespacedName, &elfMachine); err != nil { + if err := r.Client.Get(ctx, req.NamespacedName, &elfMachine); err != nil { if apierrors.IsNotFound(err) { - r.Logger.Info("ElfMachine not found, won't reconcile", "key", req.NamespacedName) + log.Info("ElfMachine not found, won't reconcile", "key", req.NamespacedName) return reconcile.Result{}, nil } @@ -126,28 +119,35 @@ func (r *ElfMachineReconciler) Reconcile(ctx goctx.Context, req ctrl.Request) (r } // Fetch the CAPI Machine. - machine, err := capiutil.GetOwnerMachine(r, r.Client, elfMachine.ObjectMeta) + machine, err := capiutil.GetOwnerMachine(ctx, r.Client, elfMachine.ObjectMeta) if err != nil { return reconcile.Result{}, err } if machine == nil { - r.Logger.Info("Waiting for Machine Controller to set OwnerRef on ElfMachine", - "namespace", elfMachine.Namespace, "elfMachine", elfMachine.Name) + log.Info("Waiting for Machine Controller to set OwnerRef on ElfMachine") return reconcile.Result{}, nil } + log = log.WithValues("Machine", klog.KObj(machine)) + ctx = ctrl.LoggerInto(ctx, log) // Fetch the CAPI Cluster. - cluster, err := capiutil.GetClusterFromMetadata(r, r.Client, machine.ObjectMeta) + cluster, err := capiutil.GetClusterFromMetadata(ctx, r.Client, machine.ObjectMeta) if err != nil { - r.Logger.Info("Machine is missing cluster label or cluster does not exist", - "namespace", machine.Namespace, "machine", machine.Name) + log.Info("Machine is missing cluster label or cluster does not exist") - return reconcile.Result{}, nil + return reconcile.Result{}, err + } + if cluster == nil { + log.Info(fmt.Sprintf("Please associate this machine with a cluster using the label %s: ", clusterv1.ClusterNameLabel)) + + return ctrl.Result{}, nil } + log = log.WithValues("Cluster", klog.KObj(cluster)) + ctx = ctrl.LoggerInto(ctx, log) + if annotations.IsPaused(cluster, &elfMachine) { - r.Logger.V(4).Info("ElfMachine linked to a cluster that is paused", - "namespace", elfMachine.Namespace, "elfMachine", elfMachine.Name) + log.V(4).Info("ElfMachine linked to a cluster that is paused") return reconcile.Result{}, nil } @@ -158,42 +158,37 @@ func (r *ElfMachineReconciler) Reconcile(ctx goctx.Context, req ctrl.Request) (r Namespace: elfMachine.Namespace, Name: cluster.Spec.InfrastructureRef.Name, } - if err := r.Client.Get(r, elfClusterName, &elfCluster); err != nil { - r.Logger.Info("ElfMachine Waiting for ElfCluster", - "namespace", elfMachine.Namespace, "elfMachine", elfMachine.Name) + if err := r.Client.Get(ctx, elfClusterName, &elfCluster); err != nil { + if apierrors.IsNotFound(err) { + log.Info("ElfMachine Waiting for ElfCluster") - return reconcile.Result{}, nil + return reconcile.Result{}, nil + } + + return reconcile.Result{}, err } + log = log.WithValues("ElfCluster", klog.KObj(&elfCluster)) + ctx = ctrl.LoggerInto(ctx, log) // Create the patch helper. patchHelper, err := patch.NewHelper(&elfMachine, r.Client) if err != nil { - return reconcile.Result{}, errors.Wrapf( - err, - "failed to init patch helper for %s %s/%s", - elfMachine.GroupVersionKind(), - elfMachine.Namespace, - elfMachine.Name) + return reconcile.Result{}, errors.Wrapf(err, "failed to init patch helper") } - logger := r.Logger.WithValues("namespace", elfMachine.Namespace, - "elfCluster", elfCluster.Name, "elfMachine", elfMachine.Name, "machine", machine.Name) - // Create the machine context for this request. 
- machineContext := &context.MachineContext{ - ControllerContext: r.ControllerContext, - Cluster: cluster, - ElfCluster: &elfCluster, - Machine: machine, - ElfMachine: &elfMachine, - Logger: logger, - PatchHelper: patchHelper, + machineCtx := &context.MachineContext{ + Cluster: cluster, + ElfCluster: &elfCluster, + Machine: machine, + ElfMachine: &elfMachine, + PatchHelper: patchHelper, } // If ElfMachine is being deleting and ElfCLuster ForceDeleteCluster flag is set, skip creating the VMService object, // because Tower server may be out of service. So we can force delete ElfCluster. if elfMachine.ObjectMeta.DeletionTimestamp.IsZero() || !elfCluster.HasForceDeleteCluster() { - vmService, err := r.NewVMService(r.Context, elfCluster.GetTower(), logger) + vmService, err := r.NewVMService(ctx, elfCluster.GetTower(), log) if err != nil { conditions.MarkFalse(&elfMachine, infrav1.TowerAvailableCondition, infrav1.TowerUnreachableReason, clusterv1.ConditionSeverityError, err.Error()) @@ -201,14 +196,14 @@ func (r *ElfMachineReconciler) Reconcile(ctx goctx.Context, req ctrl.Request) (r } conditions.MarkTrue(&elfMachine, infrav1.TowerAvailableCondition) - machineContext.VMService = vmService + machineCtx.VMService = vmService } // Always issue a patch when exiting this function so changes to the // resource are patched back to the API server. defer func() { // always update the readyCondition. - conditions.SetSummary(machineContext.ElfMachine, + conditions.SetSummary(machineCtx.ElfMachine, conditions.WithConditions( infrav1.VMProvisionedCondition, infrav1.TowerAvailableCondition, @@ -216,53 +211,55 @@ func (r *ElfMachineReconciler) Reconcile(ctx goctx.Context, req ctrl.Request) (r ) // Patch the ElfMachine resource. - if err := machineContext.Patch(); err != nil { + if err := machineCtx.Patch(ctx); err != nil { if reterr == nil { reterr = err } - machineContext.Logger.Error(err, "patch failed", "elfMachine", machineContext.String()) + log.Error(err, "patch failed", "elfMachine", machineCtx.String()) } // If the node's healthy condition is unknown, the virtual machine may // have been shut down through Tower or directly on the virtual machine. // We need to try to reconcile to ensure that the virtual machine is powered on. 
if err == nil && result.IsZero() && - !machineutil.IsMachineFailed(machineContext.Machine) && - machineContext.Machine.DeletionTimestamp.IsZero() && - machineContext.ElfMachine.DeletionTimestamp.IsZero() && - machineutil.IsNodeHealthyConditionUnknown(machineContext.Machine) { - lastTransitionTime := conditions.GetLastTransitionTime(machineContext.Machine, clusterv1.MachineNodeHealthyCondition) + !machineutil.IsMachineFailed(machineCtx.Machine) && + machineCtx.Machine.DeletionTimestamp.IsZero() && + machineCtx.ElfMachine.DeletionTimestamp.IsZero() && + machineutil.IsNodeHealthyConditionUnknown(machineCtx.Machine) { + lastTransitionTime := conditions.GetLastTransitionTime(machineCtx.Machine, clusterv1.MachineNodeHealthyCondition) if lastTransitionTime != nil && time.Now().Before(lastTransitionTime.Add(config.VMPowerStatusCheckingDuration)) { result.RequeueAfter = config.DefaultRequeueTimeout - machineContext.Logger.Info(fmt.Sprintf("The node's healthy condition is unknown, virtual machine may have been shut down, will reconcile after %s", result.RequeueAfter), "nodeConditionUnknownTime", lastTransitionTime) + log.Info(fmt.Sprintf("The node's healthy condition is unknown, virtual machine may have been shut down, will reconcile after %s", result.RequeueAfter), "nodeConditionUnknownTime", lastTransitionTime) } } }() // Handle deleted machines if !elfMachine.ObjectMeta.DeletionTimestamp.IsZero() { - return r.reconcileDelete(machineContext) + return r.reconcileDelete(ctx, machineCtx) } // Handle non-deleted machines - return r.reconcileNormal(machineContext) + return r.reconcileNormal(ctx, machineCtx) } -func (r *ElfMachineReconciler) reconcileDeleteVM(ctx *context.MachineContext) error { - vm, err := ctx.VMService.Get(ctx.ElfMachine.Status.VMRef) +func (r *ElfMachineReconciler) reconcileDeleteVM(ctx goctx.Context, machineCtx *context.MachineContext) error { + log := ctrl.LoggerFrom(ctx) + + vm, err := machineCtx.VMService.Get(machineCtx.ElfMachine.Status.VMRef) if err != nil { if service.IsVMNotFound(err) { - ctx.Logger.Info("VM already deleted") + log.Info("VM already deleted") - ctx.ElfMachine.SetVM("") + machineCtx.ElfMachine.SetVM("") } return err } - if ok, err := r.reconcileVMTask(ctx, vm); err != nil { + if ok, err := r.reconcileVMTask(ctx, machineCtx, vm); err != nil { return err } else if !ok { return nil @@ -276,225 +273,226 @@ func (r *ElfMachineReconciler) reconcileDeleteVM(ctx *context.MachineContext) er // we should perform a graceful shutdown to ensure that the vGPU license can be released. // Therefore, if the ElfMachine is configured with vGPU or ElfCluster.Spec.VMGracefulShutdown is true, the virtual machine will be shutdown normally. // But if the VM shutdown timed out, simply power off the VM. 
- if service.IsShutDownTimeout(conditions.GetMessage(ctx.ElfMachine, infrav1.VMProvisionedCondition)) || - !(ctx.ElfMachine.RequiresVGPUDevices() || ctx.ElfCluster.Spec.VMGracefulShutdown) { - task, err = ctx.VMService.PowerOff(ctx.ElfMachine.Status.VMRef) + if service.IsShutDownTimeout(conditions.GetMessage(machineCtx.ElfMachine, infrav1.VMProvisionedCondition)) || + !(machineCtx.ElfMachine.RequiresVGPUDevices() || machineCtx.ElfCluster.Spec.VMGracefulShutdown) { + task, err = machineCtx.VMService.PowerOff(machineCtx.ElfMachine.Status.VMRef) } else { - task, err = ctx.VMService.ShutDown(ctx.ElfMachine.Status.VMRef) + task, err = machineCtx.VMService.ShutDown(machineCtx.ElfMachine.Status.VMRef) } if err != nil { return err } - ctx.ElfMachine.SetTask(*task.ID) + machineCtx.ElfMachine.SetTask(*task.ID) - ctx.Logger.Info("Waiting for VM shut down", - "vmRef", ctx.ElfMachine.Status.VMRef, "taskRef", ctx.ElfMachine.Status.TaskRef) + log.Info("Waiting for VM shut down", + "vmRef", machineCtx.ElfMachine.Status.VMRef, "taskRef", machineCtx.ElfMachine.Status.TaskRef) return nil } // Before destroying VM, attempt to delete kubernetes node. - err = r.deleteNode(ctx, ctx.ElfMachine.Name) + err = r.deleteNode(ctx, machineCtx, machineCtx.ElfMachine.Name) if err != nil { return err } - ctx.Logger.Info("Destroying VM", - "vmRef", ctx.ElfMachine.Status.VMRef, "taskRef", ctx.ElfMachine.Status.TaskRef) + log.Info("Destroying VM", + "vmRef", machineCtx.ElfMachine.Status.VMRef, "taskRef", machineCtx.ElfMachine.Status.TaskRef) // Delete the VM - task, err := ctx.VMService.Delete(ctx.ElfMachine.Status.VMRef) + task, err := machineCtx.VMService.Delete(machineCtx.ElfMachine.Status.VMRef) if err != nil { return err } else { - ctx.ElfMachine.SetTask(*task.ID) + machineCtx.ElfMachine.SetTask(*task.ID) } - ctx.Logger.Info("Waiting for VM to be deleted", - "vmRef", ctx.ElfMachine.Status.VMRef, "taskRef", ctx.ElfMachine.Status.TaskRef) + log.Info("Waiting for VM to be deleted", + "vmRef", machineCtx.ElfMachine.Status.VMRef, "taskRef", machineCtx.ElfMachine.Status.TaskRef) return nil } -func (r *ElfMachineReconciler) reconcileDelete(ctx *context.MachineContext) (reconcile.Result, error) { - ctx.Logger.Info("Reconciling ElfMachine delete") +func (r *ElfMachineReconciler) reconcileDelete(ctx goctx.Context, machineCtx *context.MachineContext) (reconcile.Result, error) { + log := ctrl.LoggerFrom(ctx) + log.Info("Reconciling ElfMachine delete") - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "") defer func() { // When deleting a virtual machine, the GPU device // locked by the virtual machine may not be unlocked. // For example, the Cluster or ElfMachine was deleted during a pause. 
- if !ctrlutil.ContainsFinalizer(ctx.ElfMachine, infrav1.MachineFinalizer) && - ctx.ElfMachine.RequiresGPUDevices() { - unlockGPUDevicesLockedByVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name) + if !ctrlutil.ContainsFinalizer(machineCtx.ElfMachine, infrav1.MachineFinalizer) && + machineCtx.ElfMachine.RequiresGPUDevices() { + unlockGPUDevicesLockedByVM(machineCtx.ElfCluster.Spec.Cluster, machineCtx.ElfMachine.Name) } }() - if ok, err := r.deletePlacementGroup(ctx); err != nil { + if ok, err := r.deletePlacementGroup(ctx, machineCtx); err != nil { return reconcile.Result{}, err } else if !ok { return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, nil } // if cluster need to force delete, skipping VM deletion and remove the finalizer. - if ctx.ElfCluster.HasForceDeleteCluster() { - ctx.Logger.Info("Skip VM deletion due to the force-delete-cluster annotation") + if machineCtx.ElfCluster.HasForceDeleteCluster() { + log.Info("Skip VM deletion due to the force-delete-cluster annotation") - ctrlutil.RemoveFinalizer(ctx.ElfMachine, infrav1.MachineFinalizer) + ctrlutil.RemoveFinalizer(machineCtx.ElfMachine, infrav1.MachineFinalizer) return reconcile.Result{}, nil } - if !ctx.ElfMachine.HasVM() { + if !machineCtx.ElfMachine.HasVM() { // ElfMachine may not have saved the created virtual machine when deleting ElfMachine - vm, err := ctx.VMService.GetByName(ctx.ElfMachine.Name) + vm, err := machineCtx.VMService.GetByName(machineCtx.ElfMachine.Name) if err != nil { if !service.IsVMNotFound(err) { return reconcile.Result{}, err } - ctx.Logger.Info("VM already deleted") + log.Info("VM already deleted") - ctrlutil.RemoveFinalizer(ctx.ElfMachine, infrav1.MachineFinalizer) + ctrlutil.RemoveFinalizer(machineCtx.ElfMachine, infrav1.MachineFinalizer) return reconcile.Result{}, nil } - ctx.ElfMachine.SetVM(util.GetVMRef(vm)) + machineCtx.ElfMachine.SetVM(util.GetVMRef(vm)) } - if result, err := r.deleteDuplicateVMs(ctx); err != nil || !result.IsZero() { + if result, err := r.deleteDuplicateVMs(ctx, machineCtx); err != nil || !result.IsZero() { return result, err } - err := r.reconcileDeleteVM(ctx) + err := r.reconcileDeleteVM(ctx, machineCtx) if err != nil { if service.IsVMNotFound(err) { // The VM is deleted so remove the finalizer. - ctrlutil.RemoveFinalizer(ctx.ElfMachine, infrav1.MachineFinalizer) + ctrlutil.RemoveFinalizer(machineCtx.ElfMachine, infrav1.MachineFinalizer) return reconcile.Result{}, nil } - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, clusterv1.DeletionFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, clusterv1.DeletionFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) return reconcile.Result{}, err } - ctx.Logger.Info("Waiting for VM to be deleted", - "vmRef", ctx.ElfMachine.Status.VMRef, "taskRef", ctx.ElfMachine.Status.TaskRef) + log.Info("Waiting for VM to be deleted", + "vmRef", machineCtx.ElfMachine.Status.VMRef, "taskRef", machineCtx.ElfMachine.Status.TaskRef) return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, nil } -func (r *ElfMachineReconciler) reconcileNormal(ctx *context.MachineContext) (reconcile.Result, error) { +func (r *ElfMachineReconciler) reconcileNormal(ctx goctx.Context, machineCtx *context.MachineContext) (reconcile.Result, error) { + log := ctrl.LoggerFrom(ctx) + // If the ElfMachine is in an error state, return early. 
- if ctx.ElfMachine.IsFailed() { - ctx.Logger.Info("Error state detected, skipping reconciliation") + if machineCtx.ElfMachine.IsFailed() { + log.Info("Error state detected, skipping reconciliation") return reconcile.Result{}, nil } // If the ElfMachine doesn't have our finalizer, add it. - if !ctrlutil.ContainsFinalizer(ctx.ElfMachine, infrav1.MachineFinalizer) { - return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, patchutil.AddFinalizerWithOptimisticLock(ctx, r.Client, ctx.ElfMachine, infrav1.MachineFinalizer) + if !ctrlutil.ContainsFinalizer(machineCtx.ElfMachine, infrav1.MachineFinalizer) { + return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, patchutil.AddFinalizerWithOptimisticLock(ctx, r.Client, machineCtx.ElfMachine, infrav1.MachineFinalizer) } // If ElfMachine requires static IPs for devices, should wait for CAPE-IP to set MachineStaticIPFinalizer first // to prevent CAPE from overwriting MachineStaticIPFinalizer when setting MachineFinalizer. // If ElfMachine happens to be deleted at this time, CAPE-IP may not have time to release the IPs. - if ctx.ElfMachine.Spec.Network.RequiresStaticIPs() && !ctrlutil.ContainsFinalizer(ctx.ElfMachine, infrav1.MachineStaticIPFinalizer) { - r.Logger.V(2).Info("Waiting for CAPE-IP to set MachineStaticIPFinalizer on ElfMachine") + if machineCtx.ElfMachine.Spec.Network.RequiresStaticIPs() && !ctrlutil.ContainsFinalizer(machineCtx.ElfMachine, infrav1.MachineStaticIPFinalizer) { + log.V(2).Info("Waiting for CAPE-IP to set MachineStaticIPFinalizer on ElfMachine") return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, nil } - if !ctx.Cluster.Status.InfrastructureReady { - ctx.Logger.Info("Cluster infrastructure is not ready yet", - "cluster", ctx.Cluster.Name) + if !machineCtx.Cluster.Status.InfrastructureReady { + log.Info("Cluster infrastructure is not ready yet") - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForClusterInfrastructureReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForClusterInfrastructureReason, clusterv1.ConditionSeverityInfo, "") return reconcile.Result{}, nil } // Make sure bootstrap data is available and populated. 
- if ctx.Machine.Spec.Bootstrap.DataSecretName == nil { - if !machineutil.IsControlPlaneMachine(ctx.ElfMachine) && !conditions.IsTrue(ctx.Cluster, clusterv1.ControlPlaneInitializedCondition) { - ctx.Logger.Info("Waiting for the control plane to be initialized") + if machineCtx.Machine.Spec.Bootstrap.DataSecretName == nil { + if !machineutil.IsControlPlaneMachine(machineCtx.ElfMachine) && !conditions.IsTrue(machineCtx.Cluster, clusterv1.ControlPlaneInitializedCondition) { + log.Info("Waiting for the control plane to be initialized") - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, clusterv1.WaitingForControlPlaneAvailableReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, clusterv1.WaitingForControlPlaneAvailableReason, clusterv1.ConditionSeverityInfo, "") return ctrl.Result{}, nil } - ctx.Logger.Info("Waiting for bootstrap data to be available") + log.Info("Waiting for bootstrap data to be available") - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForBootstrapDataReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForBootstrapDataReason, clusterv1.ConditionSeverityInfo, "") return reconcile.Result{}, nil } - if result, err := r.reconcilePlacementGroup(ctx); err != nil || !result.IsZero() { + if result, err := r.reconcilePlacementGroup(ctx, machineCtx); err != nil || !result.IsZero() { return result, err } - if r.isWaitingForStaticIPAllocation(ctx) { - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForStaticIPAllocationReason, clusterv1.ConditionSeverityInfo, "") - ctx.Logger.Info("VM is waiting for static ip to be available") + if r.isWaitingForStaticIPAllocation(machineCtx) { + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForStaticIPAllocationReason, clusterv1.ConditionSeverityInfo, "") + log.Info("VM is waiting for static ip to be available") return reconcile.Result{}, nil } - vm, ok, err := r.reconcileVM(ctx) + vm, ok, err := r.reconcileVM(ctx, machineCtx) switch { - case ctx.ElfMachine.IsFailed(): + case machineCtx.ElfMachine.IsFailed(): return reconcile.Result{}, nil case err != nil: - ctx.Logger.Error(err, "failed to reconcile VM") + log.Error(err, "failed to reconcile VM") if service.IsVMNotFound(err) { return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, nil } return reconcile.Result{}, errors.Wrapf(err, "failed to reconcile VM") - case !ok || ctx.ElfMachine.HasTask(): + case !ok || machineCtx.ElfMachine.HasTask(): return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, nil } // Reconcile the ElfMachine's Labels using the cluster info - if ok, err := r.reconcileLabels(ctx, vm); !ok { + if ok, err := r.reconcileLabels(ctx, machineCtx, vm); !ok { return reconcile.Result{}, errors.Wrapf(err, "failed to reconcile labels") } // Reconcile the ElfMachine's providerID using the VM's UUID. - if err := r.reconcileProviderID(ctx, vm); err != nil { - return reconcile.Result{}, errors.Wrapf(err, "unexpected error while reconciling providerID for %s", ctx) + if err := r.reconcileProviderID(ctx, machineCtx, vm); err != nil { + return reconcile.Result{}, errors.Wrapf(err, "unexpected error while reconciling providerID for %s", machineCtx) } // Reconcile the ElfMachine's node addresses from the VM's IP addresses. 
- if ok, err := r.reconcileNetwork(ctx, vm); err != nil { + if ok, err := r.reconcileNetwork(ctx, machineCtx, vm); err != nil { return reconcile.Result{}, err } else if !ok { return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, nil } - ctx.ElfMachine.Status.Ready = true - conditions.MarkTrue(ctx.ElfMachine, infrav1.VMProvisionedCondition) + machineCtx.ElfMachine.Status.Ready = true + conditions.MarkTrue(machineCtx.ElfMachine, infrav1.VMProvisionedCondition) - if ok, err := r.reconcileNode(ctx, vm); !ok { + if ok, err := r.reconcileNode(ctx, machineCtx, vm); !ok { if err != nil { return reconcile.Result{}, err } - ctx.Logger.Info("Node providerID is not reconciled", - "namespace", ctx.ElfMachine.Namespace, "elfMachine", ctx.ElfMachine.Name) + log.Info("Node providerID is not reconciled") return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, nil } - if result, err := r.deleteDuplicateVMs(ctx); err != nil || !result.IsZero() { + if result, err := r.deleteDuplicateVMs(ctx, machineCtx); err != nil || !result.IsZero() { return result, err } @@ -512,18 +510,20 @@ func (r *ElfMachineReconciler) reconcileNormal(ctx *context.MachineContext) (rec // 2. false and error is nil means the VM is not running or wait to join the placement group. // //nolint:gocyclo -func (r *ElfMachineReconciler) reconcileVM(ctx *context.MachineContext) (*models.VM, bool, error) { +func (r *ElfMachineReconciler) reconcileVM(ctx goctx.Context, machineCtx *context.MachineContext) (*models.VM, bool, error) { + log := ctrl.LoggerFrom(ctx) + // If there is no vmRef then no VM exists, create one - if !ctx.ElfMachine.HasVM() { + if !machineCtx.ElfMachine.HasVM() { // We are setting this condition only in case it does not exists so we avoid to get flickering LastConditionTime // in case of cloning errors or powering on errors. 
- if !conditions.Has(ctx.ElfMachine, infrav1.VMProvisionedCondition) { - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.CloningReason, clusterv1.ConditionSeverityInfo, "") + if !conditions.Has(machineCtx.ElfMachine, infrav1.VMProvisionedCondition) { + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.CloningReason, clusterv1.ConditionSeverityInfo, "") } - bootstrapData, err := r.getBootstrapData(ctx) + bootstrapData, err := r.getBootstrapData(ctx, machineCtx) if err != nil { - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.CloningFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.CloningFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) return nil, false, err } @@ -531,142 +531,144 @@ func (r *ElfMachineReconciler) reconcileVM(ctx *context.MachineContext) (*models return nil, false, errors.New("bootstrapData is empty") } - if ok, message, err := isELFScheduleVMErrorRecorded(ctx); err != nil { + if ok, message, err := isELFScheduleVMErrorRecorded(ctx, machineCtx, r.Client); err != nil { return nil, false, err } else if ok { - if canRetry, err := canRetryVMOperation(ctx); err != nil { + if canRetry, err := canRetryVMOperation(ctx, machineCtx, r.Client); err != nil { return nil, false, err } else if !canRetry { - ctx.Logger.V(1).Info(fmt.Sprintf("%s, skip creating VM", message)) + log.V(1).Info(fmt.Sprintf("%s, skip creating VM", message)) return nil, false, nil } - ctx.Logger.V(1).Info(fmt.Sprintf("%s and the retry silence period passes, will try to create the VM again", message)) + log.V(1).Info(fmt.Sprintf("%s and the retry silence period passes, will try to create the VM again", message)) } - if ok, msg := acquireTicketForCreateVM(ctx.ElfMachine.Name, machineutil.IsControlPlaneMachine(ctx.ElfMachine)); !ok { - ctx.Logger.V(1).Info(fmt.Sprintf("%s, skip creating VM", msg)) + if ok, msg := acquireTicketForCreateVM(machineCtx.ElfMachine.Name, machineutil.IsControlPlaneMachine(machineCtx.ElfMachine)); !ok { + log.V(1).Info(fmt.Sprintf("%s, skip creating VM", msg)) return nil, false, nil } var hostID *string var gpuDeviceInfos []*service.GPUDeviceInfo // The virtual machine of the Control Plane does not support GPU Devices. 
- if machineutil.IsControlPlaneMachine(ctx.Machine) { - hostID, err = r.preCheckPlacementGroup(ctx) + if machineutil.IsControlPlaneMachine(machineCtx.Machine) { + hostID, err = r.preCheckPlacementGroup(ctx, machineCtx) if err != nil || hostID == nil { - releaseTicketForCreateVM(ctx.ElfMachine.Name) + releaseTicketForCreateVM(machineCtx.ElfMachine.Name) return nil, false, err } } else { - hostID, gpuDeviceInfos, err = r.selectHostAndGPUsForVM(ctx, "") + hostID, gpuDeviceInfos, err = r.selectHostAndGPUsForVM(ctx, machineCtx, "") if err != nil || hostID == nil { - releaseTicketForCreateVM(ctx.ElfMachine.Name) + releaseTicketForCreateVM(machineCtx.ElfMachine.Name) return nil, false, err } } - ctx.Logger.Info("Create VM for ElfMachine") + log.Info("Create VM for ElfMachine") - withTaskVM, err := ctx.VMService.Clone(ctx.ElfCluster, ctx.ElfMachine, bootstrapData, *hostID, gpuDeviceInfos) + withTaskVM, err := machineCtx.VMService.Clone(machineCtx.ElfCluster, machineCtx.ElfMachine, bootstrapData, *hostID, gpuDeviceInfos) if err != nil { - releaseTicketForCreateVM(ctx.ElfMachine.Name) + releaseTicketForCreateVM(machineCtx.ElfMachine.Name) if service.IsVMDuplicate(err) { - vm, err := ctx.VMService.GetByName(ctx.ElfMachine.Name) + vm, err := machineCtx.VMService.GetByName(machineCtx.ElfMachine.Name) if err != nil { return nil, false, err } - ctx.ElfMachine.SetVM(util.GetVMRef(vm)) + machineCtx.ElfMachine.SetVM(util.GetVMRef(vm)) } else { // Duplicate VM error does not require unlocking GPU devices. - if ctx.ElfMachine.RequiresGPUDevices() { - unlockGPUDevicesLockedByVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name) + if machineCtx.ElfMachine.RequiresGPUDevices() { + unlockGPUDevicesLockedByVM(machineCtx.ElfCluster.Spec.Cluster, machineCtx.ElfMachine.Name) } - ctx.Logger.Error(err, "failed to create VM", - "vmRef", ctx.ElfMachine.Status.VMRef, "taskRef", ctx.ElfMachine.Status.TaskRef) + log.Error(err, "failed to create VM", + "vmRef", machineCtx.ElfMachine.Status.VMRef, "taskRef", machineCtx.ElfMachine.Status.TaskRef) - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.CloningFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.CloningFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) return nil, false, err } } else { - ctx.ElfMachine.SetVM(*withTaskVM.Data.ID) - ctx.ElfMachine.SetTask(*withTaskVM.TaskID) + machineCtx.ElfMachine.SetVM(*withTaskVM.Data.ID) + machineCtx.ElfMachine.SetTask(*withTaskVM.TaskID) } } - vm, err := r.getVM(ctx) + vm, err := r.getVM(ctx, machineCtx) if err != nil { return nil, false, err } // Remove VM disconnection timestamp - vmDisconnectionTimestamp := ctx.ElfMachine.GetVMDisconnectionTimestamp() + vmDisconnectionTimestamp := machineCtx.ElfMachine.GetVMDisconnectionTimestamp() if vmDisconnectionTimestamp != nil { - ctx.ElfMachine.SetVMDisconnectionTimestamp(nil) + machineCtx.ElfMachine.SetVMDisconnectionTimestamp(nil) - ctx.Logger.Info("The VM was found again", "vmRef", ctx.ElfMachine.Status.VMRef, "disconnectionTimestamp", vmDisconnectionTimestamp.Format(time.RFC3339)) + log.Info("The VM was found again", "vmRef", machineCtx.ElfMachine.Status.VMRef, "disconnectionTimestamp", vmDisconnectionTimestamp.Format(time.RFC3339)) } - if ok, err := r.reconcileVMTask(ctx, vm); err != nil { + if ok, err := r.reconcileVMTask(ctx, machineCtx, vm); err != nil { return nil, false, err } else if !ok { return vm, false, nil } // The host of the virtual 
machine may change, such as rescheduling caused by HA. - if vm.Host != nil && ctx.ElfMachine.Status.HostServerName != *vm.Host.Name { - hostName := ctx.ElfMachine.Status.HostServerName - ctx.ElfMachine.Status.HostServerRef = *vm.Host.ID - ctx.ElfMachine.Status.HostServerName = *vm.Host.Name - ctx.Logger.V(1).Info(fmt.Sprintf("Updated VM hostServerName from %s to %s", hostName, *vm.Host.Name)) + if vm.Host != nil && machineCtx.ElfMachine.Status.HostServerName != *vm.Host.Name { + hostName := machineCtx.ElfMachine.Status.HostServerName + machineCtx.ElfMachine.Status.HostServerRef = *vm.Host.ID + machineCtx.ElfMachine.Status.HostServerName = *vm.Host.Name + log.V(1).Info(fmt.Sprintf("Updated VM hostServerName from %s to %s", hostName, *vm.Host.Name)) } vmRef := util.GetVMRef(vm) // If vmRef is in UUID format, it means that the ELF VM created. if !typesutil.IsUUID(vmRef) { - ctx.Logger.Info("The VM is being created", "vmRef", vmRef) + log.Info("The VM is being created", "vmRef", vmRef) return vm, false, nil } // When ELF VM created, set UUID to VMRef - if !typesutil.IsUUID(ctx.ElfMachine.Status.VMRef) { - ctx.ElfMachine.SetVM(vmRef) + if !typesutil.IsUUID(machineCtx.ElfMachine.Status.VMRef) { + machineCtx.ElfMachine.SetVM(vmRef) } // The VM was moved to the recycle bin. Treat the VM as deleted, and will not recognize it even if it's moved back from the recycle bin. if service.IsVMInRecycleBin(vm) { - message := fmt.Sprintf("The VM %s was moved to the Tower recycle bin by users, so treat it as deleted.", ctx.ElfMachine.Status.VMRef) - ctx.ElfMachine.Status.FailureReason = capierrors.MachineStatusErrorPtr(capeerrors.MovedToRecycleBinError) - ctx.ElfMachine.Status.FailureMessage = pointer.String(message) - ctx.ElfMachine.SetVM("") - ctx.Logger.Error(stderrors.New(message), "") + message := fmt.Sprintf("The VM %s was moved to the Tower recycle bin by users, so treat it as deleted.", machineCtx.ElfMachine.Status.VMRef) + machineCtx.ElfMachine.Status.FailureReason = capierrors.MachineStatusErrorPtr(capeerrors.MovedToRecycleBinError) + machineCtx.ElfMachine.Status.FailureMessage = pointer.String(message) + machineCtx.ElfMachine.SetVM("") + log.Error(stderrors.New(message), "") return vm, false, nil } // Before the virtual machine is powered on, put the virtual machine into the specified placement group. 
- if ok, err := r.joinPlacementGroup(ctx, vm); err != nil || !ok { + if ok, err := r.joinPlacementGroup(ctx, machineCtx, vm); err != nil || !ok { return vm, false, err } - if ok, err := r.reconcileGPUDevices(ctx, vm); err != nil || !ok { + if ok, err := r.reconcileGPUDevices(ctx, machineCtx, vm); err != nil || !ok { return vm, false, err } - if ok, err := r.reconcileVMStatus(ctx, vm); err != nil || !ok { + if ok, err := r.reconcileVMStatus(ctx, machineCtx, vm); err != nil || !ok { return vm, false, err } return vm, true, nil } -func (r *ElfMachineReconciler) getVM(ctx *context.MachineContext) (*models.VM, error) { - vm, err := ctx.VMService.Get(ctx.ElfMachine.Status.VMRef) +func (r *ElfMachineReconciler) getVM(ctx goctx.Context, machineCtx *context.MachineContext) (*models.VM, error) { + log := ctrl.LoggerFrom(ctx) + + vm, err := machineCtx.VMService.Get(machineCtx.ElfMachine.Status.VMRef) if err == nil { return vm, nil } @@ -675,39 +677,39 @@ func (r *ElfMachineReconciler) getVM(ctx *context.MachineContext) (*models.VM, e return nil, err } - if typesutil.IsUUID(ctx.ElfMachine.Status.VMRef) { - vmDisconnectionTimestamp := ctx.ElfMachine.GetVMDisconnectionTimestamp() + if typesutil.IsUUID(machineCtx.ElfMachine.Status.VMRef) { + vmDisconnectionTimestamp := machineCtx.ElfMachine.GetVMDisconnectionTimestamp() if vmDisconnectionTimestamp == nil { now := metav1.Now() vmDisconnectionTimestamp = &now - ctx.ElfMachine.SetVMDisconnectionTimestamp(vmDisconnectionTimestamp) + machineCtx.ElfMachine.SetVMDisconnectionTimestamp(vmDisconnectionTimestamp) } // The machine may only be temporarily disconnected before timeout if !vmDisconnectionTimestamp.Add(infrav1.VMDisconnectionTimeout).Before(time.Now()) { - ctx.Logger.Error(err, "the VM has been disconnected, will try to reconnect", "vmRef", ctx.ElfMachine.Status.VMRef, "disconnectionTimestamp", vmDisconnectionTimestamp.Format(time.RFC3339)) + log.Error(err, "the VM has been disconnected, will try to reconnect", "vmRef", machineCtx.ElfMachine.Status.VMRef, "disconnectionTimestamp", vmDisconnectionTimestamp.Format(time.RFC3339)) return nil, err } // If the machine was not found by UUID and timed out it means that it got deleted directly - ctx.ElfMachine.Status.FailureReason = capierrors.MachineStatusErrorPtr(capeerrors.RemovedFromInfrastructureError) - ctx.ElfMachine.Status.FailureMessage = pointer.String(fmt.Sprintf("Unable to find VM by UUID %s. The VM was removed from infrastructure.", ctx.ElfMachine.Status.VMRef)) - ctx.Logger.Error(err, fmt.Sprintf("failed to get VM by UUID %s in %s", ctx.ElfMachine.Status.VMRef, infrav1.VMDisconnectionTimeout.String()), "message", ctx.ElfMachine.Status.FailureMessage) + machineCtx.ElfMachine.Status.FailureReason = capierrors.MachineStatusErrorPtr(capeerrors.RemovedFromInfrastructureError) + machineCtx.ElfMachine.Status.FailureMessage = pointer.String(fmt.Sprintf("Unable to find VM by UUID %s. The VM was removed from infrastructure.", machineCtx.ElfMachine.Status.VMRef)) + log.Error(err, fmt.Sprintf("failed to get VM by UUID %s in %s", machineCtx.ElfMachine.Status.VMRef, infrav1.VMDisconnectionTimeout.String()), "message", machineCtx.ElfMachine.Status.FailureMessage) return nil, err } // Create VM failed - if _, err := r.reconcileVMTask(ctx, nil); err != nil { + if _, err := r.reconcileVMTask(ctx, machineCtx, nil); err != nil { return nil, err } // If Tower fails to create VM, the temporary DB record for this VM will be deleted. 
- ctx.ElfMachine.SetVM("") + machineCtx.ElfMachine.SetVM("") - return nil, errors.Wrapf(err, "failed to create VM for ElfMachine %s/%s", ctx.ElfMachine.Namespace, ctx.ElfMachine.Name) + return nil, errors.Wrapf(err, "failed to create VM for ElfMachine %s", klog.KObj(machineCtx.ElfMachine)) } // reconcileVMStatus ensures the VM is in Running status and configured as expected. @@ -718,114 +720,122 @@ func (r *ElfMachineReconciler) getVM(ctx *context.MachineContext) (*models.VM, e // The return value: // 1. true means that the VM is in Running status. // 2. false and error is nil means the VM is not in Running status. -func (r *ElfMachineReconciler) reconcileVMStatus(ctx *context.MachineContext, vm *models.VM) (bool, error) { +func (r *ElfMachineReconciler) reconcileVMStatus(ctx goctx.Context, machineCtx *context.MachineContext, vm *models.VM) (bool, error) { + log := ctrl.LoggerFrom(ctx) + if vm.Status == nil { - ctx.Logger.Info("The status of VM is an unexpected value nil", "vmRef", ctx.ElfMachine.Status.VMRef) + log.Info("The status of VM is an unexpected value nil", "vmRef", machineCtx.ElfMachine.Status.VMRef) return false, nil } - updatedVMRestrictedFields := service.GetUpdatedVMRestrictedFields(vm, ctx.ElfMachine) + updatedVMRestrictedFields := service.GetUpdatedVMRestrictedFields(vm, machineCtx.ElfMachine) switch *vm.Status { case models.VMStatusRUNNING: if len(updatedVMRestrictedFields) > 0 && towerresources.IsAllowCustomVMConfig() { // If VM shutdown timed out, simply power off the VM. - if service.IsShutDownTimeout(conditions.GetMessage(ctx.ElfMachine, infrav1.VMProvisionedCondition)) { - ctx.Logger.Info("The VM configuration has been modified, power off the VM first and then restore the VM configuration", "vmRef", ctx.ElfMachine.Status.VMRef, "updatedVMRestrictedFields", updatedVMRestrictedFields) + if service.IsShutDownTimeout(conditions.GetMessage(machineCtx.ElfMachine, infrav1.VMProvisionedCondition)) { + log.Info("The VM configuration has been modified, power off the VM first and then restore the VM configuration", "vmRef", machineCtx.ElfMachine.Status.VMRef, "updatedVMRestrictedFields", updatedVMRestrictedFields) - return false, r.powerOffVM(ctx) + return false, r.powerOffVM(ctx, machineCtx) } else { - ctx.Logger.Info("The VM configuration has been modified, shut down the VM first and then restore the VM configuration", "vmRef", ctx.ElfMachine.Status.VMRef, "updatedVMRestrictedFields", updatedVMRestrictedFields) + log.Info("The VM configuration has been modified, shut down the VM first and then restore the VM configuration", "vmRef", machineCtx.ElfMachine.Status.VMRef, "updatedVMRestrictedFields", updatedVMRestrictedFields) - return false, r.shutDownVM(ctx) + return false, r.shutDownVM(ctx, machineCtx) } } return true, nil case models.VMStatusSTOPPED: if len(updatedVMRestrictedFields) > 0 && towerresources.IsAllowCustomVMConfig() { - ctx.Logger.Info("The VM configuration has been modified, and the VM is stopped, just restore the VM configuration to expected values", "vmRef", ctx.ElfMachine.Status.VMRef, "updatedVMRestrictedFields", updatedVMRestrictedFields) + log.Info("The VM configuration has been modified, and the VM is stopped, just restore the VM configuration to expected values", "vmRef", machineCtx.ElfMachine.Status.VMRef, "updatedVMRestrictedFields", updatedVMRestrictedFields) - return false, r.updateVM(ctx, vm) + return false, r.updateVM(ctx, machineCtx, vm) } - return false, r.powerOnVM(ctx, vm) + return false, r.powerOnVM(ctx, machineCtx, vm) case 
models.VMStatusSUSPENDED: // In some abnormal conditions, the VM will be in a suspended state, // e.g. wrong settings in VM or an exception occurred in the Guest OS. // try to 'Power off VM -> Power on VM' resumes the VM from a suspended state. // See issue http://jira.smartx.com/browse/SKS-1351 for details. - return false, r.powerOffVM(ctx) + return false, r.powerOffVM(ctx, machineCtx) default: - ctx.Logger.Info(fmt.Sprintf("The VM is in an unexpected status %s", string(*vm.Status)), "vmRef", ctx.ElfMachine.Status.VMRef) + log.Info(fmt.Sprintf("The VM is in an unexpected status %s", string(*vm.Status)), "vmRef", machineCtx.ElfMachine.Status.VMRef) return false, nil } } -func (r *ElfMachineReconciler) shutDownVM(ctx *context.MachineContext) error { - if ok := acquireTicketForUpdatingVM(ctx.ElfMachine.Name); !ok { - ctx.Logger.V(1).Info("The VM operation reaches rate limit, skip shut down VM") +func (r *ElfMachineReconciler) shutDownVM(ctx goctx.Context, machineCtx *context.MachineContext) error { + log := ctrl.LoggerFrom(ctx) + + if ok := acquireTicketForUpdatingVM(machineCtx.ElfMachine.Name); !ok { + log.V(1).Info("The VM operation reaches rate limit, skip shut down VM") return nil } - task, err := ctx.VMService.ShutDown(ctx.ElfMachine.Status.VMRef) + task, err := machineCtx.VMService.ShutDown(machineCtx.ElfMachine.Status.VMRef) if err != nil { - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.ShuttingDownFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.ShuttingDownFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) - return errors.Wrapf(err, "failed to trigger shut down for VM %s", ctx) + return errors.Wrapf(err, "failed to trigger shut down for VM %s", machineCtx.ElfMachine.Status.VMRef) } - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.ShuttingDownReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.ShuttingDownReason, clusterv1.ConditionSeverityInfo, "") - ctx.ElfMachine.SetTask(*task.ID) + machineCtx.ElfMachine.SetTask(*task.ID) - ctx.Logger.Info("Waiting for VM to be shut down", "vmRef", ctx.ElfMachine.Status.VMRef, "taskRef", ctx.ElfMachine.Status.TaskRef) + log.Info("Waiting for VM to be shut down", "vmRef", machineCtx.ElfMachine.Status.VMRef, "taskRef", machineCtx.ElfMachine.Status.TaskRef) return nil } -func (r *ElfMachineReconciler) powerOffVM(ctx *context.MachineContext) error { - if ok := acquireTicketForUpdatingVM(ctx.ElfMachine.Name); !ok { - ctx.Logger.V(1).Info(fmt.Sprintf("The VM operation reaches rate limit, skip powering off VM %s", ctx.ElfMachine.Status.VMRef)) +func (r *ElfMachineReconciler) powerOffVM(ctx goctx.Context, machineCtx *context.MachineContext) error { + log := ctrl.LoggerFrom(ctx) + + if ok := acquireTicketForUpdatingVM(machineCtx.ElfMachine.Name); !ok { + log.V(1).Info(fmt.Sprintf("The VM operation reaches rate limit, skip powering off VM %s", machineCtx.ElfMachine.Status.VMRef)) return nil } - task, err := ctx.VMService.PowerOff(ctx.ElfMachine.Status.VMRef) + task, err := machineCtx.VMService.PowerOff(machineCtx.ElfMachine.Status.VMRef) if err != nil { - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.PoweringOffFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.PoweringOffFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) - return errors.Wrapf(err, "failed to trigger 
powering off for VM %s", ctx) + return errors.Wrapf(err, "failed to trigger powering off for VM %s", machineCtx.ElfMachine.Status.VMRef) } - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.PowerOffReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.PowerOffReason, clusterv1.ConditionSeverityInfo, "") - ctx.ElfMachine.SetTask(*task.ID) + machineCtx.ElfMachine.SetTask(*task.ID) - ctx.Logger.Info("Waiting for VM to be powered off", "vmRef", ctx.ElfMachine.Status.VMRef, "taskRef", ctx.ElfMachine.Status.TaskRef) + log.Info("Waiting for VM to be powered off", "vmRef", machineCtx.ElfMachine.Status.VMRef, "taskRef", machineCtx.ElfMachine.Status.TaskRef) return nil } -func (r *ElfMachineReconciler) powerOnVM(ctx *context.MachineContext, vm *models.VM) error { - if ok, message, err := isELFScheduleVMErrorRecorded(ctx); err != nil { +func (r *ElfMachineReconciler) powerOnVM(ctx goctx.Context, machineCtx *context.MachineContext, vm *models.VM) error { + log := ctrl.LoggerFrom(ctx) + + if ok, message, err := isELFScheduleVMErrorRecorded(ctx, machineCtx, r.Client); err != nil { return err } else if ok { - if canRetry, err := canRetryVMOperation(ctx); err != nil { + if canRetry, err := canRetryVMOperation(ctx, machineCtx, r.Client); err != nil { return err } else if !canRetry { - ctx.Logger.V(1).Info(fmt.Sprintf("%s, skip powering on VM %s", message, ctx.ElfMachine.Status.VMRef)) + log.V(1).Info(fmt.Sprintf("%s, skip powering on VM %s", message, machineCtx.ElfMachine.Status.VMRef)) return nil } - ctx.Logger.V(1).Info(fmt.Sprintf("%s and the retry silence period passes, will try to power on the VM again", message)) + log.V(1).Info(fmt.Sprintf("%s and the retry silence period passes, will try to power on the VM again", message)) } - if ok := acquireTicketForUpdatingVM(ctx.ElfMachine.Name); !ok { - ctx.Logger.V(1).Info(fmt.Sprintf("The VM operation reaches rate limit, skip power on VM %s", ctx.ElfMachine.Status.VMRef)) + if ok := acquireTicketForUpdatingVM(machineCtx.ElfMachine.Name); !ok { + log.V(1).Info(fmt.Sprintf("The VM operation reaches rate limit, skip power on VM %s", machineCtx.ElfMachine.Status.VMRef)) return nil } @@ -833,45 +843,47 @@ func (r *ElfMachineReconciler) powerOnVM(ctx *context.MachineContext, vm *models hostID := "" // Starting a virtual machine with GPU/vGPU does not support automatic scheduling, // and need to specify the host where the GPU/vGPU is allocated. 
- if ctx.ElfMachine.RequiresGPUDevices() { + if machineCtx.ElfMachine.RequiresGPUDevices() { hostID = *vm.Host.ID } - task, err := ctx.VMService.PowerOn(ctx.ElfMachine.Status.VMRef, hostID) + task, err := machineCtx.VMService.PowerOn(machineCtx.ElfMachine.Status.VMRef, hostID) if err != nil { - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.PoweringOnFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.PoweringOnFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) - return errors.Wrapf(err, "failed to trigger power on for VM %s", ctx) + return errors.Wrapf(err, "failed to trigger power on for VM %s", machineCtx.ElfMachine.Status.VMRef) } - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.PoweringOnReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.PoweringOnReason, clusterv1.ConditionSeverityInfo, "") - ctx.ElfMachine.SetTask(*task.ID) + machineCtx.ElfMachine.SetTask(*task.ID) - ctx.Logger.Info("Waiting for VM to be powered on", "vmRef", ctx.ElfMachine.Status.VMRef, "taskRef", ctx.ElfMachine.Status.TaskRef) + log.Info("Waiting for VM to be powered on", "vmRef", machineCtx.ElfMachine.Status.VMRef, "taskRef", machineCtx.ElfMachine.Status.TaskRef) return nil } -func (r *ElfMachineReconciler) updateVM(ctx *context.MachineContext, vm *models.VM) error { - if ok := acquireTicketForUpdatingVM(ctx.ElfMachine.Name); !ok { - ctx.Logger.V(1).Info(fmt.Sprintf("The VM operation reaches rate limit, skip updating VM %s", ctx.ElfMachine.Status.VMRef)) +func (r *ElfMachineReconciler) updateVM(ctx goctx.Context, machineCtx *context.MachineContext, vm *models.VM) error { + log := ctrl.LoggerFrom(ctx) + + if ok := acquireTicketForUpdatingVM(machineCtx.ElfMachine.Name); !ok { + log.V(1).Info(fmt.Sprintf("The VM operation reaches rate limit, skip updating VM %s", machineCtx.ElfMachine.Status.VMRef)) return nil } - withTaskVM, err := ctx.VMService.UpdateVM(vm, ctx.ElfMachine) + withTaskVM, err := machineCtx.VMService.UpdateVM(vm, machineCtx.ElfMachine) if err != nil { - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.UpdatingFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.UpdatingFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) - return errors.Wrapf(err, "failed to trigger update for VM %s", ctx) + return errors.Wrapf(err, "failed to trigger update for VM %s", machineCtx.ElfMachine.Status.VMRef) } - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.UpdatingReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.UpdatingReason, clusterv1.ConditionSeverityInfo, "") - ctx.ElfMachine.SetTask(*withTaskVM.TaskID) + machineCtx.ElfMachine.SetTask(*withTaskVM.TaskID) - ctx.Logger.Info("Waiting for the VM to be updated", "vmRef", ctx.ElfMachine.Status.VMRef, "taskRef", ctx.ElfMachine.Status.TaskRef) + log.Info("Waiting for the VM to be updated", "vmRef", machineCtx.ElfMachine.Status.VMRef, "taskRef", machineCtx.ElfMachine.Status.TaskRef) return nil } @@ -881,18 +893,20 @@ func (r *ElfMachineReconciler) updateVM(ctx *context.MachineContext, vm *models. // The return value: // 1. true indicates that the virtual machine task has completed (success or failure). // 2. false indicates that the virtual machine task has not been completed yet. 
-func (r *ElfMachineReconciler) reconcileVMTask(ctx *context.MachineContext, vm *models.VM) (taskDone bool, reterr error) { - taskRef := ctx.ElfMachine.Status.TaskRef - vmRef := ctx.ElfMachine.Status.VMRef +func (r *ElfMachineReconciler) reconcileVMTask(ctx goctx.Context, machineCtx *context.MachineContext, vm *models.VM) (taskDone bool, reterr error) { + log := ctrl.LoggerFrom(ctx) + + taskRef := machineCtx.ElfMachine.Status.TaskRef + vmRef := machineCtx.ElfMachine.Status.VMRef var err error var task *models.Task - if ctx.ElfMachine.HasTask() { - task, err = ctx.VMService.GetTask(taskRef) + if machineCtx.ElfMachine.HasTask() { + task, err = machineCtx.VMService.GetTask(taskRef) if err != nil { if service.IsTaskNotFound(err) { - ctx.ElfMachine.SetTask("") - ctx.Logger.Error(err, fmt.Sprintf("task %s of VM %s is missing", taskRef, vmRef)) + machineCtx.ElfMachine.SetTask("") + log.Error(err, fmt.Sprintf("task %s of VM %s is missing", taskRef, vmRef)) } else { return false, errors.Wrapf(err, "failed to get task %s for VM %s", taskRef, vmRef) } @@ -902,7 +916,7 @@ func (r *ElfMachineReconciler) reconcileVMTask(ctx *context.MachineContext, vm * if task == nil { // VM is performing an operation if vm != nil && vm.EntityAsyncStatus != nil { - ctx.Logger.Info("Waiting for VM task done", "vmRef", vmRef, "taskRef", taskRef) + log.Info("Waiting for VM task done", "vmRef", vmRef, "taskRef", taskRef) return false, nil } @@ -912,7 +926,7 @@ func (r *ElfMachineReconciler) reconcileVMTask(ctx *context.MachineContext, vm * defer func() { if taskDone { - ctx.ElfMachine.SetTask("") + machineCtx.ElfMachine.SetTask("") } // The task is completed but entityAsyncStatus may not be equal to nil. @@ -925,28 +939,28 @@ func (r *ElfMachineReconciler) reconcileVMTask(ctx *context.MachineContext, vm * switch *task.Status { case models.TaskStatusFAILED: - if err := r.reconcileVMFailedTask(ctx, task, taskRef, vmRef); err != nil { + if err := r.reconcileVMFailedTask(ctx, machineCtx, task, taskRef, vmRef); err != nil { return true, err } case models.TaskStatusSUCCESSED: - ctx.Logger.Info("VM task succeeded", "vmRef", vmRef, "taskRef", taskRef, "taskDescription", service.GetTowerString(task.Description)) + log.Info("VM task succeeded", "vmRef", vmRef, "taskRef", taskRef, "taskDescription", service.GetTowerString(task.Description)) - if ctx.ElfMachine.RequiresGPUDevices() && + if machineCtx.ElfMachine.RequiresGPUDevices() && (service.IsCloneVMTask(task) || service.IsPowerOnVMTask(task) || service.IsUpdateVMTask(task)) { - unlockGPUDevicesLockedByVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name) + unlockGPUDevicesLockedByVM(machineCtx.ElfCluster.Spec.Cluster, machineCtx.ElfMachine.Name) } if service.IsCloneVMTask(task) || service.IsPowerOnVMTask(task) { - releaseTicketForCreateVM(ctx.ElfMachine.Name) - recordElfClusterStorageInsufficient(ctx, false) - recordElfClusterMemoryInsufficient(ctx, false) + releaseTicketForCreateVM(machineCtx.ElfMachine.Name) + recordElfClusterStorageInsufficient(machineCtx, false) + recordElfClusterMemoryInsufficient(machineCtx, false) - if err := recordPlacementGroupPolicyNotSatisfied(ctx, false); err != nil { + if err := recordPlacementGroupPolicyNotSatisfied(ctx, machineCtx, r.Client, false); err != nil { return true, err } } default: - ctx.Logger.Info("Waiting for VM task done", "vmRef", vmRef, "taskRef", taskRef, "taskStatus", service.GetTowerTaskStatus(task.Status), "taskDescription", service.GetTowerString(task.Description)) + log.Info("Waiting for VM task done", "vmRef", vmRef, "taskRef", 
taskRef, "taskStatus", service.GetTowerTaskStatus(task.Status), "taskDescription", service.GetTowerString(task.Description)) } if *task.Status == models.TaskStatusFAILED || *task.Status == models.TaskStatusSUCCESSED { @@ -957,54 +971,56 @@ func (r *ElfMachineReconciler) reconcileVMTask(ctx *context.MachineContext, vm * } // reconcileVMFailedTask handles failed virtual machine tasks. -func (r *ElfMachineReconciler) reconcileVMFailedTask(ctx *context.MachineContext, task *models.Task, taskRef, vmRef string) error { +func (r *ElfMachineReconciler) reconcileVMFailedTask(ctx goctx.Context, machineCtx *context.MachineContext, task *models.Task, taskRef, vmRef string) error { + log := ctrl.LoggerFrom(ctx) + errorMessage := service.GetTowerString(task.ErrorMessage) if service.IsGPUAssignFailed(errorMessage) { errorMessage = service.ParseGPUAssignFailed(errorMessage) } - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.TaskFailureReason, clusterv1.ConditionSeverityInfo, errorMessage) + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.TaskFailureReason, clusterv1.ConditionSeverityInfo, errorMessage) if service.IsCloudInitConfigError(errorMessage) { - ctx.ElfMachine.Status.FailureReason = capierrors.MachineStatusErrorPtr(capeerrors.CloudInitConfigError) - ctx.ElfMachine.Status.FailureMessage = pointer.String(fmt.Sprintf("VM cloud-init config error: %s", service.FormatCloudInitError(errorMessage))) + machineCtx.ElfMachine.Status.FailureReason = capierrors.MachineStatusErrorPtr(capeerrors.CloudInitConfigError) + machineCtx.ElfMachine.Status.FailureMessage = pointer.String(fmt.Sprintf("VM cloud-init config error: %s", service.FormatCloudInitError(errorMessage))) } - ctx.Logger.Error(errors.New("VM task failed"), "", "vmRef", vmRef, "taskRef", taskRef, "taskErrorMessage", errorMessage, "taskErrorCode", service.GetTowerString(task.ErrorCode), "taskDescription", service.GetTowerString(task.Description)) + log.Error(errors.New("VM task failed"), "", "vmRef", vmRef, "taskRef", taskRef, "taskErrorMessage", errorMessage, "taskErrorCode", service.GetTowerString(task.ErrorCode), "taskDescription", service.GetTowerString(task.Description)) switch { case service.IsCloneVMTask(task): - releaseTicketForCreateVM(ctx.ElfMachine.Name) + releaseTicketForCreateVM(machineCtx.ElfMachine.Name) - if ctx.ElfMachine.RequiresGPUDevices() { - unlockGPUDevicesLockedByVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name) + if machineCtx.ElfMachine.RequiresGPUDevices() { + unlockGPUDevicesLockedByVM(machineCtx.ElfCluster.Spec.Cluster, machineCtx.ElfMachine.Name) } case service.IsPowerOnVMTask(task) || service.IsUpdateVMTask(task) || service.IsVMColdMigrationTask(task): - if ctx.ElfMachine.RequiresGPUDevices() { - unlockGPUDevicesLockedByVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name) + if machineCtx.ElfMachine.RequiresGPUDevices() { + unlockGPUDevicesLockedByVM(machineCtx.ElfCluster.Spec.Cluster, machineCtx.ElfMachine.Name) } } switch { case service.IsVMDuplicateError(errorMessage): - setVMDuplicate(ctx.ElfMachine.Name) + setVMDuplicate(machineCtx.ElfMachine.Name) case service.IsStorageInsufficientError(errorMessage): - recordElfClusterStorageInsufficient(ctx, true) - message := fmt.Sprintf("Insufficient storage detected for the ELF cluster %s", ctx.ElfCluster.Spec.Cluster) - ctx.Logger.Info(message) + recordElfClusterStorageInsufficient(machineCtx, true) + message := fmt.Sprintf("Insufficient storage detected for the ELF cluster %s", 
machineCtx.ElfCluster.Spec.Cluster) + log.Info(message) return errors.New(message) case service.IsMemoryInsufficientError(errorMessage): - recordElfClusterMemoryInsufficient(ctx, true) - message := fmt.Sprintf("Insufficient memory detected for the ELF cluster %s", ctx.ElfCluster.Spec.Cluster) - ctx.Logger.Info(message) + recordElfClusterMemoryInsufficient(machineCtx, true) + message := fmt.Sprintf("Insufficient memory detected for the ELF cluster %s", machineCtx.ElfCluster.Spec.Cluster) + log.Info(message) return errors.New(message) case service.IsPlacementGroupError(errorMessage): - if err := recordPlacementGroupPolicyNotSatisfied(ctx, true); err != nil { + if err := recordPlacementGroupPolicyNotSatisfied(ctx, machineCtx, r.Client, true); err != nil { return err } message := "The placement group policy can not be satisfied" - ctx.Logger.Info(message) + log.Info(message) return errors.New(message) } @@ -1012,67 +1028,71 @@ func (r *ElfMachineReconciler) reconcileVMFailedTask(ctx *context.MachineContext return nil } -func (r *ElfMachineReconciler) reconcileProviderID(ctx *context.MachineContext, vm *models.VM) error { +func (r *ElfMachineReconciler) reconcileProviderID(ctx goctx.Context, machineCtx *context.MachineContext, vm *models.VM) error { + log := ctrl.LoggerFrom(ctx) + providerID := machineutil.ConvertUUIDToProviderID(*vm.LocalID) if providerID == "" { return errors.Errorf("invalid VM UUID %s from %s %s/%s for %s", *vm.LocalID, - ctx.ElfCluster.GroupVersionKind(), - ctx.ElfCluster.GetNamespace(), - ctx.ElfCluster.GetName(), + machineCtx.ElfCluster.GroupVersionKind(), + machineCtx.ElfCluster.GetNamespace(), + machineCtx.ElfCluster.GetName(), ctx) } - if ctx.ElfMachine.Spec.ProviderID == nil || *ctx.ElfMachine.Spec.ProviderID != providerID { - ctx.ElfMachine.Spec.ProviderID = pointer.String(providerID) + if machineCtx.ElfMachine.Spec.ProviderID == nil || *machineCtx.ElfMachine.Spec.ProviderID != providerID { + machineCtx.ElfMachine.Spec.ProviderID = pointer.String(providerID) - ctx.Logger.Info("updated providerID", "providerID", providerID) + log.Info("updated providerID", "providerID", providerID) } return nil } // reconcileNode sets providerID and host server labels for node. 
-func (r *ElfMachineReconciler) reconcileNode(ctx *context.MachineContext, vm *models.VM) (bool, error) { +func (r *ElfMachineReconciler) reconcileNode(ctx goctx.Context, machineCtx *context.MachineContext, vm *models.VM) (bool, error) { + log := ctrl.LoggerFrom(ctx) + providerID := machineutil.ConvertUUIDToProviderID(*vm.LocalID) if providerID == "" { return false, errors.Errorf("invalid VM UUID %s from %s %s/%s for %s", *vm.LocalID, - ctx.ElfCluster.GroupVersionKind(), - ctx.ElfCluster.GetNamespace(), - ctx.ElfCluster.GetName(), + machineCtx.ElfCluster.GroupVersionKind(), + machineCtx.ElfCluster.GetNamespace(), + machineCtx.ElfCluster.GetName(), ctx) } - kubeClient, err := util.NewKubeClient(ctx, ctx.Client, ctx.Cluster) + kubeClient, err := util.NewKubeClient(ctx, r.Client, machineCtx.Cluster) if err != nil { - return false, errors.Wrapf(err, "failed to get client for Cluster %s/%s", ctx.Cluster.Namespace, ctx.Cluster.Name) + return false, errors.Wrapf(err, "failed to get client for Cluster %s", klog.KObj(machineCtx.Cluster)) } - node, err := kubeClient.CoreV1().Nodes().Get(ctx, ctx.ElfMachine.Name, metav1.GetOptions{}) + node, err := kubeClient.CoreV1().Nodes().Get(ctx, machineCtx.ElfMachine.Name, metav1.GetOptions{}) if err != nil { - return false, errors.Wrapf(err, "failed to get node %s for setting providerID and labels", ctx.ElfMachine.Name) + return false, errors.Wrapf(err, "failed to get node %s for setting providerID and labels", machineCtx.ElfMachine.Name) } nodeHostID := labelsutil.GetHostServerIDLabel(node) nodeHostName := labelsutil.GetHostServerNameLabel(node) towerVMID := labelsutil.GetTowerVMIDLabel(node) - if node.Spec.ProviderID != "" && nodeHostID == ctx.ElfMachine.Status.HostServerRef && - nodeHostName == ctx.ElfMachine.Status.HostServerName && towerVMID == *vm.ID { + if node.Spec.ProviderID != "" && nodeHostID == machineCtx.ElfMachine.Status.HostServerRef && + nodeHostName == machineCtx.ElfMachine.Status.HostServerName && towerVMID == *vm.ID { return true, nil } - nodeGroupName := machineutil.GetNodeGroupName(ctx.Machine) + nodeGroupName := machineutil.GetNodeGroupName(machineCtx.Machine) labels := map[string]string{ - infrav1.HostServerIDLabel: ctx.ElfMachine.Status.HostServerRef, - infrav1.HostServerNameLabel: ctx.ElfMachine.Status.HostServerName, + infrav1.HostServerIDLabel: machineCtx.ElfMachine.Status.HostServerRef, + infrav1.HostServerNameLabel: machineCtx.ElfMachine.Status.HostServerName, infrav1.TowerVMIDLabel: *vm.ID, infrav1.NodeGroupLabel: nodeGroupName, } - if len(ctx.ElfMachine.Spec.GPUDevices) > 0 { - labels[labelsutil.ClusterAutoscalerCAPIGPULabel] = labelsutil.ConvertToLabelValue(ctx.ElfMachine.Spec.GPUDevices[0].Model) - } else if len(ctx.ElfMachine.Spec.VGPUDevices) > 0 { - labels[labelsutil.ClusterAutoscalerCAPIGPULabel] = labelsutil.ConvertToLabelValue(ctx.ElfMachine.Spec.VGPUDevices[0].Type) + if len(machineCtx.ElfMachine.Spec.GPUDevices) > 0 { + labels[labelsutil.ClusterAutoscalerCAPIGPULabel] = labelsutil.ConvertToLabelValue(machineCtx.ElfMachine.Spec.GPUDevices[0].Model) + } else if len(machineCtx.ElfMachine.Spec.VGPUDevices) > 0 { + labels[labelsutil.ClusterAutoscalerCAPIGPULabel] = labelsutil.ConvertToLabelValue(machineCtx.ElfMachine.Spec.VGPUDevices[0].Type) } payloads := map[string]interface{}{ @@ -1097,9 +1117,9 @@ func (r *ElfMachineReconciler) reconcileNode(ctx *context.MachineContext, vm *mo return false, err } - ctx.Logger.Info("Setting node providerID and labels succeeded", - "cluster", ctx.Cluster.Name, "node", node.Name, - 
"providerID", providerID, "hostID", ctx.ElfMachine.Status.HostServerRef, "hostName", ctx.ElfMachine.Status.HostServerName) + log.Info("Setting node providerID and labels succeeded", + "cluster", machineCtx.Cluster.Name, "node", node.Name, + "providerID", providerID, "hostID", machineCtx.ElfMachine.Status.HostServerRef, "hostName", machineCtx.ElfMachine.Status.HostServerName) return true, nil } @@ -1110,22 +1130,24 @@ func (r *ElfMachineReconciler) reconcileNode(ctx *context.MachineContext, vm *mo // // In the scenario with many virtual machines, it could be slow for SMTX OS to synchronize VM information via vmtools. // So if not enough IPs can be obtained from Tower API, try to get its IP address from the corresponding K8s Node. -func (r *ElfMachineReconciler) reconcileNetwork(ctx *context.MachineContext, vm *models.VM) (ret bool, reterr error) { +func (r *ElfMachineReconciler) reconcileNetwork(ctx goctx.Context, machineCtx *context.MachineContext, vm *models.VM) (ret bool, reterr error) { + log := ctrl.LoggerFrom(ctx) + defer func() { if reterr != nil { - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForNetworkAddressesReason, clusterv1.ConditionSeverityWarning, reterr.Error()) + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForNetworkAddressesReason, clusterv1.ConditionSeverityWarning, reterr.Error()) } else if !ret { - ctx.Logger.V(1).Info("VM network is not ready yet", "nicStatus", ctx.ElfMachine.Status.Network) - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForNetworkAddressesReason, clusterv1.ConditionSeverityInfo, "") + log.V(1).Info("VM network is not ready yet", "nicStatus", machineCtx.ElfMachine.Status.Network) + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForNetworkAddressesReason, clusterv1.ConditionSeverityInfo, "") } }() - ctx.ElfMachine.Status.Network = []infrav1.NetworkStatus{} - ctx.ElfMachine.Status.Addresses = []clusterv1.MachineAddress{} + machineCtx.ElfMachine.Status.Network = []infrav1.NetworkStatus{} + machineCtx.ElfMachine.Status.Addresses = []clusterv1.MachineAddress{} // A Map of IP to MachineAddress ipToMachineAddressMap := make(map[string]clusterv1.MachineAddress) - nics, err := ctx.VMService.GetVMNics(*vm.ID) + nics, err := machineCtx.VMService.GetVMNics(*vm.ID) if err != nil { return false, err } @@ -1135,7 +1157,7 @@ func (r *ElfMachineReconciler) reconcileNetwork(ctx *context.MachineContext, vm ip := service.GetTowerString(nic.IPAddress) // Add to Status.Network even if IP is empty. - ctx.ElfMachine.Status.Network = append(ctx.ElfMachine.Status.Network, infrav1.NetworkStatus{ + machineCtx.ElfMachine.Status.Network = append(machineCtx.ElfMachine.Status.Network, infrav1.NetworkStatus{ IPAddrs: []string{ip}, MACAddr: service.GetTowerString(nic.MacAddress), }) @@ -1150,26 +1172,26 @@ func (r *ElfMachineReconciler) reconcileNetwork(ctx *context.MachineContext, vm } } - networkDevicesWithIP := ctx.ElfMachine.GetNetworkDevicesRequiringIP() - networkDevicesWithDHCP := ctx.ElfMachine.GetNetworkDevicesRequiringDHCP() + networkDevicesWithIP := machineCtx.ElfMachine.GetNetworkDevicesRequiringIP() + networkDevicesWithDHCP := machineCtx.ElfMachine.GetNetworkDevicesRequiringDHCP() if len(ipToMachineAddressMap) < len(networkDevicesWithIP) { // Try to get VM NIC IP address from the K8s Node. 
- nodeIP, err := r.getK8sNodeIP(ctx, ctx.ElfMachine.Name) + nodeIP, err := r.getK8sNodeIP(ctx, machineCtx, machineCtx.ElfMachine.Name) if err == nil && nodeIP != "" { ipToMachineAddressMap[nodeIP] = clusterv1.MachineAddress{ Address: nodeIP, Type: clusterv1.MachineInternalIP, } } else if err != nil { - ctx.Logger.Error(err, "failed to get VM NIC IP address from the K8s node", "Node", ctx.ElfMachine.Name) + log.Error(err, "failed to get VM NIC IP address from the K8s node", "Node", machineCtx.ElfMachine.Name) } } if len(networkDevicesWithDHCP) > 0 { dhcpIPNum := 0 for _, ip := range ipToMachineAddressMap { - if !ctx.ElfMachine.IsMachineStaticIP(ip.Address) { + if !machineCtx.ElfMachine.IsMachineStaticIP(ip.Address) { dhcpIPNum++ } } @@ -1184,21 +1206,21 @@ func (r *ElfMachineReconciler) reconcileNetwork(ctx *context.MachineContext, vm } for _, machineAddress := range ipToMachineAddressMap { - ctx.ElfMachine.Status.Addresses = append(ctx.ElfMachine.Status.Addresses, machineAddress) + machineCtx.ElfMachine.Status.Addresses = append(machineCtx.ElfMachine.Status.Addresses, machineAddress) } return true, nil } -func (r *ElfMachineReconciler) getBootstrapData(ctx *context.MachineContext) (string, error) { +func (r *ElfMachineReconciler) getBootstrapData(ctx goctx.Context, machineCtx *context.MachineContext) (string, error) { secret := &corev1.Secret{} secretKey := apitypes.NamespacedName{ - Namespace: ctx.Machine.Namespace, - Name: *ctx.Machine.Spec.Bootstrap.DataSecretName, + Namespace: machineCtx.Machine.Namespace, + Name: *machineCtx.Machine.Spec.Bootstrap.DataSecretName, } - if err := ctx.Client.Get(ctx, secretKey, secret); err != nil { - return "", errors.Wrapf(err, "failed to retrieve bootstrap data secret for %s %s", secretKey.Namespace, secretKey.Name) + if err := r.Client.Get(ctx, secretKey, secret); err != nil { + return "", errors.Wrapf(err, "failed to get bootstrap data secret %s", secretKey) } value, ok := secret.Data["value"] @@ -1209,12 +1231,14 @@ func (r *ElfMachineReconciler) getBootstrapData(ctx *context.MachineContext) (st return string(value), nil } -func (r *ElfMachineReconciler) reconcileLabels(ctx *context.MachineContext, vm *models.VM) (bool, error) { +func (r *ElfMachineReconciler) reconcileLabels(ctx goctx.Context, machineCtx *context.MachineContext, vm *models.VM) (bool, error) { + log := ctrl.LoggerFrom(ctx) + capeManagedLabelKey := towerresources.GetVMLabelManaged() capeManagedLabel := getLabelFromCache(capeManagedLabelKey) if capeManagedLabel == nil { var err error - capeManagedLabel, err = ctx.VMService.UpsertLabel(capeManagedLabelKey, "true") + capeManagedLabel, err = machineCtx.VMService.UpsertLabel(capeManagedLabelKey, "true") if err != nil { return false, errors.Wrapf(err, "failed to upsert label "+towerresources.GetVMLabelManaged()) } @@ -1230,29 +1254,29 @@ func (r *ElfMachineReconciler) reconcileLabels(ctx *context.MachineContext, vm * } } - namespaceLabel, err := ctx.VMService.UpsertLabel(towerresources.GetVMLabelNamespace(), ctx.ElfMachine.Namespace) + namespaceLabel, err := machineCtx.VMService.UpsertLabel(towerresources.GetVMLabelNamespace(), machineCtx.ElfMachine.Namespace) if err != nil { return false, errors.Wrapf(err, "failed to upsert label "+towerresources.GetVMLabelNamespace()) } - clusterNameLabel, err := ctx.VMService.UpsertLabel(towerresources.GetVMLabelClusterName(), ctx.ElfCluster.Name) + clusterNameLabel, err := machineCtx.VMService.UpsertLabel(towerresources.GetVMLabelClusterName(), machineCtx.ElfCluster.Name) if err != nil { return false, 
errors.Wrapf(err, "failed to upsert label "+towerresources.GetVMLabelClusterName()) } var vipLabel *models.Label - if machineutil.IsControlPlaneMachine(ctx.ElfMachine) { - vipLabel, err = ctx.VMService.UpsertLabel(towerresources.GetVMLabelVIP(), ctx.ElfCluster.Spec.ControlPlaneEndpoint.Host) + if machineutil.IsControlPlaneMachine(machineCtx.ElfMachine) { + vipLabel, err = machineCtx.VMService.UpsertLabel(towerresources.GetVMLabelVIP(), machineCtx.ElfCluster.Spec.ControlPlaneEndpoint.Host) if err != nil { return false, errors.Wrapf(err, "failed to upsert label "+towerresources.GetVMLabelVIP()) } } labelIDs := []string{*namespaceLabel.ID, *clusterNameLabel.ID, *capeManagedLabel.ID} - if machineutil.IsControlPlaneMachine(ctx.ElfMachine) { + if machineutil.IsControlPlaneMachine(machineCtx.ElfMachine) { labelIDs = append(labelIDs, *vipLabel.ID) } - r.Logger.V(3).Info("Upsert labels", "labelIds", labelIDs) - _, err = ctx.VMService.AddLabelsToVM(*vm.ID, labelIDs) + log.V(3).Info("Upsert labels", "labelIds", labelIDs) + _, err = machineCtx.VMService.AddLabelsToVM(*vm.ID, labelIDs) if err != nil { delLabelCache(capeManagedLabelKey) @@ -1263,8 +1287,8 @@ func (r *ElfMachineReconciler) reconcileLabels(ctx *context.MachineContext, vm * // isWaitingForStaticIPAllocation checks whether the VM should wait for a static IP // to be allocated. -func (r *ElfMachineReconciler) isWaitingForStaticIPAllocation(ctx *context.MachineContext) bool { - devices := ctx.ElfMachine.Spec.Network.Devices +func (r *ElfMachineReconciler) isWaitingForStaticIPAllocation(machineCtx *context.MachineContext) bool { + devices := machineCtx.ElfMachine.Spec.Network.Devices for _, device := range devices { if device.NetworkType == infrav1.NetworkTypeIPV4 && len(device.IPAddrs) == 0 { // Static IP is not available yet @@ -1279,20 +1303,20 @@ func (r *ElfMachineReconciler) isWaitingForStaticIPAllocation(ctx *context.Machi // This is necessary since CAPI does not set the nodeRef field on the owner Machine object // until the node moves to Ready state. Hence, on Machine deletion it is unable to delete // the kubernetes node corresponding to the VM. -func (r *ElfMachineReconciler) deleteNode(ctx *context.MachineContext, nodeName string) error { +func (r *ElfMachineReconciler) deleteNode(ctx goctx.Context, machineCtx *context.MachineContext, nodeName string) error { // When the cluster needs to be deleted, there is no need to delete the k8s node. - if ctx.Cluster.DeletionTimestamp != nil { + if machineCtx.Cluster.DeletionTimestamp != nil { return nil } // when the control plane is not ready, there is no need to delete the k8s node. - if !ctx.Cluster.Status.ControlPlaneReady { + if !machineCtx.Cluster.Status.ControlPlaneReady { return nil } - kubeClient, err := util.NewKubeClient(ctx, ctx.Client, ctx.Cluster) + kubeClient, err := util.NewKubeClient(ctx, r.Client, machineCtx.Cluster) if err != nil { - return errors.Wrapf(err, "failed to get client for Cluster %s/%s", ctx.Cluster.Namespace, ctx.Cluster.Name) + return errors.Wrapf(err, "failed to get client for Cluster %s", klog.KObj(machineCtx.Cluster)) } // Attempt to delete the corresponding node. 
@@ -1303,17 +1327,17 @@ func (r *ElfMachineReconciler) deleteNode(ctx *context.MachineContext, nodeName } if err != nil { - return errors.Wrapf(err, "failed to delete K8s node %s for Cluster %s/%s", nodeName, ctx.Cluster.Namespace, ctx.Cluster.Name) + return errors.Wrapf(err, "failed to delete K8s node %s for Cluster %s", nodeName, klog.KObj(machineCtx.Cluster)) } return nil } // getK8sNodeIP get the default network IP of K8s Node. -func (r *ElfMachineReconciler) getK8sNodeIP(ctx *context.MachineContext, nodeName string) (string, error) { - kubeClient, err := util.NewKubeClient(ctx, ctx.Client, ctx.Cluster) +func (r *ElfMachineReconciler) getK8sNodeIP(ctx goctx.Context, machineCtx *context.MachineContext, nodeName string) (string, error) { + kubeClient, err := util.NewKubeClient(ctx, r.Client, machineCtx.Cluster) if err != nil { - return "", errors.Wrapf(err, "failed to get client for Cluster %s/%s", ctx.Cluster.Namespace, ctx.Cluster.Name) + return "", errors.Wrapf(err, "failed to get client for Cluster %s", klog.KObj(machineCtx.Cluster)) } k8sNode, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) @@ -1322,7 +1346,7 @@ func (r *ElfMachineReconciler) getK8sNodeIP(ctx *context.MachineContext, nodeNam } if err != nil { - return "", errors.Wrapf(err, "failed to get K8s Node %s for Cluster %s/%s", nodeName, ctx.Cluster.Namespace, ctx.Cluster.Name) + return "", errors.Wrapf(err, "failed to get K8s Node %s for Cluster %s", nodeName, klog.KObj(machineCtx.Cluster)) } if len(k8sNode.Status.Addresses) == 0 { @@ -1351,15 +1375,17 @@ const ( // deleteDuplicateVMs deletes the duplicate virtual machines. // Only be used to delete duplicate VMs before the ElfCluster is deleted. -func (r *ElfMachineReconciler) deleteDuplicateVMs(ctx *context.MachineContext) (reconcile.Result, error) { +func (r *ElfMachineReconciler) deleteDuplicateVMs(ctx goctx.Context, machineCtx *context.MachineContext) (reconcile.Result, error) { + log := ctrl.LoggerFrom(ctx) + // Duplicate virtual machines appear in the process of creating virtual machines, // only need to check within half an hour after creating virtual machines. - if ctx.ElfMachine.DeletionTimestamp.IsZero() && - time.Now().After(ctx.ElfMachine.CreationTimestamp.Add(checkDuplicateVMDuration)) { + if machineCtx.ElfMachine.DeletionTimestamp.IsZero() && + time.Now().After(machineCtx.ElfMachine.CreationTimestamp.Add(checkDuplicateVMDuration)) { return reconcile.Result{}, nil } - vms, err := ctx.VMService.FindVMsByName(ctx.ElfMachine.Name) + vms, err := machineCtx.VMService.FindVMsByName(machineCtx.ElfMachine.Name) if err != nil { return reconcile.Result{}, err } @@ -1368,26 +1394,26 @@ func (r *ElfMachineReconciler) deleteDuplicateVMs(ctx *context.MachineContext) ( return reconcile.Result{}, nil } - if ctx.ElfMachine.Status.VMRef == "" { + if machineCtx.ElfMachine.Status.VMRef == "" { vmIDs := make([]string, 0, len(vms)) for i := 0; i < len(vms); i++ { vmIDs = append(vmIDs, *vms[i].ID) } - ctx.Logger.Info("Waiting for ElfMachine to select one of the duplicate VMs before deleting the other", "vms", vmIDs) + log.Info("Waiting for ElfMachine to select one of the duplicate VMs before deleting the other", "vms", vmIDs) return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, nil } for i := 0; i < len(vms); i++ { // Do not delete already running virtual machines to avoid deleting already used virtual machines. 
- if *vms[i].ID == ctx.ElfMachine.Status.VMRef || - *vms[i].LocalID == ctx.ElfMachine.Status.VMRef || + if *vms[i].ID == machineCtx.ElfMachine.Status.VMRef || + *vms[i].LocalID == machineCtx.ElfMachine.Status.VMRef || *vms[i].Status != models.VMStatusSTOPPED { continue } // When there are duplicate virtual machines, the service of Tower is unstable. // If there is a deletion operation error, just return and try again. - if err := r.deleteVM(ctx, vms[i]); err != nil { + if err := r.deleteVM(ctx, machineCtx, vms[i]); err != nil { return reconcile.Result{}, err } else { return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, nil @@ -1398,22 +1424,24 @@ func (r *ElfMachineReconciler) deleteDuplicateVMs(ctx *context.MachineContext) ( } // deleteVM deletes the specified virtual machine. -func (r *ElfMachineReconciler) deleteVM(ctx *context.MachineContext, vm *models.VM) error { +func (r *ElfMachineReconciler) deleteVM(ctx goctx.Context, machineCtx *context.MachineContext, vm *models.VM) error { + log := ctrl.LoggerFrom(ctx) + // VM is performing an operation if vm.EntityAsyncStatus != nil { - ctx.Logger.V(1).Info("Waiting for VM task done before deleting the duplicate VM", "vmID", *vm.ID, "name", *vm.Name) + log.V(1).Info("Waiting for VM task done before deleting the duplicate VM", "vmID", *vm.ID, "name", *vm.Name) return nil } // Delete the VM. // Delete duplicate virtual machines asynchronously, // because synchronous deletion will affect the performance of reconcile. - task, err := ctx.VMService.Delete(*vm.ID) + task, err := machineCtx.VMService.Delete(*vm.ID) if err != nil { return err } - ctx.Logger.Info(fmt.Sprintf("Destroying duplicate VM %s in task %s", *vm.ID, *task.ID)) + log.Info(fmt.Sprintf("Destroying duplicate VM %s in task %s", *vm.ID, *task.ID)) return nil } diff --git a/controllers/elfmachine_controller_gpu.go b/controllers/elfmachine_controller_gpu.go index 4860bade..98be5b28 100644 --- a/controllers/elfmachine_controller_gpu.go +++ b/controllers/elfmachine_controller_gpu.go @@ -17,12 +17,15 @@ limitations under the License. package controllers import ( + goctx "context" + "github.com/pkg/errors" "github.com/smartxworks/cloudtower-go-sdk/v2/models" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/utils/pointer" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/util/conditions" + ctrl "sigs.k8s.io/controller-runtime" infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" "github.com/smartxworks/cluster-api-provider-elf/pkg/context" @@ -43,53 +46,55 @@ import ( // 3. A non-empty string indicates that the specified host ID was returned. // // The return gpudevices: the GPU devices for virtual machine. 
-func (r *ElfMachineReconciler) selectHostAndGPUsForVM(ctx *context.MachineContext, preferredHostID string) (rethost *string, gpudevices []*service.GPUDeviceInfo, reterr error) { - if !ctx.ElfMachine.RequiresGPUDevices() { +func (r *ElfMachineReconciler) selectHostAndGPUsForVM(ctx goctx.Context, machineCtx *context.MachineContext, preferredHostID string) (rethost *string, gpudevices []*service.GPUDeviceInfo, reterr error) { + log := ctrl.LoggerFrom(ctx) + + if !machineCtx.ElfMachine.RequiresGPUDevices() { return pointer.String(""), nil, nil } defer func() { if rethost == nil { - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForAvailableHostWithEnoughGPUsReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForAvailableHostWithEnoughGPUsReason, clusterv1.ConditionSeverityInfo, "") - ctx.Logger.V(1).Info("No host with the required GPU devices for the virtual machine, so wait for enough available hosts") + log.V(1).Info("No host with the required GPU devices for the virtual machine, so wait for enough available hosts") } }() // If the GPU devices locked by the virtual machine still exist, use them directly. - if lockedVMGPUs := getGPUDevicesLockedByVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name); lockedVMGPUs != nil { - if ok, err := r.checkGPUsCanBeUsedForVM(ctx, lockedVMGPUs.GetGPUIDs()); err != nil { + if lockedVMGPUs := getGPUDevicesLockedByVM(machineCtx.ElfCluster.Spec.Cluster, machineCtx.ElfMachine.Name); lockedVMGPUs != nil { + if ok, err := r.checkGPUsCanBeUsedForVM(machineCtx, lockedVMGPUs.GetGPUIDs()); err != nil { return nil, nil, err } else if ok { - ctx.Logger.V(1).Info("Found locked VM GPU devices, so skip allocation", "lockedVMGPUs", lockedVMGPUs) + log.V(1).Info("Found locked VM GPU devices, so skip allocation", "lockedVMGPUs", lockedVMGPUs) return &lockedVMGPUs.HostID, lockedVMGPUs.GetGPUDeviceInfos(), nil } // If the GPU devices returned by Tower is inconsistent with the locked GPU, // delete the locked GPU devices and reallocate. - ctx.Logger.V(1).Info("Locked VM GPU devices are invalid, so remove and reallocate", "lockedVMGPUs", lockedVMGPUs) + log.V(1).Info("Locked VM GPU devices are invalid, so remove and reallocate", "lockedVMGPUs", lockedVMGPUs) - unlockGPUDevicesLockedByVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name) + unlockGPUDevicesLockedByVM(machineCtx.ElfCluster.Spec.Cluster, machineCtx.ElfMachine.Name) } - hosts, err := ctx.VMService.GetHostsByCluster(ctx.ElfCluster.Spec.Cluster) + hosts, err := machineCtx.VMService.GetHostsByCluster(machineCtx.ElfCluster.Spec.Cluster) if err != nil { return nil, nil, err } - availableHosts := hosts.FilterAvailableHostsWithEnoughMemory(*service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)) + availableHosts := hosts.FilterAvailableHostsWithEnoughMemory(*service.TowerMemory(machineCtx.ElfMachine.Spec.MemoryMiB)) if len(availableHosts) == 0 { - ctx.Logger.V(2).Info("Waiting for enough available hosts") + log.V(2).Info("Waiting for enough available hosts") return nil, nil, nil } // Get all GPU devices of available hosts. 
gpuDeviceUsage := models.GpuDeviceUsagePASSTHROUGH - if ctx.ElfMachine.RequiresVGPUDevices() { + if machineCtx.ElfMachine.RequiresVGPUDevices() { gpuDeviceUsage = models.GpuDeviceUsageVGPU } - gpuVMInfos, err := ctx.VMService.GetGPUDevicesAllocationInfoByHostIDs(availableHosts.IDs(), gpuDeviceUsage) + gpuVMInfos, err := machineCtx.VMService.GetGPUDevicesAllocationInfoByHostIDs(availableHosts.IDs(), gpuDeviceUsage) if err != nil || len(gpuVMInfos) == 0 { return nil, nil, err } @@ -98,7 +103,7 @@ func (r *ElfMachineReconciler) selectHostAndGPUsForVM(ctx *context.MachineContex gpuVMInfos = gpuVMInfos.FilterAvailableGPUVMInfos() // Filter locked GPU devices. - gpuVMInfos = filterGPUVMInfosByLockGPUDevices(ctx.ElfCluster.Spec.Cluster, gpuVMInfos) + gpuVMInfos = filterGPUVMInfosByLockGPUDevices(machineCtx.ElfCluster.Spec.Cluster, gpuVMInfos) // Group GPU deviceInfos by host. hostGPUVMInfoMap := make(map[string]service.GPUVMInfos) @@ -131,21 +136,21 @@ func (r *ElfMachineReconciler) selectHostAndGPUsForVM(ctx *context.MachineContex } var selectedGPUDeviceInfos []*service.GPUDeviceInfo - if ctx.ElfMachine.RequiresPassThroughGPUDevices() { - selectedGPUDeviceInfos = selectGPUDevicesForVM(hostGPUVMInfos, ctx.ElfMachine.Spec.GPUDevices) + if machineCtx.ElfMachine.RequiresPassThroughGPUDevices() { + selectedGPUDeviceInfos = selectGPUDevicesForVM(hostGPUVMInfos, machineCtx.ElfMachine.Spec.GPUDevices) } else { - selectedGPUDeviceInfos = selectVGPUDevicesForVM(hostGPUVMInfos, ctx.ElfMachine.Spec.VGPUDevices) + selectedGPUDeviceInfos = selectVGPUDevicesForVM(hostGPUVMInfos, machineCtx.ElfMachine.Spec.VGPUDevices) } if len(selectedGPUDeviceInfos) > 0 { // Lock the selected GPU devices to prevent it from being allocated to multiple virtual machines. - if !lockGPUDevicesForVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name, unsortedHostIDs[i], selectedGPUDeviceInfos) { + if !lockGPUDevicesForVM(machineCtx.ElfCluster.Spec.Cluster, machineCtx.ElfMachine.Name, unsortedHostIDs[i], selectedGPUDeviceInfos) { // Lock failure indicates that the GPU devices are locked by another virtual machine. // Just trying other hosts. continue } - ctx.Logger.Info("Selected host and GPU devices for VM", "hostId", unsortedHostIDs[i], "gpuDevices", selectedGPUDeviceInfos) + log.Info("Selected host and GPU devices for VM", "hostId", unsortedHostIDs[i], "gpuDevices", selectedGPUDeviceInfos) return &unsortedHostIDs[i], selectedGPUDeviceInfos, nil } @@ -235,8 +240,10 @@ func selectVGPUDevicesForVM(hostGPUVMInfos service.GPUVMInfos, requiredVGPUDevic } // reconcileGPUDevices ensures that the virtual machine has the expected GPU devices. 
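[Editor's note] The selection loop above groups the Tower GPU records by host and then walks the candidate hosts, trying to reserve ("lock") the chosen devices and falling through to the next host when another machine won the race. A stripped-down sketch of that shape follows, with an in-memory lock map standing in for the provider's real GPU lock helpers; the reconcileGPUDevices hunks continue below it.

    package example

    import "sync"

    type gpuInfo struct {
        ID     string
        HostID string
    }

    var (
        mu     sync.Mutex
        locked = map[string]string{} // gpuID -> owner, stand-in for lockGPUDevicesForVM
    )

    // tryLock reserves all GPUs for owner, or reserves none if any is already taken.
    func tryLock(owner string, gpus []gpuInfo) bool {
        mu.Lock()
        defer mu.Unlock()
        for _, g := range gpus {
            if _, taken := locked[g.ID]; taken {
                return false
            }
        }
        for _, g := range gpus {
            locked[g.ID] = owner
        }
        return true
    }

    // selectHost groups GPUs by host and returns the first host whose GPUs
    // could all be locked for owner; "" means no host currently fits.
    // (Host order here is map order; the real code iterates an explicit host list.)
    func selectHost(owner string, gpus []gpuInfo, required int) (string, []gpuInfo) {
        byHost := map[string][]gpuInfo{}
        for _, g := range gpus {
            byHost[g.HostID] = append(byHost[g.HostID], g)
        }
        for hostID, hostGPUs := range byHost {
            if len(hostGPUs) < required {
                continue
            }
            candidate := hostGPUs[:required]
            if tryLock(owner, candidate) {
                return hostID, candidate
            }
            // Lock failure means another VM grabbed the devices first; try other hosts.
        }
        return "", nil
    }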
-func (r *ElfMachineReconciler) reconcileGPUDevices(ctx *context.MachineContext, vm *models.VM) (bool, error) { - if !ctx.ElfMachine.RequiresGPUDevices() { +func (r *ElfMachineReconciler) reconcileGPUDevices(ctx goctx.Context, machineCtx *context.MachineContext, vm *models.VM) (bool, error) { + log := ctrl.LoggerFrom(ctx) + + if !machineCtx.ElfMachine.RequiresGPUDevices() { return true, nil } @@ -245,7 +252,7 @@ func (r *ElfMachineReconciler) reconcileGPUDevices(ctx *context.MachineContext, for i := 0; i < len(vm.GpuDevices); i++ { gpuDevices[i] = infrav1.GPUStatus{GPUID: *vm.GpuDevices[i].ID, Name: *vm.GpuDevices[i].Name} } - ctx.ElfMachine.Status.GPUDevices = gpuDevices + machineCtx.ElfMachine.Status.GPUDevices = gpuDevices if *vm.Status != models.VMStatusSTOPPED { return true, nil @@ -253,15 +260,15 @@ func (r *ElfMachineReconciler) reconcileGPUDevices(ctx *context.MachineContext, // GPU devices has been removed, need to select GPU devices. if len(vm.GpuDevices) == 0 { - return r.addGPUDevicesForVM(ctx, vm) + return r.addGPUDevicesForVM(ctx, machineCtx, vm) } // If the GPU devices are already in use, remove the GPU devices first and then reselect the new GPU devices. - message := conditions.GetMessage(ctx.ElfMachine, infrav1.VMProvisionedCondition) + message := conditions.GetMessage(machineCtx.ElfMachine, infrav1.VMProvisionedCondition) if service.IsGPUAssignFailed(message) || service.IsVGPUInsufficientError(message) { - ctx.Logger.Info("GPU devices of the host are not sufficient and the virtual machine cannot be started, so remove the GPU devices and reallocate.") + log.Info("GPU devices of the host are not sufficient and the virtual machine cannot be started, so remove the GPU devices and reallocate.") - return false, r.removeVMGPUDevices(ctx, vm) + return false, r.removeVMGPUDevices(ctx, machineCtx, vm) } gpuIDs := make([]string, len(vm.GpuDevices)) @@ -269,60 +276,64 @@ func (r *ElfMachineReconciler) reconcileGPUDevices(ctx *context.MachineContext, gpuIDs[i] = *vm.GpuDevices[i].ID } - if ok, err := r.checkGPUsCanBeUsedForVM(ctx, gpuIDs); err != nil { + if ok, err := r.checkGPUsCanBeUsedForVM(machineCtx, gpuIDs); err != nil { return false, err } else if !ok { // If the GPU devices are already in use, // remove the GPU devices first and then reallocate the new GPU devices. - ctx.Logger.V(1).Info("GPU devices of VM are already in use, so remove and reallocate", "gpuIDs", gpuIDs) + log.V(1).Info("GPU devices of VM are already in use, so remove and reallocate", "gpuIDs", gpuIDs) - return false, r.removeVMGPUDevices(ctx, vm) + return false, r.removeVMGPUDevices(ctx, machineCtx, vm) } return true, nil } // addGPUDevicesForVM adds expected GPU devices to the virtual machine. 
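[Editor's note] reconcileGPUDevices, shown above, keeps ElfMachine.Status.GPUDevices as a plain mirror of what Tower reports on the VM and only takes corrective action when the VM is stopped. A compact sketch of that mirror-then-decide flow, with simplified stand-in types rather than the provider's structs; the addGPUDevicesForVM hunks follow the sketch.

    package example

    type gpuStatus struct{ ID, Name string }

    type vm struct {
        Stopped bool
        GPUs    []gpuStatus
    }

    type machineStatus struct{ GPUDevices []gpuStatus }

    // syncGPUStatus mirrors the VM's GPU list into status and reports whether
    // the caller can continue (true) or must first fix the GPU attachment (false).
    func syncGPUStatus(st *machineStatus, v vm, wantGPUs bool) bool {
        // Always reflect reality in status, even if we bail out below.
        st.GPUDevices = append([]gpuStatus(nil), v.GPUs...)

        if !v.Stopped {
            // A running VM is left alone; GPUs are only (re)attached while it is stopped.
            return true
        }
        if wantGPUs && len(v.GPUs) == 0 {
            // Devices were stripped (for example after a failed start); reattach first.
            return false
        }
        return true
    }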
-func (r *ElfMachineReconciler) addGPUDevicesForVM(ctx *context.MachineContext, vm *models.VM) (bool, error) { - hostID, gpuDeviceInfos, err := r.selectHostAndGPUsForVM(ctx, *vm.Host.ID) +func (r *ElfMachineReconciler) addGPUDevicesForVM(ctx goctx.Context, machineCtx *context.MachineContext, vm *models.VM) (bool, error) { + log := ctrl.LoggerFrom(ctx) + + hostID, gpuDeviceInfos, err := r.selectHostAndGPUsForVM(ctx, machineCtx, *vm.Host.ID) if err != nil || hostID == nil { return false, err } if *vm.Host.ID != *hostID { - ctx.Logger.Info("The current host does not have enough GPU devices, the virtual machine needs to be migrated to a host that meets the GPU device requirements.", "currentHost", *vm.Host.ID, "targetHost", *hostID) + log.Info("The current host does not have enough GPU devices, the virtual machine needs to be migrated to a host that meets the GPU device requirements.", "currentHost", *vm.Host.ID, "targetHost", *hostID) - ok, err := r.migrateVM(ctx, vm, *hostID) + ok, err := r.migrateVM(ctx, machineCtx, vm, *hostID) if err != nil { - unlockGPUDevicesLockedByVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name) + unlockGPUDevicesLockedByVM(machineCtx.ElfCluster.Spec.Cluster, machineCtx.ElfMachine.Name) } return ok, err } - task, err := ctx.VMService.AddGPUDevices(ctx.ElfMachine.Status.VMRef, gpuDeviceInfos) + task, err := machineCtx.VMService.AddGPUDevices(machineCtx.ElfMachine.Status.VMRef, gpuDeviceInfos) if err != nil { - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.AttachingGPUFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.AttachingGPUFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) - unlockGPUDevicesLockedByVM(ctx.ElfCluster.Spec.Cluster, ctx.ElfMachine.Name) + unlockGPUDevicesLockedByVM(machineCtx.ElfCluster.Spec.Cluster, machineCtx.ElfMachine.Name) - return false, errors.Wrapf(err, "failed to trigger attaching GPU devices for VM %s", ctx) + return false, errors.Wrapf(err, "failed to trigger attaching GPU devices for VM %s", machineCtx.ElfMachine.Status.VMRef) } - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.UpdatingReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.UpdatingReason, clusterv1.ConditionSeverityInfo, "") - ctx.ElfMachine.SetTask(*task.ID) + machineCtx.ElfMachine.SetTask(*task.ID) - ctx.Logger.Info("Waiting for VM to attach GPU devices", "vmRef", ctx.ElfMachine.Status.VMRef, "taskRef", ctx.ElfMachine.Status.TaskRef) + log.Info("Waiting for VM to attach GPU devices", "vmRef", machineCtx.ElfMachine.Status.VMRef, "taskRef", machineCtx.ElfMachine.Status.TaskRef) return false, nil } // removeVMGPUDevices removes all GPU devices from the virtual machine. 
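[Editor's note] Besides threading ctx through, the addGPUDevicesForVM hunk above also fixes what gets formatted into the wrapped error (the VM reference instead of the whole context value) and keeps the compensation step of unlocking the reserved GPUs whenever the migrate or attach call fails. A small sketch of that wrap-and-compensate pattern using github.com/pkg/errors; attachGPUs and unlockGPUs are hypothetical stand-ins. The removeVMGPUDevices diff continues below.

    package example

    import (
        "fmt"

        "github.com/pkg/errors"
    )

    // attachGPUs and unlockGPUs stand in for the Tower service call and the
    // in-memory GPU lock release used by the provider.
    func attachGPUs(vmRef string) error { return fmt.Errorf("tower unavailable") }
    func unlockGPUs(vmName string)      {}

    func addGPUs(vmRef, vmName string) error {
        if err := attachGPUs(vmRef); err != nil {
            // Release the reservation so another reconcile (or machine) can retry,
            // and wrap the error with the VM reference rather than a context value.
            unlockGPUs(vmName)
            return errors.Wrapf(err, "failed to trigger attaching GPU devices for VM %s", vmRef)
        }
        return nil
    }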
-func (r *ElfMachineReconciler) removeVMGPUDevices(ctx *context.MachineContext, vm *models.VM) error { +func (r *ElfMachineReconciler) removeVMGPUDevices(ctx goctx.Context, machineCtx *context.MachineContext, vm *models.VM) error { + log := ctrl.LoggerFrom(ctx) + var staleGPUs []*models.VMGpuOperationParams - if ctx.ElfMachine.RequiresVGPUDevices() { - vmGPUInfo, err := ctx.VMService.GetVMGPUAllocationInfo(*vm.ID) + if machineCtx.ElfMachine.RequiresVGPUDevices() { + vmGPUInfo, err := machineCtx.VMService.GetVMGPUAllocationInfo(*vm.ID) if err != nil { return err } @@ -342,34 +353,34 @@ func (r *ElfMachineReconciler) removeVMGPUDevices(ctx *context.MachineContext, v } } - task, err := ctx.VMService.RemoveGPUDevices(ctx.ElfMachine.Status.VMRef, staleGPUs) + task, err := machineCtx.VMService.RemoveGPUDevices(machineCtx.ElfMachine.Status.VMRef, staleGPUs) if err != nil { // If the GPU/vGPU is removed due to insufficient GPU/vGPU, // the original error message will not be overwritten if the remove fails. - message := conditions.GetMessage(ctx.ElfMachine, infrav1.VMProvisionedCondition) + message := conditions.GetMessage(machineCtx.ElfMachine, infrav1.VMProvisionedCondition) if !(service.IsGPUAssignFailed(message) || service.IsVGPUInsufficientError(message)) { - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.DetachingGPUFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.DetachingGPUFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) } - return errors.Wrapf(err, "failed to trigger detaching stale GPU devices for VM %s", ctx) + return errors.Wrapf(err, "failed to trigger detaching stale GPU devices for VM %s", machineCtx.ElfMachine.Status.VMRef) } - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.UpdatingReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.UpdatingReason, clusterv1.ConditionSeverityInfo, "") - ctx.ElfMachine.SetTask(*task.ID) + machineCtx.ElfMachine.SetTask(*task.ID) - ctx.Logger.Info("Waiting for VM to be removed stale GPU devices", "vmRef", ctx.ElfMachine.Status.VMRef, "taskRef", ctx.ElfMachine.Status.TaskRef) + log.Info("Waiting for VM to be removed stale GPU devices", "vmRef", machineCtx.ElfMachine.Status.VMRef, "taskRef", machineCtx.ElfMachine.Status.TaskRef) return nil } // checkGPUsCanBeUsedForVM checks whether GPU devices can be used by the specified virtual machine. // The return true means the GPU devices can be used for the virtual machine. 
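[Editor's note] One subtle behavior preserved by the removeVMGPUDevices hunk above: if the VMProvisioned condition already records a GPU-insufficiency failure, a later detach failure must not overwrite that message, because it is the reason the detach is happening at all. A sketch of that guard with a toy condition store; the real code uses sigs.k8s.io/cluster-api/util/conditions and the service.IsGPUAssignFailed / IsVGPUInsufficientError checks, and the substrings below are only illustrative. The checkGPUsCanBeUsedForVM diff resumes after the sketch.

    package example

    import "strings"

    type condition struct {
        Reason  string
        Message string
    }

    // markDetachFailed records a detach failure unless the existing condition
    // already explains a GPU shortage, which is the more useful root cause.
    func markDetachFailed(c *condition, detachErr error) {
        // Illustrative substrings; the provider has dedicated helpers for these checks.
        alreadyGPUShortage := strings.Contains(c.Message, "GPU assign failed") ||
            strings.Contains(c.Message, "insufficient vGPU")
        if alreadyGPUShortage {
            return // keep the original, more specific message
        }
        c.Reason = "DetachingGPUFailed"
        c.Message = detachErr.Error()
    }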
-func (r *ElfMachineReconciler) checkGPUsCanBeUsedForVM(ctx *context.MachineContext, gpuDeviceIDs []string) (bool, error) { +func (r *ElfMachineReconciler) checkGPUsCanBeUsedForVM(machineCtx *context.MachineContext, gpuDeviceIDs []string) (bool, error) { gpuVMInfos := getGPUVMInfosFromCache(gpuDeviceIDs) if gpuVMInfos.Len() != len(gpuDeviceIDs) { var err error - gpuVMInfos, err = ctx.VMService.GetGPUDevicesAllocationInfoByIDs(gpuDeviceIDs) + gpuVMInfos, err = machineCtx.VMService.GetGPUDevicesAllocationInfoByIDs(gpuDeviceIDs) if err != nil || len(gpuVMInfos) != len(gpuDeviceIDs) { return false, err } @@ -377,7 +388,7 @@ func (r *ElfMachineReconciler) checkGPUsCanBeUsedForVM(ctx *context.MachineConte setGPUVMInfosCache(gpuVMInfos) } - if service.HasGPUsCanNotBeUsedForVM(gpuVMInfos, ctx.ElfMachine) { + if service.HasGPUsCanNotBeUsedForVM(gpuVMInfos, machineCtx.ElfMachine) { return false, nil } diff --git a/controllers/elfmachine_controller_gpu_test.go b/controllers/elfmachine_controller_gpu_test.go index e9c1283d..c079049e 100644 --- a/controllers/elfmachine_controller_gpu_test.go +++ b/controllers/elfmachine_controller_gpu_test.go @@ -32,7 +32,6 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/controller-runtime/pkg/client" - ctrllog "sigs.k8s.io/controller-runtime/pkg/log" infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" "github.com/smartxworks/cluster-api-provider-elf/pkg/context" @@ -93,12 +92,12 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { It("should not handle ElfMachine without GPU", func() { elfMachine.Spec.GPUDevices = nil - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - host, gpus, err := reconciler.selectHostAndGPUsForVM(machineContext, "") + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + host, gpus, err := reconciler.selectHostAndGPUsForVM(ctx, machineContext, "") Expect(err).NotTo(HaveOccurred()) Expect(*host).To(BeEmpty()) Expect(gpus).To(BeEmpty()) @@ -111,14 +110,14 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { gpuVMInfo.Model = service.TowerString(gpuModel) gpuIDs := []string{*gpuVMInfo.ID} gpuVMInfos := service.NewGPUVMInfos(gpuVMInfo) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) mockVMService.EXPECT().GetGPUDevicesAllocationInfoByHostIDs([]string{*host.ID}, models.GpuDeviceUsagePASSTHROUGH).Return(gpuVMInfos, nil) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, 
elfMachine, machine, mockVMService) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - hostID, gpus, err := reconciler.selectHostAndGPUsForVM(machineContext, "") + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + hostID, gpus, err := reconciler.selectHostAndGPUsForVM(ctx, machineContext, "") Expect(err).NotTo(HaveOccurred()) Expect(*hostID).To(Equal(*host.ID)) Expect(gpus).To(HaveLen(1)) @@ -126,7 +125,7 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { Expect(gpus[0].AllocatedCount).To(Equal(int32(1))) mockVMService.EXPECT().GetGPUDevicesAllocationInfoByIDs([]string{*gpuVMInfo.ID}).Return(gpuVMInfos, nil) - hostID, gpus, err = reconciler.selectHostAndGPUsForVM(machineContext, "") + hostID, gpus, err = reconciler.selectHostAndGPUsForVM(ctx, machineContext, "") Expect(err).NotTo(HaveOccurred()) Expect(*hostID).To(Equal(*host.ID)) Expect(gpus).To(HaveLen(1)) @@ -139,7 +138,7 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { gpuVMInfo.Vms = []*models.GpuVMDetail{{ID: service.TowerString("id"), Name: service.TowerString("vm"), Status: models.NewVMStatus(models.VMStatusRUNNING)}} mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(nil, nil) mockVMService.EXPECT().GetGPUDevicesAllocationInfoByIDs([]string{*gpuVMInfo.ID}).Return(gpuVMInfos, nil) - hostID, gpus, err = reconciler.selectHostAndGPUsForVM(machineContext, "") + hostID, gpus, err = reconciler.selectHostAndGPUsForVM(ctx, machineContext, "") Expect(err).NotTo(HaveOccurred()) Expect(hostID).To(BeNil()) Expect(gpus).To(BeEmpty()) @@ -157,14 +156,14 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { preferredGPUVMInfo := fake.NewTowerGPUVMInfo() preferredGPUVMInfo.Host = &models.NestedHost{ID: preferredHost.ID} gpuVMInfos := service.NewGPUVMInfos(gpuVMInfo, preferredGPUVMInfo) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host, preferredHost), nil) mockVMService.EXPECT().GetGPUDevicesAllocationInfoByHostIDs(gomock.InAnyOrder([]string{*host.ID, *preferredHost.ID}), models.GpuDeviceUsagePASSTHROUGH).Return(gpuVMInfos, nil) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - hostID, gpus, err := reconciler.selectHostAndGPUsForVM(machineContext, *preferredHost.ID) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + hostID, gpus, err := reconciler.selectHostAndGPUsForVM(ctx, machineContext, *preferredHost.ID) Expect(err).NotTo(HaveOccurred()) Expect(*hostID).To(Equal(*preferredHost.ID)) Expect(gpus).To(HaveLen(1)) @@ -185,27 +184,27 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { requiredVGPUDevice := infrav1.VGPUDeviceSpec{Type: vGPUType, Count: 3} elfMachine.Spec.GPUDevices = nil 
elfMachine.Spec.VGPUDevices = []infrav1.VGPUDeviceSpec{requiredVGPUDevice} - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(), nil) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - hostID, gpus, err := reconciler.selectHostAndGPUsForVM(machineContext, "") + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + hostID, gpus, err := reconciler.selectHostAndGPUsForVM(ctx, machineContext, "") Expect(err).NotTo(HaveOccurred()) Expect(hostID).To(BeNil()) Expect(gpus).To(BeEmpty()) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) mockVMService.EXPECT().GetGPUDevicesAllocationInfoByHostIDs([]string{*host.ID}, models.GpuDeviceUsageVGPU).Return(service.NewGPUVMInfos(), nil) - hostID, gpus, err = reconciler.selectHostAndGPUsForVM(machineContext, "") + hostID, gpus, err = reconciler.selectHostAndGPUsForVM(ctx, machineContext, "") Expect(err).NotTo(HaveOccurred()) Expect(hostID).To(BeNil()) Expect(gpus).To(BeEmpty()) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) mockVMService.EXPECT().GetGPUDevicesAllocationInfoByHostIDs([]string{*host.ID}, models.GpuDeviceUsageVGPU).Return(gpuVMInfos, nil) - hostID, gpus, err = reconciler.selectHostAndGPUsForVM(machineContext, "") + hostID, gpus, err = reconciler.selectHostAndGPUsForVM(ctx, machineContext, "") Expect(err).NotTo(HaveOccurred()) Expect(hostID).NotTo(BeNil()) Expect(*hostID).To(Equal(*host.ID)) @@ -231,7 +230,7 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { }}) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) mockVMService.EXPECT().GetGPUDevicesAllocationInfoByHostIDs([]string{*host.ID}, models.GpuDeviceUsageVGPU).Return(gpuVMInfos, nil) - hostID, gpus, err = reconciler.selectHostAndGPUsForVM(machineContext, "") + hostID, gpus, err = reconciler.selectHostAndGPUsForVM(ctx, machineContext, "") Expect(err).NotTo(HaveOccurred()) Expect(hostID).To(BeNil()) Expect(gpus).To(BeEmpty()) @@ -246,12 +245,12 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { It("should not handle ElfMachine without GPU", func() { elfMachine.Spec.GPUDevices = nil vm := fake.NewTowerVMFromElfMachine(elfMachine) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := 
reconciler.reconcileGPUDevices(machineContext, vm) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.reconcileGPUDevices(ctx, machineContext, vm) Expect(err).NotTo(HaveOccurred()) Expect(ok).To(BeTrue()) }) @@ -260,12 +259,12 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { vm := fake.NewTowerVMFromElfMachine(elfMachine) vm.Status = models.NewVMStatus(models.VMStatusRUNNING) vm.GpuDevices = []*models.NestedGpuDevice{{ID: service.TowerString(fake.ID()), Name: service.TowerString(fake.ID())}} - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.reconcileGPUDevices(machineContext, vm) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.reconcileGPUDevices(ctx, machineContext, vm) Expect(err).NotTo(HaveOccurred()) Expect(ok).To(BeTrue()) Expect(elfMachine.Status.GPUDevices).To(Equal([]infrav1.GPUStatus{{GPUID: *vm.GpuDevices[0].ID, Name: *vm.GpuDevices[0].Name}})) @@ -276,13 +275,13 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { vm := fake.NewTowerVMFromElfMachine(elfMachine) vm.Host = &models.NestedHost{ID: host.ID} vm.Status = models.NewVMStatus(models.VMStatusSTOPPED) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(nil, unexpectedError) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.reconcileGPUDevices(machineContext, vm) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.reconcileGPUDevices(ctx, machineContext, vm) Expect(err).To(HaveOccurred()) Expect(ok).To(BeFalse()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForAvailableHostWithEnoughGPUsReason}}) @@ -295,13 +294,13 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { vm.Status = models.NewVMStatus(models.VMStatusSTOPPED) vm.GpuDevices = []*models.NestedGpuDevice{{ID: service.TowerString(fake.ID()), Name: service.TowerString(gpuModel)}} conditions.MarkFalse(elfMachine, infrav1.VMProvisionedCondition, 
infrav1.TaskFailureReason, clusterv1.ConditionSeverityInfo, service.GPUAssignFailed) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().RemoveGPUDevices(elfMachine.Status.VMRef, gomock.Len(1)).Return(nil, unexpectedError) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.reconcileGPUDevices(machineContext, vm) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.reconcileGPUDevices(ctx, machineContext, vm) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring(unexpectedError.Error())) Expect(ok).To(BeFalse()) @@ -318,14 +317,14 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { vm.Host = &models.NestedHost{ID: host.ID} vm.Status = models.NewVMStatus(models.VMStatusSTOPPED) vm.GpuDevices = []*models.NestedGpuDevice{{ID: gpuVMInfo.ID, Name: gpuVMInfo.Model}} - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetGPUDevicesAllocationInfoByIDs([]string{*gpuVMInfo.ID}).Times(2).Return(gpuVMInfos, nil) mockVMService.EXPECT().RemoveGPUDevices(elfMachine.Status.VMRef, gomock.Len(1)).Return(nil, unexpectedError) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.reconcileGPUDevices(machineContext, vm) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.reconcileGPUDevices(ctx, machineContext, vm) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring(unexpectedError.Error())) Expect(ok).To(BeFalse()) @@ -333,7 +332,7 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { removeGPUVMInfosCache([]string{*gpuVMInfo.ID}) gpuVMInfo.Vms = []*models.GpuVMDetail{{ID: vm.ID, Name: vm.Name, Status: models.NewVMStatus(models.VMStatusRUNNING)}} - ok, err = reconciler.reconcileGPUDevices(machineContext, vm) + ok, err = reconciler.reconcileGPUDevices(ctx, machineContext, vm) Expect(err).NotTo(HaveOccurred()) Expect(ok).To(BeTrue()) }) @@ -354,15 +353,15 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { gpuVMInfos := service.NewGPUVMInfos(gpuVMInfo) task := fake.NewTowerTask() withTaskVM := fake.NewWithTaskVM(vm, task) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := 
fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Times(2).Return(service.NewHosts(host), nil) mockVMService.EXPECT().GetGPUDevicesAllocationInfoByHostIDs([]string{*host.ID}, models.GpuDeviceUsagePASSTHROUGH).Times(2).Return(gpuVMInfos, nil) mockVMService.EXPECT().Migrate(*vm.ID, *host.ID).Return(withTaskVM, nil) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.addGPUDevicesForVM(machineContext, vm) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.addGPUDevicesForVM(ctx, machineContext, vm) Expect(err).NotTo(HaveOccurred()) Expect(ok).To(BeFalse()) Expect(elfMachine.Status.TaskRef).To(Equal(*task.ID)) @@ -371,7 +370,7 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { elfMachine.Status.TaskRef = "" unlockGPUDevicesLockedByVM(elfCluster.Spec.Cluster, elfMachine.Name) mockVMService.EXPECT().Migrate(*vm.ID, *host.ID).Return(nil, unexpectedError) - ok, err = reconciler.addGPUDevicesForVM(machineContext, vm) + ok, err = reconciler.addGPUDevicesForVM(ctx, machineContext, vm) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring(unexpectedError.Error())) Expect(ok).To(BeFalse()) @@ -387,15 +386,15 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { gpuVMInfo.Host = &models.NestedHost{ID: host.ID} gpuVMInfos := service.NewGPUVMInfos(gpuVMInfo) task := fake.NewTowerTask() - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Times(2).Return(service.NewHosts(host), nil) mockVMService.EXPECT().GetGPUDevicesAllocationInfoByHostIDs([]string{*host.ID}, models.GpuDeviceUsagePASSTHROUGH).Times(2).Return(gpuVMInfos, nil) mockVMService.EXPECT().AddGPUDevices(elfMachine.Status.VMRef, gomock.Any()).Return(task, nil) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.addGPUDevicesForVM(machineContext, vm) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.addGPUDevicesForVM(ctx, machineContext, vm) Expect(err).NotTo(HaveOccurred()) Expect(ok).To(BeFalse()) Expect(elfMachine.Status.TaskRef).To(Equal(*task.ID)) @@ -404,7 +403,7 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { elfMachine.Status.TaskRef = "" unlockGPUDevicesLockedByVM(elfCluster.Spec.Cluster, elfMachine.Name) mockVMService.EXPECT().AddGPUDevices(elfMachine.Status.VMRef, gomock.Any()).Return(task, unexpectedError) - ok, err = 
reconciler.addGPUDevicesForVM(machineContext, vm) + ok, err = reconciler.addGPUDevicesForVM(ctx, machineContext, vm) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring(unexpectedError.Error())) Expect(ok).To(BeFalse()) @@ -419,20 +418,20 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { elfMachine.Status.VMRef = *vm.LocalID vm.GpuDevices = []*models.NestedGpuDevice{{ID: service.TowerString(fake.ID()), Name: service.TowerString("A16")}} task := fake.NewTowerTask() - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().RemoveGPUDevices(elfMachine.Status.VMRef, gomock.Len(1)).Return(nil, unexpectedError) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - err := reconciler.removeVMGPUDevices(machineContext, vm) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + err := reconciler.removeVMGPUDevices(ctx, machineContext, vm) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring(unexpectedError.Error())) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityWarning, infrav1.DetachingGPUFailedReason}}) Expect(elfMachine.Status.TaskRef).To(BeEmpty()) mockVMService.EXPECT().RemoveGPUDevices(elfMachine.Status.VMRef, gomock.Len(1)).Return(task, nil) - err = reconciler.removeVMGPUDevices(machineContext, vm) + err = reconciler.removeVMGPUDevices(ctx, machineContext, vm) Expect(err).NotTo(HaveOccurred()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.UpdatingReason}}) Expect(elfMachine.Status.TaskRef).To(Equal(*task.ID)) @@ -442,11 +441,11 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { elfMachine.Spec.GPUDevices = nil elfMachine.Spec.VGPUDevices = []infrav1.VGPUDeviceSpec{{Type: vGPUType, Count: 2}} conditions.MarkFalse(elfMachine, infrav1.VMProvisionedCondition, infrav1.TaskFailureReason, clusterv1.ConditionSeverityInfo, service.VGPUInsufficientError) - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetVMGPUAllocationInfo(*vm.ID).Return(vmGPUInfo, nil) mockVMService.EXPECT().RemoveGPUDevices(elfMachine.Status.VMRef, []*models.VMGpuOperationParams{{GpuID: service.TowerString(gpuID), Amount: service.TowerInt32(2)}}).Return(nil, unexpectedError) - err = reconciler.removeVMGPUDevices(machineContext, vm) + err = reconciler.removeVMGPUDevices(ctx, machineContext, vm) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring(unexpectedError.Error())) expectConditions(elfMachine, 
[]conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.TaskFailureReason}}) @@ -468,25 +467,18 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { elfMachine.Status.HostServerRef = fake.UUID() elfMachine.Status.HostServerName = fake.UUID() vm := fake.NewTowerVM() - ctrlMgrContext := &context.ControllerManagerContext{ - Context: goctx.Background(), + ctrlMgrCtx := &context.ControllerManagerContext{ Client: testEnv.Client, - Logger: ctrllog.Log, Name: fake.ControllerManagerName, LeaderElectionNamespace: fake.LeaderElectionNamespace, LeaderElectionID: fake.LeaderElectionID, } - ctrlContext := &context.ControllerContext{ - ControllerManagerContext: ctrlMgrContext, - Logger: ctrllog.Log, - } + machineContext := &context.MachineContext{ - ControllerContext: ctrlContext, - Cluster: cluster, - Machine: machine, - ElfCluster: elfCluster, - ElfMachine: elfMachine, - Logger: ctrllog.Log, + Cluster: cluster, + Machine: machine, + ElfCluster: elfCluster, + ElfMachine: elfMachine, } node = &corev1.Node{ @@ -498,8 +490,8 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { Expect(testEnv.CreateAndWait(ctx, node)).To(Succeed()) Expect(helpers.CreateKubeConfigSecret(testEnv, cluster.Namespace, cluster.Name)).To(Succeed()) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.reconcileNode(machineContext, vm) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.reconcileNode(ctx, machineContext, vm) Expect(ok).Should(BeTrue()) Expect(err).ToNot(HaveOccurred()) Eventually(func() bool { @@ -523,12 +515,12 @@ var _ = Describe("ElfMachineReconciler-GPU", func() { gpuVMInfo.Host = &models.NestedHost{ID: host.ID} gpuIDs := []string{*gpuVMInfo.ID} elfMachine.Spec.GPUDevices = append(elfMachine.Spec.GPUDevices, infrav1.GPUPassthroughDeviceSpec{Model: "A16", Count: 1}) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetGPUDevicesAllocationInfoByIDs(gpuIDs).Return(service.NewGPUVMInfos(), nil) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} ok, err := reconciler.checkGPUsCanBeUsedForVM(machineContext, gpuIDs) Expect(err).NotTo(HaveOccurred()) Expect(ok).To(BeFalse()) diff --git a/controllers/elfmachine_controller_placement_group.go b/controllers/elfmachine_controller_placement_group.go index ba204e05..b9ef0997 100644 --- a/controllers/elfmachine_controller_placement_group.go +++ b/controllers/elfmachine_controller_placement_group.go @@ -17,6 +17,7 @@ limitations under the License. 
package controllers import ( + goctx "context" "fmt" "strings" @@ -24,6 +25,7 @@ import ( "github.com/smartxworks/cloudtower-go-sdk/v2/models" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" "k8s.io/utils/pointer" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" @@ -32,6 +34,7 @@ import ( "sigs.k8s.io/cluster-api/util/collections" "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/patch" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/reconcile" infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" @@ -47,13 +50,13 @@ import ( ) // reconcilePlacementGroup makes sure that the placement group exist. -func (r *ElfMachineReconciler) reconcilePlacementGroup(ctx *context.MachineContext) (reconcile.Result, error) { - placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctx.Client, ctx.Machine, ctx.Cluster) +func (r *ElfMachineReconciler) reconcilePlacementGroup(ctx goctx.Context, machineCtx *context.MachineContext) (reconcile.Result, error) { + placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, r.Client, machineCtx.Machine, machineCtx.Cluster) if err != nil { return reconcile.Result{}, err } - if placementGroup, err := r.getPlacementGroup(ctx, placementGroupName); err != nil { + if placementGroup, err := r.getPlacementGroup(ctx, machineCtx, placementGroupName); err != nil { if !service.IsVMPlacementGroupNotFound(err) { return reconcile.Result{}, err } @@ -64,7 +67,7 @@ func (r *ElfMachineReconciler) reconcilePlacementGroup(ctx *context.MachineConte return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, nil } - if placementGroup, err := r.createPlacementGroup(ctx, placementGroupName); err != nil { + if placementGroup, err := r.createPlacementGroup(ctx, machineCtx, placementGroupName); err != nil { return reconcile.Result{}, err } else if placementGroup == nil { return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, err @@ -76,26 +79,28 @@ func (r *ElfMachineReconciler) reconcilePlacementGroup(ctx *context.MachineConte return reconcile.Result{}, nil } -func (r *ElfMachineReconciler) createPlacementGroup(ctx *context.MachineContext, placementGroupName string) (*models.VMPlacementGroup, error) { +func (r *ElfMachineReconciler) createPlacementGroup(ctx goctx.Context, machineCtx *context.MachineContext, placementGroupName string) (*models.VMPlacementGroup, error) { + log := ctrl.LoggerFrom(ctx) + // TODO: This will be removed when Tower fixes issue with placement group data syncing. 
if ok := canCreatePlacementGroup(placementGroupName); !ok { - ctx.Logger.V(2).Info(fmt.Sprintf("Tower has duplicate placement group, skip creating placement group %s", placementGroupName)) + log.V(2).Info(fmt.Sprintf("Tower has duplicate placement group, skip creating placement group %s", placementGroupName)) return nil, nil } - towerCluster, err := ctx.VMService.GetCluster(ctx.ElfCluster.Spec.Cluster) + towerCluster, err := machineCtx.VMService.GetCluster(machineCtx.ElfCluster.Spec.Cluster) if err != nil { return nil, err } - placementGroupPolicy := towerresources.GetVMPlacementGroupPolicy(ctx.Machine) - withTaskVMPlacementGroup, err := ctx.VMService.CreateVMPlacementGroup(placementGroupName, *towerCluster.ID, placementGroupPolicy) + placementGroupPolicy := towerresources.GetVMPlacementGroupPolicy(machineCtx.Machine) + withTaskVMPlacementGroup, err := machineCtx.VMService.CreateVMPlacementGroup(placementGroupName, *towerCluster.ID, placementGroupPolicy) if err != nil { return nil, err } - task, err := ctx.VMService.WaitTask(ctx, *withTaskVMPlacementGroup.TaskID, config.WaitTaskTimeoutForPlacementGroupOperation, config.WaitTaskInterval) + task, err := machineCtx.VMService.WaitTask(ctx, *withTaskVMPlacementGroup.TaskID, config.WaitTaskTimeoutForPlacementGroupOperation, config.WaitTaskInterval) if err != nil { // The default timeout for Tower to create a placement group is one minute. // When current task times out, duplicate placement groups may or may not appear. @@ -109,15 +114,15 @@ func (r *ElfMachineReconciler) createPlacementGroup(ctx *context.MachineContext, if service.IsVMPlacementGroupDuplicate(service.GetTowerString(task.ErrorMessage)) { setPlacementGroupDuplicate(placementGroupName) - ctx.Logger.Info(fmt.Sprintf("Duplicate placement group detected, will try again in %s", placementGroupSilenceTime), "placementGroup", placementGroupName) + log.Info(fmt.Sprintf("Duplicate placement group detected, will try again in %s", placementGroupSilenceTime), "placementGroup", placementGroupName) } return nil, errors.Errorf("failed to create placement group %s in task %s", placementGroupName, *task.ID) } - ctx.Logger.Info("Creating placement group succeeded", "taskID", *task.ID, "placementGroup", placementGroupName) + log.Info("Creating placement group succeeded", "taskID", *task.ID, "placementGroup", placementGroupName) - placementGroup, err := r.getPlacementGroup(ctx, placementGroupName) + placementGroup, err := r.getPlacementGroup(ctx, machineCtx, placementGroupName) if err != nil { return nil, err } @@ -131,53 +136,55 @@ func (r *ElfMachineReconciler) createPlacementGroup(ctx *context.MachineContext, // 1. nil means there are not enough hosts. // 2. An empty string indicates that there is an available host. // 3. A non-empty string indicates that the specified host ID was returned. 
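[Editor's note] createPlacementGroup, shown above, keeps the existing workaround for Tower's duplicate-placement-group issue: once a duplicate is detected, creation for that name is silenced for a while and later reconciles skip it. A minimal sketch of that time-based silencing, assuming an arbitrary window in place of the provider's placementGroupSilenceTime and a simple map in place of its cache; the preCheckPlacementGroup diff follows the sketch.

    package example

    import (
        "sync"
        "time"
    )

    const silenceTime = 5 * time.Minute // assumed value, stand-in for placementGroupSilenceTime

    var (
        mu        sync.Mutex
        duplicate = map[string]time.Time{} // placement group name -> when the duplicate was seen
    )

    // markDuplicate records that Tower reported a duplicate for this name.
    func markDuplicate(name string) {
        mu.Lock()
        defer mu.Unlock()
        duplicate[name] = time.Now()
    }

    // canCreate reports whether enough time has passed to retry creating the group.
    func canCreate(name string) bool {
        mu.Lock()
        defer mu.Unlock()
        seen, ok := duplicate[name]
        if !ok {
            return true
        }
        if time.Since(seen) > silenceTime {
            delete(duplicate, name)
            return true
        }
        return false
    }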
-func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContext) (rethost *string, reterr error) { +func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx goctx.Context, machineCtx *context.MachineContext) (rethost *string, reterr error) { + log := ctrl.LoggerFrom(ctx) + defer func() { if rethost == nil { - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForAvailableHostRequiredByPlacementGroupReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForAvailableHostRequiredByPlacementGroupReason, clusterv1.ConditionSeverityInfo, "") } }() - if !machineutil.IsControlPlaneMachine(ctx.Machine) { + if !machineutil.IsControlPlaneMachine(machineCtx.Machine) { return pointer.String(""), nil } - placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctx.Client, ctx.Machine, ctx.Cluster) + placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, r.Client, machineCtx.Machine, machineCtx.Cluster) if err != nil { return nil, err } - placementGroup, err := r.getPlacementGroup(ctx, placementGroupName) + placementGroup, err := r.getPlacementGroup(ctx, machineCtx, placementGroupName) if err != nil || placementGroup == nil { return nil, err } - usedHostSetByPG, err := r.getHostsInPlacementGroup(ctx, placementGroup) + usedHostSetByPG, err := r.getHostsInPlacementGroup(machineCtx, placementGroup) if err != nil { return nil, err } - hosts, err := ctx.VMService.GetHostsByCluster(ctx.ElfCluster.Spec.Cluster) + hosts, err := machineCtx.VMService.GetHostsByCluster(machineCtx.ElfCluster.Spec.Cluster) if err != nil { return nil, err } usedHostsByPG := hosts.Find(usedHostSetByPG) - availableHosts := r.getAvailableHostsForVM(ctx, hosts, usedHostsByPG, nil) + availableHosts := r.getAvailableHostsForVM(machineCtx, hosts, usedHostsByPG, nil) if !availableHosts.IsEmpty() { - ctx.Logger.V(1).Info("The placement group still has capacity", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String()) + log.V(1).Info("The placement group still has capacity", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String()) return pointer.String(""), nil } - kcp, err := machineutil.GetKCPByMachine(ctx, ctx.Client, ctx.Machine) + kcp, err := machineutil.GetKCPByMachine(ctx, r.Client, machineCtx.Machine) if err != nil { return nil, err } // When KCP is not in rolling update and not in scaling down, just return since the placement group is full. if !kcputil.IsKCPInRollingUpdate(kcp) && !kcputil.IsKCPInScalingDown(kcp) { - ctx.Logger.V(1).Info("KCP is not in rolling update and not in scaling down, the placement group is full, so wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) + log.V(1).Info("KCP is not in rolling update and not in scaling down, the placement group is full, so wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) return nil, nil } @@ -189,17 +196,17 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex // so the Machine will not be deleted. // We can add delete machine annotation on the Machine and KCP will delete it. 
if kcputil.IsKCPInScalingDown(kcp) { - if annotationsutil.HasAnnotation(ctx.Machine, clusterv1.DeleteMachineAnnotation) { + if annotationsutil.HasAnnotation(machineCtx.Machine, clusterv1.DeleteMachineAnnotation) { return nil, nil } - newMachine := ctx.Machine.DeepCopy() - patchHelper, err := patch.NewHelper(ctx.Machine, r.Client) + newMachine := machineCtx.Machine.DeepCopy() + patchHelper, err := patch.NewHelper(machineCtx.Machine, r.Client) if err != nil { - return nil, errors.Wrapf(err, "failed to init patch helper for %s %s/%s", ctx.Machine.GroupVersionKind(), ctx.Machine.Namespace, ctx.Machine.Name) + return nil, errors.Wrapf(err, "failed to init patch helper") } - ctx.Logger.Info("Add the delete machine annotation on KCP Machine in order to delete it, because KCP is being scaled down after a failed scaling up", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String()) + log.Info("Add the delete machine annotation on KCP Machine in order to delete it, because KCP is being scaled down after a failed scaling up", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String()) // Allow scaling down of KCP with the possibility of marking specific control plane machine(s) to be deleted with delete annotation key. // The presence of the annotation will affect the rollout strategy in a way that, it implements the following prioritization logic in descending order, @@ -211,8 +218,8 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex // // Refer to https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20191017-kubeadm-based-control-plane.md#scale-down annotations.AddAnnotations(newMachine, map[string]string{clusterv1.DeleteMachineAnnotation: ""}) - if err := patchHelper.Patch(r, newMachine); err != nil { - return nil, errors.Wrapf(err, "failed to patch Machine %s to add delete machine annotation %s.", newMachine.Name, clusterv1.DeleteMachineAnnotation) + if err := patchHelper.Patch(ctx, newMachine); err != nil { + return nil, errors.Wrapf(err, "failed to patch Machine %s to add delete machine annotation %s.", klog.KObj(newMachine), clusterv1.DeleteMachineAnnotation) } return nil, nil @@ -226,19 +233,19 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex // If KCP.Spec.Replicas is greater than the host count, // do not allow creating more KCP VM because there is no more host to place the new VM. 
if int(*kcp.Spec.Replicas) > usedHostsByPG.Len() { - ctx.Logger.V(1).Info("KCP is in rolling update, the placement group is full and no more host for placing more KCP VM, so wait for enough available hosts", "placementGroup", *placementGroup.Name, "usedHostsByPG", usedHostsByPG.String(), "usedHostsCount", usedHostsByPG.Len(), "kcpReplicas", *kcp.Spec.Replicas) + log.V(1).Info("KCP is in rolling update, the placement group is full and no more host for placing more KCP VM, so wait for enough available hosts", "placementGroup", *placementGroup.Name, "usedHostsByPG", usedHostsByPG.String(), "usedHostsCount", usedHostsByPG.Len(), "kcpReplicas", *kcp.Spec.Replicas) return nil, nil } - unusableHosts := usedHostsByPG.FilterUnavailableHostsOrWithoutEnoughMemory(*service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)) + unusableHosts := usedHostsByPG.FilterUnavailableHostsOrWithoutEnoughMemory(*service.TowerMemory(machineCtx.ElfMachine.Spec.MemoryMiB)) if !unusableHosts.IsEmpty() { - ctx.Logger.V(1).Info("KCP is in rolling update, the placement group is full and has unusable hosts, so wait for enough available hosts", "placementGroup", *placementGroup.Name, "unusableHosts", unusableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) + log.V(1).Info("KCP is in rolling update, the placement group is full and has unusable hosts, so wait for enough available hosts", "placementGroup", *placementGroup.Name, "unusableHosts", unusableHosts.String(), "usedHostsByPG", usedHostsByPG.String()) return nil, nil } - hostID, err := r.getVMHostForRollingUpdate(ctx, placementGroup, hosts) + hostID, err := r.getVMHostForRollingUpdate(ctx, machineCtx, placementGroup, hosts) if err != nil || hostID == "" { return nil, err } @@ -251,8 +258,10 @@ func (r *ElfMachineReconciler) preCheckPlacementGroup(ctx *context.MachineContex // Find the latest created machine in the placement group, // and set the host where the machine is located to the machine created by KCP rolling update. // This prevents migration of virtual machine during KCP rolling update when using a placement group. -func (r *ElfMachineReconciler) getVMHostForRollingUpdate(ctx *context.MachineContext, placementGroup *models.VMPlacementGroup, hosts service.Hosts) (string, error) { - elfMachines, err := machineutil.GetControlPlaneElfMachinesInCluster(ctx, ctx.Client, ctx.Cluster.Namespace, ctx.Cluster.Name) +func (r *ElfMachineReconciler) getVMHostForRollingUpdate(ctx goctx.Context, machineCtx *context.MachineContext, placementGroup *models.VMPlacementGroup, hosts service.Hosts) (string, error) { + log := ctrl.LoggerFrom(ctx) + + elfMachines, err := machineutil.GetControlPlaneElfMachinesInCluster(ctx, r.Client, machineCtx.Cluster.Namespace, machineCtx.Cluster.Name) if err != nil { return "", err } @@ -268,7 +277,7 @@ func (r *ElfMachineReconciler) getVMHostForRollingUpdate(ctx *context.MachineCon vmMap := make(map[string]string) for i := 0; i < len(placementGroup.Vms); i++ { if elfMachine, ok := elfMachineMap[*placementGroup.Vms[i].Name]; ok { - machine, err := capiutil.GetOwnerMachine(r, r.Client, elfMachine.ObjectMeta) + machine, err := capiutil.GetOwnerMachine(ctx, r.Client, elfMachine.ObjectMeta) if err != nil { return "", err } @@ -281,35 +290,35 @@ func (r *ElfMachineReconciler) getVMHostForRollingUpdate(ctx *context.MachineCon machines := collections.FromMachines(placementGroupMachines...) 
newestMachine := machines.Newest() if newestMachine == nil { - ctx.Logger.Info("Newest machine not found, skip selecting host for VM", "vmRef", ctx.ElfMachine.Status.VMRef) + log.Info("Newest machine not found, skip selecting host for VM", "vmRef", machineCtx.ElfMachine.Status.VMRef) return "", nil } - vm, err := ctx.VMService.Get(vmMap[newestMachine.Name]) + vm, err := machineCtx.VMService.Get(vmMap[newestMachine.Name]) if err != nil { return "", err } host := hosts.Get(*vm.Host.ID) if host == nil { - ctx.Logger.Info("Host not found, skip selecting host for VM", "host", formatNestedHost(vm.Host), "vmRef", ctx.ElfMachine.Status.VMRef) + log.Info("Host not found, skip selecting host for VM", "host", formatNestedHost(vm.Host), "vmRef", machineCtx.ElfMachine.Status.VMRef) return "", err } - ok, message := service.IsAvailableHost(host, *service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)) + ok, message := service.IsAvailableHost(host, *service.TowerMemory(machineCtx.ElfMachine.Spec.MemoryMiB)) if ok { - ctx.Logger.Info("Select a host to power on the VM since the placement group is full", "host", formatNestedHost(vm.Host), "vmRef", ctx.ElfMachine.Status.VMRef) + log.Info("Select a host to power on the VM since the placement group is full", "host", formatNestedHost(vm.Host), "vmRef", machineCtx.ElfMachine.Status.VMRef) return *vm.Host.ID, nil } - ctx.Logger.Info(fmt.Sprintf("Host is unavailable: %s, skip selecting host for VM", message), "host", formatNestedHost(vm.Host), "vmRef", ctx.ElfMachine.Status.VMRef) + log.Info(fmt.Sprintf("Host is unavailable: %s, skip selecting host for VM", message), "host", formatNestedHost(vm.Host), "vmRef", machineCtx.ElfMachine.Status.VMRef) return "", err } // getHostsInPlacementGroup returns the hosts where all virtual machines of placement group located. -func (r *ElfMachineReconciler) getHostsInPlacementGroup(ctx *context.MachineContext, placementGroup *models.VMPlacementGroup) (sets.Set[string], error) { +func (r *ElfMachineReconciler) getHostsInPlacementGroup(machineCtx *context.MachineContext, placementGroup *models.VMPlacementGroup) (sets.Set[string], error) { placementGroupVMSet := service.GetVMsInPlacementGroup(placementGroup) - vms, err := ctx.VMService.FindByIDs(placementGroupVMSet.UnsortedList()) + vms, err := machineCtx.VMService.FindByIDs(placementGroupVMSet.UnsortedList()) if err != nil { return nil, err } @@ -327,8 +336,8 @@ func (r *ElfMachineReconciler) getHostsInPlacementGroup(ctx *context.MachineCont // The 'Available' means that the specified VM can run on these hosts. // It returns hosts that are not in faulted state, not in the given 'usedHostsByPG', // and have sufficient memory for running this VM. 
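[Editor's note] getVMHostForRollingUpdate, shown above, pins the new control-plane VM to the host of the most recently created machine already in the placement group, so a KCP rolling update does not force a live migration. Picking the newest machine reduces to a creation-timestamp comparison; here is a tiny sketch with plain structs rather than the CAPI collections helpers the diff relies on. The getAvailableHostsForVM diff continues below.

    package example

    import "time"

    type machine struct {
        Name    string
        Created time.Time
        HostID  string
    }

    // newest returns the most recently created machine, or nil for an empty slice.
    // The real code gets the same answer via collections.FromMachines(...).Newest().
    func newest(machines []machine) *machine {
        var latest *machine
        for i := range machines {
            if latest == nil || machines[i].Created.After(latest.Created) {
                latest = &machines[i]
            }
        }
        return latest
    }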
-func (r *ElfMachineReconciler) getAvailableHostsForVM(ctx *context.MachineContext, hosts service.Hosts, usedHostsByPG service.Hosts, vm *models.VM) service.Hosts { - availableHosts := hosts.FilterAvailableHostsWithEnoughMemory(*service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)).Difference(usedHostsByPG) +func (r *ElfMachineReconciler) getAvailableHostsForVM(machineCtx *context.MachineContext, hosts service.Hosts, usedHostsByPG service.Hosts, vm *models.VM) service.Hosts { + availableHosts := hosts.FilterAvailableHostsWithEnoughMemory(*service.TowerMemory(machineCtx.ElfMachine.Spec.MemoryMiB)).Difference(usedHostsByPG) // If the VM is running, and the host where the VM is located // is not used by the placement group, then it is not necessary to @@ -349,19 +358,21 @@ func (r *ElfMachineReconciler) getAvailableHostsForVM(ctx *context.MachineContex // getPlacementGroup returns the specified placement group. // getPlacementGroup will get the placement group from the cache first. // If the placement group does not exist in the cache, it will be fetched from Tower and saved to the cache(expiration time is 10s). -func (r *ElfMachineReconciler) getPlacementGroup(ctx *context.MachineContext, placementGroupName string) (*models.VMPlacementGroup, error) { +func (r *ElfMachineReconciler) getPlacementGroup(ctx goctx.Context, machineCtx *context.MachineContext, placementGroupName string) (*models.VMPlacementGroup, error) { + log := ctrl.LoggerFrom(ctx) + if placementGroup := getPGFromCache(placementGroupName); placementGroup != nil { return placementGroup, nil } - placementGroup, err := ctx.VMService.GetVMPlacementGroup(placementGroupName) + placementGroup, err := machineCtx.VMService.GetVMPlacementGroup(placementGroupName) if err != nil { return nil, err } // Placement group is performing an operation if !typesutil.IsUUID(*placementGroup.LocalID) || placementGroup.EntityAsyncStatus != nil { - ctx.Logger.Info("Waiting for placement group task done", "placementGroup", *placementGroup.Name) + log.Info("Waiting for placement group task done", "placementGroup", *placementGroup.Name) return nil, nil } @@ -381,22 +392,24 @@ func (r *ElfMachineReconciler) getPlacementGroup(ctx *context.MachineContext, pl // For example, the placement group is full or the virtual machine is being migrated. 
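[Editor's note] getPlacementGroup, shown above, stays cache-first: it serves the Tower placement group from a short-lived cache (the comment above states an expiration of roughly 10 seconds) and only calls Tower on a miss, treating a group that is still mid-operation as not ready yet. A generic sketch of that read-through cache follows, with fetchFromTower as a hypothetical stand-in for the VMService call; the joinPlacementGroup hunks continue below it.

    package example

    import (
        "sync"
        "time"
    )

    type placementGroup struct {
        Name  string
        Ready bool // false while Tower still has an async operation running on it
    }

    type entry struct {
        pg      placementGroup
        expires time.Time
    }

    var (
        mu    sync.Mutex
        cache = map[string]entry{}
    )

    func fetchFromTower(name string) (placementGroup, error) {
        // Hypothetical stand-in for machineCtx.VMService.GetVMPlacementGroup(name).
        return placementGroup{Name: name, Ready: true}, nil
    }

    // getPlacementGroup returns the cached group if it is still fresh, otherwise
    // fetches it, caches it for ~10s, and returns nil when the group is busy.
    func getPlacementGroup(name string) (*placementGroup, error) {
        mu.Lock()
        if e, ok := cache[name]; ok && time.Now().Before(e.expires) {
            mu.Unlock()
            pg := e.pg
            return &pg, nil
        }
        mu.Unlock()

        pg, err := fetchFromTower(name)
        if err != nil {
            return nil, err
        }
        if !pg.Ready {
            return nil, nil // caller requeues; same idea as "waiting for placement group task done"
        }

        mu.Lock()
        cache[name] = entry{pg: pg, expires: time.Now().Add(10 * time.Second)}
        mu.Unlock()
        return &pg, nil
    }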
// //nolint:gocyclo -func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, vm *models.VM) (ret bool, reterr error) { - if !version.IsCompatibleWithPlacementGroup(ctx.ElfMachine) { - ctx.Logger.V(1).Info(fmt.Sprintf("The capeVersion of ElfMachine is lower than %s, skip adding VM to the placement group", version.CAPEVersion1_2_0), "capeVersion", version.GetCAPEVersion(ctx.ElfMachine)) +func (r *ElfMachineReconciler) joinPlacementGroup(ctx goctx.Context, machineCtx *context.MachineContext, vm *models.VM) (ret bool, reterr error) { + log := ctrl.LoggerFrom(ctx) + + if !version.IsCompatibleWithPlacementGroup(machineCtx.ElfMachine) { + log.V(1).Info(fmt.Sprintf("The capeVersion of ElfMachine is lower than %s, skip adding VM to the placement group", version.CAPEVersion1_2_0), "capeVersion", version.GetCAPEVersion(machineCtx.ElfMachine)) return true, nil } defer func() { if reterr != nil { - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.JoiningPlacementGroupFailedReason, clusterv1.ConditionSeverityWarning, reterr.Error()) + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.JoiningPlacementGroupFailedReason, clusterv1.ConditionSeverityWarning, reterr.Error()) } else if !ret { - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.JoiningPlacementGroupReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.JoiningPlacementGroupReason, clusterv1.ConditionSeverityInfo, "") } }() - placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctx.Client, ctx.Machine, ctx.Cluster) + placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, r.Client, machineCtx.Machine, machineCtx.Cluster) if err != nil { return false, err } @@ -409,7 +422,7 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v return false, nil } - placementGroup, err := r.getPlacementGroup(ctx, placementGroupName) + placementGroup, err := r.getPlacementGroup(ctx, machineCtx, placementGroupName) if err != nil || placementGroup == nil { return false, err } @@ -417,26 +430,26 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v placementGroupVMSet := service.GetVMsInPlacementGroup(placementGroup) if placementGroupVMSet.Has(*vm.ID) { // Ensure PlacementGroupRef is set or up to date. 
- ctx.ElfMachine.Status.PlacementGroupRef = *placementGroup.ID + machineCtx.ElfMachine.Status.PlacementGroupRef = *placementGroup.ID return true, nil } - if machineutil.IsControlPlaneMachine(ctx.Machine) { - hosts, err := ctx.VMService.GetHostsByCluster(ctx.ElfCluster.Spec.Cluster) + if machineutil.IsControlPlaneMachine(machineCtx.Machine) { + hosts, err := machineCtx.VMService.GetHostsByCluster(machineCtx.ElfCluster.Spec.Cluster) if err != nil { return false, err } - usedHostSetByPG, err := r.getHostsInPlacementGroup(ctx, placementGroup) + usedHostSetByPG, err := r.getHostsInPlacementGroup(machineCtx, placementGroup) if err != nil { return false, err } usedHostsByPG := hosts.Find(usedHostSetByPG) - availableHosts := r.getAvailableHostsForVM(ctx, hosts, usedHostsByPG, vm) + availableHosts := r.getAvailableHostsForVM(machineCtx, hosts, usedHostsByPG, vm) if availableHosts.IsEmpty() { - kcp, err := machineutil.GetKCPByMachine(ctx, ctx.Client, ctx.Machine) + kcp, err := machineutil.GetKCPByMachine(ctx, r.Client, machineCtx.Machine) if err != nil { return false, err } @@ -445,25 +458,25 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v // In this case the machine created by KCP rolling update can be powered on without being added to the placement group, // so return true and nil to let reconcileVMStatus() power it on. if kcputil.IsKCPInRollingUpdate(kcp) && *vm.Status == models.VMStatusSTOPPED { - unusablehosts := usedHostsByPG.FilterUnavailableHostsOrWithoutEnoughMemory(*service.TowerMemory(ctx.ElfMachine.Spec.MemoryMiB)) + unusablehosts := usedHostsByPG.FilterUnavailableHostsOrWithoutEnoughMemory(*service.TowerMemory(machineCtx.ElfMachine.Spec.MemoryMiB)) if unusablehosts.IsEmpty() { - ctx.Logger.Info("KCP is in rolling update, the placement group is full and has no unusable hosts, so skip adding VM to the placement group and power it on", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) + log.Info("KCP is in rolling update, the placement group is full and has no unusable hosts, so skip adding VM to the placement group and power it on", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", machineCtx.ElfMachine.Status.VMRef, "vmId", *vm.ID) return true, nil } - ctx.Logger.Info("KCP is in rolling update, the placement group is full and has unusable hosts, so wait for enough available hosts", "placementGroup", *placementGroup.Name, "unusablehosts", unusablehosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) + log.Info("KCP is in rolling update, the placement group is full and has unusable hosts, so wait for enough available hosts", "placementGroup", *placementGroup.Name, "unusablehosts", unusablehosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", machineCtx.ElfMachine.Status.VMRef, "vmId", *vm.ID) return false, nil } if *vm.Status != models.VMStatusSTOPPED { - ctx.Logger.V(1).Info(fmt.Sprintf("The placement group is full and VM is in %s status, skip adding VM to the placement group", *vm.Status), "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) + log.V(1).Info(fmt.Sprintf("The placement group is full and VM is in %s status, skip adding VM to the placement 
group", *vm.Status), "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", machineCtx.ElfMachine.Status.VMRef, "vmId", *vm.ID) return true, nil } // KCP is scaling out or being created. - ctx.Logger.V(1).Info("KCP is in scaling up or being created, the placement group is full, so wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) + log.V(1).Info("KCP is in scaling up or being created, the placement group is full, so wait for enough available hosts", "placementGroup", *placementGroup.Name, "availableHosts", availableHosts.String(), "usedHostsByPG", usedHostsByPG.String(), "vmRef", machineCtx.ElfMachine.Status.VMRef, "vmId", *vm.ID) return false, nil } @@ -472,23 +485,23 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v // and the virtual machine is not STOPPED, we need to migrate the virtual machine to a host that // is not used by the placement group before adding the virtual machine to the placement group. // Otherwise, just add the virtual machine to the placement group directly. - ctx.Logger.V(1).Info("The availableHosts for migrating the VM", "hosts", availableHosts.String(), "vmHost", formatNestedHost(vm.Host)) + log.V(1).Info("The availableHosts for migrating the VM", "hosts", availableHosts.String(), "vmHost", formatNestedHost(vm.Host)) if !availableHosts.Contains(*vm.Host.ID) && *vm.Status != models.VMStatusSTOPPED { - ctx.Logger.V(1).Info("Try to migrate the virtual machine to the specified target host if needed") + log.V(1).Info("Try to migrate the virtual machine to the specified target host if needed") - kcp, err := machineutil.GetKCPByMachine(ctx, ctx.Client, ctx.Machine) + kcp, err := machineutil.GetKCPByMachine(ctx, r.Client, machineCtx.Machine) if err != nil { return false, err } if kcputil.IsKCPInRollingUpdate(kcp) { - ctx.Logger.Info("KCP rolling update in progress, skip migrating VM", "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID) + log.Info("KCP rolling update in progress, skip migrating VM", "vmRef", machineCtx.ElfMachine.Status.VMRef, "vmId", *vm.ID) return true, nil } // The powered on CP ElfMachine which is not in the PlacementGroup should wait for other new CP ElfMachines to join the target PlacementGroup. // The code below double checks the recommended target host for migration is valid. - cpElfMachines, err := machineutil.GetControlPlaneElfMachinesInCluster(ctx, ctx.Client, ctx.Cluster.Namespace, ctx.Cluster.Name) + cpElfMachines, err := machineutil.GetControlPlaneElfMachinesInCluster(ctx, r.Client, machineCtx.Cluster.Namespace, machineCtx.Cluster.Name) if err != nil { return false, err } @@ -497,7 +510,7 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v cpElfMachineNames := make([]string, 0, len(cpElfMachines)) for i := 0; i < len(cpElfMachines); i++ { cpElfMachineNames = append(cpElfMachineNames, cpElfMachines[i].Name) - if ctx.ElfMachine.Name != cpElfMachines[i].Name && + if machineCtx.ElfMachine.Name != cpElfMachines[i].Name && cpElfMachines[i].Status.PlacementGroupRef == *placementGroup.ID { usedHostsByPG.Insert(cpElfMachines[i].Status.HostServerRef) } @@ -508,20 +521,20 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v // and kcp.Status.UnavailableReplicas == 0. 
// So we need to check if the number of CP ElfMachine is equal to kcp.Spec.Replicas. if len(cpElfMachines) != int(*kcp.Spec.Replicas) { - ctx.Logger.V(1).Info("The number of CP ElfMachine does not match the expected", "kcp", formatKCP(kcp), "cpElfMachines", cpElfMachineNames) + log.V(1).Info("The number of CP ElfMachine does not match the expected", "kcp", formatKCP(kcp), "cpElfMachines", cpElfMachineNames) return true, nil } targetHost := availableHosts.UnsortedList()[0] usedHostsCount := usedHostsByPG.Len() - ctx.Logger.V(1).Info("The hosts used by the PlacementGroup", "usedHosts", usedHostsByPG, "count", usedHostsCount, "targetHost", formatHost(targetHost), "kcp", formatKCP(kcp), "cpElfMachines", cpElfMachineNames) + log.V(1).Info("The hosts used by the PlacementGroup", "usedHosts", usedHostsByPG, "count", usedHostsCount, "targetHost", formatHost(targetHost), "kcp", formatKCP(kcp), "cpElfMachines", cpElfMachineNames) if usedHostsCount < int(*kcp.Spec.Replicas-1) { - ctx.Logger.V(1).Info("Not all other CPs joined the PlacementGroup, skip migrating VM") + log.V(1).Info("Not all other CPs joined the PlacementGroup, skip migrating VM") return true, nil } if usedHostsByPG.Has(*targetHost.ID) { - ctx.Logger.V(1).Info("The recommended target host for VM migration is used by the PlacementGroup, skip migrating VM") + log.V(1).Info("The recommended target host for VM migration is used by the PlacementGroup, skip migrating VM") return true, nil } @@ -529,15 +542,15 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v // This is the last CP ElfMachine (i.e. the 1st new CP ElfMachine) which has not been added into the target PlacementGroup. // Migrate this VM to the target host, then it will be added into the target PlacementGroup. - ctx.Logger.V(1).Info("Start migrating VM since KCP is not in rolling update process", "targetHost", formatHost(targetHost)) + log.V(1).Info("Start migrating VM since KCP is not in rolling update process", "targetHost", formatHost(targetHost)) - return r.migrateVM(ctx, vm, *targetHost.ID) + return r.migrateVM(ctx, machineCtx, vm, *targetHost.ID) } } if !placementGroupVMSet.Has(*vm.ID) { placementGroupVMSet.Insert(*vm.ID) - if err := r.addVMsToPlacementGroup(ctx, placementGroup, placementGroupVMSet.UnsortedList()); err != nil { + if err := r.addVMsToPlacementGroup(ctx, machineCtx, placementGroup, placementGroupVMSet.UnsortedList()); err != nil { return false, err } } @@ -550,26 +563,29 @@ func (r *ElfMachineReconciler) joinPlacementGroup(ctx *context.MachineContext, v // The return value: // 1. true means that the virtual machine does not need to be migrated. // 2. false and error is nil means the virtual machine is being migrated. 
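 // 3. false and error is not nil means the migration failed to be triggered.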
-func (r *ElfMachineReconciler) migrateVM(ctx *context.MachineContext, vm *models.VM, targetHost string) (bool, error) { +func (r *ElfMachineReconciler) migrateVM(ctx goctx.Context, machineCtx *context.MachineContext, vm *models.VM, targetHost string) (bool, error) { + log := ctrl.LoggerFrom(ctx) + if *vm.Host.ID == targetHost { - ctx.Logger.V(1).Info(fmt.Sprintf("The VM is already on the recommended target host %s, skip migrating VM", targetHost)) + log.V(1).Info(fmt.Sprintf("The VM is already on the recommended target host %s, skip migrating VM", targetHost)) return true, nil } - withTaskVM, err := ctx.VMService.Migrate(service.GetTowerString(vm.ID), targetHost) + withTaskVM, err := machineCtx.VMService.Migrate(service.GetTowerString(vm.ID), targetHost) if err != nil { return false, err } - ctx.ElfMachine.SetTask(*withTaskVM.TaskID) + machineCtx.ElfMachine.SetTask(*withTaskVM.TaskID) - ctx.Logger.Info(fmt.Sprintf("Waiting for the VM to be migrated from %s to %s", formatNestedHost(vm.Host), targetHost), "vmRef", ctx.ElfMachine.Status.VMRef, "vmId", *vm.ID, "taskRef", ctx.ElfMachine.Status.TaskRef) + log.Info(fmt.Sprintf("Waiting for the VM to be migrated from %s to %s", formatNestedHost(vm.Host), targetHost), "vmRef", machineCtx.ElfMachine.Status.VMRef, "vmId", *vm.ID, "taskRef", machineCtx.ElfMachine.Status.TaskRef) return false, nil } -func (r *ElfMachineReconciler) addVMsToPlacementGroup(ctx *context.MachineContext, placementGroup *models.VMPlacementGroup, vmIDs []string) error { - task, err := ctx.VMService.AddVMsToPlacementGroup(placementGroup, vmIDs) +func (r *ElfMachineReconciler) addVMsToPlacementGroup(ctx goctx.Context, machineCtx *context.MachineContext, placementGroup *models.VMPlacementGroup, vmIDs []string) error { + log := ctrl.LoggerFrom(ctx) + task, err := machineCtx.VMService.AddVMsToPlacementGroup(placementGroup, vmIDs) if err != nil { return err } @@ -578,7 +594,7 @@ func (r *ElfMachineReconciler) addVMsToPlacementGroup(ctx *context.MachineContex delPGCaches([]string{*placementGroup.Name}) taskID := *task.ID - task, err = ctx.VMService.WaitTask(ctx, taskID, config.WaitTaskTimeoutForPlacementGroupOperation, config.WaitTaskInterval) + task, err = machineCtx.VMService.WaitTask(ctx, taskID, config.WaitTaskTimeoutForPlacementGroupOperation, config.WaitTaskInterval) if err != nil { return errors.Wrapf(err, "failed to wait for placement group updating task to complete in %s: pgName %s, taskID %s", config.WaitTaskTimeoutForPlacementGroupOperation, *placementGroup.Name, taskID) } @@ -587,9 +603,9 @@ func (r *ElfMachineReconciler) addVMsToPlacementGroup(ctx *context.MachineContex return errors.Errorf("failed to update placement group %s in task %s", *placementGroup.Name, taskID) } - ctx.ElfMachine.Status.PlacementGroupRef = *placementGroup.ID + machineCtx.ElfMachine.Status.PlacementGroupRef = *placementGroup.ID - ctx.Logger.Info("Updating placement group succeeded", "taskID", taskID, "placementGroup", *placementGroup.Name, "vmIDs", vmIDs) + log.Info("Updating placement group succeeded", "taskID", taskID, "placementGroup", *placementGroup.Name, "vmIDs", vmIDs) return nil } @@ -597,12 +613,14 @@ func (r *ElfMachineReconciler) addVMsToPlacementGroup(ctx *context.MachineContex // deletePlacementGroup deletes the placement group when the MachineDeployment is deleted // and the cluster is not deleted. // If the cluster is deleted, all placement groups are deleted by the ElfCluster controller. 
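 // It returns true when there is nothing left to delete, and false while the deletion is still in progress.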
-func (r *ElfMachineReconciler) deletePlacementGroup(ctx *context.MachineContext) (bool, error) { - if !ctx.Cluster.DeletionTimestamp.IsZero() || machineutil.IsControlPlaneMachine(ctx.Machine) { +func (r *ElfMachineReconciler) deletePlacementGroup(ctx goctx.Context, machineCtx *context.MachineContext) (bool, error) { + log := ctrl.LoggerFrom(ctx) + + if !machineCtx.Cluster.DeletionTimestamp.IsZero() || machineutil.IsControlPlaneMachine(machineCtx.Machine) { return true, nil } - md, err := machineutil.GetMDByMachine(ctx, ctx.Client, ctx.Machine) + md, err := machineutil.GetMDByMachine(ctx, r.Client, machineCtx.Machine) if err != nil { if apierrors.IsNotFound(err) { return true, nil @@ -619,17 +637,17 @@ func (r *ElfMachineReconciler) deletePlacementGroup(ctx *context.MachineContext) return true, nil } - placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctx.Client, ctx.Machine, ctx.Cluster) + placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, r.Client, machineCtx.Machine, machineCtx.Cluster) if err != nil { return false, err } // Only delete the placement groups created by CAPE. - if !strings.HasPrefix(placementGroupName, towerresources.GetVMPlacementGroupNamePrefix(ctx.Cluster)) { + if !strings.HasPrefix(placementGroupName, towerresources.GetVMPlacementGroupNamePrefix(machineCtx.Cluster)) { return true, nil } - placementGroup, err := ctx.VMService.GetVMPlacementGroup(placementGroupName) + placementGroup, err := machineCtx.VMService.GetVMPlacementGroup(placementGroupName) if err != nil { if service.IsVMPlacementGroupNotFound(err) { return true, nil @@ -644,14 +662,14 @@ func (r *ElfMachineReconciler) deletePlacementGroup(ctx *context.MachineContext) return false, nil } - if ok, err := ctx.VMService.DeleteVMPlacementGroupByID(ctx, *placementGroup.ID); err != nil { + if ok, err := machineCtx.VMService.DeleteVMPlacementGroupByID(ctx, *placementGroup.ID); err != nil { return false, err } else if !ok { - ctx.Logger.Info(fmt.Sprintf("Waiting for the placement group %s to be deleted", *placementGroup.Name)) + log.Info(fmt.Sprintf("Waiting for the placement group %s to be deleted", *placementGroup.Name)) return false, nil } else { - ctx.Logger.Info(fmt.Sprintf("Placement group %s deleted", *placementGroup.Name)) + log.Info(fmt.Sprintf("Placement group %s deleted", *placementGroup.Name)) // Delete placement group cache. 
delPGCaches([]string{*placementGroup.Name}) diff --git a/controllers/elfmachine_controller_test.go b/controllers/elfmachine_controller_test.go index 6eabb116..1332cba1 100644 --- a/controllers/elfmachine_controller_test.go +++ b/controllers/elfmachine_controller_test.go @@ -44,7 +44,6 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" ctrlutil "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" - ctrllog "sigs.k8s.io/controller-runtime/pkg/log" infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" "github.com/smartxworks/cluster-api-provider-elf/pkg/config" @@ -117,8 +116,8 @@ var _ = Describe("ElfMachineReconciler", func() { Context("Reconcile an ElfMachine", func() { It("should not error and not requeue the request without machine", func() { - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext} + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx} result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: capiutil.ObjectKey(elfMachine)}) Expect(result).To(BeZero()) @@ -128,10 +127,10 @@ var _ = Describe("ElfMachineReconciler", func() { It("should not error and not requeue the request when Cluster is paused", func() { cluster.Spec.Paused = true - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx} result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: capiutil.ObjectKey(elfMachine)}) Expect(result).To(BeZero()) Expect(err).ToNot(HaveOccurred()) @@ -142,10 +141,10 @@ var _ = Describe("ElfMachineReconciler", func() { createMachineError := capierrors.CreateMachineError elfMachine.Status.FailureReason = &createMachineError elfMachine.Status.FailureMessage = pointer.String("Couldn't create machine") - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: capiutil.ObjectKey(elfMachine)}) Expect(result).To(BeZero()) Expect(err).ToNot(HaveOccurred()) @@ -153,30 +152,30 @@ var _ = Describe("ElfMachineReconciler", func() { }) It("should add our finalizer to the machine", func() { - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + 
fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) _, _ = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.Finalizers).To(ContainElement(infrav1.MachineFinalizer)) }) It("should exit immediately if cluster infra isn't ready", func() { ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) cluster.Status.InfrastructureReady = false - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - reconciler := ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) _, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(err).ToNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("Cluster infrastructure is not ready yet")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForClusterInfrastructureReason}}) }) @@ -184,32 +183,32 @@ var _ = Describe("ElfMachineReconciler", func() { ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) cluster.Status.InfrastructureReady = true conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - reconciler := ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) _, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(err).ToNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("Waiting for bootstrap data to be available")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForBootstrapDataReason}}) }) It("should wait cluster 
ControlPlaneInitialized true when create worker machine", func() { ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) cluster.Status.InfrastructureReady = true - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) _, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(err).ToNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("Waiting for the control plane to be initialized")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, clusterv1.WaitingForControlPlaneAvailableReason}}) }) @@ -217,16 +216,16 @@ var _ = Describe("ElfMachineReconciler", func() { ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) cluster.Status.InfrastructureReady = true elfMachine.Labels[clusterv1.MachineControlPlaneLabel] = "" - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) _, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(err).ToNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("Waiting for bootstrap data to be available")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForBootstrapDataReason}}) }) @@ -235,9 +234,9 @@ var _ = Describe("ElfMachineReconciler", func() { ctrlutil.AddFinalizer(machine, infrav1.MachineFinalizer) cluster.Status.InfrastructureReady = false - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: 
mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(err).ToNot(HaveOccurred()) @@ -246,27 +245,27 @@ var _ = Describe("ElfMachineReconciler", func() { logBuffer.Reset() message := "The node's healthy condition is unknown, virtual machine may have been shut down, will reconcile" conditions.MarkUnknown(machine, clusterv1.MachineNodeHealthyCondition, clusterv1.NodeConditionsFailedReason, "test") - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(err).ToNot(HaveOccurred()) Expect(result.RequeueAfter).To(Equal(config.DefaultRequeueTimeout)) Expect(logBuffer.String()).To(ContainSubstring(message)) machine.Status.Conditions[0].LastTransitionTime = metav1.NewTime(time.Now().Add(-config.VMPowerStatusCheckingDuration)) - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(err).ToNot(HaveOccurred()) Expect(result.IsZero()).To(BeTrue()) conditions.MarkUnknown(machine, clusterv1.MachineNodeHealthyCondition, clusterv1.NodeConditionsFailedReason, "test") machine.Status.FailureMessage = pointer.String("error") - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(err).ToNot(HaveOccurred()) Expect(result.IsZero()).To(BeTrue()) @@ -274,9 +273,9 @@ var _ = Describe("ElfMachineReconciler", func() { conditions.MarkUnknown(machine, clusterv1.MachineNodeHealthyCondition, clusterv1.NodeConditionsFailedReason, "test") machine.Status.FailureMessage = nil machine.DeletionTimestamp = &metav1.Time{Time: time.Now().UTC()} - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - reconciler = 
&ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(err).ToNot(HaveOccurred()) Expect(result.IsZero()).To(BeTrue()) @@ -285,9 +284,9 @@ var _ = Describe("ElfMachineReconciler", func() { machine.DeletionTimestamp = nil elfMachine.DeletionTimestamp = &metav1.Time{Time: time.Now().UTC()} ctrlutil.AddFinalizer(elfMachine, "no-gc") - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(err).ToNot(HaveOccurred()) Expect(result.IsZero()).To(BeTrue()) @@ -295,9 +294,9 @@ var _ = Describe("ElfMachineReconciler", func() { machine.DeletionTimestamp = &metav1.Time{Time: time.Now().UTC()} ctrlutil.AddFinalizer(machine, "no-gc") elfMachine.DeletionTimestamp = nil - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(err).ToNot(HaveOccurred()) Expect(result.IsZero()).To(BeTrue()) @@ -318,16 +317,16 @@ var _ = Describe("ElfMachineReconciler", func() { It("should set CloningFailedReason condition when failed to retrieve bootstrap data", func() { ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) machine.Spec.Bootstrap.DataSecretName = pointer.String("notfound") - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).To(BeZero()) Expect(err).Should(HaveOccurred()) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, 
elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityWarning, infrav1.CloningFailedReason}}) }) @@ -339,28 +338,28 @@ var _ = Describe("ElfMachineReconciler", func() { task := fake.NewTowerTask() withTaskVM := fake.NewWithTaskVM(vm, task) ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) machineContext.VMService = mockVMService - recordOrClearError(machineContext, clusterInsufficientStorageKey, true) + recordOrClearError(ctx, machineContext, ctrlMgrCtx.Client, clusterInsufficientStorageKey, true) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) elfMachineKey := capiutil.ObjectKey(elfMachine) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).NotTo(BeZero()) Expect(err).NotTo(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("Insufficient storage detected for the ELF cluster")) - expireELFScheduleVMError(machineContext, clusterInsufficientStorageKey) + expireELFScheduleVMError(ctx, machineContext, ctrlMgrCtx.Client, clusterInsufficientStorageKey) logBuffer.Reset() elfCluster.Spec.Cluster = clusterInsufficientMemoryKey - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - machineContext = newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - recordOrClearError(machineContext, clusterInsufficientMemoryKey, true) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + machineContext = newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + recordOrClearError(ctx, machineContext, ctrlMgrCtx.Client, clusterInsufficientMemoryKey, true) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).NotTo(BeZero()) Expect(err).NotTo(HaveOccurred()) @@ -372,16 +371,16 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().Get(*vm.ID).Return(vm, nil) mockVMService.EXPECT().GetTask(*task.ID).Return(task, nil) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) - expireELFScheduleVMError(machineContext, 
clusterInsufficientMemoryKey) + expireELFScheduleVMError(ctx, machineContext, ctrlMgrCtx.Client, clusterInsufficientMemoryKey) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).NotTo(BeZero()) Expect(err).ShouldNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("and the retry silence period passes, will try to create the VM again")) Expect(logBuffer.String()).To(ContainSubstring("Waiting for VM task done")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.Status.VMRef).To(Equal(*vm.ID)) Expect(elfMachine.Status.TaskRef).To(Equal(*task.ID)) resetMemoryCache() @@ -392,37 +391,37 @@ var _ = Describe("ElfMachineReconciler", func() { vm.Name = &elfMachine.Name vm.LocalID = pointer.String("placeholder-%s" + *vm.LocalID) ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Clone(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, errors.New(service.VMDuplicate)) mockVMService.EXPECT().GetByName(elfMachine.Name).Return(vm, nil) mockVMService.EXPECT().Get(*vm.ID).Return(vm, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).NotTo(BeZero()) Expect(err).ShouldNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("Waiting for VM task done")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.Status.VMRef).To(Equal(*vm.ID)) Expect(elfMachine.Status.TaskRef).To(Equal("")) }) It("should handle clone error", func() { ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Clone(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, errors.New("some error")) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) _, err := reconciler.Reconcile(ctx, 
ctrl.Request{NamespacedName: elfMachineKey}) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(err.Error()).To(ContainSubstring("failed to reconcile VM")) Expect(elfMachine.Status.VMRef).To(Equal("")) Expect(elfMachine.Status.TaskRef).To(Equal("")) @@ -440,8 +439,8 @@ var _ = Describe("ElfMachineReconciler", func() { nic := fake.NewTowerVMNic(0) placementGroup := fake.NewVMPlacementGroup([]string{*vm.ID}) ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) k8sNode.Status.Addresses = []corev1.NodeAddress{ { @@ -458,10 +457,10 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().AddLabelsToVM(gomock.Any(), gomock.Any()).Times(1) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) _, _ = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.GetVMDisconnectionTimestamp()).To(BeNil()) }) @@ -470,18 +469,18 @@ var _ = Describe("ElfMachineReconciler", func() { vm.EntityAsyncStatus = nil elfMachine.Status.VMRef = *vm.LocalID ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Times(2).Return(nil, errors.New(service.VMNotFound)) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).NotTo(BeZero()) Expect(err).NotTo(HaveOccurred()) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.GetVMDisconnectionTimestamp()).NotTo(BeNil()) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) @@ -494,7 +493,7 @@ var _ = Describe("ElfMachineReconciler", func() { Expect(result.RequeueAfter).To(BeZero()) Expect(err).NotTo(HaveOccurred()) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + 
Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(*elfMachine.Status.FailureReason).To(Equal(capeerrors.RemovedFromInfrastructureError)) Expect(*elfMachine.Status.FailureMessage).To(Equal(fmt.Sprintf("Unable to find VM by UUID %s. The VM was removed from infrastructure.", elfMachine.Status.VMRef))) }) @@ -505,17 +504,17 @@ var _ = Describe("ElfMachineReconciler", func() { vm.InRecycleBin = pointer.Bool(true) elfMachine.Status.VMRef = *vm.LocalID ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).To(BeZero()) Expect(err).ShouldNot(HaveOccurred()) - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(*elfMachine.Status.FailureReason).To(Equal(capeerrors.MovedToRecycleBinError)) Expect(*elfMachine.Status.FailureMessage).To(Equal(fmt.Sprintf("The VM %s was moved to the Tower recycle bin by users, so treat it as deleted.", *vm.LocalID))) Expect(elfMachine.HasVM()).To(BeFalse()) @@ -529,20 +528,20 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Status.VMRef = *vm.ID elfMachine.Status.TaskRef = *task.ID ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(nil, errors.New(service.VMNotFound)) mockVMService.EXPECT().GetTask(elfMachine.Status.TaskRef).Return(task, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).NotTo(BeZero()) Expect(err).ToNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("failed to create VM for ElfMachine")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.Status.VMRef).To(Equal("")) Expect(elfMachine.Status.TaskRef).To(Equal("")) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.TaskFailureReason}}) @@ -558,19 
+557,19 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Status.VMRef = *vm.ID elfMachine.Status.TaskRef = *task.ID ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(nil, errors.New(service.VMNotFound)) mockVMService.EXPECT().GetTask(elfMachine.Status.TaskRef).Return(task, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).To(BeZero()) Expect(err).ShouldNot(HaveOccurred()) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.Status.VMRef).To(Equal("")) Expect(elfMachine.Status.TaskRef).To(Equal("")) Expect(elfMachine.IsFailed()).To(BeTrue()) @@ -591,22 +590,22 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Status.TaskRef = *task1.ID placementGroup := fake.NewVMPlacementGroup([]string{*vm.ID}) ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) mockVMService.EXPECT().GetTask(elfMachine.Status.TaskRef).Return(task1, nil) mockVMService.EXPECT().PowerOn(*vm.LocalID, "").Return(task2, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).NotTo(BeZero()) Expect(err).ShouldNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("Waiting for VM to be powered on")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.Status.VMRef).To(Equal(*vm.LocalID)) Expect(elfMachine.Status.TaskRef).To(Equal(*task2.ID)) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.PoweringOnReason}}) @@ -623,20 +622,20 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Status.VMRef = *vm.ID elfMachine.Status.TaskRef = *task.ID ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) - 
ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md)
-			fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine)
+			ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md)
+			fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine)
 			mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil)
 			mockVMService.EXPECT().GetTask(elfMachine.Status.TaskRef).Return(task, nil)
 
-			reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService}
+			reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService}
 			elfMachineKey := capiutil.ObjectKey(elfMachine)
 			result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey})
 			Expect(result.RequeueAfter).NotTo(BeZero())
 			Expect(err).ShouldNot(HaveOccurred())
 			Expect(logBuffer.String()).To(ContainSubstring("The VM is being created"))
 			elfMachine = &infrav1.ElfMachine{}
-			Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed())
+			Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed())
 			Expect(elfMachine.Status.VMRef).To(Equal(*vm.ID))
 			Expect(elfMachine.Status.TaskRef).To(Equal(""))
 			expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.TaskFailureReason}})
@@ -654,21 +653,21 @@ var _ = Describe("ElfMachineReconciler", func() {
 			elfMachine.Status.TaskRef = *task1.ID
 			placementGroup := fake.NewVMPlacementGroup([]string{*vm.ID})
 			ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer)
-			ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md)
-			fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine)
+			ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md)
+			fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine)
 			mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil)
 			mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil)
 			mockVMService.EXPECT().GetTask(elfMachine.Status.TaskRef).Return(task1, nil)
 			mockVMService.EXPECT().PowerOn(*vm.LocalID, "").Return(nil, errors.New("some error"))
 
-			reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService}
+			reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService}
 			elfMachineKey := capiutil.ObjectKey(elfMachine)
 			result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey})
 			Expect(result.RequeueAfter).To(BeZero())
 			Expect(err.Error()).To(ContainSubstring("failed to trigger power on for VM"))
 			elfMachine = &infrav1.ElfMachine{}
-			Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed())
+			Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed())
 			Expect(elfMachine.Status.VMRef).To(Equal(*vm.LocalID))
 			Expect(elfMachine.Status.TaskRef).To(Equal(""))
 			expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityWarning, infrav1.PoweringOnFailedReason}})
@@ -687,15 +686,15 @@ var _ = Describe("ElfMachineReconciler", func() {
 			elfMachine.Status.TaskRef = *task1.ID
 			placementGroup := fake.NewVMPlacementGroup([]string{*vm.ID})
 			ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer)
-			ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md)
-			fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine)
+			ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md)
+			fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine)
 			mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil)
 			mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil)
 			mockVMService.EXPECT().GetTask(elfMachine.Status.TaskRef).Return(task1, nil)
 			mockVMService.EXPECT().PowerOn(*vm.LocalID, "").Return(task2, nil)
 
-			reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService}
+			reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService}
 			elfMachineKey := capiutil.ObjectKey(elfMachine)
 			result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey})
 			Expect(result.RequeueAfter).NotTo(BeZero())
@@ -703,7 +702,7 @@ var _ = Describe("ElfMachineReconciler", func() {
 			Expect(logBuffer.String()).To(ContainSubstring("task failed"))
 			Expect(logBuffer.String()).To(ContainSubstring("Waiting for VM to be powered on"))
 			elfMachine = &infrav1.ElfMachine{}
-			Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed())
+			Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed())
 			Expect(elfMachine.Status.VMRef).To(Equal(*vm.LocalID))
 			Expect(elfMachine.Status.TaskRef).To(Equal(*task2.ID))
 			expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.PoweringOnReason}})
@@ -722,22 +721,22 @@ var _ = Describe("ElfMachineReconciler", func() {
 			elfMachine.Status.TaskRef = *task1.ID
 			placementGroup := fake.NewVMPlacementGroup([]string{*vm.ID})
 			ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer)
-			ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md)
-			fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine)
+			ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md)
+			fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine)
 			mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil)
 			mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil)
 			mockVMService.EXPECT().GetTask(elfMachine.Status.TaskRef).Return(task1, nil)
 			mockVMService.EXPECT().PowerOff(*vm.LocalID).Return(task2, nil)
 
-			reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService}
+			reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService}
 			elfMachineKey := capiutil.ObjectKey(elfMachine)
 			result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey})
 			Expect(result.RequeueAfter).NotTo(BeZero())
 			Expect(err).ShouldNot(HaveOccurred())
 			Expect(logBuffer.String()).To(ContainSubstring("Waiting for VM to be powered off"))
 			elfMachine = &infrav1.ElfMachine{}
-			Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed())
+			Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed())
 			Expect(elfMachine.Status.VMRef).To(Equal(*vm.LocalID))
 			Expect(elfMachine.Status.TaskRef).To(Equal(*task2.ID))
 			expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.PowerOffReason}})
@@ -755,21 +754,21 @@ var _ = Describe("ElfMachineReconciler", func() {
 			elfMachine.Status.TaskRef = *task1.ID
 			placementGroup := fake.NewVMPlacementGroup([]string{*vm.ID})
 			ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer)
-			ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md)
-			fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine)
+			ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md)
+			fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine)
 			mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil)
 			mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil)
 			mockVMService.EXPECT().GetTask(elfMachine.Status.TaskRef).Return(task1, nil)
 			mockVMService.EXPECT().PowerOff(*vm.LocalID).Return(nil, errors.New("some error"))
 
-			reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService}
+			reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService}
 			elfMachineKey := capiutil.ObjectKey(elfMachine)
 			result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey})
 			Expect(result.RequeueAfter).To(BeZero())
 			Expect(err.Error()).To(ContainSubstring("failed to trigger powering off for VM"))
 			elfMachine = &infrav1.ElfMachine{}
-			Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed())
+			Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed())
 			Expect(elfMachine.Status.VMRef).To(Equal(*vm.LocalID))
 			Expect(elfMachine.Status.TaskRef).To(Equal(""))
 			expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityWarning, infrav1.PoweringOffFailedReason}})
@@ -788,12 +787,12 @@ var _ = Describe("ElfMachineReconciler", func() {
 		It("should return false when VM status in an unexpected status", func() {
 			vm := fake.NewTowerVMFromElfMachine(elfMachine)
 			vm.Status = nil
-			ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md)
-			fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine)
-			machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService)
+			ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md)
+			fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine)
+			machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService)
 
-			reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService}
-			ok, err := reconciler.reconcileVMStatus(machineContext, vm)
+			reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService}
+			ok, err := reconciler.reconcileVMStatus(ctx, machineContext, vm)
 			Expect(ok).To(BeFalse())
 			Expect(err).NotTo(HaveOccurred())
 			Expect(logBuffer.String()).To(ContainSubstring("The status of VM is an unexpected value nil"))
@@ -801,7 +800,7 @@ var _ = Describe("ElfMachineReconciler", func() {
 			logBuffer = new(bytes.Buffer)
 			klog.SetOutput(logBuffer)
 			vm.Status = models.NewVMStatus(models.VMStatusUNKNOWN)
-			ok, err = reconciler.reconcileVMStatus(machineContext, vm)
+			ok, err = reconciler.reconcileVMStatus(ctx, machineContext, vm)
 			Expect(ok).To(BeFalse())
 			Expect(err).NotTo(HaveOccurred())
Expect(logBuffer.String()).To(ContainSubstring("The VM is in an unexpected status")) @@ -811,14 +810,14 @@ var _ = Describe("ElfMachineReconciler", func() { vm := fake.NewTowerVMFromElfMachine(elfMachine) vm.Status = models.NewVMStatus(models.VMStatusSTOPPED) task := fake.NewTowerTask() - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) machineContext.VMService = mockVMService mockVMService.EXPECT().PowerOn(elfMachine.Status.VMRef, "").Return(task, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.reconcileVMStatus(machineContext, vm) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.reconcileVMStatus(ctx, machineContext, vm) Expect(ok).To(BeFalse()) Expect(err).NotTo(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("Waiting for VM to be powered on")) @@ -830,14 +829,14 @@ var _ = Describe("ElfMachineReconciler", func() { *vm.Vcpu += 1 vm.Status = models.NewVMStatus(models.VMStatusRUNNING) task := fake.NewTowerTask() - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) machineContext.VMService = mockVMService mockVMService.EXPECT().ShutDown(elfMachine.Status.VMRef).Return(task, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.reconcileVMStatus(machineContext, vm) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.reconcileVMStatus(ctx, machineContext, vm) Expect(ok).To(BeFalse()) Expect(err).NotTo(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("The VM configuration has been modified, shut down the VM first and then restore the VM configuration")) @@ -851,14 +850,14 @@ var _ = Describe("ElfMachineReconciler", func() { task := fake.NewTowerTask() conditions.MarkFalse(elfMachine, infrav1.VMProvisionedCondition, infrav1.TaskFailureReason, clusterv1.ConditionSeverityInfo, "JOB_VM_SHUTDOWN_TIMEOUT") - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, 
machine) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) machineContext.VMService = mockVMService mockVMService.EXPECT().PowerOff(elfMachine.Status.VMRef).Return(task, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.reconcileVMStatus(machineContext, vm) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.reconcileVMStatus(ctx, machineContext, vm) Expect(ok).To(BeFalse()) Expect(err).NotTo(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("The VM configuration has been modified, power off the VM first and then restore the VM configuration")) @@ -871,17 +870,17 @@ var _ = Describe("ElfMachineReconciler", func() { vm.Status = models.NewVMStatus(models.VMStatusSTOPPED) task := fake.NewTowerTask() withTaskVM := fake.NewWithTaskVM(vm, task) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) machineContext.VMService = mockVMService logBuffer = new(bytes.Buffer) klog.SetOutput(logBuffer) mockVMService.EXPECT().UpdateVM(vm, elfMachine).Return(withTaskVM, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.reconcileVMStatus(machineContext, vm) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.reconcileVMStatus(ctx, machineContext, vm) Expect(ok).To(BeFalse()) Expect(err).NotTo(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("The VM configuration has been modified, and the VM is stopped, just restore the VM configuration to expected values")) @@ -894,14 +893,14 @@ var _ = Describe("ElfMachineReconciler", func() { *vm.Vcpu += 1 vm.Status = models.NewVMStatus(models.VMStatusSUSPENDED) task := fake.NewTowerTask() - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) machineContext.VMService = mockVMService mockVMService.EXPECT().PowerOff(elfMachine.Status.VMRef).Return(task, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.reconcileVMStatus(machineContext, vm) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.reconcileVMStatus(ctx, machineContext, vm) Expect(ok).To(BeFalse()) Expect(err).NotTo(HaveOccurred()) expectConditions(elfMachine, 
[]conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.PowerOffReason}}) @@ -914,20 +913,20 @@ var _ = Describe("ElfMachineReconciler", func() { vm.Host = &models.NestedHost{ID: service.TowerString(fake.ID())} elfMachine.Status.VMRef = *vm.LocalID elfCluster.Spec.Cluster = clusterInsufficientMemoryKey - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) machineContext.VMService = mockVMService - recordOrClearError(machineContext, clusterInsufficientMemoryKey, true) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - err := reconciler.powerOnVM(machineContext, vm) + recordOrClearError(ctx, machineContext, ctrlMgrCtx.Client, clusterInsufficientMemoryKey, true) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + err := reconciler.powerOnVM(ctx, machineContext, vm) Expect(err).NotTo(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("Insufficient memory detected for the ELF cluster")) task := fake.NewTowerTask() mockVMService.EXPECT().PowerOn(elfMachine.Status.VMRef, "").Return(task, nil) - expireELFScheduleVMError(machineContext, clusterInsufficientMemoryKey) - err = reconciler.powerOnVM(machineContext, vm) + expireELFScheduleVMError(ctx, machineContext, ctrlMgrCtx.Client, clusterInsufficientMemoryKey) + err = reconciler.powerOnVM(ctx, machineContext, vm) Expect(err).NotTo(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("and the retry silence period passes, will try to power on the VM again")) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.PoweringOnReason}}) @@ -936,10 +935,10 @@ var _ = Describe("ElfMachineReconciler", func() { // GPU unexpectedError := errors.New("unexpected error") elfMachine.Spec.GPUDevices = []infrav1.GPUPassthroughDeviceSpec{{Model: "A16", Count: 1}} - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().PowerOn(elfMachine.Status.VMRef, *vm.Host.ID).Return(nil, unexpectedError) - err = reconciler.powerOnVM(machineContext, vm) + err = reconciler.powerOnVM(ctx, machineContext, vm) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring(unexpectedError.Error())) }) @@ -957,12 +956,12 @@ var _ = Describe("ElfMachineReconciler", func() { fake.ToControlPlaneMachine(machine, kcp) fake.ToControlPlaneMachine(elfMachine, kcp) delete(elfMachine.Annotations, infrav1.CAPEVersionAnnotation) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, 
machine, mockVMService) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.joinPlacementGroup(machineContext, nil) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.joinPlacementGroup(ctx, machineContext, nil) Expect(ok).To(BeTrue()) Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("The capeVersion of ElfMachine is lower than")) @@ -978,16 +977,16 @@ var _ = Describe("ElfMachineReconciler", func() { task.Status = &taskStatus elfMachine.Status.VMRef = *vm.LocalID placementGroup := fake.NewVMPlacementGroup(nil) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) mockVMService.EXPECT().AddVMsToPlacementGroup(placementGroup, []string{*vm.ID}).Return(task, nil) mockVMService.EXPECT().WaitTask(gomock.Any(), *task.ID, config.WaitTaskTimeoutForPlacementGroupOperation, config.WaitTaskInterval).Return(task, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.joinPlacementGroup(machineContext, vm) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.joinPlacementGroup(ctx, machineContext, vm) Expect(ok).To(BeTrue()) Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("Updating placement group succeeded")) @@ -1003,15 +1002,15 @@ var _ = Describe("ElfMachineReconciler", func() { task.Status = &taskStatus elfMachine.Status.VMRef = *vm.LocalID placementGroup := fake.NewVMPlacementGroup(nil) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().AddVMsToPlacementGroup(placementGroup, []string{*vm.ID}).Return(task, nil) mockVMService.EXPECT().WaitTask(gomock.Any(), *task.ID, config.WaitTaskTimeoutForPlacementGroupOperation, config.WaitTaskInterval).Return(task, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - err := 
reconciler.addVMsToPlacementGroup(machineContext, placementGroup, []string{*vm.ID}) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + err := reconciler.addVMsToPlacementGroup(ctx, machineContext, placementGroup, []string{*vm.ID}) Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("Updating placement group succeeded")) @@ -1022,8 +1021,8 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().AddVMsToPlacementGroup(placementGroup, []string{*vm.ID}).Return(task, nil) mockVMService.EXPECT().WaitTask(gomock.Any(), *task.ID, config.WaitTaskTimeoutForPlacementGroupOperation, config.WaitTaskInterval).Return(task, nil) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - err = reconciler.addVMsToPlacementGroup(machineContext, placementGroup, []string{*vm.ID}) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + err = reconciler.addVMsToPlacementGroup(ctx, machineContext, placementGroup, []string{*vm.ID}) Expect(strings.Contains(err.Error(), "failed to update placement group")).To(BeTrue()) logBuffer = new(bytes.Buffer) @@ -1031,8 +1030,8 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().AddVMsToPlacementGroup(placementGroup, []string{*vm.ID}).Return(task, nil) mockVMService.EXPECT().WaitTask(gomock.Any(), *task.ID, config.WaitTaskTimeoutForPlacementGroupOperation, config.WaitTaskInterval).Return(nil, errors.New("xxx")) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - err = reconciler.addVMsToPlacementGroup(machineContext, placementGroup, []string{*vm.ID}) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + err = reconciler.addVMsToPlacementGroup(ctx, machineContext, placementGroup, []string{*vm.ID}) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring(fmt.Sprintf("failed to wait for placement group updating task to complete in %s: pgName %s, taskID %s", config.WaitTaskTimeoutForPlacementGroupOperation, *placementGroup.Name, *task.ID))) }) @@ -1045,20 +1044,20 @@ var _ = Describe("ElfMachineReconciler", func() { placementGroup2 := fake.NewVMPlacementGroup(nil) placementGroup2.EntityAsyncStatus = models.EntityAsyncStatusUPDATING.Pointer() ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup1, nil) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup2, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).NotTo(BeZero()) Expect(err).To(BeZero()) elfMachine = &infrav1.ElfMachine{} - 
Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(logBuffer.String()).To(ContainSubstring("Waiting for placement group task done")) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.JoiningPlacementGroupReason}}) }) @@ -1071,20 +1070,20 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Status.VMRef = *vm.LocalID placementGroup := fake.NewVMPlacementGroup(nil) ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(nil, errors.New("some error")) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).To(BeZero()) Expect(err).To(HaveOccurred()) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityWarning, infrav1.JoiningPlacementGroupFailedReason}}) Expect(conditions.GetMessage(elfMachine, infrav1.VMProvisionedCondition)).To(Equal("some error")) }) @@ -1106,9 +1105,9 @@ var _ = Describe("ElfMachineReconciler", func() { placementGroup := fake.NewVMPlacementGroup([]string{}) task := fake.NewTowerTask() task.Status = models.NewTaskStatus(models.TaskStatusSUCCESSED) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md, kcp) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md, kcp) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) @@ -1116,8 +1115,8 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().AddVMsToPlacementGroup(placementGroup, []string{*vm.ID}).Return(task, nil) mockVMService.EXPECT().WaitTask(gomock.Any(), *task.ID, config.WaitTaskTimeoutForPlacementGroupOperation, config.WaitTaskInterval).Return(task, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: 
mockNewVMService} - ok, err := reconciler.joinPlacementGroup(machineContext, vm) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.joinPlacementGroup(ctx, machineContext, vm) Expect(ok).To(BeTrue()) Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("Updating placement group succeeded")) @@ -1137,16 +1136,16 @@ var _ = Describe("ElfMachineReconciler", func() { kcp.Status.Replicas = 2 kcp.Status.UpdatedReplicas = 1 conditions.MarkFalse(kcp, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "") - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md, kcp) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md, kcp) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) mockVMService.EXPECT().FindByIDs([]string{*placementGroup.Vms[0].ID}).Return([]*models.VM{vm2}, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.joinPlacementGroup(machineContext, vm) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.joinPlacementGroup(ctx, machineContext, vm) Expect(ok).To(BeTrue()) Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has no unusable hosts, so skip adding VM to the placement group and power it on")) @@ -1158,8 +1157,8 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) mockVMService.EXPECT().FindByIDs([]string{*placementGroup.Vms[0].ID}).Return([]*models.VM{vm2}, nil) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err = reconciler.joinPlacementGroup(machineContext, vm) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err = reconciler.joinPlacementGroup(ctx, machineContext, vm) Expect(ok).To(BeFalse()) Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unusable hosts, so wait for enough available hosts")) @@ -1172,8 +1171,8 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) mockVMService.EXPECT().FindByIDs([]string{*placementGroup.Vms[0].ID}).Return([]*models.VM{}, nil) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err = reconciler.joinPlacementGroup(machineContext, vm) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err = reconciler.joinPlacementGroup(ctx, machineContext, vm) 
Expect(ok).To(BeTrue()) Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring(fmt.Sprintf("The placement group is full and VM is in %s status, skip adding VM to the placement group", *vm.Status))) @@ -1183,17 +1182,17 @@ var _ = Describe("ElfMachineReconciler", func() { kcp.Spec.Replicas = pointer.Int32(1) kcp.Status.Replicas = 1 kcp.Status.UpdatedReplicas = 1 - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md, kcp) - machineContext = newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md, kcp) + machineContext = newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) vm.Status = models.NewVMStatus(models.VMStatusSTOPPED) host.HostState = &models.NestedMaintenanceHostState{State: models.NewMaintenanceModeEnum(models.MaintenanceModeEnumMAINTENANCEMODE)} mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) mockVMService.EXPECT().FindByIDs([]string{*placementGroup.Vms[0].ID}).Return([]*models.VM{}, nil) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err = reconciler.joinPlacementGroup(machineContext, vm) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err = reconciler.joinPlacementGroup(ctx, machineContext, vm) Expect(ok).To(BeFalse()) Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("KCP is in scaling up or being created, the placement group is full, so wait for enough available hosts")) @@ -1214,9 +1213,9 @@ var _ = Describe("ElfMachineReconciler", func() { taskStatus := models.TaskStatusSUCCESSED task.Status = &taskStatus placementGroup := fake.NewVMPlacementGroup([]string{*vm2.ID}) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md, kcp) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md, kcp) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host1, host2, host3), nil) @@ -1225,8 +1224,8 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().WaitTask(gomock.Any(), *task.ID, config.WaitTaskTimeoutForPlacementGroupOperation, config.WaitTaskInterval).Return(task, nil) setPGCache(placementGroup) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.joinPlacementGroup(machineContext, vm) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.joinPlacementGroup(ctx, machineContext, vm) Expect(ok).To(BeTrue()) 
Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("Updating placement group succeeded")) @@ -1253,16 +1252,16 @@ var _ = Describe("ElfMachineReconciler", func() { kcp.Status.UpdatedReplicas = 3 kcp.Status.Replicas = 4 conditions.MarkFalse(kcp, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "") - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md, kcp) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md, kcp) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host1, host2, host3), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*newCP2.ID, *oldCP3.ID})).Return([]*models.VM{newCP2, oldCP3}, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.joinPlacementGroup(machineContext, newCP1) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.joinPlacementGroup(ctx, machineContext, newCP1) Expect(ok).To(BeTrue()) Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("KCP rolling update in progress, skip migrating VM")) @@ -1310,19 +1309,19 @@ var _ = Describe("ElfMachineReconciler", func() { kcp.Status.Replicas = 3 kcp.Status.UpdatedReplicas = 3 - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, kcp, elfMachine1, machine1, elfMachine2, machine2) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine1, machine1) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine2, machine2) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, kcp, elfMachine1, machine1, elfMachine2, machine2) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine1, machine1) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine2, machine2) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID, *vm2.ID})).Return([]*models.VM{vm1, vm2}, nil) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host0, host1, host2), nil) mockVMService.EXPECT().Migrate(*vm0.ID, *host0.ID).Return(withTaskVM, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.joinPlacementGroup(machineContext, vm0) + reconciler := &ElfMachineReconciler{ControllerManagerContext: 
ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.joinPlacementGroup(ctx, machineContext, vm0) Expect(ok).To(BeFalse()) Expect(err).To(BeZero()) Expect(elfMachine.Status.TaskRef).To(Equal(*task.ID)) @@ -1336,13 +1335,13 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID, *vm2.ID})).Return([]*models.VM{vm1, vm2}, nil) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host0, host1, host2), nil) - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, kcp, elfMachine1, machine1, elfMachine2, machine2) - machineContext = newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine1, machine1) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine2, machine2) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err = reconciler.joinPlacementGroup(machineContext, vm0) + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, kcp, elfMachine1, machine1, elfMachine2, machine2) + machineContext = newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine1, machine1) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine2, machine2) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err = reconciler.joinPlacementGroup(ctx, machineContext, vm0) Expect(ok).To(BeTrue()) Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("The recommended target host for VM migration is used by the PlacementGroup")) @@ -1352,12 +1351,12 @@ var _ = Describe("ElfMachineReconciler", func() { host := fake.NewTowerHost() vm := fake.NewTowerVMFromElfMachine(elfMachine) vm.Host = &models.NestedHost{ID: service.TowerString(*host.ID)} - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, kcp) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, kcp) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.migrateVM(machineContext, vm, *host.ID) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.migrateVM(ctx, machineContext, vm, *host.ID) Expect(ok).To(BeTrue()) Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("The VM is already on the recommended target host")) @@ -1369,8 +1368,8 @@ var _ = Describe("ElfMachineReconciler", func() { // {ID: vm1.ID, Name: vm1.Name}, // } // 
mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID})).Return([]*models.VM{vm1}, nil) - // reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - // ok, err = reconciler.migrateVM(machineContext, vm, placementGroup, *host.ID) + // reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + // ok, err = reconciler.migrateVM(ctx, machineContext, vm, placementGroup, *host.ID) // Expect(ok).To(BeTrue()) // Expect(err).To(BeZero()) // Expect(logBuffer.String()).To(ContainSubstring("is already used by placement group")) @@ -1409,13 +1408,13 @@ var _ = Describe("ElfMachineReconciler", func() { kcp.Spec.Replicas = pointer.Int32(3) kcp.Status.Replicas = 3 kcp.Status.UpdatedReplicas = 3 - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, kcp, elfMachine1, machine1, elfMachine2, machine2, elfMachine3, machine3) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine1, machine1) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine2, machine2) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine3, machine3) - placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlContext.Client, machine, cluster) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, kcp, elfMachine1, machine1, elfMachine2, machine2, elfMachine3, machine3) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine1, machine1) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine2, machine2) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine3, machine3) + placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlMgrCtx.Client, machine, cluster) Expect(err).NotTo(HaveOccurred()) logBuffer = new(bytes.Buffer) @@ -1424,8 +1423,8 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{})).Return([]*models.VM{}, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - hostID, err := reconciler.preCheckPlacementGroup(machineContext) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + hostID, err := reconciler.preCheckPlacementGroup(ctx, machineContext) Expect(err).To(BeZero()) Expect(*hostID).To(Equal("")) Expect(logBuffer.String()).To(ContainSubstring("The placement group still has capacity")) @@ -1437,8 +1436,8 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID})).Return([]*models.VM{vm1}, nil) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - hostID, err = reconciler.preCheckPlacementGroup(machineContext) + reconciler = 
&ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + hostID, err = reconciler.preCheckPlacementGroup(ctx, machineContext) Expect(err).To(BeZero()) Expect(hostID).To(BeNil()) Expect(logBuffer.String()).To(ContainSubstring("KCP is not in rolling update and not in scaling down, the placement group is full, so wait for enough available hosts")) @@ -1481,13 +1480,13 @@ var _ = Describe("ElfMachineReconciler", func() { kcp.Status.Replicas = 4 kcp.Status.UpdatedReplicas = 1 conditions.MarkFalse(kcp, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "") - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, kcp, elfMachine1, machine1, elfMachine2, machine2, elfMachine3, machine3) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine1, machine1) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine2, machine2) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine3, machine3) - placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlContext.Client, machine, cluster) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, kcp, elfMachine1, machine1, elfMachine2, machine2, elfMachine3, machine3) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine1, machine1) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine2, machine2) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine3, machine3) + placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlMgrCtx.Client, machine, cluster) Expect(err).NotTo(HaveOccurred()) logBuffer = new(bytes.Buffer) @@ -1497,8 +1496,8 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host1, host2, host3), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID, *vm2.ID, *vm3.ID})).Return([]*models.VM{vm1, vm2, vm3}, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - host, err := reconciler.preCheckPlacementGroup(machineContext) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + host, err := reconciler.preCheckPlacementGroup(ctx, machineContext) Expect(err).To(BeZero()) Expect(*host).To(Equal(*vm3.Host.ID)) @@ -1511,8 +1510,8 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host1, host2, host3), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID, *vm2.ID, *vm3.ID})).Return([]*models.VM{vm1, vm2, vm3}, nil) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - host, err = reconciler.preCheckPlacementGroup(machineContext) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + host, err = reconciler.preCheckPlacementGroup(ctx, 
machineContext) Expect(err).To(BeZero()) Expect(host).To(BeNil()) Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unusable hosts, so wait for enough available hosts")) @@ -1525,8 +1524,8 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host1, host2, host3), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID, *vm2.ID, *vm3.ID})).Return([]*models.VM{vm1, vm2, vm3}, nil) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - host, err = reconciler.preCheckPlacementGroup(machineContext) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + host, err = reconciler.preCheckPlacementGroup(ctx, machineContext) Expect(err).To(BeZero()) Expect(host).To(BeNil()) Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unusable hosts, so wait for enough available hosts")) @@ -1539,8 +1538,8 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host1, host2, host3), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID, *vm2.ID, *vm3.ID})).Return([]*models.VM{vm1, vm2, vm3}, nil) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - host, err = reconciler.preCheckPlacementGroup(machineContext) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + host, err = reconciler.preCheckPlacementGroup(ctx, machineContext) Expect(err).To(BeZero()) Expect(host).To(BeNil()) Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and has unusable hosts, so wait for enough available hosts")) @@ -1550,14 +1549,14 @@ var _ = Describe("ElfMachineReconciler", func() { kcp.Spec.Replicas = pointer.Int32(5) kcp.Status.Replicas = 6 kcp.Status.UpdatedReplicas = 4 - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, kcp, elfMachine1, machine1, elfMachine2, machine2, elfMachine3, machine3) - machineContext = newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, kcp, elfMachine1, machine1, elfMachine2, machine2, elfMachine3, machine3) + machineContext = newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) mockVMService.EXPECT().GetVMPlacementGroup(placementGroupName).Return(placementGroup, nil) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host1, host2, host3), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID, *vm2.ID, *vm3.ID})).Return([]*models.VM{vm1, vm2, vm3}, nil) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - host, err = reconciler.preCheckPlacementGroup(machineContext) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + host, err = reconciler.preCheckPlacementGroup(ctx, machineContext) Expect(err).To(BeZero()) Expect(host).To(BeNil()) Expect(logBuffer.String()).To(ContainSubstring("KCP is in rolling update, the placement group is full and no more host for placing more KCP VM, so wait 
for enough available hosts")) @@ -1568,8 +1567,8 @@ var _ = Describe("ElfMachineReconciler", func() { hosts := []*models.Host{host1, host2, host3} mockVMService.EXPECT().Get(*vm3.ID).Return(vm3, nil) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - hostID, err := reconciler.getVMHostForRollingUpdate(machineContext, placementGroup, service.NewHostsFromList(hosts)) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + hostID, err := reconciler.getVMHostForRollingUpdate(ctx, machineContext, placementGroup, service.NewHostsFromList(hosts)) Expect(err).To(BeZero()) Expect(hostID).To(Equal("")) Expect(logBuffer.String()).To(ContainSubstring("Host is unavailable: host is in CONNECTED_ERROR status, skip selecting host for VM")) @@ -1580,8 +1579,8 @@ var _ = Describe("ElfMachineReconciler", func() { hosts = []*models.Host{host1, host2, host3} mockVMService.EXPECT().Get(*vm3.ID).Return(vm3, nil) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - hostID, err = reconciler.getVMHostForRollingUpdate(machineContext, placementGroup, service.NewHostsFromList(hosts)) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + hostID, err = reconciler.getVMHostForRollingUpdate(ctx, machineContext, placementGroup, service.NewHostsFromList(hosts)) Expect(err).To(BeZero()) Expect(hostID).To(Equal("")) Expect(logBuffer.String()).To(ContainSubstring("Host not found, skip selecting host for VM")) @@ -1597,8 +1596,8 @@ var _ = Describe("ElfMachineReconciler", func() { hosts = []*models.Host{host1, host2, host3, host4} mockVMService.EXPECT().Get(*vm3.ID).Return(vm3, nil) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - hostID, err = reconciler.getVMHostForRollingUpdate(machineContext, placementGroup, service.NewHostsFromList(hosts)) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + hostID, err = reconciler.getVMHostForRollingUpdate(ctx, machineContext, placementGroup, service.NewHostsFromList(hosts)) Expect(err).To(BeZero()) Expect(hostID).To(Equal(*vm3.Host.ID)) Expect(logBuffer.String()).To(ContainSubstring("Select a host to power on the VM since the placement group is full")) @@ -1609,16 +1608,16 @@ var _ = Describe("ElfMachineReconciler", func() { hosts = []*models.Host{host1, host2, host3, host4} mockVMService.EXPECT().Get(*vm3.ID).Return(vm3, nil) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - hostID, err = reconciler.getVMHostForRollingUpdate(machineContext, placementGroup, service.NewHostsFromList(hosts)) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + hostID, err = reconciler.getVMHostForRollingUpdate(ctx, machineContext, placementGroup, service.NewHostsFromList(hosts)) Expect(err).To(BeZero()) Expect(hostID).To(Equal(*vm3.Host.ID)) Expect(logBuffer.String()).To(ContainSubstring("Select a host to power on the VM since the placement group is full")) - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, kcp) - machineContext = newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - hostID, err = 
reconciler.getVMHostForRollingUpdate(machineContext, placementGroup, service.NewHostsFromList(hosts)) + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, kcp) + machineContext = newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + hostID, err = reconciler.getVMHostForRollingUpdate(ctx, machineContext, placementGroup, service.NewHostsFromList(hosts)) Expect(err).To(BeZero()) Expect(hostID).To(Equal("")) Expect(logBuffer.String()).To(ContainSubstring("Newest machine not found, skip selecting host for VM")) @@ -1651,17 +1650,17 @@ var _ = Describe("ElfMachineReconciler", func() { placementGroup.Vms = []*models.NestedVM{ {ID: vm1.ID, Name: vm1.Name}, } - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, kcp) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlContext.Client, machine, cluster) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, kcp) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlMgrCtx.Client, machine, cluster) Expect(err).NotTo(HaveOccurred()) mockVMService.EXPECT().GetVMPlacementGroup(placementGroupName).Return(placementGroup, nil) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host1), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID})).Return([]*models.VM{vm1}, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - hostID, err := reconciler.preCheckPlacementGroup(machineContext) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + hostID, err := reconciler.preCheckPlacementGroup(ctx, machineContext) Expect(err).To(BeZero()) Expect(hostID).To(BeNil()) Expect(logBuffer.String()).To(ContainSubstring("KCP is not in rolling update and not in scaling down, the placement group is full, so wait for enough available hosts")) @@ -1672,8 +1671,8 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host1, host2), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID})).Return([]*models.VM{vm1}, nil) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - hostID, err = reconciler.preCheckPlacementGroup(machineContext) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + hostID, err = reconciler.preCheckPlacementGroup(ctx, machineContext) Expect(err).To(BeZero()) Expect(*hostID).To(Equal("")) expectConditions(elfMachine, []conditionAssertion{}) @@ -1683,8 +1682,8 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host1), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{})).Return([]*models.VM{}, nil) - reconciler = 
&ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - hostID, err = reconciler.preCheckPlacementGroup(machineContext) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + hostID, err = reconciler.preCheckPlacementGroup(ctx, machineContext) Expect(err).To(BeZero()) Expect(*hostID).To(Equal("")) expectConditions(elfMachine, []conditionAssertion{}) @@ -1706,21 +1705,21 @@ var _ = Describe("ElfMachineReconciler", func() { placementGroup.Vms = []*models.NestedVM{ {ID: vm1.ID, Name: vm1.Name}, } - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, kcp) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlContext.Client, machine, cluster) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, kcp) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlMgrCtx.Client, machine, cluster) Expect(err).NotTo(HaveOccurred()) mockVMService.EXPECT().GetVMPlacementGroup(placementGroupName).Return(placementGroup, nil) mockVMService.EXPECT().GetHostsByCluster(elfCluster.Spec.Cluster).Return(service.NewHosts(host), nil) mockVMService.EXPECT().FindByIDs(gomock.InAnyOrder([]string{*vm1.ID})).Return([]*models.VM{vm1}, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - hostID, err := reconciler.preCheckPlacementGroup(machineContext) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + hostID, err := reconciler.preCheckPlacementGroup(ctx, machineContext) Expect(err).To(BeZero()) Expect(hostID).To(BeNil()) Expect(logBuffer.String()).To(ContainSubstring("Add the delete machine annotation on KCP Machine in order to delete it")) - Expect(reconciler.Client.Get(reconciler, capiutil.ObjectKey(machine), machine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, capiutil.ObjectKey(machine), machine)).To(Succeed()) Expect(machine.Annotations).Should(HaveKey(clusterv1.DeleteMachineAnnotation)) }) }) @@ -1733,12 +1732,12 @@ var _ = Describe("ElfMachineReconciler", func() { host2 := fake.NewTowerHost() host3 := fake.NewTowerHost() - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, kcp) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, kcp) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) // virtual machine has not been created - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} availableHosts := reconciler.getAvailableHostsForVM(machineContext, nil, service.NewHosts(), nil) Expect(availableHosts).To(BeEmpty()) @@ -1805,8 +1804,8 @@ var _ = Describe("ElfMachineReconciler", func() { } Expect(testEnv.CreateAndWait(ctx, k8sNode)).To(Succeed()) - ctrlContext := 
newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) // before reconcile, create kubeconfig secret for cluster. Expect(helpers.CreateKubeConfigSecret(testEnv, cluster.Namespace, cluster.Name)).To(Succeed()) @@ -1815,12 +1814,12 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Times(2).Return(placementGroup, nil) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) _, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(err).To(BeZero()) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(*elfMachine.Spec.ProviderID).Should(Equal(machineutil.ConvertUUIDToProviderID(*vm.LocalID))) }) }) @@ -1857,9 +1856,9 @@ var _ = Describe("ElfMachineReconciler", func() { NetworkType: infrav1.NetworkTypeIPV4DHCP, }, } - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} k8sNode.Status.Addresses = []corev1.NodeAddress{ { @@ -1876,7 +1875,7 @@ var _ = Describe("ElfMachineReconciler", func() { Expect(err).ShouldNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("VM network is not ready yet")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForNetworkAddressesReason}}) logBuffer.Reset() @@ -1887,7 +1886,7 @@ var _ = Describe("ElfMachineReconciler", func() { result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).To(BeZero()) Expect(err).ShouldNot(HaveOccurred()) - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) logBuffer.Reset() // k8s node IP is null, VM has no nic IP @@ -1899,7 +1898,7 @@ var _ = Describe("ElfMachineReconciler", func() { Expect(err).ShouldNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("VM network is not ready yet")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, 
elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForNetworkAddressesReason}}) logBuffer.Reset() @@ -1918,7 +1917,7 @@ var _ = Describe("ElfMachineReconciler", func() { Expect(err).ShouldNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("VM network is not ready yet")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForNetworkAddressesReason}}) logBuffer.Reset() @@ -1937,7 +1936,7 @@ var _ = Describe("ElfMachineReconciler", func() { result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).To(BeZero()) Expect(err).ShouldNot(HaveOccurred()) - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) logBuffer.Reset() // k8s node has node IP, VM has nic IP @@ -1947,7 +1946,7 @@ var _ = Describe("ElfMachineReconciler", func() { result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).To(BeZero()) Expect(err).ShouldNot(HaveOccurred()) - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) logBuffer.Reset() // test elfMachine has one network device with IPV4 type @@ -1957,9 +1956,9 @@ var _ = Describe("ElfMachineReconciler", func() { NetworkType: infrav1.NetworkTypeIPV4, }, } - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} // k8s node IP is null, VM has no nic IP k8sNode.Status.Addresses = []corev1.NodeAddress{ @@ -1978,7 +1977,7 @@ var _ = Describe("ElfMachineReconciler", func() { Expect(err).ShouldNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("VM network is not ready yet")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForNetworkAddressesReason}}) logBuffer.Reset() @@ -1989,7 +1988,7 @@ var _ = Describe("ElfMachineReconciler", func() { result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).To(BeZero()) Expect(err).ShouldNot(HaveOccurred()) - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + 
Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) logBuffer.Reset() k8sNode.Status.Addresses = []corev1.NodeAddress{ @@ -2007,7 +2006,7 @@ var _ = Describe("ElfMachineReconciler", func() { result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).To(BeZero()) Expect(err).ShouldNot(HaveOccurred()) - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) logBuffer.Reset() // k8s node has node IP, VM has one nic ip @@ -2017,7 +2016,7 @@ var _ = Describe("ElfMachineReconciler", func() { result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).To(BeZero()) Expect(err).ShouldNot(HaveOccurred()) - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) logBuffer.Reset() // test elfMachine has two network device, one is IPV4 type. @@ -2030,9 +2029,9 @@ var _ = Describe("ElfMachineReconciler", func() { NetworkType: infrav1.NetworkTypeIPV4DHCP, }, } - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} k8sNode.Status.Addresses = []corev1.NodeAddress{ { @@ -2053,7 +2052,7 @@ var _ = Describe("ElfMachineReconciler", func() { Expect(err).ShouldNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("VM network is not ready yet")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForNetworkAddressesReason}}) logBuffer.Reset() @@ -2066,7 +2065,7 @@ var _ = Describe("ElfMachineReconciler", func() { result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).To(BeZero()) Expect(err).ShouldNot(HaveOccurred()) - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) logBuffer.Reset() k8sNode.Status.Addresses = []corev1.NodeAddress{ @@ -2088,7 +2087,7 @@ var _ = Describe("ElfMachineReconciler", func() { Expect(err).ShouldNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("VM network is not ready yet")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForNetworkAddressesReason}}) logBuffer.Reset() @@ -2109,7 +2108,7 @@ var _ = 
Describe("ElfMachineReconciler", func() { result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).To(BeZero()) Expect(err).ShouldNot(HaveOccurred()) - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) logBuffer.Reset() // test elfMachine has two network device, all network device are IPV4 type @@ -2123,9 +2122,9 @@ var _ = Describe("ElfMachineReconciler", func() { IPAddrs: []string{"127.0.0.2"}, }, } - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} k8sNode.Status.Addresses = []corev1.NodeAddress{ { @@ -2146,7 +2145,7 @@ var _ = Describe("ElfMachineReconciler", func() { Expect(err).ShouldNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("VM network is not ready yet")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForNetworkAddressesReason}}) logBuffer.Reset() @@ -2159,7 +2158,7 @@ var _ = Describe("ElfMachineReconciler", func() { result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).To(BeZero()) Expect(err).ShouldNot(HaveOccurred()) - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) logBuffer.Reset() k8sNode.Status.Addresses = []corev1.NodeAddress{ @@ -2179,7 +2178,7 @@ var _ = Describe("ElfMachineReconciler", func() { result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).To(BeZero()) Expect(err).ShouldNot(HaveOccurred()) - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) logBuffer.Reset() // test elfMachine has two network device, all network device are DHCP type @@ -2191,9 +2190,9 @@ var _ = Describe("ElfMachineReconciler", func() { NetworkType: infrav1.NetworkTypeIPV4DHCP, }, } - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} // k8s node has node IP, VM has no nic IP 
mockVMService.EXPECT().GetVMNics(*vm.ID).Return(nil, nil) @@ -2202,7 +2201,7 @@ var _ = Describe("ElfMachineReconciler", func() { Expect(err).ShouldNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("VM network is not ready yet")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForNetworkAddressesReason}}) logBuffer.Reset() @@ -2223,7 +2222,7 @@ var _ = Describe("ElfMachineReconciler", func() { result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).To(BeZero()) Expect(err).ShouldNot(HaveOccurred()) - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) logBuffer.Reset() // test elfMachine has 3 network device, one network device is None type, other are DHCP type @@ -2238,9 +2237,9 @@ var _ = Describe("ElfMachineReconciler", func() { NetworkType: infrav1.NetworkTypeNone, }, } - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} // k8s node IP is null, VM has two nic IP nic1 = fake.NewTowerVMNic(0) @@ -2251,7 +2250,7 @@ var _ = Describe("ElfMachineReconciler", func() { result, err = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).To(BeZero()) Expect(err).ShouldNot(HaveOccurred()) - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) logBuffer.Reset() }) @@ -2263,8 +2262,8 @@ var _ = Describe("ElfMachineReconciler", func() { nic.IPAddress = service.TowerString("127.0.0.1") placementGroup := fake.NewVMPlacementGroup([]string{*vm.ID}) ctrlutil.AddFinalizer(elfMachine, infrav1.MachineFinalizer) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md, kubeConfigSecret) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) k8sNode.Status.Addresses = []corev1.NodeAddress{ { @@ -2280,11 +2279,11 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().AddLabelsToVM(gomock.Any(), gomock.Any()).Times(1) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) _, _ = 
reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.Status.Network[0].IPAddrs[0]).To(Equal(*nic.IPAddress)) Expect(elfMachine.Status.Addresses[0].Type).To(Equal(clusterv1.MachineInternalIP)) Expect(elfMachine.Status.Addresses[0].Address).To(Equal(*nic.IPAddress)) @@ -2311,17 +2310,17 @@ var _ = Describe("ElfMachineReconciler", func() { elfCluster.Annotations = map[string]string{ infrav1.ElfClusterForceDeleteAnnotation: "", } - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result).To(BeZero()) Expect(err).To(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("Skip VM deletion due to the force-delete-cluster annotation")) elfCluster = &infrav1.ElfCluster{} - err = reconciler.Client.Get(reconciler, elfMachineKey, elfCluster) + err = reconciler.Client.Get(ctx, elfMachineKey, elfCluster) Expect(apierrors.IsNotFound(err)).To(BeTrue()) }) @@ -2329,44 +2328,44 @@ var _ = Describe("ElfMachineReconciler", func() { mockNewVMService = func(_ goctx.Context, _ infrav1.Tower, _ logr.Logger) (service.VMService, error) { return mockVMService, errors.New("get vm service failed") } - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result).To(BeZero()) Expect(err).To(HaveOccurred()) - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.Finalizers).To(ContainElement(infrav1.MachineFinalizer)) }) It("should delete ElfMachine when vmRef is empty and VM not found", func() { - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetByName(elfMachine.Name).Return(nil, errors.New(service.VMNotFound)) - reconciler := 
&ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result).To(BeZero()) Expect(err).To(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("VM already deleted")) elfCluster = &infrav1.ElfCluster{} - err = reconciler.Client.Get(reconciler, elfMachineKey, elfCluster) + err = reconciler.Client.Get(ctx, elfMachineKey, elfCluster) Expect(apierrors.IsNotFound(err)).To(BeTrue()) }) It("should delete the VM that in creating status and have not been saved to ElfMachine", func() { vm := fake.NewTowerVM() vm.LocalID = pointer.String("placeholder-%s" + *vm.LocalID) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetByName(elfMachine.Name).Return(vm, nil) mockVMService.EXPECT().Get(*vm.ID).Return(vm, nil) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).NotTo(BeZero()) @@ -2374,7 +2373,7 @@ var _ = Describe("ElfMachineReconciler", func() { Expect(logBuffer.String()).To(ContainSubstring("Waiting for VM task done")) Expect(logBuffer.String()).To(ContainSubstring("Waiting for VM to be deleted")) elfCluster = &infrav1.ElfCluster{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.Status.VMRef).To(Equal(*vm.ID)) Expect(elfMachine.Status.TaskRef).To(Equal("")) }) @@ -2385,21 +2384,21 @@ var _ = Describe("ElfMachineReconciler", func() { status := models.VMStatusRUNNING vm.Status = &status task := fake.NewTowerTask() - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetByName(elfMachine.Name).Return(vm, nil) mockVMService.EXPECT().Get(*vm.LocalID).Return(vm, nil) mockVMService.EXPECT().ShutDown(*vm.LocalID).Return(task, nil) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).NotTo(BeZero()) Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("Waiting 
for VM shut down")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.Status.VMRef).To(Equal(*vm.LocalID)) Expect(elfMachine.Status.TaskRef).To(Equal(*task.ID)) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, clusterv1.DeletingReason}}) @@ -2410,21 +2409,21 @@ var _ = Describe("ElfMachineReconciler", func() { task := fake.NewTowerTask() elfMachine.Status.VMRef = *vm.LocalID elfMachine.Status.TaskRef = *task.ID - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) vmNotFoundError := errors.New(service.VMNotFound) mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(nil, vmNotFoundError) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result).To(BeZero()) Expect(err).To(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("VM already deleted")) elfMachine = &infrav1.ElfMachine{} - err = reconciler.Client.Get(reconciler, elfMachineKey, elfMachine) + err = reconciler.Client.Get(ctx, elfMachineKey, elfMachine) Expect(apierrors.IsNotFound(err)).To(BeTrue()) }) @@ -2436,14 +2435,14 @@ var _ = Describe("ElfMachineReconciler", func() { task := fake.NewTowerTask() elfMachine.Status.VMRef = *vm.LocalID elfMachine.Status.TaskRef = *task.ID - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil) mockVMService.EXPECT().GetTask(elfMachine.Status.TaskRef).Return(task, nil) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result).NotTo(BeZero()) @@ -2451,7 +2450,7 @@ var _ = Describe("ElfMachineReconciler", func() { Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("Waiting for VM task done")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, 
clusterv1.DeletingReason}}) }) @@ -2463,21 +2462,21 @@ var _ = Describe("ElfMachineReconciler", func() { task.Status = &status elfMachine.Status.VMRef = *vm.LocalID elfMachine.Status.TaskRef = *task.ID - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil) mockVMService.EXPECT().GetTask(elfMachine.Status.TaskRef).Return(task, nil) mockVMService.EXPECT().ShutDown(elfMachine.Status.VMRef).Return(task, errors.New("some error")) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) _, _ = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(logBuffer.String()).To(ContainSubstring("VM task failed")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.Status.VMRef).To(Equal(*vm.LocalID)) Expect(elfMachine.Status.TaskRef).To(Equal("")) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityWarning, clusterv1.DeletionFailedReason}}) @@ -2493,15 +2492,15 @@ var _ = Describe("ElfMachineReconciler", func() { task.ErrorMessage = pointer.String("JOB_VM_SHUTDOWN_TIMEOUT") elfMachine.Status.VMRef = *vm.LocalID elfMachine.Status.TaskRef = *task.ID - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil) mockVMService.EXPECT().GetTask(elfMachine.Status.TaskRef).Return(task, nil) mockVMService.EXPECT().PowerOff(elfMachine.Status.VMRef).Return(task, nil) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).NotTo(BeZero()) @@ -2509,7 +2508,7 @@ var _ = Describe("ElfMachineReconciler", func() { Expect(logBuffer.String()).To(ContainSubstring("VM task failed")) Expect(logBuffer.String()).To(ContainSubstring("Waiting for VM shut down")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.Status.VMRef).To(Equal(*vm.LocalID)) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, 
corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.TaskFailureReason}}) Expect(conditions.GetMessage(elfMachine, infrav1.VMProvisionedCondition)).To(Equal("JOB_VM_SHUTDOWN_TIMEOUT")) @@ -2524,15 +2523,15 @@ var _ = Describe("ElfMachineReconciler", func() { task.ErrorMessage = pointer.String("JOB_VM_SHUTDOWN_TIMEOUT") elfMachine.Status.VMRef = *vm.LocalID elfMachine.Status.TaskRef = *task.ID - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil) mockVMService.EXPECT().GetTask(elfMachine.Status.TaskRef).Return(task, nil) mockVMService.EXPECT().PowerOff(elfMachine.Status.VMRef).Return(task, nil) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).NotTo(BeZero()) @@ -2540,7 +2539,7 @@ var _ = Describe("ElfMachineReconciler", func() { Expect(logBuffer.String()).To(ContainSubstring("VM task failed")) Expect(logBuffer.String()).To(ContainSubstring("Waiting for VM shut down")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.Status.VMRef).To(Equal(*vm.LocalID)) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.TaskFailureReason}}) Expect(conditions.GetMessage(elfMachine, infrav1.VMProvisionedCondition)).To(Equal("JOB_VM_SHUTDOWN_TIMEOUT")) @@ -2554,20 +2553,20 @@ var _ = Describe("ElfMachineReconciler", func() { task.Status = &status elfMachine.Status.VMRef = *vm.LocalID elfMachine.Status.TaskRef = *task.ID - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil) mockVMService.EXPECT().GetTask(elfMachine.Status.TaskRef).Return(task, nil) mockVMService.EXPECT().ShutDown(elfMachine.Status.VMRef).Return(nil, errors.New("some error")) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) _, _ = reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(logBuffer.String()).To(ContainSubstring("VM task succeeded")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + 
Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.Status.VMRef).To(Equal(*vm.LocalID)) Expect(elfMachine.Status.TaskRef).To(Equal("")) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityWarning, clusterv1.DeletionFailedReason}}) @@ -2578,21 +2577,21 @@ var _ = Describe("ElfMachineReconciler", func() { vm.EntityAsyncStatus = nil task := fake.NewTowerTask() elfMachine.Status.VMRef = *vm.LocalID - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil) mockVMService.EXPECT().ShutDown(elfMachine.Status.VMRef).Return(task, nil) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).NotTo(BeZero()) Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("Waiting for VM shut down")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.Status.VMRef).To(Equal(*vm.LocalID)) Expect(elfMachine.Status.TaskRef).To(Equal(*task.ID)) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, clusterv1.DeletingReason}}) @@ -2605,21 +2604,21 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Status.VMRef = *vm.LocalID elfMachine.Spec.VGPUDevices = []infrav1.VGPUDeviceSpec{{}} elfCluster.Spec.VMGracefulShutdown = false - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil) mockVMService.EXPECT().ShutDown(elfMachine.Status.VMRef).Return(task, nil) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).NotTo(BeZero()) Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("Waiting for VM shut down")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) 
Expect(elfMachine.Status.VMRef).To(Equal(*vm.LocalID)) Expect(elfMachine.Status.TaskRef).To(Equal(*task.ID)) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, clusterv1.DeletingReason}}) @@ -2632,21 +2631,21 @@ var _ = Describe("ElfMachineReconciler", func() { status := models.VMStatusSTOPPED vm.Status = &status elfMachine.Status.VMRef = *vm.LocalID - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil) mockVMService.EXPECT().Delete(elfMachine.Status.VMRef).Return(nil, errors.New("some error")) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).To(BeZero()) Expect(err).ToNot(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("Destroying VM")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityWarning, clusterv1.DeletionFailedReason}}) }) @@ -2658,21 +2657,21 @@ var _ = Describe("ElfMachineReconciler", func() { vm.Status = &status task := fake.NewTowerTask() elfMachine.Status.VMRef = *vm.LocalID - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil) mockVMService.EXPECT().Delete(elfMachine.Status.VMRef).Return(task, nil) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).NotTo(BeZero()) Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("Waiting for VM to be deleted")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.Status.VMRef).To(Equal(*vm.LocalID)) Expect(elfMachine.Status.TaskRef).To(Equal(*task.ID)) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, 
clusterv1.DeletingReason}}) @@ -2686,21 +2685,21 @@ var _ = Describe("ElfMachineReconciler", func() { task := fake.NewTowerTask() elfMachine.Status.VMRef = *vm.LocalID elfCluster.Spec.VMGracefulShutdown = false - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil) mockVMService.EXPECT().PowerOff(elfMachine.Status.VMRef).Return(task, nil) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).NotTo(BeZero()) Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("Waiting for VM shut down")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.Status.VMRef).To(Equal(*vm.LocalID)) Expect(elfMachine.Status.TaskRef).To(Equal(*task.ID)) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, clusterv1.DeletingReason}}) @@ -2709,50 +2708,50 @@ var _ = Describe("ElfMachineReconciler", func() { It("should delete placement group when the deployment is deleted", func() { cluster.DeletionTimestamp = &metav1.Time{Time: time.Now().UTC()} cluster.Finalizers = append(cluster.Finalizers, clusterv1.ClusterFinalizer) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) machineContext.VMService = mockVMService - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.deletePlacementGroup(machineContext) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.deletePlacementGroup(ctx, machineContext) Expect(ok).To(BeTrue()) Expect(err).NotTo(HaveOccurred()) cluster.DeletionTimestamp = nil fake.ToControlPlaneMachine(machine, kcp) - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: 
mockNewVMService} - ok, err = reconciler.deletePlacementGroup(machineContext) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err = reconciler.deletePlacementGroup(ctx, machineContext) Expect(ok).To(BeTrue()) Expect(err).NotTo(HaveOccurred()) fake.ToWorkerMachine(machine, md) - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err = reconciler.deletePlacementGroup(machineContext) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err = reconciler.deletePlacementGroup(ctx, machineContext) Expect(ok).To(BeTrue()) Expect(err).NotTo(HaveOccurred()) - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - machineContext = newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err = reconciler.deletePlacementGroup(machineContext) + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + machineContext = newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err = reconciler.deletePlacementGroup(ctx, machineContext) Expect(ok).To(BeTrue()) Expect(err).NotTo(HaveOccurred()) md.DeletionTimestamp = &metav1.Time{Time: time.Now().UTC()} md.Finalizers = append(md.Finalizers, clusterv1.ClusterFinalizer) - ctrlContext = newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - machineContext = newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx = fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + machineContext = newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) machineContext.VMService = mockVMService - placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlContext.Client, machine, cluster) + placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlMgrCtx.Client, machine, cluster) Expect(err).NotTo(HaveOccurred()) placementGroup := fake.NewVMPlacementGroup([]string{}) placementGroup.Name = service.TowerString(placementGroupName) @@ -2760,8 +2759,8 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().DeleteVMPlacementGroupByID(gomock.Any(), *placementGroup.ID).Return(true, nil) setPGCache(placementGroup) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err = reconciler.deletePlacementGroup(machineContext) + reconciler = &ElfMachineReconciler{ControllerManagerContext: 
ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err = reconciler.deletePlacementGroup(ctx, machineContext) Expect(ok).To(BeTrue()) Expect(err).NotTo(HaveOccurred()) Expect(getPGFromCache(*placementGroup.Name)).To(BeNil()) @@ -2769,29 +2768,29 @@ var _ = Describe("ElfMachineReconciler", func() { md.DeletionTimestamp = nil md.Spec.Replicas = pointer.Int32(0) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(nil, errors.New(service.VMPlacementGroupNotFound)) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err = reconciler.deletePlacementGroup(machineContext) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err = reconciler.deletePlacementGroup(ctx, machineContext) Expect(ok).To(BeTrue()) Expect(err).NotTo(HaveOccurred()) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(nil, errors.New("error")) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err = reconciler.deletePlacementGroup(machineContext) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err = reconciler.deletePlacementGroup(ctx, machineContext) Expect(ok).To(BeFalse()) Expect(err).To(HaveOccurred()) mockVMService.EXPECT().GetVMPlacementGroup(placementGroupName).Return(placementGroup, nil) mockVMService.EXPECT().DeleteVMPlacementGroupByID(gomock.Any(), *placementGroup.ID).Return(false, errors.New("error")) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err = reconciler.deletePlacementGroup(machineContext) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err = reconciler.deletePlacementGroup(ctx, machineContext) Expect(ok).To(BeFalse()) Expect(err).To(HaveOccurred()) logBuffer.Reset() mockVMService.EXPECT().GetVMPlacementGroup(placementGroupName).Return(placementGroup, nil) mockVMService.EXPECT().DeleteVMPlacementGroupByID(gomock.Any(), *placementGroup.ID).Return(false, nil) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err = reconciler.deletePlacementGroup(machineContext) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err = reconciler.deletePlacementGroup(ctx, machineContext) Expect(ok).To(BeFalse()) Expect(err).NotTo(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring(fmt.Sprintf("Waiting for the placement group %s to be deleted", *placementGroup.Name))) @@ -2806,9 +2805,9 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Status.VMRef = *vm.LocalID cluster.Status.ControlPlaneReady = true - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - ctrlContext.Client = testEnv.Client - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + ctrlMgrCtx.Client = testEnv.Client + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) machineContext.VMService = mockVMService mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) @@ -2829,14 +2828,14 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, 
nil) mockVMService.EXPECT().Delete(elfMachine.Status.VMRef).Return(task, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - result, err := reconciler.reconcileDelete(machineContext) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + result, err := reconciler.reconcileDelete(ctx, machineContext) Expect(result.RequeueAfter).NotTo(BeZero()) Expect(err).ToNot(HaveOccurred()) // check k8s node has been deleted. Eventually(func() bool { - err := ctrlContext.Client.Get(ctx, client.ObjectKeyFromObject(node), node) + err := ctrlMgrCtx.Client.Get(ctx, client.ObjectKeyFromObject(node), node) return apierrors.IsNotFound(err) }, timeout).Should(BeTrue()) @@ -2858,9 +2857,9 @@ var _ = Describe("ElfMachineReconciler", func() { cluster.Finalizers = append(cluster.Finalizers, clusterv1.ClusterFinalizer) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - ctrlContext.Client = testEnv.Client - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + ctrlMgrCtx.Client = testEnv.Client + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) machineContext.VMService = mockVMService // before reconcile, create k8s node for VM. @@ -2880,13 +2879,13 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil) mockVMService.EXPECT().Delete(elfMachine.Status.VMRef).Return(task, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - result, err := reconciler.reconcileDelete(machineContext) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + result, err := reconciler.reconcileDelete(ctx, machineContext) Expect(result.RequeueAfter).NotTo(BeZero()) Expect(err).ToNot(HaveOccurred()) // check k8s node still existed. 
- err = ctrlContext.Client.Get(ctx, client.ObjectKeyFromObject(node), node) + err = ctrlMgrCtx.Client.Get(ctx, client.ObjectKeyFromObject(node), node) Expect(err).ToNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("Waiting for VM to be deleted")) @@ -2904,9 +2903,9 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Status.VMRef = *vm.LocalID cluster.Status.ControlPlaneReady = false - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - ctrlContext.Client = testEnv.Client - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + ctrlMgrCtx.Client = testEnv.Client + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) machineContext.VMService = mockVMService mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) @@ -2927,13 +2926,13 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil) mockVMService.EXPECT().Delete(elfMachine.Status.VMRef).Return(task, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - result, err := reconciler.reconcileDelete(machineContext) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + result, err := reconciler.reconcileDelete(ctx, machineContext) Expect(result.RequeueAfter).NotTo(BeZero()) Expect(err).ToNot(HaveOccurred()) // check k8s node still existed. - err = ctrlContext.Client.Get(ctx, client.ObjectKeyFromObject(node), node) + err = ctrlMgrCtx.Client.Get(ctx, client.ObjectKeyFromObject(node), node) Expect(err).ToNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("Waiting for VM to be deleted")) @@ -2950,9 +2949,9 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Status.VMRef = *vm.LocalID cluster.Status.ControlPlaneReady = true - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - ctrlContext.Client = testEnv.Client - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + ctrlMgrCtx.Client = testEnv.Client + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) machineContext.VMService = mockVMService mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) @@ -2967,14 +2966,14 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().Get(elfMachine.Status.VMRef).Return(vm, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - _, err := reconciler.reconcileDelete(machineContext) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + _, err := reconciler.reconcileDelete(ctx, machineContext) Expect(err).NotTo(BeZero()) Expect(err.Error()).To(ContainSubstring("failed to get client")) // check k8s node still existed. 
- err = ctrlContext.Client.Get(ctx, client.ObjectKeyFromObject(node), node) + err = ctrlMgrCtx.Client.Get(ctx, client.ObjectKeyFromObject(node), node) Expect(err).ShouldNot(HaveOccurred()) }) }) @@ -2990,22 +2989,22 @@ var _ = Describe("ElfMachineReconciler", func() { towerCluster := fake.NewTowerCluster() placementGroup := fake.NewVMPlacementGroup(nil) placementGroup.EntityAsyncStatus = models.NewEntityAsyncStatus(models.EntityAsyncStatusUPDATING) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) - placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlContext.Client, machine, cluster) + placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlMgrCtx.Client, machine, cluster) Expect(err).NotTo(HaveOccurred()) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - result, err := reconciler.reconcilePlacementGroup(machineContext) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + result, err := reconciler.reconcilePlacementGroup(ctx, machineContext) Expect(result.RequeueAfter).To(Equal(config.DefaultRequeueTimeout)) Expect(err).To(BeZero()) placementGroup.EntityAsyncStatus = nil mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - result, err = reconciler.reconcilePlacementGroup(machineContext) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + result, err = reconciler.reconcilePlacementGroup(ctx, machineContext) Expect(result).To(BeZero()) Expect(err).To(BeZero()) @@ -3021,8 +3020,8 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().CreateVMPlacementGroup(gomock.Any(), *towerCluster.ID, towerresources.GetVMPlacementGroupPolicy(machine)).Return(withTaskVMPlacementGroup, nil) mockVMService.EXPECT().WaitTask(gomock.Any(), *task.ID, config.WaitTaskTimeoutForPlacementGroupOperation, config.WaitTaskInterval).Return(task, nil) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - result, err = reconciler.reconcilePlacementGroup(machineContext) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + result, err = reconciler.reconcilePlacementGroup(ctx, machineContext) Expect(result).To(BeZero()) Expect(err).To(BeZero()) Expect(logBuffer.String()).To(ContainSubstring("Creating placement group succeeded")) @@ -3036,7 +3035,7 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().CreateVMPlacementGroup(gomock.Any(), *towerCluster.ID, towerresources.GetVMPlacementGroupPolicy(machine)).Return(withTaskVMPlacementGroup, nil) mockVMService.EXPECT().WaitTask(gomock.Any(), *task.ID, config.WaitTaskTimeoutForPlacementGroupOperation, 
config.WaitTaskInterval).Return(task, nil) - result, err = reconciler.reconcilePlacementGroup(machineContext) + result, err = reconciler.reconcilePlacementGroup(ctx, machineContext) Expect(result).To(BeZero()) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("failed to create placement group")) @@ -3049,7 +3048,7 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().CreateVMPlacementGroup(gomock.Any(), *towerCluster.ID, towerresources.GetVMPlacementGroupPolicy(machine)).Return(withTaskVMPlacementGroup, nil) mockVMService.EXPECT().WaitTask(gomock.Any(), *task.ID, config.WaitTaskTimeoutForPlacementGroupOperation, config.WaitTaskInterval).Return(nil, errors.New("xxx")) - result, err = reconciler.reconcilePlacementGroup(machineContext) + result, err = reconciler.reconcilePlacementGroup(ctx, machineContext) Expect(result).To(BeZero()) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring(fmt.Sprintf("failed to wait for placement group creating task to complete in %s: pgName %s, taskID %s", config.WaitTaskTimeoutForPlacementGroupOperation, placementGroupName, *withTaskVMPlacementGroup.TaskID))) @@ -3065,7 +3064,7 @@ var _ = Describe("ElfMachineReconciler", func() { task.ErrorMessage = pointer.String(service.VMPlacementGroupDuplicate) mockVMService.EXPECT().WaitTask(gomock.Any(), *task.ID, config.WaitTaskTimeoutForPlacementGroupOperation, config.WaitTaskInterval).Return(task, nil) - result, err = reconciler.reconcilePlacementGroup(machineContext) + result, err = reconciler.reconcilePlacementGroup(ctx, machineContext) Expect(result).To(BeZero()) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("failed to create placement group")) @@ -3076,32 +3075,32 @@ var _ = Describe("ElfMachineReconciler", func() { klog.SetOutput(logBuffer) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(nil, errors.New(service.VMPlacementGroupNotFound)) - result, err = reconciler.reconcilePlacementGroup(machineContext) + result, err = reconciler.reconcilePlacementGroup(ctx, machineContext) Expect(result.RequeueAfter).To(Equal(config.DefaultRequeueTimeout)) Expect(err).NotTo(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring(fmt.Sprintf("Tower has duplicate placement group, skip creating placement group %s", placementGroupName))) }) It("should save and get placement group cache", func() { - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlContext.Client, machine, cluster) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlMgrCtx.Client, machine, cluster) Expect(err).NotTo(HaveOccurred()) placementGroup := fake.NewVMPlacementGroup(nil) placementGroup.Name = service.TowerString(placementGroupName) mockVMService.EXPECT().GetVMPlacementGroup(gomock.Any()).Return(placementGroup, nil) Expect(getPGFromCache(*placementGroup.Name)).To(BeNil()) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: 
mockNewVMService} - pg, err := reconciler.getPlacementGroup(machineContext, placementGroupName) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + pg, err := reconciler.getPlacementGroup(ctx, machineContext, placementGroupName) Expect(err).To(BeZero()) Expect(pg).To(Equal(placementGroup)) Expect(getPGFromCache(*placementGroup.Name)).To(Equal(placementGroup)) // Use cache - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - pg, err = reconciler.getPlacementGroup(machineContext, placementGroupName) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + pg, err = reconciler.getPlacementGroup(ctx, machineContext, placementGroupName) Expect(err).To(BeZero()) Expect(pg).To(Equal(placementGroup)) Expect(getPGFromCache(*placementGroup.Name)).To(Equal(placementGroup)) @@ -3116,16 +3115,16 @@ var _ = Describe("ElfMachineReconciler", func() { }) It("should wait for MachineFinalizer", func() { - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) originalElfMachine := elfMachine.DeepCopy() - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).NotTo(BeZero()) Expect(err).ShouldNot(HaveOccurred()) - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) Expect(elfMachine.Finalizers).To(Equal([]string{infrav1.MachineFinalizer})) err = patchutil.AddFinalizerWithOptimisticLock(ctx, reconciler.Client, originalElfMachine, infrav1.MachineFinalizer) @@ -3138,10 +3137,10 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Spec.Network.Devices = []infrav1.NetworkDeviceSpec{ {NetworkType: infrav1.NetworkTypeIPV4}, } - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).NotTo(BeZero()) @@ -3186,17 +3185,17 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Spec.Network.Devices = []infrav1.NetworkDeviceSpec{ {NetworkType: infrav1.NetworkTypeIPV4, IPAddrs: []string{"127.0.0.1"}}, } - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + 
ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) mockVMService.EXPECT().Clone(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, errors.New("some error")) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).To(BeZero()) Expect(err).Should(HaveOccurred()) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityWarning, infrav1.CloningFailedReason}}) }) }) @@ -3205,15 +3204,15 @@ var _ = Describe("ElfMachineReconciler", func() { It("should handle task missing", func() { task := fake.NewTowerTask() elfMachine.Status.TaskRef = *task.ID - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) machineContext.VMService = mockVMService mockVMService.EXPECT().GetTask(elfMachine.Status.TaskRef).Return(nil, errors.New(service.TaskNotFound)) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.reconcileVMTask(machineContext, nil) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.reconcileVMTask(ctx, machineContext, nil) Expect(ok).Should(BeTrue()) Expect(err).NotTo(HaveOccurred()) Expect(elfMachine.Status.TaskRef).To(Equal("")) @@ -3222,15 +3221,15 @@ var _ = Describe("ElfMachineReconciler", func() { It("should handle failed to get task", func() { task := fake.NewTowerTask() elfMachine.Status.TaskRef = *task.ID - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) machineContext.VMService = mockVMService mockVMService.EXPECT().GetTask(elfMachine.Status.TaskRef).Return(nil, errors.New("some error")) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.reconcileVMTask(machineContext, nil) + reconciler := 
&ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.reconcileVMTask(ctx, machineContext, nil) Expect(ok).Should(BeFalse()) Expect(strings.Contains(err.Error(), "failed to get task")).To(BeTrue()) Expect(elfMachine.Status.TaskRef).To(Equal(*task.ID)) @@ -3243,22 +3242,22 @@ var _ = Describe("ElfMachineReconciler", func() { task := fake.NewTowerTask() task.Status = models.NewTaskStatus(models.TaskStatusFAILED) elfMachine.Status.TaskRef = *task.ID - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) machineContext.VMService = mockVMService mockVMService.EXPECT().GetTask(elfMachine.Status.TaskRef).AnyTimes().Return(task, nil) vm := fake.NewTowerVMFromElfMachine(elfMachine) vm.EntityAsyncStatus = models.NewEntityAsyncStatus(models.EntityAsyncStatusUPDATING) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.reconcileVMTask(machineContext, vm) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.reconcileVMTask(ctx, machineContext, vm) Expect(ok).Should(BeFalse()) Expect(err).ShouldNot(HaveOccurred()) elfMachine.Status.TaskRef = *task.ID task.ErrorMessage = service.TowerString(service.MemoryInsufficientError) - ok, err = reconciler.reconcileVMTask(machineContext, nil) + ok, err = reconciler.reconcileVMTask(ctx, machineContext, nil) Expect(ok).Should(BeTrue()) Expect(err.Error()).To(ContainSubstring("Insufficient memory detected for the ELF cluster")) Expect(elfMachine.Status.TaskRef).To(Equal("")) @@ -3267,13 +3266,13 @@ var _ = Describe("ElfMachineReconciler", func() { logBuffer.Reset() task.ErrorMessage = service.TowerString(service.PlacementGroupMustError) elfMachine.Status.TaskRef = *task.ID - ok, err = reconciler.reconcileVMTask(machineContext, nil) + ok, err = reconciler.reconcileVMTask(ctx, machineContext, nil) Expect(ok).Should(BeTrue()) Expect(err.Error()).To(ContainSubstring("The placement group policy can not be satisfied")) Expect(logBuffer.String()).To(ContainSubstring("VM task failed")) logBuffer.Reset() - ok, msg, err := isELFScheduleVMErrorRecorded(machineContext) + ok, msg, err := isELFScheduleVMErrorRecorded(ctx, machineContext, ctrlMgrCtx.Client) Expect(ok).To(BeTrue()) Expect(msg).To(ContainSubstring("Insufficient memory detected for the ELF cluster")) Expect(err).ShouldNot(HaveOccurred()) @@ -3282,14 +3281,14 @@ var _ = Describe("ElfMachineReconciler", func() { logBuffer.Reset() elfMachine.Status.TaskRef = *task.ID task.ErrorMessage = service.TowerString(service.StorageInsufficientError) - ok, err = reconciler.reconcileVMTask(machineContext, nil) + ok, err = reconciler.reconcileVMTask(ctx, machineContext, nil) Expect(ok).Should(BeTrue()) Expect(err.Error()).To(ContainSubstring("Insufficient storage detected for the ELF cluster")) Expect(elfMachine.Status.TaskRef).To(Equal("")) Expect(logBuffer.String()).To(ContainSubstring("VM task failed")) logBuffer.Reset() - 
ok, msg, err = isELFScheduleVMErrorRecorded(machineContext) + ok, msg, err = isELFScheduleVMErrorRecorded(ctx, machineContext, ctrlMgrCtx.Client) Expect(ok).To(BeTrue()) Expect(msg).To(ContainSubstring("Insufficient storage detected for the ELF cluster")) Expect(err).ShouldNot(HaveOccurred()) @@ -3298,12 +3297,12 @@ var _ = Describe("ElfMachineReconciler", func() { task.Status = models.NewTaskStatus(models.TaskStatusSUCCESSED) task.Description = service.TowerString("Start VM") elfMachine.Status.TaskRef = *task.ID - ok, err = reconciler.reconcileVMTask(machineContext, nil) + ok, err = reconciler.reconcileVMTask(ctx, machineContext, nil) Expect(ok).Should(BeTrue()) Expect(err).ShouldNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("VM task succeeded")) - ok, msg, err = isELFScheduleVMErrorRecorded(machineContext) + ok, msg, err = isELFScheduleVMErrorRecorded(ctx, machineContext, ctrlMgrCtx.Client) Expect(ok).To(BeFalse()) Expect(msg).To(Equal("")) Expect(err).ShouldNot(HaveOccurred()) @@ -3313,7 +3312,7 @@ var _ = Describe("ElfMachineReconciler", func() { task.Description = service.TowerString("Create a VM") task.ErrorMessage = service.TowerString(service.VMDuplicateError) elfMachine.Status.TaskRef = *task.ID - ok, err = reconciler.reconcileVMTask(machineContext, nil) + ok, err = reconciler.reconcileVMTask(ctx, machineContext, nil) Expect(ok).Should(BeTrue()) Expect(err).ShouldNot(HaveOccurred()) ok, _ = acquireTicketForCreateVM(elfMachine.Name, true) @@ -3343,7 +3342,7 @@ var _ = Describe("ElfMachineReconciler", func() { task.Description = service.TowerString(tc.description) task.ErrorMessage = service.TowerString("error") elfMachine.Status.TaskRef = *task.ID - ok, err = reconciler.reconcileVMTask(machineContext, nil) + ok, err = reconciler.reconcileVMTask(ctx, machineContext, nil) Expect(ok).Should(BeTrue()) Expect(err).ShouldNot(HaveOccurred()) Expect(getGPUDevicesLockedByVM(elfCluster.Spec.Cluster, elfMachine.Name)).To(BeNil()) @@ -3362,25 +3361,17 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Status.HostServerRef = fake.UUID() elfMachine.Status.HostServerName = fake.UUID() vm := fake.NewTowerVM() - ctrlMgrContext := &context.ControllerManagerContext{ - Context: goctx.Background(), + ctrlMgrCtx := &context.ControllerManagerContext{ Client: testEnv.Client, - Logger: ctrllog.Log, Name: fake.ControllerManagerName, LeaderElectionNamespace: fake.LeaderElectionNamespace, LeaderElectionID: fake.LeaderElectionID, } - ctrlContext := &context.ControllerContext{ - ControllerManagerContext: ctrlMgrContext, - Logger: ctrllog.Log, - } machineContext := &context.MachineContext{ - ControllerContext: ctrlContext, - Cluster: cluster, - Machine: machine, - ElfCluster: elfCluster, - ElfMachine: elfMachine, - Logger: ctrllog.Log, + Cluster: cluster, + Machine: machine, + ElfCluster: elfCluster, + ElfMachine: elfMachine, } node = &corev1.Node{ @@ -3392,8 +3383,8 @@ var _ = Describe("ElfMachineReconciler", func() { Expect(testEnv.CreateAndWait(ctx, node)).To(Succeed()) Expect(helpers.CreateKubeConfigSecret(testEnv, cluster.Namespace, cluster.Name)).To(Succeed()) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.reconcileNode(machineContext, vm) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.reconcileNode(ctx, machineContext, vm) Expect(ok).Should(BeTrue()) Expect(err).ToNot(HaveOccurred()) Eventually(func() bool { @@ 
-3413,25 +3404,17 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Status.HostServerRef = fake.UUID() elfMachine.Status.HostServerName = fake.UUID() vm := fake.NewTowerVM() - ctrlMgrContext := &context.ControllerManagerContext{ - Context: goctx.Background(), + ctrlMgrCtx := &context.ControllerManagerContext{ Client: testEnv.Client, - Logger: ctrllog.Log, Name: fake.ControllerManagerName, LeaderElectionNamespace: fake.LeaderElectionNamespace, LeaderElectionID: fake.LeaderElectionID, } - ctrlContext := &context.ControllerContext{ - ControllerManagerContext: ctrlMgrContext, - Logger: ctrllog.Log, - } machineContext := &context.MachineContext{ - ControllerContext: ctrlContext, - Cluster: cluster, - Machine: machine, - ElfCluster: elfCluster, - ElfMachine: elfMachine, - Logger: ctrllog.Log, + Cluster: cluster, + Machine: machine, + ElfCluster: elfCluster, + ElfMachine: elfMachine, } providerID := machineutil.ConvertUUIDToProviderID(fake.UUID()) @@ -3449,8 +3432,8 @@ var _ = Describe("ElfMachineReconciler", func() { Expect(testEnv.CreateAndWait(ctx, node)).To(Succeed()) Expect(helpers.CreateKubeConfigSecret(testEnv, cluster.Namespace, cluster.Name)).To(Succeed()) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.reconcileNode(machineContext, vm) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.reconcileNode(ctx, machineContext, vm) Expect(ok).Should(BeTrue()) Expect(err).ToNot(HaveOccurred()) @@ -3469,19 +3452,19 @@ var _ = Describe("ElfMachineReconciler", func() { Context("deleteDuplicateVMs", func() { It("should do nothing without duplicate virtual machines", func() { vm := fake.NewTowerVMFromElfMachine(elfMachine) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - ctrlContext.Client = testEnv.Client - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + ctrlMgrCtx.Client = testEnv.Client + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) machineContext.VMService = mockVMService mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return(nil, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - result, err := reconciler.deleteDuplicateVMs(machineContext) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + result, err := reconciler.deleteDuplicateVMs(ctx, machineContext) Expect(result).To(BeZero()) Expect(err).ToNot(HaveOccurred()) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return([]*models.VM{vm}, nil) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - result, err = reconciler.deleteDuplicateVMs(machineContext) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + result, err = reconciler.deleteDuplicateVMs(ctx, machineContext) Expect(result).To(BeZero()) Expect(err).ToNot(HaveOccurred()) }) @@ -3492,12 +3475,12 @@ var _ = Describe("ElfMachineReconciler", func() { elfMachine.Status.VMRef = *vm1.LocalID vm2 := fake.NewTowerVMFromElfMachine(elfMachine) vm2.Status = models.NewVMStatus(models.VMStatusSTOPPED) - ctrlContext := newCtrlContexts(elfCluster, cluster, 
elfMachine, machine, secret, md) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return([]*models.VM{vm1, vm2}, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - result, err := reconciler.deleteDuplicateVMs(machineContext) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + result, err := reconciler.deleteDuplicateVMs(ctx, machineContext) Expect(result.RequeueAfter).To(Equal(config.DefaultRequeueTimeout)) Expect(err).ToNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("Waiting for VM task done before deleting the duplicate VM")) @@ -3508,12 +3491,12 @@ var _ = Describe("ElfMachineReconciler", func() { vm1.Status = models.NewVMStatus(models.VMStatusSTOPPED) vm1.EntityAsyncStatus = nil vm2 := fake.NewTowerVMFromElfMachine(elfMachine) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return([]*models.VM{vm1, vm2}, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - result, err := reconciler.deleteDuplicateVMs(machineContext) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + result, err := reconciler.deleteDuplicateVMs(ctx, machineContext) Expect(result.RequeueAfter).To(Equal(config.DefaultRequeueTimeout)) Expect(err).ToNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("Waiting for ElfMachine to select one of the duplicate VMs before deleting the other")) @@ -3526,15 +3509,15 @@ var _ = Describe("ElfMachineReconciler", func() { vm2 := fake.NewTowerVMFromElfMachine(elfMachine) vm2.Status = models.NewVMStatus(models.VMStatusSTOPPED) vm2.EntityAsyncStatus = nil - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) task := fake.NewTowerTask() task.Status = models.NewTaskStatus(models.TaskStatusSUCCESSED) mockVMService.EXPECT().FindVMsByName(elfMachine.Name).Return([]*models.VM{vm1, vm2}, nil) mockVMService.EXPECT().Delete(*vm2.ID).Return(task, nil) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - result, err := reconciler.deleteDuplicateVMs(machineContext) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + result, err := reconciler.deleteDuplicateVMs(ctx, machineContext) Expect(result.RequeueAfter).To(Equal(config.DefaultRequeueTimeout)) Expect(err).ToNot(HaveOccurred()) 
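// Illustrative sketch, not part of this patch: how these specs observe contextual log output.
// klog is pointed at an in-memory buffer and the reconciler logs through the logger carried in
// ctx, so the assertion runs against the buffer. This assumes the suite backs the
// controller-runtime logger with klog; the names below mirror the surrounding tests.
//
//	logBuffer := new(bytes.Buffer)
//	klog.SetOutput(logBuffer)
//	ctrl.LoggerFrom(ctx).Info("Destroying duplicate VM")
//	Expect(logBuffer.String()).To(ContainSubstring("Destroying duplicate VM"))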
Expect(logBuffer.String()).To(ContainSubstring(fmt.Sprintf("Destroying duplicate VM %s in task %s", *vm2.ID, *task.ID))) @@ -3542,11 +3525,11 @@ var _ = Describe("ElfMachineReconciler", func() { It("should skip checking duplicate virtual machines after more than half an hour", func() { elfMachine.CreationTimestamp = metav1.NewTime(time.Now().Add(-(1*checkDuplicateVMDuration + 1*time.Second))) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - result, err := reconciler.deleteDuplicateVMs(machineContext) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + result, err := reconciler.deleteDuplicateVMs(ctx, machineContext) Expect(result).To(BeZero()) Expect(err).ToNot(HaveOccurred()) }) @@ -3571,8 +3554,8 @@ var _ = Describe("ElfMachineReconciler", func() { } vm := fake.NewTowerVMFromElfMachine(elfMachine) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, mockVMService) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + machineContext := newMachineContext(elfCluster, cluster, elfMachine, machine, mockVMService) machineContext.VMService = mockVMService unexpectedError := errors.New("unexpected error") @@ -3580,8 +3563,8 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().UpsertLabel(*namespaceLabel.Key, *namespaceLabel.Value).Return(namespaceLabel, nil) mockVMService.EXPECT().UpsertLabel(*clusterNameLabel.Key, *clusterNameLabel.Value).Return(clusterNameLabel, nil) mockVMService.EXPECT().AddLabelsToVM(*vm.ID, gomock.InAnyOrder([]string{*capeManagedLabel.ID, *namespaceLabel.ID, *clusterNameLabel.ID})).Return(nil, unexpectedError) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err := reconciler.reconcileLabels(machineContext, vm) + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err := reconciler.reconcileLabels(ctx, machineContext, vm) Expect(ok).To(BeFalse()) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(Equal(unexpectedError.Error())) @@ -3591,15 +3574,15 @@ var _ = Describe("ElfMachineReconciler", func() { mockVMService.EXPECT().UpsertLabel(*namespaceLabel.Key, *namespaceLabel.Value).Return(namespaceLabel, nil) mockVMService.EXPECT().UpsertLabel(*clusterNameLabel.Key, *clusterNameLabel.Value).Return(clusterNameLabel, nil) mockVMService.EXPECT().AddLabelsToVM(*vm.ID, gomock.InAnyOrder([]string{*capeManagedLabel.ID, *namespaceLabel.ID, *clusterNameLabel.ID})).Return(nil, nil) - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err = reconciler.reconcileLabels(machineContext, vm) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err = reconciler.reconcileLabels(ctx, machineContext, vm) Expect(ok).To(BeTrue()) Expect(err).ToNot(HaveOccurred()) 
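// Illustrative sketch, not part of this patch: the shape reconciler helpers take after this
// migration. The context.Context is passed explicitly as the first argument, MachineContext no
// longer carries a Logger, and the logger is read from ctx. The body here is hypothetical and
// heavily elided; only the signature matches the calls in the specs above.
//
//	func (r *ElfMachineReconciler) reconcileLabels(ctx goctx.Context, machineCtx *context.MachineContext, vm *models.VM) (bool, error) {
//		log := ctrl.LoggerFrom(ctx)
//		log.V(4).Info("Reconciling labels for VM", "vm", *vm.Name)
//		// upsert the cape-managed, namespace and cluster-name labels, then attach them to the VM
//		return true, nil
//	}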
Expect(getLabelFromCache(*capeManagedLabel.Key)).To(Equal(capeManagedLabel)) vm.Labels = []*models.NestedLabel{{ID: capeManagedLabel.ID}} - reconciler = &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} - ok, err = reconciler.reconcileLabels(machineContext, vm) + reconciler = &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} + ok, err = reconciler.reconcileLabels(ctx, machineContext, vm) Expect(ok).To(BeTrue()) Expect(err).ToNot(HaveOccurred()) }) @@ -3609,18 +3592,18 @@ var _ = Describe("ElfMachineReconciler", func() { func waitStaticIPAllocationSpec(mockNewVMService func(ctx goctx.Context, auth infrav1.Tower, logger logr.Logger) (service.VMService, error), elfCluster *infrav1.ElfCluster, cluster *clusterv1.Cluster, elfMachine *infrav1.ElfMachine, machine *clusterv1.Machine, secret *corev1.Secret, md *clusterv1.MachineDeployment) { - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - fake.InitOwnerReferences(ctrlContext, elfCluster, cluster, elfMachine, machine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + fake.InitOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster, elfMachine, machine) logBuffer := new(bytes.Buffer) klog.SetOutput(logBuffer) - reconciler := &ElfMachineReconciler{ControllerContext: ctrlContext, NewVMService: mockNewVMService} + reconciler := &ElfMachineReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService} elfMachineKey := capiutil.ObjectKey(elfMachine) result, err := reconciler.Reconcile(goctx.Background(), ctrl.Request{NamespacedName: elfMachineKey}) Expect(result.RequeueAfter).To(BeZero()) Expect(err).ShouldNot(HaveOccurred()) Expect(logBuffer.String()).To(ContainSubstring("VM is waiting for static ip to be available")) elfMachine = &infrav1.ElfMachine{} - Expect(reconciler.Client.Get(reconciler, elfMachineKey, elfMachine)).To(Succeed()) + Expect(reconciler.Client.Get(ctx, elfMachineKey, elfMachine)).To(Succeed()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForStaticIPAllocationReason}}) } diff --git a/controllers/suite_test.go b/controllers/suite_test.go index 32a1dee1..2e03414e 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -32,15 +32,13 @@ import ( "k8s.io/klog/v2" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/util/conditions" - "sigs.k8s.io/controller-runtime/pkg/client" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/controller" - ctrllog "sigs.k8s.io/controller-runtime/pkg/log" infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" "github.com/smartxworks/cluster-api-provider-elf/pkg/context" "github.com/smartxworks/cluster-api-provider-elf/pkg/manager" "github.com/smartxworks/cluster-api-provider-elf/pkg/service" - "github.com/smartxworks/cluster-api-provider-elf/test/fake" "github.com/smartxworks/cluster-api-provider-elf/test/helpers" ) @@ -50,6 +48,7 @@ const ( var ( testEnv *helpers.TestEnvironment + ctx = ctrl.SetupSignalHandler() ) func TestControllers(t *testing.T) { @@ -85,14 +84,14 @@ func setup() { utilruntime.Must(infrav1.AddToScheme(cgscheme.Scheme)) utilruntime.Must(clusterv1.AddToScheme(cgscheme.Scheme)) - testEnv = helpers.NewTestEnvironment() + testEnv = helpers.NewTestEnvironment(ctx) // Set kubeconfig. 
os.Setenv("KUBECONFIG", testEnv.Kubeconfig) go func() { fmt.Println("Starting the manager") - if err := testEnv.StartManager(testEnv.GetContext()); err != nil { + if err := testEnv.StartManager(ctx); err != nil { panic(fmt.Sprintf("failed to start the envtest manager: %v", err)) } }() @@ -104,16 +103,16 @@ func setup() { Name: manager.DefaultPodNamespace, }, } - if err := testEnv.CreateAndWait(testEnv.GetContext(), ns); err != nil { + if err := testEnv.CreateAndWait(ctx, ns); err != nil { panic("unable to create controller namespace") } controllerOpts := controller.Options{MaxConcurrentReconciles: 10} - if err := AddClusterControllerToManager(testEnv.GetContext(), testEnv.Manager, controllerOpts); err != nil { + if err := AddClusterControllerToManager(ctx, testEnv.GetControllerManagerContext(), testEnv.Manager, controllerOpts); err != nil { panic(fmt.Sprintf("unable to setup ElfCluster controller: %v", err)) } - if err := AddMachineControllerToManager(testEnv.GetContext(), testEnv.Manager, controllerOpts); err != nil { + if err := AddMachineControllerToManager(ctx, testEnv.GetControllerManagerContext(), testEnv.Manager, controllerOpts); err != nil { panic(fmt.Sprintf("unable to setup ElfMachine controller: %v", err)) } } @@ -124,28 +123,16 @@ func teardown() { } } -func newCtrlContexts(objs ...client.Object) *context.ControllerContext { - ctrlMgrContext := fake.NewControllerManagerContext(objs...) - ctrlContext := &context.ControllerContext{ - ControllerManagerContext: ctrlMgrContext, - Logger: ctrllog.Log, - } - - return ctrlContext -} - -func newMachineContext(ctrlCtx *context.ControllerContext, +func newMachineContext( elfCluster *infrav1.ElfCluster, cluster *clusterv1.Cluster, elfMachine *infrav1.ElfMachine, machine *clusterv1.Machine, vmService service.VMService) *context.MachineContext { return &context.MachineContext{ - ControllerContext: ctrlCtx, - Cluster: cluster, - ElfCluster: elfCluster, - Machine: machine, - ElfMachine: elfMachine, - Logger: ctrlCtx.Logger, - VMService: vmService, + Cluster: cluster, + ElfCluster: elfCluster, + Machine: machine, + ElfMachine: elfMachine, + VMService: vmService, } } diff --git a/controllers/tower_cache.go b/controllers/tower_cache.go index 9e3d9c86..33be8c4a 100644 --- a/controllers/tower_cache.go +++ b/controllers/tower_cache.go @@ -17,6 +17,7 @@ limitations under the License. package controllers import ( + goctx "context" "fmt" "sync" "time" @@ -24,6 +25,7 @@ import ( "github.com/smartxworks/cloudtower-go-sdk/v2/models" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/util/conditions" + "sigs.k8s.io/controller-runtime/pkg/client" infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" "github.com/smartxworks/cluster-api-provider-elf/pkg/context" @@ -52,27 +54,27 @@ type clusterResource struct { // 1. ELF cluster has insufficient memory. // 2. ELF cluster has insufficient storage. // 3. Cannot satisfy the PlacementGroup policy. 
-func isELFScheduleVMErrorRecorded(ctx *context.MachineContext) (bool, string, error) { +func isELFScheduleVMErrorRecorded(ctx goctx.Context, machineCtx *context.MachineContext, ctrlClient client.Client) (bool, string, error) { lock.Lock() defer lock.Unlock() - if resource := getClusterResource(getKeyForInsufficientMemoryError(ctx.ElfCluster.Spec.Cluster)); resource != nil { - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForELFClusterWithSufficientMemoryReason, clusterv1.ConditionSeverityInfo, "") + if resource := getClusterResource(getKeyForInsufficientMemoryError(machineCtx.ElfCluster.Spec.Cluster)); resource != nil { + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForELFClusterWithSufficientMemoryReason, clusterv1.ConditionSeverityInfo, "") - return true, fmt.Sprintf("Insufficient memory detected for the ELF cluster %s", ctx.ElfCluster.Spec.Cluster), nil - } else if resource := getClusterResource(getKeyForInsufficientStorageError(ctx.ElfCluster.Spec.Cluster)); resource != nil { - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForELFClusterWithSufficientStorageReason, clusterv1.ConditionSeverityInfo, "") + return true, fmt.Sprintf("Insufficient memory detected for the ELF cluster %s", machineCtx.ElfCluster.Spec.Cluster), nil + } else if resource := getClusterResource(getKeyForInsufficientStorageError(machineCtx.ElfCluster.Spec.Cluster)); resource != nil { + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForELFClusterWithSufficientStorageReason, clusterv1.ConditionSeverityInfo, "") - return true, fmt.Sprintf("Insufficient storage detected for the ELF cluster %s", ctx.ElfCluster.Spec.Cluster), nil + return true, fmt.Sprintf("Insufficient storage detected for the ELF cluster %s", machineCtx.ElfCluster.Spec.Cluster), nil } - placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctx.Client, ctx.Machine, ctx.Cluster) + placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlClient, machineCtx.Machine, machineCtx.Cluster) if err != nil { return false, "", err } if resource := getClusterResource(getKeyForDuplicatePlacementGroupError(placementGroupName)); resource != nil { - conditions.MarkFalse(ctx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForPlacementGroupPolicySatisfiedReason, clusterv1.ConditionSeverityInfo, "") + conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.WaitingForPlacementGroupPolicySatisfiedReason, clusterv1.ConditionSeverityInfo, "") return true, fmt.Sprintf("Not satisfy policy detected for the placement group %s", placementGroupName), nil } @@ -81,8 +83,8 @@ func isELFScheduleVMErrorRecorded(ctx *context.MachineContext) (bool, string, er } // recordElfClusterMemoryInsufficient records whether the memory is insufficient. -func recordElfClusterMemoryInsufficient(ctx *context.MachineContext, isInsufficient bool) { - key := getKeyForInsufficientMemoryError(ctx.ElfCluster.Spec.Cluster) +func recordElfClusterMemoryInsufficient(machineCtx *context.MachineContext, isInsufficient bool) { + key := getKeyForInsufficientMemoryError(machineCtx.ElfCluster.Spec.Cluster) if isInsufficient { inMemoryCache.Set(key, newClusterResource(), resourceDuration) } else { @@ -91,8 +93,8 @@ func recordElfClusterMemoryInsufficient(ctx *context.MachineContext, isInsuffici } // recordElfClusterStorageInsufficient records whether the storage is insufficient. 
-func recordElfClusterStorageInsufficient(ctx *context.MachineContext, isError bool) { - key := getKeyForInsufficientStorageError(ctx.ElfCluster.Spec.Cluster) +func recordElfClusterStorageInsufficient(machineCtx *context.MachineContext, isError bool) { + key := getKeyForInsufficientStorageError(machineCtx.ElfCluster.Spec.Cluster) if isError { inMemoryCache.Set(key, newClusterResource(), resourceDuration) } else { @@ -101,8 +103,8 @@ func recordElfClusterStorageInsufficient(ctx *context.MachineContext, isError bo } // recordPlacementGroupPolicyNotSatisfied records whether the placement group not satisfy policy. -func recordPlacementGroupPolicyNotSatisfied(ctx *context.MachineContext, isPGPolicyNotSatisfied bool) error { - placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctx.Client, ctx.Machine, ctx.Cluster) +func recordPlacementGroupPolicyNotSatisfied(ctx goctx.Context, machineCtx *context.MachineContext, ctrlClient client.Client, isPGPolicyNotSatisfied bool) error { + placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlClient, machineCtx.Machine, machineCtx.Cluster) if err != nil { return err } @@ -127,17 +129,17 @@ func newClusterResource() *clusterResource { // canRetryVMOperation returns whether virtual machine operations(Create/PowerOn) // can be performed. -func canRetryVMOperation(ctx *context.MachineContext) (bool, error) { +func canRetryVMOperation(ctx goctx.Context, machineCtx *context.MachineContext, ctrlClient client.Client) (bool, error) { lock.Lock() defer lock.Unlock() - if ok := canRetry(getKeyForInsufficientStorageError(ctx.ElfCluster.Spec.Cluster)); ok { + if ok := canRetry(getKeyForInsufficientStorageError(machineCtx.ElfCluster.Spec.Cluster)); ok { return true, nil - } else if ok := canRetry(getKeyForInsufficientMemoryError(ctx.ElfCluster.Spec.Cluster)); ok { + } else if ok := canRetry(getKeyForInsufficientMemoryError(machineCtx.ElfCluster.Spec.Cluster)); ok { return true, nil } - placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctx.Client, ctx.Machine, ctx.Cluster) + placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlClient, machineCtx.Machine, machineCtx.Cluster) if err != nil { return false, err } diff --git a/controllers/tower_cache_test.go b/controllers/tower_cache_test.go index d8c52b24..111f37d8 100644 --- a/controllers/tower_cache_test.go +++ b/controllers/tower_cache_test.go @@ -17,6 +17,7 @@ limitations under the License. 
package controllers import ( + goctx "context" "strings" "time" @@ -25,6 +26,7 @@ import ( "github.com/smartxworks/cloudtower-go-sdk/v2/models" corev1 "k8s.io/api/core/v1" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/controller-runtime/pkg/client" infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" "github.com/smartxworks/cluster-api-provider-elf/pkg/context" @@ -54,26 +56,26 @@ var _ = Describe("TowerCache", func() { md.Name = name fake.ToWorkerMachine(machine, md) fake.ToWorkerMachine(elfMachine, md) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, nil) - key := getKey(machineContext, name) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + machineCtx := newMachineContext(elfCluster, cluster, elfMachine, machine, nil) + key := getKey(ctx, machineCtx, ctrlMgrCtx.Client, name) _, found := inMemoryCache.Get(key) Expect(found).To(BeFalse()) - recordOrClearError(machineContext, name, true) + recordOrClearError(ctx, machineCtx, ctrlMgrCtx.Client, name, true) _, found = inMemoryCache.Get(key) Expect(found).To(BeTrue()) resource := getClusterResource(key) Expect(resource.LastDetected).To(Equal(resource.LastRetried)) - recordOrClearError(machineContext, name, true) + recordOrClearError(ctx, machineCtx, ctrlMgrCtx.Client, name, true) lastDetected := resource.LastDetected resource = getClusterResource(key) Expect(resource.LastDetected).To(Equal(resource.LastRetried)) Expect(resource.LastDetected.After(lastDetected)).To(BeTrue()) - recordOrClearError(machineContext, name, false) + recordOrClearError(ctx, machineCtx, ctrlMgrCtx.Client, name, false) resource = getClusterResource(key) Expect(resource).To(BeNil()) @@ -81,17 +83,17 @@ var _ = Describe("TowerCache", func() { _, found = inMemoryCache.Get(key) Expect(found).To(BeFalse()) - recordOrClearError(machineContext, name, false) + recordOrClearError(ctx, machineCtx, ctrlMgrCtx.Client, name, false) resource = getClusterResource(key) _, found = inMemoryCache.Get(key) Expect(found).To(BeFalse()) Expect(resource).To(BeNil()) - recordOrClearError(machineContext, name, false) + recordOrClearError(ctx, machineCtx, ctrlMgrCtx.Client, name, false) resource = getClusterResource(key) Expect(resource).To(BeNil()) - recordOrClearError(machineContext, name, true) + recordOrClearError(ctx, machineCtx, ctrlMgrCtx.Client, name, true) _, found = inMemoryCache.Get(key) Expect(found).To(BeTrue()) resource = getClusterResource(key) @@ -108,32 +110,32 @@ var _ = Describe("TowerCache", func() { md.Name = name fake.ToWorkerMachine(machine, md) fake.ToWorkerMachine(elfMachine, md) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, nil) - key := getKey(machineContext, name) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + machineCtx := newMachineContext(elfCluster, cluster, elfMachine, machine, nil) + key := getKey(ctx, machineCtx, ctrlMgrCtx.Client, name) _, found := inMemoryCache.Get(key) Expect(found).To(BeFalse()) - ok, err := canRetryVMOperation(machineContext) + ok, err := canRetryVMOperation(ctx, machineCtx, ctrlMgrCtx.Client) Expect(ok).To(BeFalse()) Expect(err).ShouldNot(HaveOccurred()) - recordOrClearError(machineContext, name, false) - ok, err = 
canRetryVMOperation(machineContext) + recordOrClearError(ctx, machineCtx, ctrlMgrCtx.Client, name, false) + ok, err = canRetryVMOperation(ctx, machineCtx, ctrlMgrCtx.Client) Expect(ok).To(BeFalse()) Expect(err).ShouldNot(HaveOccurred()) - recordOrClearError(machineContext, name, true) - ok, err = canRetryVMOperation(machineContext) + recordOrClearError(ctx, machineCtx, ctrlMgrCtx.Client, name, true) + ok, err = canRetryVMOperation(ctx, machineCtx, ctrlMgrCtx.Client) Expect(ok).To(BeFalse()) Expect(err).ShouldNot(HaveOccurred()) - expireELFScheduleVMError(machineContext, name) - ok, err = canRetryVMOperation(machineContext) + expireELFScheduleVMError(ctx, machineCtx, ctrlMgrCtx.Client, name) + ok, err = canRetryVMOperation(ctx, machineCtx, ctrlMgrCtx.Client) Expect(ok).To(BeTrue()) Expect(err).ShouldNot(HaveOccurred()) - ok, err = canRetryVMOperation(machineContext) + ok, err = canRetryVMOperation(ctx, machineCtx, ctrlMgrCtx.Client) Expect(ok).To(BeFalse()) Expect(err).ShouldNot(HaveOccurred()) } @@ -147,18 +149,18 @@ var _ = Describe("TowerCache", func() { md.Name = placementGroupKey fake.ToWorkerMachine(machine, md) fake.ToWorkerMachine(elfMachine, md) - ctrlContext := newCtrlContexts(elfCluster, cluster, elfMachine, machine, secret, md) - machineContext := newMachineContext(ctrlContext, elfCluster, cluster, elfMachine, machine, nil) + ctrlMgrCtx := fake.NewControllerManagerContext(elfCluster, cluster, elfMachine, machine, secret, md) + machineCtx := newMachineContext(elfCluster, cluster, elfMachine, machine, nil) - ok, msg, err := isELFScheduleVMErrorRecorded(machineContext) + ok, msg, err := isELFScheduleVMErrorRecorded(ctx, machineCtx, ctrlMgrCtx.Client) Expect(ok).To(BeFalse()) Expect(msg).To(Equal("")) Expect(err).ShouldNot(HaveOccurred()) expectConditions(elfMachine, []conditionAssertion{}) elfCluster.Spec.Cluster = clusterInsufficientMemoryKey - recordOrClearError(machineContext, clusterInsufficientMemoryKey, true) - ok, msg, err = isELFScheduleVMErrorRecorded(machineContext) + recordOrClearError(ctx, machineCtx, ctrlMgrCtx.Client, clusterInsufficientMemoryKey, true) + ok, msg, err = isELFScheduleVMErrorRecorded(ctx, machineCtx, ctrlMgrCtx.Client) Expect(ok).To(BeTrue()) Expect(msg).To(ContainSubstring("Insufficient memory detected for the ELF cluster")) Expect(err).ShouldNot(HaveOccurred()) @@ -166,16 +168,16 @@ var _ = Describe("TowerCache", func() { resetMemoryCache() elfCluster.Spec.Cluster = clusterInsufficientStorageKey - recordOrClearError(machineContext, clusterInsufficientStorageKey, true) - ok, msg, err = isELFScheduleVMErrorRecorded(machineContext) + recordOrClearError(ctx, machineCtx, ctrlMgrCtx.Client, clusterInsufficientStorageKey, true) + ok, msg, err = isELFScheduleVMErrorRecorded(ctx, machineCtx, ctrlMgrCtx.Client) Expect(ok).To(BeTrue()) Expect(msg).To(ContainSubstring("Insufficient storage detected for the ELF cluster clusterInsufficientStorage")) Expect(err).ShouldNot(HaveOccurred()) expectConditions(elfMachine, []conditionAssertion{{infrav1.VMProvisionedCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForELFClusterWithSufficientStorageReason}}) resetMemoryCache() - recordOrClearError(machineContext, placementGroupKey, true) - ok, msg, err = isELFScheduleVMErrorRecorded(machineContext) + recordOrClearError(ctx, machineCtx, ctrlMgrCtx.Client, placementGroupKey, true) + ok, msg, err = isELFScheduleVMErrorRecorded(ctx, machineCtx, ctrlMgrCtx.Client) Expect(ok).To(BeTrue()) Expect(msg).To(ContainSubstring("Not satisfy policy detected 
for the placement group")) Expect(err).ShouldNot(HaveOccurred()) @@ -231,33 +233,33 @@ func removeGPUVMInfosCache(gpuIDs []string) { } } -func getKey(ctx *context.MachineContext, name string) string { +func getKey(ctx goctx.Context, machineCtx *context.MachineContext, ctrlClient client.Client, name string) string { if name == clusterInsufficientMemoryKey { return getKeyForInsufficientMemoryError(name) } else if name == clusterInsufficientStorageKey { return getKeyForInsufficientStorageError(name) } - placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctx.Client, ctx.Machine, ctx.Cluster) + placementGroupName, err := towerresources.GetVMPlacementGroupName(ctx, ctrlClient, machineCtx.Machine, machineCtx.Cluster) Expect(err).ShouldNot(HaveOccurred()) return getKeyForDuplicatePlacementGroupError(placementGroupName) } -func recordOrClearError(ctx *context.MachineContext, key string, record bool) { +func recordOrClearError(ctx goctx.Context, machineCtx *context.MachineContext, ctrlClient client.Client, key string, record bool) { if strings.Contains(key, clusterInsufficientMemoryKey) { - recordElfClusterMemoryInsufficient(ctx, record) + recordElfClusterMemoryInsufficient(machineCtx, record) return } else if strings.Contains(key, clusterInsufficientStorageKey) { - recordElfClusterStorageInsufficient(ctx, record) + recordElfClusterStorageInsufficient(machineCtx, record) return } - Expect(recordPlacementGroupPolicyNotSatisfied(ctx, record)).ShouldNot(HaveOccurred()) + Expect(recordPlacementGroupPolicyNotSatisfied(ctx, machineCtx, ctrlClient, record)).ShouldNot(HaveOccurred()) } -func expireELFScheduleVMError(ctx *context.MachineContext, name string) { - key := getKey(ctx, name) +func expireELFScheduleVMError(ctx goctx.Context, machineCtx *context.MachineContext, ctrlClient client.Client, name string) { + key := getKey(ctx, machineCtx, ctrlClient, name) resource := getClusterResource(key) resource.LastDetected = resource.LastDetected.Add(-resourceSilenceTime) resource.LastRetried = resource.LastRetried.Add(-resourceSilenceTime) diff --git a/main.go b/main.go index 177a04a0..55626a14 100644 --- a/main.go +++ b/main.go @@ -17,6 +17,7 @@ limitations under the License. package main import ( + goctx "context" "flag" "fmt" "os" @@ -35,7 +36,6 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/controller" ctrlmgr "sigs.k8s.io/controller-runtime/pkg/manager" - ctrlsig "sigs.k8s.io/controller-runtime/pkg/manager/signals" "sigs.k8s.io/controller-runtime/pkg/webhook" "github.com/smartxworks/cluster-api-provider-elf/controllers" @@ -190,7 +190,7 @@ func main() { managerOpts.RetryPeriod = &leaderElectionRetryPeriod // Create a function that adds all of the controllers and webhooks to the manager. 
- addToManager := func(ctx *context.ControllerManagerContext, mgr ctrlmgr.Manager) error { + addToManager := func(ctx goctx.Context, ctrlMgrCtx *context.ControllerManagerContext, mgr ctrlmgr.Manager) error { if os.Getenv("ENABLE_WEBHOOKS") != "false" { if err := (&webhooks.ElfMachineMutation{ Client: mgr.GetClient(), @@ -207,11 +207,11 @@ func main() { } } - if err := controllers.AddClusterControllerToManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: elfClusterConcurrency}); err != nil { + if err := controllers.AddClusterControllerToManager(ctx, ctrlMgrCtx, mgr, controller.Options{MaxConcurrentReconciles: elfClusterConcurrency}); err != nil { return err } - if err := controllers.AddMachineControllerToManager(ctx, mgr, controller.Options{MaxConcurrentReconciles: elfMachineConcurrency}); err != nil { + if err := controllers.AddMachineControllerToManager(ctx, ctrlMgrCtx, mgr, controller.Options{MaxConcurrentReconciles: elfMachineConcurrency}); err != nil { return err } @@ -227,8 +227,10 @@ func main() { managerOpts.WebhookServer = webhook.NewServer(webhookOpts) setupLog.Info("creating controller manager", "capeVersion", version.CAPEVersion(), "version", version.Get().String()) + // Set up the context that's going to be used in controllers and for the manager. + ctx := ctrl.SetupSignalHandler() managerOpts.AddToManager = addToManager - mgr, err := manager.New(managerOpts) + mgr, err := manager.New(ctx, managerOpts) if err != nil { setupLog.Error(err, "problem creating controller manager") os.Exit(1) @@ -236,9 +238,8 @@ func main() { setupChecks(mgr) - sigHandler := ctrlsig.SetupSignalHandler() setupLog.Info("starting controller manager") - if err := mgr.Start(sigHandler); err != nil { + if err := mgr.Start(ctx); err != nil { setupLog.Error(err, "problem running controller manager") os.Exit(1) } diff --git a/pkg/context/cluster_context.go b/pkg/context/cluster_context.go index e7c7c0d4..e9cfc493 100644 --- a/pkg/context/cluster_context.go +++ b/pkg/context/cluster_context.go @@ -17,6 +17,7 @@ limitations under the License. package context import ( + goctx "context" "fmt" "github.com/go-logr/logr" @@ -29,7 +30,6 @@ import ( // ClusterContext is a Go context used with a ElfCluster. type ClusterContext struct { - *ControllerContext Cluster *clusterv1.Cluster ElfCluster *infrav1.ElfCluster PatchHelper *patch.Helper @@ -43,6 +43,6 @@ func (r *ClusterContext) String() string { } // Patch updates the object and its status on the API server. -func (r *ClusterContext) Patch() error { - return r.PatchHelper.Patch(r, r.ElfCluster) +func (r *ClusterContext) Patch(ctx goctx.Context) error { + return r.PatchHelper.Patch(ctx, r.ElfCluster) } diff --git a/pkg/context/controller_context.go b/pkg/context/controller_context.go deleted file mode 100644 index b009cdd0..00000000 --- a/pkg/context/controller_context.go +++ /dev/null @@ -1,39 +0,0 @@ -/* -Copyright 2022. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package context - -import ( - "fmt" - - "github.com/go-logr/logr" -) - -// ControllerContext is the context of a controller. -type ControllerContext struct { - *ControllerManagerContext - - // Name is the name of the controller. - Name string - - // Logger is the controller's logger. - Logger logr.Logger -} - -// String returns ControllerManagerName/ControllerName. -func (r *ControllerContext) String() string { - return fmt.Sprintf("%s/%s", r.ControllerManagerContext.String(), r.Name) -} diff --git a/pkg/context/controller_manager_context.go b/pkg/context/controller_manager_context.go index d2a27d2e..75aa1128 100644 --- a/pkg/context/controller_manager_context.go +++ b/pkg/context/controller_manager_context.go @@ -17,20 +17,13 @@ limitations under the License. package context import ( - "context" - - "github.com/go-logr/logr" "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/client" ) // ControllerManagerContext is the context of the controller that owns the // controllers. -// -//nolint:containedctx type ControllerManagerContext struct { - context.Context - // Namespace is the namespace in which the resource is located responsible // for running the controller manager. Namespace string @@ -53,9 +46,6 @@ type ControllerManagerContext struct { // Client is the controller manager's client. Client client.Client - // Logger is the controller manager's logger. - Logger logr.Logger - // Scheme is the controller manager's API scheme. Scheme *runtime.Scheme diff --git a/pkg/context/machine_context.go b/pkg/context/machine_context.go index c03d0645..ea9ada3c 100644 --- a/pkg/context/machine_context.go +++ b/pkg/context/machine_context.go @@ -17,9 +17,9 @@ limitations under the License. package context import ( + goctx "context" "fmt" - "github.com/go-logr/logr" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/util/patch" @@ -29,12 +29,10 @@ import ( // MachineContext is a Go context used with an ElfMachine. type MachineContext struct { - *ControllerContext Cluster *clusterv1.Cluster Machine *clusterv1.Machine ElfCluster *infrav1.ElfCluster ElfMachine *infrav1.ElfMachine - Logger logr.Logger PatchHelper *patch.Helper VMService service.VMService } @@ -45,6 +43,6 @@ func (c *MachineContext) String() string { } // Patch updates the object and its status on the API server. -func (c *MachineContext) Patch() error { - return c.PatchHelper.Patch(c, c.ElfMachine) +func (c *MachineContext) Patch(ctx goctx.Context) error { + return c.PatchHelper.Patch(ctx, c.ElfMachine) } diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 260810ba..e4e4942d 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -35,11 +35,11 @@ type Manager interface { ctrl.Manager // GetContext returns the controller manager's context. - GetContext() *context.ControllerManagerContext + GetControllerManagerContext() *context.ControllerManagerContext } // New returns a new CAPE controller manager. -func New(opts Options) (Manager, error) { +func New(ctx goctx.Context, opts Options) (Manager, error) { // Ensure the default options are set. opts.defaults() @@ -57,37 +57,36 @@ func New(opts Options) (Manager, error) { } // Build the controller manager context. 
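// Illustrative sketch (not part of this change): because ControllerManagerContext no longer
// embeds a context.Context or a Logger, helpers that previously read ctx.Logger or used the
// embedded context are expected to take a goctx.Context explicitly and derive their logger
// from it. A minimal hypothetical example, assuming the usual imports (goctx "context",
// ctrl "sigs.k8s.io/controller-runtime", "sigs.k8s.io/controller-runtime/pkg/client", and
// this repo's context and infrav1 packages); the helper name listElfMachines is made up:
//
//	func listElfMachines(ctx goctx.Context, ctrlMgrCtx *context.ControllerManagerContext) error {
//		// The logger now travels with the request context rather than the manager context.
//		log := ctrl.LoggerFrom(ctx)
//		var machines infrav1.ElfMachineList
//		// The client and namespace still come from ControllerManagerContext.
//		if err := ctrlMgrCtx.Client.List(ctx, &machines, client.InNamespace(ctrlMgrCtx.Namespace)); err != nil {
//			return err
//		}
//		log.Info("listed ElfMachines", "count", len(machines.Items))
//		return nil
//	}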
- controllerManagerContext := &context.ControllerManagerContext{ - Context: goctx.Background(), + ctrlMgrCtx := &context.ControllerManagerContext{ WatchNamespaces: opts.Cache.Namespaces, Namespace: opts.PodNamespace, Name: opts.PodName, LeaderElectionID: opts.LeaderElectionID, LeaderElectionNamespace: opts.LeaderElectionNamespace, Client: mgr.GetClient(), - Logger: opts.Logger.WithName(opts.PodName), - Scheme: opts.Scheme, - WatchFilterValue: opts.WatchFilterValue, + // Logger: opts.Logger.WithName(opts.PodName), + Scheme: opts.Scheme, + WatchFilterValue: opts.WatchFilterValue, } // Add the requested items to the manager. - if err := opts.AddToManager(controllerManagerContext, mgr); err != nil { + if err := opts.AddToManager(ctx, ctrlMgrCtx, mgr); err != nil { return nil, errors.Wrap(err, "failed to add resources to the manager") } // +kubebuilder:scaffold:builder return &manager{ - Manager: mgr, - ctx: controllerManagerContext, + Manager: mgr, + ctrlMgrCtx: ctrlMgrCtx, }, nil } type manager struct { ctrl.Manager - ctx *context.ControllerManagerContext + ctrlMgrCtx *context.ControllerManagerContext } -func (m *manager) GetContext() *context.ControllerManagerContext { - return m.ctx +func (m *manager) GetControllerManagerContext() *context.ControllerManagerContext { + return m.ctrlMgrCtx } diff --git a/pkg/manager/options.go b/pkg/manager/options.go index d51571fa..e4cec4dd 100644 --- a/pkg/manager/options.go +++ b/pkg/manager/options.go @@ -17,6 +17,8 @@ limitations under the License. package manager import ( + goctx "context" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/client/config" @@ -29,7 +31,7 @@ import ( // AddToManagerFunc is a function that can be optionally specified with // the manager's Options in order to explicitly decide what controllers and // webhooks to add to the manager. -type AddToManagerFunc func(*context.ControllerManagerContext, ctrlmgr.Manager) error +type AddToManagerFunc func(goctx.Context, *context.ControllerManagerContext, ctrlmgr.Manager) error // Options describes the options used to create a new CAPE manager. type Options struct { diff --git a/pkg/util/machine/kcp_test.go b/pkg/util/machine/kcp_test.go index bd1303f9..f8ce3661 100644 --- a/pkg/util/machine/kcp_test.go +++ b/pkg/util/machine/kcp_test.go @@ -17,6 +17,7 @@ limitations under the License. 
package machine import ( + goctx "context" "testing" "github.com/onsi/gomega" @@ -26,19 +27,20 @@ import ( func TestGetKCPByMachine(t *testing.T) { g := gomega.NewGomegaWithT(t) + ctx := goctx.TODO() elfCluster, cluster := fake.NewClusterObjects() _, cpMachine := fake.NewMachineObjects(elfCluster, cluster) kubeadmCP := fake.NewKCP() fake.ToControlPlaneMachine(cpMachine, kubeadmCP) - ctx := fake.NewControllerManagerContext(kubeadmCP) + ctrlMgrCtx := fake.NewControllerManagerContext(kubeadmCP) t.Run("should return kcp", func(t *testing.T) { - kcp, err := GetKCPByMachine(ctx, ctx.Client, cpMachine) + kcp, err := GetKCPByMachine(ctx, ctrlMgrCtx.Client, cpMachine) g.Expect(err).ToNot(gomega.HaveOccurred()) g.Expect(kcp.Name).To(gomega.Equal(kubeadmCP.Name)) }) _, workerMachine := fake.NewMachineObjects(elfCluster, cluster) t.Run("should panic when failed to get kcp name", func(t *testing.T) { - g.Expect(func() { _, _ = GetKCPByMachine(ctx, ctx.Client, workerMachine) }).To(gomega.Panic()) + g.Expect(func() { _, _ = GetKCPByMachine(ctx, ctrlMgrCtx.Client, workerMachine) }).To(gomega.Panic()) }) } diff --git a/pkg/util/machine/machine_test.go b/pkg/util/machine/machine_test.go index 4f2c18ad..9fd6b88f 100644 --- a/pkg/util/machine/machine_test.go +++ b/pkg/util/machine/machine_test.go @@ -17,6 +17,7 @@ limitations under the License. package machine import ( + goctx "context" "fmt" "testing" @@ -28,12 +29,13 @@ import ( func TestGetElfMachinesInCluster(t *testing.T) { g := gomega.NewGomegaWithT(t) + ctx := goctx.TODO() elfCluster, cluster := fake.NewClusterObjects() elfMachine, _ := fake.NewMachineObjects(elfCluster, cluster) - ctx := fake.NewControllerManagerContext(elfMachine) + ctrlMgrCtx := fake.NewControllerManagerContext(elfMachine) t.Run("should return ElfMachines", func(t *testing.T) { - elfMachines, err := GetElfMachinesInCluster(ctx, ctx.Client, cluster.Namespace, cluster.Name) + elfMachines, err := GetElfMachinesInCluster(ctx, ctrlMgrCtx.Client, cluster.Namespace, cluster.Name) g.Expect(err).ToNot(gomega.HaveOccurred()) g.Expect(elfMachines).To(gomega.HaveLen(1)) }) @@ -41,14 +43,15 @@ func TestGetElfMachinesInCluster(t *testing.T) { func TestGetControlPlaneElfMachinesInCluster(t *testing.T) { g := gomega.NewGomegaWithT(t) + ctx := goctx.TODO() elfCluster, cluster := fake.NewClusterObjects() elfMachine1, _ := fake.NewMachineObjects(elfCluster, cluster) elfMachine2, _ := fake.NewMachineObjects(elfCluster, cluster) fake.ToControlPlaneMachine(elfMachine1, fake.NewKCP()) - ctx := fake.NewControllerManagerContext(elfMachine1, elfMachine2) + ctrlMgrCtx := fake.NewControllerManagerContext(elfMachine1, elfMachine2) t.Run("should return Control Plane ElfMachines", func(t *testing.T) { - elfMachines, err := GetControlPlaneElfMachinesInCluster(ctx, ctx.Client, cluster.Namespace, cluster.Name) + elfMachines, err := GetControlPlaneElfMachinesInCluster(ctx, ctrlMgrCtx.Client, cluster.Namespace, cluster.Name) g.Expect(err).ToNot(gomega.HaveOccurred()) g.Expect(elfMachines).To(gomega.HaveLen(1)) g.Expect(elfMachines[0].Name).To(gomega.Equal(elfMachine1.Name)) diff --git a/pkg/util/machine/md_test.go b/pkg/util/machine/md_test.go index 81e7fe6c..123195a6 100644 --- a/pkg/util/machine/md_test.go +++ b/pkg/util/machine/md_test.go @@ -17,6 +17,7 @@ limitations under the License. 
package machine import ( + goctx "context" "testing" "github.com/onsi/gomega" @@ -26,14 +27,15 @@ import ( func TestGetMDByMachine(t *testing.T) { g := gomega.NewGomegaWithT(t) + ctx := goctx.TODO() elfCluster, cluster := fake.NewClusterObjects() _, machine := fake.NewMachineObjects(elfCluster, cluster) machineDeployment := fake.NewMD() fake.ToWorkerMachine(machine, machineDeployment) - ctx := fake.NewControllerManagerContext(machineDeployment) + ctrlMgrCtx := fake.NewControllerManagerContext(machineDeployment) t.Run("should return md", func(t *testing.T) { - md, err := GetMDByMachine(ctx, ctx.Client, machine) + md, err := GetMDByMachine(ctx, ctrlMgrCtx.Client, machine) g.Expect(err).ToNot(gomega.HaveOccurred()) g.Expect(md.Name).To(gomega.Equal(machineDeployment.Name)) }) diff --git a/test/fake/controller_manager_context.go b/test/fake/controller_manager_context.go index 6ae1f692..3b237ca5 100644 --- a/test/fake/controller_manager_context.go +++ b/test/fake/controller_manager_context.go @@ -17,15 +17,12 @@ limitations under the License. package fake import ( - goctx "context" - "k8s.io/apimachinery/pkg/runtime" cgscheme "k8s.io/client-go/kubernetes/scheme" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - ctrllog "sigs.k8s.io/controller-runtime/pkg/log" infrav1 "github.com/smartxworks/cluster-api-provider-elf/api/v1beta1" "github.com/smartxworks/cluster-api-provider-elf/pkg/context" @@ -64,9 +61,8 @@ func NewControllerManagerContext(initObjects ...client.Object) *context.Controll ).WithObjects(initObjects...).Build() return &context.ControllerManagerContext{ - Context: goctx.Background(), - Client: clientWithObjects, - Logger: ctrllog.Log.WithName(ControllerManagerName), + Client: clientWithObjects, + // Logger: ctrllog.Log.WithName(ControllerManagerName), Scheme: scheme, Namespace: ControllerManagerNamespace, Name: ControllerManagerName, diff --git a/test/fake/types.go b/test/fake/types.go index e43b0e0c..5d825f90 100644 --- a/test/fake/types.go +++ b/test/fake/types.go @@ -17,6 +17,8 @@ limitations under the License. package fake import ( + goctx "context" + . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" @@ -173,34 +175,34 @@ func NewMD() *clusterv1.MachineDeployment { } } -func InitClusterOwnerReferences(ctrlContext *context.ControllerContext, +func InitClusterOwnerReferences(ctx goctx.Context, ctrlMgrCtx *context.ControllerManagerContext, elfCluster *infrav1.ElfCluster, cluster *clusterv1.Cluster) { By("setting the OwnerRef on the ElfCluster") - ph, err := patch.NewHelper(elfCluster, ctrlContext.Client) + ph, err := patch.NewHelper(elfCluster, ctrlMgrCtx.Client) Expect(err).ShouldNot(HaveOccurred()) elfCluster.OwnerReferences = append(elfCluster.OwnerReferences, metav1.OwnerReference{Kind: ClusterKind, APIVersion: clusterv1.GroupVersion.String(), Name: cluster.Name, UID: "blah"}) - Expect(ph.Patch(ctrlContext, elfCluster, patch.WithStatusObservedGeneration{})).ShouldNot(HaveOccurred()) + Expect(ph.Patch(ctx, elfCluster, patch.WithStatusObservedGeneration{})).ShouldNot(HaveOccurred()) } -func InitMachineOwnerReferences(ctrlContext *context.ControllerContext, +func InitMachineOwnerReferences(ctx goctx.Context, ctrlMgrCtx *context.ControllerManagerContext, elfMachine *infrav1.ElfMachine, machine *clusterv1.Machine) { By("setting the OwnerRef on the ElfMachine") - ph, err := patch.NewHelper(elfMachine, ctrlContext.Client) + ph, err := patch.NewHelper(elfMachine, ctrlMgrCtx.Client) Expect(err).ShouldNot(HaveOccurred()) elfMachine.OwnerReferences = append(elfMachine.OwnerReferences, metav1.OwnerReference{Kind: MachineKind, APIVersion: clusterv1.GroupVersion.String(), Name: machine.Name, UID: "blah"}) - Expect(ph.Patch(ctrlContext, elfMachine, patch.WithStatusObservedGeneration{})).ShouldNot(HaveOccurred()) + Expect(ph.Patch(ctx, elfMachine, patch.WithStatusObservedGeneration{})).ShouldNot(HaveOccurred()) } func InitOwnerReferences( - ctrlContext *context.ControllerContext, + ctx goctx.Context, ctrlMgrCtx *context.ControllerManagerContext, elfCluster *infrav1.ElfCluster, cluster *clusterv1.Cluster, elfMachine *infrav1.ElfMachine, machine *clusterv1.Machine) { if elfCluster != nil { - InitClusterOwnerReferences(ctrlContext, elfCluster, cluster) + InitClusterOwnerReferences(ctx, ctrlMgrCtx, elfCluster, cluster) } if elfMachine != nil { - InitMachineOwnerReferences(ctrlContext, elfMachine, machine) + InitMachineOwnerReferences(ctx, ctrlMgrCtx, elfMachine, machine) } } diff --git a/test/helpers/envtest.go b/test/helpers/envtest.go index cfbd30bd..07b6372a 100644 --- a/test/helpers/envtest.go +++ b/test/helpers/envtest.go @@ -109,7 +109,7 @@ type TestEnvironment struct { } // NewTestEnvironment creates a new environment spinning up a local api-server. -func NewTestEnvironment() *TestEnvironment { +func NewTestEnvironment(ctx goctx.Context) *TestEnvironment { // Create the test environment. env := &envtest.Environment{ ErrorIfCRDPathMissing: true, @@ -130,7 +130,7 @@ func NewTestEnvironment() *TestEnvironment { }, KubeConfig: env.Config, } - managerOpts.AddToManager = func(ctx *context.ControllerManagerContext, mgr ctrlmgr.Manager) error { + managerOpts.AddToManager = func(ctx goctx.Context, ctrlMgrCtx *context.ControllerManagerContext, mgr ctrlmgr.Manager) error { if err := (&webhooks.ElfMachineMutation{ Client: mgr.GetClient(), Logger: mgr.GetLogger().WithName("ElfMachineMutation"), @@ -148,7 +148,7 @@ func NewTestEnvironment() *TestEnvironment { return nil } - mgr, err := manager.New(managerOpts) + mgr, err := manager.New(ctx, managerOpts) if err != nil { klog.Fatalf("failed to create the CAPE controller manager: %v", err) }