From 1c467dabb03bff6f061fe8271aeadf739e776d1d Mon Sep 17 00:00:00 2001
From: Benjamin Schimke
Date: Tue, 17 Sep 2024 15:30:41 +0200
Subject: [PATCH] add more logging

---
 c1.yaml                           | 103 ++++++++++++++++++++++++++++++
 controlplane/controllers/scale.go |  16 ++++-
 2 files changed, 118 insertions(+), 1 deletion(-)
 create mode 100644 c1.yaml

diff --git a/c1.yaml b/c1.yaml
new file mode 100644
index 00000000..e2560fc3
--- /dev/null
+++ b/c1.yaml
@@ -0,0 +1,103 @@
+apiVersion: cluster.x-k8s.io/v1beta1
+kind: Cluster
+metadata:
+  name: c1
+  namespace: default
+spec:
+  clusterNetwork:
+    pods:
+      cidrBlocks:
+      - 10.1.0.0/16
+    serviceDomain: cluster.local
+    services:
+      cidrBlocks:
+      - 10.152.0.0/16
+  controlPlaneRef:
+    apiVersion: controlplane.cluster.x-k8s.io/v1beta2
+    kind: CK8sControlPlane
+    name: c1-control-plane
+  infrastructureRef:
+    apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+    kind: DockerCluster
+    name: c1
+---
+apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+kind: DockerCluster
+metadata:
+  name: c1
+  namespace: default
+spec: {}
+---
+apiVersion: controlplane.cluster.x-k8s.io/v1beta2
+kind: CK8sControlPlane
+metadata:
+  name: c1-control-plane
+  namespace: default
+spec:
+  machineTemplate:
+    infrastructureTemplate:
+      apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+      kind: DockerMachineTemplate
+      name: c1-control-plane
+  replicas: 3
+  spec:
+    airGapped: true
+    controlPlane:
+      extraKubeAPIServerArgs:
+        --anonymous-auth: "true"
+  version: v1.29.6
+---
+apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+kind: DockerMachineTemplate
+metadata:
+  name: c1-control-plane
+  namespace: default
+spec:
+  template:
+    spec:
+      customImage: k8s-snap:dev-old
+---
+apiVersion: cluster.x-k8s.io/v1beta1
+kind: MachineDeployment
+metadata:
+  name: c1-worker-md-0
+  namespace: default
+spec:
+  clusterName: c1
+  replicas: 0
+  selector:
+    matchLabels:
+      cluster.x-k8s.io/cluster-name: c1
+  template:
+    spec:
+      bootstrap:
+        configRef:
+          apiVersion: bootstrap.cluster.x-k8s.io/v1beta2
+          kind: CK8sConfigTemplate
+          name: c1-md-0
+      clusterName: c1
+      infrastructureRef:
+        apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+        kind: DockerMachineTemplate
+        name: c1-md-0
+      version: v1.29.6
+---
+apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+kind: DockerMachineTemplate
+metadata:
+  name: c1-md-0
+  namespace: default
+spec:
+  template:
+    spec:
+      customImage: k8s-snap:dev-old
+---
+apiVersion: bootstrap.cluster.x-k8s.io/v1beta2
+kind: CK8sConfigTemplate
+metadata:
+  name: c1-md-0
+  namespace: default
+spec:
+  template:
+    spec:
+      airGapped: true
diff --git a/controlplane/controllers/scale.go b/controlplane/controllers/scale.go
index de0f3777..6639a2b1 100644
--- a/controlplane/controllers/scale.go
+++ b/controlplane/controllers/scale.go
@@ -103,6 +103,7 @@ func (r *CK8sControlPlaneReconciler) scaleDownControlPlane(
 	logger := ctrl.LoggerFrom(ctx)
 
 	// Pick the Machine that we should scale down.
+	logger.Info("Selecting control plane Machine to delete")
 	machineToDelete, err := selectMachineForScaleDown(ctx, controlPlane, outdatedMachines)
 	if err != nil {
 		return ctrl.Result{}, fmt.Errorf("failed to select machine for scale down: %w", err)
@@ -110,7 +111,9 @@ func (r *CK8sControlPlaneReconciler) scaleDownControlPlane(
 
 	// Run preflight checks ensuring the control plane is stable before proceeding with a scale up/scale down operation; if not, wait.
 	// Given that we're scaling down, we can exclude the machineToDelete from the preflight checks.
+ logger.Info("Running preflight checks before scaling down control plane") if result, err := r.preflightChecks(ctx, controlPlane, machineToDelete); err != nil || !result.IsZero() { + logger.Info("Preflight checks failed, requeueing") return result, err } @@ -162,16 +165,19 @@ func (r *CK8sControlPlaneReconciler) scaleDownControlPlane( microclusterPort := controlPlane.KCP.Spec.CK8sConfigSpec.ControlPlaneConfig.GetMicroclusterPort() clusterObjectKey := util.ObjectKey(cluster) workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, clusterObjectKey, microclusterPort) + logger.Info("Get workload cluster", "cluster", clusterObjectKey) if err != nil { logger.Error(err, "failed to create client to workload cluster") return ctrl.Result{}, errors.Wrapf(err, "failed to create client to workload cluster") } + logger.Info("Removing machine from microcluster") if err := workloadCluster.RemoveMachineFromCluster(ctx, machineToDelete); err != nil { logger.Error(err, "failed to remove machine from microcluster") } logger = logger.WithValues("machine", machineToDelete) + logger.Info("Machine removed from microcluster") if err := r.Client.Delete(ctx, machineToDelete); err != nil && !apierrors.IsNotFound(err) { logger.Error(err, "Failed to delete control plane machine") r.recorder.Eventf(kcp, corev1.EventTypeWarning, "FailedScaleDown", @@ -179,6 +185,7 @@ func (r *CK8sControlPlaneReconciler) scaleDownControlPlane( return ctrl.Result{}, err } + logger.Info("Control plane Machine deleted") // Requeue the control plane, in case there are additional operations to perform return ctrl.Result{Requeue: true}, nil } @@ -201,6 +208,7 @@ func (r *CK8sControlPlaneReconciler) preflightChecks(_ context.Context, controlP } // If there are deleting machines, wait for the operation to complete. + logger.Info("Checking for deleting machines") if controlPlane.HasDeletingMachine() { logger.Info("Waiting for machines to be deleted", "Machines", strings.Join(controlPlane.Machines.Filter(collections.HasDeletionTimestamp).Names(), ", ")) return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil @@ -208,10 +216,12 @@ func (r *CK8sControlPlaneReconciler) preflightChecks(_ context.Context, controlP // Check machine health conditions; if there are conditions with False or Unknown, then wait. allMachineHealthConditions := []clusterv1.ConditionType{controlplanev1.MachineAgentHealthyCondition} + logger.Info("Checking control plane machines health conditions") if controlPlane.IsEtcdManaged() { allMachineHealthConditions = append(allMachineHealthConditions, controlplanev1.MachineEtcdMemberHealthyCondition, ) + logger.WithValues(allMachineHealthConditions).Info("Control plane is managing etcd, checking etcd member health condition") } machineErrors := []error{} @@ -220,15 +230,18 @@ loopmachines: for _, machine := range controlPlane.Machines { for _, excluded := range excludeFor { // If this machine should be excluded from the individual - // health check, continue the out loop. + // health check, continue the outer loop. 
 			if machine.Name == excluded.Name {
+				logger.Info("Excluding machine from health check", "machine", machine.Name)
 				continue loopmachines
 			}
 		}
 
 		for _, condition := range allMachineHealthConditions {
+			logger.Info("Checking machine condition", "machine", machine.Name, "condition", condition)
 			if err := preflightCheckCondition("machine", machine, condition); err != nil {
 				machineErrors = append(machineErrors, err)
+				logger.Error(err, "Machine failed health check", "machine", machine.Name, "condition", condition)
 			}
 		}
 	}
@@ -242,6 +255,7 @@ loopmachines:
 		return ctrl.Result{RequeueAfter: preflightFailedRequeueAfter}, nil
 	}
 
+	logger.Info("All machines passed health checks")
 	return ctrl.Result{}, nil
 }
 
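Note on the logging pattern: the statements added above follow logr's structured-logging convention. Info and Error take a message followed by alternating key/value pairs, and WithValues attaches key/value pairs to a derived logger, which is why the etcd health-check line passes the condition list under a "conditions" key rather than as a bare slice. Below is a minimal, self-contained sketch of that convention; the funcr sink and the machine name are stand-ins for illustration only, while the controller itself obtains its logger via ctrl.LoggerFrom(ctx).

package main

import (
	"errors"
	"fmt"

	"github.com/go-logr/logr/funcr"
)

func main() {
	// Build a throwaway logr.Logger that prints each record to stdout.
	logger := funcr.New(func(prefix, args string) {
		fmt.Println(prefix, args)
	}, funcr.Options{})

	conditions := []string{"MachineAgentHealthy", "MachineEtcdMemberHealthy"}

	// WithValues expects alternating keys and values, so the slice is passed under a key.
	logger.WithValues("conditions", conditions).Info("checking machine health conditions")

	// Equivalent: pass the key/value pair directly to Info.
	logger.Info("checking machine health conditions", "conditions", conditions)

	// Error takes the error first, then the message and key/value pairs.
	logger.Error(errors.New("condition is False"), "machine failed health check",
		"machine", "c1-control-plane-abc", "condition", "MachineAgentHealthy")
}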