Skip to content

Commit

Permalink
add more logging
Browse files Browse the repository at this point in the history
  • Loading branch information
bschimke95 committed Sep 17, 2024
1 parent d52cbae commit 1c467da
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 1 deletion.
103 changes: 103 additions & 0 deletions c1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Cluster API test manifest: a 3-node CK8s control plane on Docker infrastructure
# with a (scaled-to-zero) worker MachineDeployment. Indentation reconstructed per
# the standard Cluster API / CK8s provider schemas — verify against the original file.
apiVersion: cluster.x-k8s.io/v1beta1
kind: Cluster
metadata:
  name: c1
  namespace: default
spec:
  clusterNetwork:
    pods:
      cidrBlocks:
        - 10.1.0.0/16
    serviceDomain: cluster.local
    services:
      cidrBlocks:
        - 10.152.0.0/16
  # Control plane is managed by the CK8s control-plane provider.
  controlPlaneRef:
    apiVersion: controlplane.cluster.x-k8s.io/v1beta2
    kind: CK8sControlPlane
    name: c1-control-plane
  # Infrastructure is provided by CAPD (Docker) — test/dev environment.
  infrastructureRef:
    apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
    kind: DockerCluster
    name: c1
---
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
kind: DockerCluster
metadata:
  name: c1
  namespace: default
spec: {}
---
apiVersion: controlplane.cluster.x-k8s.io/v1beta2
kind: CK8sControlPlane
metadata:
  name: c1-control-plane
  namespace: default
spec:
  machineTemplate:
    infrastructureTemplate:
      apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
      kind: DockerMachineTemplate
      name: c1-control-plane
  replicas: 3
  # Nested spec is the CK8sConfigSpec applied to control plane machines.
  spec:
    airGapped: true
    controlPlane:
      extraKubeAPIServerArgs:
        --anonymous-auth: "true"
  version: v1.29.6
---
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
kind: DockerMachineTemplate
metadata:
  name: c1-control-plane
  namespace: default
spec:
  template:
    spec:
      # Pre-built dev image; presumably the "old" snap used for upgrade testing.
      customImage: k8s-snap:dev-old
---
apiVersion: cluster.x-k8s.io/v1beta1
kind: MachineDeployment
metadata:
  name: c1-worker-md-0
  namespace: default
spec:
  clusterName: c1
  # No workers by default; scale up manually when needed.
  replicas: 0
  selector:
    matchLabels:
      cluster.x-k8s.io/cluster-name: c1
  template:
    spec:
      bootstrap:
        configRef:
          apiVersion: bootstrap.cluster.x-k8s.io/v1beta2
          kind: CK8sConfigTemplate
          name: c1-md-0
      clusterName: c1
      infrastructureRef:
        apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
        kind: DockerMachineTemplate
        name: c1-md-0
      version: v1.29.6
---
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
kind: DockerMachineTemplate
metadata:
  name: c1-md-0
  namespace: default
spec:
  template:
    spec:
      customImage: k8s-snap:dev-old
---
apiVersion: bootstrap.cluster.x-k8s.io/v1beta2
kind: CK8sConfigTemplate
metadata:
  name: c1-md-0
  namespace: default
spec:
  template:
    spec:
      airGapped: true
16 changes: 15 additions & 1 deletion controlplane/controllers/scale.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,17 @@ func (r *CK8sControlPlaneReconciler) scaleDownControlPlane(
logger := ctrl.LoggerFrom(ctx)

// Pick the Machine that we should scale down.
logger.Info("Selecting control plane Machine to delete")
machineToDelete, err := selectMachineForScaleDown(ctx, controlPlane, outdatedMachines)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to select machine for scale down: %w", err)
}

// Run preflight checks ensuring the control plane is stable before proceeding with a scale up/scale down operation; if not, wait.
// Given that we're scaling down, we can exclude the machineToDelete from the preflight checks.
logger.Info("Running preflight checks before scaling down control plane")
if result, err := r.preflightChecks(ctx, controlPlane, machineToDelete); err != nil || !result.IsZero() {
logger.Info("Preflight checks failed, requeueing")
return result, err
}

Expand Down Expand Up @@ -162,23 +165,27 @@ func (r *CK8sControlPlaneReconciler) scaleDownControlPlane(
microclusterPort := controlPlane.KCP.Spec.CK8sConfigSpec.ControlPlaneConfig.GetMicroclusterPort()
clusterObjectKey := util.ObjectKey(cluster)
workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, clusterObjectKey, microclusterPort)
logger.Info("Get workload cluster", "cluster", clusterObjectKey)
if err != nil {
logger.Error(err, "failed to create client to workload cluster")
return ctrl.Result{}, errors.Wrapf(err, "failed to create client to workload cluster")
}

logger.Info("Removing machine from microcluster")
if err := workloadCluster.RemoveMachineFromCluster(ctx, machineToDelete); err != nil {
logger.Error(err, "failed to remove machine from microcluster")
}

logger = logger.WithValues("machine", machineToDelete)
logger.Info("Machine removed from microcluster")
if err := r.Client.Delete(ctx, machineToDelete); err != nil && !apierrors.IsNotFound(err) {
logger.Error(err, "Failed to delete control plane machine")
r.recorder.Eventf(kcp, corev1.EventTypeWarning, "FailedScaleDown",
"Failed to delete control plane Machine %s for cluster %s/%s control plane: %v", machineToDelete.Name, cluster.Namespace, cluster.Name, err)
return ctrl.Result{}, err
}

logger.Info("Control plane Machine deleted")
// Requeue the control plane, in case there are additional operations to perform
return ctrl.Result{Requeue: true}, nil
}
Expand All @@ -201,17 +208,20 @@ func (r *CK8sControlPlaneReconciler) preflightChecks(_ context.Context, controlP
}

// If there are deleting machines, wait for the operation to complete.
logger.Info("Checking for deleting machines")
if controlPlane.HasDeletingMachine() {
logger.Info("Waiting for machines to be deleted", "Machines", strings.Join(controlPlane.Machines.Filter(collections.HasDeletionTimestamp).Names(), ", "))
return ctrl.Result{RequeueAfter: deleteRequeueAfter}, nil
}

// Check machine health conditions; if there are conditions with False or Unknown, then wait.
allMachineHealthConditions := []clusterv1.ConditionType{controlplanev1.MachineAgentHealthyCondition}
logger.Info("Checking control plane machines health conditions")
if controlPlane.IsEtcdManaged() {
allMachineHealthConditions = append(allMachineHealthConditions,
controlplanev1.MachineEtcdMemberHealthyCondition,
)
logger.WithValues(allMachineHealthConditions).Info("Control plane is managing etcd, checking etcd member health condition")
}

machineErrors := []error{}
Expand All @@ -220,15 +230,18 @@ loopmachines:
for _, machine := range controlPlane.Machines {
for _, excluded := range excludeFor {
// If this machine should be excluded from the individual
// health check, continue the out loop.
// health check, continue the outer loop.
if machine.Name == excluded.Name {
logger.Info("Excluding machine from health check", "machine", machine.Name)
continue loopmachines
}
}

for _, condition := range allMachineHealthConditions {
logger.Info("Checking machine condition", "machine", machine.Name, "condition", condition)
if err := preflightCheckCondition("machine", machine, condition); err != nil {
machineErrors = append(machineErrors, err)
logger.Error(err, "Machine failed health check", "machine", machine.Name, "condition", condition)
}
}
}
Expand All @@ -242,6 +255,7 @@ loopmachines:
return ctrl.Result{RequeueAfter: preflightFailedRequeueAfter}, nil
}

logger.Info("All machines passed health checks")
return ctrl.Result{}, nil
}

Expand Down

0 comments on commit 1c467da

Please sign in to comment.