Skip to content

Commit

Permalink
Fix etcd check
Browse files Browse the repository at this point in the history
Consider nodes as disrupted when their guard pod has no ready condition
  • Loading branch information
slintes committed Feb 8, 2024
1 parent 35bee23 commit 683de39
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 6 deletions.
17 changes: 12 additions & 5 deletions pkg/etcd/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,21 @@ func IsEtcdDisruptionAllowed(ctx context.Context, cl client.Client, log logr.Log
}
for _, pod := range podList.Items {
if pod.Spec.NodeName == node.Name {
// Track if the pod has a ready condition with status True or Unknown
// Consider Unknown as being ready to be on the safe side and prevent disruption of nodes which are about to be ready
// Missing ready condition also means the pod is not ready
isPodReady := false
for _, condition := range pod.Status.Conditions {
if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionFalse {
log.Info("Node is already disrupted, thus disrupting it (again) won't violate the etcd quorum", "Node", node.Name, "Guard pod", pod.Name)
return true, nil
if condition.Type == corev1.PodReady && (condition.Status == corev1.ConditionTrue || condition.Status == corev1.ConditionUnknown) {
isPodReady = true
}
}
log.Info("Node is not disrupted, and disrupting it will violate the etcd quorum", "Node", node.Name, "Guard pod", pod.Name)
return false, nil
if isPodReady {
log.Info("Node is not disrupted, and disrupting it will violate the etcd quorum", "Node", node.Name, "Guard pod", pod.Name)
return false, nil
}
log.Info("Node is already disrupted, thus disrupting it (again) won't violate the etcd quorum", "Node", node.Name, "Guard pod", pod.Name)
return true, nil
}
}
// Node is either already disrupted (and guard pod is missing) or it wasn't configured with etcd (to have a guard pod)
Expand Down
9 changes: 8 additions & 1 deletion pkg/etcd/etcd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,20 @@ var _ = Describe("Check if etcd disruption is allowed", func() {
DeferCleanup(fakeClient.Delete, context.Background(), unguardedNode)
Expect(IsEtcdDisruptionAllowed(context.Background(), fakeClient, log, unguardedNode)).To(BeTrue())
})
It("should allow disruption for already disrupted control plane node", func() {
It("should allow disruption for already disrupted control plane node (guard pod ready condition = false)", func() {
podGuard := &corev1.Pod{}
Expect(fakeClient.Get(context.Background(), client.ObjectKeyFromObject(getPodGuard(nodeGuardDown.Name)), podGuard)).To(Succeed())
podGuard.Status.Conditions[0].Status = corev1.ConditionFalse
Expect(fakeClient.Status().Update(context.Background(), podGuard)).To(Succeed())
Expect(IsEtcdDisruptionAllowed(context.Background(), fakeClient, log, nodeGuardDown)).To(BeTrue())
})
It("should allow disruption for already disrupted control plane node (guard pod ready condition missing)", func() {
podGuard := &corev1.Pod{}
Expect(fakeClient.Get(context.Background(), client.ObjectKeyFromObject(getPodGuard(nodeGuardDown.Name)), podGuard)).To(Succeed())
podGuard.Status.Conditions = []corev1.PodCondition{}
Expect(fakeClient.Status().Update(context.Background(), podGuard)).To(Succeed())
Expect(IsEtcdDisruptionAllowed(context.Background(), fakeClient, log, nodeGuardDown)).To(BeTrue())
})
It("should reject disruptions for any non-disrupted control plane node", func() {
for _, node := range controlPlaneNodes {
if node.Name != nodeGuardDown.Name {
Expand Down

0 comments on commit 683de39

Please sign in to comment.