Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix etcd check #22

Merged
merged 1 commit into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions pkg/etcd/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,21 @@ func IsEtcdDisruptionAllowed(ctx context.Context, cl client.Client, log logr.Log
}
for _, pod := range podList.Items {
if pod.Spec.NodeName == node.Name {
// Track if the pod has a ready condition with status True or Unknown
// Consider Unknown as being ready to be on the safe side and prevent disruption of nodes which are about to be ready
// Missing ready condition also means the pod is not ready
isPodReady := false
for _, condition := range pod.Status.Conditions {
if condition.Type == corev1.PodReady && condition.Status == corev1.ConditionFalse {
log.Info("Node is already disrupted, thus disrupting it (again) won't violate the etcd quorum", "Node", node.Name, "Guard pod", pod.Name)
return true, nil
if condition.Type == corev1.PodReady && (condition.Status == corev1.ConditionTrue || condition.Status == corev1.ConditionUnknown) {
isPodReady = true
}
}
log.Info("Node is not disrupted, and disrupting it will violate the etcd quorum", "Node", node.Name, "Guard pod", pod.Name)
return false, nil
if isPodReady {
log.Info("Node is not disrupted, and disrupting it will violate the etcd quorum", "Node", node.Name, "Guard pod", pod.Name)
return false, nil
}
log.Info("Node is already disrupted, thus disrupting it (again) won't violate the etcd quorum", "Node", node.Name, "Guard pod", pod.Name)
return true, nil
}
}
// Node is either already disrupted (and guard pod is missing) or it wasn't configured with etcd (to have a guard pod)
Expand Down
9 changes: 8 additions & 1 deletion pkg/etcd/etcd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,20 @@ var _ = Describe("Check if etcd disruption is allowed", func() {
DeferCleanup(fakeClient.Delete, context.Background(), unguardedNode)
Expect(IsEtcdDisruptionAllowed(context.Background(), fakeClient, log, unguardedNode)).To(BeTrue())
})
It("should allow disruption for already disrupted control plane node", func() {
It("should allow disruption for already disrupted control plane node (guard pod ready condition = false)", func() {
podGuard := &corev1.Pod{}
Expect(fakeClient.Get(context.Background(), client.ObjectKeyFromObject(getPodGuard(nodeGuardDown.Name)), podGuard)).To(Succeed())
podGuard.Status.Conditions[0].Status = corev1.ConditionFalse
Expect(fakeClient.Status().Update(context.Background(), podGuard)).To(Succeed())
Expect(IsEtcdDisruptionAllowed(context.Background(), fakeClient, log, nodeGuardDown)).To(BeTrue())
})
It("should allow disruption for already disrupted control plane node (guard pod ready condition missing)", func() {
podGuard := &corev1.Pod{}
Expect(fakeClient.Get(context.Background(), client.ObjectKeyFromObject(getPodGuard(nodeGuardDown.Name)), podGuard)).To(Succeed())
podGuard.Status.Conditions = []corev1.PodCondition{}
Expect(fakeClient.Status().Update(context.Background(), podGuard)).To(Succeed())
Expect(IsEtcdDisruptionAllowed(context.Background(), fakeClient, log, nodeGuardDown)).To(BeTrue())
})
It("should reject disruptions for any non-disrupted control plane node", func() {
for _, node := range controlPlaneNodes {
if node.Name != nodeGuardDown.Name {
Expand Down
Loading