Preempt pod readiness gate changes by clearing the checkpoint

kubernetes-sigs · Oct 8, 2024 · 8fda896 · 8fda896
1 parent 01fac95
commit 8fda896
Showing 1 changed file with 31 additions and 14 deletions.
diff --git a/pkg/targetgroupbinding/resource_manager.go b/pkg/targetgroupbinding/resource_manager.go
@@ -173,7 +173,18 @@ func (m *defaultResourceManager) reconcileWithIPTargetType(ctx context.Context,
 		needNetworkingRequeue = true
 	}
 
-	if len(unmatchedEndpoints) > 0 || len(unmatchedTargets) > 0 || needNetworkingRequeue {
+	preflightNeedFurtherProbe := false
+	for _, endpointAndTarget := range matchedEndpointAndTargets {
+		_, _, _, localPreflight := m.calculateReadinessGateTransition(endpointAndTarget.endpoint.Pod, targetHealthCondType, endpointAndTarget.target.TargetHealth)
+		if localPreflight {
+			preflightNeedFurtherProbe = true
+			break
+		}
+	}
+
+	// Any change that we perform should reset the checkpoint.
+	// TODO - How to make this cleaner?
+	if len(unmatchedEndpoints) > 0 || len(unmatchedTargets) > 0 || needNetworkingRequeue || containsPotentialReadyEndpoints || preflightNeedFurtherProbe {
 		// Set to an empty checkpoint, to ensure that, no matter what, we try to reconcile at least one more time.
 		// Consider this ordering of events (without using this method of overriding the checkpoint)
 		// 1. Register some pod IP, don't update TGB checkpoint.
@@ -353,19 +364,7 @@ func (m *defaultResourceManager) updateTargetHealthPodConditionForPod(ctx contex
 		return false, nil
 	}
 
-	targetHealthCondStatus := corev1.ConditionUnknown
-	var reason, message string
-	if targetHealth != nil {
-		if string(targetHealth.State) == string(elbv2types.TargetHealthStateEnumHealthy) {
-			targetHealthCondStatus = corev1.ConditionTrue
-		} else {
-			targetHealthCondStatus = corev1.ConditionFalse
-		}
-
-		reason = string(targetHealth.Reason)
-		message = awssdk.ToString(targetHealth.Description)
-	}
-	needFurtherProbe := targetHealthCondStatus != corev1.ConditionTrue
+	reason, message, targetHealthCondStatus, needFurtherProbe := m.calculateReadinessGateTransition(pod, targetHealthCondType, targetHealth)
 
 	existingTargetHealthCond, hasExistingTargetHealthCond := pod.GetPodCondition(targetHealthCondType)
 	// we skip patch pod if it matches current computed status/reason/message.
@@ -415,6 +414,24 @@ func (m *defaultResourceManager) updateTargetHealthPodConditionForPod(ctx contex
 	return needFurtherProbe, nil
 }
 
+// calculateReadinessGateTransition computes the values for the pod condition
+// targetHealthCondType from the supplied target health. It only derives
+// values; this function does not patch the pod itself.
+//
+// The four results are, in order:
+//   - reason: string(targetHealth.Reason), or "" when targetHealth is nil or
+//     the pod lacks the readiness gate.
+//   - message: the target health description, or "" in the same cases.
+//   - status: ConditionTrue when the pod lacks the readiness gate or the
+//     target state is healthy, ConditionUnknown when targetHealth is nil,
+//     and ConditionFalse for any other target state.
+//   - needFurtherProbe: true whenever status is not ConditionTrue.
+func (m *defaultResourceManager) calculateReadinessGateTransition(pod k8s.PodInfo, targetHealthCondType corev1.PodConditionType, targetHealth *elbv2types.TargetHealth) (string, string, corev1.ConditionStatus, bool) {
+	var reason, message string
+	// A pod without this readiness gate has nothing to transition: report it
+	// as ConditionTrue with no further probing required.
+	if !pod.HasAnyOfReadinessGates([]corev1.PodConditionType{targetHealthCondType}) {
+		return reason, message, corev1.ConditionTrue, false
+	}
+	// With no target health available the status stays Unknown, which also
+	// results in needFurtherProbe being true below.
+	targetHealthCondStatus := corev1.ConditionUnknown
+	if targetHealth != nil {
+		// Only the healthy state maps to ConditionTrue; every other state is
+		// reported as ConditionFalse.
+		if string(targetHealth.State) == string(elbv2types.TargetHealthStateEnumHealthy) {
+			targetHealthCondStatus = corev1.ConditionTrue
+		} else {
+			targetHealthCondStatus = corev1.ConditionFalse
+		}
+		reason = string(targetHealth.Reason)
+		// NOTE(review): presumably awssdk.ToString yields "" for a nil
+		// Description pointer — confirm against the AWS SDK helper.
+		message = awssdk.ToString(targetHealth.Description)
+	}
+	return reason, message, targetHealthCondStatus, targetHealthCondStatus != corev1.ConditionTrue
+}
+
 // updatePodAsHealthyForDeletedTGB updates pod's targetHealth condition as healthy when deleting a TGB
 // if the pod has readiness Gate.
 func (m *defaultResourceManager) updatePodAsHealthyForDeletedTGB(ctx context.Context, tgb *elbv2api.TargetGroupBinding) error {