Skip to content

Commit

Permalink
preempt pod readiness gate changes by clearing check point
Browse files Browse the repository at this point in the history
  • Loading branch information
zac-nixon committed Oct 8, 2024
1 parent 01fac95 commit 8fda896
Showing 1 changed file with 31 additions and 14 deletions.
45 changes: 31 additions & 14 deletions pkg/targetgroupbinding/resource_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,18 @@ func (m *defaultResourceManager) reconcileWithIPTargetType(ctx context.Context,
needNetworkingRequeue = true
}

if len(unmatchedEndpoints) > 0 || len(unmatchedTargets) > 0 || needNetworkingRequeue {
preflightNeedFurtherProbe := false
for _, endpointAndTarget := range matchedEndpointAndTargets {
_, _, _, localPreflight := m.calculateReadinessGateTransition(endpointAndTarget.endpoint.Pod, targetHealthCondType, endpointAndTarget.target.TargetHealth)
if localPreflight {
preflightNeedFurtherProbe = true
break
}
}

// Any change that we perform should reset the checkpoint.
// TODO - How to make this cleaner?
if len(unmatchedEndpoints) > 0 || len(unmatchedTargets) > 0 || needNetworkingRequeue || containsPotentialReadyEndpoints || preflightNeedFurtherProbe {
// Set to an empty checkpoint to ensure that, no matter what, we try to reconcile at least one more time.
// Consider this ordering of events (without using this method of overriding the checkpoint)
// 1. Register some pod IP, don't update TGB checkpoint.
Expand Down Expand Up @@ -353,19 +364,7 @@ func (m *defaultResourceManager) updateTargetHealthPodConditionForPod(ctx contex
return false, nil
}

targetHealthCondStatus := corev1.ConditionUnknown
var reason, message string
if targetHealth != nil {
if string(targetHealth.State) == string(elbv2types.TargetHealthStateEnumHealthy) {
targetHealthCondStatus = corev1.ConditionTrue
} else {
targetHealthCondStatus = corev1.ConditionFalse
}

reason = string(targetHealth.Reason)
message = awssdk.ToString(targetHealth.Description)
}
needFurtherProbe := targetHealthCondStatus != corev1.ConditionTrue
reason, message, targetHealthCondStatus, needFurtherProbe := m.calculateReadinessGateTransition(pod, targetHealthCondType, targetHealth)

existingTargetHealthCond, hasExistingTargetHealthCond := pod.GetPodCondition(targetHealthCondType)
// we skip patch pod if it matches current computed status/reason/message.
Expand Down Expand Up @@ -415,6 +414,24 @@ func (m *defaultResourceManager) updateTargetHealthPodConditionForPod(ctx contex
return needFurtherProbe, nil
}

// calculateReadinessGateTransition computes the target-health pod condition for a
// pod from the given target health, without patching anything.
//
// It returns, in order: the condition reason, the condition message, the condition
// status, and whether a further probe is needed.
//
//   - If the pod does not carry the targetHealthCondType readiness gate, the result
//     is empty reason/message, ConditionTrue, and no further probe.
//   - If targetHealth is nil, the result is empty reason/message, ConditionUnknown,
//     and a further probe is needed.
//   - Otherwise the status is ConditionTrue only when the target state equals
//     TargetHealthStateEnumHealthy (ConditionFalse for every other state), and the
//     reason/message are taken from the target health's Reason and Description.
//
// A further probe is requested whenever the resulting status is not ConditionTrue.
func (m *defaultResourceManager) calculateReadinessGateTransition(pod k8s.PodInfo, targetHealthCondType corev1.PodConditionType, targetHealth *elbv2types.TargetHealth) (string, string, corev1.ConditionStatus, bool) {
	gates := []corev1.PodConditionType{targetHealthCondType}
	if !pod.HasAnyOfReadinessGates(gates) {
		// No readiness gate for this condition type: nothing to wait for.
		return "", "", corev1.ConditionTrue, false
	}

	if targetHealth == nil {
		// No health information yet: status stays unknown, so keep probing.
		return "", "", corev1.ConditionUnknown, true
	}

	// Start from "not healthy" and promote only on an exact healthy state match.
	status := corev1.ConditionFalse
	if string(targetHealth.State) == string(elbv2types.TargetHealthStateEnumHealthy) {
		status = corev1.ConditionTrue
	}

	needFurtherProbe := status != corev1.ConditionTrue
	return string(targetHealth.Reason), awssdk.ToString(targetHealth.Description), status, needFurtherProbe
}

// updatePodAsHealthyForDeletedTGB updates pod's targetHealth condition as healthy when deleting a TGB
// if the pod has readiness Gate.
func (m *defaultResourceManager) updatePodAsHealthyForDeletedTGB(ctx context.Context, tgb *elbv2api.TargetGroupBinding) error {
Expand Down

0 comments on commit 8fda896

Please sign in to comment.