From fdc6dcc2126fdc7ded1c5c46f13e5a886fa879d2 Mon Sep 17 00:00:00 2001 From: David Nix Date: Fri, 7 Jul 2023 08:41:32 -0600 Subject: [PATCH] refactor(SelfHeal): Improve height drift mitigation logging --- controllers/selfhealing_controller.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/controllers/selfhealing_controller.go b/controllers/selfhealing_controller.go index 5404dc6e..15925c2a 100644 --- a/controllers/selfhealing_controller.go +++ b/controllers/selfhealing_controller.go @@ -19,6 +19,7 @@ package controllers import ( "context" "errors" + "fmt" "net/http" "time" @@ -118,11 +119,8 @@ func (r *SelfHealingReconciler) mitigateHeightDrift(ctx context.Context, reporte return } - const msg = "Height drift mitigation deleted pod" pods := r.driftDetector.LaggingPods(ctx, crd) - if len(pods) > 0 { - reporter.RecordInfo("HeightDriftMitigation", msg) - } + var deleted int for _, pod := range pods { // CosmosFullNodeController will detect missing pod and re-create it. if err := r.Delete(ctx, pod); kube.IgnoreNotFound(err) != nil { @@ -130,7 +128,12 @@ func (r *SelfHealingReconciler) mitigateHeightDrift(ctx context.Context, reporte reporter.RecordError("HeightDriftMitigationDeletePod", err) continue } - reporter.Info(msg, "pod", pod) + reporter.Info("Deleted pod for meeting height drift threshold", "pod", pod) + deleted++ + } + if deleted > 0 { + msg := fmt.Sprintf("Height lagged behind by %d or more blocks; deleted pod(s)", crd.Spec.SelfHeal.HeightDriftMitigation.Threshold) + reporter.RecordInfo("HeightDriftMitigation", msg) } }