From 0de216d036509d6d29be9e6105dc5fb7f9928d32 Mon Sep 17 00:00:00 2001 From: David Cheung Date: Wed, 24 Jul 2024 18:00:55 +0000 Subject: [PATCH] Remove deleted NEG reference during NEG GC. * Once a NEG has been deleted, remove its referece from NEG CR. This prevents NEG CR retaining reference of non-existing NEGs. --- pkg/neg/manager.go | 54 ++++++++++++++++++-- pkg/neg/manager_test.go | 76 ++++++++++++++++++++++------- pkg/neg/syncers/transaction.go | 12 +++-- pkg/neg/syncers/transaction_test.go | 48 +++++++++++------- pkg/neg/syncers/utils.go | 8 +-- pkg/utils/negdescription.go | 22 ++++++--- 6 files changed, 165 insertions(+), 55 deletions(-) diff --git a/pkg/neg/manager.go b/pkg/neg/manager.go index eaf9dbd560..672e7afbd2 100644 --- a/pkg/neg/manager.go +++ b/pkg/neg/manager.go @@ -616,6 +616,9 @@ func (manager *syncerManager) processNEGDeletionCandidate(svcNegCR *negv1beta1.S var errList []error shouldDeleteNegCR := true deleteByZone := len(svcNegCR.Status.NetworkEndpointGroups) == 0 + // Change this to a map from NEG name to sets once we allow multiple NEGs + // in a specific zone(multi-subnet cluster). + deletedNegs := make(map[negtypes.NegInfo]struct{}) for _, negRef := range svcNegCR.Status.NetworkEndpointGroups { resourceID, err := cloud.ParseResourceURL(negRef.SelfLink) @@ -624,14 +627,34 @@ func (manager *syncerManager) processNEGDeletionCandidate(svcNegCR *negv1beta1.S deleteByZone = true continue } - - shouldDeleteNegCR = shouldDeleteNegCR && manager.deleteNegOrReportErr(resourceID.Key.Name, resourceID.Key.Zone, svcNegCR, &errList) + negDeleted := manager.deleteNegOrReportErr(resourceID.Key.Name, resourceID.Key.Zone, svcNegCR, &errList) + if negDeleted { + deletedNegs[negtypes.NegInfo{Name: resourceID.Key.Name, Zone: resourceID.Key.Zone}] = struct{}{} + } + shouldDeleteNegCR = shouldDeleteNegCR && negDeleted } if deleteByZone { manager.logger.V(2).Info("Deletion candidate has 0 NEG reference", "svcneg", klog.KObj(svcNegCR), "svcNegCR", svcNegCR) for _, zone := range zones { - shouldDeleteNegCR = shouldDeleteNegCR && manager.deleteNegOrReportErr(svcNegCR.Name, zone, svcNegCR, &errList) + negDeleted := manager.deleteNegOrReportErr(svcNegCR.Name, zone, svcNegCR, &errList) + if negDeleted { + deletedNegs[negtypes.NegInfo{Name: svcNegCR.Name, Zone: zone}] = struct{}{} + } + shouldDeleteNegCR = shouldDeleteNegCR && negDeleted + } + } + // Since no more NEG deletion will be happening at this point, and NEG + // CR will not be deleted, clear the reference for deleted NEGs in the + // NEG CR. + if len(deletedNegs) != 0 { + updatedCR := svcNegCR.DeepCopy() + + if errs := ensureExistingNegRef(updatedCR, deletedNegs); len(errs) != 0 { + errList = append(errList, errs...) + } + if _, err := patchNegStatus(manager.svcNegClient, *svcNegCR, *updatedCR); err != nil { + errList = append(errList, err) } } @@ -655,9 +678,13 @@ func (manager *syncerManager) processNEGDeletionCandidate(svcNegCR *negv1beta1.S } manager.logger.V(2).Info("Deleting NEG CR", "svcneg", klog.KObj(svcNegCR)) - if err := deleteSvcNegCR(manager.svcNegClient, svcNegCR, manager.logger); err != nil { + err := deleteSvcNegCR(manager.svcNegClient, svcNegCR, manager.logger) + if err != nil { + manager.logger.V(2).Error(err, "Error when deleting NEG CR", "svcneg", klog.KObj(svcNegCR)) errList = append(errList, err) + return } + manager.logger.V(2).Info("Deleted NEG CR", "svcneg", klog.KObj(svcNegCR)) }() return errList @@ -685,6 +712,25 @@ func (manager *syncerManager) deleteNegOrReportErr(name, zone string, svcNegCR * return true } +// ensureExistingNegRef removes NEG refs in NEG CR for NEGs that have been +// deleted successfully. +func ensureExistingNegRef(neg *negv1beta1.ServiceNetworkEndpointGroup, deletedNegs map[negtypes.NegInfo]struct{}) []error { + var updatedNegRef []negv1beta1.NegObjectReference + var errList []error + for _, negRef := range neg.Status.NetworkEndpointGroups { + negInfo, err := negtypes.NegInfoFromNegRef(negRef) + if err != nil { + errList = append(errList, err) + continue + } + if _, exists := deletedNegs[negInfo]; !exists { + updatedNegRef = append(updatedNegRef, negRef) + } + } + neg.Status.NetworkEndpointGroups = updatedNegRef + return errList +} + // ensureDeleteNetworkEndpointGroup ensures neg is delete from zone func (manager *syncerManager) ensureDeleteNetworkEndpointGroup(name, zone string, expectedDesc *utils.NegDescription) error { neg, err := manager.cloud.GetNetworkEndpointGroup(name, zone, meta.VersionGA, manager.logger) diff --git a/pkg/neg/manager_test.go b/pkg/neg/manager_test.go index 18816d7e20..2a9c149a37 100644 --- a/pkg/neg/manager_test.go +++ b/pkg/neg/manager_test.go @@ -35,11 +35,13 @@ import ( apiv1 "k8s.io/api/core/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" apitypes "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/fake" + k8stesting "k8s.io/client-go/testing" "k8s.io/client-go/tools/record" negv1beta1 "k8s.io/ingress-gce/pkg/apis/svcneg/v1beta1" "k8s.io/ingress-gce/pkg/neg/metrics/metricscollector" @@ -47,6 +49,7 @@ import ( "k8s.io/ingress-gce/pkg/neg/types" negtypes "k8s.io/ingress-gce/pkg/neg/types" svcnegclient "k8s.io/ingress-gce/pkg/svcneg/client/clientset/versioned" + negfake "k8s.io/ingress-gce/pkg/svcneg/client/clientset/versioned/fake" "k8s.io/ingress-gce/pkg/utils/common" namer_util "k8s.io/ingress-gce/pkg/utils/namer" "k8s.io/ingress-gce/pkg/utils/zonegetter" @@ -1189,7 +1192,10 @@ func TestGarbageCollectionNegCrdEnabled(t *testing.T) { expectNegGC bool expectCrGC bool expectErr bool - gcError error + negGCError error + negGCErrorZone []string + negCrGCError error + expectedNegCount int // expectGenNamedNegGC indicates that the Neg GC only occurs if using a generated name // expectNegGC will take precedence over this value @@ -1250,6 +1256,7 @@ func TestGarbageCollectionNegCrdEnabled(t *testing.T) { expectNegGC: false, expectCrGC: true, negDesc: wrongDesc.String(), + expectedNegCount: 2, }, {desc: "neg config not in svcPortMap, empty neg list, neg has empty description", negsExist: true, @@ -1258,6 +1265,7 @@ func TestGarbageCollectionNegCrdEnabled(t *testing.T) { expectGenNamedNegGC: true, expectCrGC: true, negDesc: "", + expectedNegCount: 2, }, {desc: "neg config in svcPortMap, marked for deletion", negsExist: true, @@ -1265,6 +1273,7 @@ func TestGarbageCollectionNegCrdEnabled(t *testing.T) { desiredConfig: true, expectNegGC: false, expectCrGC: false, + expectedNegCount: 2, }, {desc: "neg config in svcPortMap", negsExist: true, @@ -1272,6 +1281,7 @@ func TestGarbageCollectionNegCrdEnabled(t *testing.T) { desiredConfig: true, expectNegGC: false, expectCrGC: false, + expectedNegCount: 2, }, {desc: "negs don't exist, config not in svcPortMap", negsExist: false, @@ -1288,16 +1298,38 @@ func TestGarbageCollectionNegCrdEnabled(t *testing.T) { markedForDeletion: true, expectCrGC: true, expectErr: false, - gcError: &googleapi.Error{Code: http.StatusBadRequest}, + negGCError: &googleapi.Error{Code: http.StatusBadRequest}, + negGCErrorZone: []string{negtypes.TestZone1, negtypes.TestZone2}, negDesc: matchingDesc.String(), }, - {desc: "error during neg gc, config not in svcPortMap", + {desc: "error on all NEG deletions during neg gc, config not in svcPortMap, NEG CR should still have all NEG ref", negsExist: true, markedForDeletion: true, expectCrGC: true, expectErr: true, - gcError: fmt.Errorf("gc-error"), + negGCError: fmt.Errorf("neg-gc-error"), + negGCErrorZone: []string{negtypes.TestZone1, negtypes.TestZone2}, negDesc: matchingDesc.String(), + expectedNegCount: 2, + }, + {desc: "error on one NEG deletion during neg gc, config not in svcPortMap, NEG CR should not have the deleted NEG ref", + negsExist: true, + markedForDeletion: true, + expectCrGC: true, + expectErr: true, + negGCError: fmt.Errorf("neg-gc-error"), + negGCErrorZone: []string{negtypes.TestZone1}, + negDesc: matchingDesc.String(), + expectedNegCount: 1, + }, + {desc: "error when deleting NEG CR during neg gc, config not in svcPortMap, NEG CR should not have any stale NEG ref", + negsExist: true, + markedForDeletion: false, // Make sure deletion timestamp is not set so we will trigger error when delete NEG CR + expectErr: true, + expectCrGC: false, // NEG CR deletion should fail due to error from deletion API call + negCrGCError: fmt.Errorf("neg-cr-gc-error"), + negDesc: matchingDesc.String(), + expectedNegCount: 0, }, } @@ -1310,6 +1342,13 @@ func TestGarbageCollectionNegCrdEnabled(t *testing.T) { manager, testCloud := NewTestSyncerManager(kubeClient) svcNegClient := manager.svcNegClient + if tc.negCrGCError != nil { + svcNegClientFake := svcNegClient.(*negfake.Clientset) + svcNegClientFake.PrependReactor("delete", "servicenetworkendpointgroups", func(action k8stesting.Action) (handled bool, ret runtime.Object, err error) { + return true, &negv1beta1.ServiceNetworkEndpointGroup{}, tc.negCrGCError + }) + } + manager.serviceLister.Add(svc) fakeNegCloud := manager.cloud @@ -1349,7 +1388,7 @@ func TestGarbageCollectionNegCrdEnabled(t *testing.T) { } if _, err := manager.svcNegClient.NetworkingV1beta1().ServiceNetworkEndpointGroups(cr.Namespace).Create(context2.TODO(), &cr, metav1.CreateOptions{}); err != nil { - t.Fatalf("failed to create neg cr") + t.Fatalf("failed to create neg cr: %v", err) } crs := getNegCRs(t, svcNegClient, testServiceNamespace) @@ -1370,19 +1409,20 @@ func TestGarbageCollectionNegCrdEnabled(t *testing.T) { } } - if tc.gcError != nil { + if tc.negGCError != nil { mockCloud := testCloud.Compute().(*cloud.MockGCE) mockNEG := mockCloud.NetworkEndpointGroups().(*cloud.MockNetworkEndpointGroups) - for _, zone := range []string{negtypes.TestZone1, negtypes.TestZone2} { - mockNEG.DeleteError[*meta.ZonalKey(negName, zone)] = tc.gcError + for _, zone := range tc.negGCErrorZone { + mockNEG.DeleteError[*meta.ZonalKey(negName, zone)] = tc.negGCError } } err := manager.GC() if !tc.expectErr && err != nil { t.Fatalf("failed to GC: %v", err) - } else if tc.expectErr && err == nil { + } + if tc.expectErr && err == nil { t.Errorf("expected GC to error") } @@ -1391,13 +1431,14 @@ func TestGarbageCollectionNegCrdEnabled(t *testing.T) { t.Errorf("failed getting negs from cloud: %s", err) } - numExistingNegs, negsDeleted := checkForNegDeletions(negs, negName) + numExistingNegs := checkForNegDeletions(negs, negName) expectNegGC := tc.expectNegGC || (tc.expectGenNamedNegGC && !customName) - if tc.negsExist && expectNegGC && !negsDeleted { + if tc.negsExist && expectNegGC && numExistingNegs != 0 { t.Errorf("expected negs to be GCed, but found %d", numExistingNegs) - } else if tc.negsExist && !expectNegGC && numExistingNegs != 2 { - t.Errorf("expected two negs in the cloud, but found %d", numExistingNegs) + } + if tc.negsExist && !expectNegGC && numExistingNegs != tc.expectedNegCount { + t.Errorf("expected %d negs in the cloud, but found %d", tc.expectedNegCount, numExistingNegs) } crs = getNegCRs(t, svcNegClient, testServiceNamespace) @@ -1405,7 +1446,8 @@ func TestGarbageCollectionNegCrdEnabled(t *testing.T) { if tc.expectCrGC && !crDeleted { t.Errorf("expected neg %s to be deleted", negName) - } else if !tc.expectCrGC && crDeleted && !tc.markedForDeletion { + } + if !tc.expectCrGC && crDeleted && !tc.markedForDeletion { t.Errorf("expected neg %s to not be deleted", negName) } } @@ -1559,8 +1601,8 @@ func getNegObjectRefs(t *testing.T, cloud negtypes.NetworkEndpointGroupCloud, zo return negRefs } -// checkForNegDeletions checks that negs does not have a neg with the provided negName. If none exists, returns true, otherwise returns false the number of negs found with the name -func checkForNegDeletions(negs map[*meta.Key]*composite.NetworkEndpointGroup, negName string) (int, bool) { +// checkForNegDeletions gets the count of neg objects in negs that has the provided negName. +func checkForNegDeletions(negs map[*meta.Key]*composite.NetworkEndpointGroup, negName string) int { foundNegs := 0 for _, neg := range negs { if neg.Name == negName { @@ -1568,7 +1610,7 @@ func checkForNegDeletions(negs map[*meta.Key]*composite.NetworkEndpointGroup, ne } } - return foundNegs, foundNegs == 0 + return foundNegs } // checkForNegCRDeletion verifies that either no cr with name `negName` exists or a cr withe name `negName` has its deletion timestamp set diff --git a/pkg/neg/syncers/transaction.go b/pkg/neg/syncers/transaction.go index 1255f2f64c..cb2f62e5e3 100644 --- a/pkg/neg/syncers/transaction.go +++ b/pkg/neg/syncers/transaction.go @@ -424,7 +424,7 @@ func (s *transactionSyncer) ensureNetworkEndpointGroups() error { var errList []error var negObjRefs []negv1beta1.NegObjectReference - var negCrConflicted, clusterIDConflicted bool + var noopOnNEGStatus bool negsByLocation := make(map[string]int) for _, zone := range zones { var negObj negv1beta1.NegObjectReference @@ -447,8 +447,10 @@ func (s *transactionSyncer) ensureNetworkEndpointGroups() error { ) if err != nil { errList = append(errList, err) - clusterIDConflicted = clusterIDConflicted || errors.Is(err, utils.ErrClusterIDMismatch) - negCrConflicted = negCrConflicted || errors.Is(err, ErrNEGNameInUse) + if errors.Is(err, utils.ErrNEGUsedByAnotherSyncer) || errors.Is(err, ErrNEGNotOwnedByNEGController) { + noopOnNEGStatus = true + break + } } if s.svcNegClient != nil && err == nil { @@ -458,8 +460,8 @@ func (s *transactionSyncer) ensureNetworkEndpointGroups() error { } // Do not modify NEG Status if there is conflict within the same cluster - // because the CR is owned by a different syncer. - if !negCrConflicted || clusterIDConflicted { + // and namespace because the CR is owned by a different syncer. + if !noopOnNEGStatus { s.updateInitStatus(negObjRefs, errList) } diff --git a/pkg/neg/syncers/transaction_test.go b/pkg/neg/syncers/transaction_test.go index d646334773..693868584a 100644 --- a/pkg/neg/syncers/transaction_test.go +++ b/pkg/neg/syncers/transaction_test.go @@ -18,7 +18,6 @@ package syncers import ( "context" - context2 "context" "errors" "fmt" "net" @@ -1042,12 +1041,28 @@ func TestTransactionSyncerWithNegCR(t *testing.T) { testNegType := negtypes.VmIpPortEndpointType testCases := []struct { - desc string - negExists bool - negDesc string - crStatusPopulated bool - customName bool - expectErr bool + desc string + negExists bool + negDesc string + // crStatusPopulated indicates if the NEG CR in this cluster has NEG + // status populated, including NEG Initialized and Sync condition, and + // the list of NEG references. + crStatusPopulated bool + customName bool + expectErr bool + + // expectNoopOnNegStatus indicates if the NEG controller should do + // no-op on NEG CR Status. + // This occurs when the NEG controller/syncer doesn't own this NEG. + // Current, there are two kinds of situation: + // 1. When we detect a conflict on NEG description within the same + // cluster in the same namespace. This implies the CR is owned by a + // different syncer. + // 2. Custom named NEG without NEG description. In this case, customers + // may have created this NEG outside of the controller or through + // some other integration. GKE managed custom named NEG would never + // have no description, so this is another case where our Controller + // might not be the one owning this NEG. expectNoopOnNegStatus bool }{ { @@ -1125,9 +1140,8 @@ func TestTransactionSyncerWithNegCR(t *testing.T) { ServiceName: testServiceName, Port: "80", }.String(), - crStatusPopulated: false, - expectErr: true, - expectNoopOnNegStatus: true, + crStatusPopulated: false, + expectErr: true, }, { desc: "Neg exists, with mismatched service in neg description", @@ -1207,7 +1221,7 @@ func TestTransactionSyncerWithNegCR(t *testing.T) { creationTS := v1.Date(2020, time.July, 23, 0, 0, 0, 0, time.UTC) //Create NEG CR for Syncer to update status on origCR := createNegCR(testNegName, creationTS, tc.crStatusPopulated, tc.crStatusPopulated, refs) - neg, err := negClient.NetworkingV1beta1().ServiceNetworkEndpointGroups(testServiceNamespace).Create(context2.Background(), origCR, v1.CreateOptions{}) + neg, err := negClient.NetworkingV1beta1().ServiceNetworkEndpointGroups(testServiceNamespace).Create(context.Background(), origCR, v1.CreateOptions{}) if err != nil { t.Errorf("Failed to create test NEG CR: %s", err) } @@ -1221,7 +1235,7 @@ func TestTransactionSyncerWithNegCR(t *testing.T) { t.Errorf("Expected error, but got none") } - negCR, err := negClient.NetworkingV1beta1().ServiceNetworkEndpointGroups(testServiceNamespace).Get(context2.Background(), testNegName, v1.GetOptions{}) + negCR, err := negClient.NetworkingV1beta1().ServiceNetworkEndpointGroups(testServiceNamespace).Get(context.Background(), testNegName, v1.GetOptions{}) if err != nil { t.Errorf("Failed to get NEG from neg client: %s", err) } @@ -1285,7 +1299,7 @@ func TestTransactionSyncerWithNegCR(t *testing.T) { } }) - negClient.NetworkingV1beta1().ServiceNetworkEndpointGroups(testServiceNamespace).Delete(context2.TODO(), testNegName, v1.DeleteOptions{}) + negClient.NetworkingV1beta1().ServiceNetworkEndpointGroups(testServiceNamespace).Delete(context.TODO(), testNegName, v1.DeleteOptions{}) syncer.cloud.DeleteNetworkEndpointGroup(testNegName, negtypes.TestZone1, syncer.NegSyncerKey.GetAPIVersion(), klog.TODO()) syncer.cloud.DeleteNetworkEndpointGroup(testNegName, negtypes.TestZone2, syncer.NegSyncerKey.GetAPIVersion(), klog.TODO()) @@ -1368,7 +1382,7 @@ func TestUpdateInitStatus(t *testing.T) { // Create NEG CR. creationTS := v1.Now() origCR := createNegCR(testNegName, creationTS, true, true, initialNegRefs) - svcNeg, err := svcNegClient.NetworkingV1beta1().ServiceNetworkEndpointGroups(testServiceNamespace).Create(context2.Background(), origCR, v1.CreateOptions{}) + svcNeg, err := svcNegClient.NetworkingV1beta1().ServiceNetworkEndpointGroups(testServiceNamespace).Create(context.Background(), origCR, v1.CreateOptions{}) if err != nil { t.Errorf("Failed to create test NEG CR: %s", err) } @@ -1406,7 +1420,7 @@ func TestUpdateInitStatus(t *testing.T) { // Inactive NEG refs should be added if there is any. syncer.updateInitStatus(activeNegList, nil) - negCR, err := svcNegClient.NetworkingV1beta1().ServiceNetworkEndpointGroups(testServiceNamespace).Get(context2.Background(), testNegName, v1.GetOptions{}) + negCR, err := svcNegClient.NetworkingV1beta1().ServiceNetworkEndpointGroups(testServiceNamespace).Get(context.Background(), testNegName, v1.GetOptions{}) if err != nil { t.Errorf("Failed to create test NEG CR: %s", err) } @@ -1512,7 +1526,7 @@ func TestUpdateStatus(t *testing.T) { // Since timestamp gets truncated to the second, there is a chance that the timestamps will be the same as LastTransitionTime or LastSyncTime so use creation TS from an earlier date creationTS := v1.Date(2020, time.July, 23, 0, 0, 0, 0, time.UTC) origCR := createNegCR(testNegName, creationTS, tc.populateConditions[negv1beta1.Initialized], tc.populateConditions[negv1beta1.Synced], tc.negRefs) - origCR, err := svcNegClient.NetworkingV1beta1().ServiceNetworkEndpointGroups(testServiceNamespace).Create(context2.Background(), origCR, v1.CreateOptions{}) + origCR, err := svcNegClient.NetworkingV1beta1().ServiceNetworkEndpointGroups(testServiceNamespace).Create(context.Background(), origCR, v1.CreateOptions{}) if err != nil { t.Errorf("Failed to create test NEG CR: %s", err) } @@ -1520,7 +1534,7 @@ func TestUpdateStatus(t *testing.T) { syncer.updateStatus(syncErr) - negCR, err := svcNegClient.NetworkingV1beta1().ServiceNetworkEndpointGroups(testServiceNamespace).Get(context2.Background(), testNegName, v1.GetOptions{}) + negCR, err := svcNegClient.NetworkingV1beta1().ServiceNetworkEndpointGroups(testServiceNamespace).Get(context.Background(), testNegName, v1.GetOptions{}) if err != nil { t.Errorf("Failed to create test NEG CR: %s", err) } diff --git a/pkg/neg/syncers/utils.go b/pkg/neg/syncers/utils.go index 2064f9794a..e362c2926c 100644 --- a/pkg/neg/syncers/utils.go +++ b/pkg/neg/syncers/utils.go @@ -57,7 +57,7 @@ const ( managedByEPSControllerValue = "endpointslice-controller.k8s.io" ) -var ErrNEGNameInUse = errors.New("NEG name is already in use") +var ErrNEGNotOwnedByNEGController = errors.New("NEG not managed by NEG Controller") // encodeEndpoint encodes ip and instance into a single string func encodeEndpoint(ip, instance, port string) string { @@ -152,12 +152,12 @@ func ensureNetworkEndpointGroup(svcNamespace, svcName, negName, zone, negService } if customName && neg.Description == "" { negLogger.Error(nil, "Found Neg with custom name but empty description") - return negv1beta1.NegObjectReference{}, fmt.Errorf("%w: found a custom named neg %s with an empty description", ErrNEGNameInUse, negName) + return negv1beta1.NegObjectReference{}, fmt.Errorf("%w: found a custom named neg %s with an empty description", ErrNEGNotOwnedByNEGController, negName) } if matches, err := utils.VerifyDescription(expectedDesc, neg.Description, negName, zone); !matches { negLogger.Error(err, "Neg Name is already in use") - // Wrap returned error from VerifyDescription() since we need to check if error is ErrClusterIDMismatch. - return negv1beta1.NegObjectReference{}, fmt.Errorf("%w: found conflicting description in neg %s: %w", ErrNEGNameInUse, negName, err) + // Wrap returned error from VerifyDescription() since we need to check if error is ErrNEGUsedByAnotherSyncer. + return negv1beta1.NegObjectReference{}, fmt.Errorf("found conflicting description in neg %s: %w", negName, err) } if networkEndpointType != negtypes.NonGCPPrivateEndpointType && diff --git a/pkg/utils/negdescription.go b/pkg/utils/negdescription.go index 0b630afe74..8a6bcb09a2 100644 --- a/pkg/utils/negdescription.go +++ b/pkg/utils/negdescription.go @@ -24,7 +24,9 @@ import ( "k8s.io/klog/v2" ) -var ErrClusterIDMismatch = errors.New("NEG has mismatched cluster ID") +var ( + ErrNEGUsedByAnotherSyncer = errors.New("NEG is used by another syncer in the same cluster and namespace") +) // Description stores the description for a BackendService. type NegDescription struct { @@ -64,15 +66,19 @@ func VerifyDescription(expectDesc NegDescription, descString, negName, zone stri if err != nil { klog.Warningf("Error unmarshalling Neg Description %s err:%s", negName, err) } else { - // Wrap the error to determine if the NEG desc conflict is due to cluster ID mismatch. - // When there is mismatch in NEG description, - // if the conflict occurs within the same cluster, NEG status should not be updated. - // otherwise, NEG status should have initialized=False condition. + // Wrap the error to determine if the NEG desc conflict occurs + // within the same cluster and namespace. + // When there is mismatch in NEG description, and the conflict + // occurs within the same cluster and namespace, NEG status should + // not be updated. + // Otherwise, NEG status should have initialized=False condition. if desc.ClusterUID != expectDesc.ClusterUID { - return false, fmt.Errorf("%w: expected description of NEG object %q/%q to be %+v, but got %+v", ErrClusterIDMismatch, zone, negName, expectDesc, desc) - } - if desc.Namespace != expectDesc.Namespace || desc.ServiceName != expectDesc.ServiceName || desc.Port != expectDesc.Port { return false, fmt.Errorf("expected description of NEG object %q/%q to be %+v, but got %+v", zone, negName, expectDesc, desc) + } else if desc.Namespace != expectDesc.Namespace { + return false, fmt.Errorf("expected description of NEG object %q/%q to be %+v, but got %+v", zone, negName, expectDesc, desc) + } + if desc.ServiceName != expectDesc.ServiceName || desc.Port != expectDesc.Port { + return false, fmt.Errorf("%w: expected description of NEG object %q/%q to be %+v, but got %+v", ErrNEGUsedByAnotherSyncer, zone, negName, expectDesc, desc) } } }