From 516bd6aa13160cd89cb21398a33ce4ffdf2c2014 Mon Sep 17 00:00:00 2001 From: Claudio Lorina Date: Tue, 22 Oct 2024 12:10:28 +0200 Subject: [PATCH] fix: network status in ForeignCluster resource This patch fixes the network status in the ForeignCluster resource. Previously, when the connection or the configuration was missing, no conditions were shown, so anyone looking at the ForeignCluster status could think that everything was correctly configured. Moreover, this patch makes the status check more robust: it collects all the errors first and writes them to the status only once we are sure that the module is enabled (i.e., at least one network resource is present). --- apis/core/v1beta1/foreigncluster_types.go | 5 +- .../crds/core.liqo.io_foreignclusters.yaml | 8 ++ .../foreigncluster-controller/conditions.go | 15 +++ .../foreigncluster_controller.go | 1 + .../core/foreigncluster-controller/status.go | 120 +++++++++++++++--- 5 files changed, 131 insertions(+), 18 deletions(-) diff --git a/apis/core/v1beta1/foreigncluster_types.go b/apis/core/v1beta1/foreigncluster_types.go index 463ff5ae6d..f21c0c24cf 100644 --- a/apis/core/v1beta1/foreigncluster_types.go +++ b/apis/core/v1beta1/foreigncluster_types.go @@ -96,8 +96,11 @@ const ( APIServerStatusCondition ConditionType = "APIServerStatus" // NETWORKING + // NetworkConfigurationStatusCondition tells whether the network configuration of the peer cluster is present. + NetworkConfigurationStatusCondition ConditionType = "NetworkConfigurationStatus" // NetworkConnectionStatusCondition shows the network connection status. NetworkConnectionStatusCondition ConditionType = "NetworkConnectionStatus" + NetworkGatewayPresenceCondition ConditionType = "NetworkGatewayPresence" // NetworkGatewayServerStatusCondition shows the network gateway server status. NetworkGatewayServerStatusCondition ConditionType = "NetworkGatewayServerStatus" // NetworkGatewayClientStatusCondition shows the network gateway client status. 
@@ -139,7 +142,7 @@ const ( // Condition contains details about state of a. type Condition struct { // Type of the condition. - // +kubebuilder:validation:Enum="APIServerStatus";"NetworkConnectionStatus";"NetworkGatewayServerStatus";"NetworkGatewayClientStatus";"AuthIdentityControlPlaneStatus";"AuthTenantStatus";"OffloadingVirtualNodeStatus";"OffloadingNodeStatus" + // +kubebuilder:validation:Enum="APIServerStatus";"NetworkConnectionStatus";"NetworkGatewayServerStatus";"NetworkGatewayClientStatus";"NetworkGatewayPresence";"NetworkConfigurationStatus";"AuthIdentityControlPlaneStatus";"AuthTenantStatus";"OffloadingVirtualNodeStatus";"OffloadingNodeStatus" // //nolint:lll // ignore long lines given by Kubebuilder marker annotations Type ConditionType `json:"type"` diff --git a/deployments/liqo/charts/liqo-crds/crds/core.liqo.io_foreignclusters.yaml b/deployments/liqo/charts/liqo-crds/crds/core.liqo.io_foreignclusters.yaml index 52ca8e4096..345fc3805b 100644 --- a/deployments/liqo/charts/liqo-crds/crds/core.liqo.io_foreignclusters.yaml +++ b/deployments/liqo/charts/liqo-crds/crds/core.liqo.io_foreignclusters.yaml @@ -107,6 +107,8 @@ spec: - NetworkConnectionStatus - NetworkGatewayServerStatus - NetworkGatewayClientStatus + - NetworkGatewayPresence + - NetworkConfigurationStatus - AuthIdentityControlPlaneStatus - AuthTenantStatus - OffloadingVirtualNodeStatus @@ -170,6 +172,8 @@ spec: - NetworkConnectionStatus - NetworkGatewayServerStatus - NetworkGatewayClientStatus + - NetworkGatewayPresence + - NetworkConfigurationStatus - AuthIdentityControlPlaneStatus - AuthTenantStatus - OffloadingVirtualNodeStatus @@ -231,6 +235,8 @@ spec: - NetworkConnectionStatus - NetworkGatewayServerStatus - NetworkGatewayClientStatus + - NetworkGatewayPresence + - NetworkConfigurationStatus - AuthIdentityControlPlaneStatus - AuthTenantStatus - OffloadingVirtualNodeStatus @@ -292,6 +298,8 @@ spec: - NetworkConnectionStatus - NetworkGatewayServerStatus - NetworkGatewayClientStatus + - 
NetworkGatewayPresence + - NetworkConfigurationStatus - AuthIdentityControlPlaneStatus - AuthTenantStatus - OffloadingVirtualNodeStatus diff --git a/pkg/liqo-controller-manager/core/foreigncluster-controller/conditions.go b/pkg/liqo-controller-manager/core/foreigncluster-controller/conditions.go index 1c8cd00741..2a59645e77 100644 --- a/pkg/liqo-controller-manager/core/foreigncluster-controller/conditions.go +++ b/pkg/liqo-controller-manager/core/foreigncluster-controller/conditions.go @@ -24,6 +24,9 @@ const ( connectionErrorReason = "ConnectionError" connectionErrorMessage = "The network connection with the foreign cluster is in error" + connectionMissingReason = "ConnectionMissing" + connectionMissingMessage = "There is no network connection with the foreign cluster" + gatewaysReadyReason = "GatewaysReady" gatewaysReadyMessage = "All gateway replicas are ready" @@ -33,6 +36,18 @@ const ( gatewaysNotReadyReason = "GatewaysNotReady" gatewaysNotReadyMessage = "All gateway replicas are not ready" + gatewayMissingReason = "GatewayMissing" + gatewayMissingMessage = "The gateway resource connecting to the foreign cluster is missing" + + gatewayPresentReason = "GatewayPresence" + gatewayPresentMessage = "There is a gateway connecting to the foreign cluster" + + networkConfigurationPresenceReason = "NetworkConfigurationPresence" + networkConfigurationPresenceMessage = "The network configuration of the peer cluster is present" + + networkConfigurationMissingReason = "NetworkConfigurationMissing" + networkConfigurationMissingMessage = "The network configuration for the connection with the foreign cluster is missing" + tenantReadyReason = "TenantReady" tenantReadyMessage = "The tenant has been successfully configured" diff --git a/pkg/liqo-controller-manager/core/foreigncluster-controller/foreigncluster_controller.go b/pkg/liqo-controller-manager/core/foreigncluster-controller/foreigncluster_controller.go index 764df12403..e075ee6c23 100644 --- 
a/pkg/liqo-controller-manager/core/foreigncluster-controller/foreigncluster_controller.go +++ b/pkg/liqo-controller-manager/core/foreigncluster-controller/foreigncluster_controller.go @@ -170,6 +170,7 @@ func (r *ForeignClusterReconciler) SetupWithManager(mgr ctrl.Manager, workers in return ctrl.NewControllerManagedBy(mgr).Named(consts.CtrlForeignCluster). For(&liqov1beta1.ForeignCluster{}, builder.WithPredicates(foreignClusterPredicate)). + Watches(&networkingv1beta1.Configuration{}, handler.EnqueueRequestsFromMapFunc(r.foreignclusterEnqueuer)). Watches(&networkingv1beta1.Connection{}, handler.EnqueueRequestsFromMapFunc(r.foreignclusterEnqueuer)). Watches(&networkingv1beta1.GatewayServer{}, handler.EnqueueRequestsFromMapFunc(r.foreignclusterEnqueuer)). Watches(&networkingv1beta1.GatewayClient{}, handler.EnqueueRequestsFromMapFunc(r.foreignclusterEnqueuer)). diff --git a/pkg/liqo-controller-manager/core/foreigncluster-controller/status.go b/pkg/liqo-controller-manager/core/foreigncluster-controller/status.go index c1c4fde08f..eb57de3642 100644 --- a/pkg/liqo-controller-manager/core/foreigncluster-controller/status.go +++ b/pkg/liqo-controller-manager/core/foreigncluster-controller/status.go @@ -35,6 +35,12 @@ import ( "github.com/liqotech/liqo/pkg/utils/pod" ) +type statusException struct { + liqov1beta1.ConditionStatusType + Reason string + Message string +} + func (r *ForeignClusterReconciler) clearStatusExceptConditions(foreignCluster *liqov1beta1.ForeignCluster) { foreignCluster.Status = liqov1beta1.ForeignClusterStatus{ Role: liqov1beta1.UnknownRole, @@ -56,13 +62,8 @@ func (r *ForeignClusterReconciler) clearStatusExceptConditions(foreignCluster *l } } -func (r *ForeignClusterReconciler) handleNetworkingModuleStatus(ctx context.Context, - fc *liqov1beta1.ForeignCluster, moduleEnabled bool) error { - if !moduleEnabled { - clearModule(&fc.Status.Modules.Networking) - return nil - } - +func (r *ForeignClusterReconciler) handleConnectionStatus(ctx context.Context, 
+ fc *liqov1beta1.ForeignCluster, statusExceptions map[liqov1beta1.ConditionType]statusException) error { clusterID := fc.Spec.ClusterID connection, err := getters.GetConnectionByClusterID(ctx, r.Client, string(clusterID)) @@ -70,6 +71,11 @@ func (r *ForeignClusterReconciler) handleNetworkingModuleStatus(ctx context.Cont case errors.IsNotFound(err): klog.V(6).Infof("Connection resource not found for ForeignCluster %q", clusterID) fcutils.DeleteModuleCondition(&fc.Status.Modules.Networking, liqov1beta1.NetworkConnectionStatusCondition) + statusExceptions[liqov1beta1.NetworkConnectionStatusCondition] = statusException{ + ConditionStatusType: liqov1beta1.ConditionStatusNotReady, + Reason: connectionMissingReason, + Message: connectionMissingMessage, + } case err != nil: klog.Errorf("an error occurred while getting the Connection resource for the ForeignCluster %q: %s", clusterID, err) return err @@ -90,15 +96,38 @@ func (r *ForeignClusterReconciler) handleNetworkingModuleStatus(ctx context.Cont connectionErrorReason, connectionErrorMessage) } } + return nil +} + +func (r *ForeignClusterReconciler) handleGatewaysStatus(ctx context.Context, + fc *liqov1beta1.ForeignCluster, statusExceptions map[liqov1beta1.ConditionType]statusException) error { + clusterID := fc.Spec.ClusterID + + gwServer, errServer := getters.GetGatewayServerByClusterID(ctx, r.Client, clusterID) + gwClient, errClient := getters.GetGatewayClientByClusterID(ctx, r.Client, clusterID) + + if errors.IsNotFound(errServer) && errors.IsNotFound(errClient) { + klog.V(6).Infof("Both GatewayServer and GatewayClient resources not found for ForeignCluster %q", clusterID) + statusExceptions[liqov1beta1.NetworkGatewayPresenceCondition] = statusException{ + ConditionStatusType: liqov1beta1.ConditionStatusNotReady, + Reason: gatewayMissingReason, + Message: gatewayMissingMessage, + } + } else { + statusExceptions[liqov1beta1.NetworkGatewayPresenceCondition] = statusException{ + ConditionStatusType: 
liqov1beta1.ConditionStatusReady, + Reason: gatewayPresentReason, + Message: gatewayPresentMessage, + } + } - gwServer, err := getters.GetGatewayServerByClusterID(ctx, r.Client, clusterID) switch { - case errors.IsNotFound(err): + case errors.IsNotFound(errServer): klog.V(6).Infof("GatewayServer resource not found for ForeignCluster %q", clusterID) fcutils.DeleteModuleCondition(&fc.Status.Modules.Networking, liqov1beta1.NetworkGatewayServerStatusCondition) - case err != nil: - klog.Errorf("an error occurred while getting the GatewayServer resource for the ForeignCluster %q: %s", clusterID, err) - return err + case errServer != nil: + klog.Errorf("an error occurred while getting the GatewayServer resource for the ForeignCluster %q: %s", clusterID, errServer) + return errServer default: fcutils.EnableModuleNetworking(fc) gwDeployment := &appsv1.Deployment{ @@ -127,14 +156,13 @@ func (r *ForeignClusterReconciler) handleNetworkingModuleStatus(ctx context.Cont } } - gwClient, err := getters.GetGatewayClientByClusterID(ctx, r.Client, clusterID) switch { - case errors.IsNotFound(err): + case errors.IsNotFound(errClient): klog.V(6).Infof("GatewayClient resource not found for ForeignCluster %q", clusterID) fcutils.DeleteModuleCondition(&fc.Status.Modules.Networking, liqov1beta1.NetworkGatewayClientStatusCondition) - case err != nil: - klog.Errorf("an error occurred while getting the GatewayClient resource for the ForeignCluster %q: %s", clusterID, err) - return err + case errClient != nil: + klog.Errorf("an error occurred while getting the GatewayClient resource for the ForeignCluster %q: %s", clusterID, errClient) + return errClient default: fcutils.EnableModuleNetworking(fc) gwDeployment := &appsv1.Deployment{ @@ -166,6 +194,64 @@ func (r *ForeignClusterReconciler) handleNetworkingModuleStatus(ctx context.Cont return nil } +func (r *ForeignClusterReconciler) handleNetworkConfigurationStatus(ctx context.Context, + fc *liqov1beta1.ForeignCluster, statusExceptions 
map[liqov1beta1.ConditionType]statusException) error { + clusterID := fc.Spec.ClusterID + _, err := getters.GetConfigurationByClusterID(ctx, r.Client, clusterID) + switch { + case errors.IsNotFound(err): + klog.V(6).Infof("Configuration resource not found for ForeignCluster %q", clusterID) + fcutils.DeleteModuleCondition(&fc.Status.Modules.Networking, liqov1beta1.NetworkConfigurationStatusCondition) + statusExceptions[liqov1beta1.NetworkConfigurationStatusCondition] = statusException{ + ConditionStatusType: liqov1beta1.ConditionStatusNotReady, + Reason: networkConfigurationMissingReason, + Message: networkConfigurationMissingMessage, + } + case err != nil: + klog.Errorf("an error occurred while getting the Configuration resource for the ForeignCluster %q: %s", clusterID, err) + return err + default: + fcutils.EnableModuleNetworking(fc) + fcutils.EnsureModuleCondition(&fc.Status.Modules.Networking, + liqov1beta1.NetworkConfigurationStatusCondition, liqov1beta1.ConditionStatusReady, + networkConfigurationPresenceReason, networkConfigurationPresenceMessage) + } + return nil +} + +func (r *ForeignClusterReconciler) handleNetworkingModuleStatus(ctx context.Context, + fc *liqov1beta1.ForeignCluster, moduleEnabled bool) error { + if !moduleEnabled { + clearModule(&fc.Status.Modules.Networking) + return nil + } + + statusExceptions := map[liqov1beta1.ConditionType]statusException{} + + if err := r.handleNetworkConfigurationStatus(ctx, fc, statusExceptions); err != nil { + return err + } + + if err := r.handleGatewaysStatus(ctx, fc, statusExceptions); err != nil { + return err + } + + if err := r.handleConnectionStatus(ctx, fc, statusExceptions); err != nil { + return err + } + + // Write the exception in the status if the module is enabled + if fc.Status.Modules.Networking.Enabled { + for condition, condDescription := range statusExceptions { + fcutils.EnsureModuleCondition(&fc.Status.Modules.Networking, + condition, condDescription.ConditionStatusType, + 
condDescription.Reason, condDescription.Message) + } + } + + return nil +} + func (r *ForeignClusterReconciler) handleAuthenticationModuleStatus(ctx context.Context, fc *liqov1beta1.ForeignCluster, moduleEnabled bool, consumer, provider *bool) error { if !moduleEnabled {